From 2091870c9f229868e9cd9379082b15e8bd007e31 Mon Sep 17 00:00:00 2001 From: DRC Date: Tue, 19 Jul 2011 09:15:20 +0000 Subject: [PATCH 1/2] Oops. Apparently forgot to change the description when borrowing this from VirtualGL git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.0.x@673 632fc199-4ca6-4c93-a231-07263d6284db --- release/deb-control.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release/deb-control.tmpl b/release/deb-control.tmpl index 55890ea7..0a3f11fa 100644 --- a/release/deb-control.tmpl +++ b/release/deb-control.tmpl @@ -5,7 +5,7 @@ Priority: optional Architecture: {__ARCH} Essential: no Maintainer: The libjpeg-turbo Project [http://www.libjpeg-turbo.org] -Description: A toolkit for displaying OpenGL applications to thin clients +Description: A SIMD-accelerated JPEG codec which provides both the libjpeg and TurboJPEG APIs libjpeg-turbo is a high-speed version of libjpeg for x86 and x86-64 processors which uses SIMD instructions (MMX, SSE2, etc.) to accelerate baseline JPEG compression and decompression. libjpeg-turbo is generally 2-4x as fast From 795e6ad334d86feb4ba8e509dd126a995a8f9972 Mon Sep 17 00:00:00 2001 From: DRC Date: Thu, 1 Dec 2011 11:14:18 +0000 Subject: [PATCH 2/2] Fixed non-fatal out-of-bounds read in SSE2 SIMD code reported by valgrind when decompressing a JPEG image to a bitmap buffer whose size was not a multiple of 16 bytes. git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.0.x@729 632fc199-4ca6-4c93-a231-07263d6284db --- ChangeLog.txt | 6 +++++ simd/jdclrss2-64.asm | 53 ++++++++++++++++++++++++++++++++++++++++++++ simd/jdclrss2.asm | 53 ++++++++++++++++++++++++++++++++++++++++++++ simd/jdmrgss2-64.asm | 53 ++++++++++++++++++++++++++++++++++++++++++++ simd/jdmrgss2.asm | 53 ++++++++++++++++++++++++++++++++++++++++++++ simd/jsimdext.inc | 2 ++ 6 files changed, 220 insertions(+) diff --git a/ChangeLog.txt b/ChangeLog.txt index a10f8e3a..d482563e 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -45,6 +45,12 @@ occurred when the application was invoked using I/O redirection fail to compile if the Windows system headers were included before jpeglib.h. This issue was caused by a conflict in the definition of the INT32 type. +[9] Fixed out-of-bounds read in SSE2 SIMD code that occurred when decompressing +a JPEG image to a bitmap buffer whose size was not a multiple of 16 bytes. +This was more of an annoyance than an actual bug, since it did not cause any +actual run-time problems, but the issue showed up when running libjpeg-turbo in +valgrind. See http://crbug.com/72399 for more information. + 1.0.1 ===== diff --git a/simd/jdclrss2-64.asm b/simd/jdclrss2-64.asm index 4282bd26..0acf1885 100644 --- a/simd/jdclrss2-64.asm +++ b/simd/jdclrss2-64.asm @@ -292,6 +292,41 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + movd DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. + movd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. + test rcx, rcx + jz short .nextrow + mov BYTE [rdi], al +%else mov rax,rcx xor rcx, byte 0x0F shl rcx, 2 @@ -331,6 +366,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmC .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -415,6 +451,22 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD/4 .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_XMMWORD/8*4 + sub rcx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test rcx, rcx + jz short .nextrow + movd DWORD [rdi], xmmA +%else cmp rcx, byte SIZEOF_XMMWORD/16 jb near .nextrow mov rax,rcx @@ -454,6 +506,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmG .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jdclrss2.asm b/simd/jdclrss2.asm index 865fa824..71547bab 100644 --- a/simd/jdclrss2.asm +++ b/simd/jdclrss2.asm @@ -304,6 +304,41 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st7 + movq MMWORD [edi], xmmA + add edi, byte SIZEOF_MMWORD + sub ecx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_DWORD + jb short .column_st3 + movd DWORD [edi], xmmA + add edi, byte SIZEOF_DWORD + sub ecx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of eax to the output when it has enough + ; space. + movd eax, xmmA + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi], ax + add edi, byte SIZEOF_WORD + sub ecx, byte SIZEOF_WORD + shr eax, 16 +.column_st1: + ; Store the lower 1 byte of eax to the output when it has enough + ; space. + test ecx, ecx + jz short .nextrow + mov BYTE [edi], al +%else mov eax,ecx xor ecx, byte 0x0F shl ecx, 2 @@ -343,6 +378,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmC .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -428,6 +464,22 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD/4 .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD/8*4 + sub ecx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test ecx, ecx + jz short .nextrow + movd DWORD [edi], xmmA +%else cmp ecx, byte SIZEOF_XMMWORD/16 jb short .nextrow mov eax,ecx @@ -467,6 +519,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmG .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jdmrgss2-64.asm b/simd/jdmrgss2-64.asm index 121bb82b..36e25822 100644 --- a/simd/jdmrgss2-64.asm +++ b/simd/jdmrgss2-64.asm @@ -296,6 +296,41 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + movd DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. + movd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. + test rcx, rcx + jz short .endcolumn + mov BYTE [rdi], al +%else mov rax,rcx xor rcx, byte 0x0F shl rcx, 2 @@ -335,6 +370,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmE,xmmC .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -422,6 +458,22 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD/4 .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_XMMWORD/8*4 + sub rcx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test rcx, rcx + jz short .endcolumn + movd DWORD [rdi], xmmA +%else cmp rcx, byte SIZEOF_XMMWORD/16 jb near .endcolumn mov rax,rcx @@ -461,6 +513,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmE,xmmG .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jdmrgss2.asm b/simd/jdmrgss2.asm index 99b7eb9f..6a0dbd91 100644 --- a/simd/jdmrgss2.asm +++ b/simd/jdmrgss2.asm @@ -309,6 +309,41 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st7 + movq MMWORD [edi], xmmA + add edi, byte SIZEOF_MMWORD + sub ecx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_DWORD + jb short .column_st3 + movd DWORD [edi], xmmA + add edi, byte SIZEOF_DWORD + sub ecx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of eax to the output when it has enough + ; space. + movd eax, xmmA + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi], ax + add edi, byte SIZEOF_WORD + sub ecx, byte SIZEOF_WORD + shr eax, 16 +.column_st1: + ; Store the lower 1 byte of eax to the output when it has enough + ; space. + test ecx, ecx + jz short .endcolumn + mov BYTE [edi], al +%else mov eax,ecx xor ecx, byte 0x0F shl ecx, 2 @@ -348,6 +383,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmE,xmmC .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -436,6 +472,22 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD/4 .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD/2 + sub ecx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, 64 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test ecx, ecx + jz short .endcolumn + movd DWORD [edi], xmmA +%else cmp ecx, byte SIZEOF_XMMWORD/16 jb short .endcolumn mov eax,ecx @@ -475,6 +527,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmE,xmmG .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jsimdext.inc b/simd/jsimdext.inc index c4297f9c..12a04c2e 100644 --- a/simd/jsimdext.inc +++ b/simd/jsimdext.inc @@ -76,6 +76,8 @@ section .note.GNU-stack noalloc noexec nowrite progbits %define SEG_CONST .rodata progbits alloc noexec nowrite align=16 %endif +%define STRICT_MEMORY_ACCESS 1 + ; To make the code position-independent, append -DPIC to the commandline ; %define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC