diff --git a/ChangeLog.txt b/ChangeLog.txt index c8b554b0..69e1262a 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -11,6 +11,16 @@ incorrectly encode certain JPEG images when quality=100 and the fast integer forward DCT were used. This was known to cause 'make test' to fail when the library was built with '-march=haswell' on x86 systems. +[3] Fixed an issue whereby libjpeg-turbo would crash when built with the latest +& greatest development version of the Clang/LLVM compiler. This was caused by +an x86-64 ABI conformance issue in some of libjpeg-turbo's 64-bit SSE2 SIMD +routines. Those routines were incorrectly using a 64-bit mov instruction to +transfer a 32-bit JDIMENSION argument, whereas the x86-64 ABI allows the upper +(unused) 32 bits of a 32-bit argument's register to be undefined. The new +Clang/LLVM optimizer uses load combining to transfer multiple adjacent 32-bit +structure members into a single 64-bit register, and this exposed the ABI +conformance issue. + 1.4.1 ===== diff --git a/jmorecfg.h b/jmorecfg.h index e86ccc60..8e3007c4 100644 --- a/jmorecfg.h +++ b/jmorecfg.h @@ -162,7 +162,9 @@ typedef long INT32; * images up to 64K*64K due to 16-bit fields in SOF markers. Therefore * "unsigned int" is sufficient on all machines. However, if you need to * handle larger images and you don't mind deviating from the spec, you - * can change this datatype. + * can change this datatype. (Note that changing this datatype will + * potentially require modifying the SIMD code. The x86-64 SIMD extensions, + * in particular, assume a 32-bit JDIMENSION.) */ typedef unsigned int JDIMENSION; diff --git a/simd/jccolext-sse2-64.asm b/simd/jccolext-sse2-64.asm index 7bd6d016..7ad43438 100644 --- a/simd/jccolext-sse2-64.asm +++ b/simd/jccolext-sse2-64.asm @@ -50,14 +50,14 @@ EXTN(jsimd_rgb_ycc_convert_sse2): collect_args push rbx - mov rcx, r10 + mov ecx, r10d test rcx,rcx jz near .return push rcx mov rsi, r12 - mov rcx, r13 + mov ecx, r13d mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY] mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY] mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY] diff --git a/simd/jcgryext-sse2-64.asm b/simd/jcgryext-sse2-64.asm index 5af02e04..82c0fc81 100644 --- a/simd/jcgryext-sse2-64.asm +++ b/simd/jcgryext-sse2-64.asm @@ -50,14 +50,14 @@ EXTN(jsimd_rgb_gray_convert_sse2): collect_args push rbx - mov rcx, r10 + mov ecx, r10d test rcx,rcx jz near .return push rcx mov rsi, r12 - mov rcx, r13 + mov ecx, r13d mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY] lea rdi, [rdi+rcx*SIZEOF_JSAMPROW] diff --git a/simd/jcsample-sse2-64.asm b/simd/jcsample-sse2-64.asm index f32fb4fc..7693285c 100644 --- a/simd/jcsample-sse2-64.asm +++ b/simd/jcsample-sse2-64.asm @@ -49,11 +49,11 @@ EXTN(jsimd_h2v1_downsample_sse2): mov rbp,rsp collect_args - mov rcx, r13 + mov ecx, r13d shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols) jz near .return - mov rdx, r10 + mov edx, r10d ; -- expand_right_edge @@ -90,7 +90,7 @@ EXTN(jsimd_h2v1_downsample_sse2): ; -- h2v1_downsample - mov rax, r12 ; rowctr + mov eax, r12d ; rowctr test eax,eax jle near .return @@ -193,11 +193,11 @@ EXTN(jsimd_h2v2_downsample_sse2): mov rbp,rsp collect_args - mov rcx, r13 + mov ecx, r13d shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols) jz near .return - mov rdx, r10 + mov edx, r10d ; -- expand_right_edge @@ -234,7 +234,7 @@ EXTN(jsimd_h2v2_downsample_sse2): ; -- h2v2_downsample - mov rax, r12 ; rowctr + mov eax, r12d ; rowctr test rax,rax jle near .return diff --git a/simd/jdcolext-sse2-64.asm b/simd/jdcolext-sse2-64.asm index bfd1f35d..d356e659 100644 --- a/simd/jdcolext-sse2-64.asm +++ b/simd/jdcolext-sse2-64.asm @@ -52,14 +52,14 @@ EXTN(jsimd_ycc_rgb_convert_sse2): collect_args push rbx - mov rcx, r10 ; num_cols + mov ecx, r10d ; num_cols test rcx,rcx jz near .return push rcx mov rdi, r11 - mov rcx, r12 + mov ecx, r12d mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] diff --git a/simd/jdmrgext-sse2-64.asm b/simd/jdmrgext-sse2-64.asm index ff127b57..989d7f17 100644 --- a/simd/jdmrgext-sse2-64.asm +++ b/simd/jdmrgext-sse2-64.asm @@ -52,14 +52,14 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): collect_args push rbx - mov rcx, r10 ; col + mov ecx, r10d ; col test rcx,rcx jz near .return push rcx mov rdi, r11 - mov rcx, r12 + mov ecx, r12d mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] @@ -455,10 +455,10 @@ EXTN(jsimd_h2v2_merged_upsample_sse2): collect_args push rbx - mov rax, r10 + mov eax, r10d mov rdi, r11 - mov rcx, r12 + mov ecx, r12d mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] diff --git a/simd/jdsample-sse2-64.asm b/simd/jdsample-sse2-64.asm index 335ce2aa..2287c00e 100644 --- a/simd/jdsample-sse2-64.asm +++ b/simd/jdsample-sse2-64.asm @@ -67,7 +67,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2): mov rbp,rsp collect_args - mov rax, r11 ; colctr + mov eax, r11d ; colctr test rax,rax jz near .return @@ -214,7 +214,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2): collect_args push rbx - mov rax, r11 ; colctr + mov eax, r11d ; colctr test rax,rax jz near .return @@ -506,7 +506,7 @@ EXTN(jsimd_h2v1_upsample_sse2): mov rbp,rsp collect_args - mov rdx, r11 + mov edx, r11d add rdx, byte (2*SIZEOF_XMMWORD)-1 and rdx, byte -(2*SIZEOF_XMMWORD) jz near .return @@ -596,7 +596,7 @@ EXTN(jsimd_h2v2_upsample_sse2): collect_args push rbx - mov rdx, r11 + mov edx, r11d add rdx, byte (2*SIZEOF_XMMWORD)-1 and rdx, byte -(2*SIZEOF_XMMWORD) jz near .return diff --git a/simd/jidctflt-sse2-64.asm b/simd/jidctflt-sse2-64.asm index 32e4ec22..95bd4dcd 100644 --- a/simd/jidctflt-sse2-64.asm +++ b/simd/jidctflt-sse2-64.asm @@ -326,7 +326,7 @@ EXTN(jsimd_idct_float_sse2): mov rax, [original_rbp] lea rsi, [workspace] ; FAST_FLOAT * wsptr mov rdi, r12 ; (JSAMPROW *) - mov rax, r13 + mov eax, r13d mov rcx, DCTSIZE/4 ; ctr .rowloop: diff --git a/simd/jidctfst-sse2-64.asm b/simd/jidctfst-sse2-64.asm index bcc3e3a6..0f864291 100644 --- a/simd/jidctfst-sse2-64.asm +++ b/simd/jidctfst-sse2-64.asm @@ -323,7 +323,7 @@ EXTN(jsimd_idct_ifast_sse2): mov rax, [original_rbp] mov rdi, r12 ; (JSAMPROW *) - mov rax, r13 + mov eax, r13d ; -- Even part diff --git a/simd/jidctint-sse2-64.asm b/simd/jidctint-sse2-64.asm index 32bbfd8d..1cc30866 100644 --- a/simd/jidctint-sse2-64.asm +++ b/simd/jidctint-sse2-64.asm @@ -515,7 +515,7 @@ EXTN(jsimd_idct_islow_sse2): mov rax, [original_rbp] mov rdi, r12 ; (JSAMPROW *) - mov rax, r13 + mov eax, r13d ; -- Even part diff --git a/simd/jidctred-sse2-64.asm b/simd/jidctred-sse2-64.asm index dad43d90..02b155a8 100644 --- a/simd/jidctred-sse2-64.asm +++ b/simd/jidctred-sse2-64.asm @@ -312,7 +312,7 @@ EXTN(jsimd_idct_4x4_sse2): mov rax, [original_rbp] mov rdi, r12 ; (JSAMPROW *) - mov rax, r13 + mov eax, r13d ; -- Even part @@ -521,7 +521,7 @@ EXTN(jsimd_idct_2x2_sse2): ; ---- Pass 2: process rows, store into output array. mov rdi, r12 ; (JSAMPROW *) - mov rax, r13 + mov eax, r13d ; | input:| result:| ; | A0 B0 | | diff --git a/simd/jquantf-sse2-64.asm b/simd/jquantf-sse2-64.asm index 20e815f7..cf7f0d82 100644 --- a/simd/jquantf-sse2-64.asm +++ b/simd/jquantf-sse2-64.asm @@ -50,7 +50,7 @@ EXTN(jsimd_convsamp_float_sse2): packsswb xmm7,xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..) mov rsi, r10 - mov rax, r11 + mov eax, r11d mov rdi, r12 mov rcx, DCTSIZE/2 .convloop: diff --git a/simd/jquanti-sse2-64.asm b/simd/jquanti-sse2-64.asm index 50b8dce7..b61f4db2 100644 --- a/simd/jquanti-sse2-64.asm +++ b/simd/jquanti-sse2-64.asm @@ -50,7 +50,7 @@ EXTN(jsimd_convsamp_sse2): psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} mov rsi, r10 - mov rax, r11 + mov eax, r11d mov rdi, r12 mov rcx, DCTSIZE/4 .convloop: