x86: Fix "short jump is out of range" w/ NASM<2.04

This commit is contained in:
DRC
2017-07-07 15:15:19 -05:00
parent b0971e47d7
commit e5c1613ccd
2 changed files with 10 additions and 6 deletions

View File

@@ -64,6 +64,10 @@ timer, in order to improve the consistency of the results. Furthermore, the
`-warmup` option is now used to specify the amount of warmup time rather than
the number of warmup iterations.
11. Fixed an error (`short jump is out of range`) that occurred when assembling
the 32-bit x86 SIMD extensions with NASM versions prior to 2.04. This was a
regression introduced by 1.5 beta1[12].
1.5.1
=====

View File

@@ -1,7 +1,7 @@
; ;
; jchuff-sse2.asm - Huffman entropy encoding (SSE2) ; jchuff-sse2.asm - Huffman entropy encoding (SSE2)
; ;
; Copyright (C) 2009-2011, 2014-2016, D. R. Commander. ; Copyright (C) 2009-2011, 2014-2017, D. R. Commander.
; Copyright (C) 2015, Matthieu Darbois. ; Copyright (C) 2015, Matthieu Darbois.
; ;
; Based on the x86 SIMD extension for IJG JPEG library ; Based on the x86 SIMD extension for IJG JPEG library
@@ -288,13 +288,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
.BLOOP: .BLOOP:
bsf ecx, edx ; r = __builtin_ctzl(index); bsf ecx, edx ; r = __builtin_ctzl(index);
jz .ELOOP jz near .ELOOP
lea esi, [esi+ecx*2] ; k += r; lea esi, [esi+ecx*2] ; k += r;
shr edx, cl ; index >>= r; shr edx, cl ; index >>= r;
mov DWORD [esp+temp3], edx mov DWORD [esp+temp3], edx
.BRLOOP: .BRLOOP:
cmp ecx, 16 ; while (r > 15) { cmp ecx, 16 ; while (r > 15) {
jl .ERLOOP jl near .ERLOOP
sub ecx, 16 ; r -= 16; sub ecx, 16 ; r -= 16;
mov DWORD [esp+temp], ecx mov DWORD [esp+temp], ecx
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0]; mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];
@@ -348,7 +348,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
sub eax, esi sub eax, esi
shr eax, 1 shr eax, 1
bsf ecx, edx ; r = __builtin_ctzl(index); bsf ecx, edx ; r = __builtin_ctzl(index);
jz .ELOOP2 jz near .ELOOP2
shr edx, cl ; index >>= r; shr edx, cl ; index >>= r;
add ecx, eax add ecx, eax
lea esi, [esi+ecx*2] ; k += r; lea esi, [esi+ecx*2] ; k += r;
@@ -356,13 +356,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
jmp .BRLOOP2 jmp .BRLOOP2
.BLOOP2: .BLOOP2:
bsf ecx, edx ; r = __builtin_ctzl(index); bsf ecx, edx ; r = __builtin_ctzl(index);
jz .ELOOP2 jz near .ELOOP2
lea esi, [esi+ecx*2] ; k += r; lea esi, [esi+ecx*2] ; k += r;
shr edx, cl ; index >>= r; shr edx, cl ; index >>= r;
mov DWORD [esp+temp3], edx mov DWORD [esp+temp3], edx
.BRLOOP2: .BRLOOP2:
cmp ecx, 16 ; while (r > 15) { cmp ecx, 16 ; while (r > 15) {
jl .ERLOOP2 jl near .ERLOOP2
sub ecx, 16 ; r -= 16; sub ecx, 16 ; r -= 16;
mov DWORD [esp+temp], ecx mov DWORD [esp+temp], ecx
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0]; mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];