Merge branch 'master' into dev
This commit is contained in:
11
ChangeLog.md
11
ChangeLog.md
@@ -46,6 +46,17 @@ longer supports 32-bit Java virtual machines. Oracle no longer provides a
|
||||
systems) is long obsolete.
|
||||
|
||||
|
||||
2.0.4
|
||||
=====
|
||||
|
||||
### Significant changes relative to 2.0.3:
|
||||
|
||||
1. Fixed a regression in the Windows packaging system (introduced by
|
||||
2.0 beta1[2]) whereby, if both the 64-bit libjpeg-turbo SDK for GCC and the
|
||||
64-bit libjpeg-turbo SDK for Visual C++ were installed on the same system, only
|
||||
one of them could be uninstalled.
|
||||
|
||||
|
||||
2.0.3
|
||||
=====
|
||||
|
||||
|
||||
16
appveyor.yml
16
appveyor.yml
@@ -8,11 +8,23 @@ install:
|
||||
|
||||
7z x c:\installers\nasm-2.10.01-win32.zip -oc:\ > c:\installers\nasm.install.log
|
||||
|
||||
if not exist c:\installers\i686-6.4.0-release-posix-dwarf-rt_v5-rev0.7z curl -fSL -o c:\installers\i686-6.4.0-release-posix-dwarf-rt_v5-rev0.7z "https://sourceforge.net/projects/mingw-w64/files/Toolchains targetting Win32/Personal Builds/mingw-builds/6.4.0/threads-posix/dwarf/i686-6.4.0-release-posix-dwarf-rt_v5-rev0.7z"
|
||||
|
||||
md "c:\Program Files (x86)\mingw-w64\i686-6.4.0-posix-dwarf-rt_v5-rev0"
|
||||
|
||||
7z x c:\installers\i686-6.4.0-release-posix-dwarf-rt_v5-rev0.7z -o"c:\Program Files (x86)\mingw-w64\i686-6.4.0-posix-dwarf-rt_v5-rev0" > c:\installers\mingw32.install.log
|
||||
|
||||
if not exist c:\installers\x86_64-6.4.0-release-posix-seh-rt_v5-rev0.7z curl -fSL -o c:\installers\x86_64-6.4.0-release-posix-seh-rt_v5-rev0.7z "https://sourceforge.net/projects/mingw-w64/files/Toolchains targetting Win64/Personal Builds/mingw-builds/6.4.0/threads-posix/seh/x86_64-6.4.0-release-posix-seh-rt_v5-rev0.7z"
|
||||
|
||||
md "c:\Program Files\mingw-w64\x86_64-6.4.0-posix-seh-rt_v5-rev0"
|
||||
|
||||
7z x c:\installers\x86_64-6.4.0-release-posix-seh-rt_v5-rev0.7z -o"c:\Program Files\mingw-w64\x86_64-6.4.0-posix-seh-rt_v5-rev0" > c:\installers\mingw64.install.log
|
||||
|
||||
set INCLUDE=c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\include
|
||||
|
||||
set LIB=c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\lib\x64
|
||||
|
||||
set PATH=c:\nasm-2.10.01;c:\Program Files (x86)\NSIS;c:\msys64\mingw32\bin;c:\msys64\usr\bin;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin\amd64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\Common7\IDE;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\bin\x64;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\bin;%PATH%
|
||||
set PATH=c:\nasm-2.10.01;c:\Program Files (x86)\NSIS;c:\msys64\usr\bin;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin\amd64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\Common7\IDE;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\bin\x64;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\bin;%PATH%
|
||||
|
||||
set MSYSTEM=MINGW32
|
||||
|
||||
@@ -24,6 +36,8 @@ install:
|
||||
|
||||
cache:
|
||||
- c:\installers\nasm-2.10.01-win32.zip -> appveyor.yml
|
||||
- c:\installers\i686-6.4.0-release-posix-dwarf-rt_v5-rev0.7z -> appveyor.yml
|
||||
- c:\installers\x86_64-6.4.0-release-posix-seh-rt_v5-rev0.7z -> appveyor.yml
|
||||
|
||||
build_script:
|
||||
- cmd: >-
|
||||
|
||||
@@ -83,7 +83,7 @@ endif()
|
||||
if(BITS EQUAL 64)
|
||||
set(INST_PLATFORM "${INST_PLATFORM} 64-bit")
|
||||
set(INST_NAME ${INST_NAME}64)
|
||||
set(INST_REG_NAME ${INST_DIR}64)
|
||||
set(INST_REG_NAME ${INST_REG_NAME}64)
|
||||
set(INST_DEFS ${INST_DEFS} -DWIN64)
|
||||
endif()
|
||||
|
||||
|
||||
4
jchuff.c
4
jchuff.c
@@ -44,8 +44,8 @@
|
||||
*/
|
||||
|
||||
/* NOTE: Both GCC and Clang define __GNUC__ */
|
||||
#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
|
||||
#if !defined __thumb__ || defined __thumb2__
|
||||
#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
|
||||
#if !defined(__thumb__) || defined(__thumb2__)
|
||||
#define USE_CLZ_INTRINSIC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -52,8 +52,8 @@
|
||||
*/
|
||||
|
||||
/* NOTE: Both GCC and Clang define __GNUC__ */
|
||||
#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
|
||||
#if !defined __thumb__ || defined __thumb2__
|
||||
#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
|
||||
#if !defined(__thumb__) || defined(__thumb2__)
|
||||
#define USE_CLZ_INTRINSIC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -110,12 +108,12 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
|
||||
test cl, SIZEOF_BYTE
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_BYTE
|
||||
movzx eax, BYTE [esi+ecx]
|
||||
movzx eax, byte [esi+ecx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
movzx edx, WORD [esi+ecx]
|
||||
movzx edx, word [esi+ecx]
|
||||
shl eax, WORD_BIT
|
||||
or eax, edx
|
||||
.column_ld4:
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -111,13 +109,13 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_BYTE
|
||||
xor eax, eax
|
||||
mov al, BYTE [esi+ecx]
|
||||
mov al, byte [esi+ecx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
xor edx, edx
|
||||
mov dx, WORD [esi+ecx]
|
||||
mov dx, word [esi+ecx]
|
||||
shl eax, WORD_BIT
|
||||
or eax, edx
|
||||
.column_ld4:
|
||||
@@ -127,7 +125,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
|
||||
test cl, SIZEOF_DWORD
|
||||
jz short .column_ld8
|
||||
sub ecx, byte SIZEOF_DWORD
|
||||
movd mmG, DWORD [esi+ecx]
|
||||
movd mmG, dword [esi+ecx]
|
||||
psllq mmA, DWORD_BIT
|
||||
por mmA, mmG
|
||||
.column_ld8:
|
||||
@@ -197,7 +195,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
|
||||
test cl, SIZEOF_MMWORD/8
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_MMWORD/8
|
||||
movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
|
||||
movd mmA, dword [esi+ecx*RGB_PIXELSIZE]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_MMWORD/4
|
||||
jz short .column_ld4
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -109,12 +107,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
|
||||
test cl, SIZEOF_BYTE
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_BYTE
|
||||
movzx eax, BYTE [esi+ecx]
|
||||
movzx eax, byte [esi+ecx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
movzx edx, WORD [esi+ecx]
|
||||
movzx edx, word [esi+ecx]
|
||||
shl eax, WORD_BIT
|
||||
or eax, edx
|
||||
.column_ld4:
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -102,12 +100,12 @@ EXTN(jsimd_rgb_gray_convert_avx2):
|
||||
test cl, SIZEOF_BYTE
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_BYTE
|
||||
movzx eax, BYTE [esi+ecx]
|
||||
movzx eax, byte [esi+ecx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
movzx edx, WORD [esi+ecx]
|
||||
movzx edx, word [esi+ecx]
|
||||
shl eax, WORD_BIT
|
||||
or eax, edx
|
||||
.column_ld4:
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -103,13 +101,13 @@ EXTN(jsimd_rgb_gray_convert_mmx):
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_BYTE
|
||||
xor eax, eax
|
||||
mov al, BYTE [esi+ecx]
|
||||
mov al, byte [esi+ecx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
xor edx, edx
|
||||
mov dx, WORD [esi+ecx]
|
||||
mov dx, word [esi+ecx]
|
||||
shl eax, WORD_BIT
|
||||
or eax, edx
|
||||
.column_ld4:
|
||||
@@ -119,7 +117,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
|
||||
test cl, SIZEOF_DWORD
|
||||
jz short .column_ld8
|
||||
sub ecx, byte SIZEOF_DWORD
|
||||
movd mmG, DWORD [esi+ecx]
|
||||
movd mmG, dword [esi+ecx]
|
||||
psllq mmA, DWORD_BIT
|
||||
por mmA, mmG
|
||||
.column_ld8:
|
||||
@@ -189,7 +187,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
|
||||
test cl, SIZEOF_MMWORD/8
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_MMWORD/8
|
||||
movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
|
||||
movd mmA, dword [esi+ecx*RGB_PIXELSIZE]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_MMWORD/4
|
||||
jz short .column_ld4
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -101,12 +99,12 @@ EXTN(jsimd_rgb_gray_convert_sse2):
|
||||
test cl, SIZEOF_BYTE
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_BYTE
|
||||
movzx eax, BYTE [esi+ecx]
|
||||
movzx eax, byte [esi+ecx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
movzx edx, WORD [esi+ecx]
|
||||
movzx edx, word [esi+ecx]
|
||||
shl eax, WORD_BIT
|
||||
or eax, edx
|
||||
.column_ld4:
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
; This file contains an SSE2 implementation for Huffman coding of one block.
|
||||
; The following code is based directly on jchuff.c; see jchuff.c for more
|
||||
; details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
@@ -197,8 +195,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
push ebp
|
||||
|
||||
mov esi, POINTER [eax+8] ; (working_state *state)
|
||||
mov put_buffer, DWORD [esi+8] ; put_buffer = state->cur.put_buffer;
|
||||
mov put_bits, DWORD [esi+12] ; put_bits = state->cur.put_bits;
|
||||
mov put_buffer, dword [esi+8] ; put_buffer = state->cur.put_buffer;
|
||||
mov put_bits, dword [esi+12] ; put_bits = state->cur.put_bits;
|
||||
push esi ; esi is now scratch
|
||||
|
||||
get_GOT edx ; get GOT address
|
||||
@@ -214,7 +212,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
; Encode the DC coefficient difference per section F.1.2.1
|
||||
mov esi, POINTER [esp+block] ; block
|
||||
movsx ecx, word [esi] ; temp = temp2 = block[0] - last_dc_val;
|
||||
sub ecx, DWORD [eax+20]
|
||||
sub ecx, dword [eax+20]
|
||||
mov esi, ecx
|
||||
|
||||
; This is a well-known technique for obtaining the absolute value
|
||||
@@ -229,12 +227,12 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
; For a negative input, want temp2 = bitwise complement of abs(input)
|
||||
; This code assumes we are on a two's complement machine
|
||||
add esi, edx ; temp2 += temp3;
|
||||
mov DWORD [esp+temp], esi ; backup temp2 in temp
|
||||
mov dword [esp+temp], esi ; backup temp2 in temp
|
||||
|
||||
; Find the number of bits needed for the magnitude of the coefficient
|
||||
movpic ebp, POINTER [esp+gotptr] ; load GOT address (ebp)
|
||||
movzx edx, byte [GOTOFF(ebp, jpeg_nbits_table + ecx)] ; nbits = JPEG_NBITS(temp);
|
||||
mov DWORD [esp+temp2], edx ; backup nbits in temp2
|
||||
mov dword [esp+temp2], edx ; backup nbits in temp2
|
||||
|
||||
; Emit the Huffman-coded symbol for the number of bits
|
||||
mov ebp, POINTER [eax+24] ; After this point, arguments are not accessible anymore
|
||||
@@ -242,13 +240,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
movzx ecx, byte [ebp + edx + 1024] ; size = dctbl->ehufsi[nbits];
|
||||
EMIT_BITS eax ; EMIT_BITS(code, size)
|
||||
|
||||
mov ecx, DWORD [esp+temp2] ; restore nbits
|
||||
mov ecx, dword [esp+temp2] ; restore nbits
|
||||
|
||||
; Mask off any extra bits in code
|
||||
mov eax, 1
|
||||
shl eax, cl
|
||||
dec eax
|
||||
and eax, DWORD [esp+temp] ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
||||
and eax, dword [esp+temp] ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
||||
|
||||
; Emit that number of bits of the value, if positive,
|
||||
; or the complement of its magnitude, if negative.
|
||||
@@ -291,22 +289,22 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
jz near .ELOOP
|
||||
lea esi, [esi+ecx*2] ; k += r;
|
||||
shr edx, cl ; index >>= r;
|
||||
mov DWORD [esp+temp3], edx
|
||||
mov dword [esp+temp3], edx
|
||||
.BRLOOP:
|
||||
cmp ecx, 16 ; while (r > 15) {
|
||||
jl near .ERLOOP
|
||||
sub ecx, 16 ; r -= 16;
|
||||
mov DWORD [esp+temp], ecx
|
||||
mov dword [esp+temp], ecx
|
||||
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];
|
||||
movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0];
|
||||
EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0)
|
||||
mov ecx, DWORD [esp+temp]
|
||||
mov ecx, dword [esp+temp]
|
||||
jmp .BRLOOP
|
||||
.ERLOOP:
|
||||
movsx eax, word [esi] ; temp = t1[k];
|
||||
movpic edx, POINTER [esp+gotptr] ; load GOT address (edx)
|
||||
movzx eax, byte [GOTOFF(edx, jpeg_nbits_table + eax)] ; nbits = JPEG_NBITS(temp);
|
||||
mov DWORD [esp+temp2], eax
|
||||
mov dword [esp+temp2], eax
|
||||
; Emit Huffman symbol for run length / number of bits
|
||||
shl ecx, 4 ; temp3 = (r << 4) + nbits;
|
||||
add ecx, eax
|
||||
@@ -316,13 +314,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
|
||||
movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k];
|
||||
; Mask off any extra bits in code
|
||||
mov ecx, DWORD [esp+temp2]
|
||||
mov ecx, dword [esp+temp2]
|
||||
mov eax, 1
|
||||
shl eax, cl
|
||||
dec eax
|
||||
and eax, edx ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
||||
EMIT_BITS eax ; PUT_BITS(temp2, nbits)
|
||||
mov edx, DWORD [esp+temp3]
|
||||
mov edx, dword [esp+temp3]
|
||||
add esi, 2 ; ++k;
|
||||
shr edx, 1 ; index >>= 1;
|
||||
|
||||
@@ -352,29 +350,29 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
shr edx, cl ; index >>= r;
|
||||
add ecx, eax
|
||||
lea esi, [esi+ecx*2] ; k += r;
|
||||
mov DWORD [esp+temp3], edx
|
||||
mov dword [esp+temp3], edx
|
||||
jmp .BRLOOP2
|
||||
.BLOOP2:
|
||||
bsf ecx, edx ; r = __builtin_ctzl(index);
|
||||
jz near .ELOOP2
|
||||
lea esi, [esi+ecx*2] ; k += r;
|
||||
shr edx, cl ; index >>= r;
|
||||
mov DWORD [esp+temp3], edx
|
||||
mov dword [esp+temp3], edx
|
||||
.BRLOOP2:
|
||||
cmp ecx, 16 ; while (r > 15) {
|
||||
jl near .ERLOOP2
|
||||
sub ecx, 16 ; r -= 16;
|
||||
mov DWORD [esp+temp], ecx
|
||||
mov dword [esp+temp], ecx
|
||||
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];
|
||||
movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0];
|
||||
EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0)
|
||||
mov ecx, DWORD [esp+temp]
|
||||
mov ecx, dword [esp+temp]
|
||||
jmp .BRLOOP2
|
||||
.ERLOOP2:
|
||||
movsx eax, word [esi] ; temp = t1[k];
|
||||
bsr eax, eax ; nbits = 32 - __builtin_clz(temp);
|
||||
inc eax
|
||||
mov DWORD [esp+temp2], eax
|
||||
mov dword [esp+temp2], eax
|
||||
; Emit Huffman symbol for run length / number of bits
|
||||
shl ecx, 4 ; temp3 = (r << 4) + nbits;
|
||||
add ecx, eax
|
||||
@@ -384,13 +382,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
|
||||
movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k];
|
||||
; Mask off any extra bits in code
|
||||
mov ecx, DWORD [esp+temp2]
|
||||
mov ecx, dword [esp+temp2]
|
||||
mov eax, 1
|
||||
shl eax, cl
|
||||
dec eax
|
||||
and eax, edx ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
||||
EMIT_BITS eax ; PUT_BITS(temp2, nbits)
|
||||
mov edx, DWORD [esp+temp3]
|
||||
mov edx, dword [esp+temp3]
|
||||
add esi, 2 ; ++k;
|
||||
shr edx, 1 ; index >>= 1;
|
||||
|
||||
@@ -407,8 +405,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
mov eax, [esp+buffer]
|
||||
pop esi
|
||||
; Save put_buffer & put_bits
|
||||
mov DWORD [esi+8], put_buffer ; state->cur.put_buffer = put_buffer;
|
||||
mov DWORD [esi+12], put_bits ; state->cur.put_bits = put_bits;
|
||||
mov dword [esi+8], put_buffer ; state->cur.put_buffer = put_buffer;
|
||||
mov dword [esi+12], put_bits ; state->cur.put_bits = put_bits;
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
|
||||
@@ -15,8 +15,6 @@
|
||||
;
|
||||
; This file contains an SSE2 implementation of data preparation for progressive
|
||||
; Huffman encoding. See jcphuff.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -348,7 +346,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
||||
vmovd eax, xmmA
|
||||
cmp ecx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [edi], ax
|
||||
mov word [edi], ax
|
||||
add edi, byte SIZEOF_WORD
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
shr eax, 16
|
||||
@@ -357,7 +355,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
||||
; space.
|
||||
test ecx, ecx
|
||||
jz short .nextrow
|
||||
mov BYTE [edi], al
|
||||
mov byte [edi], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -280,7 +278,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
|
||||
movd eax, mmA
|
||||
cmp ecx, byte SIZEOF_DWORD
|
||||
jb short .column_st2
|
||||
mov DWORD [edi+0*SIZEOF_DWORD], eax
|
||||
mov dword [edi+0*SIZEOF_DWORD], eax
|
||||
psrlq mmA, DWORD_BIT
|
||||
movd eax, mmA
|
||||
sub ecx, byte SIZEOF_DWORD
|
||||
@@ -288,14 +286,14 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
|
||||
.column_st2:
|
||||
cmp ecx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [edi+0*SIZEOF_WORD], ax
|
||||
mov word [edi+0*SIZEOF_WORD], ax
|
||||
shr eax, WORD_BIT
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
add edi, byte SIZEOF_WORD
|
||||
.column_st1:
|
||||
cmp ecx, byte SIZEOF_BYTE
|
||||
jb short .nextrow
|
||||
mov BYTE [edi+0*SIZEOF_BYTE], al
|
||||
mov byte [edi+0*SIZEOF_BYTE], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
@@ -367,7 +365,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
|
||||
.column_st4:
|
||||
cmp ecx, byte SIZEOF_MMWORD/8
|
||||
jb short .nextrow
|
||||
movd DWORD [edi+0*SIZEOF_DWORD], mmA
|
||||
movd dword [edi+0*SIZEOF_DWORD], mmA
|
||||
|
||||
%endif ; RGB_PIXELSIZE ; ---------------
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -320,7 +318,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
||||
movd eax, xmmA
|
||||
cmp ecx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [edi], ax
|
||||
mov word [edi], ax
|
||||
add edi, byte SIZEOF_WORD
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
shr eax, 16
|
||||
@@ -329,7 +327,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
||||
; space.
|
||||
test ecx, ecx
|
||||
jz short .nextrow
|
||||
mov BYTE [edi], al
|
||||
mov byte [edi], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -354,7 +352,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
||||
vmovd eax, xmmA
|
||||
cmp ecx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [edi], ax
|
||||
mov word [edi], ax
|
||||
add edi, byte SIZEOF_WORD
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
shr eax, 16
|
||||
@@ -363,7 +361,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
||||
; space.
|
||||
test ecx, ecx
|
||||
jz short .endcolumn
|
||||
mov BYTE [edi], al
|
||||
mov byte [edi], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -283,7 +281,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
||||
movd eax, mmA
|
||||
cmp ecx, byte SIZEOF_DWORD
|
||||
jb short .column_st2
|
||||
mov DWORD [edi+0*SIZEOF_DWORD], eax
|
||||
mov dword [edi+0*SIZEOF_DWORD], eax
|
||||
psrlq mmA, DWORD_BIT
|
||||
movd eax, mmA
|
||||
sub ecx, byte SIZEOF_DWORD
|
||||
@@ -291,14 +289,14 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
||||
.column_st2:
|
||||
cmp ecx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [edi+0*SIZEOF_WORD], ax
|
||||
mov word [edi+0*SIZEOF_WORD], ax
|
||||
shr eax, WORD_BIT
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
add edi, byte SIZEOF_WORD
|
||||
.column_st1:
|
||||
cmp ecx, byte SIZEOF_BYTE
|
||||
jb short .endcolumn
|
||||
mov BYTE [edi+0*SIZEOF_BYTE], al
|
||||
mov byte [edi+0*SIZEOF_BYTE], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
@@ -373,7 +371,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
||||
.column_st4:
|
||||
cmp ecx, byte SIZEOF_MMWORD/8
|
||||
jb short .endcolumn
|
||||
movd DWORD [edi+0*SIZEOF_DWORD], mmA
|
||||
movd dword [edi+0*SIZEOF_DWORD], mmA
|
||||
|
||||
%endif ; RGB_PIXELSIZE ; ---------------
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -325,7 +323,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
||||
movd eax, xmmA
|
||||
cmp ecx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [edi], ax
|
||||
mov word [edi], ax
|
||||
add edi, byte SIZEOF_WORD
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
shr eax, 16
|
||||
@@ -334,7 +332,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
||||
; space.
|
||||
test ecx, ecx
|
||||
jz short .endcolumn
|
||||
mov BYTE [edi], al
|
||||
mov byte [edi], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
; This file contains a floating-point implementation of the forward DCT
|
||||
; (Discrete Cosine Transform). The following code is based directly on
|
||||
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
; This file contains a floating-point implementation of the forward DCT
|
||||
; (Discrete Cosine Transform). The following code is based directly on
|
||||
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; the forward DCT (Discrete Cosine Transform). The following code is
|
||||
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
|
||||
; for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; the forward DCT (Discrete Cosine Transform). The following code is
|
||||
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
|
||||
; for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; forward DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; forward DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; forward DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
; This file contains a floating-point implementation of the inverse DCT
|
||||
; (Discrete Cosine Transform). The following code is based directly on
|
||||
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -92,23 +90,23 @@ EXTN(jsimd_idct_float_3dnow):
|
||||
alignx 16, 7
|
||||
.columnloop:
|
||||
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz short .columnDCT
|
||||
|
||||
pushpic ebx ; save GOT address
|
||||
mov ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
||||
or ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
||||
or ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
|
||||
mov ebx, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
||||
or ebx, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
||||
or ebx, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, ebx
|
||||
poppic ebx ; restore GOT address
|
||||
jnz short .columnDCT
|
||||
|
||||
; -- AC terms all zero
|
||||
|
||||
movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
punpcklwd mm0, mm0
|
||||
psrad mm0, (DWORD_BIT-WORD_BIT)
|
||||
@@ -135,10 +133,10 @@ EXTN(jsimd_idct_float_3dnow):
|
||||
|
||||
; -- Even part
|
||||
|
||||
movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm1, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm2, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm3, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
punpcklwd mm0, mm0
|
||||
punpcklwd mm1, mm1
|
||||
@@ -182,10 +180,10 @@ EXTN(jsimd_idct_float_3dnow):
|
||||
|
||||
; -- Odd part
|
||||
|
||||
movd mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm2, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm3, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm5, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm1, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
punpcklwd mm2, mm2
|
||||
punpcklwd mm3, mm3
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
; This file contains a floating-point implementation of the inverse DCT
|
||||
; (Discrete Cosine Transform). The following code is based directly on
|
||||
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -102,8 +100,8 @@ EXTN(jsimd_idct_float_sse):
|
||||
alignx 16, 7
|
||||
.columnloop:
|
||||
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
; This file contains a floating-point implementation of the inverse DCT
|
||||
; (Discrete Cosine Transform). The following code is based directly on
|
||||
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -102,8 +100,8 @@ EXTN(jsimd_idct_float_sse2):
|
||||
alignx 16, 7
|
||||
.columnloop:
|
||||
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movq xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; the inverse DCT (Discrete Cosine Transform). The following code is
|
||||
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
|
||||
; for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -123,8 +121,8 @@ EXTN(jsimd_idct_ifast_mmx):
|
||||
alignx 16, 7
|
||||
.columnloop:
|
||||
%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz short .columnDCT
|
||||
|
||||
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; the inverse DCT (Discrete Cosine Transform). The following code is
|
||||
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
|
||||
; for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -118,8 +116,8 @@ EXTN(jsimd_idct_ifast_sse2):
|
||||
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
||||
|
||||
%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; inverse DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -320,8 +318,8 @@ EXTN(jsimd_idct_islow_avx2):
|
||||
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
||||
|
||||
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; inverse DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -136,8 +134,8 @@ EXTN(jsimd_idct_islow_mmx):
|
||||
alignx 16, 7
|
||||
.columnloop:
|
||||
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz short .columnDCT
|
||||
|
||||
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; inverse DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -131,8 +129,8 @@ EXTN(jsimd_idct_islow_sse2):
|
||||
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
||||
|
||||
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
|
||||
; The following code is based directly on the IJG's original jidctred.c;
|
||||
; see the jidctred.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -144,8 +142,8 @@ EXTN(jsimd_idct_4x4_mmx):
|
||||
alignx 16, 7
|
||||
.columnloop:
|
||||
%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz short .columnDCT
|
||||
|
||||
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
@@ -464,16 +462,16 @@ EXTN(jsimd_idct_4x4_mmx):
|
||||
|
||||
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
||||
mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
|
||||
movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
|
||||
movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
|
||||
movd dword [edx+eax*SIZEOF_JSAMPLE], mm1
|
||||
movd dword [esi+eax*SIZEOF_JSAMPLE], mm0
|
||||
|
||||
psrlq mm1, 4*BYTE_BIT
|
||||
psrlq mm0, 4*BYTE_BIT
|
||||
|
||||
mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
||||
mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
|
||||
movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
|
||||
movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
|
||||
movd dword [edx+eax*SIZEOF_JSAMPLE], mm1
|
||||
movd dword [esi+eax*SIZEOF_JSAMPLE], mm0
|
||||
|
||||
emms ; empty MMX state
|
||||
|
||||
@@ -688,8 +686,8 @@ EXTN(jsimd_idct_2x2_mmx):
|
||||
|
||||
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
||||
mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
||||
mov WORD [edx+eax*SIZEOF_JSAMPLE], bx
|
||||
mov WORD [esi+eax*SIZEOF_JSAMPLE], cx
|
||||
mov word [edx+eax*SIZEOF_JSAMPLE], bx
|
||||
mov word [esi+eax*SIZEOF_JSAMPLE], cx
|
||||
|
||||
emms ; empty MMX state
|
||||
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
|
||||
; The following code is based directly on the IJG's original jidctred.c;
|
||||
; see the jidctred.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -139,8 +137,8 @@ EXTN(jsimd_idct_4x4_sse2):
|
||||
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
||||
|
||||
%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz short .columnDCT
|
||||
|
||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
@@ -578,8 +576,8 @@ EXTN(jsimd_idct_2x2_sse2):
|
||||
|
||||
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
||||
mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
||||
mov WORD [edx+eax*SIZEOF_JSAMPLE], bx
|
||||
mov WORD [esi+eax*SIZEOF_JSAMPLE], cx
|
||||
mov word [edx+eax*SIZEOF_JSAMPLE], bx
|
||||
mov word [esi+eax*SIZEOF_JSAMPLE], cx
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -7,8 +7,6 @@
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -7,8 +7,6 @@
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
; Each IDCT routine is responsible for range-limiting its results and
|
||||
; converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could
|
||||
|
||||
@@ -25,8 +25,6 @@
|
||||
; 2. Altered source versions must be plainly marked as such, and must not be
|
||||
; misrepresented as being the original software.
|
||||
; 3. This notice may not be removed or altered from any source distribution.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
; ==========================================================================
|
||||
; System-dependent configurations
|
||||
@@ -204,19 +202,19 @@ section .note.GNU-stack noalloc noexec nowrite progbits
|
||||
%define XMM_DWORD
|
||||
%define XMM_MMWORD
|
||||
|
||||
%define SIZEOF_BYTE 1 ; sizeof(BYTE)
|
||||
%define SIZEOF_WORD 2 ; sizeof(WORD)
|
||||
%define SIZEOF_DWORD 4 ; sizeof(DWORD)
|
||||
%define SIZEOF_QWORD 8 ; sizeof(QWORD)
|
||||
%define SIZEOF_OWORD 16 ; sizeof(OWORD)
|
||||
%define SIZEOF_YWORD 32 ; sizeof(YWORD)
|
||||
%define SIZEOF_BYTE 1 ; sizeof(byte)
|
||||
%define SIZEOF_WORD 2 ; sizeof(word)
|
||||
%define SIZEOF_DWORD 4 ; sizeof(dword)
|
||||
%define SIZEOF_QWORD 8 ; sizeof(qword)
|
||||
%define SIZEOF_OWORD 16 ; sizeof(oword)
|
||||
%define SIZEOF_YWORD 32 ; sizeof(yword)
|
||||
|
||||
%define BYTE_BIT 8 ; CHAR_BIT in C
|
||||
%define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT
|
||||
%define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT
|
||||
%define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT
|
||||
%define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT
|
||||
%define YWORD_BIT 256 ; sizeof(YWORD)*BYTE_BIT
|
||||
%define WORD_BIT 16 ; sizeof(word)*BYTE_BIT
|
||||
%define DWORD_BIT 32 ; sizeof(dword)*BYTE_BIT
|
||||
%define QWORD_BIT 64 ; sizeof(qword)*BYTE_BIT
|
||||
%define OWORD_BIT 128 ; sizeof(oword)*BYTE_BIT
|
||||
%define YWORD_BIT 256 ; sizeof(yword)*BYTE_BIT
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
; External Symbol Name
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -97,12 +95,12 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
|
||||
test cl, SIZEOF_BYTE
|
||||
jz short .column_ld2
|
||||
sub rcx, byte SIZEOF_BYTE
|
||||
movzx rax, BYTE [rsi+rcx]
|
||||
movzx rax, byte [rsi+rcx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub rcx, byte SIZEOF_WORD
|
||||
movzx rdx, WORD [rsi+rcx]
|
||||
movzx rdx, word [rsi+rcx]
|
||||
shl rax, WORD_BIT
|
||||
or rax, rdx
|
||||
.column_ld4:
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -96,12 +94,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
|
||||
test cl, SIZEOF_BYTE
|
||||
jz short .column_ld2
|
||||
sub rcx, byte SIZEOF_BYTE
|
||||
movzx rax, BYTE [rsi+rcx]
|
||||
movzx rax, byte [rsi+rcx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub rcx, byte SIZEOF_WORD
|
||||
movzx rdx, WORD [rsi+rcx]
|
||||
movzx rdx, word [rsi+rcx]
|
||||
shl rax, WORD_BIT
|
||||
or rax, rdx
|
||||
.column_ld4:
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -89,12 +87,12 @@ EXTN(jsimd_rgb_gray_convert_avx2):
|
||||
test cl, SIZEOF_BYTE
|
||||
jz short .column_ld2
|
||||
sub rcx, byte SIZEOF_BYTE
|
||||
movzx rax, BYTE [rsi+rcx]
|
||||
movzx rax, byte [rsi+rcx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub rcx, byte SIZEOF_WORD
|
||||
movzx rdx, WORD [rsi+rcx]
|
||||
movzx rdx, word [rsi+rcx]
|
||||
shl rax, WORD_BIT
|
||||
or rax, rdx
|
||||
.column_ld4:
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -88,12 +86,12 @@ EXTN(jsimd_rgb_gray_convert_sse2):
|
||||
test cl, SIZEOF_BYTE
|
||||
jz short .column_ld2
|
||||
sub rcx, byte SIZEOF_BYTE
|
||||
movzx rax, BYTE [rsi+rcx]
|
||||
movzx rax, byte [rsi+rcx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub rcx, byte SIZEOF_WORD
|
||||
movzx rdx, WORD [rsi+rcx]
|
||||
movzx rdx, word [rsi+rcx]
|
||||
shl rax, WORD_BIT
|
||||
or rax, rdx
|
||||
.column_ld4:
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; This file contains an SSE2 implementation for Huffman coding of one block.
|
||||
; The following code is based directly on jchuff.c; see jchuff.c for more
|
||||
; details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
@@ -201,7 +199,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
mov buffer, r11 ; r11 is now scratch
|
||||
|
||||
mov put_buffer, MMWORD [r10+SIZEOF_POINTER*2] ; put_buffer = state->cur.put_buffer;
|
||||
mov put_bits, DWORD [r10+SIZEOF_POINTER*2+8] ; put_bits = state->cur.put_bits;
|
||||
mov put_bits, dword [r10+SIZEOF_POINTER*2+8] ; put_bits = state->cur.put_bits;
|
||||
push r10 ; r10 is now scratch
|
||||
|
||||
; Encode the DC coefficient difference per section F.1.2.1
|
||||
@@ -334,7 +332,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
pop r10
|
||||
; Save put_buffer & put_bits
|
||||
mov MMWORD [r10+SIZEOF_POINTER*2], put_buffer ; state->cur.put_buffer = put_buffer;
|
||||
mov DWORD [r10+SIZEOF_POINTER*2+8], put_bits ; state->cur.put_bits = put_bits;
|
||||
mov dword [r10+SIZEOF_POINTER*2+8], put_bits ; state->cur.put_bits = put_bits;
|
||||
|
||||
pop rbx
|
||||
uncollect_args 6
|
||||
|
||||
@@ -16,8 +16,6 @@
|
||||
;
|
||||
; This file contains an SSE2 implementation of data preparation for progressive
|
||||
; Huffman encoding. See jcphuff.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -15,8 +15,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -15,8 +15,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -335,7 +333,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
||||
vmovd eax, xmmA
|
||||
cmp rcx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [rdi], ax
|
||||
mov word [rdi], ax
|
||||
add rdi, byte SIZEOF_WORD
|
||||
sub rcx, byte SIZEOF_WORD
|
||||
shr rax, 16
|
||||
@@ -344,7 +342,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
||||
; space.
|
||||
test rcx, rcx
|
||||
jz short .nextrow
|
||||
mov BYTE [rdi], al
|
||||
mov byte [rdi], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -307,7 +305,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
||||
movd eax, xmmA
|
||||
cmp rcx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [rdi], ax
|
||||
mov word [rdi], ax
|
||||
add rdi, byte SIZEOF_WORD
|
||||
sub rcx, byte SIZEOF_WORD
|
||||
shr rax, 16
|
||||
@@ -316,7 +314,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
||||
; space.
|
||||
test rcx, rcx
|
||||
jz short .nextrow
|
||||
mov BYTE [rdi], al
|
||||
mov byte [rdi], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -15,8 +15,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -340,7 +338,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
||||
vmovd eax, xmmA
|
||||
cmp rcx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [rdi], ax
|
||||
mov word [rdi], ax
|
||||
add rdi, byte SIZEOF_WORD
|
||||
sub rcx, byte SIZEOF_WORD
|
||||
shr rax, 16
|
||||
@@ -349,7 +347,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
||||
; space.
|
||||
test rcx, rcx
|
||||
jz short .endcolumn
|
||||
mov BYTE [rdi], al
|
||||
mov byte [rdi], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -311,7 +309,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
||||
movd eax, xmmA
|
||||
cmp rcx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [rdi], ax
|
||||
mov word [rdi], ax
|
||||
add rdi, byte SIZEOF_WORD
|
||||
sub rcx, byte SIZEOF_WORD
|
||||
shr rax, 16
|
||||
@@ -320,7 +318,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
||||
; space.
|
||||
test rcx, rcx
|
||||
jz short .endcolumn
|
||||
mov BYTE [rdi], al
|
||||
mov byte [rdi], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
|
||||
@@ -15,8 +15,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
; This file contains a floating-point implementation of the forward DCT
|
||||
; (Discrete Cosine Transform). The following code is based directly on
|
||||
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; the forward DCT (Discrete Cosine Transform). The following code is
|
||||
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
|
||||
; for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; forward DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; forward DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; This file contains a floating-point implementation of the inverse DCT
|
||||
; (Discrete Cosine Transform). The following code is based directly on
|
||||
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -96,8 +94,8 @@ EXTN(jsimd_idct_float_sse2):
|
||||
mov rcx, DCTSIZE/4 ; ctr
|
||||
.columnloop:
|
||||
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
|
||||
mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movq xmm1, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -19,8 +19,6 @@
|
||||
; the inverse DCT (Discrete Cosine Transform). The following code is
|
||||
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
|
||||
; for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -112,8 +110,8 @@ EXTN(jsimd_idct_ifast_sse2):
|
||||
mov rsi, r11 ; inptr
|
||||
|
||||
%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
|
||||
mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -19,8 +19,6 @@
|
||||
; inverse DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -293,8 +291,8 @@ EXTN(jsimd_idct_islow_avx2):
|
||||
; ---- Pass 1: process columns.
|
||||
|
||||
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2
|
||||
mov eax, DWORD [DWBLOCK(1,0,r11,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,r11,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,r11,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,r11,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,r11,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -19,8 +19,6 @@
|
||||
; inverse DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -125,8 +123,8 @@ EXTN(jsimd_idct_islow_sse2):
|
||||
mov rsi, r11 ; inptr
|
||||
|
||||
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
|
||||
mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -19,8 +19,6 @@
|
||||
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
|
||||
; The following code is based directly on the IJG's original jidctred.c;
|
||||
; see the jidctred.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -133,8 +131,8 @@ EXTN(jsimd_idct_4x4_sse2):
|
||||
mov rsi, r11 ; inptr
|
||||
|
||||
%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
|
||||
mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
||||
jnz short .columnDCT
|
||||
|
||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||
@@ -563,8 +561,8 @@ EXTN(jsimd_idct_2x2_sse2):
|
||||
|
||||
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
||||
mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
||||
mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx
|
||||
mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx
|
||||
mov word [rdx+rax*SIZEOF_JSAMPLE], bx
|
||||
mov word [rsi+rax*SIZEOF_JSAMPLE], cx
|
||||
|
||||
pop rbx
|
||||
uncollect_args 4
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -15,8 +15,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user