Merge branch 'master' into dev
This commit is contained in:
11
ChangeLog.md
11
ChangeLog.md
@@ -46,6 +46,17 @@ longer supports 32-bit Java virtual machines. Oracle no longer provides a
|
|||||||
systems) is long obsolete.
|
systems) is long obsolete.
|
||||||
|
|
||||||
|
|
||||||
|
2.0.4
|
||||||
|
=====
|
||||||
|
|
||||||
|
### Significant changes relative to 2.0.3:
|
||||||
|
|
||||||
|
1. Fixed a regression in the Windows packaging system (introduced by
|
||||||
|
2.0 beta1[2]) whereby, if both the 64-bit libjpeg-turbo SDK for GCC and the
|
||||||
|
64-bit libjpeg-turbo SDK for Visual C++ were installed on the same system, only
|
||||||
|
one of them could be uninstalled.
|
||||||
|
|
||||||
|
|
||||||
2.0.3
|
2.0.3
|
||||||
=====
|
=====
|
||||||
|
|
||||||
|
|||||||
16
appveyor.yml
16
appveyor.yml
@@ -8,11 +8,23 @@ install:
|
|||||||
|
|
||||||
7z x c:\installers\nasm-2.10.01-win32.zip -oc:\ > c:\installers\nasm.install.log
|
7z x c:\installers\nasm-2.10.01-win32.zip -oc:\ > c:\installers\nasm.install.log
|
||||||
|
|
||||||
|
if not exist c:\installers\i686-6.4.0-release-posix-dwarf-rt_v5-rev0.7z curl -fSL -o c:\installers\i686-6.4.0-release-posix-dwarf-rt_v5-rev0.7z "https://sourceforge.net/projects/mingw-w64/files/Toolchains targetting Win32/Personal Builds/mingw-builds/6.4.0/threads-posix/dwarf/i686-6.4.0-release-posix-dwarf-rt_v5-rev0.7z"
|
||||||
|
|
||||||
|
md "c:\Program Files (x86)\mingw-w64\i686-6.4.0-posix-dwarf-rt_v5-rev0"
|
||||||
|
|
||||||
|
7z x c:\installers\i686-6.4.0-release-posix-dwarf-rt_v5-rev0.7z -o"c:\Program Files (x86)\mingw-w64\i686-6.4.0-posix-dwarf-rt_v5-rev0" > c:\installers\mingw32.install.log
|
||||||
|
|
||||||
|
if not exist c:\installers\x86_64-6.4.0-release-posix-seh-rt_v5-rev0.7z curl -fSL -o c:\installers\x86_64-6.4.0-release-posix-seh-rt_v5-rev0.7z "https://sourceforge.net/projects/mingw-w64/files/Toolchains targetting Win64/Personal Builds/mingw-builds/6.4.0/threads-posix/seh/x86_64-6.4.0-release-posix-seh-rt_v5-rev0.7z"
|
||||||
|
|
||||||
|
md "c:\Program Files\mingw-w64\x86_64-6.4.0-posix-seh-rt_v5-rev0"
|
||||||
|
|
||||||
|
7z x c:\installers\x86_64-6.4.0-release-posix-seh-rt_v5-rev0.7z -o"c:\Program Files\mingw-w64\x86_64-6.4.0-posix-seh-rt_v5-rev0" > c:\installers\mingw64.install.log
|
||||||
|
|
||||||
set INCLUDE=c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\include
|
set INCLUDE=c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\include
|
||||||
|
|
||||||
set LIB=c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\lib\x64
|
set LIB=c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\lib\x64
|
||||||
|
|
||||||
set PATH=c:\nasm-2.10.01;c:\Program Files (x86)\NSIS;c:\msys64\mingw32\bin;c:\msys64\usr\bin;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin\amd64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\Common7\IDE;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\bin\x64;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\bin;%PATH%
|
set PATH=c:\nasm-2.10.01;c:\Program Files (x86)\NSIS;c:\msys64\usr\bin;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin\amd64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\Common7\IDE;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\bin\x64;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\bin;%PATH%
|
||||||
|
|
||||||
set MSYSTEM=MINGW32
|
set MSYSTEM=MINGW32
|
||||||
|
|
||||||
@@ -24,6 +36,8 @@ install:
|
|||||||
|
|
||||||
cache:
|
cache:
|
||||||
- c:\installers\nasm-2.10.01-win32.zip -> appveyor.yml
|
- c:\installers\nasm-2.10.01-win32.zip -> appveyor.yml
|
||||||
|
- c:\installers\i686-6.4.0-release-posix-dwarf-rt_v5-rev0.7z -> appveyor.yml
|
||||||
|
- c:\installers\x86_64-6.4.0-release-posix-seh-rt_v5-rev0.7z -> appveyor.yml
|
||||||
|
|
||||||
build_script:
|
build_script:
|
||||||
- cmd: >-
|
- cmd: >-
|
||||||
|
|||||||
@@ -83,7 +83,7 @@ endif()
|
|||||||
if(BITS EQUAL 64)
|
if(BITS EQUAL 64)
|
||||||
set(INST_PLATFORM "${INST_PLATFORM} 64-bit")
|
set(INST_PLATFORM "${INST_PLATFORM} 64-bit")
|
||||||
set(INST_NAME ${INST_NAME}64)
|
set(INST_NAME ${INST_NAME}64)
|
||||||
set(INST_REG_NAME ${INST_DIR}64)
|
set(INST_REG_NAME ${INST_REG_NAME}64)
|
||||||
set(INST_DEFS ${INST_DEFS} -DWIN64)
|
set(INST_DEFS ${INST_DEFS} -DWIN64)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|||||||
4
jchuff.c
4
jchuff.c
@@ -44,8 +44,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* NOTE: Both GCC and Clang define __GNUC__ */
|
/* NOTE: Both GCC and Clang define __GNUC__ */
|
||||||
#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
|
#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
|
||||||
#if !defined __thumb__ || defined __thumb2__
|
#if !defined(__thumb__) || defined(__thumb2__)
|
||||||
#define USE_CLZ_INTRINSIC
|
#define USE_CLZ_INTRINSIC
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -52,8 +52,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* NOTE: Both GCC and Clang define __GNUC__ */
|
/* NOTE: Both GCC and Clang define __GNUC__ */
|
||||||
#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
|
#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
|
||||||
#if !defined __thumb__ || defined __thumb2__
|
#if !defined(__thumb__) || defined(__thumb2__)
|
||||||
#define USE_CLZ_INTRINSIC
|
#define USE_CLZ_INTRINSIC
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -110,12 +108,12 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
|
|||||||
test cl, SIZEOF_BYTE
|
test cl, SIZEOF_BYTE
|
||||||
jz short .column_ld2
|
jz short .column_ld2
|
||||||
sub ecx, byte SIZEOF_BYTE
|
sub ecx, byte SIZEOF_BYTE
|
||||||
movzx eax, BYTE [esi+ecx]
|
movzx eax, byte [esi+ecx]
|
||||||
.column_ld2:
|
.column_ld2:
|
||||||
test cl, SIZEOF_WORD
|
test cl, SIZEOF_WORD
|
||||||
jz short .column_ld4
|
jz short .column_ld4
|
||||||
sub ecx, byte SIZEOF_WORD
|
sub ecx, byte SIZEOF_WORD
|
||||||
movzx edx, WORD [esi+ecx]
|
movzx edx, word [esi+ecx]
|
||||||
shl eax, WORD_BIT
|
shl eax, WORD_BIT
|
||||||
or eax, edx
|
or eax, edx
|
||||||
.column_ld4:
|
.column_ld4:
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -111,13 +109,13 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
|
|||||||
jz short .column_ld2
|
jz short .column_ld2
|
||||||
sub ecx, byte SIZEOF_BYTE
|
sub ecx, byte SIZEOF_BYTE
|
||||||
xor eax, eax
|
xor eax, eax
|
||||||
mov al, BYTE [esi+ecx]
|
mov al, byte [esi+ecx]
|
||||||
.column_ld2:
|
.column_ld2:
|
||||||
test cl, SIZEOF_WORD
|
test cl, SIZEOF_WORD
|
||||||
jz short .column_ld4
|
jz short .column_ld4
|
||||||
sub ecx, byte SIZEOF_WORD
|
sub ecx, byte SIZEOF_WORD
|
||||||
xor edx, edx
|
xor edx, edx
|
||||||
mov dx, WORD [esi+ecx]
|
mov dx, word [esi+ecx]
|
||||||
shl eax, WORD_BIT
|
shl eax, WORD_BIT
|
||||||
or eax, edx
|
or eax, edx
|
||||||
.column_ld4:
|
.column_ld4:
|
||||||
@@ -127,7 +125,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
|
|||||||
test cl, SIZEOF_DWORD
|
test cl, SIZEOF_DWORD
|
||||||
jz short .column_ld8
|
jz short .column_ld8
|
||||||
sub ecx, byte SIZEOF_DWORD
|
sub ecx, byte SIZEOF_DWORD
|
||||||
movd mmG, DWORD [esi+ecx]
|
movd mmG, dword [esi+ecx]
|
||||||
psllq mmA, DWORD_BIT
|
psllq mmA, DWORD_BIT
|
||||||
por mmA, mmG
|
por mmA, mmG
|
||||||
.column_ld8:
|
.column_ld8:
|
||||||
@@ -197,7 +195,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
|
|||||||
test cl, SIZEOF_MMWORD/8
|
test cl, SIZEOF_MMWORD/8
|
||||||
jz short .column_ld2
|
jz short .column_ld2
|
||||||
sub ecx, byte SIZEOF_MMWORD/8
|
sub ecx, byte SIZEOF_MMWORD/8
|
||||||
movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
|
movd mmA, dword [esi+ecx*RGB_PIXELSIZE]
|
||||||
.column_ld2:
|
.column_ld2:
|
||||||
test cl, SIZEOF_MMWORD/4
|
test cl, SIZEOF_MMWORD/4
|
||||||
jz short .column_ld4
|
jz short .column_ld4
|
||||||
|
|||||||
@@ -12,8 +12,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -109,12 +107,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
|
|||||||
test cl, SIZEOF_BYTE
|
test cl, SIZEOF_BYTE
|
||||||
jz short .column_ld2
|
jz short .column_ld2
|
||||||
sub ecx, byte SIZEOF_BYTE
|
sub ecx, byte SIZEOF_BYTE
|
||||||
movzx eax, BYTE [esi+ecx]
|
movzx eax, byte [esi+ecx]
|
||||||
.column_ld2:
|
.column_ld2:
|
||||||
test cl, SIZEOF_WORD
|
test cl, SIZEOF_WORD
|
||||||
jz short .column_ld4
|
jz short .column_ld4
|
||||||
sub ecx, byte SIZEOF_WORD
|
sub ecx, byte SIZEOF_WORD
|
||||||
movzx edx, WORD [esi+ecx]
|
movzx edx, word [esi+ecx]
|
||||||
shl eax, WORD_BIT
|
shl eax, WORD_BIT
|
||||||
or eax, edx
|
or eax, edx
|
||||||
.column_ld4:
|
.column_ld4:
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -12,8 +12,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -12,8 +12,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -102,12 +100,12 @@ EXTN(jsimd_rgb_gray_convert_avx2):
|
|||||||
test cl, SIZEOF_BYTE
|
test cl, SIZEOF_BYTE
|
||||||
jz short .column_ld2
|
jz short .column_ld2
|
||||||
sub ecx, byte SIZEOF_BYTE
|
sub ecx, byte SIZEOF_BYTE
|
||||||
movzx eax, BYTE [esi+ecx]
|
movzx eax, byte [esi+ecx]
|
||||||
.column_ld2:
|
.column_ld2:
|
||||||
test cl, SIZEOF_WORD
|
test cl, SIZEOF_WORD
|
||||||
jz short .column_ld4
|
jz short .column_ld4
|
||||||
sub ecx, byte SIZEOF_WORD
|
sub ecx, byte SIZEOF_WORD
|
||||||
movzx edx, WORD [esi+ecx]
|
movzx edx, word [esi+ecx]
|
||||||
shl eax, WORD_BIT
|
shl eax, WORD_BIT
|
||||||
or eax, edx
|
or eax, edx
|
||||||
.column_ld4:
|
.column_ld4:
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -103,13 +101,13 @@ EXTN(jsimd_rgb_gray_convert_mmx):
|
|||||||
jz short .column_ld2
|
jz short .column_ld2
|
||||||
sub ecx, byte SIZEOF_BYTE
|
sub ecx, byte SIZEOF_BYTE
|
||||||
xor eax, eax
|
xor eax, eax
|
||||||
mov al, BYTE [esi+ecx]
|
mov al, byte [esi+ecx]
|
||||||
.column_ld2:
|
.column_ld2:
|
||||||
test cl, SIZEOF_WORD
|
test cl, SIZEOF_WORD
|
||||||
jz short .column_ld4
|
jz short .column_ld4
|
||||||
sub ecx, byte SIZEOF_WORD
|
sub ecx, byte SIZEOF_WORD
|
||||||
xor edx, edx
|
xor edx, edx
|
||||||
mov dx, WORD [esi+ecx]
|
mov dx, word [esi+ecx]
|
||||||
shl eax, WORD_BIT
|
shl eax, WORD_BIT
|
||||||
or eax, edx
|
or eax, edx
|
||||||
.column_ld4:
|
.column_ld4:
|
||||||
@@ -119,7 +117,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
|
|||||||
test cl, SIZEOF_DWORD
|
test cl, SIZEOF_DWORD
|
||||||
jz short .column_ld8
|
jz short .column_ld8
|
||||||
sub ecx, byte SIZEOF_DWORD
|
sub ecx, byte SIZEOF_DWORD
|
||||||
movd mmG, DWORD [esi+ecx]
|
movd mmG, dword [esi+ecx]
|
||||||
psllq mmA, DWORD_BIT
|
psllq mmA, DWORD_BIT
|
||||||
por mmA, mmG
|
por mmA, mmG
|
||||||
.column_ld8:
|
.column_ld8:
|
||||||
@@ -189,7 +187,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
|
|||||||
test cl, SIZEOF_MMWORD/8
|
test cl, SIZEOF_MMWORD/8
|
||||||
jz short .column_ld2
|
jz short .column_ld2
|
||||||
sub ecx, byte SIZEOF_MMWORD/8
|
sub ecx, byte SIZEOF_MMWORD/8
|
||||||
movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
|
movd mmA, dword [esi+ecx*RGB_PIXELSIZE]
|
||||||
.column_ld2:
|
.column_ld2:
|
||||||
test cl, SIZEOF_MMWORD/4
|
test cl, SIZEOF_MMWORD/4
|
||||||
jz short .column_ld4
|
jz short .column_ld4
|
||||||
|
|||||||
@@ -12,8 +12,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -101,12 +99,12 @@ EXTN(jsimd_rgb_gray_convert_sse2):
|
|||||||
test cl, SIZEOF_BYTE
|
test cl, SIZEOF_BYTE
|
||||||
jz short .column_ld2
|
jz short .column_ld2
|
||||||
sub ecx, byte SIZEOF_BYTE
|
sub ecx, byte SIZEOF_BYTE
|
||||||
movzx eax, BYTE [esi+ecx]
|
movzx eax, byte [esi+ecx]
|
||||||
.column_ld2:
|
.column_ld2:
|
||||||
test cl, SIZEOF_WORD
|
test cl, SIZEOF_WORD
|
||||||
jz short .column_ld4
|
jz short .column_ld4
|
||||||
sub ecx, byte SIZEOF_WORD
|
sub ecx, byte SIZEOF_WORD
|
||||||
movzx edx, WORD [esi+ecx]
|
movzx edx, word [esi+ecx]
|
||||||
shl eax, WORD_BIT
|
shl eax, WORD_BIT
|
||||||
or eax, edx
|
or eax, edx
|
||||||
.column_ld4:
|
.column_ld4:
|
||||||
|
|||||||
@@ -17,8 +17,6 @@
|
|||||||
; This file contains an SSE2 implementation for Huffman coding of one block.
|
; This file contains an SSE2 implementation for Huffman coding of one block.
|
||||||
; The following code is based directly on jchuff.c; see jchuff.c for more
|
; The following code is based directly on jchuff.c; see jchuff.c for more
|
||||||
; details.
|
; details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
@@ -197,8 +195,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|||||||
push ebp
|
push ebp
|
||||||
|
|
||||||
mov esi, POINTER [eax+8] ; (working_state *state)
|
mov esi, POINTER [eax+8] ; (working_state *state)
|
||||||
mov put_buffer, DWORD [esi+8] ; put_buffer = state->cur.put_buffer;
|
mov put_buffer, dword [esi+8] ; put_buffer = state->cur.put_buffer;
|
||||||
mov put_bits, DWORD [esi+12] ; put_bits = state->cur.put_bits;
|
mov put_bits, dword [esi+12] ; put_bits = state->cur.put_bits;
|
||||||
push esi ; esi is now scratch
|
push esi ; esi is now scratch
|
||||||
|
|
||||||
get_GOT edx ; get GOT address
|
get_GOT edx ; get GOT address
|
||||||
@@ -214,7 +212,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|||||||
; Encode the DC coefficient difference per section F.1.2.1
|
; Encode the DC coefficient difference per section F.1.2.1
|
||||||
mov esi, POINTER [esp+block] ; block
|
mov esi, POINTER [esp+block] ; block
|
||||||
movsx ecx, word [esi] ; temp = temp2 = block[0] - last_dc_val;
|
movsx ecx, word [esi] ; temp = temp2 = block[0] - last_dc_val;
|
||||||
sub ecx, DWORD [eax+20]
|
sub ecx, dword [eax+20]
|
||||||
mov esi, ecx
|
mov esi, ecx
|
||||||
|
|
||||||
; This is a well-known technique for obtaining the absolute value
|
; This is a well-known technique for obtaining the absolute value
|
||||||
@@ -229,12 +227,12 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|||||||
; For a negative input, want temp2 = bitwise complement of abs(input)
|
; For a negative input, want temp2 = bitwise complement of abs(input)
|
||||||
; This code assumes we are on a two's complement machine
|
; This code assumes we are on a two's complement machine
|
||||||
add esi, edx ; temp2 += temp3;
|
add esi, edx ; temp2 += temp3;
|
||||||
mov DWORD [esp+temp], esi ; backup temp2 in temp
|
mov dword [esp+temp], esi ; backup temp2 in temp
|
||||||
|
|
||||||
; Find the number of bits needed for the magnitude of the coefficient
|
; Find the number of bits needed for the magnitude of the coefficient
|
||||||
movpic ebp, POINTER [esp+gotptr] ; load GOT address (ebp)
|
movpic ebp, POINTER [esp+gotptr] ; load GOT address (ebp)
|
||||||
movzx edx, byte [GOTOFF(ebp, jpeg_nbits_table + ecx)] ; nbits = JPEG_NBITS(temp);
|
movzx edx, byte [GOTOFF(ebp, jpeg_nbits_table + ecx)] ; nbits = JPEG_NBITS(temp);
|
||||||
mov DWORD [esp+temp2], edx ; backup nbits in temp2
|
mov dword [esp+temp2], edx ; backup nbits in temp2
|
||||||
|
|
||||||
; Emit the Huffman-coded symbol for the number of bits
|
; Emit the Huffman-coded symbol for the number of bits
|
||||||
mov ebp, POINTER [eax+24] ; After this point, arguments are not accessible anymore
|
mov ebp, POINTER [eax+24] ; After this point, arguments are not accessible anymore
|
||||||
@@ -242,13 +240,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|||||||
movzx ecx, byte [ebp + edx + 1024] ; size = dctbl->ehufsi[nbits];
|
movzx ecx, byte [ebp + edx + 1024] ; size = dctbl->ehufsi[nbits];
|
||||||
EMIT_BITS eax ; EMIT_BITS(code, size)
|
EMIT_BITS eax ; EMIT_BITS(code, size)
|
||||||
|
|
||||||
mov ecx, DWORD [esp+temp2] ; restore nbits
|
mov ecx, dword [esp+temp2] ; restore nbits
|
||||||
|
|
||||||
; Mask off any extra bits in code
|
; Mask off any extra bits in code
|
||||||
mov eax, 1
|
mov eax, 1
|
||||||
shl eax, cl
|
shl eax, cl
|
||||||
dec eax
|
dec eax
|
||||||
and eax, DWORD [esp+temp] ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
and eax, dword [esp+temp] ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
||||||
|
|
||||||
; Emit that number of bits of the value, if positive,
|
; Emit that number of bits of the value, if positive,
|
||||||
; or the complement of its magnitude, if negative.
|
; or the complement of its magnitude, if negative.
|
||||||
@@ -291,22 +289,22 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|||||||
jz near .ELOOP
|
jz near .ELOOP
|
||||||
lea esi, [esi+ecx*2] ; k += r;
|
lea esi, [esi+ecx*2] ; k += r;
|
||||||
shr edx, cl ; index >>= r;
|
shr edx, cl ; index >>= r;
|
||||||
mov DWORD [esp+temp3], edx
|
mov dword [esp+temp3], edx
|
||||||
.BRLOOP:
|
.BRLOOP:
|
||||||
cmp ecx, 16 ; while (r > 15) {
|
cmp ecx, 16 ; while (r > 15) {
|
||||||
jl near .ERLOOP
|
jl near .ERLOOP
|
||||||
sub ecx, 16 ; r -= 16;
|
sub ecx, 16 ; r -= 16;
|
||||||
mov DWORD [esp+temp], ecx
|
mov dword [esp+temp], ecx
|
||||||
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];
|
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];
|
||||||
movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0];
|
movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0];
|
||||||
EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0)
|
EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0)
|
||||||
mov ecx, DWORD [esp+temp]
|
mov ecx, dword [esp+temp]
|
||||||
jmp .BRLOOP
|
jmp .BRLOOP
|
||||||
.ERLOOP:
|
.ERLOOP:
|
||||||
movsx eax, word [esi] ; temp = t1[k];
|
movsx eax, word [esi] ; temp = t1[k];
|
||||||
movpic edx, POINTER [esp+gotptr] ; load GOT address (edx)
|
movpic edx, POINTER [esp+gotptr] ; load GOT address (edx)
|
||||||
movzx eax, byte [GOTOFF(edx, jpeg_nbits_table + eax)] ; nbits = JPEG_NBITS(temp);
|
movzx eax, byte [GOTOFF(edx, jpeg_nbits_table + eax)] ; nbits = JPEG_NBITS(temp);
|
||||||
mov DWORD [esp+temp2], eax
|
mov dword [esp+temp2], eax
|
||||||
; Emit Huffman symbol for run length / number of bits
|
; Emit Huffman symbol for run length / number of bits
|
||||||
shl ecx, 4 ; temp3 = (r << 4) + nbits;
|
shl ecx, 4 ; temp3 = (r << 4) + nbits;
|
||||||
add ecx, eax
|
add ecx, eax
|
||||||
@@ -316,13 +314,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|||||||
|
|
||||||
movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k];
|
movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k];
|
||||||
; Mask off any extra bits in code
|
; Mask off any extra bits in code
|
||||||
mov ecx, DWORD [esp+temp2]
|
mov ecx, dword [esp+temp2]
|
||||||
mov eax, 1
|
mov eax, 1
|
||||||
shl eax, cl
|
shl eax, cl
|
||||||
dec eax
|
dec eax
|
||||||
and eax, edx ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
and eax, edx ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
||||||
EMIT_BITS eax ; PUT_BITS(temp2, nbits)
|
EMIT_BITS eax ; PUT_BITS(temp2, nbits)
|
||||||
mov edx, DWORD [esp+temp3]
|
mov edx, dword [esp+temp3]
|
||||||
add esi, 2 ; ++k;
|
add esi, 2 ; ++k;
|
||||||
shr edx, 1 ; index >>= 1;
|
shr edx, 1 ; index >>= 1;
|
||||||
|
|
||||||
@@ -352,29 +350,29 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|||||||
shr edx, cl ; index >>= r;
|
shr edx, cl ; index >>= r;
|
||||||
add ecx, eax
|
add ecx, eax
|
||||||
lea esi, [esi+ecx*2] ; k += r;
|
lea esi, [esi+ecx*2] ; k += r;
|
||||||
mov DWORD [esp+temp3], edx
|
mov dword [esp+temp3], edx
|
||||||
jmp .BRLOOP2
|
jmp .BRLOOP2
|
||||||
.BLOOP2:
|
.BLOOP2:
|
||||||
bsf ecx, edx ; r = __builtin_ctzl(index);
|
bsf ecx, edx ; r = __builtin_ctzl(index);
|
||||||
jz near .ELOOP2
|
jz near .ELOOP2
|
||||||
lea esi, [esi+ecx*2] ; k += r;
|
lea esi, [esi+ecx*2] ; k += r;
|
||||||
shr edx, cl ; index >>= r;
|
shr edx, cl ; index >>= r;
|
||||||
mov DWORD [esp+temp3], edx
|
mov dword [esp+temp3], edx
|
||||||
.BRLOOP2:
|
.BRLOOP2:
|
||||||
cmp ecx, 16 ; while (r > 15) {
|
cmp ecx, 16 ; while (r > 15) {
|
||||||
jl near .ERLOOP2
|
jl near .ERLOOP2
|
||||||
sub ecx, 16 ; r -= 16;
|
sub ecx, 16 ; r -= 16;
|
||||||
mov DWORD [esp+temp], ecx
|
mov dword [esp+temp], ecx
|
||||||
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];
|
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];
|
||||||
movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0];
|
movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0];
|
||||||
EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0)
|
EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0)
|
||||||
mov ecx, DWORD [esp+temp]
|
mov ecx, dword [esp+temp]
|
||||||
jmp .BRLOOP2
|
jmp .BRLOOP2
|
||||||
.ERLOOP2:
|
.ERLOOP2:
|
||||||
movsx eax, word [esi] ; temp = t1[k];
|
movsx eax, word [esi] ; temp = t1[k];
|
||||||
bsr eax, eax ; nbits = 32 - __builtin_clz(temp);
|
bsr eax, eax ; nbits = 32 - __builtin_clz(temp);
|
||||||
inc eax
|
inc eax
|
||||||
mov DWORD [esp+temp2], eax
|
mov dword [esp+temp2], eax
|
||||||
; Emit Huffman symbol for run length / number of bits
|
; Emit Huffman symbol for run length / number of bits
|
||||||
shl ecx, 4 ; temp3 = (r << 4) + nbits;
|
shl ecx, 4 ; temp3 = (r << 4) + nbits;
|
||||||
add ecx, eax
|
add ecx, eax
|
||||||
@@ -384,13 +382,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|||||||
|
|
||||||
movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k];
|
movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k];
|
||||||
; Mask off any extra bits in code
|
; Mask off any extra bits in code
|
||||||
mov ecx, DWORD [esp+temp2]
|
mov ecx, dword [esp+temp2]
|
||||||
mov eax, 1
|
mov eax, 1
|
||||||
shl eax, cl
|
shl eax, cl
|
||||||
dec eax
|
dec eax
|
||||||
and eax, edx ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
and eax, edx ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
||||||
EMIT_BITS eax ; PUT_BITS(temp2, nbits)
|
EMIT_BITS eax ; PUT_BITS(temp2, nbits)
|
||||||
mov edx, DWORD [esp+temp3]
|
mov edx, dword [esp+temp3]
|
||||||
add esi, 2 ; ++k;
|
add esi, 2 ; ++k;
|
||||||
shr edx, 1 ; index >>= 1;
|
shr edx, 1 ; index >>= 1;
|
||||||
|
|
||||||
@@ -407,8 +405,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|||||||
mov eax, [esp+buffer]
|
mov eax, [esp+buffer]
|
||||||
pop esi
|
pop esi
|
||||||
; Save put_buffer & put_bits
|
; Save put_buffer & put_bits
|
||||||
mov DWORD [esi+8], put_buffer ; state->cur.put_buffer = put_buffer;
|
mov dword [esi+8], put_buffer ; state->cur.put_buffer = put_buffer;
|
||||||
mov DWORD [esi+12], put_bits ; state->cur.put_bits = put_bits;
|
mov dword [esi+12], put_bits ; state->cur.put_bits = put_bits;
|
||||||
|
|
||||||
pop ebp
|
pop ebp
|
||||||
pop edi
|
pop edi
|
||||||
|
|||||||
@@ -15,8 +15,6 @@
|
|||||||
;
|
;
|
||||||
; This file contains an SSE2 implementation of data preparation for progressive
|
; This file contains an SSE2 implementation of data preparation for progressive
|
||||||
; Huffman encoding. See jcphuff.c for more details.
|
; Huffman encoding. See jcphuff.c for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -348,7 +346,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
|||||||
vmovd eax, xmmA
|
vmovd eax, xmmA
|
||||||
cmp ecx, byte SIZEOF_WORD
|
cmp ecx, byte SIZEOF_WORD
|
||||||
jb short .column_st1
|
jb short .column_st1
|
||||||
mov WORD [edi], ax
|
mov word [edi], ax
|
||||||
add edi, byte SIZEOF_WORD
|
add edi, byte SIZEOF_WORD
|
||||||
sub ecx, byte SIZEOF_WORD
|
sub ecx, byte SIZEOF_WORD
|
||||||
shr eax, 16
|
shr eax, 16
|
||||||
@@ -357,7 +355,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
|||||||
; space.
|
; space.
|
||||||
test ecx, ecx
|
test ecx, ecx
|
||||||
jz short .nextrow
|
jz short .nextrow
|
||||||
mov BYTE [edi], al
|
mov byte [edi], al
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -280,7 +278,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
|
|||||||
movd eax, mmA
|
movd eax, mmA
|
||||||
cmp ecx, byte SIZEOF_DWORD
|
cmp ecx, byte SIZEOF_DWORD
|
||||||
jb short .column_st2
|
jb short .column_st2
|
||||||
mov DWORD [edi+0*SIZEOF_DWORD], eax
|
mov dword [edi+0*SIZEOF_DWORD], eax
|
||||||
psrlq mmA, DWORD_BIT
|
psrlq mmA, DWORD_BIT
|
||||||
movd eax, mmA
|
movd eax, mmA
|
||||||
sub ecx, byte SIZEOF_DWORD
|
sub ecx, byte SIZEOF_DWORD
|
||||||
@@ -288,14 +286,14 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
|
|||||||
.column_st2:
|
.column_st2:
|
||||||
cmp ecx, byte SIZEOF_WORD
|
cmp ecx, byte SIZEOF_WORD
|
||||||
jb short .column_st1
|
jb short .column_st1
|
||||||
mov WORD [edi+0*SIZEOF_WORD], ax
|
mov word [edi+0*SIZEOF_WORD], ax
|
||||||
shr eax, WORD_BIT
|
shr eax, WORD_BIT
|
||||||
sub ecx, byte SIZEOF_WORD
|
sub ecx, byte SIZEOF_WORD
|
||||||
add edi, byte SIZEOF_WORD
|
add edi, byte SIZEOF_WORD
|
||||||
.column_st1:
|
.column_st1:
|
||||||
cmp ecx, byte SIZEOF_BYTE
|
cmp ecx, byte SIZEOF_BYTE
|
||||||
jb short .nextrow
|
jb short .nextrow
|
||||||
mov BYTE [edi+0*SIZEOF_BYTE], al
|
mov byte [edi+0*SIZEOF_BYTE], al
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
@@ -367,7 +365,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
|
|||||||
.column_st4:
|
.column_st4:
|
||||||
cmp ecx, byte SIZEOF_MMWORD/8
|
cmp ecx, byte SIZEOF_MMWORD/8
|
||||||
jb short .nextrow
|
jb short .nextrow
|
||||||
movd DWORD [edi+0*SIZEOF_DWORD], mmA
|
movd dword [edi+0*SIZEOF_DWORD], mmA
|
||||||
|
|
||||||
%endif ; RGB_PIXELSIZE ; ---------------
|
%endif ; RGB_PIXELSIZE ; ---------------
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -320,7 +318,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
|||||||
movd eax, xmmA
|
movd eax, xmmA
|
||||||
cmp ecx, byte SIZEOF_WORD
|
cmp ecx, byte SIZEOF_WORD
|
||||||
jb short .column_st1
|
jb short .column_st1
|
||||||
mov WORD [edi], ax
|
mov word [edi], ax
|
||||||
add edi, byte SIZEOF_WORD
|
add edi, byte SIZEOF_WORD
|
||||||
sub ecx, byte SIZEOF_WORD
|
sub ecx, byte SIZEOF_WORD
|
||||||
shr eax, 16
|
shr eax, 16
|
||||||
@@ -329,7 +327,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
|||||||
; space.
|
; space.
|
||||||
test ecx, ecx
|
test ecx, ecx
|
||||||
jz short .nextrow
|
jz short .nextrow
|
||||||
mov BYTE [edi], al
|
mov byte [edi], al
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -354,7 +352,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
|||||||
vmovd eax, xmmA
|
vmovd eax, xmmA
|
||||||
cmp ecx, byte SIZEOF_WORD
|
cmp ecx, byte SIZEOF_WORD
|
||||||
jb short .column_st1
|
jb short .column_st1
|
||||||
mov WORD [edi], ax
|
mov word [edi], ax
|
||||||
add edi, byte SIZEOF_WORD
|
add edi, byte SIZEOF_WORD
|
||||||
sub ecx, byte SIZEOF_WORD
|
sub ecx, byte SIZEOF_WORD
|
||||||
shr eax, 16
|
shr eax, 16
|
||||||
@@ -363,7 +361,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
|||||||
; space.
|
; space.
|
||||||
test ecx, ecx
|
test ecx, ecx
|
||||||
jz short .endcolumn
|
jz short .endcolumn
|
||||||
mov BYTE [edi], al
|
mov byte [edi], al
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -283,7 +281,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
|||||||
movd eax, mmA
|
movd eax, mmA
|
||||||
cmp ecx, byte SIZEOF_DWORD
|
cmp ecx, byte SIZEOF_DWORD
|
||||||
jb short .column_st2
|
jb short .column_st2
|
||||||
mov DWORD [edi+0*SIZEOF_DWORD], eax
|
mov dword [edi+0*SIZEOF_DWORD], eax
|
||||||
psrlq mmA, DWORD_BIT
|
psrlq mmA, DWORD_BIT
|
||||||
movd eax, mmA
|
movd eax, mmA
|
||||||
sub ecx, byte SIZEOF_DWORD
|
sub ecx, byte SIZEOF_DWORD
|
||||||
@@ -291,14 +289,14 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
|||||||
.column_st2:
|
.column_st2:
|
||||||
cmp ecx, byte SIZEOF_WORD
|
cmp ecx, byte SIZEOF_WORD
|
||||||
jb short .column_st1
|
jb short .column_st1
|
||||||
mov WORD [edi+0*SIZEOF_WORD], ax
|
mov word [edi+0*SIZEOF_WORD], ax
|
||||||
shr eax, WORD_BIT
|
shr eax, WORD_BIT
|
||||||
sub ecx, byte SIZEOF_WORD
|
sub ecx, byte SIZEOF_WORD
|
||||||
add edi, byte SIZEOF_WORD
|
add edi, byte SIZEOF_WORD
|
||||||
.column_st1:
|
.column_st1:
|
||||||
cmp ecx, byte SIZEOF_BYTE
|
cmp ecx, byte SIZEOF_BYTE
|
||||||
jb short .endcolumn
|
jb short .endcolumn
|
||||||
mov BYTE [edi+0*SIZEOF_BYTE], al
|
mov byte [edi+0*SIZEOF_BYTE], al
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
@@ -373,7 +371,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
|||||||
.column_st4:
|
.column_st4:
|
||||||
cmp ecx, byte SIZEOF_MMWORD/8
|
cmp ecx, byte SIZEOF_MMWORD/8
|
||||||
jb short .endcolumn
|
jb short .endcolumn
|
||||||
movd DWORD [edi+0*SIZEOF_DWORD], mmA
|
movd dword [edi+0*SIZEOF_DWORD], mmA
|
||||||
|
|
||||||
%endif ; RGB_PIXELSIZE ; ---------------
|
%endif ; RGB_PIXELSIZE ; ---------------
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -325,7 +323,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
|||||||
movd eax, xmmA
|
movd eax, xmmA
|
||||||
cmp ecx, byte SIZEOF_WORD
|
cmp ecx, byte SIZEOF_WORD
|
||||||
jb short .column_st1
|
jb short .column_st1
|
||||||
mov WORD [edi], ax
|
mov word [edi], ax
|
||||||
add edi, byte SIZEOF_WORD
|
add edi, byte SIZEOF_WORD
|
||||||
sub ecx, byte SIZEOF_WORD
|
sub ecx, byte SIZEOF_WORD
|
||||||
shr eax, 16
|
shr eax, 16
|
||||||
@@ -334,7 +332,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
|||||||
; space.
|
; space.
|
||||||
test ecx, ecx
|
test ecx, ecx
|
||||||
jz short .endcolumn
|
jz short .endcolumn
|
||||||
mov BYTE [edi], al
|
mov byte [edi], al
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -17,8 +17,6 @@
|
|||||||
; This file contains a floating-point implementation of the forward DCT
|
; This file contains a floating-point implementation of the forward DCT
|
||||||
; (Discrete Cosine Transform). The following code is based directly on
|
; (Discrete Cosine Transform). The following code is based directly on
|
||||||
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
|
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -17,8 +17,6 @@
|
|||||||
; This file contains a floating-point implementation of the forward DCT
|
; This file contains a floating-point implementation of the forward DCT
|
||||||
; (Discrete Cosine Transform). The following code is based directly on
|
; (Discrete Cosine Transform). The following code is based directly on
|
||||||
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
|
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; the forward DCT (Discrete Cosine Transform). The following code is
|
; the forward DCT (Discrete Cosine Transform). The following code is
|
||||||
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
|
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
|
||||||
; for more details.
|
; for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; the forward DCT (Discrete Cosine Transform). The following code is
|
; the forward DCT (Discrete Cosine Transform). The following code is
|
||||||
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
|
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
|
||||||
; for more details.
|
; for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; forward DCT (Discrete Cosine Transform). The following code is based
|
; forward DCT (Discrete Cosine Transform). The following code is based
|
||||||
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
||||||
; more details.
|
; more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; forward DCT (Discrete Cosine Transform). The following code is based
|
; forward DCT (Discrete Cosine Transform). The following code is based
|
||||||
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
||||||
; more details.
|
; more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; forward DCT (Discrete Cosine Transform). The following code is based
|
; forward DCT (Discrete Cosine Transform). The following code is based
|
||||||
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
||||||
; more details.
|
; more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -17,8 +17,6 @@
|
|||||||
; This file contains a floating-point implementation of the inverse DCT
|
; This file contains a floating-point implementation of the inverse DCT
|
||||||
; (Discrete Cosine Transform). The following code is based directly on
|
; (Discrete Cosine Transform). The following code is based directly on
|
||||||
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -92,23 +90,23 @@ EXTN(jsimd_idct_float_3dnow):
|
|||||||
alignx 16, 7
|
alignx 16, 7
|
||||||
.columnloop:
|
.columnloop:
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
|
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
|
||||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||||
jnz short .columnDCT
|
jnz short .columnDCT
|
||||||
|
|
||||||
pushpic ebx ; save GOT address
|
pushpic ebx ; save GOT address
|
||||||
mov ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
mov ebx, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
||||||
mov eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
||||||
or ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
|
or ebx, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
||||||
or ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
|
or ebx, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
|
||||||
or eax, ebx
|
or eax, ebx
|
||||||
poppic ebx ; restore GOT address
|
poppic ebx ; restore GOT address
|
||||||
jnz short .columnDCT
|
jnz short .columnDCT
|
||||||
|
|
||||||
; -- AC terms all zero
|
; -- AC terms all zero
|
||||||
|
|
||||||
movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
||||||
|
|
||||||
punpcklwd mm0, mm0
|
punpcklwd mm0, mm0
|
||||||
psrad mm0, (DWORD_BIT-WORD_BIT)
|
psrad mm0, (DWORD_BIT-WORD_BIT)
|
||||||
@@ -135,10 +133,10 @@ EXTN(jsimd_idct_float_3dnow):
|
|||||||
|
|
||||||
; -- Even part
|
; -- Even part
|
||||||
|
|
||||||
movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
||||||
movd mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
movd mm1, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||||
movd mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
movd mm2, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
||||||
movd mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
movd mm3, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
||||||
|
|
||||||
punpcklwd mm0, mm0
|
punpcklwd mm0, mm0
|
||||||
punpcklwd mm1, mm1
|
punpcklwd mm1, mm1
|
||||||
@@ -182,10 +180,10 @@ EXTN(jsimd_idct_float_3dnow):
|
|||||||
|
|
||||||
; -- Odd part
|
; -- Odd part
|
||||||
|
|
||||||
movd mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
movd mm2, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
movd mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
movd mm3, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
||||||
movd mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
|
movd mm5, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
|
||||||
movd mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
|
movd mm1, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
|
||||||
|
|
||||||
punpcklwd mm2, mm2
|
punpcklwd mm2, mm2
|
||||||
punpcklwd mm3, mm3
|
punpcklwd mm3, mm3
|
||||||
|
|||||||
@@ -17,8 +17,6 @@
|
|||||||
; This file contains a floating-point implementation of the inverse DCT
|
; This file contains a floating-point implementation of the inverse DCT
|
||||||
; (Discrete Cosine Transform). The following code is based directly on
|
; (Discrete Cosine Transform). The following code is based directly on
|
||||||
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -102,8 +100,8 @@ EXTN(jsimd_idct_float_sse):
|
|||||||
alignx 16, 7
|
alignx 16, 7
|
||||||
.columnloop:
|
.columnloop:
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
|
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
|
||||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||||
jnz near .columnDCT
|
jnz near .columnDCT
|
||||||
|
|
||||||
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
|
|||||||
@@ -17,8 +17,6 @@
|
|||||||
; This file contains a floating-point implementation of the inverse DCT
|
; This file contains a floating-point implementation of the inverse DCT
|
||||||
; (Discrete Cosine Transform). The following code is based directly on
|
; (Discrete Cosine Transform). The following code is based directly on
|
||||||
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -102,8 +100,8 @@ EXTN(jsimd_idct_float_sse2):
|
|||||||
alignx 16, 7
|
alignx 16, 7
|
||||||
.columnloop:
|
.columnloop:
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
|
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
|
||||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||||
jnz near .columnDCT
|
jnz near .columnDCT
|
||||||
|
|
||||||
movq xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
movq xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; the inverse DCT (Discrete Cosine Transform). The following code is
|
; the inverse DCT (Discrete Cosine Transform). The following code is
|
||||||
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
|
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
|
||||||
; for more details.
|
; for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -123,8 +121,8 @@ EXTN(jsimd_idct_ifast_mmx):
|
|||||||
alignx 16, 7
|
alignx 16, 7
|
||||||
.columnloop:
|
.columnloop:
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
|
%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
|
||||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||||
jnz short .columnDCT
|
jnz short .columnDCT
|
||||||
|
|
||||||
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; the inverse DCT (Discrete Cosine Transform). The following code is
|
; the inverse DCT (Discrete Cosine Transform). The following code is
|
||||||
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
|
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
|
||||||
; for more details.
|
; for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -118,8 +116,8 @@ EXTN(jsimd_idct_ifast_sse2):
|
|||||||
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
||||||
|
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
|
%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
|
||||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||||
jnz near .columnDCT
|
jnz near .columnDCT
|
||||||
|
|
||||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; inverse DCT (Discrete Cosine Transform). The following code is based
|
; inverse DCT (Discrete Cosine Transform). The following code is based
|
||||||
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
||||||
; more details.
|
; more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -320,8 +318,8 @@ EXTN(jsimd_idct_islow_avx2):
|
|||||||
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
||||||
|
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2
|
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2
|
||||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||||
jnz near .columnDCT
|
jnz near .columnDCT
|
||||||
|
|
||||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; inverse DCT (Discrete Cosine Transform). The following code is based
|
; inverse DCT (Discrete Cosine Transform). The following code is based
|
||||||
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
||||||
; more details.
|
; more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -136,8 +134,8 @@ EXTN(jsimd_idct_islow_mmx):
|
|||||||
alignx 16, 7
|
alignx 16, 7
|
||||||
.columnloop:
|
.columnloop:
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
|
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
|
||||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||||
jnz short .columnDCT
|
jnz short .columnDCT
|
||||||
|
|
||||||
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; inverse DCT (Discrete Cosine Transform). The following code is based
|
; inverse DCT (Discrete Cosine Transform). The following code is based
|
||||||
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
||||||
; more details.
|
; more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -131,8 +129,8 @@ EXTN(jsimd_idct_islow_sse2):
|
|||||||
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
||||||
|
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
|
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
|
||||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||||
jnz near .columnDCT
|
jnz near .columnDCT
|
||||||
|
|
||||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
|
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
|
||||||
; The following code is based directly on the IJG's original jidctred.c;
|
; The following code is based directly on the IJG's original jidctred.c;
|
||||||
; see the jidctred.c for more details.
|
; see the jidctred.c for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -144,8 +142,8 @@ EXTN(jsimd_idct_4x4_mmx):
|
|||||||
alignx 16, 7
|
alignx 16, 7
|
||||||
.columnloop:
|
.columnloop:
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
|
%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
|
||||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||||
jnz short .columnDCT
|
jnz short .columnDCT
|
||||||
|
|
||||||
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
@@ -464,16 +462,16 @@ EXTN(jsimd_idct_4x4_mmx):
|
|||||||
|
|
||||||
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
||||||
mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
|
mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
|
||||||
movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
|
movd dword [edx+eax*SIZEOF_JSAMPLE], mm1
|
||||||
movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
|
movd dword [esi+eax*SIZEOF_JSAMPLE], mm0
|
||||||
|
|
||||||
psrlq mm1, 4*BYTE_BIT
|
psrlq mm1, 4*BYTE_BIT
|
||||||
psrlq mm0, 4*BYTE_BIT
|
psrlq mm0, 4*BYTE_BIT
|
||||||
|
|
||||||
mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
||||||
mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
|
mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
|
||||||
movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
|
movd dword [edx+eax*SIZEOF_JSAMPLE], mm1
|
||||||
movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
|
movd dword [esi+eax*SIZEOF_JSAMPLE], mm0
|
||||||
|
|
||||||
emms ; empty MMX state
|
emms ; empty MMX state
|
||||||
|
|
||||||
@@ -688,8 +686,8 @@ EXTN(jsimd_idct_2x2_mmx):
|
|||||||
|
|
||||||
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
||||||
mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
||||||
mov WORD [edx+eax*SIZEOF_JSAMPLE], bx
|
mov word [edx+eax*SIZEOF_JSAMPLE], bx
|
||||||
mov WORD [esi+eax*SIZEOF_JSAMPLE], cx
|
mov word [esi+eax*SIZEOF_JSAMPLE], cx
|
||||||
|
|
||||||
emms ; empty MMX state
|
emms ; empty MMX state
|
||||||
|
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
|
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
|
||||||
; The following code is based directly on the IJG's original jidctred.c;
|
; The following code is based directly on the IJG's original jidctred.c;
|
||||||
; see the jidctred.c for more details.
|
; see the jidctred.c for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -139,8 +137,8 @@ EXTN(jsimd_idct_4x4_sse2):
|
|||||||
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
||||||
|
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
|
%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
|
||||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||||
jnz short .columnDCT
|
jnz short .columnDCT
|
||||||
|
|
||||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||||
@@ -578,8 +576,8 @@ EXTN(jsimd_idct_2x2_sse2):
|
|||||||
|
|
||||||
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
||||||
mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
||||||
mov WORD [edx+eax*SIZEOF_JSAMPLE], bx
|
mov word [edx+eax*SIZEOF_JSAMPLE], bx
|
||||||
mov WORD [esi+eax*SIZEOF_JSAMPLE], cx
|
mov word [esi+eax*SIZEOF_JSAMPLE], cx
|
||||||
|
|
||||||
pop edi
|
pop edi
|
||||||
pop esi
|
pop esi
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -7,8 +7,6 @@
|
|||||||
; Based on the x86 SIMD extension for IJG JPEG library
|
; Based on the x86 SIMD extension for IJG JPEG library
|
||||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
; --------------------------------------------------------------------------
|
; --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|||||||
@@ -7,8 +7,6 @@
|
|||||||
; Based on the x86 SIMD extension for IJG JPEG library
|
; Based on the x86 SIMD extension for IJG JPEG library
|
||||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
; Each IDCT routine is responsible for range-limiting its results and
|
; Each IDCT routine is responsible for range-limiting its results and
|
||||||
; converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could
|
; converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could
|
||||||
|
|||||||
@@ -25,8 +25,6 @@
|
|||||||
; 2. Altered source versions must be plainly marked as such, and must not be
|
; 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
; misrepresented as being the original software.
|
; misrepresented as being the original software.
|
||||||
; 3. This notice may not be removed or altered from any source distribution.
|
; 3. This notice may not be removed or altered from any source distribution.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
; ==========================================================================
|
; ==========================================================================
|
||||||
; System-dependent configurations
|
; System-dependent configurations
|
||||||
@@ -204,19 +202,19 @@ section .note.GNU-stack noalloc noexec nowrite progbits
|
|||||||
%define XMM_DWORD
|
%define XMM_DWORD
|
||||||
%define XMM_MMWORD
|
%define XMM_MMWORD
|
||||||
|
|
||||||
%define SIZEOF_BYTE 1 ; sizeof(BYTE)
|
%define SIZEOF_BYTE 1 ; sizeof(byte)
|
||||||
%define SIZEOF_WORD 2 ; sizeof(WORD)
|
%define SIZEOF_WORD 2 ; sizeof(word)
|
||||||
%define SIZEOF_DWORD 4 ; sizeof(DWORD)
|
%define SIZEOF_DWORD 4 ; sizeof(dword)
|
||||||
%define SIZEOF_QWORD 8 ; sizeof(QWORD)
|
%define SIZEOF_QWORD 8 ; sizeof(qword)
|
||||||
%define SIZEOF_OWORD 16 ; sizeof(OWORD)
|
%define SIZEOF_OWORD 16 ; sizeof(oword)
|
||||||
%define SIZEOF_YWORD 32 ; sizeof(YWORD)
|
%define SIZEOF_YWORD 32 ; sizeof(yword)
|
||||||
|
|
||||||
%define BYTE_BIT 8 ; CHAR_BIT in C
|
%define BYTE_BIT 8 ; CHAR_BIT in C
|
||||||
%define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT
|
%define WORD_BIT 16 ; sizeof(word)*BYTE_BIT
|
||||||
%define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT
|
%define DWORD_BIT 32 ; sizeof(dword)*BYTE_BIT
|
||||||
%define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT
|
%define QWORD_BIT 64 ; sizeof(qword)*BYTE_BIT
|
||||||
%define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT
|
%define OWORD_BIT 128 ; sizeof(oword)*BYTE_BIT
|
||||||
%define YWORD_BIT 256 ; sizeof(YWORD)*BYTE_BIT
|
%define YWORD_BIT 256 ; sizeof(yword)*BYTE_BIT
|
||||||
|
|
||||||
; --------------------------------------------------------------------------
|
; --------------------------------------------------------------------------
|
||||||
; External Symbol Name
|
; External Symbol Name
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -97,12 +95,12 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
|
|||||||
test cl, SIZEOF_BYTE
|
test cl, SIZEOF_BYTE
|
||||||
jz short .column_ld2
|
jz short .column_ld2
|
||||||
sub rcx, byte SIZEOF_BYTE
|
sub rcx, byte SIZEOF_BYTE
|
||||||
movzx rax, BYTE [rsi+rcx]
|
movzx rax, byte [rsi+rcx]
|
||||||
.column_ld2:
|
.column_ld2:
|
||||||
test cl, SIZEOF_WORD
|
test cl, SIZEOF_WORD
|
||||||
jz short .column_ld4
|
jz short .column_ld4
|
||||||
sub rcx, byte SIZEOF_WORD
|
sub rcx, byte SIZEOF_WORD
|
||||||
movzx rdx, WORD [rsi+rcx]
|
movzx rdx, word [rsi+rcx]
|
||||||
shl rax, WORD_BIT
|
shl rax, WORD_BIT
|
||||||
or rax, rdx
|
or rax, rdx
|
||||||
.column_ld4:
|
.column_ld4:
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -96,12 +94,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
|
|||||||
test cl, SIZEOF_BYTE
|
test cl, SIZEOF_BYTE
|
||||||
jz short .column_ld2
|
jz short .column_ld2
|
||||||
sub rcx, byte SIZEOF_BYTE
|
sub rcx, byte SIZEOF_BYTE
|
||||||
movzx rax, BYTE [rsi+rcx]
|
movzx rax, byte [rsi+rcx]
|
||||||
.column_ld2:
|
.column_ld2:
|
||||||
test cl, SIZEOF_WORD
|
test cl, SIZEOF_WORD
|
||||||
jz short .column_ld4
|
jz short .column_ld4
|
||||||
sub rcx, byte SIZEOF_WORD
|
sub rcx, byte SIZEOF_WORD
|
||||||
movzx rdx, WORD [rsi+rcx]
|
movzx rdx, word [rsi+rcx]
|
||||||
shl rax, WORD_BIT
|
shl rax, WORD_BIT
|
||||||
or rax, rdx
|
or rax, rdx
|
||||||
.column_ld4:
|
.column_ld4:
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -12,8 +12,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -12,8 +12,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -89,12 +87,12 @@ EXTN(jsimd_rgb_gray_convert_avx2):
|
|||||||
test cl, SIZEOF_BYTE
|
test cl, SIZEOF_BYTE
|
||||||
jz short .column_ld2
|
jz short .column_ld2
|
||||||
sub rcx, byte SIZEOF_BYTE
|
sub rcx, byte SIZEOF_BYTE
|
||||||
movzx rax, BYTE [rsi+rcx]
|
movzx rax, byte [rsi+rcx]
|
||||||
.column_ld2:
|
.column_ld2:
|
||||||
test cl, SIZEOF_WORD
|
test cl, SIZEOF_WORD
|
||||||
jz short .column_ld4
|
jz short .column_ld4
|
||||||
sub rcx, byte SIZEOF_WORD
|
sub rcx, byte SIZEOF_WORD
|
||||||
movzx rdx, WORD [rsi+rcx]
|
movzx rdx, word [rsi+rcx]
|
||||||
shl rax, WORD_BIT
|
shl rax, WORD_BIT
|
||||||
or rax, rdx
|
or rax, rdx
|
||||||
.column_ld4:
|
.column_ld4:
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -88,12 +86,12 @@ EXTN(jsimd_rgb_gray_convert_sse2):
|
|||||||
test cl, SIZEOF_BYTE
|
test cl, SIZEOF_BYTE
|
||||||
jz short .column_ld2
|
jz short .column_ld2
|
||||||
sub rcx, byte SIZEOF_BYTE
|
sub rcx, byte SIZEOF_BYTE
|
||||||
movzx rax, BYTE [rsi+rcx]
|
movzx rax, byte [rsi+rcx]
|
||||||
.column_ld2:
|
.column_ld2:
|
||||||
test cl, SIZEOF_WORD
|
test cl, SIZEOF_WORD
|
||||||
jz short .column_ld4
|
jz short .column_ld4
|
||||||
sub rcx, byte SIZEOF_WORD
|
sub rcx, byte SIZEOF_WORD
|
||||||
movzx rdx, WORD [rsi+rcx]
|
movzx rdx, word [rsi+rcx]
|
||||||
shl rax, WORD_BIT
|
shl rax, WORD_BIT
|
||||||
or rax, rdx
|
or rax, rdx
|
||||||
.column_ld4:
|
.column_ld4:
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; This file contains an SSE2 implementation for Huffman coding of one block.
|
; This file contains an SSE2 implementation for Huffman coding of one block.
|
||||||
; The following code is based directly on jchuff.c; see jchuff.c for more
|
; The following code is based directly on jchuff.c; see jchuff.c for more
|
||||||
; details.
|
; details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
@@ -201,7 +199,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|||||||
mov buffer, r11 ; r11 is now scratch
|
mov buffer, r11 ; r11 is now scratch
|
||||||
|
|
||||||
mov put_buffer, MMWORD [r10+SIZEOF_POINTER*2] ; put_buffer = state->cur.put_buffer;
|
mov put_buffer, MMWORD [r10+SIZEOF_POINTER*2] ; put_buffer = state->cur.put_buffer;
|
||||||
mov put_bits, DWORD [r10+SIZEOF_POINTER*2+8] ; put_bits = state->cur.put_bits;
|
mov put_bits, dword [r10+SIZEOF_POINTER*2+8] ; put_bits = state->cur.put_bits;
|
||||||
push r10 ; r10 is now scratch
|
push r10 ; r10 is now scratch
|
||||||
|
|
||||||
; Encode the DC coefficient difference per section F.1.2.1
|
; Encode the DC coefficient difference per section F.1.2.1
|
||||||
@@ -334,7 +332,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|||||||
pop r10
|
pop r10
|
||||||
; Save put_buffer & put_bits
|
; Save put_buffer & put_bits
|
||||||
mov MMWORD [r10+SIZEOF_POINTER*2], put_buffer ; state->cur.put_buffer = put_buffer;
|
mov MMWORD [r10+SIZEOF_POINTER*2], put_buffer ; state->cur.put_buffer = put_buffer;
|
||||||
mov DWORD [r10+SIZEOF_POINTER*2+8], put_bits ; state->cur.put_bits = put_bits;
|
mov dword [r10+SIZEOF_POINTER*2+8], put_bits ; state->cur.put_bits = put_bits;
|
||||||
|
|
||||||
pop rbx
|
pop rbx
|
||||||
uncollect_args 6
|
uncollect_args 6
|
||||||
|
|||||||
@@ -16,8 +16,6 @@
|
|||||||
;
|
;
|
||||||
; This file contains an SSE2 implementation of data preparation for progressive
|
; This file contains an SSE2 implementation of data preparation for progressive
|
||||||
; Huffman encoding. See jcphuff.c for more details.
|
; Huffman encoding. See jcphuff.c for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -335,7 +333,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
|||||||
vmovd eax, xmmA
|
vmovd eax, xmmA
|
||||||
cmp rcx, byte SIZEOF_WORD
|
cmp rcx, byte SIZEOF_WORD
|
||||||
jb short .column_st1
|
jb short .column_st1
|
||||||
mov WORD [rdi], ax
|
mov word [rdi], ax
|
||||||
add rdi, byte SIZEOF_WORD
|
add rdi, byte SIZEOF_WORD
|
||||||
sub rcx, byte SIZEOF_WORD
|
sub rcx, byte SIZEOF_WORD
|
||||||
shr rax, 16
|
shr rax, 16
|
||||||
@@ -344,7 +342,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
|||||||
; space.
|
; space.
|
||||||
test rcx, rcx
|
test rcx, rcx
|
||||||
jz short .nextrow
|
jz short .nextrow
|
||||||
mov BYTE [rdi], al
|
mov byte [rdi], al
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -307,7 +305,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
|||||||
movd eax, xmmA
|
movd eax, xmmA
|
||||||
cmp rcx, byte SIZEOF_WORD
|
cmp rcx, byte SIZEOF_WORD
|
||||||
jb short .column_st1
|
jb short .column_st1
|
||||||
mov WORD [rdi], ax
|
mov word [rdi], ax
|
||||||
add rdi, byte SIZEOF_WORD
|
add rdi, byte SIZEOF_WORD
|
||||||
sub rcx, byte SIZEOF_WORD
|
sub rcx, byte SIZEOF_WORD
|
||||||
shr rax, 16
|
shr rax, 16
|
||||||
@@ -316,7 +314,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
|||||||
; space.
|
; space.
|
||||||
test rcx, rcx
|
test rcx, rcx
|
||||||
jz short .nextrow
|
jz short .nextrow
|
||||||
mov BYTE [rdi], al
|
mov byte [rdi], al
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -340,7 +338,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
|||||||
vmovd eax, xmmA
|
vmovd eax, xmmA
|
||||||
cmp rcx, byte SIZEOF_WORD
|
cmp rcx, byte SIZEOF_WORD
|
||||||
jb short .column_st1
|
jb short .column_st1
|
||||||
mov WORD [rdi], ax
|
mov word [rdi], ax
|
||||||
add rdi, byte SIZEOF_WORD
|
add rdi, byte SIZEOF_WORD
|
||||||
sub rcx, byte SIZEOF_WORD
|
sub rcx, byte SIZEOF_WORD
|
||||||
shr rax, 16
|
shr rax, 16
|
||||||
@@ -349,7 +347,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
|||||||
; space.
|
; space.
|
||||||
test rcx, rcx
|
test rcx, rcx
|
||||||
jz short .endcolumn
|
jz short .endcolumn
|
||||||
mov BYTE [rdi], al
|
mov byte [rdi], al
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jcolsamp.inc"
|
%include "jcolsamp.inc"
|
||||||
|
|
||||||
@@ -311,7 +309,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
|||||||
movd eax, xmmA
|
movd eax, xmmA
|
||||||
cmp rcx, byte SIZEOF_WORD
|
cmp rcx, byte SIZEOF_WORD
|
||||||
jb short .column_st1
|
jb short .column_st1
|
||||||
mov WORD [rdi], ax
|
mov word [rdi], ax
|
||||||
add rdi, byte SIZEOF_WORD
|
add rdi, byte SIZEOF_WORD
|
||||||
sub rcx, byte SIZEOF_WORD
|
sub rcx, byte SIZEOF_WORD
|
||||||
shr rax, 16
|
shr rax, 16
|
||||||
@@ -320,7 +318,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
|||||||
; space.
|
; space.
|
||||||
test rcx, rcx
|
test rcx, rcx
|
||||||
jz short .endcolumn
|
jz short .endcolumn
|
||||||
mov BYTE [rdi], al
|
mov byte [rdi], al
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
@@ -17,8 +17,6 @@
|
|||||||
; This file contains a floating-point implementation of the forward DCT
|
; This file contains a floating-point implementation of the forward DCT
|
||||||
; (Discrete Cosine Transform). The following code is based directly on
|
; (Discrete Cosine Transform). The following code is based directly on
|
||||||
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
|
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; the forward DCT (Discrete Cosine Transform). The following code is
|
; the forward DCT (Discrete Cosine Transform). The following code is
|
||||||
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
|
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
|
||||||
; for more details.
|
; for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; forward DCT (Discrete Cosine Transform). The following code is based
|
; forward DCT (Discrete Cosine Transform). The following code is based
|
||||||
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
||||||
; more details.
|
; more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; forward DCT (Discrete Cosine Transform). The following code is based
|
; forward DCT (Discrete Cosine Transform). The following code is based
|
||||||
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
||||||
; more details.
|
; more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -18,8 +18,6 @@
|
|||||||
; This file contains a floating-point implementation of the inverse DCT
|
; This file contains a floating-point implementation of the inverse DCT
|
||||||
; (Discrete Cosine Transform). The following code is based directly on
|
; (Discrete Cosine Transform). The following code is based directly on
|
||||||
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -96,8 +94,8 @@ EXTN(jsimd_idct_float_sse2):
|
|||||||
mov rcx, DCTSIZE/4 ; ctr
|
mov rcx, DCTSIZE/4 ; ctr
|
||||||
.columnloop:
|
.columnloop:
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
|
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
|
||||||
mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
||||||
jnz near .columnDCT
|
jnz near .columnDCT
|
||||||
|
|
||||||
movq xmm1, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
movq xmm1, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||||
|
|||||||
@@ -19,8 +19,6 @@
|
|||||||
; the inverse DCT (Discrete Cosine Transform). The following code is
|
; the inverse DCT (Discrete Cosine Transform). The following code is
|
||||||
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
|
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
|
||||||
; for more details.
|
; for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -112,8 +110,8 @@ EXTN(jsimd_idct_ifast_sse2):
|
|||||||
mov rsi, r11 ; inptr
|
mov rsi, r11 ; inptr
|
||||||
|
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
|
%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
|
||||||
mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
||||||
jnz near .columnDCT
|
jnz near .columnDCT
|
||||||
|
|
||||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||||
|
|||||||
@@ -19,8 +19,6 @@
|
|||||||
; inverse DCT (Discrete Cosine Transform). The following code is based
|
; inverse DCT (Discrete Cosine Transform). The following code is based
|
||||||
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
||||||
; more details.
|
; more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -293,8 +291,8 @@ EXTN(jsimd_idct_islow_avx2):
|
|||||||
; ---- Pass 1: process columns.
|
; ---- Pass 1: process columns.
|
||||||
|
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2
|
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2
|
||||||
mov eax, DWORD [DWBLOCK(1,0,r11,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,r11,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,r11,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,r11,SIZEOF_JCOEF)]
|
||||||
jnz near .columnDCT
|
jnz near .columnDCT
|
||||||
|
|
||||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,r11,SIZEOF_JCOEF)]
|
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,r11,SIZEOF_JCOEF)]
|
||||||
|
|||||||
@@ -19,8 +19,6 @@
|
|||||||
; inverse DCT (Discrete Cosine Transform). The following code is based
|
; inverse DCT (Discrete Cosine Transform). The following code is based
|
||||||
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
||||||
; more details.
|
; more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -125,8 +123,8 @@ EXTN(jsimd_idct_islow_sse2):
|
|||||||
mov rsi, r11 ; inptr
|
mov rsi, r11 ; inptr
|
||||||
|
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
|
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
|
||||||
mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
||||||
jnz near .columnDCT
|
jnz near .columnDCT
|
||||||
|
|
||||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||||
|
|||||||
@@ -19,8 +19,6 @@
|
|||||||
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
|
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
|
||||||
; The following code is based directly on the IJG's original jidctred.c;
|
; The following code is based directly on the IJG's original jidctred.c;
|
||||||
; see the jidctred.c for more details.
|
; see the jidctred.c for more details.
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
@@ -133,8 +131,8 @@ EXTN(jsimd_idct_4x4_sse2):
|
|||||||
mov rsi, r11 ; inptr
|
mov rsi, r11 ; inptr
|
||||||
|
|
||||||
%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
|
%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
|
||||||
mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||||
or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
|
||||||
jnz short .columnDCT
|
jnz short .columnDCT
|
||||||
|
|
||||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
||||||
@@ -563,8 +561,8 @@ EXTN(jsimd_idct_2x2_sse2):
|
|||||||
|
|
||||||
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
||||||
mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
||||||
mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx
|
mov word [rdx+rax*SIZEOF_JSAMPLE], bx
|
||||||
mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx
|
mov word [rsi+rax*SIZEOF_JSAMPLE], cx
|
||||||
|
|
||||||
pop rbx
|
pop rbx
|
||||||
uncollect_args 4
|
uncollect_args 4
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -15,8 +15,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
%include "jdct.inc"
|
%include "jdct.inc"
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
; assembler (including Borland's Turbo Assembler).
|
; assembler (including Borland's Turbo Assembler).
|
||||||
; NASM is available from http://nasm.sourceforge.net/ or
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
;
|
|
||||||
; [TAB8]
|
|
||||||
|
|
||||||
%include "jsimdext.inc"
|
%include "jsimdext.inc"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user