Compare commits
76 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5198654f73 | ||
|
|
0d2908a62b | ||
|
|
ee39375c85 | ||
|
|
50d4088439 | ||
|
|
7e8ed0d448 | ||
|
|
6c1538470f | ||
|
|
6b05623682 | ||
|
|
c716918d0a | ||
|
|
619c20d5e8 | ||
|
|
f8a5b80cb0 | ||
|
|
cc11b90b22 | ||
|
|
0629d2a00d | ||
|
|
32ba839c57 | ||
|
|
1da5cf4251 | ||
|
|
fbf0a5fbc5 | ||
|
|
da75d56d6c | ||
|
|
02939f53a0 | ||
|
|
39b950076a | ||
|
|
c0f5e0b702 | ||
|
|
4966e1eec5 | ||
|
|
67753d1298 | ||
|
|
f446e5d5c7 | ||
|
|
0c8bf27a3c | ||
|
|
0aea1da9f0 | ||
|
|
85e2e0f9c2 | ||
|
|
f2ec34de52 | ||
|
|
5e8fd24131 | ||
|
|
71441f322e | ||
|
|
aa20343efd | ||
|
|
7faa703ebf | ||
|
|
e4189accd3 | ||
|
|
5ba6c7effb | ||
|
|
bf506e11b7 | ||
|
|
cdb6c34e1c | ||
|
|
576eef0509 | ||
|
|
f654cf0e2c | ||
|
|
70d831dc0d | ||
|
|
510e67c542 | ||
|
|
243aba148e | ||
|
|
2a4e7f1bc3 | ||
|
|
6a244cb514 | ||
|
|
2ee9faef10 | ||
|
|
a07787f423 | ||
|
|
ace9d06b9e | ||
|
|
752ee33e86 | ||
|
|
d44ffd9db0 | ||
|
|
598cd994f3 | ||
|
|
fa628eff6a | ||
|
|
933289f509 | ||
|
|
888d4075ee | ||
|
|
ddd54ff8a8 | ||
|
|
0d435698f4 | ||
|
|
63c1674ebc | ||
|
|
864600d707 | ||
|
|
4efb529bb7 | ||
|
|
f8e8039204 | ||
|
|
aa805bc89f | ||
|
|
81a64020e3 | ||
|
|
a546be5141 | ||
|
|
c7dadd2d0b | ||
|
|
7475e59637 | ||
|
|
25e40dc42c | ||
|
|
296c8bad7e | ||
|
|
6b99f99b88 | ||
|
|
779f5622aa | ||
|
|
0cbef40560 | ||
|
|
aed7d4661e | ||
|
|
de852420c0 | ||
|
|
c1afc7921d | ||
|
|
a9cad80d19 | ||
|
|
f03d5df238 | ||
|
|
0e9c14e1bb | ||
|
|
602f5bea74 | ||
|
|
24ad6a0179 | ||
|
|
60ba1963fe | ||
|
|
f7067a9e73 |
32
.gitignore
vendored
32
.gitignore
vendored
@@ -1,14 +1,44 @@
|
|||||||
Makefile.in
|
Makefile.in
|
||||||
|
Makefile
|
||||||
|
/CMakeFiles
|
||||||
/autom4te.cache
|
/autom4te.cache
|
||||||
/aclocal.m4
|
/aclocal.m4
|
||||||
/compile
|
/compile
|
||||||
/configure
|
/configure
|
||||||
/depcomp
|
/depcomp
|
||||||
/install-sh
|
/install-sh
|
||||||
|
/libtool
|
||||||
/missing
|
/missing
|
||||||
/stamp-h1
|
/stamp-h*
|
||||||
|
/java/classnoinst.stamp
|
||||||
|
/pkgscripts/
|
||||||
|
/jconfig.h
|
||||||
|
/jconfigint.h
|
||||||
/config.guess
|
/config.guess
|
||||||
|
/config.h
|
||||||
/config.h.in
|
/config.h.in
|
||||||
|
/config.log
|
||||||
|
/config.status
|
||||||
/config.sub
|
/config.sub
|
||||||
/ltmain.sh
|
/ltmain.sh
|
||||||
/ar-lib
|
/ar-lib
|
||||||
|
/libjpeg.map
|
||||||
|
/.libs/
|
||||||
|
/simd/.libs/
|
||||||
|
/simd/jsimdcfg.inc
|
||||||
|
*.o
|
||||||
|
*.lo
|
||||||
|
*.la
|
||||||
|
/cjpeg
|
||||||
|
/djpeg
|
||||||
|
/jcstest
|
||||||
|
/jpegtran
|
||||||
|
/jpegyuv
|
||||||
|
/md5/md5cmp
|
||||||
|
/rdjpgcom
|
||||||
|
/test_enc_*
|
||||||
|
/tjbench
|
||||||
|
/tjbenchtest
|
||||||
|
/tjunittest
|
||||||
|
/wrjpgcom
|
||||||
|
/yuvjpeg
|
||||||
|
|||||||
118
BUILDING.txt
118
BUILDING.txt
@@ -7,6 +7,7 @@
|
|||||||
Build Requirements
|
Build Requirements
|
||||||
==================
|
==================
|
||||||
|
|
||||||
|
-- pkg-config
|
||||||
-- autoconf 2.56 or later
|
-- autoconf 2.56 or later
|
||||||
-- automake 1.7 or later
|
-- automake 1.7 or later
|
||||||
-- libtool 1.4 or later
|
-- libtool 1.4 or later
|
||||||
@@ -68,12 +69,25 @@ The following procedure will build mozjpeg on Linux, FreeBSD, Cygwin, and
|
|||||||
Solaris/x86 systems (on Solaris, this generates a 32-bit library. See below
|
Solaris/x86 systems (on Solaris, this generates a 32-bit library. See below
|
||||||
for 64-bit build instructions.)
|
for 64-bit build instructions.)
|
||||||
|
|
||||||
|
Simple Release tar.gz Source Build
|
||||||
|
----------------------------------
|
||||||
|
|
||||||
|
cd {source_directory}
|
||||||
|
./configure [additional configure flags]
|
||||||
|
make
|
||||||
|
|
||||||
|
Non-Release Source Build (e.g. GitHub clone)
|
||||||
|
--------------------------------------------
|
||||||
|
|
||||||
cd {source_directory}
|
cd {source_directory}
|
||||||
autoreconf -fiv
|
autoreconf -fiv
|
||||||
cd {build_directory}
|
cd {build_directory}
|
||||||
sh {source_directory}/configure [additional configure flags]
|
sh {source_directory}/configure [additional configure flags]
|
||||||
make
|
make
|
||||||
|
|
||||||
|
Results
|
||||||
|
-------
|
||||||
|
|
||||||
This will generate the following files under .libs/
|
This will generate the following files under .libs/
|
||||||
|
|
||||||
libjpeg.a
|
libjpeg.a
|
||||||
@@ -309,6 +323,9 @@ Additional build requirements:
|
|||||||
(https://sourceforge.net/p/libjpeg-turbo/code/HEAD/tree/gas-preprocessor)
|
(https://sourceforge.net/p/libjpeg-turbo/code/HEAD/tree/gas-preprocessor)
|
||||||
should be installed in your PATH.
|
should be installed in your PATH.
|
||||||
|
|
||||||
|
|
||||||
|
ARM 32-bit Build (Xcode 4.6.x and earlier, LLVM-GCC):
|
||||||
|
|
||||||
Set the following shell variables for simplicity:
|
Set the following shell variables for simplicity:
|
||||||
|
|
||||||
Xcode 4.2 and earlier:
|
Xcode 4.2 and earlier:
|
||||||
@@ -317,47 +334,80 @@ Set the following shell variables for simplicity:
|
|||||||
IOS_PLATFORMDIR=/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform
|
IOS_PLATFORMDIR=/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform
|
||||||
|
|
||||||
IOS_SYSROOT=$IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk
|
IOS_SYSROOT=$IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk
|
||||||
|
|
||||||
Xcode 4.6.x and earlier:
|
|
||||||
IOS_GCC=$IOS_PLATFORMDIR/Developer/usr/bin/arm-apple-darwin10-llvm-gcc-4.2
|
IOS_GCC=$IOS_PLATFORMDIR/Developer/usr/bin/arm-apple-darwin10-llvm-gcc-4.2
|
||||||
Xcode 5.0.x and later:
|
|
||||||
IOS_GCC=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
|
||||||
|
|
||||||
ARM v6 (code will run on all iOS devices, not SIMD-accelerated):
|
ARMv6 (code will run on all iOS devices, not SIMD-accelerated):
|
||||||
[NOTE: Requires Xcode 4.4.x or earlier]
|
[NOTE: Requires Xcode 4.4.x or earlier]
|
||||||
IOS_CFLAGS="-march=armv6 -mcpu=arm1176jzf-s -mfpu=vfp"
|
IOS_CFLAGS="-march=armv6 -mcpu=arm1176jzf-s -mfpu=vfp"
|
||||||
|
|
||||||
ARM v7 (code will run on iPhone 3GS-4S/iPad 1st-3rd Generation and newer):
|
ARMv7 (code will run on iPhone 3GS-4S/iPad 1st-3rd Generation and newer):
|
||||||
Xcode 4.6.x and earlier:
|
|
||||||
IOS_CFLAGS="-march=armv7 -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon"
|
IOS_CFLAGS="-march=armv7 -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon"
|
||||||
Xcode 5.0.x and later:
|
|
||||||
IOS_CFLAGS="-arch armv7"
|
|
||||||
|
|
||||||
ARM v7s (code will run on iPhone 5/iPad 4th Generation and newer):
|
ARMv7s (code will run on iPhone 5/iPad 4th Generation and newer):
|
||||||
[NOTE: Requires Xcode 4.5 or later]
|
[NOTE: Requires Xcode 4.5 or later]
|
||||||
Xcode 4.6.x and earlier:
|
|
||||||
IOS_CFLAGS="-march=armv7s -mcpu=swift -mtune=swift -mfpu=neon"
|
IOS_CFLAGS="-march=armv7s -mcpu=swift -mtune=swift -mfpu=neon"
|
||||||
Xcode 5.0.x and later:
|
|
||||||
IOS_CFLAGS="-arch armv7s"
|
|
||||||
|
|
||||||
Follow the procedure under "Building mozjpeg" above, adding
|
Follow the procedure under "Building mozjpeg" above, adding
|
||||||
|
|
||||||
--host arm-apple-darwin10 --enable-static --disable-shared \
|
--host arm-apple-darwin10 \
|
||||||
CC="$IOS_GCC" LD="$IOS_GCC" \
|
CC="$IOS_GCC" LD="$IOS_GCC" \
|
||||||
CFLAGS="-mfloat-abi=softfp -isysroot $IOS_SYSROOT -O3 $IOS_CFLAGS" \
|
CFLAGS="-mfloat-abi=softfp -isysroot $IOS_SYSROOT -O3 $IOS_CFLAGS" \
|
||||||
LDFLAGS="-mfloat-abi=softfp -isysroot $IOS_SYSROOT $IOS_CFLAGS"
|
LDFLAGS="-mfloat-abi=softfp -isysroot $IOS_SYSROOT $IOS_CFLAGS"
|
||||||
|
|
||||||
to the configure command line. If using Xcode 5.0.x or later, also add
|
to the configure command line.
|
||||||
|
|
||||||
|
|
||||||
|
ARM 32-bit Build (Xcode 5.0.x and later, Clang):
|
||||||
|
|
||||||
|
Set the following shell variables for simplicity:
|
||||||
|
|
||||||
|
IOS_PLATFORMDIR=/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform
|
||||||
|
IOS_SYSROOT=$IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk
|
||||||
|
IOS_GCC=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||||
|
|
||||||
|
ARMv7 (code will run on iPhone 3GS-4S/iPad 1st-3rd Generation and newer):
|
||||||
|
IOS_CFLAGS="-arch armv7"
|
||||||
|
|
||||||
|
ARMv7s (code will run on iPhone 5/iPad 4th Generation and newer):
|
||||||
|
IOS_CFLAGS="-arch armv7s"
|
||||||
|
|
||||||
|
Follow the procedure under "Building mozjpeg" above, adding
|
||||||
|
|
||||||
|
--host arm-apple-darwin10 \
|
||||||
|
CC="$IOS_GCC" LD="$IOS_GCC" \
|
||||||
|
CFLAGS="-mfloat-abi=softfp -isysroot $IOS_SYSROOT -O3 $IOS_CFLAGS" \
|
||||||
|
LDFLAGS="-mfloat-abi=softfp -isysroot $IOS_SYSROOT $IOS_CFLAGS" \
|
||||||
CCASFLAGS="-no-integrated-as $IOS_CFLAGS"
|
CCASFLAGS="-no-integrated-as $IOS_CFLAGS"
|
||||||
|
|
||||||
to the configure command line.
|
to the configure command line.
|
||||||
|
|
||||||
|
|
||||||
|
ARMv8 64-bit Build (Xcode 5.0.x and later, Clang):
|
||||||
|
|
||||||
|
Code will run on iPhone 5S/iPad Mini 2 and newer.
|
||||||
|
|
||||||
|
Set the following shell variables for simplicity:
|
||||||
|
|
||||||
|
IOS_PLATFORMDIR=/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform
|
||||||
|
IOS_SYSROOT=$IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk
|
||||||
|
IOS_GCC=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||||
|
IOS_CFLAGS="-arch arm64"
|
||||||
|
|
||||||
|
Follow the procedure under "Building mozjpeg" above, adding
|
||||||
|
|
||||||
|
--host aarch64-apple-darwin \
|
||||||
|
CC="$IOS_GCC" LD="$IOS_GCC" \
|
||||||
|
CFLAGS="-isysroot $IOS_SYSROOT -O3 $IOS_CFLAGS" \
|
||||||
|
LDFLAGS="-isysroot $IOS_SYSROOT $IOS_CFLAGS"
|
||||||
|
|
||||||
|
to the configure command line.
|
||||||
|
|
||||||
|
|
||||||
NOTE: You can also add -miphoneos-version-min={version} to $IOS_CFLAGS above
|
NOTE: You can also add -miphoneos-version-min={version} to $IOS_CFLAGS above
|
||||||
in order to support older versions of iOS than the default version supported by
|
in order to support older versions of iOS than the default version supported by
|
||||||
the SDK.
|
the SDK.
|
||||||
|
|
||||||
Once built, lipo can be used to combine the ARM v6, v7, and/or v7s variants
|
Once built, lipo can be used to combine the ARMv6, v7, v7s, and/or v8 variants
|
||||||
into a universal library.
|
into a universal library.
|
||||||
|
|
||||||
|
|
||||||
@@ -732,26 +782,32 @@ make udmg [BUILDDIR32={32-bit build directory}]
|
|||||||
make command line as shown above.
|
make command line as shown above.
|
||||||
|
|
||||||
make iosdmg [BUILDDIR32={32-bit build directory}] \
|
make iosdmg [BUILDDIR32={32-bit build directory}] \
|
||||||
[BUILDDIRARMV6={ARM v6 build directory}] \
|
[BUILDDIRARMV6={ARMv6 build directory}] \
|
||||||
[BUILDDIRARMV7={ARM v7 build directory}] \
|
[BUILDDIRARMV7={ARMv7 build directory}] \
|
||||||
[BUILDDIRARMV7S={ARM v7s build directory}]
|
[BUILDDIRARMV7S={ARMv7s build directory}] \
|
||||||
|
[BUILDDIRARMV8={ARMv8 build directory}]
|
||||||
|
|
||||||
On OS X systems, this creates a Macintosh package and disk image in which the
|
On OS X systems, this creates a Macintosh package and disk image in which the
|
||||||
mozjpeg static libraries contain ARM architectures necessary to build
|
mozjpeg static libraries contain ARM architectures necessary to build
|
||||||
iOS applications. If building on an x86-64 system, the binaries will also
|
iOS applications. If building on an x86-64 system, the binaries will also
|
||||||
contain the i386 architecture, as with 'make udmg' above. You should first
|
contain the i386 architecture, as with 'make udmg' above. You should first
|
||||||
configure ARM v6, ARM v7, and/or ARM v7s out-of-tree builds of mozjpeg
|
configure ARMv6, ARMv7, ARMv7s, and/or ARMv8 out-of-tree builds of
|
||||||
(see "Building mozjpeg for iOS" above.) If you are building an x86-64
|
mozjpeg (see "Building mozjpeg for iOS" above.) If you are
|
||||||
version of mozjpeg, you should configure a 32-bit out-of-tree build as
|
building an x86-64 version of mozjpeg, you should configure a 32-bit
|
||||||
well. Next, build mozjpeg as you would normally, using an out-of-tree
|
out-of-tree build as well. Next, build mozjpeg as you would normally,
|
||||||
build. When it is built, run 'make iosdmg' from the build directory. The
|
using an out-of-tree build. When it is built, run 'make iosdmg' from the
|
||||||
build system will look for the ARM v6 build under {source_directory}/iosarmv6
|
build directory. The build system will look for the ARMv6 build under
|
||||||
by default, the ARM v7 build under {source_directory}/iosarmv7 by default,
|
{source_directory}/iosarmv6 by default, the ARMv7 build under
|
||||||
the ARM v7s build under {source_directory}/iosarmv7s by default, and (if
|
{source_directory}/iosarmv7 by default, the ARMv7s build under
|
||||||
applicable) the 32-bit build under {source_directory}/osxx86 by default, but
|
{source_directory}/iosarmv7s by default, the ARMv8 build under
|
||||||
you can override this by setting the BUILDDIR32, BUILDDIRARMV6,
|
{source_directory}/iosarmv8 by default, and (if applicable) the 32-bit build
|
||||||
BUILDDIRARMV7, and/or BUILDDIRARMV7S variables on the make command line as
|
under {source_directory}/osxx86 by default, but you can override this by
|
||||||
shown above.
|
setting the BUILDDIR32, BUILDDIRARMV6, BUILDDIRARMV7, BUILDDIRARMV7S, and/or
|
||||||
|
BUILDDIRARMV8 variables on the make command line as shown above.
|
||||||
|
|
||||||
|
NOTE: If including an ARMv8 build in the package, then you may need to use
|
||||||
|
Xcode's version of lipo instead of the operating system's. To do this, pass
|
||||||
|
an argument of LIPO="xcrun lipo" on the make command line.
|
||||||
|
|
||||||
make cygwinpkg
|
make cygwinpkg
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ if(POLICY CMP0022)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
project(mozjpeg C)
|
project(mozjpeg C)
|
||||||
set(VERSION 3.0)
|
set(VERSION 3.1)
|
||||||
|
|
||||||
if(CYGWIN OR NOT CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
|
if(CYGWIN OR NOT CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
|
||||||
execute_process(COMMAND "date" "+%Y%m%d" OUTPUT_VARIABLE BUILD)
|
execute_process(COMMAND "date" "+%Y%m%d" OUTPUT_VARIABLE BUILD)
|
||||||
@@ -557,7 +557,7 @@ foreach(libtype shared static)
|
|||||||
add_test(djpeg${suffix}-gray-islow-rgb
|
add_test(djpeg${suffix}-gray-islow-rgb
|
||||||
${dir}djpeg${suffix} -dct int -rgb -outfile testout_gray_islow_rgb.ppm
|
${dir}djpeg${suffix} -dct int -rgb -outfile testout_gray_islow_rgb.ppm
|
||||||
testout_gray_islow.jpg)
|
testout_gray_islow.jpg)
|
||||||
add_test(cjpeg${suffix}-gray-islow-rgb-cmp
|
add_test(djpeg${suffix}-gray-islow-rgb-cmp
|
||||||
${CMAKE_COMMAND} -DMD5=${MD5_PPM_GRAY_ISLOW_RGB}
|
${CMAKE_COMMAND} -DMD5=${MD5_PPM_GRAY_ISLOW_RGB}
|
||||||
-DFILE=testout_gray_islow_rgb.ppm
|
-DFILE=testout_gray_islow_rgb.ppm
|
||||||
-P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
|
-P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
|
||||||
@@ -637,10 +637,10 @@ foreach(libtype shared static)
|
|||||||
endif()
|
endif()
|
||||||
if(WITH_ARITH_DEC)
|
if(WITH_ARITH_DEC)
|
||||||
# CC: RGB->YCC SAMP: h2v2 merged IDCT: ifast ENT: arith
|
# CC: RGB->YCC SAMP: h2v2 merged IDCT: ifast ENT: arith
|
||||||
add_test(cjpeg${suffix}-420m-ifast-ari
|
add_test(djpeg${suffix}-420m-ifast-ari
|
||||||
${dir}djpeg${suffix} -fast -ppm -outfile testout_420m_ifast_ari.ppm
|
${dir}djpeg${suffix} -fast -ppm -outfile testout_420m_ifast_ari.ppm
|
||||||
${CMAKE_SOURCE_DIR}/testimages/testimgari.jpg)
|
${CMAKE_SOURCE_DIR}/testimages/testimgari.jpg)
|
||||||
add_test(cjpeg${suffix}-420m-ifast-ari-cmp
|
add_test(djpeg${suffix}-420m-ifast-ari-cmp
|
||||||
${CMAKE_COMMAND} -DMD5=${MD5_PPM_420M_IFAST_ARI}
|
${CMAKE_COMMAND} -DMD5=${MD5_PPM_420M_IFAST_ARI}
|
||||||
-DFILE=testout_420m_ifast_ari.ppm
|
-DFILE=testout_420m_ifast_ari.ppm
|
||||||
-P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
|
-P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
|
||||||
|
|||||||
@@ -1,4 +0,0 @@
|
|||||||
Version 1.0 (March 4, 2014)
|
|
||||||
===========================
|
|
||||||
|
|
||||||
* Add 'jpgcrush' functionality. Figures out which progressive coding configuration uses the fewest bits.
|
|
||||||
@@ -13,6 +13,25 @@ instead of -1 if componentID was > 0 and subsamp was TJSAMP_GRAY.
|
|||||||
[3] Fixed an issue in tjBufSizeYUV2() whereby it would erroneously return 0
|
[3] Fixed an issue in tjBufSizeYUV2() whereby it would erroneously return 0
|
||||||
instead of -1 if width was < 1.
|
instead of -1 if width was < 1.
|
||||||
|
|
||||||
|
[5] The Huffman encoder now uses clz and bsr instructions for bit counting on
|
||||||
|
ARM64 platforms (see 1.4 beta1 [5].)
|
||||||
|
|
||||||
|
[6] The close() method in the TJCompressor and TJDecompressor Java classes is
|
||||||
|
now idempotent. Previously, that method would call the native tjDestroy()
|
||||||
|
function even if the TurboJPEG instance had already been destroyed. This
|
||||||
|
caused an exception to be thrown during finalization, if the close() method had
|
||||||
|
already been called. The exception was caught, but it was still an expensive
|
||||||
|
operation.
|
||||||
|
|
||||||
|
[7] The TurboJPEG API previously generated an error ("Could not determine
|
||||||
|
subsampling type for JPEG image") when attempting to decompress grayscale JPEG
|
||||||
|
images that were compressed with a sampling factor other than 1 (for instance,
|
||||||
|
with 'cjpeg -grayscale -sample 2x2'). Subsampling technically has no meaning
|
||||||
|
with grayscale JPEGs, and thus the horizontal and vertical sampling factors
|
||||||
|
for such images are ignored by the decompressor. However, the TurboJPEG API
|
||||||
|
was being too rigid and was expecting the sampling factors to be equal to 1
|
||||||
|
before it treated the image as a grayscale JPEG.
|
||||||
|
|
||||||
[8] cjpeg, djpeg, and jpegtran now accept an argument of -version, which will
|
[8] cjpeg, djpeg, and jpegtran now accept an argument of -version, which will
|
||||||
print the library version and exit.
|
print the library version and exit.
|
||||||
|
|
||||||
@@ -28,6 +47,26 @@ order), the Huffman encoder can produce encoded blocks that approach double the
|
|||||||
size of the unencoded blocks. Thus, the Huffman local buffer was increased to
|
size of the unencoded blocks. Thus, the Huffman local buffer was increased to
|
||||||
256 bytes, which should prevent any such issue from re-occurring in the future.
|
256 bytes, which should prevent any such issue from re-occurring in the future.
|
||||||
|
|
||||||
|
[10] The new tjPlaneSizeYUV(), tjPlaneWidth(), and tjPlaneHeight() functions
|
||||||
|
were not actually usable on any platform except OS X and Windows, because
|
||||||
|
those functions were not included in the libturbojpeg mapfile. This has been
|
||||||
|
fixed.
|
||||||
|
|
||||||
|
[11] Restored the JPP(), JMETHOD(), and FAR macros in the libjpeg-turbo header
|
||||||
|
files. The JPP() and JMETHOD() macros were originally implemented in libjpeg
|
||||||
|
as a way of supporting non-ANSI compilers that lacked support for prototype
|
||||||
|
parameters. libjpeg-turbo has never supported such compilers, but some
|
||||||
|
software packages still use the macros to define their own prototypes.
|
||||||
|
Similarly, libjpeg-turbo has never supported MS-DOS and other platforms that
|
||||||
|
have far symbols, but some software packages still use the FAR macro. A pretty
|
||||||
|
good argument can be made that this is a bad practice on the part of the
|
||||||
|
software in question, but since this affects more than one package, it's just
|
||||||
|
easier to fix it here.
|
||||||
|
|
||||||
|
[12] Fixed issues that were preventing the ARM 64-bit SIMD code from compiling
|
||||||
|
for iOS, and included an ARMv8 architecture in all of the binaries installed by
|
||||||
|
the "official" libjpeg-turbo SDK for OS X.
|
||||||
|
|
||||||
|
|
||||||
1.3.90 (1.4 beta1)
|
1.3.90 (1.4 beta1)
|
||||||
==================
|
==================
|
||||||
@@ -280,7 +319,7 @@ configure/CMake switch in order to retain strict API/ABI compatibility with the
|
|||||||
libjpeg v6b or v7 API/ABI (or with previous versions of libjpeg-turbo.) See
|
libjpeg v6b or v7 API/ABI (or with previous versions of libjpeg-turbo.) See
|
||||||
README-turbo.txt for more details.
|
README-turbo.txt for more details.
|
||||||
|
|
||||||
[13] Added ARM v7s architecture to libjpeg.a and libturbojpeg.a in the official
|
[13] Added ARMv7s architecture to libjpeg.a and libturbojpeg.a in the official
|
||||||
libjpeg-turbo binary package for OS X, so that those libraries can be used to
|
libjpeg-turbo binary package for OS X, so that those libraries can be used to
|
||||||
build applications that leverage the faster CPUs in the iPhone 5 and iPad 4.
|
build applications that leverage the faster CPUs in the iPhone 5 and iPad 4.
|
||||||
|
|
||||||
@@ -363,7 +402,7 @@ K component is assigned a component ID of 1 instead of 4. Although these files
|
|||||||
are in violation of the spec, other JPEG implementations handle them
|
are in violation of the spec, other JPEG implementations handle them
|
||||||
correctly.
|
correctly.
|
||||||
|
|
||||||
[7] Added ARM v6 and ARM v7 architectures to libjpeg.a and libturbojpeg.a in
|
[7] Added ARMv6 and ARMv7 architectures to libjpeg.a and libturbojpeg.a in
|
||||||
the official libjpeg-turbo binary package for OS X, so that those libraries can
|
the official libjpeg-turbo binary package for OS X, so that those libraries can
|
||||||
be used to build both OS X and iOS applications.
|
be used to build both OS X and iOS applications.
|
||||||
|
|
||||||
|
|||||||
11
Makefile.am
11
Makefile.am
@@ -276,7 +276,10 @@ MD5_JPEG_CROP = b4197f377e621c4e9b1d20471432610d
|
|||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
test: testclean all
|
.PHONY: test
|
||||||
|
test: tjquicktest bittest
|
||||||
|
|
||||||
|
tjquicktest: testclean all
|
||||||
|
|
||||||
if WITH_TURBOJPEG
|
if WITH_TURBOJPEG
|
||||||
if WITH_JAVA
|
if WITH_JAVA
|
||||||
@@ -294,6 +297,8 @@ endif
|
|||||||
./tjunittest -yuv -noyuvpad
|
./tjunittest -yuv -noyuvpad
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
bittest: testclean all
|
||||||
|
|
||||||
# These tests are carefully crafted to provide full coverage of as many of the
|
# These tests are carefully crafted to provide full coverage of as many of the
|
||||||
# underlying algorithms as possible (including all of the SIMD-accelerated
|
# underlying algorithms as possible (including all of the SIMD-accelerated
|
||||||
# ones.)
|
# ones.)
|
||||||
@@ -598,12 +603,12 @@ udmg: all pkgscripts/makemacpkg pkgscripts/uninstall
|
|||||||
sh pkgscripts/makemacpkg -build32 ${BUILDDIR32}
|
sh pkgscripts/makemacpkg -build32 ${BUILDDIR32}
|
||||||
|
|
||||||
iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall
|
iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall
|
||||||
sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S}
|
sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}"
|
||||||
|
|
||||||
else
|
else
|
||||||
|
|
||||||
iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall
|
iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall
|
||||||
sh pkgscripts/makemacpkg -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S}
|
sh pkgscripts/makemacpkg -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}"
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ The idea is to reduce transfer times for JPEGs on the Web, thus reducing page lo
|
|||||||
|
|
||||||
More information:
|
More information:
|
||||||
|
|
||||||
* [Version 1.0 Announcement](https://blog.mozilla.org/research/2014/03/05/introducing-the-mozjpeg-project/)
|
|
||||||
* [Version 2.0 Announcement](https://blog.mozilla.org/research/2014/07/15/mozilla-advances-jpeg-encoding-with-mozjpeg-2-0/)
|
|
||||||
* [Mailing List](https://lists.mozilla.org/listinfo/dev-mozjpeg)
|
* [Mailing List](https://lists.mozilla.org/listinfo/dev-mozjpeg)
|
||||||
|
* [Version 3.0 Announcement](https://boomswaggerboom.wordpress.com/2014/12/30/mozjpeg-3-0-released/)
|
||||||
|
* [Version 2.0 Announcement](https://blog.mozilla.org/research/2014/07/15/mozilla-advances-jpeg-encoding-with-mozjpeg-2-0/)
|
||||||
|
* [Version 1.0 Announcement](https://blog.mozilla.org/research/2014/03/05/introducing-the-mozjpeg-project/)
|
||||||
|
|||||||
@@ -74,6 +74,7 @@ JMESSAGE(JWRN_GIF_NOMOREDATA, "Ran out of GIF bits")
|
|||||||
#ifdef PPM_SUPPORTED
|
#ifdef PPM_SUPPORTED
|
||||||
JMESSAGE(JERR_PPM_COLORSPACE, "PPM output must be grayscale or RGB")
|
JMESSAGE(JERR_PPM_COLORSPACE, "PPM output must be grayscale or RGB")
|
||||||
JMESSAGE(JERR_PPM_NONNUMERIC, "Nonnumeric data in PPM file")
|
JMESSAGE(JERR_PPM_NONNUMERIC, "Nonnumeric data in PPM file")
|
||||||
|
JMESSAGE(JERR_PPM_TOOLARGE, "Integer value too large in PPM file")
|
||||||
JMESSAGE(JERR_PPM_NOT, "Not a PPM/PGM file")
|
JMESSAGE(JERR_PPM_NOT, "Not a PPM/PGM file")
|
||||||
JMESSAGE(JTRC_PGM, "%ux%u PGM image")
|
JMESSAGE(JTRC_PGM, "%ux%u PGM image")
|
||||||
JMESSAGE(JTRC_PGM_TEXT, "%ux%u text PGM image")
|
JMESSAGE(JTRC_PGM_TEXT, "%ux%u text PGM image")
|
||||||
|
|||||||
7
cjpeg.c
7
cjpeg.c
@@ -207,6 +207,7 @@ usage (void)
|
|||||||
fprintf(stderr, " -dct float Use floating-point DCT method%s\n",
|
fprintf(stderr, " -dct float Use floating-point DCT method%s\n",
|
||||||
(JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
|
(JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
|
||||||
#endif
|
#endif
|
||||||
|
fprintf(stderr, " -quant-baseline Use 8-bit quantization table entries for baseline JPEG compatibility\n");
|
||||||
fprintf(stderr, " -quant-table N Use predefined quantization table N:\n");
|
fprintf(stderr, " -quant-table N Use predefined quantization table N:\n");
|
||||||
fprintf(stderr, " - 0 JPEG Annex K\n");
|
fprintf(stderr, " - 0 JPEG Annex K\n");
|
||||||
fprintf(stderr, " - 1 Flat\n");
|
fprintf(stderr, " - 1 Flat\n");
|
||||||
@@ -450,7 +451,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
|
|||||||
qtablefile = argv[argn];
|
qtablefile = argv[argn];
|
||||||
/* We postpone actually reading the file in case -quality comes later. */
|
/* We postpone actually reading the file in case -quality comes later. */
|
||||||
|
|
||||||
} else if (keymatch(arg, "quant-table", 2)) {
|
} else if (keymatch(arg, "quant-table", 7)) {
|
||||||
int val;
|
int val;
|
||||||
if (++argn >= argc) /* advance to next argument */
|
if (++argn >= argc) /* advance to next argument */
|
||||||
usage();
|
usage();
|
||||||
@@ -462,6 +463,10 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
|
|||||||
}
|
}
|
||||||
jpeg_set_quality(cinfo, 75, TRUE);
|
jpeg_set_quality(cinfo, 75, TRUE);
|
||||||
|
|
||||||
|
} else if (keymatch(arg, "quant-baseline", 7)) {
|
||||||
|
/* Force quantization table to meet baseline requirements */
|
||||||
|
force_baseline = TRUE;
|
||||||
|
|
||||||
} else if (keymatch(arg, "restart", 1)) {
|
} else if (keymatch(arg, "restart", 1)) {
|
||||||
/* Restart interval in MCU rows (or in MCUs with 'b'). */
|
/* Restart interval in MCU rows (or in MCUs with 'b'). */
|
||||||
long lval;
|
long lval;
|
||||||
|
|||||||
14
configure.ac
14
configure.ac
@@ -2,7 +2,7 @@
|
|||||||
# Process this file with autoconf to produce a configure script.
|
# Process this file with autoconf to produce a configure script.
|
||||||
|
|
||||||
AC_PREREQ([2.56])
|
AC_PREREQ([2.56])
|
||||||
AC_INIT([mozjpeg], [3.0])
|
AC_INIT([mozjpeg], [3.1])
|
||||||
BUILD=`date +%Y%m%d`
|
BUILD=`date +%Y%m%d`
|
||||||
|
|
||||||
AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2])
|
AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2])
|
||||||
@@ -443,7 +443,11 @@ if test "x${with_simd}" != "xno"; then
|
|||||||
AC_MSG_RESULT([yes (arm)])
|
AC_MSG_RESULT([yes (arm)])
|
||||||
AC_MSG_CHECKING([if the assembler is GNU-compatible and can be used])
|
AC_MSG_CHECKING([if the assembler is GNU-compatible and can be used])
|
||||||
AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE(
|
AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE(
|
||||||
[AC_MSG_RESULT([yes])
|
[if test "x$ac_use_gas_preprocessor" = "xyes"; then
|
||||||
|
AC_MSG_RESULT([yes (with gas-preprocessor)])
|
||||||
|
else
|
||||||
|
AC_MSG_RESULT([yes])
|
||||||
|
fi
|
||||||
simd_arch=arm],
|
simd_arch=arm],
|
||||||
[AC_MSG_RESULT([no])
|
[AC_MSG_RESULT([no])
|
||||||
with_simd=no])
|
with_simd=no])
|
||||||
@@ -459,7 +463,11 @@ if test "x${with_simd}" != "xno"; then
|
|||||||
AC_MSG_RESULT([yes (arm64)])
|
AC_MSG_RESULT([yes (arm64)])
|
||||||
AC_MSG_CHECKING([if the assembler is GNU-compatible and can be used])
|
AC_MSG_CHECKING([if the assembler is GNU-compatible and can be used])
|
||||||
AC_CHECK_COMPATIBLE_ARM64_ASSEMBLER_IFELSE(
|
AC_CHECK_COMPATIBLE_ARM64_ASSEMBLER_IFELSE(
|
||||||
[AC_MSG_RESULT([yes])
|
[if test "x$ac_use_gas_preprocessor" = "xyes"; then
|
||||||
|
AC_MSG_RESULT([yes (with gas-preprocessor)])
|
||||||
|
else
|
||||||
|
AC_MSG_RESULT([yes])
|
||||||
|
fi
|
||||||
simd_arch=aarch64],
|
simd_arch=aarch64],
|
||||||
[AC_MSG_RESULT([no])
|
[AC_MSG_RESULT([no])
|
||||||
with_simd=no])
|
with_simd=no])
|
||||||
|
|||||||
@@ -567,6 +567,7 @@ public class TJCompressor {
|
|||||||
* Free the native structures associated with this compressor instance.
|
* Free the native structures associated with this compressor instance.
|
||||||
*/
|
*/
|
||||||
public void close() throws Exception {
|
public void close() throws Exception {
|
||||||
|
if (handle != 0)
|
||||||
destroy();
|
destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -834,6 +834,7 @@ public class TJDecompressor {
|
|||||||
* Free the native structures associated with this decompressor instance.
|
* Free the native structures associated with this decompressor instance.
|
||||||
*/
|
*/
|
||||||
public void close() throws Exception {
|
public void close() throws Exception {
|
||||||
|
if (handle != 0)
|
||||||
destroy();
|
destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -940,6 +940,11 @@ jget_arith_rates (j_compress_ptr cinfo, int dc_tbl_no, int ac_tbl_no, arith_rate
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
|
arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
|
||||||
|
|
||||||
|
r->arith_dc_L = cinfo->arith_dc_L[dc_tbl_no];
|
||||||
|
r->arith_dc_U = cinfo->arith_dc_U[dc_tbl_no];
|
||||||
|
r->arith_ac_K = cinfo->arith_ac_K[ac_tbl_no];
|
||||||
|
|
||||||
for (i = 0; i < DC_STAT_BINS; i++) {
|
for (i = 0; i < DC_STAT_BINS; i++) {
|
||||||
int state = entropy->dc_stats[dc_tbl_no][i];
|
int state = entropy->dc_stats[dc_tbl_no][i];
|
||||||
int mps_val = state >> 7;
|
int mps_val = state >> 7;
|
||||||
|
|||||||
@@ -367,14 +367,19 @@ compress_trellis_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
|
|||||||
c_derived_tbl actbl_data;
|
c_derived_tbl actbl_data;
|
||||||
c_derived_tbl *actbl = &actbl_data;
|
c_derived_tbl *actbl = &actbl_data;
|
||||||
|
|
||||||
|
#ifdef C_ARITH_CODING_SUPPORTED
|
||||||
arith_rates arith_r_data;
|
arith_rates arith_r_data;
|
||||||
arith_rates *arith_r = &arith_r_data;
|
arith_rates *arith_r = &arith_r_data;
|
||||||
|
#endif
|
||||||
|
|
||||||
compptr = cinfo->cur_comp_info[ci];
|
compptr = cinfo->cur_comp_info[ci];
|
||||||
|
|
||||||
|
#ifdef C_ARITH_CODING_SUPPORTED
|
||||||
if (cinfo->arith_code)
|
if (cinfo->arith_code)
|
||||||
jget_arith_rates(cinfo, compptr->dc_tbl_no, compptr->ac_tbl_no, arith_r);
|
jget_arith_rates(cinfo, compptr->dc_tbl_no, compptr->ac_tbl_no, arith_r);
|
||||||
else {
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
jpeg_make_c_derived_tbl(cinfo, TRUE, compptr->dc_tbl_no, &dctbl);
|
jpeg_make_c_derived_tbl(cinfo, TRUE, compptr->dc_tbl_no, &dctbl);
|
||||||
jpeg_make_c_derived_tbl(cinfo, FALSE, compptr->ac_tbl_no, &actbl);
|
jpeg_make_c_derived_tbl(cinfo, FALSE, compptr->ac_tbl_no, &actbl);
|
||||||
}
|
}
|
||||||
@@ -413,6 +418,7 @@ compress_trellis_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
|
|||||||
for (block_row = 0; block_row < block_rows; block_row++) {
|
for (block_row = 0; block_row < block_rows; block_row++) {
|
||||||
thisblockrow = buffer[block_row];
|
thisblockrow = buffer[block_row];
|
||||||
lastblockrow = (block_row > 0) ? buffer[block_row-1] : NULL;
|
lastblockrow = (block_row > 0) ? buffer[block_row-1] : NULL;
|
||||||
|
#ifdef C_ARITH_CODING_SUPPORTED
|
||||||
if (cinfo->arith_code)
|
if (cinfo->arith_code)
|
||||||
quantize_trellis_arith(cinfo, arith_r, thisblockrow,
|
quantize_trellis_arith(cinfo, arith_r, thisblockrow,
|
||||||
buffer_dst[block_row], blocks_across,
|
buffer_dst[block_row], blocks_across,
|
||||||
@@ -421,6 +427,7 @@ compress_trellis_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
|
|||||||
cinfo->master->norm_coef[compptr->quant_tbl_no],
|
cinfo->master->norm_coef[compptr->quant_tbl_no],
|
||||||
&lastDC, lastblockrow, buffer_dst[block_row-1]);
|
&lastDC, lastblockrow, buffer_dst[block_row-1]);
|
||||||
else
|
else
|
||||||
|
#endif
|
||||||
quantize_trellis(cinfo, dctbl, actbl, thisblockrow,
|
quantize_trellis(cinfo, dctbl, actbl, thisblockrow,
|
||||||
buffer_dst[block_row], blocks_across,
|
buffer_dst[block_row], blocks_across,
|
||||||
cinfo->quant_tbl_ptrs[compptr->quant_tbl_no],
|
cinfo->quant_tbl_ptrs[compptr->quant_tbl_no],
|
||||||
|
|||||||
156
jcdctmgr.c
156
jcdctmgr.c
@@ -22,6 +22,7 @@
|
|||||||
#include "jpeglib.h"
|
#include "jpeglib.h"
|
||||||
#include "jdct.h" /* Private declarations for DCT subsystem */
|
#include "jdct.h" /* Private declarations for DCT subsystem */
|
||||||
#include "jsimddct.h"
|
#include "jsimddct.h"
|
||||||
|
#include "jchuff.h"
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
@@ -726,6 +727,17 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
|
|
||||||
/* Quantize/descale the coefficients, and store into coef_blocks[] */
|
/* Quantize/descale the coefficients, and store into coef_blocks[] */
|
||||||
(*do_quantize) (coef_blocks[bi], divisors, workspace);
|
(*do_quantize) (coef_blocks[bi], divisors, workspace);
|
||||||
|
|
||||||
|
if (do_preprocess) {
|
||||||
|
int i;
|
||||||
|
int maxval = (1 << MAX_COEF_BITS) - 1;
|
||||||
|
for (i = 0; i < 64; i++) {
|
||||||
|
if (coef_blocks[bi][i] < -maxval)
|
||||||
|
coef_blocks[bi][i] = -maxval;
|
||||||
|
if (coef_blocks[bi][i] > maxval)
|
||||||
|
coef_blocks[bi][i] = maxval;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -845,12 +857,22 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
|
|
||||||
/* Quantize/descale the coefficients, and store into coef_blocks[] */
|
/* Quantize/descale the coefficients, and store into coef_blocks[] */
|
||||||
(*do_quantize) (coef_blocks[bi], divisors, workspace);
|
(*do_quantize) (coef_blocks[bi], divisors, workspace);
|
||||||
|
|
||||||
|
if (do_preprocess) {
|
||||||
|
int i;
|
||||||
|
int maxval = (1 << MAX_COEF_BITS) - 1;
|
||||||
|
for (i = 0; i < 64; i++) {
|
||||||
|
if (coef_blocks[bi][i] < -maxval)
|
||||||
|
coef_blocks[bi][i] = -maxval;
|
||||||
|
if (coef_blocks[bi][i] > maxval)
|
||||||
|
coef_blocks[bi][i] = maxval;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* DCT_FLOAT_SUPPORTED */
|
#endif /* DCT_FLOAT_SUPPORTED */
|
||||||
|
|
||||||
#include "jchuff.h"
|
|
||||||
#include "jpeg_nbits_table.h"
|
#include "jpeg_nbits_table.h"
|
||||||
|
|
||||||
static const float jpeg_lambda_weights_flat[64] = {
|
static const float jpeg_lambda_weights_flat[64] = {
|
||||||
@@ -875,7 +897,12 @@ static const float jpeg_lambda_weights_csf_luma[64] = {
|
|||||||
0.43454f, 0.42146f, 0.34609f, 0.24072f, 0.15975f, 0.10701f, 0.07558f, 0.05875f,
|
0.43454f, 0.42146f, 0.34609f, 0.24072f, 0.15975f, 0.10701f, 0.07558f, 0.05875f,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define DC_TRELLIS_CANDIDATES 3
|
#define DC_TRELLIS_MAX_CANDIDATES 9
|
||||||
|
|
||||||
|
LOCAL(int) get_num_dc_trellis_candidates(int dc_quantval) {
|
||||||
|
/* Higher qualities can tolerate higher DC distortion */
|
||||||
|
return MIN(DC_TRELLIS_MAX_CANDIDATES, (2 + 60 / dc_quantval)|1);
|
||||||
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actbl, JBLOCKROW coef_blocks, JBLOCKROW src, JDIMENSION num_blocks,
|
quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actbl, JBLOCKROW coef_blocks, JBLOCKROW src, JDIMENSION num_blocks,
|
||||||
@@ -908,11 +935,12 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb
|
|||||||
int zero_run;
|
int zero_run;
|
||||||
int run_bits;
|
int run_bits;
|
||||||
int rate;
|
int rate;
|
||||||
float *accumulated_dc_cost[DC_TRELLIS_CANDIDATES];
|
float *accumulated_dc_cost[DC_TRELLIS_MAX_CANDIDATES];
|
||||||
int *dc_cost_backtrack[DC_TRELLIS_CANDIDATES];
|
int *dc_cost_backtrack[DC_TRELLIS_MAX_CANDIDATES];
|
||||||
JCOEF *dc_candidate[DC_TRELLIS_CANDIDATES];
|
JCOEF *dc_candidate[DC_TRELLIS_MAX_CANDIDATES];
|
||||||
int mode = 1;
|
int mode = 1;
|
||||||
float lambda_table[DCTSIZE2];
|
float lambda_table[DCTSIZE2];
|
||||||
|
const int dc_trellis_candidates = get_num_dc_trellis_candidates(qtbl->quantval[0]);
|
||||||
|
|
||||||
Ss = cinfo->Ss;
|
Ss = cinfo->Ss;
|
||||||
Se = cinfo->Se;
|
Se = cinfo->Se;
|
||||||
@@ -936,8 +964,9 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb
|
|||||||
accumulated_block_cost[0] = 0;
|
accumulated_block_cost[0] = 0;
|
||||||
requires_eob[0] = 0;
|
requires_eob[0] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cinfo->master->trellis_quant_dc) {
|
if (cinfo->master->trellis_quant_dc) {
|
||||||
for (i = 0; i < DC_TRELLIS_CANDIDATES; i++) {
|
for (i = 0; i < dc_trellis_candidates; i++) {
|
||||||
accumulated_dc_cost[i] = (float *)malloc(num_blocks * sizeof(float));
|
accumulated_dc_cost[i] = (float *)malloc(num_blocks * sizeof(float));
|
||||||
dc_cost_backtrack[i] = (int *)malloc(num_blocks * sizeof(int));
|
dc_cost_backtrack[i] = (int *)malloc(num_blocks * sizeof(int));
|
||||||
dc_candidate[i] = (JCOEF *)malloc(num_blocks * sizeof(JCOEF));
|
dc_candidate[i] = (JCOEF *)malloc(num_blocks * sizeof(JCOEF));
|
||||||
@@ -991,12 +1020,17 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb
|
|||||||
float dc_candidate_dist;
|
float dc_candidate_dist;
|
||||||
|
|
||||||
qval = (x + q/2) / q; /* quantized value (round nearest) */
|
qval = (x + q/2) / q; /* quantized value (round nearest) */
|
||||||
for (k = 0; k < DC_TRELLIS_CANDIDATES; k++) {
|
for (k = 0; k < dc_trellis_candidates; k++) {
|
||||||
int delta;
|
int delta;
|
||||||
int dc_delta;
|
int dc_delta;
|
||||||
int bits;
|
int bits;
|
||||||
|
|
||||||
dc_candidate[k][bi] = qval - DC_TRELLIS_CANDIDATES/2 + k;
|
dc_candidate[k][bi] = qval - dc_trellis_candidates/2 + k;
|
||||||
|
if (dc_candidate[k][bi] >= (1<<MAX_COEF_BITS))
|
||||||
|
dc_candidate[k][bi] = (1<<MAX_COEF_BITS)-1;
|
||||||
|
if (dc_candidate[k][bi] <= -(1<<MAX_COEF_BITS))
|
||||||
|
dc_candidate[k][bi] = -(1<<MAX_COEF_BITS)+1;
|
||||||
|
|
||||||
delta = dc_candidate[k][bi] * q - x;
|
delta = dc_candidate[k][bi] * q - x;
|
||||||
dc_candidate_dist = delta * delta * lambda_dc;
|
dc_candidate_dist = delta * delta * lambda_dc;
|
||||||
dc_candidate[k][bi] *= 1 + 2*sign;
|
dc_candidate[k][bi] *= 1 + 2*sign;
|
||||||
@@ -1033,7 +1067,7 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb
|
|||||||
accumulated_dc_cost[k][0] = cost;
|
accumulated_dc_cost[k][0] = cost;
|
||||||
dc_cost_backtrack[k][0] = -1;
|
dc_cost_backtrack[k][0] = -1;
|
||||||
} else {
|
} else {
|
||||||
for (l = 0; l < DC_TRELLIS_CANDIDATES; l++) {
|
for (l = 0; l < dc_trellis_candidates; l++) {
|
||||||
dc_delta = dc_candidate[k][bi] - dc_candidate[l][bi-1];
|
dc_delta = dc_candidate[k][bi] - dc_candidate[l][bi-1];
|
||||||
|
|
||||||
/* Derive number of suffix bits */
|
/* Derive number of suffix bits */
|
||||||
@@ -1076,6 +1110,9 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (qval >= (1<<MAX_COEF_BITS))
|
||||||
|
qval = (1<<MAX_COEF_BITS)-1;
|
||||||
|
|
||||||
num_candidates = jpeg_nbits_table[qval];
|
num_candidates = jpeg_nbits_table[qval];
|
||||||
for (k = 0; k < num_candidates; k++) {
|
for (k = 0; k < num_candidates; k++) {
|
||||||
int delta;
|
int delta;
|
||||||
@@ -1240,7 +1277,7 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb
|
|||||||
|
|
||||||
if (cinfo->master->trellis_quant_dc) {
|
if (cinfo->master->trellis_quant_dc) {
|
||||||
j = 0;
|
j = 0;
|
||||||
for (i = 1; i < DC_TRELLIS_CANDIDATES; i++) {
|
for (i = 1; i < dc_trellis_candidates; i++) {
|
||||||
if (accumulated_dc_cost[i][num_blocks-1] < accumulated_dc_cost[j][num_blocks-1])
|
if (accumulated_dc_cost[i][num_blocks-1] < accumulated_dc_cost[j][num_blocks-1])
|
||||||
j = i;
|
j = i;
|
||||||
}
|
}
|
||||||
@@ -1252,7 +1289,7 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb
|
|||||||
/* Save DC predictor */
|
/* Save DC predictor */
|
||||||
*last_dc_val = coef_blocks[num_blocks-1][0];
|
*last_dc_val = coef_blocks[num_blocks-1][0];
|
||||||
|
|
||||||
for (i = 0; i < DC_TRELLIS_CANDIDATES; i++) {
|
for (i = 0; i < dc_trellis_candidates; i++) {
|
||||||
free(accumulated_dc_cost[i]);
|
free(accumulated_dc_cost[i]);
|
||||||
free(dc_cost_backtrack[i]);
|
free(dc_cost_backtrack[i]);
|
||||||
free(dc_candidate[i]);
|
free(dc_candidate[i]);
|
||||||
@@ -1261,6 +1298,7 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef C_ARITH_CODING_SUPPORTED
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
quantize_trellis_arith(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_blocks, JBLOCKROW src, JDIMENSION num_blocks,
|
quantize_trellis_arith(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_blocks, JBLOCKROW src, JDIMENSION num_blocks,
|
||||||
JQUANT_TBL * qtbl, double *norm_src, double *norm_coef, JCOEF *last_dc_val,
|
JQUANT_TBL * qtbl, double *norm_src, double *norm_coef, JCOEF *last_dc_val,
|
||||||
@@ -1281,20 +1319,17 @@ quantize_trellis_arith(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_bloc
|
|||||||
jpeg_lambda_weights_csf_luma :
|
jpeg_lambda_weights_csf_luma :
|
||||||
jpeg_lambda_weights_flat;
|
jpeg_lambda_weights_flat;
|
||||||
int Ss, Se;
|
int Ss, Se;
|
||||||
int has_eob;
|
|
||||||
float cost_all_zeros;
|
|
||||||
float best_cost_skip;
|
|
||||||
float cost;
|
float cost;
|
||||||
float run_bits;
|
float run_bits;
|
||||||
int rate;
|
int rate;
|
||||||
float *accumulated_dc_cost[DC_TRELLIS_CANDIDATES];
|
float *accumulated_dc_cost[DC_TRELLIS_MAX_CANDIDATES];
|
||||||
int *dc_cost_backtrack[DC_TRELLIS_CANDIDATES];
|
int *dc_cost_backtrack[DC_TRELLIS_MAX_CANDIDATES];
|
||||||
JCOEF *dc_candidate[DC_TRELLIS_CANDIDATES];
|
JCOEF *dc_candidate[DC_TRELLIS_MAX_CANDIDATES];
|
||||||
|
int *dc_context[DC_TRELLIS_MAX_CANDIDATES];
|
||||||
|
|
||||||
int mode = 1;
|
int mode = 1;
|
||||||
float lambda_table[DCTSIZE2];
|
float lambda_table[DCTSIZE2];
|
||||||
|
const int dc_trellis_candidates = get_num_dc_trellis_candidates(qtbl->quantval[0]);
|
||||||
/* Arithmetic coding context. Set to 0 for now but can refined */
|
|
||||||
int dc_context = 0;
|
|
||||||
|
|
||||||
Ss = cinfo->Ss;
|
Ss = cinfo->Ss;
|
||||||
Se = cinfo->Se;
|
Se = cinfo->Se;
|
||||||
@@ -1304,13 +1339,15 @@ quantize_trellis_arith(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_bloc
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
if (cinfo->master->trellis_quant_dc) {
|
if (cinfo->master->trellis_quant_dc) {
|
||||||
for (i = 0; i < DC_TRELLIS_CANDIDATES; i++) {
|
for (i = 0; i < dc_trellis_candidates; i++) {
|
||||||
accumulated_dc_cost[i] = (float *)malloc(num_blocks * sizeof(float));
|
accumulated_dc_cost[i] = (float *)malloc(num_blocks * sizeof(float));
|
||||||
dc_cost_backtrack[i] = (int *)malloc(num_blocks * sizeof(int));
|
dc_cost_backtrack[i] = (int *)malloc(num_blocks * sizeof(int));
|
||||||
dc_candidate[i] = (JCOEF *)malloc(num_blocks * sizeof(JCOEF));
|
dc_candidate[i] = (JCOEF *)malloc(num_blocks * sizeof(JCOEF));
|
||||||
|
dc_context[i] = (int *)malloc(num_blocks * sizeof(int));
|
||||||
if (!accumulated_dc_cost[i] ||
|
if (!accumulated_dc_cost[i] ||
|
||||||
!dc_cost_backtrack[i] ||
|
!dc_cost_backtrack[i] ||
|
||||||
!dc_candidate[i]) {
|
!dc_candidate[i] ||
|
||||||
|
!dc_context[i]) {
|
||||||
ERREXIT(cinfo, JERR_OUT_OF_MEMORY);
|
ERREXIT(cinfo, JERR_OUT_OF_MEMORY);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1358,15 +1395,16 @@ quantize_trellis_arith(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_bloc
|
|||||||
float dc_candidate_dist;
|
float dc_candidate_dist;
|
||||||
|
|
||||||
qval = (x + q/2) / q; /* quantized value (round nearest) */
|
qval = (x + q/2) / q; /* quantized value (round nearest) */
|
||||||
for (k = 0; k < DC_TRELLIS_CANDIDATES; k++) {
|
|
||||||
|
/* loop over candidates in current block */
|
||||||
|
for (k = 0; k < dc_trellis_candidates; k++) {
|
||||||
int delta;
|
int delta;
|
||||||
int dc_delta;
|
int dc_delta;
|
||||||
float bits;
|
float bits;
|
||||||
int st = dc_context;
|
|
||||||
int m;
|
int m;
|
||||||
int v2;
|
int v2;
|
||||||
|
|
||||||
dc_candidate[k][bi] = qval - DC_TRELLIS_CANDIDATES/2 + k;
|
dc_candidate[k][bi] = qval - dc_trellis_candidates/2 + k;
|
||||||
delta = dc_candidate[k][bi] * q - x;
|
delta = dc_candidate[k][bi] * q - x;
|
||||||
dc_candidate_dist = delta * delta * lambda_dc;
|
dc_candidate_dist = delta * delta * lambda_dc;
|
||||||
dc_candidate[k][bi] *= 1 + 2*sign;
|
dc_candidate[k][bi] *= 1 + 2*sign;
|
||||||
@@ -1389,13 +1427,20 @@ quantize_trellis_arith(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_bloc
|
|||||||
dc_candidate_dist += cinfo->master->trellis_delta_dc_weight * (vertical_dist - dc_candidate_dist);
|
dc_candidate_dist += cinfo->master->trellis_delta_dc_weight * (vertical_dist - dc_candidate_dist);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bi == 0) {
|
/* loop of candidates from previous block */
|
||||||
dc_delta = dc_candidate[k][bi] - *last_dc_val;
|
for (l = 0; l < (bi == 0 ? 1 : dc_trellis_candidates); l++) {
|
||||||
|
int dc_pred = (bi == 0 ? *last_dc_val : dc_candidate[l][bi-1]);
|
||||||
|
int updated_dc_context = 0;
|
||||||
|
int st = (bi == 0) ? 0 : dc_context[l][bi-1];
|
||||||
|
dc_delta = dc_candidate[k][bi] - dc_pred;
|
||||||
|
|
||||||
bits = r->rate_dc[st][dc_delta != 0];
|
bits = r->rate_dc[st][dc_delta != 0];
|
||||||
|
|
||||||
if (dc_delta != 0) {
|
if (dc_delta != 0) {
|
||||||
bits += r->rate_dc[st+1][dc_delta < 0];
|
bits += r->rate_dc[st+1][dc_delta < 0];
|
||||||
st += 2 + (dc_delta < 0);
|
st += 2 + (dc_delta < 0);
|
||||||
|
updated_dc_context = (dc_delta < 0) ? 8 : 4;
|
||||||
|
|
||||||
dc_delta = abs(dc_delta);
|
dc_delta = abs(dc_delta);
|
||||||
|
|
||||||
m = 0;
|
m = 0;
|
||||||
@@ -1411,46 +1456,25 @@ quantize_trellis_arith(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_bloc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
bits += r->rate_dc[st][0];
|
bits += r->rate_dc[st][0];
|
||||||
|
|
||||||
|
if (m < (int) ((1L << r->arith_dc_L) >> 1))
|
||||||
|
updated_dc_context = 0; /* zero diff category */
|
||||||
|
else if (m > (int) ((1L << r->arith_dc_U) >> 1))
|
||||||
|
updated_dc_context += 8; /* large diff category */
|
||||||
|
|
||||||
st += 14;
|
st += 14;
|
||||||
while (m >>= 1)
|
while (m >>= 1)
|
||||||
bits += r->rate_dc[st][(m & dc_delta) ? 1 : 0];
|
bits += r->rate_dc[st][(m & dc_delta) ? 1 : 0];
|
||||||
}
|
}
|
||||||
|
|
||||||
cost = bits + dc_candidate_dist;
|
cost = bits + dc_candidate_dist;
|
||||||
accumulated_dc_cost[k][0] = cost;
|
if (bi != 0)
|
||||||
dc_cost_backtrack[k][0] = -1;
|
cost += accumulated_dc_cost[l][bi-1];
|
||||||
} else {
|
|
||||||
for (l = 0; l < DC_TRELLIS_CANDIDATES; l++) {
|
|
||||||
dc_delta = dc_candidate[k][bi] - dc_candidate[l][bi-1];
|
|
||||||
|
|
||||||
bits = r->rate_dc[st][dc_delta != 0];
|
|
||||||
if (dc_delta != 0) {
|
|
||||||
bits += r->rate_dc[st+1][dc_delta < 0];
|
|
||||||
st += 2 + (dc_delta < 0);
|
|
||||||
dc_delta = abs(dc_delta);
|
|
||||||
|
|
||||||
m = 0;
|
|
||||||
if (dc_delta -= 1) {
|
|
||||||
bits += r->rate_dc[st][1];
|
|
||||||
st = 20;
|
|
||||||
m = 1;
|
|
||||||
v2 = dc_delta;
|
|
||||||
while (v2 >>= 1) {
|
|
||||||
bits += r->rate_dc[st][1];
|
|
||||||
m <<= 1;
|
|
||||||
st++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
bits += r->rate_dc[st][0];
|
|
||||||
st += 14;
|
|
||||||
while (m >>= 1)
|
|
||||||
bits += r->rate_dc[st][(m & dc_delta) ? 1 : 0];
|
|
||||||
}
|
|
||||||
|
|
||||||
cost = bits + dc_candidate_dist + accumulated_dc_cost[l][bi-1];
|
|
||||||
if (l == 0 || cost < accumulated_dc_cost[k][bi]) {
|
if (l == 0 || cost < accumulated_dc_cost[k][bi]) {
|
||||||
accumulated_dc_cost[k][bi] = cost;
|
accumulated_dc_cost[k][bi] = cost;
|
||||||
dc_cost_backtrack[k][bi] = l;
|
dc_cost_backtrack[k][bi] = (bi == 0 ? -1 : l);
|
||||||
}
|
dc_context[k][bi] = updated_dc_context;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1520,7 +1544,7 @@ quantize_trellis_arith(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_bloc
|
|||||||
if (v2 >>= 1) {
|
if (v2 >>= 1) {
|
||||||
coef_bits += r->rate_ac[st][1];
|
coef_bits += r->rate_ac[st][1];
|
||||||
m <<= 1;
|
m <<= 1;
|
||||||
st = 189; /* TODO: condition 189/217 */
|
st = (i <= r->arith_ac_K) ? 189 : 217;
|
||||||
while (v2 >>= 1) {
|
while (v2 >>= 1) {
|
||||||
coef_bits += r->rate_ac[st][1];
|
coef_bits += r->rate_ac[st][1];
|
||||||
m <<= 1;
|
m <<= 1;
|
||||||
@@ -1548,14 +1572,11 @@ quantize_trellis_arith(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_bloc
|
|||||||
|
|
||||||
last_coeff_idx = Ss-1;
|
last_coeff_idx = Ss-1;
|
||||||
best_cost = accumulated_zero_dist[Se] + r->rate_ac[0][1];
|
best_cost = accumulated_zero_dist[Se] + r->rate_ac[0][1];
|
||||||
cost_all_zeros = accumulated_zero_dist[Se];
|
|
||||||
best_cost_skip = cost_all_zeros;
|
|
||||||
|
|
||||||
for (i = Ss; i <= Se; i++) {
|
for (i = Ss; i <= Se; i++) {
|
||||||
int z = jpeg_natural_order[i];
|
int z = jpeg_natural_order[i];
|
||||||
if (coef_blocks[bi][z] != 0) {
|
if (coef_blocks[bi][z] != 0) {
|
||||||
float cost = accumulated_cost[i] + accumulated_zero_dist[Se] - accumulated_zero_dist[i];
|
float cost = accumulated_cost[i] + accumulated_zero_dist[Se] - accumulated_zero_dist[i];
|
||||||
float cost_wo_eob = cost;
|
|
||||||
|
|
||||||
if (i < Se)
|
if (i < Se)
|
||||||
cost += r->rate_ac[3*(i-1)][1];
|
cost += r->rate_ac[3*(i-1)][1];
|
||||||
@@ -1563,13 +1584,10 @@ quantize_trellis_arith(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_bloc
|
|||||||
if (cost < best_cost) {
|
if (cost < best_cost) {
|
||||||
best_cost = cost;
|
best_cost = cost;
|
||||||
last_coeff_idx = i;
|
last_coeff_idx = i;
|
||||||
best_cost_skip = cost_wo_eob;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
has_eob = (last_coeff_idx < Se) + (last_coeff_idx == Ss-1);
|
|
||||||
|
|
||||||
/* Zero out coefficients that are part of runs */
|
/* Zero out coefficients that are part of runs */
|
||||||
i = Se;
|
i = Se;
|
||||||
while (i >= Ss)
|
while (i >= Ss)
|
||||||
@@ -1596,7 +1614,7 @@ quantize_trellis_arith(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_bloc
|
|||||||
|
|
||||||
if (cinfo->master->trellis_quant_dc) {
|
if (cinfo->master->trellis_quant_dc) {
|
||||||
j = 0;
|
j = 0;
|
||||||
for (i = 1; i < DC_TRELLIS_CANDIDATES; i++) {
|
for (i = 1; i < dc_trellis_candidates; i++) {
|
||||||
if (accumulated_dc_cost[i][num_blocks-1] < accumulated_dc_cost[j][num_blocks-1])
|
if (accumulated_dc_cost[i][num_blocks-1] < accumulated_dc_cost[j][num_blocks-1])
|
||||||
j = i;
|
j = i;
|
||||||
}
|
}
|
||||||
@@ -1608,13 +1626,15 @@ quantize_trellis_arith(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_bloc
|
|||||||
/* Save DC predictor */
|
/* Save DC predictor */
|
||||||
*last_dc_val = coef_blocks[num_blocks-1][0];
|
*last_dc_val = coef_blocks[num_blocks-1][0];
|
||||||
|
|
||||||
for (i = 0; i < DC_TRELLIS_CANDIDATES; i++) {
|
for (i = 0; i < dc_trellis_candidates; i++) {
|
||||||
free(accumulated_dc_cost[i]);
|
free(accumulated_dc_cost[i]);
|
||||||
free(dc_cost_backtrack[i]);
|
free(dc_cost_backtrack[i]);
|
||||||
free(dc_candidate[i]);
|
free(dc_candidate[i]);
|
||||||
|
free(dc_context[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize FDCT manager.
|
* Initialize FDCT manager.
|
||||||
|
|||||||
12
jcext.c
12
jcext.c
@@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
|
|
||||||
GLOBAL(boolean)
|
GLOBAL(boolean)
|
||||||
jpeg_c_bool_param_supported (j_compress_ptr cinfo, J_BOOLEAN_PARAM param)
|
jpeg_c_bool_param_supported (const j_compress_ptr cinfo, J_BOOLEAN_PARAM param)
|
||||||
{
|
{
|
||||||
switch (param) {
|
switch (param) {
|
||||||
case JBOOLEAN_OPTIMIZE_SCANS:
|
case JBOOLEAN_OPTIMIZE_SCANS:
|
||||||
@@ -70,7 +70,7 @@ jpeg_c_set_bool_param (j_compress_ptr cinfo, J_BOOLEAN_PARAM param,
|
|||||||
|
|
||||||
|
|
||||||
GLOBAL(boolean)
|
GLOBAL(boolean)
|
||||||
jpeg_c_get_bool_param (j_compress_ptr cinfo, J_BOOLEAN_PARAM param)
|
jpeg_c_get_bool_param (const j_compress_ptr cinfo, J_BOOLEAN_PARAM param)
|
||||||
{
|
{
|
||||||
switch(param) {
|
switch(param) {
|
||||||
case JBOOLEAN_OPTIMIZE_SCANS:
|
case JBOOLEAN_OPTIMIZE_SCANS:
|
||||||
@@ -98,7 +98,7 @@ jpeg_c_get_bool_param (j_compress_ptr cinfo, J_BOOLEAN_PARAM param)
|
|||||||
|
|
||||||
|
|
||||||
GLOBAL(boolean)
|
GLOBAL(boolean)
|
||||||
jpeg_c_float_param_supported (j_compress_ptr cinfo, J_FLOAT_PARAM param)
|
jpeg_c_float_param_supported (const j_compress_ptr cinfo, J_FLOAT_PARAM param)
|
||||||
{
|
{
|
||||||
switch (param) {
|
switch (param) {
|
||||||
case JFLOAT_LAMBDA_LOG_SCALE1:
|
case JFLOAT_LAMBDA_LOG_SCALE1:
|
||||||
@@ -131,7 +131,7 @@ jpeg_c_set_float_param (j_compress_ptr cinfo, J_FLOAT_PARAM param, float value)
|
|||||||
|
|
||||||
|
|
||||||
GLOBAL(float)
|
GLOBAL(float)
|
||||||
jpeg_c_get_float_param (j_compress_ptr cinfo, J_FLOAT_PARAM param)
|
jpeg_c_get_float_param (const j_compress_ptr cinfo, J_FLOAT_PARAM param)
|
||||||
{
|
{
|
||||||
switch (param) {
|
switch (param) {
|
||||||
case JFLOAT_LAMBDA_LOG_SCALE1:
|
case JFLOAT_LAMBDA_LOG_SCALE1:
|
||||||
@@ -149,7 +149,7 @@ jpeg_c_get_float_param (j_compress_ptr cinfo, J_FLOAT_PARAM param)
|
|||||||
|
|
||||||
|
|
||||||
GLOBAL(boolean)
|
GLOBAL(boolean)
|
||||||
jpeg_c_int_param_supported (j_compress_ptr cinfo, J_INT_PARAM param)
|
jpeg_c_int_param_supported (const j_compress_ptr cinfo, J_INT_PARAM param)
|
||||||
{
|
{
|
||||||
switch (param) {
|
switch (param) {
|
||||||
case JINT_COMPRESS_PROFILE:
|
case JINT_COMPRESS_PROFILE:
|
||||||
@@ -198,7 +198,7 @@ jpeg_c_set_int_param (j_compress_ptr cinfo, J_INT_PARAM param, int value)
|
|||||||
|
|
||||||
|
|
||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jpeg_c_get_int_param (j_compress_ptr cinfo, J_INT_PARAM param)
|
jpeg_c_get_int_param (const j_compress_ptr cinfo, J_INT_PARAM param)
|
||||||
{
|
{
|
||||||
switch (param) {
|
switch (param) {
|
||||||
case JINT_COMPRESS_PROFILE:
|
case JINT_COMPRESS_PROFILE:
|
||||||
|
|||||||
2
jchuff.c
2
jchuff.c
@@ -37,7 +37,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* NOTE: Both GCC and Clang define __GNUC__ */
|
/* NOTE: Both GCC and Clang define __GNUC__ */
|
||||||
#if defined __GNUC__ && defined __arm__
|
#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
|
||||||
#if !defined __thumb__ || defined __thumb2__
|
#if !defined __thumb__ || defined __thumb2__
|
||||||
#define USE_CLZ_INTRINSIC
|
#define USE_CLZ_INTRINSIC
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
140
jcmainct.c
140
jcmainct.c
@@ -17,14 +17,6 @@
|
|||||||
#include "jpeglib.h"
|
#include "jpeglib.h"
|
||||||
|
|
||||||
|
|
||||||
/* Note: currently, there is no operating mode in which a full-image buffer
|
|
||||||
* is needed at this step. If there were, that mode could not be used with
|
|
||||||
* "raw data" input, since this module is bypassed in that case. However,
|
|
||||||
* we've left the code here for possible use in special applications.
|
|
||||||
*/
|
|
||||||
#undef FULL_MAIN_BUFFER_SUPPORTED
|
|
||||||
|
|
||||||
|
|
||||||
/* Private buffer controller object */
|
/* Private buffer controller object */
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -40,13 +32,6 @@ typedef struct {
|
|||||||
* points to the currently accessible strips of the virtual arrays.
|
* points to the currently accessible strips of the virtual arrays.
|
||||||
*/
|
*/
|
||||||
JSAMPARRAY buffer[MAX_COMPONENTS];
|
JSAMPARRAY buffer[MAX_COMPONENTS];
|
||||||
|
|
||||||
#ifdef FULL_MAIN_BUFFER_SUPPORTED
|
|
||||||
/* If using full-image storage, this array holds pointers to virtual-array
|
|
||||||
* control blocks for each component. Unused if not full-image storage.
|
|
||||||
*/
|
|
||||||
jvirt_sarray_ptr whole_image[MAX_COMPONENTS];
|
|
||||||
#endif
|
|
||||||
} my_main_controller;
|
} my_main_controller;
|
||||||
|
|
||||||
typedef my_main_controller * my_main_ptr;
|
typedef my_main_controller * my_main_ptr;
|
||||||
@@ -56,11 +41,6 @@ typedef my_main_controller * my_main_ptr;
|
|||||||
METHODDEF(void) process_data_simple_main
|
METHODDEF(void) process_data_simple_main
|
||||||
(j_compress_ptr cinfo, JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
|
(j_compress_ptr cinfo, JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
|
||||||
JDIMENSION in_rows_avail);
|
JDIMENSION in_rows_avail);
|
||||||
#ifdef FULL_MAIN_BUFFER_SUPPORTED
|
|
||||||
METHODDEF(void) process_data_buffer_main
|
|
||||||
(j_compress_ptr cinfo, JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
|
|
||||||
JDIMENSION in_rows_avail);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -76,32 +56,14 @@ start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
|
|||||||
if (cinfo->raw_data_in)
|
if (cinfo->raw_data_in)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (pass_mode != JBUF_PASS_THRU)
|
||||||
|
ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
||||||
|
|
||||||
main_ptr->cur_iMCU_row = 0; /* initialize counters */
|
main_ptr->cur_iMCU_row = 0; /* initialize counters */
|
||||||
main_ptr->rowgroup_ctr = 0;
|
main_ptr->rowgroup_ctr = 0;
|
||||||
main_ptr->suspended = FALSE;
|
main_ptr->suspended = FALSE;
|
||||||
main_ptr->pass_mode = pass_mode; /* save mode for use by process_data */
|
main_ptr->pass_mode = pass_mode; /* save mode for use by process_data */
|
||||||
|
|
||||||
switch (pass_mode) {
|
|
||||||
case JBUF_PASS_THRU:
|
|
||||||
#ifdef FULL_MAIN_BUFFER_SUPPORTED
|
|
||||||
if (main_ptr->whole_image[0] != NULL)
|
|
||||||
ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
||||||
#endif
|
|
||||||
main_ptr->pub.process_data = process_data_simple_main;
|
main_ptr->pub.process_data = process_data_simple_main;
|
||||||
break;
|
|
||||||
#ifdef FULL_MAIN_BUFFER_SUPPORTED
|
|
||||||
case JBUF_SAVE_SOURCE:
|
|
||||||
case JBUF_CRANK_DEST:
|
|
||||||
case JBUF_SAVE_AND_PASS:
|
|
||||||
if (main_ptr->whole_image[0] == NULL)
|
|
||||||
ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
||||||
main_ptr->pub.process_data = process_data_buffer_main;
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
default:
|
|
||||||
ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -160,85 +122,6 @@ process_data_simple_main (j_compress_ptr cinfo,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef FULL_MAIN_BUFFER_SUPPORTED
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Process some data.
|
|
||||||
* This routine handles all of the modes that use a full-size buffer.
|
|
||||||
*/
|
|
||||||
|
|
||||||
METHODDEF(void)
|
|
||||||
process_data_buffer_main (j_compress_ptr cinfo,
|
|
||||||
JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
|
|
||||||
JDIMENSION in_rows_avail)
|
|
||||||
{
|
|
||||||
my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
|
|
||||||
int ci;
|
|
||||||
jpeg_component_info *compptr;
|
|
||||||
boolean writing = (main_ptr->pass_mode != JBUF_CRANK_DEST);
|
|
||||||
|
|
||||||
while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) {
|
|
||||||
/* Realign the virtual buffers if at the start of an iMCU row. */
|
|
||||||
if (main_ptr->rowgroup_ctr == 0) {
|
|
||||||
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
||||||
ci++, compptr++) {
|
|
||||||
main_ptr->buffer[ci] = (*cinfo->mem->access_virt_sarray)
|
|
||||||
((j_common_ptr) cinfo, main_ptr->whole_image[ci],
|
|
||||||
main_ptr->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE),
|
|
||||||
(JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing);
|
|
||||||
}
|
|
||||||
/* In a read pass, pretend we just read some source data. */
|
|
||||||
if (! writing) {
|
|
||||||
*in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE;
|
|
||||||
main_ptr->rowgroup_ctr = DCTSIZE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If a write pass, read input data until the current iMCU row is full. */
|
|
||||||
/* Note: preprocessor will pad if necessary to fill the last iMCU row. */
|
|
||||||
if (writing) {
|
|
||||||
(*cinfo->prep->pre_process_data) (cinfo,
|
|
||||||
input_buf, in_row_ctr, in_rows_avail,
|
|
||||||
main_ptr->buffer, &main_ptr->rowgroup_ctr,
|
|
||||||
(JDIMENSION) DCTSIZE);
|
|
||||||
/* Return to application if we need more data to fill the iMCU row. */
|
|
||||||
if (main_ptr->rowgroup_ctr < DCTSIZE)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Emit data, unless this is a sink-only pass. */
|
|
||||||
if (main_ptr->pass_mode != JBUF_SAVE_SOURCE) {
|
|
||||||
if (! (*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) {
|
|
||||||
/* If compressor did not consume the whole row, then we must need to
|
|
||||||
* suspend processing and return to the application. In this situation
|
|
||||||
* we pretend we didn't yet consume the last input row; otherwise, if
|
|
||||||
* it happened to be the last row of the image, the application would
|
|
||||||
* think we were done.
|
|
||||||
*/
|
|
||||||
if (! main_ptr->suspended) {
|
|
||||||
(*in_row_ctr)--;
|
|
||||||
main_ptr->suspended = TRUE;
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/* We did finish the row. Undo our little suspension hack if a previous
|
|
||||||
* call suspended; then mark the main buffer empty.
|
|
||||||
*/
|
|
||||||
if (main_ptr->suspended) {
|
|
||||||
(*in_row_ctr)++;
|
|
||||||
main_ptr->suspended = FALSE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If get here, we are done with this iMCU row. Mark buffer empty. */
|
|
||||||
main_ptr->rowgroup_ctr = 0;
|
|
||||||
main_ptr->cur_iMCU_row++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* FULL_MAIN_BUFFER_SUPPORTED */
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize main buffer controller.
|
* Initialize main buffer controller.
|
||||||
*/
|
*/
|
||||||
@@ -264,25 +147,8 @@ jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
|
|||||||
* may be of a different size.
|
* may be of a different size.
|
||||||
*/
|
*/
|
||||||
if (need_full_buffer) {
|
if (need_full_buffer) {
|
||||||
#ifdef FULL_MAIN_BUFFER_SUPPORTED
|
|
||||||
/* Allocate a full-image virtual array for each component */
|
|
||||||
/* Note we pad the bottom to a multiple of the iMCU height */
|
|
||||||
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
||||||
ci++, compptr++) {
|
|
||||||
main_ptr->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
|
|
||||||
((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
|
|
||||||
compptr->width_in_blocks * DCTSIZE,
|
|
||||||
(JDIMENSION) jround_up((long) compptr->height_in_blocks,
|
|
||||||
(long) compptr->v_samp_factor) * DCTSIZE,
|
|
||||||
(JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
||||||
#endif
|
|
||||||
} else {
|
} else {
|
||||||
#ifdef FULL_MAIN_BUFFER_SUPPORTED
|
|
||||||
main_ptr->whole_image[0] = NULL; /* flag for no virtual arrays */
|
|
||||||
#endif
|
|
||||||
/* Allocate a strip buffer for each component */
|
/* Allocate a strip buffer for each component */
|
||||||
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
||||||
ci++, compptr++) {
|
ci++, compptr++) {
|
||||||
|
|||||||
@@ -605,7 +605,9 @@ copy_buffer (j_compress_ptr cinfo, int scan_idx)
|
|||||||
size -= cinfo->dest->free_in_buffer;
|
size -= cinfo->dest->free_in_buffer;
|
||||||
cinfo->dest->next_output_byte += cinfo->dest->free_in_buffer;
|
cinfo->dest->next_output_byte += cinfo->dest->free_in_buffer;
|
||||||
cinfo->dest->free_in_buffer = 0;
|
cinfo->dest->free_in_buffer = 0;
|
||||||
(*cinfo->dest->empty_output_buffer)(cinfo);
|
|
||||||
|
if (!(*cinfo->dest->empty_output_buffer)(cinfo))
|
||||||
|
ERREXIT(cinfo, JERR_UNSUPPORTED_SUSPEND);
|
||||||
}
|
}
|
||||||
|
|
||||||
MEMCOPY(cinfo->dest->next_output_byte, src, size);
|
MEMCOPY(cinfo->dest->next_output_byte, src, size);
|
||||||
|
|||||||
@@ -514,8 +514,9 @@ jinit_downsampler (j_compress_ptr cinfo)
|
|||||||
#endif
|
#endif
|
||||||
downsample->methods[ci] = h2v2_smooth_downsample;
|
downsample->methods[ci] = h2v2_smooth_downsample;
|
||||||
downsample->pub.need_context_rows = TRUE;
|
downsample->pub.need_context_rows = TRUE;
|
||||||
} else {
|
} else
|
||||||
#endif
|
#endif
|
||||||
|
{
|
||||||
if (jsimd_can_h2v2_downsample())
|
if (jsimd_can_h2v2_downsample())
|
||||||
downsample->methods[ci] = jsimd_h2v2_downsample;
|
downsample->methods[ci] = jsimd_h2v2_downsample;
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
|
jpeg_copy_critical_parameters (const j_decompress_ptr srcinfo,
|
||||||
j_compress_ptr dstinfo)
|
j_compress_ptr dstinfo)
|
||||||
{
|
{
|
||||||
JQUANT_TBL ** qtblptr;
|
JQUANT_TBL ** qtblptr;
|
||||||
|
|||||||
@@ -333,7 +333,7 @@ jpeg_consume_input (j_decompress_ptr cinfo)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
GLOBAL(boolean)
|
GLOBAL(boolean)
|
||||||
jpeg_input_complete (j_decompress_ptr cinfo)
|
jpeg_input_complete (const j_decompress_ptr cinfo)
|
||||||
{
|
{
|
||||||
/* Check for valid jpeg object */
|
/* Check for valid jpeg object */
|
||||||
if (cinfo->global_state < DSTATE_START ||
|
if (cinfo->global_state < DSTATE_START ||
|
||||||
@@ -348,7 +348,7 @@ jpeg_input_complete (j_decompress_ptr cinfo)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
GLOBAL(boolean)
|
GLOBAL(boolean)
|
||||||
jpeg_has_multiple_scans (j_decompress_ptr cinfo)
|
jpeg_has_multiple_scans (const j_decompress_ptr cinfo)
|
||||||
{
|
{
|
||||||
/* Only valid after jpeg_read_header completes */
|
/* Only valid after jpeg_read_header completes */
|
||||||
if (cinfo->global_state < DSTATE_READY ||
|
if (cinfo->global_state < DSTATE_READY ||
|
||||||
|
|||||||
@@ -254,7 +254,7 @@ jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile)
|
|||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
jpeg_mem_src (j_decompress_ptr cinfo,
|
jpeg_mem_src (j_decompress_ptr cinfo,
|
||||||
unsigned char * inbuffer, unsigned long insize)
|
const unsigned char * inbuffer, unsigned long insize)
|
||||||
{
|
{
|
||||||
struct jpeg_source_mgr * src;
|
struct jpeg_source_mgr * src;
|
||||||
|
|
||||||
@@ -278,6 +278,6 @@ jpeg_mem_src (j_decompress_ptr cinfo,
|
|||||||
src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
|
src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
|
||||||
src->term_source = term_source;
|
src->term_source = term_source;
|
||||||
src->bytes_in_buffer = (size_t) insize;
|
src->bytes_in_buffer = (size_t) insize;
|
||||||
src->next_input_byte = (JOCTET *) inbuffer;
|
src->next_input_byte = (const JOCTET *) inbuffer;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
2
jerror.h
2
jerror.h
@@ -210,6 +210,8 @@ JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
|
|||||||
JMESSAGE(JERR_BAD_PARAM, "Bogus parameter")
|
JMESSAGE(JERR_BAD_PARAM, "Bogus parameter")
|
||||||
JMESSAGE(JERR_BAD_PARAM_VALUE, "Bogus parameter value")
|
JMESSAGE(JERR_BAD_PARAM_VALUE, "Bogus parameter value")
|
||||||
|
|
||||||
|
JMESSAGE(JERR_UNSUPPORTED_SUSPEND, "I/O suspension not supported in scan optimization")
|
||||||
|
|
||||||
#ifdef JMAKE_ENUM_LIST
|
#ifdef JMAKE_ENUM_LIST
|
||||||
|
|
||||||
JMSG_LASTMSGCODE
|
JMSG_LASTMSGCODE
|
||||||
|
|||||||
18
jmorecfg.h
18
jmorecfg.h
@@ -180,6 +180,24 @@ typedef unsigned int JDIMENSION;
|
|||||||
#define EXTERN(type) extern type
|
#define EXTERN(type) extern type
|
||||||
|
|
||||||
|
|
||||||
|
/* Originally, this macro was used as a way of defining function prototypes
|
||||||
|
* for both modern compilers as well as older compilers that did not support
|
||||||
|
* prototype parameters. libjpeg-turbo has never supported these older,
|
||||||
|
* non-ANSI compilers, but the macro is still included because there is some
|
||||||
|
* software out there that uses it.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define JMETHOD(type,methodname,arglist) type (*methodname) arglist
|
||||||
|
|
||||||
|
|
||||||
|
/* libjpeg-turbo no longer supports platforms that have far symbols (MS-DOS),
|
||||||
|
* but again, some software relies on this macro.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#undef FAR
|
||||||
|
#define FAR
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* On a few systems, type boolean and/or its values FALSE, TRUE may appear
|
* On a few systems, type boolean and/or its values FALSE, TRUE may appear
|
||||||
* in standard header files. Or you may have conflicts with application-
|
* in standard header files. Or you may have conflicts with application-
|
||||||
|
|||||||
@@ -91,6 +91,7 @@ struct jpeg_comp_master {
|
|||||||
float trellis_delta_dc_weight;
|
float trellis_delta_dc_weight;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef C_ARITH_CODING_SUPPORTED
|
||||||
/* The following two definitions specify the allocation chunk size
|
/* The following two definitions specify the allocation chunk size
|
||||||
* for the statistics area.
|
* for the statistics area.
|
||||||
* According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
|
* According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
|
||||||
@@ -110,7 +111,11 @@ struct jpeg_comp_master {
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
float rate_dc[DC_STAT_BINS][2];
|
float rate_dc[DC_STAT_BINS][2];
|
||||||
float rate_ac[AC_STAT_BINS][2];
|
float rate_ac[AC_STAT_BINS][2];
|
||||||
|
int arith_dc_L;
|
||||||
|
int arith_dc_U;
|
||||||
|
int arith_ac_K;
|
||||||
} arith_rates;
|
} arith_rates;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Main buffer control (downsampled-data buffer) */
|
/* Main buffer control (downsampled-data buffer) */
|
||||||
struct jpeg_c_main_controller {
|
struct jpeg_c_main_controller {
|
||||||
@@ -386,12 +391,14 @@ EXTERN(void) jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row,
|
|||||||
JDIMENSION num_blocks);
|
JDIMENSION num_blocks);
|
||||||
EXTERN(void) jzero_far (void * target, size_t bytestozero);
|
EXTERN(void) jzero_far (void * target, size_t bytestozero);
|
||||||
|
|
||||||
|
#ifdef C_ARITH_CODING_SUPPORTED
|
||||||
EXTERN(void) jget_arith_rates (j_compress_ptr cinfo, int dc_tbl_no, int ac_tbl_no, arith_rates *r);
|
EXTERN(void) jget_arith_rates (j_compress_ptr cinfo, int dc_tbl_no, int ac_tbl_no, arith_rates *r);
|
||||||
|
|
||||||
EXTERN(void) quantize_trellis_arith
|
EXTERN(void) quantize_trellis_arith
|
||||||
(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_blocks, JBLOCKROW src, JDIMENSION num_blocks,
|
(j_compress_ptr cinfo, arith_rates *r, JBLOCKROW coef_blocks, JBLOCKROW src, JDIMENSION num_blocks,
|
||||||
JQUANT_TBL * qtbl, double *norm_src, double *norm_coef, JCOEF *last_dc_val,
|
JQUANT_TBL * qtbl, double *norm_src, double *norm_coef, JCOEF *last_dc_val,
|
||||||
JBLOCKROW coef_blocks_above, JBLOCKROW src_above);
|
JBLOCKROW coef_blocks_above, JBLOCKROW src_above);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Constant tables in jutils.c */
|
/* Constant tables in jutils.c */
|
||||||
#if 0 /* This table is not actually needed in v6a */
|
#if 0 /* This table is not actually needed in v6a */
|
||||||
|
|||||||
30
jpeglib.h
30
jpeglib.h
@@ -923,6 +923,16 @@ struct jpeg_memory_mgr {
|
|||||||
typedef boolean (*jpeg_marker_parser_method) (j_decompress_ptr cinfo);
|
typedef boolean (*jpeg_marker_parser_method) (j_decompress_ptr cinfo);
|
||||||
|
|
||||||
|
|
||||||
|
/* Originally, this macro was used as a way of defining function prototypes
|
||||||
|
* for both modern compilers as well as older compilers that did not support
|
||||||
|
* prototype parameters. libjpeg-turbo has never supported these older,
|
||||||
|
* non-ANSI compilers, but the macro is still included because there is some
|
||||||
|
* software out there that uses it.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define JPP(arglist) arglist
|
||||||
|
|
||||||
|
|
||||||
/* Default error-management setup */
|
/* Default error-management setup */
|
||||||
EXTERN(struct jpeg_error_mgr *) jpeg_std_error (struct jpeg_error_mgr * err);
|
EXTERN(struct jpeg_error_mgr *) jpeg_std_error (struct jpeg_error_mgr * err);
|
||||||
|
|
||||||
@@ -956,7 +966,7 @@ EXTERN(void) jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile);
|
|||||||
/* Data source and destination managers: memory buffers. */
|
/* Data source and destination managers: memory buffers. */
|
||||||
EXTERN(void) jpeg_mem_dest (j_compress_ptr cinfo, unsigned char ** outbuffer,
|
EXTERN(void) jpeg_mem_dest (j_compress_ptr cinfo, unsigned char ** outbuffer,
|
||||||
unsigned long * outsize);
|
unsigned long * outsize);
|
||||||
EXTERN(void) jpeg_mem_src (j_decompress_ptr cinfo, unsigned char * inbuffer,
|
EXTERN(void) jpeg_mem_src (j_decompress_ptr cinfo, const unsigned char * inbuffer,
|
||||||
unsigned long insize);
|
unsigned long insize);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -1036,10 +1046,10 @@ EXTERN(JDIMENSION) jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
|
|||||||
JDIMENSION max_lines);
|
JDIMENSION max_lines);
|
||||||
|
|
||||||
/* Additional entry points for buffered-image mode. */
|
/* Additional entry points for buffered-image mode. */
|
||||||
EXTERN(boolean) jpeg_has_multiple_scans (j_decompress_ptr cinfo);
|
EXTERN(boolean) jpeg_has_multiple_scans (const j_decompress_ptr cinfo);
|
||||||
EXTERN(boolean) jpeg_start_output (j_decompress_ptr cinfo, int scan_number);
|
EXTERN(boolean) jpeg_start_output (j_decompress_ptr cinfo, int scan_number);
|
||||||
EXTERN(boolean) jpeg_finish_output (j_decompress_ptr cinfo);
|
EXTERN(boolean) jpeg_finish_output (j_decompress_ptr cinfo);
|
||||||
EXTERN(boolean) jpeg_input_complete (j_decompress_ptr cinfo);
|
EXTERN(boolean) jpeg_input_complete (const j_decompress_ptr cinfo);
|
||||||
EXTERN(void) jpeg_new_colormap (j_decompress_ptr cinfo);
|
EXTERN(void) jpeg_new_colormap (j_decompress_ptr cinfo);
|
||||||
EXTERN(int) jpeg_consume_input (j_decompress_ptr cinfo);
|
EXTERN(int) jpeg_consume_input (j_decompress_ptr cinfo);
|
||||||
/* Return value is one of: */
|
/* Return value is one of: */
|
||||||
@@ -1068,7 +1078,7 @@ EXTERN(void) jpeg_set_marker_processor (j_decompress_ptr cinfo,
|
|||||||
EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients (j_decompress_ptr cinfo);
|
EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients (j_decompress_ptr cinfo);
|
||||||
EXTERN(void) jpeg_write_coefficients (j_compress_ptr cinfo,
|
EXTERN(void) jpeg_write_coefficients (j_compress_ptr cinfo,
|
||||||
jvirt_barray_ptr * coef_arrays);
|
jvirt_barray_ptr * coef_arrays);
|
||||||
EXTERN(void) jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
|
EXTERN(void) jpeg_copy_critical_parameters (const j_decompress_ptr srcinfo,
|
||||||
j_compress_ptr dstinfo);
|
j_compress_ptr dstinfo);
|
||||||
|
|
||||||
/* If you choose to abort compression or decompression before completing
|
/* If you choose to abort compression or decompression before completing
|
||||||
@@ -1090,25 +1100,25 @@ EXTERN(void) jpeg_destroy (j_common_ptr cinfo);
|
|||||||
EXTERN(boolean) jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired);
|
EXTERN(boolean) jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired);
|
||||||
|
|
||||||
/* Accessor functions for extension parameters */
|
/* Accessor functions for extension parameters */
|
||||||
EXTERN(boolean) jpeg_c_bool_param_supported (j_compress_ptr cinfo,
|
EXTERN(boolean) jpeg_c_bool_param_supported (const j_compress_ptr cinfo,
|
||||||
J_BOOLEAN_PARAM param);
|
J_BOOLEAN_PARAM param);
|
||||||
EXTERN(void) jpeg_c_set_bool_param (j_compress_ptr cinfo,
|
EXTERN(void) jpeg_c_set_bool_param (j_compress_ptr cinfo,
|
||||||
J_BOOLEAN_PARAM param, boolean value);
|
J_BOOLEAN_PARAM param, boolean value);
|
||||||
EXTERN(boolean) jpeg_c_get_bool_param (j_compress_ptr cinfo,
|
EXTERN(boolean) jpeg_c_get_bool_param (const j_compress_ptr cinfo,
|
||||||
J_BOOLEAN_PARAM param);
|
J_BOOLEAN_PARAM param);
|
||||||
|
|
||||||
EXTERN(boolean) jpeg_c_float_param_supported (j_compress_ptr cinfo,
|
EXTERN(boolean) jpeg_c_float_param_supported (const j_compress_ptr cinfo,
|
||||||
J_FLOAT_PARAM param);
|
J_FLOAT_PARAM param);
|
||||||
EXTERN(void) jpeg_c_set_float_param (j_compress_ptr cinfo, J_FLOAT_PARAM param,
|
EXTERN(void) jpeg_c_set_float_param (j_compress_ptr cinfo, J_FLOAT_PARAM param,
|
||||||
float value);
|
float value);
|
||||||
EXTERN(float) jpeg_c_get_float_param (j_compress_ptr cinfo,
|
EXTERN(float) jpeg_c_get_float_param (const j_compress_ptr cinfo,
|
||||||
J_FLOAT_PARAM param);
|
J_FLOAT_PARAM param);
|
||||||
|
|
||||||
EXTERN(boolean) jpeg_c_int_param_supported (j_compress_ptr cinfo,
|
EXTERN(boolean) jpeg_c_int_param_supported (const j_compress_ptr cinfo,
|
||||||
J_INT_PARAM param);
|
J_INT_PARAM param);
|
||||||
EXTERN(void) jpeg_c_set_int_param (j_compress_ptr cinfo, J_INT_PARAM param,
|
EXTERN(void) jpeg_c_set_int_param (j_compress_ptr cinfo, J_INT_PARAM param,
|
||||||
int value);
|
int value);
|
||||||
EXTERN(int) jpeg_c_get_int_param (j_compress_ptr cinfo, J_INT_PARAM param);
|
EXTERN(int) jpeg_c_get_int_param (const j_compress_ptr cinfo, J_INT_PARAM param);
|
||||||
|
|
||||||
|
|
||||||
/* These marker codes are exported since applications and data source modules
|
/* These marker codes are exported since applications and data source modules
|
||||||
|
|||||||
@@ -624,6 +624,9 @@ main (int argc, char **argv)
|
|||||||
end_progress_monitor((j_common_ptr) &dstinfo);
|
end_progress_monitor((j_common_ptr) &dstinfo);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
free(inbuffer);
|
||||||
|
free(outbuffer);
|
||||||
|
|
||||||
/* All done. */
|
/* All done. */
|
||||||
exit(jsrcerr.num_warnings + jdsterr.num_warnings ?EXIT_WARNING:EXIT_SUCCESS);
|
exit(jsrcerr.num_warnings + jdsterr.num_warnings ?EXIT_WARNING:EXIT_SUCCESS);
|
||||||
return 0; /* suppress no-return-value warnings */
|
return 0; /* suppress no-return-value warnings */
|
||||||
|
|||||||
@@ -112,6 +112,7 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
image_buffer = malloc(frame_width*16 + 2*(frame_width/2)*8);
|
image_buffer = malloc(frame_width*16 + 2*(frame_width/2)*8);
|
||||||
if (!image_buffer) {
|
if (!image_buffer) {
|
||||||
|
free(yuv_buffer);
|
||||||
fprintf(stderr, "Memory allocation failure!\n");
|
fprintf(stderr, "Memory allocation failure!\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@@ -163,6 +164,7 @@ int main(int argc, char *argv[]) {
|
|||||||
yuv_fd = fopen(yuv_path, "wb");
|
yuv_fd = fopen(yuv_path, "wb");
|
||||||
if (!yuv_fd) {
|
if (!yuv_fd) {
|
||||||
fprintf(stderr, "Invalid path to YUV file!");
|
fprintf(stderr, "Invalid path to YUV file!");
|
||||||
|
free(yuv_buffer);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (fwrite(yuv_buffer, yuv_size, 1, yuv_fd) != 1) {
|
if (fwrite(yuv_buffer, yuv_size, 1, yuv_fd) != 1) {
|
||||||
|
|||||||
2
rdbmp.c
2
rdbmp.c
@@ -381,7 +381,7 @@ start_input_bmp (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (biWidth <= 0 || biHeight <= 0)
|
if (biWidth <= 0 || biHeight <= 0 || biWidth > 0x7fffffffL || biHeight > 0x7fffffffL)
|
||||||
ERREXIT(cinfo, JERR_BMP_EMPTY);
|
ERREXIT(cinfo, JERR_BMP_EMPTY);
|
||||||
if (biPlanes != 1)
|
if (biPlanes != 1)
|
||||||
ERREXIT(cinfo, JERR_BMP_BADPLANES);
|
ERREXIT(cinfo, JERR_BMP_BADPLANES);
|
||||||
|
|||||||
4
rdpng.c
4
rdpng.c
@@ -76,6 +76,10 @@ start_input_png (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
|
|||||||
cinfo->in_color_space = JCS_RGB;
|
cinfo->in_color_space = JCS_RGB;
|
||||||
cinfo->input_components = 3;
|
cinfo->input_components = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (bit_depth == 16)
|
||||||
|
png_set_strip_16(source->png_ptr);
|
||||||
|
|
||||||
cinfo->data_precision = 8;
|
cinfo->data_precision = 8;
|
||||||
cinfo->image_width = width;
|
cinfo->image_width = width;
|
||||||
cinfo->image_height = height;
|
cinfo->image_height = height;
|
||||||
|
|||||||
24
rdppm.c
24
rdppm.c
@@ -68,6 +68,7 @@ typedef struct {
|
|||||||
JSAMPROW pixrow; /* compressor input buffer */
|
JSAMPROW pixrow; /* compressor input buffer */
|
||||||
size_t buffer_width; /* width of I/O buffer */
|
size_t buffer_width; /* width of I/O buffer */
|
||||||
JSAMPLE *rescale; /* => maxval-remapping array, or NULL */
|
JSAMPLE *rescale; /* => maxval-remapping array, or NULL */
|
||||||
|
int maxval;
|
||||||
} ppm_source_struct;
|
} ppm_source_struct;
|
||||||
|
|
||||||
typedef ppm_source_struct * ppm_source_ptr;
|
typedef ppm_source_struct * ppm_source_ptr;
|
||||||
@@ -91,7 +92,7 @@ pbm_getc (FILE * infile)
|
|||||||
|
|
||||||
|
|
||||||
LOCAL(unsigned int)
|
LOCAL(unsigned int)
|
||||||
read_pbm_integer (j_compress_ptr cinfo, FILE * infile)
|
read_pbm_integer (j_compress_ptr cinfo, FILE * infile, int maxval)
|
||||||
/* Read an unsigned decimal integer from the PPM file */
|
/* Read an unsigned decimal integer from the PPM file */
|
||||||
/* Swallows one trailing character after the integer */
|
/* Swallows one trailing character after the integer */
|
||||||
/* Note that on a 16-bit-int machine, only values up to 64k can be read. */
|
/* Note that on a 16-bit-int machine, only values up to 64k can be read. */
|
||||||
@@ -115,6 +116,10 @@ read_pbm_integer (j_compress_ptr cinfo, FILE * infile)
|
|||||||
val *= 10;
|
val *= 10;
|
||||||
val += ch - '0';
|
val += ch - '0';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (val > maxval)
|
||||||
|
ERREXIT(cinfo, JERR_PPM_TOOLARGE);
|
||||||
|
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -139,10 +144,11 @@ get_text_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
|
|||||||
register JSAMPROW ptr;
|
register JSAMPROW ptr;
|
||||||
register JSAMPLE *rescale = source->rescale;
|
register JSAMPLE *rescale = source->rescale;
|
||||||
JDIMENSION col;
|
JDIMENSION col;
|
||||||
|
int maxval = source->maxval;
|
||||||
|
|
||||||
ptr = source->pub.buffer[0];
|
ptr = source->pub.buffer[0];
|
||||||
for (col = cinfo->image_width; col > 0; col--) {
|
for (col = cinfo->image_width; col > 0; col--) {
|
||||||
*ptr++ = rescale[read_pbm_integer(cinfo, infile)];
|
*ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)];
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@@ -157,12 +163,13 @@ get_text_rgb_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
|
|||||||
register JSAMPROW ptr;
|
register JSAMPROW ptr;
|
||||||
register JSAMPLE *rescale = source->rescale;
|
register JSAMPLE *rescale = source->rescale;
|
||||||
JDIMENSION col;
|
JDIMENSION col;
|
||||||
|
int maxval = source->maxval;
|
||||||
|
|
||||||
ptr = source->pub.buffer[0];
|
ptr = source->pub.buffer[0];
|
||||||
for (col = cinfo->image_width; col > 0; col--) {
|
for (col = cinfo->image_width; col > 0; col--) {
|
||||||
*ptr++ = rescale[read_pbm_integer(cinfo, infile)];
|
*ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)];
|
||||||
*ptr++ = rescale[read_pbm_integer(cinfo, infile)];
|
*ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)];
|
||||||
*ptr++ = rescale[read_pbm_integer(cinfo, infile)];
|
*ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)];
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@@ -311,9 +318,9 @@ start_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* fetch the remaining header info */
|
/* fetch the remaining header info */
|
||||||
w = read_pbm_integer(cinfo, source->pub.input_file);
|
w = read_pbm_integer(cinfo, source->pub.input_file, 65535);
|
||||||
h = read_pbm_integer(cinfo, source->pub.input_file);
|
h = read_pbm_integer(cinfo, source->pub.input_file, 65535);
|
||||||
maxval = read_pbm_integer(cinfo, source->pub.input_file);
|
maxval = read_pbm_integer(cinfo, source->pub.input_file, 65535);
|
||||||
|
|
||||||
if (w <= 0 || h <= 0 || maxval <= 0) /* error check */
|
if (w <= 0 || h <= 0 || maxval <= 0) /* error check */
|
||||||
ERREXIT(cinfo, JERR_PPM_NOT);
|
ERREXIT(cinfo, JERR_PPM_NOT);
|
||||||
@@ -321,6 +328,7 @@ start_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
|
|||||||
cinfo->data_precision = BITS_IN_JSAMPLE; /* we always rescale data to this */
|
cinfo->data_precision = BITS_IN_JSAMPLE; /* we always rescale data to this */
|
||||||
cinfo->image_width = (JDIMENSION) w;
|
cinfo->image_width = (JDIMENSION) w;
|
||||||
cinfo->image_height = (JDIMENSION) h;
|
cinfo->image_height = (JDIMENSION) h;
|
||||||
|
source->maxval = maxval;
|
||||||
|
|
||||||
/* initialize flags to most common settings */
|
/* initialize flags to most common settings */
|
||||||
need_iobuffer = TRUE; /* do we need an I/O buffer? */
|
need_iobuffer = TRUE; /* do we need an I/O buffer? */
|
||||||
|
|||||||
@@ -364,7 +364,8 @@ start_input_tga (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
|
|||||||
if (cmaptype > 1 || /* cmaptype must be 0 or 1 */
|
if (cmaptype > 1 || /* cmaptype must be 0 or 1 */
|
||||||
source->pixel_size < 1 || source->pixel_size > 4 ||
|
source->pixel_size < 1 || source->pixel_size > 4 ||
|
||||||
(UCH(targaheader[16]) & 7) != 0 || /* bits/pixel must be multiple of 8 */
|
(UCH(targaheader[16]) & 7) != 0 || /* bits/pixel must be multiple of 8 */
|
||||||
interlace_type != 0) /* currently don't allow interlaced image */
|
interlace_type != 0 || /* currently don't allow interlaced image */
|
||||||
|
width == 0 || height == 0) /* image width/height must be nonzero */
|
||||||
ERREXIT(cinfo, JERR_TGA_BADPARMS);
|
ERREXIT(cinfo, JERR_TGA_BADPARMS);
|
||||||
|
|
||||||
if (subtype > 8) {
|
if (subtype > 8) {
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ onexit()
|
|||||||
|
|
||||||
usage()
|
usage()
|
||||||
{
|
{
|
||||||
echo "$0 [-build32 [32-bit build dir]] [-buildarmv6 [ARM v6 build dir]] [-buildarmv7 [ARM v7 build dir]] [-buildarmv7s [ARM v7s build dir]]"
|
echo "$0 [-build32 [32-bit build dir]] [-buildarmv6 [ARMv6 build dir]] [-buildarmv7 [ARMv7 build dir]] [-buildarmv7s [ARMv7s build dir] [-buildarmv8 [ARMv8 build dir]] [-lipo [path to lipo]]"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -33,7 +33,10 @@ BUILDDIRARMV7=@abs_top_srcdir@/iosarmv7
|
|||||||
BUILDARMV7=0
|
BUILDARMV7=0
|
||||||
BUILDDIRARMV7S=@abs_top_srcdir@/iosarmv7s
|
BUILDDIRARMV7S=@abs_top_srcdir@/iosarmv7s
|
||||||
BUILDARMV7S=0
|
BUILDARMV7S=0
|
||||||
|
BUILDDIRARMV8=@abs_top_srcdir@/iosarmv8
|
||||||
|
BUILDARMV8=0
|
||||||
WITH_JAVA=@WITH_JAVA@
|
WITH_JAVA=@WITH_JAVA@
|
||||||
|
LIPO=lipo
|
||||||
|
|
||||||
PREFIX=%{__prefix}
|
PREFIX=%{__prefix}
|
||||||
BINDIR=%{__bindir}
|
BINDIR=%{__bindir}
|
||||||
@@ -75,6 +78,21 @@ while [ $# -gt 0 ]; do
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
|
-buildarmv8)
|
||||||
|
BUILDARMV8=1
|
||||||
|
if [ $# -gt 1 ]; then
|
||||||
|
if [[ ! "$2" =~ -.* ]]; then
|
||||||
|
BUILDDIRARMV8=$2; shift
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
-lipo)
|
||||||
|
if [ $# -gt 1 ]; then
|
||||||
|
if [[ ! "$2" =~ -.* ]]; then
|
||||||
|
LIPO=$2; shift
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
;;
|
||||||
esac
|
esac
|
||||||
shift
|
shift
|
||||||
done
|
done
|
||||||
@@ -110,50 +128,50 @@ if [ $BUILD32 = 1 ]; then
|
|||||||
popd
|
popd
|
||||||
if [ ! -h $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -a \
|
if [ ! -h $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -a \
|
||||||
! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then
|
! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
-arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
|
-arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
|
||||||
-arch x86_64 $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
|
-arch x86_64 $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
|
||||||
-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib
|
-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib
|
||||||
elif [ ! -h $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \
|
elif [ ! -h $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \
|
||||||
! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then
|
! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
-arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
|
-arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
|
||||||
-arch x86_64 $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
|
-arch x86_64 $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
|
||||||
-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib
|
-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib
|
||||||
fi
|
fi
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
-arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.a \
|
-arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.a \
|
||||||
-arch x86_64 $PKGROOT/$LIBDIR/libjpeg.a \
|
-arch x86_64 $PKGROOT/$LIBDIR/libjpeg.a \
|
||||||
-output $PKGROOT/$LIBDIR/libjpeg.a
|
-output $PKGROOT/$LIBDIR/libjpeg.a
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
-arch i386 $TMPDIR/dist.x86/$LIBDIR/libturbojpeg.0.dylib \
|
-arch i386 $TMPDIR/dist.x86/$LIBDIR/libturbojpeg.0.dylib \
|
||||||
-arch x86_64 $PKGROOT/$LIBDIR/libturbojpeg.0.dylib \
|
-arch x86_64 $PKGROOT/$LIBDIR/libturbojpeg.0.dylib \
|
||||||
-output $PKGROOT/$LIBDIR/libturbojpeg.0.dylib
|
-output $PKGROOT/$LIBDIR/libturbojpeg.0.dylib
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
-arch i386 $TMPDIR/dist.x86/$LIBDIR/libturbojpeg.a \
|
-arch i386 $TMPDIR/dist.x86/$LIBDIR/libturbojpeg.a \
|
||||||
-arch x86_64 $PKGROOT/$LIBDIR/libturbojpeg.a \
|
-arch x86_64 $PKGROOT/$LIBDIR/libturbojpeg.a \
|
||||||
-output $PKGROOT/$LIBDIR/libturbojpeg.a
|
-output $PKGROOT/$LIBDIR/libturbojpeg.a
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
-arch i386 $TMPDIR/dist.x86/$BINDIR/cjpeg \
|
-arch i386 $TMPDIR/dist.x86/$BINDIR/cjpeg \
|
||||||
-arch x86_64 $PKGROOT/$BINDIR/cjpeg \
|
-arch x86_64 $PKGROOT/$BINDIR/cjpeg \
|
||||||
-output $PKGROOT/$BINDIR/cjpeg
|
-output $PKGROOT/$BINDIR/cjpeg
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
-arch i386 $TMPDIR/dist.x86/$BINDIR/djpeg \
|
-arch i386 $TMPDIR/dist.x86/$BINDIR/djpeg \
|
||||||
-arch x86_64 $PKGROOT/$BINDIR/djpeg \
|
-arch x86_64 $PKGROOT/$BINDIR/djpeg \
|
||||||
-output $PKGROOT/$BINDIR/djpeg
|
-output $PKGROOT/$BINDIR/djpeg
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
-arch i386 $TMPDIR/dist.x86/$BINDIR/jpegtran \
|
-arch i386 $TMPDIR/dist.x86/$BINDIR/jpegtran \
|
||||||
-arch x86_64 $PKGROOT/$BINDIR/jpegtran \
|
-arch x86_64 $PKGROOT/$BINDIR/jpegtran \
|
||||||
-output $PKGROOT/$BINDIR/jpegtran
|
-output $PKGROOT/$BINDIR/jpegtran
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
-arch i386 $TMPDIR/dist.x86/$BINDIR/tjbench \
|
-arch i386 $TMPDIR/dist.x86/$BINDIR/tjbench \
|
||||||
-arch x86_64 $PKGROOT/$BINDIR/tjbench \
|
-arch x86_64 $PKGROOT/$BINDIR/tjbench \
|
||||||
-output $PKGROOT/$BINDIR/tjbench
|
-output $PKGROOT/$BINDIR/tjbench
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
-arch i386 $TMPDIR/dist.x86/$BINDIR/rdjpgcom \
|
-arch i386 $TMPDIR/dist.x86/$BINDIR/rdjpgcom \
|
||||||
-arch x86_64 $PKGROOT/$BINDIR/rdjpgcom \
|
-arch x86_64 $PKGROOT/$BINDIR/rdjpgcom \
|
||||||
-output $PKGROOT/$BINDIR/rdjpgcom
|
-output $PKGROOT/$BINDIR/rdjpgcom
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
-arch i386 $TMPDIR/dist.x86/$BINDIR/wrjpgcom \
|
-arch i386 $TMPDIR/dist.x86/$BINDIR/wrjpgcom \
|
||||||
-arch x86_64 $PKGROOT/$BINDIR/wrjpgcom \
|
-arch x86_64 $PKGROOT/$BINDIR/wrjpgcom \
|
||||||
-output $PKGROOT/$BINDIR/wrjpgcom
|
-output $PKGROOT/$BINDIR/wrjpgcom
|
||||||
@@ -162,71 +180,258 @@ fi
|
|||||||
|
|
||||||
if [ $BUILDARMV6 = 1 ]; then
|
if [ $BUILDARMV6 = 1 ]; then
|
||||||
if [ ! -d $BUILDDIRARMV6 ]; then
|
if [ ! -d $BUILDDIRARMV6 ]; then
|
||||||
echo ERROR: ARM v6 build directory $BUILDDIRARMV6 does not exist
|
echo ERROR: ARMv6 build directory $BUILDDIRARMV6 does not exist
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [ ! -f $BUILDDIRARMV6/Makefile ]; then
|
if [ ! -f $BUILDDIRARMV6/Makefile ]; then
|
||||||
echo ERROR: ARM v6 build directory $BUILDDIRARMV6 is not configured
|
echo ERROR: ARMv6 build directory $BUILDDIRARMV6 is not configured
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
mkdir -p $TMPDIR/dist.armv6
|
mkdir -p $TMPDIR/dist.armv6
|
||||||
pushd $BUILDDIRARMV6
|
pushd $BUILDDIRARMV6
|
||||||
make install DESTDIR=$TMPDIR/dist.armv6
|
make install DESTDIR=$TMPDIR/dist.armv6
|
||||||
popd
|
popd
|
||||||
lipo -create \
|
if [ ! -h $TMPDIR/dist.armv6/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -a \
|
||||||
|
! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
|
||||||
|
-arch arm $TMPDIR/dist.armv6/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
|
||||||
|
-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib
|
||||||
|
elif [ ! -h $TMPDIR/dist.armv6/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \
|
||||||
|
! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
|
||||||
|
-arch arm $TMPDIR/dist.armv6/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
|
||||||
|
-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib
|
||||||
|
fi
|
||||||
|
$LIPO -create \
|
||||||
$PKGROOT/$LIBDIR/libjpeg.a \
|
$PKGROOT/$LIBDIR/libjpeg.a \
|
||||||
-arch arm $TMPDIR/dist.armv6/$LIBDIR/libjpeg.a \
|
-arch arm $TMPDIR/dist.armv6/$LIBDIR/libjpeg.a \
|
||||||
-output $PKGROOT/$LIBDIR/libjpeg.a
|
-output $PKGROOT/$LIBDIR/libjpeg.a
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libturbojpeg.0.dylib \
|
||||||
|
-arch arm $TMPDIR/dist.armv6/$LIBDIR/libturbojpeg.0.dylib \
|
||||||
|
-output $PKGROOT/$LIBDIR/libturbojpeg.0.dylib
|
||||||
|
$LIPO -create \
|
||||||
$PKGROOT/$LIBDIR/libturbojpeg.a \
|
$PKGROOT/$LIBDIR/libturbojpeg.a \
|
||||||
-arch arm $TMPDIR/dist.armv6/$LIBDIR/libturbojpeg.a \
|
-arch arm $TMPDIR/dist.armv6/$LIBDIR/libturbojpeg.a \
|
||||||
-output $PKGROOT/$LIBDIR/libturbojpeg.a
|
-output $PKGROOT/$LIBDIR/libturbojpeg.a
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/cjpeg \
|
||||||
|
-arch arm $TMPDIR/dist.armv6/$BINDIR/cjpeg \
|
||||||
|
-output $PKGROOT/$BINDIR/cjpeg
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/djpeg \
|
||||||
|
-arch arm $TMPDIR/dist.armv6/$BINDIR/djpeg \
|
||||||
|
-output $PKGROOT/$BINDIR/djpeg
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/jpegtran \
|
||||||
|
-arch arm $TMPDIR/dist.armv6/$BINDIR/jpegtran \
|
||||||
|
-output $PKGROOT/$BINDIR/jpegtran
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/tjbench \
|
||||||
|
-arch arm $TMPDIR/dist.armv6/$BINDIR/tjbench \
|
||||||
|
-output $PKGROOT/$BINDIR/tjbench
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/rdjpgcom \
|
||||||
|
-arch arm $TMPDIR/dist.armv6/$BINDIR/rdjpgcom \
|
||||||
|
-output $PKGROOT/$BINDIR/rdjpgcom
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/wrjpgcom \
|
||||||
|
-arch arm $TMPDIR/dist.armv6/$BINDIR/wrjpgcom \
|
||||||
|
-output $PKGROOT/$BINDIR/wrjpgcom
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ $BUILDARMV7 = 1 ]; then
|
if [ $BUILDARMV7 = 1 ]; then
|
||||||
if [ ! -d $BUILDDIRARMV7 ]; then
|
if [ ! -d $BUILDDIRARMV7 ]; then
|
||||||
echo ERROR: ARM v7 build directory $BUILDDIRARMV7 does not exist
|
echo ERROR: ARMv7 build directory $BUILDDIRARMV7 does not exist
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [ ! -f $BUILDDIRARMV7/Makefile ]; then
|
if [ ! -f $BUILDDIRARMV7/Makefile ]; then
|
||||||
echo ERROR: ARM v7 build directory $BUILDDIRARMV7 is not configured
|
echo ERROR: ARMv7 build directory $BUILDDIRARMV7 is not configured
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
mkdir -p $TMPDIR/dist.armv7
|
mkdir -p $TMPDIR/dist.armv7
|
||||||
pushd $BUILDDIRARMV7
|
pushd $BUILDDIRARMV7
|
||||||
make install DESTDIR=$TMPDIR/dist.armv7
|
make install DESTDIR=$TMPDIR/dist.armv7
|
||||||
popd
|
popd
|
||||||
lipo -create \
|
if [ ! -h $TMPDIR/dist.armv7/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -a \
|
||||||
|
! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
|
||||||
|
-arch arm $TMPDIR/dist.armv7/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
|
||||||
|
-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib
|
||||||
|
elif [ ! -h $TMPDIR/dist.armv7/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \
|
||||||
|
! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
|
||||||
|
-arch arm $TMPDIR/dist.armv7/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
|
||||||
|
-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib
|
||||||
|
fi
|
||||||
|
$LIPO -create \
|
||||||
$PKGROOT/$LIBDIR/libjpeg.a \
|
$PKGROOT/$LIBDIR/libjpeg.a \
|
||||||
-arch arm $TMPDIR/dist.armv7/$LIBDIR/libjpeg.a \
|
-arch arm $TMPDIR/dist.armv7/$LIBDIR/libjpeg.a \
|
||||||
-output $PKGROOT/$LIBDIR/libjpeg.a
|
-output $PKGROOT/$LIBDIR/libjpeg.a
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libturbojpeg.0.dylib \
|
||||||
|
-arch arm $TMPDIR/dist.armv7/$LIBDIR/libturbojpeg.0.dylib \
|
||||||
|
-output $PKGROOT/$LIBDIR/libturbojpeg.0.dylib
|
||||||
|
$LIPO -create \
|
||||||
$PKGROOT/$LIBDIR/libturbojpeg.a \
|
$PKGROOT/$LIBDIR/libturbojpeg.a \
|
||||||
-arch arm $TMPDIR/dist.armv7/$LIBDIR/libturbojpeg.a \
|
-arch arm $TMPDIR/dist.armv7/$LIBDIR/libturbojpeg.a \
|
||||||
-output $PKGROOT/$LIBDIR/libturbojpeg.a
|
-output $PKGROOT/$LIBDIR/libturbojpeg.a
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/cjpeg \
|
||||||
|
-arch arm $TMPDIR/dist.armv7/$BINDIR/cjpeg \
|
||||||
|
-output $PKGROOT/$BINDIR/cjpeg
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/djpeg \
|
||||||
|
-arch arm $TMPDIR/dist.armv7/$BINDIR/djpeg \
|
||||||
|
-output $PKGROOT/$BINDIR/djpeg
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/jpegtran \
|
||||||
|
-arch arm $TMPDIR/dist.armv7/$BINDIR/jpegtran \
|
||||||
|
-output $PKGROOT/$BINDIR/jpegtran
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/tjbench \
|
||||||
|
-arch arm $TMPDIR/dist.armv7/$BINDIR/tjbench \
|
||||||
|
-output $PKGROOT/$BINDIR/tjbench
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/rdjpgcom \
|
||||||
|
-arch arm $TMPDIR/dist.armv7/$BINDIR/rdjpgcom \
|
||||||
|
-output $PKGROOT/$BINDIR/rdjpgcom
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/wrjpgcom \
|
||||||
|
-arch arm $TMPDIR/dist.armv7/$BINDIR/wrjpgcom \
|
||||||
|
-output $PKGROOT/$BINDIR/wrjpgcom
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ $BUILDARMV7S = 1 ]; then
|
if [ $BUILDARMV7S = 1 ]; then
|
||||||
if [ ! -d $BUILDDIRARMV7S ]; then
|
if [ ! -d $BUILDDIRARMV7S ]; then
|
||||||
echo ERROR: ARM v7s build directory $BUILDDIRARMV7S does not exist
|
echo ERROR: ARMv7s build directory $BUILDDIRARMV7S does not exist
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [ ! -f $BUILDDIRARMV7S/Makefile ]; then
|
if [ ! -f $BUILDDIRARMV7S/Makefile ]; then
|
||||||
echo ERROR: ARM v7s build directory $BUILDDIRARMV7S is not configured
|
echo ERROR: ARMv7s build directory $BUILDDIRARMV7S is not configured
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
mkdir -p $TMPDIR/dist.armv7s
|
mkdir -p $TMPDIR/dist.armv7s
|
||||||
pushd $BUILDDIRARMV7S
|
pushd $BUILDDIRARMV7S
|
||||||
make install DESTDIR=$TMPDIR/dist.armv7s
|
make install DESTDIR=$TMPDIR/dist.armv7s
|
||||||
popd
|
popd
|
||||||
lipo -create \
|
if [ ! -h $TMPDIR/dist.armv7s/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -a \
|
||||||
|
! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
|
||||||
|
-arch arm $TMPDIR/dist.armv7s/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
|
||||||
|
-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib
|
||||||
|
elif [ ! -h $TMPDIR/dist.armv7s/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \
|
||||||
|
! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
|
||||||
|
-arch arm $TMPDIR/dist.armv7s/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
|
||||||
|
-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib
|
||||||
|
fi
|
||||||
|
$LIPO -create \
|
||||||
$PKGROOT/$LIBDIR/libjpeg.a \
|
$PKGROOT/$LIBDIR/libjpeg.a \
|
||||||
-arch arm $TMPDIR/dist.armv7s/$LIBDIR/libjpeg.a \
|
-arch arm $TMPDIR/dist.armv7s/$LIBDIR/libjpeg.a \
|
||||||
-output $PKGROOT/$LIBDIR/libjpeg.a
|
-output $PKGROOT/$LIBDIR/libjpeg.a
|
||||||
lipo -create \
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libturbojpeg.0.dylib \
|
||||||
|
-arch arm $TMPDIR/dist.armv7s/$LIBDIR/libturbojpeg.0.dylib \
|
||||||
|
-output $PKGROOT/$LIBDIR/libturbojpeg.0.dylib
|
||||||
|
$LIPO -create \
|
||||||
$PKGROOT/$LIBDIR/libturbojpeg.a \
|
$PKGROOT/$LIBDIR/libturbojpeg.a \
|
||||||
-arch arm $TMPDIR/dist.armv7s/$LIBDIR/libturbojpeg.a \
|
-arch arm $TMPDIR/dist.armv7s/$LIBDIR/libturbojpeg.a \
|
||||||
-output $PKGROOT/$LIBDIR/libturbojpeg.a
|
-output $PKGROOT/$LIBDIR/libturbojpeg.a
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/cjpeg \
|
||||||
|
-arch arm $TMPDIR/dist.armv7s/$BINDIR/cjpeg \
|
||||||
|
-output $PKGROOT/$BINDIR/cjpeg
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/djpeg \
|
||||||
|
-arch arm $TMPDIR/dist.armv7s/$BINDIR/djpeg \
|
||||||
|
-output $PKGROOT/$BINDIR/djpeg
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/jpegtran \
|
||||||
|
-arch arm $TMPDIR/dist.armv7s/$BINDIR/jpegtran \
|
||||||
|
-output $PKGROOT/$BINDIR/jpegtran
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/tjbench \
|
||||||
|
-arch arm $TMPDIR/dist.armv7s/$BINDIR/tjbench \
|
||||||
|
-output $PKGROOT/$BINDIR/tjbench
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/rdjpgcom \
|
||||||
|
-arch arm $TMPDIR/dist.armv7s/$BINDIR/rdjpgcom \
|
||||||
|
-output $PKGROOT/$BINDIR/rdjpgcom
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/wrjpgcom \
|
||||||
|
-arch arm $TMPDIR/dist.armv7s/$BINDIR/wrjpgcom \
|
||||||
|
-output $PKGROOT/$BINDIR/wrjpgcom
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $BUILDARMV8 = 1 ]; then
|
||||||
|
if [ ! -d $BUILDDIRARMV8 ]; then
|
||||||
|
echo ERROR: ARMv8 build directory $BUILDDIRARMV8 does not exist
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if [ ! -f $BUILDDIRARMV8/Makefile ]; then
|
||||||
|
echo ERROR: ARMv8 build directory $BUILDDIRARMV8 is not configured
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
mkdir -p $TMPDIR/dist.armv8
|
||||||
|
pushd $BUILDDIRARMV8
|
||||||
|
make install DESTDIR=$TMPDIR/dist.armv8
|
||||||
|
popd
|
||||||
|
if [ ! -h $TMPDIR/dist.armv8/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -a \
|
||||||
|
! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
|
||||||
|
-arch arm64 $TMPDIR/dist.armv8/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
|
||||||
|
-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib
|
||||||
|
elif [ ! -h $TMPDIR/dist.armv8/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \
|
||||||
|
! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
|
||||||
|
-arch arm64 $TMPDIR/dist.armv8/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
|
||||||
|
-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib
|
||||||
|
fi
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libjpeg.a \
|
||||||
|
-arch arm64 $TMPDIR/dist.armv8/$LIBDIR/libjpeg.a \
|
||||||
|
-output $PKGROOT/$LIBDIR/libjpeg.a
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libturbojpeg.0.dylib \
|
||||||
|
-arch arm64 $TMPDIR/dist.armv8/$LIBDIR/libturbojpeg.0.dylib \
|
||||||
|
-output $PKGROOT/$LIBDIR/libturbojpeg.0.dylib
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$LIBDIR/libturbojpeg.a \
|
||||||
|
-arch arm64 $TMPDIR/dist.armv8/$LIBDIR/libturbojpeg.a \
|
||||||
|
-output $PKGROOT/$LIBDIR/libturbojpeg.a
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/cjpeg \
|
||||||
|
-arch arm64 $TMPDIR/dist.armv8/$BINDIR/cjpeg \
|
||||||
|
-output $PKGROOT/$BINDIR/cjpeg
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/djpeg \
|
||||||
|
-arch arm64 $TMPDIR/dist.armv8/$BINDIR/djpeg \
|
||||||
|
-output $PKGROOT/$BINDIR/djpeg
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/jpegtran \
|
||||||
|
-arch arm64 $TMPDIR/dist.armv8/$BINDIR/jpegtran \
|
||||||
|
-output $PKGROOT/$BINDIR/jpegtran
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/tjbench \
|
||||||
|
-arch arm64 $TMPDIR/dist.armv8/$BINDIR/tjbench \
|
||||||
|
-output $PKGROOT/$BINDIR/tjbench
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/rdjpgcom \
|
||||||
|
-arch arm64 $TMPDIR/dist.armv8/$BINDIR/rdjpgcom \
|
||||||
|
-output $PKGROOT/$BINDIR/rdjpgcom
|
||||||
|
$LIPO -create \
|
||||||
|
$PKGROOT/$BINDIR/wrjpgcom \
|
||||||
|
-arch arm64 $TMPDIR/dist.armv8/$BINDIR/wrjpgcom \
|
||||||
|
-output $PKGROOT/$BINDIR/wrjpgcom
|
||||||
fi
|
fi
|
||||||
|
|
||||||
install_name_tool -id $LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib
|
install_name_tool -id $LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib
|
||||||
|
|||||||
@@ -111,6 +111,7 @@ Section "Uninstall"
|
|||||||
|
|
||||||
!ifdef GCC
|
!ifdef GCC
|
||||||
Delete $INSTDIR\bin\libjpeg-@DLL_VERSION@.dll
|
Delete $INSTDIR\bin\libjpeg-@DLL_VERSION@.dll
|
||||||
|
Delete $INSTDIR\bin\libturbojpeg.dll
|
||||||
Delete $SYSDIR\libturbojpeg.dll
|
Delete $SYSDIR\libturbojpeg.dll
|
||||||
Delete $INSTDIR\lib\libturbojpeg.dll.a"
|
Delete $INSTDIR\lib\libturbojpeg.dll.a"
|
||||||
Delete $INSTDIR\lib\libturbojpeg.a"
|
Delete $INSTDIR\lib\libturbojpeg.a"
|
||||||
@@ -118,6 +119,7 @@ Section "Uninstall"
|
|||||||
Delete $INSTDIR\lib\libjpeg.a"
|
Delete $INSTDIR\lib\libjpeg.a"
|
||||||
!else
|
!else
|
||||||
Delete $INSTDIR\bin\jpeg@DLL_VERSION@.dll
|
Delete $INSTDIR\bin\jpeg@DLL_VERSION@.dll
|
||||||
|
Delete $INSTDIR\bin\turbojpeg.dll
|
||||||
Delete $SYSDIR\turbojpeg.dll
|
Delete $SYSDIR\turbojpeg.dll
|
||||||
Delete $INSTDIR\lib\jpeg.lib
|
Delete $INSTDIR\lib\jpeg.lib
|
||||||
Delete $INSTDIR\lib\jpeg-static.lib
|
Delete $INSTDIR\lib\jpeg-static.lib
|
||||||
|
|||||||
@@ -72,9 +72,16 @@ endif
|
|||||||
|
|
||||||
if SIMD_POWERPC
|
if SIMD_POWERPC
|
||||||
|
|
||||||
libsimd_la_SOURCES = jsimd_powerpc.c jsimd_powerpc_altivec.c
|
libsimd_la_SOURCES = jsimd_powerpc.c \
|
||||||
|
jccolor-altivec.c jcgray-altivec.c \
|
||||||
|
jfdctfst-altivec.c jfdctint-altivec.c \
|
||||||
|
jidctfst-altivec.c jidctint-altivec.c \
|
||||||
|
jquanti-altivec.c
|
||||||
libsimd_la_CFLAGS = -maltivec
|
libsimd_la_CFLAGS = -maltivec
|
||||||
|
|
||||||
|
jccolor-altivec.lo: jccolext-altivec.c
|
||||||
|
jcgray-altivec.lo: jcgryext-altivec.c
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
AM_CPPFLAGS = -I$(top_srcdir)
|
AM_CPPFLAGS = -I$(top_srcdir)
|
||||||
|
|||||||
250
simd/jccolext-altivec.c
Normal file
250
simd/jccolext-altivec.c
Normal file
@@ -0,0 +1,250 @@
|
|||||||
|
/*
|
||||||
|
* AltiVec optimizations for libjpeg-turbo
|
||||||
|
*
|
||||||
|
* Copyright (C) 2014, D. R. Commander.
|
||||||
|
* Copyright (C) 2014, Jay Foad.
|
||||||
|
* All rights reserved.
|
||||||
|
* This software is provided 'as-is', without any express or implied
|
||||||
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
|
* arising from the use of this software.
|
||||||
|
*
|
||||||
|
* Permission is granted to anyone to use this software for any purpose,
|
||||||
|
* including commercial applications, and to alter it and redistribute it
|
||||||
|
* freely, subject to the following restrictions:
|
||||||
|
*
|
||||||
|
* 1. The origin of this software must not be misrepresented; you must not
|
||||||
|
* claim that you wrote the original software. If you use this software
|
||||||
|
* in a product, an acknowledgment in the product documentation would be
|
||||||
|
* appreciated but is not required.
|
||||||
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
* misrepresented as being the original software.
|
||||||
|
* 3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This file is included by jccolor-altivec.c */
|
||||||
|
|
||||||
|
|
||||||
|
void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
|
||||||
|
JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows)
|
||||||
|
{
|
||||||
|
JSAMPROW inptr;
|
||||||
|
JSAMPROW outptr0, outptr1, outptr2;
|
||||||
|
int pitch;
|
||||||
|
__vector unsigned char rgb0, rgb1 = {0}, rgb2 = {0}, rgb3 = {0}, rgbg0,
|
||||||
|
rgbg1, rgbg2, rgbg3, y, cb, cr;
|
||||||
|
#if RGB_PIXELSIZE == 4
|
||||||
|
__vector unsigned char rgb4;
|
||||||
|
#endif
|
||||||
|
__vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3;
|
||||||
|
__vector unsigned short y01, y23, cr01, cr23, cb01, cb23;
|
||||||
|
__vector int y0, y1, y2, y3, cr0, cr1, cr2, cr3, cb0, cb1, cb2, cb3;
|
||||||
|
|
||||||
|
/* Constants */
|
||||||
|
__vector short pw_f0299_f0337 = { __4X2(F_0_299, F_0_337) },
|
||||||
|
pw_f0114_f0250 = { __4X2(F_0_114, F_0_250) },
|
||||||
|
pw_mf016_mf033 = { __4X2(-F_0_168, -F_0_331) },
|
||||||
|
pw_mf008_mf041 = { __4X2(-F_0_081, -F_0_418) };
|
||||||
|
__vector unsigned short pw_f050_f000 = { __4X2(F_0_500, 0) };
|
||||||
|
__vector int pd_onehalf = { __4X(ONE_HALF) },
|
||||||
|
pd_onehalfm1_cj = { __4X(ONE_HALF - 1 + (CENTERJSAMPLE << SCALEBITS)) };
|
||||||
|
__vector unsigned char zero = { __16X(0) },
|
||||||
|
shift_pack_index =
|
||||||
|
{ 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
|
||||||
|
|
||||||
|
while (--num_rows >= 0) {
|
||||||
|
inptr = *input_buf++;
|
||||||
|
outptr0 = output_buf[0][output_row];
|
||||||
|
outptr1 = output_buf[1][output_row];
|
||||||
|
outptr2 = output_buf[2][output_row];
|
||||||
|
output_row++;
|
||||||
|
|
||||||
|
for (pitch = img_width * RGB_PIXELSIZE; pitch > 0;
|
||||||
|
pitch -= RGB_PIXELSIZE * 16, inptr += RGB_PIXELSIZE * 16,
|
||||||
|
outptr0 += 16, outptr1 += 16, outptr2 += 16) {
|
||||||
|
|
||||||
|
#if RGB_PIXELSIZE == 3
|
||||||
|
/* Load 16 pixels == 48 bytes */
|
||||||
|
if ((size_t)inptr & 15) {
|
||||||
|
__vector unsigned char unaligned_shift_index;
|
||||||
|
rgb0 = vec_ld(0, inptr);
|
||||||
|
if (pitch > 16)
|
||||||
|
rgb1 = vec_ld(16, inptr);
|
||||||
|
else
|
||||||
|
rgb1 = vec_ld(-1, inptr + pitch);
|
||||||
|
if (pitch > 32)
|
||||||
|
rgb2 = vec_ld(32, inptr);
|
||||||
|
else
|
||||||
|
rgb2 = vec_ld(-1, inptr + pitch);
|
||||||
|
if (pitch > 48)
|
||||||
|
rgb3 = vec_ld(48, inptr);
|
||||||
|
else
|
||||||
|
rgb3 = vec_ld(-1, inptr + pitch);
|
||||||
|
unaligned_shift_index = vec_lvsl(0, inptr);
|
||||||
|
rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index);
|
||||||
|
rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index);
|
||||||
|
rgb2 = vec_perm(rgb2, rgb3, unaligned_shift_index);
|
||||||
|
} else {
|
||||||
|
rgb0 = vec_ld(0, inptr);
|
||||||
|
if (pitch > 16)
|
||||||
|
rgb1 = vec_ld(16, inptr);
|
||||||
|
if (pitch > 32)
|
||||||
|
rgb2 = vec_ld(32, inptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5
|
||||||
|
* rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga
|
||||||
|
* rgb2 = Ba Rb Gb Bb Rc Gc Bc Rd Gd Bd Re Ge Be Rf Gf Bf
|
||||||
|
*
|
||||||
|
* rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3
|
||||||
|
* rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7
|
||||||
|
* rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb
|
||||||
|
* rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf
|
||||||
|
*/
|
||||||
|
rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX0);
|
||||||
|
rgbg1 = vec_perm(rgb0, rgb1, (__vector unsigned char)RGBG_INDEX1);
|
||||||
|
rgbg2 = vec_perm(rgb1, rgb2, (__vector unsigned char)RGBG_INDEX2);
|
||||||
|
rgbg3 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX3);
|
||||||
|
#else
|
||||||
|
/* Load 16 pixels == 64 bytes */
|
||||||
|
if ((size_t)inptr & 15) {
|
||||||
|
__vector unsigned char unaligned_shift_index;
|
||||||
|
rgb0 = vec_ld(0, inptr);
|
||||||
|
if (pitch > 16)
|
||||||
|
rgb1 = vec_ld(16, inptr);
|
||||||
|
else
|
||||||
|
rgb1 = vec_ld(-1, inptr + pitch);
|
||||||
|
if (pitch > 32)
|
||||||
|
rgb2 = vec_ld(32, inptr);
|
||||||
|
else
|
||||||
|
rgb2 = vec_ld(-1, inptr + pitch);
|
||||||
|
if (pitch > 48)
|
||||||
|
rgb3 = vec_ld(48, inptr);
|
||||||
|
else
|
||||||
|
rgb3 = vec_ld(-1, inptr + pitch);
|
||||||
|
if (pitch > 64)
|
||||||
|
rgb4 = vec_ld(64, inptr);
|
||||||
|
else
|
||||||
|
rgb4 = vec_ld(-1, inptr + pitch);
|
||||||
|
unaligned_shift_index = vec_lvsl(0, inptr);
|
||||||
|
rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index);
|
||||||
|
rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index);
|
||||||
|
rgb2 = vec_perm(rgb2, rgb3, unaligned_shift_index);
|
||||||
|
rgb3 = vec_perm(rgb3, rgb4, unaligned_shift_index);
|
||||||
|
} else {
|
||||||
|
rgb0 = vec_ld(0, inptr);
|
||||||
|
if (pitch > 16)
|
||||||
|
rgb1 = vec_ld(16, inptr);
|
||||||
|
if (pitch > 32)
|
||||||
|
rgb2 = vec_ld(32, inptr);
|
||||||
|
if (pitch > 48)
|
||||||
|
rgb3 = vec_ld(48, inptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* rgb0 = R0 G0 B0 X0 R1 G1 B1 X1 R2 G2 B2 X2 R3 G3 B3 X3
|
||||||
|
* rgb1 = R4 G4 B4 X4 R5 G5 B5 X5 R6 G6 B6 X6 R7 G7 B7 X7
|
||||||
|
* rgb2 = R8 G8 B8 X8 R9 G9 B9 X9 Ra Ga Ba Xa Rb Gb Bb Xb
|
||||||
|
* rgb3 = Rc Gc Bc Xc Rd Gd Bd Xd Re Ge Be Xe Rf Gf Bf Xf
|
||||||
|
*
|
||||||
|
* rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3
|
||||||
|
* rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7
|
||||||
|
* rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb
|
||||||
|
* rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf
|
||||||
|
*/
|
||||||
|
rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX);
|
||||||
|
rgbg1 = vec_perm(rgb1, rgb1, (__vector unsigned char)RGBG_INDEX);
|
||||||
|
rgbg2 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX);
|
||||||
|
rgbg3 = vec_perm(rgb3, rgb3, (__vector unsigned char)RGBG_INDEX);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* rg0 = R0 G0 R1 G1 R2 G2 R3 G3
|
||||||
|
* bg0 = B0 G0 B1 G1 B2 G2 B3 G3
|
||||||
|
* ...
|
||||||
|
*
|
||||||
|
* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't
|
||||||
|
* support unsigned vectors.
|
||||||
|
*/
|
||||||
|
rg0 = (__vector signed short)vec_mergeh(zero, rgbg0);
|
||||||
|
bg0 = (__vector signed short)vec_mergel(zero, rgbg0);
|
||||||
|
rg1 = (__vector signed short)vec_mergeh(zero, rgbg1);
|
||||||
|
bg1 = (__vector signed short)vec_mergel(zero, rgbg1);
|
||||||
|
rg2 = (__vector signed short)vec_mergeh(zero, rgbg2);
|
||||||
|
bg2 = (__vector signed short)vec_mergel(zero, rgbg2);
|
||||||
|
rg3 = (__vector signed short)vec_mergeh(zero, rgbg3);
|
||||||
|
bg3 = (__vector signed short)vec_mergel(zero, rgbg3);
|
||||||
|
|
||||||
|
/* (Original)
|
||||||
|
* Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
|
||||||
|
* Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
|
||||||
|
* Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
|
||||||
|
*
|
||||||
|
* (This implementation)
|
||||||
|
* Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
|
||||||
|
* Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
|
||||||
|
* Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Calculate Y values */
|
||||||
|
|
||||||
|
y0 = vec_msums(rg0, pw_f0299_f0337, pd_onehalf);
|
||||||
|
y1 = vec_msums(rg1, pw_f0299_f0337, pd_onehalf);
|
||||||
|
y2 = vec_msums(rg2, pw_f0299_f0337, pd_onehalf);
|
||||||
|
y3 = vec_msums(rg3, pw_f0299_f0337, pd_onehalf);
|
||||||
|
y0 = vec_msums(bg0, pw_f0114_f0250, y0);
|
||||||
|
y1 = vec_msums(bg1, pw_f0114_f0250, y1);
|
||||||
|
y2 = vec_msums(bg2, pw_f0114_f0250, y2);
|
||||||
|
y3 = vec_msums(bg3, pw_f0114_f0250, y3);
|
||||||
|
/* Clever way to avoid 4 shifts + 2 packs. This packs the high word from
|
||||||
|
* each dword into a new 16-bit vector, which is the equivalent of
|
||||||
|
* descaling the 32-bit results (right-shifting by 16 bits) and then
|
||||||
|
* packing them.
|
||||||
|
*/
|
||||||
|
y01 = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1,
|
||||||
|
shift_pack_index);
|
||||||
|
y23 = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3,
|
||||||
|
shift_pack_index);
|
||||||
|
y = vec_pack(y01, y23);
|
||||||
|
vec_st(y, 0, outptr0);
|
||||||
|
|
||||||
|
/* Calculate Cb values */
|
||||||
|
cb0 = vec_msums(rg0, pw_mf016_mf033, pd_onehalfm1_cj);
|
||||||
|
cb1 = vec_msums(rg1, pw_mf016_mf033, pd_onehalfm1_cj);
|
||||||
|
cb2 = vec_msums(rg2, pw_mf016_mf033, pd_onehalfm1_cj);
|
||||||
|
cb3 = vec_msums(rg3, pw_mf016_mf033, pd_onehalfm1_cj);
|
||||||
|
cb0 = (__vector int)vec_msum((__vector unsigned short)bg0, pw_f050_f000,
|
||||||
|
(__vector unsigned int)cb0);
|
||||||
|
cb1 = (__vector int)vec_msum((__vector unsigned short)bg1, pw_f050_f000,
|
||||||
|
(__vector unsigned int)cb1);
|
||||||
|
cb2 = (__vector int)vec_msum((__vector unsigned short)bg2, pw_f050_f000,
|
||||||
|
(__vector unsigned int)cb2);
|
||||||
|
cb3 = (__vector int)vec_msum((__vector unsigned short)bg3, pw_f050_f000,
|
||||||
|
(__vector unsigned int)cb3);
|
||||||
|
cb01 = vec_perm((__vector unsigned short)cb0,
|
||||||
|
(__vector unsigned short)cb1, shift_pack_index);
|
||||||
|
cb23 = vec_perm((__vector unsigned short)cb2,
|
||||||
|
(__vector unsigned short)cb3, shift_pack_index);
|
||||||
|
cb = vec_pack(cb01, cb23);
|
||||||
|
vec_st(cb, 0, outptr1);
|
||||||
|
|
||||||
|
/* Calculate Cr values */
|
||||||
|
cr0 = vec_msums(bg0, pw_mf008_mf041, pd_onehalfm1_cj);
|
||||||
|
cr1 = vec_msums(bg1, pw_mf008_mf041, pd_onehalfm1_cj);
|
||||||
|
cr2 = vec_msums(bg2, pw_mf008_mf041, pd_onehalfm1_cj);
|
||||||
|
cr3 = vec_msums(bg3, pw_mf008_mf041, pd_onehalfm1_cj);
|
||||||
|
cr0 = (__vector int)vec_msum((__vector unsigned short)rg0, pw_f050_f000,
|
||||||
|
(__vector unsigned int)cr0);
|
||||||
|
cr1 = (__vector int)vec_msum((__vector unsigned short)rg1, pw_f050_f000,
|
||||||
|
(__vector unsigned int)cr1);
|
||||||
|
cr2 = (__vector int)vec_msum((__vector unsigned short)rg2, pw_f050_f000,
|
||||||
|
(__vector unsigned int)cr2);
|
||||||
|
cr3 = (__vector int)vec_msum((__vector unsigned short)rg3, pw_f050_f000,
|
||||||
|
(__vector unsigned int)cr3);
|
||||||
|
cr01 = vec_perm((__vector unsigned short)cr0,
|
||||||
|
(__vector unsigned short)cr1, shift_pack_index);
|
||||||
|
cr23 = vec_perm((__vector unsigned short)cr2,
|
||||||
|
(__vector unsigned short)cr3, shift_pack_index);
|
||||||
|
cr = vec_pack(cr01, cr23);
|
||||||
|
vec_st(cr, 0, outptr2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
104
simd/jccolor-altivec.c
Normal file
104
simd/jccolor-altivec.c
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
/*
|
||||||
|
* AltiVec optimizations for libjpeg-turbo
|
||||||
|
*
|
||||||
|
* Copyright (C) 2014, D. R. Commander.
|
||||||
|
* All rights reserved.
|
||||||
|
* This software is provided 'as-is', without any express or implied
|
||||||
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
|
* arising from the use of this software.
|
||||||
|
*
|
||||||
|
* Permission is granted to anyone to use this software for any purpose,
|
||||||
|
* including commercial applications, and to alter it and redistribute it
|
||||||
|
* freely, subject to the following restrictions:
|
||||||
|
*
|
||||||
|
* 1. The origin of this software must not be misrepresented; you must not
|
||||||
|
* claim that you wrote the original software. If you use this software
|
||||||
|
* in a product, an acknowledgment in the product documentation would be
|
||||||
|
* appreciated but is not required.
|
||||||
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
* misrepresented as being the original software.
|
||||||
|
* 3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* RGB --> YCC CONVERSION */
|
||||||
|
|
||||||
|
#include "jsimd_altivec.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define F_0_081 5329 /* FIX(0.08131) */
|
||||||
|
#define F_0_114 7471 /* FIX(0.11400) */
|
||||||
|
#define F_0_168 11059 /* FIX(0.16874) */
|
||||||
|
#define F_0_250 16384 /* FIX(0.25000) */
|
||||||
|
#define F_0_299 19595 /* FIX(0.29900) */
|
||||||
|
#define F_0_331 21709 /* FIX(0.33126) */
|
||||||
|
#define F_0_418 27439 /* FIX(0.41869) */
|
||||||
|
#define F_0_500 32768 /* FIX(0.50000) */
|
||||||
|
#define F_0_587 38470 /* FIX(0.58700) */
|
||||||
|
#define F_0_337 (F_0_587 - F_0_250) /* FIX(0.58700) - FIX(0.25000) */
|
||||||
|
|
||||||
|
#define SCALEBITS 16
|
||||||
|
#define ONE_HALF (1 << (SCALEBITS - 1))
|
||||||
|
|
||||||
|
|
||||||
|
#define RGBG_INDEX0 {0,1,3,4,6,7,9,10,2,1,5,4,8,7,11,10}
|
||||||
|
#define RGBG_INDEX1 {12,13,15,16,18,19,21,22,14,13,17,16,20,19,23,22}
|
||||||
|
#define RGBG_INDEX2 {8,9,11,12,14,15,17,18,10,9,13,12,16,15,19,18}
|
||||||
|
#define RGBG_INDEX3 {4,5,7,8,10,11,13,14,6,5,9,8,12,11,15,14}
|
||||||
|
#include "jccolext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
|
||||||
|
#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
||||||
|
#define jsimd_rgb_ycc_convert_altivec jsimd_extrgb_ycc_convert_altivec
|
||||||
|
#include "jccolext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
#undef RGBG_INDEX0
|
||||||
|
#undef RGBG_INDEX1
|
||||||
|
#undef RGBG_INDEX2
|
||||||
|
#undef RGBG_INDEX3
|
||||||
|
#undef jsimd_rgb_ycc_convert_altivec
|
||||||
|
|
||||||
|
#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
||||||
|
#define RGBG_INDEX {0,1,4,5,8,9,12,13,2,1,6,5,10,9,14,13}
|
||||||
|
#define jsimd_rgb_ycc_convert_altivec jsimd_extrgbx_ycc_convert_altivec
|
||||||
|
#include "jccolext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
#undef RGBG_INDEX
|
||||||
|
#undef jsimd_rgb_ycc_convert_altivec
|
||||||
|
|
||||||
|
#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
||||||
|
#define RGBG_INDEX0 {2,1,5,4,8,7,11,10,0,1,3,4,6,7,9,10}
|
||||||
|
#define RGBG_INDEX1 {14,13,17,16,20,19,23,22,12,13,15,16,18,19,21,22}
|
||||||
|
#define RGBG_INDEX2 {10,9,13,12,16,15,19,18,8,9,11,12,14,15,17,18}
|
||||||
|
#define RGBG_INDEX3 {6,5,9,8,12,11,15,14,4,5,7,8,10,11,13,14}
|
||||||
|
#define jsimd_rgb_ycc_convert_altivec jsimd_extbgr_ycc_convert_altivec
|
||||||
|
#include "jccolext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
#undef RGBG_INDEX0
|
||||||
|
#undef RGBG_INDEX1
|
||||||
|
#undef RGBG_INDEX2
|
||||||
|
#undef RGBG_INDEX3
|
||||||
|
#undef jsimd_rgb_ycc_convert_altivec
|
||||||
|
|
||||||
|
#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
||||||
|
#define RGBG_INDEX {2,1,6,5,10,9,14,13,0,1,4,5,8,9,12,13}
|
||||||
|
#define jsimd_rgb_ycc_convert_altivec jsimd_extbgrx_ycc_convert_altivec
|
||||||
|
#include "jccolext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
#undef RGBG_INDEX
|
||||||
|
#undef jsimd_rgb_ycc_convert_altivec
|
||||||
|
|
||||||
|
#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
||||||
|
#define RGBG_INDEX {3,2,7,6,11,10,15,14,1,2,5,6,9,10,13,14}
|
||||||
|
#define jsimd_rgb_ycc_convert_altivec jsimd_extxbgr_ycc_convert_altivec
|
||||||
|
#include "jccolext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
#undef RGBG_INDEX
|
||||||
|
#undef jsimd_rgb_ycc_convert_altivec
|
||||||
|
|
||||||
|
#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
||||||
|
#define RGBG_INDEX {1,2,5,6,9,10,13,14,3,2,7,6,11,10,15,14}
|
||||||
|
#define jsimd_rgb_ycc_convert_altivec jsimd_extxrgb_ycc_convert_altivec
|
||||||
|
#include "jccolext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
#undef RGBG_INDEX
|
||||||
|
#undef jsimd_rgb_ycc_convert_altivec
|
||||||
99
simd/jcgray-altivec.c
Normal file
99
simd/jcgray-altivec.c
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
/*
|
||||||
|
* AltiVec optimizations for libjpeg-turbo
|
||||||
|
*
|
||||||
|
* Copyright (C) 2014, D. R. Commander.
|
||||||
|
* All rights reserved.
|
||||||
|
* This software is provided 'as-is', without any express or implied
|
||||||
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
|
* arising from the use of this software.
|
||||||
|
*
|
||||||
|
* Permission is granted to anyone to use this software for any purpose,
|
||||||
|
* including commercial applications, and to alter it and redistribute it
|
||||||
|
* freely, subject to the following restrictions:
|
||||||
|
*
|
||||||
|
* 1. The origin of this software must not be misrepresented; you must not
|
||||||
|
* claim that you wrote the original software. If you use this software
|
||||||
|
* in a product, an acknowledgment in the product documentation would be
|
||||||
|
* appreciated but is not required.
|
||||||
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
* misrepresented as being the original software.
|
||||||
|
* 3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* RGB --> GRAYSCALE CONVERSION */
|
||||||
|
|
||||||
|
#include "jsimd_altivec.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Fixed-point coefficients for the RGB -> Y (luminance) conversion.  Each
 * value is the real coefficient multiplied by 2^SCALEBITS (65536) and
 * rounded, e.g. 19595 ~= 0.299 * 65536.
 *
 * F_0_337 is the green coefficient with 0.25 taken out of it, so that
 * F_0_337 + F_0_250 == F_0_587.
 * NOTE(review): presumably the split exists because 38470 does not fit in a
 * signed 16-bit vector element -- confirm against the included template.
 *
 * ONE_HALF is 0.5 in the same fixed-point scale.
 */
#define F_0_114 7471 /* FIX(0.11400) */
|
||||||
|
#define F_0_250 16384 /* FIX(0.25000) */
|
||||||
|
#define F_0_299 19595 /* FIX(0.29900) */
|
||||||
|
#define F_0_587 38470 /* FIX(0.58700) */
|
||||||
|
#define F_0_337 (F_0_587 - F_0_250) /* FIX(0.58700) - FIX(0.25000) */
|
||||||
|
|
||||||
|
#define SCALEBITS 16
|
||||||
|
#define ONE_HALF (1 << (SCALEBITS - 1))
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Template instantiation.  jcgryext-altivec.c is included once per pixel
 * format.  Before each #include, this file defines:
 *   - RGB_PIXELSIZE                  (selects the 3-byte or 4-byte code path)
 *   - RGBG_INDEX0..RGBG_INDEX3       (3-byte formats), or
 *     RGBG_INDEX                     (4-byte formats): byte indexes passed to
 *     vec_perm() so that each result vector holds (R,G) pairs in its first 8
 *     bytes and (B,G) pairs in its last 8 bytes
 *   - jsimd_rgb_gray_convert_altivec (renamed to the per-format symbol)
 * and undefines them again afterwards so the next section starts clean.
 *
 * NOTE(review): the first #include below relies on an RGB_PIXELSIZE that is
 * defined outside this file (not visible here) and keeps the generic
 * function name -- confirm which header supplies it.
 */
#define RGBG_INDEX0 {0,1,3,4,6,7,9,10,2,1,5,4,8,7,11,10}
|
||||||
|
#define RGBG_INDEX1 {12,13,15,16,18,19,21,22,14,13,17,16,20,19,23,22}
|
||||||
|
#define RGBG_INDEX2 {8,9,11,12,14,15,17,18,10,9,13,12,16,15,19,18}
|
||||||
|
#define RGBG_INDEX3 {4,5,7,8,10,11,13,14,6,5,9,8,12,11,15,14}
|
||||||
|
#include "jcgryext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
|
||||||
|
/* R,G,B byte order, 3 bytes per pixel (reuses the indexes defined above) */
#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
||||||
|
#define jsimd_rgb_gray_convert_altivec jsimd_extrgb_gray_convert_altivec
|
||||||
|
#include "jcgryext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
#undef RGBG_INDEX0
|
||||||
|
#undef RGBG_INDEX1
|
||||||
|
#undef RGBG_INDEX2
|
||||||
|
#undef RGBG_INDEX3
|
||||||
|
#undef jsimd_rgb_gray_convert_altivec
|
||||||
|
|
||||||
|
/* R,G,B,X byte order, 4 bytes per pixel */
#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
||||||
|
#define RGBG_INDEX {0,1,4,5,8,9,12,13,2,1,6,5,10,9,14,13}
|
||||||
|
#define jsimd_rgb_gray_convert_altivec jsimd_extrgbx_gray_convert_altivec
|
||||||
|
#include "jcgryext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
#undef RGBG_INDEX
|
||||||
|
#undef jsimd_rgb_gray_convert_altivec
|
||||||
|
|
||||||
|
/* B,G,R byte order, 3 bytes per pixel */
#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
||||||
|
#define RGBG_INDEX0 {2,1,5,4,8,7,11,10,0,1,3,4,6,7,9,10}
|
||||||
|
#define RGBG_INDEX1 {14,13,17,16,20,19,23,22,12,13,15,16,18,19,21,22}
|
||||||
|
#define RGBG_INDEX2 {10,9,13,12,16,15,19,18,8,9,11,12,14,15,17,18}
|
||||||
|
#define RGBG_INDEX3 {6,5,9,8,12,11,15,14,4,5,7,8,10,11,13,14}
|
||||||
|
#define jsimd_rgb_gray_convert_altivec jsimd_extbgr_gray_convert_altivec
|
||||||
|
#include "jcgryext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
#undef RGBG_INDEX0
|
||||||
|
#undef RGBG_INDEX1
|
||||||
|
#undef RGBG_INDEX2
|
||||||
|
#undef RGBG_INDEX3
|
||||||
|
#undef jsimd_rgb_gray_convert_altivec
|
||||||
|
|
||||||
|
/* B,G,R,X byte order, 4 bytes per pixel */
#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
||||||
|
#define RGBG_INDEX {2,1,6,5,10,9,14,13,0,1,4,5,8,9,12,13}
|
||||||
|
#define jsimd_rgb_gray_convert_altivec jsimd_extbgrx_gray_convert_altivec
|
||||||
|
#include "jcgryext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
#undef RGBG_INDEX
|
||||||
|
#undef jsimd_rgb_gray_convert_altivec
|
||||||
|
|
||||||
|
/* X,B,G,R byte order, 4 bytes per pixel */
#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
||||||
|
#define RGBG_INDEX {3,2,7,6,11,10,15,14,1,2,5,6,9,10,13,14}
|
||||||
|
#define jsimd_rgb_gray_convert_altivec jsimd_extxbgr_gray_convert_altivec
|
||||||
|
#include "jcgryext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
#undef RGBG_INDEX
|
||||||
|
#undef jsimd_rgb_gray_convert_altivec
|
||||||
|
|
||||||
|
/* X,R,G,B byte order, 4 bytes per pixel */
#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
||||||
|
#define RGBG_INDEX {1,2,5,6,9,10,13,14,3,2,7,6,11,10,15,14}
|
||||||
|
#define jsimd_rgb_gray_convert_altivec jsimd_extxrgb_gray_convert_altivec
|
||||||
|
#include "jcgryext-altivec.c"
|
||||||
|
#undef RGB_PIXELSIZE
|
||||||
|
#undef RGBG_INDEX
|
||||||
|
#undef jsimd_rgb_gray_convert_altivec
|
||||||
200
simd/jcgryext-altivec.c
Normal file
200
simd/jcgryext-altivec.c
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
/*
|
||||||
|
* AltiVec optimizations for libjpeg-turbo
|
||||||
|
*
|
||||||
|
* Copyright (C) 2014, D. R. Commander.
|
||||||
|
* Copyright (C) 2014, Jay Foad.
|
||||||
|
* All rights reserved.
|
||||||
|
* This software is provided 'as-is', without any express or implied
|
||||||
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
|
* arising from the use of this software.
|
||||||
|
*
|
||||||
|
* Permission is granted to anyone to use this software for any purpose,
|
||||||
|
* including commercial applications, and to alter it and redistribute it
|
||||||
|
* freely, subject to the following restrictions:
|
||||||
|
*
|
||||||
|
* 1. The origin of this software must not be misrepresented; you must not
|
||||||
|
* claim that you wrote the original software. If you use this software
|
||||||
|
* in a product, an acknowledgment in the product documentation would be
|
||||||
|
* appreciated but is not required.
|
||||||
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
* misrepresented as being the original software.
|
||||||
|
* 3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This file is included by jcgray-altivec.c */
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Convert rows of packed RGB-type pixels to grayscale (Y) with AltiVec.
 *
 * The function name is a macro: the including file redefines it before each
 * #include of this template to produce one variant per pixel format.
 *
 * img_width   number of pixels in each row
 * input_buf   array of input row pointers; one pointer is consumed per row
 * output_buf  output image; Y samples are written to output_buf[0][row]
 * output_row  index of the first output row to write
 * num_rows    number of rows to convert
 *
 * Each inner-loop iteration handles 16 pixels (RGB_PIXELSIZE * 16 input
 * bytes) and stores one full 16-byte vector, even when fewer than 16 pixels
 * remain in the row.
 * NOTE(review): this presumably requires each output row to be 16-byte
 * aligned and padded to a multiple of 16 samples -- confirm with the caller.
 */
void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf,
|
||||||
|
JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows)
|
||||||
|
{
|
||||||
|
JSAMPROW inptr, outptr;
|
||||||
|
int pitch;
|
||||||
|
__vector unsigned char rgb0, rgb1 = {0}, rgb2 = {0}, rgb3 = {0}, rgbg0,
|
||||||
|
rgbg1, rgbg2, rgbg3, y;
|
||||||
|
#if RGB_PIXELSIZE == 4
|
||||||
|
__vector unsigned char rgb4;
|
||||||
|
#endif
|
||||||
|
__vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3;
|
||||||
|
__vector unsigned short y01, y23;
|
||||||
|
__vector int y0, y1, y2, y3;
|
||||||
|
|
||||||
|
/* Constants */
|
||||||
|
__vector short pw_f0299_f0337 = { __4X2(F_0_299, F_0_337) },
|
||||||
|
pw_f0114_f0250 = { __4X2(F_0_114, F_0_250) };
|
||||||
|
__vector int pd_onehalf = { __4X(ONE_HALF) };
|
||||||
|
__vector unsigned char zero = { __16X(0) },
|
||||||
|
shift_pack_index =
|
||||||
|
{ 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
|
||||||
|
|
||||||
|
while (--num_rows >= 0) {
|
||||||
|
inptr = *input_buf++;
|
||||||
|
outptr = output_buf[0][output_row];
|
||||||
|
output_row++;
|
||||||
|
|
||||||
|
/* pitch counts the input bytes still to be consumed in this row */
for (pitch = img_width * RGB_PIXELSIZE; pitch > 0;
|
||||||
|
pitch -= RGB_PIXELSIZE * 16, inptr += RGB_PIXELSIZE * 16,
|
||||||
|
outptr += 16) {
|
||||||
|
|
||||||
|
#if RGB_PIXELSIZE == 3
|
||||||
|
/* Load 16 pixels == 48 bytes */
|
||||||
|
/* Unaligned input: load one extra vector and realign with
 * vec_lvsl()/vec_perm().  When the row ends before a given offset, the
 * vector holding the row's last byte (inptr + pitch - 1) is loaded
 * instead of reading further ahead.
 */
if ((size_t)inptr & 15) {
|
||||||
|
__vector unsigned char unaligned_shift_index;
|
||||||
|
rgb0 = vec_ld(0, inptr);
|
||||||
|
if (pitch > 16)
|
||||||
|
rgb1 = vec_ld(16, inptr);
|
||||||
|
else
|
||||||
|
rgb1 = vec_ld(-1, inptr + pitch);
|
||||||
|
if (pitch > 32)
|
||||||
|
rgb2 = vec_ld(32, inptr);
|
||||||
|
else
|
||||||
|
rgb2 = vec_ld(-1, inptr + pitch);
|
||||||
|
if (pitch > 48)
|
||||||
|
rgb3 = vec_ld(48, inptr);
|
||||||
|
else
|
||||||
|
rgb3 = vec_ld(-1, inptr + pitch);
|
||||||
|
unaligned_shift_index = vec_lvsl(0, inptr);
|
||||||
|
rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index);
|
||||||
|
rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index);
|
||||||
|
rgb2 = vec_perm(rgb2, rgb3, unaligned_shift_index);
|
||||||
|
} else {
|
||||||
|
/* Aligned input: vectors beyond the end of the row are not loaded and
 * keep whatever value they held before.
 */
rgb0 = vec_ld(0, inptr);
|
||||||
|
if (pitch > 16)
|
||||||
|
rgb1 = vec_ld(16, inptr);
|
||||||
|
if (pitch > 32)
|
||||||
|
rgb2 = vec_ld(32, inptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5
|
||||||
|
* rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga
|
||||||
|
* rgb2 = Ba Rb Gb Bb Rc Gc Bc Rd Gd Bd Re Ge Be Rf Gf Bf
|
||||||
|
*
|
||||||
|
* rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3
|
||||||
|
* rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7
|
||||||
|
* rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb
|
||||||
|
* rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf
|
||||||
|
*/
|
||||||
|
rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX0);
|
||||||
|
rgbg1 = vec_perm(rgb0, rgb1, (__vector unsigned char)RGBG_INDEX1);
|
||||||
|
rgbg2 = vec_perm(rgb1, rgb2, (__vector unsigned char)RGBG_INDEX2);
|
||||||
|
rgbg3 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX3);
|
||||||
|
#else
|
||||||
|
/* Load 16 pixels == 64 bytes */
|
||||||
|
/* Same unaligned/aligned handling as the 3-byte path, with one more
 * vector (rgb4 is only needed for realignment).
 */
if ((size_t)inptr & 15) {
|
||||||
|
__vector unsigned char unaligned_shift_index;
|
||||||
|
rgb0 = vec_ld(0, inptr);
|
||||||
|
if (pitch > 16)
|
||||||
|
rgb1 = vec_ld(16, inptr);
|
||||||
|
else
|
||||||
|
rgb1 = vec_ld(-1, inptr + pitch);
|
||||||
|
if (pitch > 32)
|
||||||
|
rgb2 = vec_ld(32, inptr);
|
||||||
|
else
|
||||||
|
rgb2 = vec_ld(-1, inptr + pitch);
|
||||||
|
if (pitch > 48)
|
||||||
|
rgb3 = vec_ld(48, inptr);
|
||||||
|
else
|
||||||
|
rgb3 = vec_ld(-1, inptr + pitch);
|
||||||
|
if (pitch > 64)
|
||||||
|
rgb4 = vec_ld(64, inptr);
|
||||||
|
else
|
||||||
|
rgb4 = vec_ld(-1, inptr + pitch);
|
||||||
|
unaligned_shift_index = vec_lvsl(0, inptr);
|
||||||
|
rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index);
|
||||||
|
rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index);
|
||||||
|
rgb2 = vec_perm(rgb2, rgb3, unaligned_shift_index);
|
||||||
|
rgb3 = vec_perm(rgb3, rgb4, unaligned_shift_index);
|
||||||
|
} else {
|
||||||
|
rgb0 = vec_ld(0, inptr);
|
||||||
|
if (pitch > 16)
|
||||||
|
rgb1 = vec_ld(16, inptr);
|
||||||
|
if (pitch > 32)
|
||||||
|
rgb2 = vec_ld(32, inptr);
|
||||||
|
if (pitch > 48)
|
||||||
|
rgb3 = vec_ld(48, inptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* rgb0 = R0 G0 B0 X0 R1 G1 B1 X1 R2 G2 B2 X2 R3 G3 B3 X3
|
||||||
|
* rgb1 = R4 G4 B4 X4 R5 G5 B5 X5 R6 G6 B6 X6 R7 G7 B7 X7
|
||||||
|
* rgb2 = R8 G8 B8 X8 R9 G9 B9 X9 Ra Ga Ba Xa Rb Gb Bb Xb
|
||||||
|
* rgb3 = Rc Gc Bc Xc Rd Gd Bd Xd Re Ge Be Xe Rf Gf Bf Xf
|
||||||
|
*
|
||||||
|
* rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3
|
||||||
|
* rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7
|
||||||
|
* rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb
|
||||||
|
* rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf
|
||||||
|
*/
|
||||||
|
rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX);
|
||||||
|
rgbg1 = vec_perm(rgb1, rgb1, (__vector unsigned char)RGBG_INDEX);
|
||||||
|
rgbg2 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX);
|
||||||
|
rgbg3 = vec_perm(rgb3, rgb3, (__vector unsigned char)RGBG_INDEX);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* rg0 = R0 G0 R1 G1 R2 G2 R3 G3
|
||||||
|
* bg0 = B0 G0 B1 G1 B2 G2 B3 G3
|
||||||
|
* ...
|
||||||
|
*
|
||||||
|
* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't
|
||||||
|
* support unsigned vectors.
|
||||||
|
*/
|
||||||
|
rg0 = (__vector signed short)vec_mergeh(zero, rgbg0);
|
||||||
|
bg0 = (__vector signed short)vec_mergel(zero, rgbg0);
|
||||||
|
rg1 = (__vector signed short)vec_mergeh(zero, rgbg1);
|
||||||
|
bg1 = (__vector signed short)vec_mergel(zero, rgbg1);
|
||||||
|
rg2 = (__vector signed short)vec_mergeh(zero, rgbg2);
|
||||||
|
bg2 = (__vector signed short)vec_mergel(zero, rgbg2);
|
||||||
|
rg3 = (__vector signed short)vec_mergeh(zero, rgbg3);
|
||||||
|
bg3 = (__vector signed short)vec_mergel(zero, rgbg3);
|
||||||
|
|
||||||
|
/* (Original)
|
||||||
|
* Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
|
||||||
|
*
|
||||||
|
* (This implementation)
|
||||||
|
* Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Calculate Y values */
|
||||||
|
|
||||||
|
/* First sum: R and first part of G, plus the ONE_HALF rounding term */
y0 = vec_msums(rg0, pw_f0299_f0337, pd_onehalf);
|
||||||
|
y1 = vec_msums(rg1, pw_f0299_f0337, pd_onehalf);
|
||||||
|
y2 = vec_msums(rg2, pw_f0299_f0337, pd_onehalf);
|
||||||
|
y3 = vec_msums(rg3, pw_f0299_f0337, pd_onehalf);
|
||||||
|
/* Second sum: B and the remaining 0.25 * G, accumulated onto the first */
y0 = vec_msums(bg0, pw_f0114_f0250, y0);
|
||||||
|
y1 = vec_msums(bg1, pw_f0114_f0250, y1);
|
||||||
|
y2 = vec_msums(bg2, pw_f0114_f0250, y2);
|
||||||
|
y3 = vec_msums(bg3, pw_f0114_f0250, y3);
|
||||||
|
/* Clever way to avoid 4 shifts + 2 packs. This packs the high word from
|
||||||
|
* each dword into a new 16-bit vector, which is the equivalent of
|
||||||
|
* descaling the 32-bit results (right-shifting by 16 bits) and then
|
||||||
|
* packing them.
|
||||||
|
*/
|
||||||
|
y01 = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1,
|
||||||
|
shift_pack_index);
|
||||||
|
y23 = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3,
|
||||||
|
shift_pack_index);
|
||||||
|
y = vec_pack(y01, y23);
|
||||||
|
vec_st(y, 0, outptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
156
simd/jfdctfst-altivec.c
Normal file
156
simd/jfdctfst-altivec.c
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
/*
|
||||||
|
* AltiVec optimizations for libjpeg-turbo
|
||||||
|
*
|
||||||
|
* Copyright (C) 2014, D. R. Commander.
|
||||||
|
* All rights reserved.
|
||||||
|
* This software is provided 'as-is', without any express or implied
|
||||||
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
|
* arising from the use of this software.
|
||||||
|
*
|
||||||
|
* Permission is granted to anyone to use this software for any purpose,
|
||||||
|
* including commercial applications, and to alter it and redistribute it
|
||||||
|
* freely, subject to the following restrictions:
|
||||||
|
*
|
||||||
|
* 1. The origin of this software must not be misrepresented; you must not
|
||||||
|
* claim that you wrote the original software. If you use this software
|
||||||
|
* in a product, an acknowledgment in the product documentation would be
|
||||||
|
* appreciated but is not required.
|
||||||
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
* misrepresented as being the original software.
|
||||||
|
* 3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* FAST INTEGER FORWARD DCT
|
||||||
|
*
|
||||||
|
* This is similar to the SSE2 implementation, except that we left-shift the
|
||||||
|
* constants by 1 less bit (the -1 in CONST_SHIFT.) This is because
|
||||||
|
* vec_madds(arg1, arg2, arg3) generates the 16-bit saturated sum of:
|
||||||
|
* the elements in arg3 + the most significant 17 bits of
|
||||||
|
* (the elements in arg1 * the elements in arg2).
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "jsimd_altivec.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Fixed-point multipliers for the fast forward DCT.  Each value is the real
 * constant multiplied by 2^CONST_BITS (256) and rounded, e.g.
 * 181 ~= 0.707106781 * 256.
 *
 * CONST_SHIFT is the additional left shift applied to these values when the
 * vector constants are built (F_x_xxx << CONST_SHIFT); with the values below
 * it evaluates to 16 - 2 - 8 - 1 == 5.  PRE_MULTIPLY_SCALE_BITS is the left
 * shift applied to the data operands before each multiply.
 */
#define F_0_382 98 /* FIX(0.382683433) */
|
||||||
|
#define F_0_541 139 /* FIX(0.541196100) */
|
||||||
|
#define F_0_707 181 /* FIX(0.707106781) */
|
||||||
|
#define F_1_306 334 /* FIX(1.306562965) */
|
||||||
|
|
||||||
|
#define CONST_BITS 8
|
||||||
|
#define PRE_MULTIPLY_SCALE_BITS 2
|
||||||
|
#define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS - 1)
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * One 1-D pass of the fast forward DCT, applied to eight vectors at once.
 *
 * Takes no arguments; it operates on variables of the enclosing function:
 *   inputs   tmp0..tmp7 (butterfly sums/differences prepared by the caller)
 *   outputs  out0..out7
 *   scratch  tmp10..tmp13, z1..z5, z11, z13
 *   consts   pw_0382, pw_0541, pw_0707, pw_1306, zero,
 *            pre_multiply_scale_bits
 * Operands are left-shifted by pre_multiply_scale_bits before every
 * vec_madds() multiply.
 */
#define DO_FDCT() \
|
||||||
|
{ \
|
||||||
|
/* Even part */ \
|
||||||
|
\
|
||||||
|
tmp10 = vec_add(tmp0, tmp3); \
|
||||||
|
tmp13 = vec_sub(tmp0, tmp3); \
|
||||||
|
tmp11 = vec_add(tmp1, tmp2); \
|
||||||
|
tmp12 = vec_sub(tmp1, tmp2); \
|
||||||
|
\
|
||||||
|
out0 = vec_add(tmp10, tmp11); \
|
||||||
|
out4 = vec_sub(tmp10, tmp11); \
|
||||||
|
\
|
||||||
|
z1 = vec_add(tmp12, tmp13); \
|
||||||
|
z1 = vec_sl(z1, pre_multiply_scale_bits); \
|
||||||
|
z1 = vec_madds(z1, pw_0707, zero); \
|
||||||
|
\
|
||||||
|
out2 = vec_add(tmp13, z1); \
|
||||||
|
out6 = vec_sub(tmp13, z1); \
|
||||||
|
\
|
||||||
|
/* Odd part */ \
|
||||||
|
\
|
||||||
|
tmp10 = vec_add(tmp4, tmp5); \
|
||||||
|
tmp11 = vec_add(tmp5, tmp6); \
|
||||||
|
tmp12 = vec_add(tmp6, tmp7); \
|
||||||
|
\
|
||||||
|
tmp10 = vec_sl(tmp10, pre_multiply_scale_bits); \
|
||||||
|
tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \
|
||||||
|
z5 = vec_sub(tmp10, tmp12); \
|
||||||
|
z5 = vec_madds(z5, pw_0382, zero); \
|
||||||
|
\
|
||||||
|
z2 = vec_madds(tmp10, pw_0541, z5); \
|
||||||
|
z4 = vec_madds(tmp12, pw_1306, z5); \
|
||||||
|
\
|
||||||
|
tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \
|
||||||
|
z3 = vec_madds(tmp11, pw_0707, zero); \
|
||||||
|
\
|
||||||
|
z11 = vec_add(tmp7, z3); \
|
||||||
|
z13 = vec_sub(tmp7, z3); \
|
||||||
|
\
|
||||||
|
out5 = vec_add(z13, z2); \
|
||||||
|
out3 = vec_sub(z13, z2); \
|
||||||
|
out1 = vec_add(z11, z4); \
|
||||||
|
out7 = vec_sub(z11, z4); \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Fast integer forward DCT of one 8x8 block, computed in place.
 *
 * data  the 64 coefficients of the block, read as eight 16-byte rows at
 *       byte offsets 0, 16, ..., 112 and overwritten with the result.
 *       NOTE(review): accessed with vec_ld()/vec_st(), so it presumably
 *       must be 16-byte aligned -- confirm with the caller.
 *
 * The block is transposed, run through DO_FDCT(), transposed again and run
 * through DO_FDCT() a second time.  TRANSPOSE() is defined elsewhere (not
 * visible here); it is invoked with variable-name prefixes (row/col/out).
 * Every local below is referenced by name from DO_FDCT() or TRANSPOSE().
 */
void
|
||||||
|
jsimd_fdct_ifast_altivec (DCTELEM *data)
|
||||||
|
{
|
||||||
|
__vector short row0, row1, row2, row3, row4, row5, row6, row7,
|
||||||
|
col0, col1, col2, col3, col4, col5, col6, col7,
|
||||||
|
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13,
|
||||||
|
z1, z2, z3, z4, z5, z11, z13,
|
||||||
|
out0, out1, out2, out3, out4, out5, out6, out7;
|
||||||
|
|
||||||
|
/* Constants */
|
||||||
|
__vector short zero = vec_splat_s16(0),
|
||||||
|
pw_0382 = { __8X(F_0_382 << CONST_SHIFT) },
|
||||||
|
pw_0541 = { __8X(F_0_541 << CONST_SHIFT) },
|
||||||
|
pw_0707 = { __8X(F_0_707 << CONST_SHIFT) },
|
||||||
|
pw_1306 = { __8X(F_1_306 << CONST_SHIFT) };
|
||||||
|
__vector unsigned short
|
||||||
|
pre_multiply_scale_bits = { __8X(PRE_MULTIPLY_SCALE_BITS) };
|
||||||
|
|
||||||
|
/* Pass 1: process rows */
|
||||||
|
|
||||||
|
row0 = vec_ld(0, data);
|
||||||
|
row1 = vec_ld(16, data);
|
||||||
|
row2 = vec_ld(32, data);
|
||||||
|
row3 = vec_ld(48, data);
|
||||||
|
row4 = vec_ld(64, data);
|
||||||
|
row5 = vec_ld(80, data);
|
||||||
|
row6 = vec_ld(96, data);
|
||||||
|
row7 = vec_ld(112, data);
|
||||||
|
|
||||||
|
TRANSPOSE(row, col);
|
||||||
|
|
||||||
|
/* Butterfly inputs for DO_FDCT(): sums in tmp0..3, differences in tmp4..7 */
tmp0 = vec_add(col0, col7);
|
||||||
|
tmp7 = vec_sub(col0, col7);
|
||||||
|
tmp1 = vec_add(col1, col6);
|
||||||
|
tmp6 = vec_sub(col1, col6);
|
||||||
|
tmp2 = vec_add(col2, col5);
|
||||||
|
tmp5 = vec_sub(col2, col5);
|
||||||
|
tmp3 = vec_add(col3, col4);
|
||||||
|
tmp4 = vec_sub(col3, col4);
|
||||||
|
|
||||||
|
DO_FDCT();
|
||||||
|
|
||||||
|
/* Pass 2: process columns */
|
||||||
|
|
||||||
|
TRANSPOSE(out, row);
|
||||||
|
|
||||||
|
tmp0 = vec_add(row0, row7);
|
||||||
|
tmp7 = vec_sub(row0, row7);
|
||||||
|
tmp1 = vec_add(row1, row6);
|
||||||
|
tmp6 = vec_sub(row1, row6);
|
||||||
|
tmp2 = vec_add(row2, row5);
|
||||||
|
tmp5 = vec_sub(row2, row5);
|
||||||
|
tmp3 = vec_add(row3, row4);
|
||||||
|
tmp4 = vec_sub(row3, row4);
|
||||||
|
|
||||||
|
DO_FDCT();
|
||||||
|
|
||||||
|
vec_st(out0, 0, data);
|
||||||
|
vec_st(out1, 16, data);
|
||||||
|
vec_st(out2, 32, data);
|
||||||
|
vec_st(out3, 48, data);
|
||||||
|
vec_st(out4, 64, data);
|
||||||
|
vec_st(out5, 80, data);
|
||||||
|
vec_st(out6, 96, data);
|
||||||
|
vec_st(out7, 112, data);
|
||||||
|
}
|
||||||
262
simd/jfdctint-altivec.c
Normal file
262
simd/jfdctint-altivec.c
Normal file
@@ -0,0 +1,262 @@
|
|||||||
|
/*
|
||||||
|
* AltiVec optimizations for libjpeg-turbo
|
||||||
|
*
|
||||||
|
* Copyright (C) 2014, D. R. Commander.
|
||||||
|
* All rights reserved.
|
||||||
|
* This software is provided 'as-is', without any express or implied
|
||||||
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
|
* arising from the use of this software.
|
||||||
|
*
|
||||||
|
* Permission is granted to anyone to use this software for any purpose,
|
||||||
|
* including commercial applications, and to alter it and redistribute it
|
||||||
|
* freely, subject to the following restrictions:
|
||||||
|
*
|
||||||
|
* 1. The origin of this software must not be misrepresented; you must not
|
||||||
|
* claim that you wrote the original software. If you use this software
|
||||||
|
* in a product, an acknowledgment in the product documentation would be
|
||||||
|
* appreciated but is not required.
|
||||||
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
* misrepresented as being the original software.
|
||||||
|
* 3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* SLOW INTEGER FORWARD DCT */
|
||||||
|
|
||||||
|
#include "jsimd_altivec.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Fixed-point multipliers for the slow (accurate) integer forward DCT.  Each
 * value is the real constant multiplied by 2^CONST_BITS (8192) and rounded,
 * e.g. 4433 ~= 0.541196100 * 8192.
 *
 * PASS1_BITS is the extra scaling carried between the two passes.
 * DESCALE_P1 and DESCALE_P2 are the right-shift amounts applied to the
 * 32-bit products in pass 1 and pass 2 respectively (11 and 15 with the
 * values below).
 */
#define F_0_298 2446 /* FIX(0.298631336) */
|
||||||
|
#define F_0_390 3196 /* FIX(0.390180644) */
|
||||||
|
#define F_0_541 4433 /* FIX(0.541196100) */
|
||||||
|
#define F_0_765 6270 /* FIX(0.765366865) */
|
||||||
|
#define F_0_899 7373 /* FIX(0.899976223) */
|
||||||
|
#define F_1_175 9633 /* FIX(1.175875602) */
|
||||||
|
#define F_1_501 12299 /* FIX(1.501321110) */
|
||||||
|
#define F_1_847 15137 /* FIX(1.847759065) */
|
||||||
|
#define F_1_961 16069 /* FIX(1.961570560) */
|
||||||
|
#define F_2_053 16819 /* FIX(2.053119869) */
|
||||||
|
#define F_2_562 20995 /* FIX(2.562915447) */
|
||||||
|
#define F_3_072 25172 /* FIX(3.072711026) */
|
||||||
|
|
||||||
|
#define CONST_BITS 13
|
||||||
|
#define PASS1_BITS 2
|
||||||
|
#define DESCALE_P1 (CONST_BITS - PASS1_BITS)
|
||||||
|
#define DESCALE_P2 (CONST_BITS + PASS1_BITS)
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Part of a 1-D slow-DCT pass that is shared by the row and column passes.
 *
 * PASS is 1 or 2 and is token-pasted onto pd_descale_p / descale_p to pick
 * the rounding constant and right-shift amount for that pass.
 *
 * Operates on variables of the enclosing function:
 *   inputs   tmp12, tmp13 (even part) and tmp4..tmp7 (odd part)
 *   outputs  out2, out6, out1, out3, out5, out7
 *            (out0 and out4 are produced by the invoking macro)
 *   scratch  tmp1312l/h, z3, z4, z34l/h, z3l/h, z4l/h, tmp47l/h, tmp56l/h
 *            and the 32-bit outNl/outNh halves
 * Pairs of 16-bit inputs are interleaved with vec_mergeh()/vec_mergel() so
 * that one vec_msums() yields a two-term sum of products per 32-bit lane;
 * the lanes are then shifted right and packed back to 16 bits.
 */
#define DO_FDCT_COMMON(PASS) \
|
||||||
|
{ \
|
||||||
|
/* (Original) \
|
||||||
|
* z1 = (tmp12 + tmp13) * 0.541196100; \
|
||||||
|
* data2 = z1 + tmp13 * 0.765366865; \
|
||||||
|
* data6 = z1 + tmp12 * -1.847759065; \
|
||||||
|
* \
|
||||||
|
* (This implementation) \
|
||||||
|
* data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; \
|
||||||
|
* data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); \
|
||||||
|
*/ \
|
||||||
|
\
|
||||||
|
tmp1312l = vec_mergeh(tmp13, tmp12); \
|
||||||
|
tmp1312h = vec_mergel(tmp13, tmp12); \
|
||||||
|
\
|
||||||
|
out2l = vec_msums(tmp1312l, pw_f130_f054, pd_descale_p##PASS); \
|
||||||
|
out2h = vec_msums(tmp1312h, pw_f130_f054, pd_descale_p##PASS); \
|
||||||
|
out6l = vec_msums(tmp1312l, pw_f054_mf130, pd_descale_p##PASS); \
|
||||||
|
out6h = vec_msums(tmp1312h, pw_f054_mf130, pd_descale_p##PASS); \
|
||||||
|
\
|
||||||
|
out2l = vec_sra(out2l, descale_p##PASS); \
|
||||||
|
out2h = vec_sra(out2h, descale_p##PASS); \
|
||||||
|
out6l = vec_sra(out6l, descale_p##PASS); \
|
||||||
|
out6h = vec_sra(out6h, descale_p##PASS); \
|
||||||
|
\
|
||||||
|
out2 = vec_pack(out2l, out2h); \
|
||||||
|
out6 = vec_pack(out6l, out6h); \
|
||||||
|
\
|
||||||
|
/* Odd part */ \
|
||||||
|
\
|
||||||
|
z3 = vec_add(tmp4, tmp6); \
|
||||||
|
z4 = vec_add(tmp5, tmp7); \
|
||||||
|
\
|
||||||
|
/* (Original) \
|
||||||
|
* z5 = (z3 + z4) * 1.175875602; \
|
||||||
|
* z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \
|
||||||
|
* z3 += z5; z4 += z5; \
|
||||||
|
* \
|
||||||
|
* (This implementation) \
|
||||||
|
* z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \
|
||||||
|
* z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \
|
||||||
|
*/ \
|
||||||
|
\
|
||||||
|
z34l = vec_mergeh(z3, z4); \
|
||||||
|
z34h = vec_mergel(z3, z4); \
|
||||||
|
\
|
||||||
|
z3l = vec_msums(z34l, pw_mf078_f117, pd_descale_p##PASS); \
|
||||||
|
z3h = vec_msums(z34h, pw_mf078_f117, pd_descale_p##PASS); \
|
||||||
|
z4l = vec_msums(z34l, pw_f117_f078, pd_descale_p##PASS); \
|
||||||
|
z4h = vec_msums(z34h, pw_f117_f078, pd_descale_p##PASS); \
|
||||||
|
\
|
||||||
|
/* (Original) \
|
||||||
|
* z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; \
|
||||||
|
* tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; \
|
||||||
|
* tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; \
|
||||||
|
* z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \
|
||||||
|
* data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; \
|
||||||
|
* data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; \
|
||||||
|
* \
|
||||||
|
* (This implementation) \
|
||||||
|
* tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; \
|
||||||
|
* tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; \
|
||||||
|
* tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); \
|
||||||
|
* tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); \
|
||||||
|
* data7 = tmp4 + z3; data5 = tmp5 + z4; \
|
||||||
|
* data3 = tmp6 + z3; data1 = tmp7 + z4; \
|
||||||
|
*/ \
|
||||||
|
\
|
||||||
|
tmp47l = vec_mergeh(tmp4, tmp7); \
|
||||||
|
tmp47h = vec_mergel(tmp4, tmp7); \
|
||||||
|
\
|
||||||
|
out7l = vec_msums(tmp47l, pw_mf060_mf089, z3l); \
|
||||||
|
out7h = vec_msums(tmp47h, pw_mf060_mf089, z3h); \
|
||||||
|
out1l = vec_msums(tmp47l, pw_mf089_f060, z4l); \
|
||||||
|
out1h = vec_msums(tmp47h, pw_mf089_f060, z4h); \
|
||||||
|
\
|
||||||
|
out7l = vec_sra(out7l, descale_p##PASS); \
|
||||||
|
out7h = vec_sra(out7h, descale_p##PASS); \
|
||||||
|
out1l = vec_sra(out1l, descale_p##PASS); \
|
||||||
|
out1h = vec_sra(out1h, descale_p##PASS); \
|
||||||
|
\
|
||||||
|
out7 = vec_pack(out7l, out7h); \
|
||||||
|
out1 = vec_pack(out1l, out1h); \
|
||||||
|
\
|
||||||
|
tmp56l = vec_mergeh(tmp5, tmp6); \
|
||||||
|
tmp56h = vec_mergel(tmp5, tmp6); \
|
||||||
|
\
|
||||||
|
out5l = vec_msums(tmp56l, pw_mf050_mf256, z4l); \
|
||||||
|
out5h = vec_msums(tmp56h, pw_mf050_mf256, z4h); \
|
||||||
|
out3l = vec_msums(tmp56l, pw_mf256_f050, z3l); \
|
||||||
|
out3h = vec_msums(tmp56h, pw_mf256_f050, z3h); \
|
||||||
|
\
|
||||||
|
out5l = vec_sra(out5l, descale_p##PASS); \
|
||||||
|
out5h = vec_sra(out5h, descale_p##PASS); \
|
||||||
|
out3l = vec_sra(out3l, descale_p##PASS); \
|
||||||
|
out3h = vec_sra(out3h, descale_p##PASS); \
|
||||||
|
\
|
||||||
|
out5 = vec_pack(out5l, out5h); \
|
||||||
|
out3 = vec_pack(out3l, out3h); \
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Pass 1 (rows) of the slow forward DCT.
 *
 * Reads tmp0..tmp3 from the enclosing function, computes out0 and out4 and
 * left-shifts them by pass1_bits, then invokes DO_FDCT_COMMON(1) for the
 * remaining six outputs.
 */
#define DO_FDCT_ROWS() \
|
||||||
|
{ \
|
||||||
|
/* Even part */ \
|
||||||
|
\
|
||||||
|
tmp10 = vec_add(tmp0, tmp3); \
|
||||||
|
tmp13 = vec_sub(tmp0, tmp3); \
|
||||||
|
tmp11 = vec_add(tmp1, tmp2); \
|
||||||
|
tmp12 = vec_sub(tmp1, tmp2); \
|
||||||
|
\
|
||||||
|
out0 = vec_add(tmp10, tmp11); \
|
||||||
|
out0 = vec_sl(out0, pass1_bits); \
|
||||||
|
out4 = vec_sub(tmp10, tmp11); \
|
||||||
|
out4 = vec_sl(out4, pass1_bits); \
|
||||||
|
\
|
||||||
|
DO_FDCT_COMMON(1); \
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Pass 2 (columns) of the slow forward DCT.
 *
 * Reads tmp0..tmp3 from the enclosing function, computes out0 and out4, adds
 * the rounding term pw_descale_p2x and arithmetic-right-shifts them by
 * pass1_bits (removing the scaling added in pass 1), then invokes
 * DO_FDCT_COMMON(2) for the remaining six outputs.
 */
#define DO_FDCT_COLS() \
|
||||||
|
{ \
|
||||||
|
/* Even part */ \
|
||||||
|
\
|
||||||
|
tmp10 = vec_add(tmp0, tmp3); \
|
||||||
|
tmp13 = vec_sub(tmp0, tmp3); \
|
||||||
|
tmp11 = vec_add(tmp1, tmp2); \
|
||||||
|
tmp12 = vec_sub(tmp1, tmp2); \
|
||||||
|
\
|
||||||
|
out0 = vec_add(tmp10, tmp11); \
|
||||||
|
out0 = vec_add(out0, pw_descale_p2x); \
|
||||||
|
out0 = vec_sra(out0, pass1_bits); \
|
||||||
|
out4 = vec_sub(tmp10, tmp11); \
|
||||||
|
out4 = vec_add(out4, pw_descale_p2x); \
|
||||||
|
out4 = vec_sra(out4, pass1_bits); \
|
||||||
|
\
|
||||||
|
DO_FDCT_COMMON(2); \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Slow (accurate) integer forward DCT of one 8x8 block, computed in place.
 *
 * data  the 64 coefficients of the block, read as eight 16-byte rows at
 *       byte offsets 0, 16, ..., 112 and overwritten with the result.
 *       NOTE(review): accessed with vec_ld()/vec_st(), so it presumably
 *       must be 16-byte aligned -- confirm with the caller.
 *
 * The block is transposed and run through DO_FDCT_ROWS(), then transposed
 * again and run through DO_FDCT_COLS().  TRANSPOSE() is defined elsewhere
 * (not visible here).  Every local and constant below is referenced by name
 * from the DO_FDCT_* macros or from TRANSPOSE().
 */
void
|
||||||
|
jsimd_fdct_islow_altivec (DCTELEM *data)
|
||||||
|
{
|
||||||
|
__vector short row0, row1, row2, row3, row4, row5, row6, row7,
|
||||||
|
col0, col1, col2, col3, col4, col5, col6, col7,
|
||||||
|
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13,
|
||||||
|
tmp47l, tmp47h, tmp56l, tmp56h, tmp1312l, tmp1312h,
|
||||||
|
z3, z4, z34l, z34h,
|
||||||
|
out0, out1, out2, out3, out4, out5, out6, out7;
|
||||||
|
__vector int z3l, z3h, z4l, z4h,
|
||||||
|
out1l, out1h, out2l, out2h, out3l, out3h, out5l, out5h, out6l, out6h,
|
||||||
|
out7l, out7h;
|
||||||
|
|
||||||
|
/* Constants */
|
||||||
|
/* Each pw_* vector repeats one pair of multipliers four times, matching the
 * interleaved operand pairs fed to vec_msums() in DO_FDCT_COMMON().
 */
__vector short
|
||||||
|
pw_f130_f054 = { __4X2(F_0_541 + F_0_765, F_0_541) },
|
||||||
|
pw_f054_mf130 = { __4X2(F_0_541, F_0_541 - F_1_847) },
|
||||||
|
pw_mf078_f117 = { __4X2(F_1_175 - F_1_961, F_1_175) },
|
||||||
|
pw_f117_f078 = { __4X2(F_1_175, F_1_175 - F_0_390) },
|
||||||
|
pw_mf060_mf089 = { __4X2(F_0_298 - F_0_899, -F_0_899) },
|
||||||
|
pw_mf089_f060 = { __4X2(-F_0_899, F_1_501 - F_0_899) },
|
||||||
|
pw_mf050_mf256 = { __4X2(F_2_053 - F_2_562, -F_2_562) },
|
||||||
|
pw_mf256_f050 = { __4X2(-F_2_562, F_3_072 - F_2_562) },
|
||||||
|
pw_descale_p2x = { __8X(1 << (PASS1_BITS - 1)) };
|
||||||
|
__vector unsigned short pass1_bits = { __8X(PASS1_BITS) };
|
||||||
|
__vector int pd_descale_p1 = { __4X(1 << (DESCALE_P1 - 1)) },
|
||||||
|
pd_descale_p2 = { __4X(1 << (DESCALE_P2 - 1)) };
|
||||||
|
__vector unsigned int descale_p1 = { __4X(DESCALE_P1) },
|
||||||
|
descale_p2 = { __4X(DESCALE_P2) };
|
||||||
|
|
||||||
|
/* Pass 1: process rows */
|
||||||
|
|
||||||
|
row0 = vec_ld(0, data);
|
||||||
|
row1 = vec_ld(16, data);
|
||||||
|
row2 = vec_ld(32, data);
|
||||||
|
row3 = vec_ld(48, data);
|
||||||
|
row4 = vec_ld(64, data);
|
||||||
|
row5 = vec_ld(80, data);
|
||||||
|
row6 = vec_ld(96, data);
|
||||||
|
row7 = vec_ld(112, data);
|
||||||
|
|
||||||
|
TRANSPOSE(row, col);
|
||||||
|
|
||||||
|
/* Butterfly inputs for the macros: sums in tmp0..3, differences in tmp4..7 */
tmp0 = vec_add(col0, col7);
|
||||||
|
tmp7 = vec_sub(col0, col7);
|
||||||
|
tmp1 = vec_add(col1, col6);
|
||||||
|
tmp6 = vec_sub(col1, col6);
|
||||||
|
tmp2 = vec_add(col2, col5);
|
||||||
|
tmp5 = vec_sub(col2, col5);
|
||||||
|
tmp3 = vec_add(col3, col4);
|
||||||
|
tmp4 = vec_sub(col3, col4);
|
||||||
|
|
||||||
|
DO_FDCT_ROWS();
|
||||||
|
|
||||||
|
/* Pass 2: process columns */
|
||||||
|
|
||||||
|
TRANSPOSE(out, row);
|
||||||
|
|
||||||
|
tmp0 = vec_add(row0, row7);
|
||||||
|
tmp7 = vec_sub(row0, row7);
|
||||||
|
tmp1 = vec_add(row1, row6);
|
||||||
|
tmp6 = vec_sub(row1, row6);
|
||||||
|
tmp2 = vec_add(row2, row5);
|
||||||
|
tmp5 = vec_sub(row2, row5);
|
||||||
|
tmp3 = vec_add(row3, row4);
|
||||||
|
tmp4 = vec_sub(row3, row4);
|
||||||
|
|
||||||
|
DO_FDCT_COLS();
|
||||||
|
|
||||||
|
vec_st(out0, 0, data);
|
||||||
|
vec_st(out1, 16, data);
|
||||||
|
vec_st(out2, 32, data);
|
||||||
|
vec_st(out3, 48, data);
|
||||||
|
vec_st(out4, 64, data);
|
||||||
|
vec_st(out5, 80, data);
|
||||||
|
vec_st(out6, 96, data);
|
||||||
|
vec_st(out7, 112, data);
|
||||||
|
}
|
||||||
@@ -444,11 +444,11 @@ EXTN(jsimd_fdct_islow_mmx):
|
|||||||
psubw mm6,mm4 ; mm6=tmp12
|
psubw mm6,mm4 ; mm6=tmp12
|
||||||
|
|
||||||
movq mm7,mm5
|
movq mm7,mm5
|
||||||
paddw mm5,mm0 ; mm5=tmp10+tmp11
|
paddsw mm5,mm0 ; mm5=tmp10+tmp11
|
||||||
psubw mm7,mm0 ; mm7=tmp10-tmp11
|
psubsw mm7,mm0 ; mm7=tmp10-tmp11
|
||||||
|
|
||||||
paddw mm5,[GOTOFF(ebx,PW_DESCALE_P2X)]
|
paddsw mm5,[GOTOFF(ebx,PW_DESCALE_P2X)]
|
||||||
paddw mm7,[GOTOFF(ebx,PW_DESCALE_P2X)]
|
paddsw mm7,[GOTOFF(ebx,PW_DESCALE_P2X)]
|
||||||
psraw mm5,PASS1_BITS ; mm5=data0
|
psraw mm5,PASS1_BITS ; mm5=data0
|
||||||
psraw mm7,PASS1_BITS ; mm7=data4
|
psraw mm7,PASS1_BITS ; mm7=data4
|
||||||
|
|
||||||
|
|||||||
@@ -454,11 +454,11 @@ EXTN(jsimd_fdct_islow_sse2):
|
|||||||
psubw xmm6,xmm4 ; xmm6=tmp12
|
psubw xmm6,xmm4 ; xmm6=tmp12
|
||||||
|
|
||||||
movdqa xmm5,xmm7
|
movdqa xmm5,xmm7
|
||||||
paddw xmm7,xmm2 ; xmm7=tmp10+tmp11
|
paddsw xmm7,xmm2 ; xmm7=tmp10+tmp11
|
||||||
psubw xmm5,xmm2 ; xmm5=tmp10-tmp11
|
psubsw xmm5,xmm2 ; xmm5=tmp10-tmp11
|
||||||
|
|
||||||
paddw xmm7,[rel PW_DESCALE_P2X]
|
paddsw xmm7,[rel PW_DESCALE_P2X]
|
||||||
paddw xmm5,[rel PW_DESCALE_P2X]
|
paddsw xmm5,[rel PW_DESCALE_P2X]
|
||||||
psraw xmm7,PASS1_BITS ; xmm7=data0
|
psraw xmm7,PASS1_BITS ; xmm7=data0
|
||||||
psraw xmm5,PASS1_BITS ; xmm5=data4
|
psraw xmm5,PASS1_BITS ; xmm5=data4
|
||||||
|
|
||||||
|
|||||||
@@ -462,11 +462,11 @@ EXTN(jsimd_fdct_islow_sse2):
|
|||||||
psubw xmm6,xmm4 ; xmm6=tmp12
|
psubw xmm6,xmm4 ; xmm6=tmp12
|
||||||
|
|
||||||
movdqa xmm5,xmm7
|
movdqa xmm5,xmm7
|
||||||
paddw xmm7,xmm2 ; xmm7=tmp10+tmp11
|
paddsw xmm7,xmm2 ; xmm7=tmp10+tmp11
|
||||||
psubw xmm5,xmm2 ; xmm5=tmp10-tmp11
|
psubsw xmm5,xmm2 ; xmm5=tmp10-tmp11
|
||||||
|
|
||||||
paddw xmm7,[GOTOFF(ebx,PW_DESCALE_P2X)]
|
paddsw xmm7,[GOTOFF(ebx,PW_DESCALE_P2X)]
|
||||||
paddw xmm5,[GOTOFF(ebx,PW_DESCALE_P2X)]
|
paddsw xmm5,[GOTOFF(ebx,PW_DESCALE_P2X)]
|
||||||
psraw xmm7,PASS1_BITS ; xmm7=data0
|
psraw xmm7,PASS1_BITS ; xmm7=data0
|
||||||
psraw xmm5,PASS1_BITS ; xmm5=data4
|
psraw xmm5,PASS1_BITS ; xmm5=data4
|
||||||
|
|
||||||
|
|||||||
256
simd/jidctfst-altivec.c
Normal file
256
simd/jidctfst-altivec.c
Normal file
@@ -0,0 +1,256 @@
|
|||||||
|
/*
|
||||||
|
* AltiVec optimizations for libjpeg-turbo
|
||||||
|
*
|
||||||
|
* Copyright (C) 2014, D. R. Commander.
|
||||||
|
* All rights reserved.
|
||||||
|
* This software is provided 'as-is', without any express or implied
|
||||||
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
|
* arising from the use of this software.
|
||||||
|
*
|
||||||
|
* Permission is granted to anyone to use this software for any purpose,
|
||||||
|
* including commercial applications, and to alter it and redistribute it
|
||||||
|
* freely, subject to the following restrictions:
|
||||||
|
*
|
||||||
|
* 1. The origin of this software must not be misrepresented; you must not
|
||||||
|
* claim that you wrote the original software. If you use this software
|
||||||
|
* in a product, an acknowledgment in the product documentation would be
|
||||||
|
* appreciated but is not required.
|
||||||
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
* misrepresented as being the original software.
|
||||||
|
* 3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* FAST INTEGER INVERSE DCT
|
||||||
|
*
|
||||||
|
* This is similar to the SSE2 implementation, except that we left-shift the
|
||||||
|
* constants by 1 less bit (the -1 in CONST_SHIFT.) This is because
|
||||||
|
* vec_madds(arg1, arg2, arg3) generates the 16-bit saturated sum of:
|
||||||
|
* the elements in arg3 + the most significant 17 bits of
|
||||||
|
* (the elements in arg1 * the elements in arg2).
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "jsimd_altivec.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Fixed-point multipliers: each F_x_xxx is FIX(x.xxx), i.e. the value scaled by 2^CONST_BITS (256) and rounded to the nearest integer (1.082392200 * 256 ~= 277). */
#define F_1_082 277 /* FIX(1.082392200) */
|
||||||
|
#define F_1_414 362 /* FIX(1.414213562) */
|
||||||
|
#define F_1_847 473 /* FIX(1.847759065) */
|
||||||
|
#define F_2_613 669 /* FIX(2.613125930) */
|
||||||
|
/* 256 is FIX(1); pw_MF1613 is built from the negation of this reduced constant. */
#define F_1_613 (F_2_613 - 256) /* FIX(2.613125930) - FIX(1) */
|
||||||
|
|
||||||
|
/* Number of fractional bits in the F_* multipliers above. */
#define CONST_BITS 8
|
||||||
|
/* PASS1_BITS + 3 is the right-shift count applied to the results of the second pass. */
#define PASS1_BITS 2
|
||||||
|
/* Left-shift count applied to a value before it is multiplied with vec_madds in DO_IDCT. */
#define PRE_MULTIPLY_SCALE_BITS 2
|
||||||
|
/* Left shift applied to the F_* multipliers when the pw_* constant vectors are built; the trailing -1 is the vec_madds adjustment explained in the comment at the top of this file. */
#define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS - 1)
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * One 1-D pass of the fast integer inverse DCT.
 *
 * in  name prefix of the eight input vectors (in##0 .. in##7).
 *
 * The results are written to out0 .. out7.  The expansion site must have in
 * scope the scratch vectors tmp0..tmp7, tmp10..tmp13, z5, z10, z10s, z11,
 * z12s, z13 and the constants zero, pre_multiply_scale_bits, pw_F1414,
 * pw_F1847, pw_F1082 and pw_MF1613.  tmp10, tmp11 and tmp12 are reused
 * between the even and odd parts, so the statement order matters.
 *
 * The body is wrapped in do { } while (0) so that "DO_IDCT(x);" is exactly
 * one statement and can be used safely in an unbraced if/else.
 */
#define DO_IDCT(in)  do {  \
  /* Even part */  \
  \
  tmp10 = vec_add(in##0, in##4);  \
  tmp11 = vec_sub(in##0, in##4);  \
  tmp13 = vec_add(in##2, in##6);  \
  \
  /* tmp12 = (in2 - in6) * 1.414213562 - tmp13 */  \
  tmp12 = vec_sub(in##2, in##6);  \
  tmp12 = vec_sl(tmp12, pre_multiply_scale_bits);  \
  tmp12 = vec_madds(tmp12, pw_F1414, zero);  \
  tmp12 = vec_sub(tmp12, tmp13);  \
  \
  tmp0 = vec_add(tmp10, tmp13);  \
  tmp3 = vec_sub(tmp10, tmp13);  \
  tmp1 = vec_add(tmp11, tmp12);  \
  tmp2 = vec_sub(tmp11, tmp12);  \
  \
  /* Odd part */  \
  \
  z13 = vec_add(in##5, in##3);  \
  z10 = vec_sub(in##5, in##3);  \
  z10s = vec_sl(z10, pre_multiply_scale_bits);  \
  z11 = vec_add(in##1, in##7);  \
  z12s = vec_sub(in##1, in##7);  \
  z12s = vec_sl(z12s, pre_multiply_scale_bits);  \
  \
  /* tmp11 = (z11 - z13) * 1.414213562 */  \
  tmp11 = vec_sub(z11, z13);  \
  tmp11 = vec_sl(tmp11, pre_multiply_scale_bits);  \
  tmp11 = vec_madds(tmp11, pw_F1414, zero);  \
  \
  tmp7 = vec_add(z11, z13);  \
  \
  /* To avoid overflow...  \
   *  \
   * (Original)  \
   * tmp12 = -2.613125930 * z10 + z5;  \
   *  \
   * (This implementation)  \
   * tmp12 = (-1.613125930 - 1) * z10 + z5;  \
   *       = -1.613125930 * z10 - z10 + z5;  \
   */  \
  \
  z5 = vec_add(z10s, z12s);  \
  z5 = vec_madds(z5, pw_F1847, zero);  \
  \
  tmp10 = vec_madds(z12s, pw_F1082, zero);  \
  tmp10 = vec_sub(tmp10, z5);  \
  /* z5 is passed as the addend here; the unscaled z10 is subtracted next */  \
  tmp12 = vec_madds(z10s, pw_MF1613, z5);  \
  tmp12 = vec_sub(tmp12, z10);  \
  \
  tmp6 = vec_sub(tmp12, tmp7);  \
  tmp5 = vec_sub(tmp11, tmp6);  \
  tmp4 = vec_add(tmp10, tmp5);  \
  \
  /* Combine the even and odd halves */  \
  \
  out0 = vec_add(tmp0, tmp7);  \
  out1 = vec_add(tmp1, tmp6);  \
  out2 = vec_add(tmp2, tmp5);  \
  out3 = vec_sub(tmp3, tmp4);  \
  out4 = vec_add(tmp3, tmp4);  \
  out5 = vec_sub(tmp2, tmp5);  \
  out6 = vec_sub(tmp1, tmp6);  \
  out7 = vec_sub(tmp0, tmp7);  \
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
|
||||||
|
JSAMPARRAY output_buf, JDIMENSION output_col)
|
||||||
|
{
|
||||||
|
short *dct_table = (short *)dct_table_;
|
||||||
|
__vector short row0, row1, row2, row3, row4, row5, row6, row7,
|
||||||
|
col0, col1, col2, col3, col4, col5, col6, col7,
|
||||||
|
quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7,
|
||||||
|
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13,
|
||||||
|
z5, z10, z10s, z11, z12s, z13,
|
||||||
|
out0, out1, out2, out3, out4, out5, out6, out7;
|
||||||
|
__vector signed char outb;
|
||||||
|
int *outptr;
|
||||||
|
|
||||||
|
/* Constants */
|
||||||
|
__vector short zero = { __8X(0) },
|
||||||
|
pw_F1414 = { __8X(F_1_414 << CONST_SHIFT) },
|
||||||
|
pw_F1847 = { __8X(F_1_847 << CONST_SHIFT) },
|
||||||
|
pw_MF1613 = { __8X(-F_1_613 << CONST_SHIFT) },
|
||||||
|
pw_F1082 = { __8X(F_1_082 << CONST_SHIFT) };
|
||||||
|
__vector unsigned short
|
||||||
|
pre_multiply_scale_bits = { __8X(PRE_MULTIPLY_SCALE_BITS) },
|
||||||
|
pass1_bits3 = { __8X(PASS1_BITS + 3) };
|
||||||
|
__vector signed char pb_centerjsamp = { __16X(CENTERJSAMPLE) };
|
||||||
|
|
||||||
|
/* Pass 1: process columns */
|
||||||
|
|
||||||
|
col0 = vec_ld(0, coef_block);
|
||||||
|
col1 = vec_ld(16, coef_block);
|
||||||
|
col2 = vec_ld(32, coef_block);
|
||||||
|
col3 = vec_ld(48, coef_block);
|
||||||
|
col4 = vec_ld(64, coef_block);
|
||||||
|
col5 = vec_ld(80, coef_block);
|
||||||
|
col6 = vec_ld(96, coef_block);
|
||||||
|
col7 = vec_ld(112, coef_block);
|
||||||
|
|
||||||
|
tmp1 = vec_or(col1, col2);
|
||||||
|
tmp2 = vec_or(col3, col4);
|
||||||
|
tmp1 = vec_or(tmp1, tmp2);
|
||||||
|
tmp3 = vec_or(col5, col6);
|
||||||
|
tmp3 = vec_or(tmp3, col7);
|
||||||
|
tmp1 = vec_or(tmp1, tmp3);
|
||||||
|
|
||||||
|
quant0 = *(__vector short *)&dct_table[0];
|
||||||
|
col0 = vec_mladd(col0, quant0, zero);
|
||||||
|
|
||||||
|
if (vec_all_eq(tmp1, zero)) {
|
||||||
|
/* AC terms all zero */
|
||||||
|
|
||||||
|
row0 = vec_splat(col0, 0);
|
||||||
|
row1 = vec_splat(col0, 1);
|
||||||
|
row2 = vec_splat(col0, 2);
|
||||||
|
row3 = vec_splat(col0, 3);
|
||||||
|
row4 = vec_splat(col0, 4);
|
||||||
|
row5 = vec_splat(col0, 5);
|
||||||
|
row6 = vec_splat(col0, 6);
|
||||||
|
row7 = vec_splat(col0, 7);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
quant1 = *(__vector short *)&dct_table[8];
|
||||||
|
quant2 = *(__vector short *)&dct_table[16];
|
||||||
|
quant3 = *(__vector short *)&dct_table[24];
|
||||||
|
quant4 = *(__vector short *)&dct_table[32];
|
||||||
|
quant5 = *(__vector short *)&dct_table[40];
|
||||||
|
quant6 = *(__vector short *)&dct_table[48];
|
||||||
|
quant7 = *(__vector short *)&dct_table[56];
|
||||||
|
|
||||||
|
col1 = vec_mladd(col1, quant1, zero);
|
||||||
|
col2 = vec_mladd(col2, quant2, zero);
|
||||||
|
col3 = vec_mladd(col3, quant3, zero);
|
||||||
|
col4 = vec_mladd(col4, quant4, zero);
|
||||||
|
col5 = vec_mladd(col5, quant5, zero);
|
||||||
|
col6 = vec_mladd(col6, quant6, zero);
|
||||||
|
col7 = vec_mladd(col7, quant7, zero);
|
||||||
|
|
||||||
|
DO_IDCT(col);
|
||||||
|
|
||||||
|
TRANSPOSE(out, row);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pass 2: process rows */
|
||||||
|
|
||||||
|
DO_IDCT(row);
|
||||||
|
|
||||||
|
out0 = vec_sra(out0, pass1_bits3);
|
||||||
|
out1 = vec_sra(out1, pass1_bits3);
|
||||||
|
out2 = vec_sra(out2, pass1_bits3);
|
||||||
|
out3 = vec_sra(out3, pass1_bits3);
|
||||||
|
out4 = vec_sra(out4, pass1_bits3);
|
||||||
|
out5 = vec_sra(out5, pass1_bits3);
|
||||||
|
out6 = vec_sra(out6, pass1_bits3);
|
||||||
|
out7 = vec_sra(out7, pass1_bits3);
|
||||||
|
|
||||||
|
TRANSPOSE(out, col);
|
||||||
|
|
||||||
|
outb = vec_packs(col0, col0);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[0] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col1, col1);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[1] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col2, col2);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[2] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col3, col3);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[3] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col4, col4);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[4] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col5, col5);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[5] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col6, col6);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[6] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col7, col7);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[7] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
}
|
||||||
358
simd/jidctint-altivec.c
Normal file
358
simd/jidctint-altivec.c
Normal file
@@ -0,0 +1,358 @@
|
|||||||
|
/*
|
||||||
|
* AltiVec optimizations for libjpeg-turbo
|
||||||
|
*
|
||||||
|
* Copyright (C) 2014, D. R. Commander.
|
||||||
|
* All rights reserved.
|
||||||
|
* This software is provided 'as-is', without any express or implied
|
||||||
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
|
* arising from the use of this software.
|
||||||
|
*
|
||||||
|
* Permission is granted to anyone to use this software for any purpose,
|
||||||
|
* including commercial applications, and to alter it and redistribute it
|
||||||
|
* freely, subject to the following restrictions:
|
||||||
|
*
|
||||||
|
* 1. The origin of this software must not be misrepresented; you must not
|
||||||
|
* claim that you wrote the original software. If you use this software
|
||||||
|
* in a product, an acknowledgment in the product documentation would be
|
||||||
|
* appreciated but is not required.
|
||||||
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
* misrepresented as being the original software.
|
||||||
|
* 3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* SLOW INTEGER INVERSE DCT */
|
||||||
|
|
||||||
|
#include "jsimd_altivec.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Fixed-point multipliers: each F_x_xxx is FIX(x.xxx), i.e. the value scaled by 2^CONST_BITS (8192) and rounded to the nearest integer (0.298631336 * 8192 ~= 2446). */
#define F_0_298 2446 /* FIX(0.298631336) */
|
||||||
|
#define F_0_390 3196 /* FIX(0.390180644) */
|
||||||
|
#define F_0_541 4433 /* FIX(0.541196100) */
|
||||||
|
#define F_0_765 6270 /* FIX(0.765366865) */
|
||||||
|
#define F_0_899 7373 /* FIX(0.899976223) */
|
||||||
|
#define F_1_175 9633 /* FIX(1.175875602) */
|
||||||
|
#define F_1_501 12299 /* FIX(1.501321110) */
|
||||||
|
#define F_1_847 15137 /* FIX(1.847759065) */
|
||||||
|
#define F_1_961 16069 /* FIX(1.961570560) */
|
||||||
|
#define F_2_053 16819 /* FIX(2.053119869) */
|
||||||
|
#define F_2_562 20995 /* FIX(2.562915447) */
|
||||||
|
#define F_3_072 25172 /* FIX(3.072711026) */
|
||||||
|
|
||||||
|
/* Number of fractional bits in the F_* multipliers above. */
#define CONST_BITS 13
|
||||||
|
/* Left-shift count applied to the dequantized DC vector on the all-AC-zero path; also part of both descale counts below. */
#define PASS1_BITS 2
|
||||||
|
/* Right-shift count used by DO_IDCT(…, 1); 1 << (DESCALE_P1 - 1) is added beforehand via pd_descale_p1. */
#define DESCALE_P1 (CONST_BITS - PASS1_BITS)
|
||||||
|
/* Right-shift count used by DO_IDCT(…, 2); 1 << (DESCALE_P2 - 1) is added beforehand via pd_descale_p2. */
#define DESCALE_P2 (CONST_BITS + PASS1_BITS + 3)
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * One 1-D pass of the slow (accurate) integer inverse DCT.
 *
 * in    name prefix of the eight 16-bit input vectors (in##0 .. in##7)
 * PASS  1 or 2; selects pd_descale_p<PASS> (the term added before the final
 *       shift) and descale_p<PASS> (the vec_sra shift count)
 *
 * The results are written to out0 .. out7.  The expansion site must have in
 * scope: the 16-bit scratch vectors tmp0, tmp1, z3, z4, z34l/h, in##26l/h,
 * in##71l/h and in##53l/h; the 32-bit scratch vectors tmp0l/h .. tmp3l/h,
 * tmp10l/h .. tmp13l/h, z3l/h, z4l/h and out0l/h .. out7l/h; and the constants
 * zero32, const_bits and the pw_* multiplier pairs.  tmp0l/h .. tmp3l/h are
 * reused between the even and odd parts, so the statement order matters.
 *
 * The body is wrapped in do { } while (0) so that "DO_IDCT(x, n);" is exactly
 * one statement and can be used safely in an unbraced if/else.
 */
#define DO_IDCT(in, PASS)  do {  \
  /* Even part  \
   *  \
   * (Original)  \
   * z1 = (z2 + z3) * 0.541196100;  \
   * tmp2 = z1 + z3 * -1.847759065;  \
   * tmp3 = z1 + z2 * 0.765366865;  \
   *  \
   * (This implementation)  \
   * tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);  \
   * tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;  \
   */  \
  \
  in##26l = vec_mergeh(in##2, in##6);  \
  in##26h = vec_mergel(in##2, in##6);  \
  \
  tmp3l = vec_msums(in##26l, pw_f130_f054, zero32);  \
  tmp3h = vec_msums(in##26h, pw_f130_f054, zero32);  \
  tmp2l = vec_msums(in##26l, pw_f054_mf130, zero32);  \
  tmp2h = vec_msums(in##26h, pw_f054_mf130, zero32);  \
  \
  tmp0 = vec_add(in##0, in##4);  \
  tmp1 = vec_sub(in##0, in##4);  \
  \
  /* Widen to 32 bits, scale by 2^CONST_BITS and add the descale term */  \
  tmp0l = vec_unpackh(tmp0);  \
  tmp0h = vec_unpackl(tmp0);  \
  tmp0l = vec_sl(tmp0l, const_bits);  \
  tmp0h = vec_sl(tmp0h, const_bits);  \
  tmp0l = vec_add(tmp0l, pd_descale_p##PASS);  \
  tmp0h = vec_add(tmp0h, pd_descale_p##PASS);  \
  \
  tmp10l = vec_add(tmp0l, tmp3l);  \
  tmp10h = vec_add(tmp0h, tmp3h);  \
  tmp13l = vec_sub(tmp0l, tmp3l);  \
  tmp13h = vec_sub(tmp0h, tmp3h);  \
  \
  tmp1l = vec_unpackh(tmp1);  \
  tmp1h = vec_unpackl(tmp1);  \
  tmp1l = vec_sl(tmp1l, const_bits);  \
  tmp1h = vec_sl(tmp1h, const_bits);  \
  tmp1l = vec_add(tmp1l, pd_descale_p##PASS);  \
  tmp1h = vec_add(tmp1h, pd_descale_p##PASS);  \
  \
  tmp11l = vec_add(tmp1l, tmp2l);  \
  tmp11h = vec_add(tmp1h, tmp2h);  \
  tmp12l = vec_sub(tmp1l, tmp2l);  \
  tmp12h = vec_sub(tmp1h, tmp2h);  \
  \
  /* Odd part */  \
  \
  z3 = vec_add(in##3, in##7);  \
  z4 = vec_add(in##1, in##5);  \
  \
  /* (Original)  \
   * z5 = (z3 + z4) * 1.175875602;  \
   * z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;  \
   * z3 += z5;  z4 += z5;  \
   *  \
   * (This implementation)  \
   * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;  \
   * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);  \
   */  \
  \
  z34l = vec_mergeh(z3, z4);  \
  z34h = vec_mergel(z3, z4);  \
  \
  z3l = vec_msums(z34l, pw_mf078_f117, zero32);  \
  z3h = vec_msums(z34h, pw_mf078_f117, zero32);  \
  z4l = vec_msums(z34l, pw_f117_f078, zero32);  \
  z4h = vec_msums(z34h, pw_f117_f078, zero32);  \
  \
  /* (Original)  \
   * z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;  \
   * tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;  \
   * tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;  \
   * z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;  \
   * tmp0 += z1 + z3;  tmp1 += z2 + z4;  \
   * tmp2 += z2 + z3;  tmp3 += z1 + z4;  \
   *  \
   * (This implementation)  \
   * tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;  \
   * tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;  \
   * tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);  \
   * tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);  \
   * tmp0 += z3;  tmp1 += z4;  \
   * tmp2 += z3;  tmp3 += z4;  \
   */  \
  \
  in##71l = vec_mergeh(in##7, in##1);  \
  in##71h = vec_mergel(in##7, in##1);  \
  \
  /* z3/z4 are passed as the addend, which performs the "+= z3/z4" step */  \
  tmp0l = vec_msums(in##71l, pw_mf060_mf089, z3l);  \
  tmp0h = vec_msums(in##71h, pw_mf060_mf089, z3h);  \
  tmp3l = vec_msums(in##71l, pw_mf089_f060, z4l);  \
  tmp3h = vec_msums(in##71h, pw_mf089_f060, z4h);  \
  \
  in##53l = vec_mergeh(in##5, in##3);  \
  in##53h = vec_mergel(in##5, in##3);  \
  \
  tmp1l = vec_msums(in##53l, pw_mf050_mf256, z4l);  \
  tmp1h = vec_msums(in##53h, pw_mf050_mf256, z4h);  \
  tmp2l = vec_msums(in##53l, pw_mf256_f050, z3l);  \
  tmp2h = vec_msums(in##53h, pw_mf256_f050, z3h);  \
  \
  /* Final output stage */  \
  \
  out0l = vec_add(tmp10l, tmp3l);  \
  out0h = vec_add(tmp10h, tmp3h);  \
  out7l = vec_sub(tmp10l, tmp3l);  \
  out7h = vec_sub(tmp10h, tmp3h);  \
  \
  out0l = vec_sra(out0l, descale_p##PASS);  \
  out0h = vec_sra(out0h, descale_p##PASS);  \
  out7l = vec_sra(out7l, descale_p##PASS);  \
  out7h = vec_sra(out7h, descale_p##PASS);  \
  \
  out0 = vec_pack(out0l, out0h);  \
  out7 = vec_pack(out7l, out7h);  \
  \
  out1l = vec_add(tmp11l, tmp2l);  \
  out1h = vec_add(tmp11h, tmp2h);  \
  out6l = vec_sub(tmp11l, tmp2l);  \
  out6h = vec_sub(tmp11h, tmp2h);  \
  \
  out1l = vec_sra(out1l, descale_p##PASS);  \
  out1h = vec_sra(out1h, descale_p##PASS);  \
  out6l = vec_sra(out6l, descale_p##PASS);  \
  out6h = vec_sra(out6h, descale_p##PASS);  \
  \
  out1 = vec_pack(out1l, out1h);  \
  out6 = vec_pack(out6l, out6h);  \
  \
  out2l = vec_add(tmp12l, tmp1l);  \
  out2h = vec_add(tmp12h, tmp1h);  \
  out5l = vec_sub(tmp12l, tmp1l);  \
  out5h = vec_sub(tmp12h, tmp1h);  \
  \
  out2l = vec_sra(out2l, descale_p##PASS);  \
  out2h = vec_sra(out2h, descale_p##PASS);  \
  out5l = vec_sra(out5l, descale_p##PASS);  \
  out5h = vec_sra(out5h, descale_p##PASS);  \
  \
  out2 = vec_pack(out2l, out2h);  \
  out5 = vec_pack(out5l, out5h);  \
  \
  out3l = vec_add(tmp13l, tmp0l);  \
  out3h = vec_add(tmp13h, tmp0h);  \
  out4l = vec_sub(tmp13l, tmp0l);  \
  out4h = vec_sub(tmp13h, tmp0h);  \
  \
  out3l = vec_sra(out3l, descale_p##PASS);  \
  out3h = vec_sra(out3h, descale_p##PASS);  \
  out4l = vec_sra(out4l, descale_p##PASS);  \
  out4h = vec_sra(out4h, descale_p##PASS);  \
  \
  out3 = vec_pack(out3l, out3h);  \
  out4 = vec_pack(out4l, out4h);  \
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
|
||||||
|
JSAMPARRAY output_buf, JDIMENSION output_col)
|
||||||
|
{
|
||||||
|
short *dct_table = (short *)dct_table_;
|
||||||
|
__vector short row0, row1, row2, row3, row4, row5, row6, row7,
|
||||||
|
col0, col1, col2, col3, col4, col5, col6, col7,
|
||||||
|
quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7,
|
||||||
|
tmp0, tmp1, tmp2, tmp3, z3, z4,
|
||||||
|
z34l, z34h, col71l, col71h, col26l, col26h, col53l, col53h,
|
||||||
|
row71l, row71h, row26l, row26h, row53l, row53h,
|
||||||
|
out0, out1, out2, out3, out4, out5, out6, out7;
|
||||||
|
__vector int tmp0l, tmp0h, tmp1l, tmp1h, tmp2l, tmp2h, tmp3l, tmp3h,
|
||||||
|
tmp10l, tmp10h, tmp11l, tmp11h, tmp12l, tmp12h, tmp13l, tmp13h,
|
||||||
|
z3l, z3h, z4l, z4h,
|
||||||
|
out0l, out0h, out1l, out1h, out2l, out2h, out3l, out3h, out4l, out4h,
|
||||||
|
out5l, out5h, out6l, out6h, out7l, out7h;
|
||||||
|
__vector signed char outb;
|
||||||
|
int *outptr;
|
||||||
|
|
||||||
|
/* Constants */
|
||||||
|
__vector short zero16 = { __8X(0) },
|
||||||
|
pw_f130_f054 = { __4X2(F_0_541 + F_0_765, F_0_541) },
|
||||||
|
pw_f054_mf130 = { __4X2(F_0_541, F_0_541 - F_1_847) },
|
||||||
|
pw_mf078_f117 = { __4X2(F_1_175 - F_1_961, F_1_175) },
|
||||||
|
pw_f117_f078 = { __4X2(F_1_175, F_1_175 - F_0_390) },
|
||||||
|
pw_mf060_mf089 = { __4X2(F_0_298 - F_0_899, -F_0_899) },
|
||||||
|
pw_mf089_f060 = { __4X2(-F_0_899, F_1_501 - F_0_899) },
|
||||||
|
pw_mf050_mf256 = { __4X2(F_2_053 - F_2_562, -F_2_562) },
|
||||||
|
pw_mf256_f050 = { __4X2(-F_2_562, F_3_072 - F_2_562) };
|
||||||
|
__vector unsigned short pass1_bits = { __8X(PASS1_BITS) };
|
||||||
|
__vector int zero32 = { __4X(0) },
|
||||||
|
pd_descale_p1 = { __4X(1 << (DESCALE_P1 - 1)) },
|
||||||
|
pd_descale_p2 = { __4X(1 << (DESCALE_P2 - 1)) };
|
||||||
|
__vector unsigned int descale_p1 = { __4X(DESCALE_P1) },
|
||||||
|
descale_p2 = { __4X(DESCALE_P2) },
|
||||||
|
const_bits = { __4X(CONST_BITS) };
|
||||||
|
__vector signed char pb_centerjsamp = { __16X(CENTERJSAMPLE) };
|
||||||
|
|
||||||
|
/* Pass 1: process columns */
|
||||||
|
|
||||||
|
col0 = *(__vector short *)&coef_block[0];
|
||||||
|
col1 = *(__vector short *)&coef_block[8];
|
||||||
|
col2 = *(__vector short *)&coef_block[16];
|
||||||
|
col3 = *(__vector short *)&coef_block[24];
|
||||||
|
col4 = *(__vector short *)&coef_block[32];
|
||||||
|
col5 = *(__vector short *)&coef_block[40];
|
||||||
|
col6 = *(__vector short *)&coef_block[48];
|
||||||
|
col7 = *(__vector short *)&coef_block[56];
|
||||||
|
|
||||||
|
tmp1 = vec_or(col1, col2);
|
||||||
|
tmp2 = vec_or(col3, col4);
|
||||||
|
tmp1 = vec_or(tmp1, tmp2);
|
||||||
|
tmp3 = vec_or(col5, col6);
|
||||||
|
tmp3 = vec_or(tmp3, col7);
|
||||||
|
tmp1 = vec_or(tmp1, tmp3);
|
||||||
|
|
||||||
|
quant0 = *(__vector short *)&dct_table[0];
|
||||||
|
col0 = vec_mladd(col0, quant0, zero16);
|
||||||
|
|
||||||
|
if (vec_all_eq(tmp1, zero16)) {
|
||||||
|
/* AC terms all zero */
|
||||||
|
|
||||||
|
col0 = vec_sl(col0, pass1_bits);
|
||||||
|
|
||||||
|
row0 = vec_splat(col0, 0);
|
||||||
|
row1 = vec_splat(col0, 1);
|
||||||
|
row2 = vec_splat(col0, 2);
|
||||||
|
row3 = vec_splat(col0, 3);
|
||||||
|
row4 = vec_splat(col0, 4);
|
||||||
|
row5 = vec_splat(col0, 5);
|
||||||
|
row6 = vec_splat(col0, 6);
|
||||||
|
row7 = vec_splat(col0, 7);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
quant1 = *(__vector short *)&dct_table[8];
|
||||||
|
quant2 = *(__vector short *)&dct_table[16];
|
||||||
|
quant3 = *(__vector short *)&dct_table[24];
|
||||||
|
quant4 = *(__vector short *)&dct_table[32];
|
||||||
|
quant5 = *(__vector short *)&dct_table[40];
|
||||||
|
quant6 = *(__vector short *)&dct_table[48];
|
||||||
|
quant7 = *(__vector short *)&dct_table[56];
|
||||||
|
|
||||||
|
col1 = vec_mladd(col1, quant1, zero16);
|
||||||
|
col2 = vec_mladd(col2, quant2, zero16);
|
||||||
|
col3 = vec_mladd(col3, quant3, zero16);
|
||||||
|
col4 = vec_mladd(col4, quant4, zero16);
|
||||||
|
col5 = vec_mladd(col5, quant5, zero16);
|
||||||
|
col6 = vec_mladd(col6, quant6, zero16);
|
||||||
|
col7 = vec_mladd(col7, quant7, zero16);
|
||||||
|
|
||||||
|
DO_IDCT(col, 1);
|
||||||
|
|
||||||
|
TRANSPOSE(out, row);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pass 2: process rows */
|
||||||
|
|
||||||
|
DO_IDCT(row, 2);
|
||||||
|
|
||||||
|
TRANSPOSE(out, col);
|
||||||
|
|
||||||
|
outb = vec_packs(col0, col0);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[0] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col1, col1);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[1] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col2, col2);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[2] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col3, col3);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[3] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col4, col4);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[4] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col5, col5);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[5] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col6, col6);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[6] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
|
||||||
|
outb = vec_packs(col7, col7);
|
||||||
|
outb = vec_add(outb, pb_centerjsamp);
|
||||||
|
outptr = (int *)(output_buf[7] + output_col);
|
||||||
|
vec_ste((__vector int)outb, 0, outptr);
|
||||||
|
vec_ste((__vector int)outb, 4, outptr);
|
||||||
|
}
|
||||||
236
simd/jquanti-altivec.c
Normal file
236
simd/jquanti-altivec.c
Normal file
@@ -0,0 +1,236 @@
|
|||||||
|
/*
|
||||||
|
* AltiVec optimizations for libjpeg-turbo
|
||||||
|
*
|
||||||
|
* Copyright (C) 2014, D. R. Commander.
|
||||||
|
* All rights reserved.
|
||||||
|
* This software is provided 'as-is', without any express or implied
|
||||||
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
|
* arising from the use of this software.
|
||||||
|
*
|
||||||
|
* Permission is granted to anyone to use this software for any purpose,
|
||||||
|
* including commercial applications, and to alter it and redistribute it
|
||||||
|
* freely, subject to the following restrictions:
|
||||||
|
*
|
||||||
|
* 1. The origin of this software must not be misrepresented; you must not
|
||||||
|
* claim that you wrote the original software. If you use this software
|
||||||
|
* in a product, an acknowledgment in the product documentation would be
|
||||||
|
* appreciated but is not required.
|
||||||
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
* misrepresented as being the original software.
|
||||||
|
* 3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* INTEGER QUANTIZATION AND SAMPLE CONVERSION */
|
||||||
|
|
||||||
|
#include "jsimd_altivec.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* NOTE: The address will either be aligned or offset by 8 bytes, so we can
|
||||||
|
* always get the data we want by using a single vector load (although we may
|
||||||
|
* have to permute the result.)
|
||||||
|
*/
|
||||||
|
/*
 * Load sample row `row` into in##row.
 *
 * row  literal row index; selects both sample_data[row] and the destination
 *      variable in##row
 *
 * elemptr, sample_data and start_col must be in scope at the expansion site.
 * elemptr is set to sample_data[row] + start_col, the vector is fetched with
 * vec_ld, and when the address is not a multiple of 16 the loaded vector is
 * permuted with the vec_lvsl pattern for that address.
 *
 * The body is wrapped in do { } while (0) so that "LOAD_ROW(n);" is exactly
 * one statement; the inner if can then never capture a following else.
 */
#define LOAD_ROW(row)  do {  \
  elemptr = sample_data[row] + start_col;  \
  in##row = vec_ld(0, elemptr);  \
  if ((size_t)elemptr & 15)  \
    in##row = vec_perm(in##row, in##row, vec_lvsl(0, elemptr));  \
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col,
|
||||||
|
DCTELEM * workspace)
|
||||||
|
{
|
||||||
|
JSAMPROW elemptr;
|
||||||
|
__vector unsigned char in0, in1, in2, in3, in4, in5, in6, in7;
|
||||||
|
__vector short out0, out1, out2, out3, out4, out5, out6, out7;
|
||||||
|
|
||||||
|
/* Constants */
|
||||||
|
__vector short pw_centerjsamp = { __8X(CENTERJSAMPLE) };
|
||||||
|
__vector unsigned char zero = { __16X(0) };
|
||||||
|
|
||||||
|
LOAD_ROW(0);
|
||||||
|
LOAD_ROW(1);
|
||||||
|
LOAD_ROW(2);
|
||||||
|
LOAD_ROW(3);
|
||||||
|
LOAD_ROW(4);
|
||||||
|
LOAD_ROW(5);
|
||||||
|
LOAD_ROW(6);
|
||||||
|
LOAD_ROW(7);
|
||||||
|
|
||||||
|
out0 = (__vector short)vec_mergeh(zero, in0);
|
||||||
|
out1 = (__vector short)vec_mergeh(zero, in1);
|
||||||
|
out2 = (__vector short)vec_mergeh(zero, in2);
|
||||||
|
out3 = (__vector short)vec_mergeh(zero, in3);
|
||||||
|
out4 = (__vector short)vec_mergeh(zero, in4);
|
||||||
|
out5 = (__vector short)vec_mergeh(zero, in5);
|
||||||
|
out6 = (__vector short)vec_mergeh(zero, in6);
|
||||||
|
out7 = (__vector short)vec_mergeh(zero, in7);
|
||||||
|
|
||||||
|
out0 = vec_sub(out0, pw_centerjsamp);
|
||||||
|
out1 = vec_sub(out1, pw_centerjsamp);
|
||||||
|
out2 = vec_sub(out2, pw_centerjsamp);
|
||||||
|
out3 = vec_sub(out3, pw_centerjsamp);
|
||||||
|
out4 = vec_sub(out4, pw_centerjsamp);
|
||||||
|
out5 = vec_sub(out5, pw_centerjsamp);
|
||||||
|
out6 = vec_sub(out6, pw_centerjsamp);
|
||||||
|
out7 = vec_sub(out7, pw_centerjsamp);
|
||||||
|
|
||||||
|
vec_st(out0, 0, workspace);
|
||||||
|
vec_st(out1, 16, workspace);
|
||||||
|
vec_st(out2, 32, workspace);
|
||||||
|
vec_st(out3, 48, workspace);
|
||||||
|
vec_st(out4, 64, workspace);
|
||||||
|
vec_st(out5, 80, workspace);
|
||||||
|
vec_st(out6, 96, workspace);
|
||||||
|
vec_st(out7, 112, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Bit width of one 16-bit vector element; WORD_BIT - 1 is the vec_sra shift count that jsimd_quantize_altivec() uses for its branch-less absolute value. */
#define WORD_BIT 16
|
||||||
|
|
||||||
|
/* There is no AltiVec unsigned multiply instruction, hence this. */
|
||||||
|
|
||||||
|
#define MULTIPLY(vs0, vs1, out) { \
|
||||||
|
tmpe = vec_mule((__vector unsigned short)vs0, \
|
||||||
|
(__vector unsigned short)vs1); \
|
||||||
|
tmpo = vec_mulo((__vector unsigned short)vs0, \
|
||||||
|
(__vector unsigned short)vs1); \
|
||||||
|
out = (__vector short)vec_perm((__vector unsigned short)tmpe, \
|
||||||
|
(__vector unsigned short)tmpo, \
|
||||||
|
shift_pack_index); \
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Quantize one block of DCT coefficients using AltiVec.
 *
 * coef_block  receives the result: eight vectors stored at offsets
 *             0, 16, ..., 112.
 * divisors    supplies three tables, each read as eight vectors: the
 *             reciprocals at offset 0, the correction terms at offset
 *             DCTSIZE2 * 2 and the scale factors at offset DCTSIZE2 * 4.
 * workspace   holds the input coefficients: eight vectors loaded from
 *             offsets 0, 16, ..., 112.
 *
 * Per element the function takes the absolute value, adds the correction,
 * multiplies by the reciprocal and then by the scale (each time through
 * MULTIPLY()), and finally puts the original sign back.
 *
 * NOTE(review): all accesses go through vec_ld()/vec_st(), so the three
 * pointers are presumably expected to be 16-byte aligned -- confirm with the
 * callers.
 */
void
|
||||||
|
jsimd_quantize_altivec (JCOEFPTR coef_block, DCTELEM * divisors,
|
||||||
|
DCTELEM * workspace)
|
||||||
|
{
|
||||||
|
__vector short row0, row1, row2, row3, row4, row5, row6, row7;
|
||||||
|
__vector short row0s, row1s, row2s, row3s, row4s, row5s, row6s, row7s;
|
||||||
|
__vector short corr0, corr1, corr2, corr3, corr4, corr5, corr6, corr7;
|
||||||
|
__vector short recip0, recip1, recip2, recip3, recip4, recip5, recip6,
|
||||||
|
recip7;
|
||||||
|
__vector short scale0, scale1, scale2, scale3, scale4, scale5, scale6,
|
||||||
|
scale7;
|
||||||
|
/* Scratch vectors written by every MULTIPLY() expansion below. */
__vector unsigned int tmpe, tmpo;
|
||||||
|
|
||||||
|
/* Constants */
|
||||||
|
/* Shift count (WORD_BIT - 1) replicated into all eight elements. */
__vector unsigned short pw_word_bit_m1 = { __8X(WORD_BIT - 1) };
|
||||||
|
/* vec_perm() pattern used by MULTIPLY(): indices 0-15 pick bytes of tmpe,
 * indices 16-31 pick bytes of tmpo, so this takes bytes 0-1 of every 32-bit
 * product, alternating between the two vectors. */
__vector unsigned char shift_pack_index =
|
||||||
|
{ 0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29};
|
||||||
|
|
||||||
|
/* Load the input coefficients from workspace, one vector per rowN. */
row0 = vec_ld(0, workspace);
|
||||||
|
row1 = vec_ld(16, workspace);
|
||||||
|
row2 = vec_ld(32, workspace);
|
||||||
|
row3 = vec_ld(48, workspace);
|
||||||
|
row4 = vec_ld(64, workspace);
|
||||||
|
row5 = vec_ld(80, workspace);
|
||||||
|
row6 = vec_ld(96, workspace);
|
||||||
|
row7 = vec_ld(112, workspace);
|
||||||
|
|
||||||
|
/* Branch-less absolute value: rowNs is the sign mask of rowN (an arithmetic
 * shift right by WORD_BIT - 1 gives all ones for negative elements and zero
 * otherwise), and (rowN ^ rowNs) - rowNs then equals |rowN|.  The masks are
 * kept so the sign can be restored after the multiplications. */
|
||||||
|
row0s = vec_sra(row0, pw_word_bit_m1);
|
||||||
|
row1s = vec_sra(row1, pw_word_bit_m1);
|
||||||
|
row2s = vec_sra(row2, pw_word_bit_m1);
|
||||||
|
row3s = vec_sra(row3, pw_word_bit_m1);
|
||||||
|
row4s = vec_sra(row4, pw_word_bit_m1);
|
||||||
|
row5s = vec_sra(row5, pw_word_bit_m1);
|
||||||
|
row6s = vec_sra(row6, pw_word_bit_m1);
|
||||||
|
row7s = vec_sra(row7, pw_word_bit_m1);
|
||||||
|
row0 = vec_xor(row0, row0s);
|
||||||
|
row1 = vec_xor(row1, row1s);
|
||||||
|
row2 = vec_xor(row2, row2s);
|
||||||
|
row3 = vec_xor(row3, row3s);
|
||||||
|
row4 = vec_xor(row4, row4s);
|
||||||
|
row5 = vec_xor(row5, row5s);
|
||||||
|
row6 = vec_xor(row6, row6s);
|
||||||
|
row7 = vec_xor(row7, row7s);
|
||||||
|
row0 = vec_sub(row0, row0s);
|
||||||
|
row1 = vec_sub(row1, row1s);
|
||||||
|
row2 = vec_sub(row2, row2s);
|
||||||
|
row3 = vec_sub(row3, row3s);
|
||||||
|
row4 = vec_sub(row4, row4s);
|
||||||
|
row5 = vec_sub(row5, row5s);
|
||||||
|
row6 = vec_sub(row6, row6s);
|
||||||
|
row7 = vec_sub(row7, row7s);
|
||||||
|
|
||||||
|
/* Load the correction terms: the table that starts at offset DCTSIZE2 * 2
 * within divisors. */
corr0 = vec_ld(DCTSIZE2 * 2, divisors);
|
||||||
|
corr1 = vec_ld(DCTSIZE2 * 2 + 16, divisors);
|
||||||
|
corr2 = vec_ld(DCTSIZE2 * 2 + 32, divisors);
|
||||||
|
corr3 = vec_ld(DCTSIZE2 * 2 + 48, divisors);
|
||||||
|
corr4 = vec_ld(DCTSIZE2 * 2 + 64, divisors);
|
||||||
|
corr5 = vec_ld(DCTSIZE2 * 2 + 80, divisors);
|
||||||
|
corr6 = vec_ld(DCTSIZE2 * 2 + 96, divisors);
|
||||||
|
corr7 = vec_ld(DCTSIZE2 * 2 + 112, divisors);
|
||||||
|
|
||||||
|
/* Add the correction to each absolute value before multiplying. */
row0 = vec_add(row0, corr0);
|
||||||
|
row1 = vec_add(row1, corr1);
|
||||||
|
row2 = vec_add(row2, corr2);
|
||||||
|
row3 = vec_add(row3, corr3);
|
||||||
|
row4 = vec_add(row4, corr4);
|
||||||
|
row5 = vec_add(row5, corr5);
|
||||||
|
row6 = vec_add(row6, corr6);
|
||||||
|
row7 = vec_add(row7, corr7);
|
||||||
|
|
||||||
|
/* Load the reciprocals: the table at the very start of divisors. */
recip0 = vec_ld(0, divisors);
|
||||||
|
recip1 = vec_ld(16, divisors);
|
||||||
|
recip2 = vec_ld(32, divisors);
|
||||||
|
recip3 = vec_ld(48, divisors);
|
||||||
|
recip4 = vec_ld(64, divisors);
|
||||||
|
recip5 = vec_ld(80, divisors);
|
||||||
|
recip6 = vec_ld(96, divisors);
|
||||||
|
recip7 = vec_ld(112, divisors);
|
||||||
|
|
||||||
|
/* First unsigned multiply: rowN by its reciprocal, keeping the 16 bits of
 * each product that MULTIPLY() selects. */
MULTIPLY(row0, recip0, row0);
|
||||||
|
MULTIPLY(row1, recip1, row1);
|
||||||
|
MULTIPLY(row2, recip2, row2);
|
||||||
|
MULTIPLY(row3, recip3, row3);
|
||||||
|
MULTIPLY(row4, recip4, row4);
|
||||||
|
MULTIPLY(row5, recip5, row5);
|
||||||
|
MULTIPLY(row6, recip6, row6);
|
||||||
|
MULTIPLY(row7, recip7, row7);
|
||||||
|
|
||||||
|
/* Load the scale factors: the table that starts at offset DCTSIZE2 * 4
 * within divisors. */
scale0 = vec_ld(DCTSIZE2 * 4, divisors);
|
||||||
|
scale1 = vec_ld(DCTSIZE2 * 4 + 16, divisors);
|
||||||
|
scale2 = vec_ld(DCTSIZE2 * 4 + 32, divisors);
|
||||||
|
scale3 = vec_ld(DCTSIZE2 * 4 + 48, divisors);
|
||||||
|
scale4 = vec_ld(DCTSIZE2 * 4 + 64, divisors);
|
||||||
|
scale5 = vec_ld(DCTSIZE2 * 4 + 80, divisors);
|
||||||
|
scale6 = vec_ld(DCTSIZE2 * 4 + 96, divisors);
|
||||||
|
scale7 = vec_ld(DCTSIZE2 * 4 + 112, divisors);
|
||||||
|
|
||||||
|
/* Second unsigned multiply: the previous result by its scale factor. */
MULTIPLY(row0, scale0, row0);
|
||||||
|
MULTIPLY(row1, scale1, row1);
|
||||||
|
MULTIPLY(row2, scale2, row2);
|
||||||
|
MULTIPLY(row3, scale3, row3);
|
||||||
|
MULTIPLY(row4, scale4, row4);
|
||||||
|
MULTIPLY(row5, scale5, row5);
|
||||||
|
MULTIPLY(row6, scale6, row6);
|
||||||
|
MULTIPLY(row7, scale7, row7);
|
||||||
|
|
||||||
|
/* Put the saved signs back: (x ^ s) - s negates x where the mask s is all
 * ones and leaves x unchanged where s is zero. */
row0 = vec_xor(row0, row0s);
|
||||||
|
row1 = vec_xor(row1, row1s);
|
||||||
|
row2 = vec_xor(row2, row2s);
|
||||||
|
row3 = vec_xor(row3, row3s);
|
||||||
|
row4 = vec_xor(row4, row4s);
|
||||||
|
row5 = vec_xor(row5, row5s);
|
||||||
|
row6 = vec_xor(row6, row6s);
|
||||||
|
row7 = vec_xor(row7, row7s);
|
||||||
|
row0 = vec_sub(row0, row0s);
|
||||||
|
row1 = vec_sub(row1, row1s);
|
||||||
|
row2 = vec_sub(row2, row2s);
|
||||||
|
row3 = vec_sub(row3, row3s);
|
||||||
|
row4 = vec_sub(row4, row4s);
|
||||||
|
row5 = vec_sub(row5, row5s);
|
||||||
|
row6 = vec_sub(row6, row6s);
|
||||||
|
row7 = vec_sub(row7, row7s);
|
||||||
|
|
||||||
|
/* Store the quantized coefficients to coef_block. */
vec_st(row0, 0, coef_block);
|
||||||
|
vec_st(row1, 16, coef_block);
|
||||||
|
vec_st(row2, 32, coef_block);
|
||||||
|
vec_st(row3, 48, coef_block);
|
||||||
|
vec_st(row4, 64, coef_block);
|
||||||
|
vec_st(row5, 80, coef_block);
|
||||||
|
vec_st(row6, 96, coef_block);
|
||||||
|
vec_st(row7, 112, coef_block);
|
||||||
|
}
|
||||||
60
simd/jsimd.h
60
simd/jsimd.h
@@ -116,6 +116,28 @@ EXTERN(void) jsimd_extxrgb_ycc_convert_mips_dspr2
|
|||||||
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
JDIMENSION output_row, int num_rows);
|
JDIMENSION output_row, int num_rows);
|
||||||
|
|
||||||
|
EXTERN(void) jsimd_rgb_ycc_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
EXTERN(void) jsimd_extrgb_ycc_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
EXTERN(void) jsimd_extrgbx_ycc_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
EXTERN(void) jsimd_extbgr_ycc_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
EXTERN(void) jsimd_extbgrx_ycc_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
EXTERN(void) jsimd_extxbgr_ycc_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
EXTERN(void) jsimd_extxrgb_ycc_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
|
||||||
/* RGB & extended RGB --> Grayscale Colorspace Conversion */
|
/* RGB & extended RGB --> Grayscale Colorspace Conversion */
|
||||||
EXTERN(void) jsimd_rgb_gray_convert_mmx
|
EXTERN(void) jsimd_rgb_gray_convert_mmx
|
||||||
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
@@ -184,6 +206,28 @@ EXTERN(void) jsimd_extxrgb_gray_convert_mips_dspr2
|
|||||||
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
JDIMENSION output_row, int num_rows);
|
JDIMENSION output_row, int num_rows);
|
||||||
|
|
||||||
|
EXTERN(void) jsimd_rgb_gray_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
EXTERN(void) jsimd_extrgb_gray_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
EXTERN(void) jsimd_extrgbx_gray_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
EXTERN(void) jsimd_extbgr_gray_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
EXTERN(void) jsimd_extbgrx_gray_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
EXTERN(void) jsimd_extxbgr_gray_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
EXTERN(void) jsimd_extxrgb_gray_convert_altivec
|
||||||
|
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows);
|
||||||
|
|
||||||
/* YCC --> RGB & extended RGB Colorspace Conversion */
|
/* YCC --> RGB & extended RGB Colorspace Conversion */
|
||||||
EXTERN(void) jsimd_ycc_rgb_convert_mmx
|
EXTERN(void) jsimd_ycc_rgb_convert_mmx
|
||||||
(JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
(JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
@@ -524,6 +568,9 @@ EXTERN(void) jsimd_convsamp_neon
|
|||||||
EXTERN(void) jsimd_convsamp_mips_dspr2
|
EXTERN(void) jsimd_convsamp_mips_dspr2
|
||||||
(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace);
|
(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace);
|
||||||
|
|
||||||
|
EXTERN(void) jsimd_convsamp_altivec
|
||||||
|
(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace);
|
||||||
|
|
||||||
/* Floating Point Sample Conversion */
|
/* Floating Point Sample Conversion */
|
||||||
EXTERN(void) jsimd_convsamp_float_3dnow
|
EXTERN(void) jsimd_convsamp_float_3dnow
|
||||||
(JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace);
|
(JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace);
|
||||||
@@ -545,6 +592,8 @@ EXTERN(void) jsimd_fdct_islow_sse2 (DCTELEM * data);
|
|||||||
|
|
||||||
EXTERN(void) jsimd_fdct_islow_mips_dspr2 (DCTELEM * data);
|
EXTERN(void) jsimd_fdct_islow_mips_dspr2 (DCTELEM * data);
|
||||||
|
|
||||||
|
EXTERN(void) jsimd_fdct_islow_altivec (DCTELEM * data);
|
||||||
|
|
||||||
/* Fast Integer Forward DCT */
|
/* Fast Integer Forward DCT */
|
||||||
EXTERN(void) jsimd_fdct_ifast_mmx (DCTELEM * data);
|
EXTERN(void) jsimd_fdct_ifast_mmx (DCTELEM * data);
|
||||||
|
|
||||||
@@ -576,6 +625,9 @@ EXTERN(void) jsimd_quantize_neon
|
|||||||
EXTERN(void) jsimd_quantize_mips_dspr2
|
EXTERN(void) jsimd_quantize_mips_dspr2
|
||||||
(JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace);
|
(JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace);
|
||||||
|
|
||||||
|
EXTERN(void) jsimd_quantize_altivec
|
||||||
|
(JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace);
|
||||||
|
|
||||||
/* Floating Point Quantization */
|
/* Floating Point Quantization */
|
||||||
EXTERN(void) jsimd_quantize_float_3dnow
|
EXTERN(void) jsimd_quantize_float_3dnow
|
||||||
(JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace);
|
(JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace);
|
||||||
@@ -644,6 +696,10 @@ EXTERN(void) jsimd_idct_islow_mips_dspr2
|
|||||||
(void * dct_table, JCOEFPTR coef_block, int * output_buf,
|
(void * dct_table, JCOEFPTR coef_block, int * output_buf,
|
||||||
JSAMPLE * output_col);
|
JSAMPLE * output_col);
|
||||||
|
|
||||||
|
EXTERN(void) jsimd_idct_islow_altivec
|
||||||
|
(void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
||||||
|
JDIMENSION output_col);
|
||||||
|
|
||||||
/* Fast Integer Inverse DCT */
|
/* Fast Integer Inverse DCT */
|
||||||
EXTERN(void) jsimd_idct_ifast_mmx
|
EXTERN(void) jsimd_idct_ifast_mmx
|
||||||
(void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
(void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
||||||
@@ -665,6 +721,10 @@ EXTERN(void) jsimd_idct_ifast_rows_mips_dspr2
|
|||||||
(DCTELEM * wsptr, JSAMPARRAY output_buf, JDIMENSION output_col,
|
(DCTELEM * wsptr, JSAMPARRAY output_buf, JDIMENSION output_col,
|
||||||
const int * idct_coefs);
|
const int * idct_coefs);
|
||||||
|
|
||||||
|
EXTERN(void) jsimd_idct_ifast_altivec
|
||||||
|
(void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
||||||
|
JDIMENSION output_col);
|
||||||
|
|
||||||
/* Floating Point Inverse DCT */
|
/* Floating Point Inverse DCT */
|
||||||
EXTERN(void) jsimd_idct_float_3dnow
|
EXTERN(void) jsimd_idct_float_3dnow
|
||||||
(void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
(void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
||||||
|
|||||||
@@ -29,6 +29,14 @@
|
|||||||
#include "jsimd.h"
|
#include "jsimd.h"
|
||||||
#include <altivec.h>
|
#include <altivec.h>
|
||||||
|
|
||||||
|
|
||||||
|
/* Common code */
|
||||||
|
|
||||||
|
/* Repetition helpers for vector initializer lists:
 *   __4X(a)      expands to a repeated 4 times
 *   __4X2(a, b)  expands to the pair a, b repeated 4 times (8 values)
 *   __8X(a)      expands to a repeated 8 times
 *   __16X(a)     expands to a repeated 16 times
 * Each expansion is a bare comma-separated list, so it is only meaningful
 * inside braces or an argument list, e.g. { __8X(CENTERJSAMPLE) }. */
#define __4X(a) a, a, a, a
|
||||||
|
#define __4X2(a, b) a, b, a, b, a, b, a, b
|
||||||
|
#define __8X(a) __4X(a), __4X(a)
|
||||||
|
#define __16X(a) __8X(a), __8X(a)
|
||||||
|
|
||||||
#define TRANSPOSE(row, col) \
|
#define TRANSPOSE(row, col) \
|
||||||
{ \
|
{ \
|
||||||
__vector short row04l, row04h, row15l, row15h, \
|
__vector short row04l, row04h, row15l, row15h, \
|
||||||
@@ -47,7 +55,7 @@
|
|||||||
row37h = vec_mergel(row##3, row##7); /* row37h=(34 74 35 75 36 76 37 77) */ \
|
row37h = vec_mergel(row##3, row##7); /* row37h=(34 74 35 75 36 76 37 77) */ \
|
||||||
\
|
\
|
||||||
/* transpose coefficients (phase 2) */ \
|
/* transpose coefficients (phase 2) */ \
|
||||||
col01e = vec_mergeh(row04l, row26l); /* col01e=(00 20 40 60 01 21 41 61} */ \
|
col01e = vec_mergeh(row04l, row26l); /* col01e=(00 20 40 60 01 21 41 61) */ \
|
||||||
col23e = vec_mergel(row04l, row26l); /* col23e=(02 22 42 62 03 23 43 63) */ \
|
col23e = vec_mergel(row04l, row26l); /* col23e=(02 22 42 62 03 23 43 63) */ \
|
||||||
col45e = vec_mergeh(row04h, row26h); /* col45e=(04 24 44 64 05 25 45 65) */ \
|
col45e = vec_mergeh(row04h, row26h); /* col45e=(04 24 44 64 05 25 45 65) */ \
|
||||||
col67e = vec_mergel(row04h, row26h); /* col67e=(06 26 46 66 07 27 47 67) */ \
|
col67e = vec_mergel(row04h, row26h); /* col67e=(06 26 46 66 07 27 47 67) */ \
|
||||||
@@ -58,7 +66,7 @@
|
|||||||
\
|
\
|
||||||
/* transpose coefficients (phase 3) */ \
|
/* transpose coefficients (phase 3) */ \
|
||||||
col##0 = vec_mergeh(col01e, col01o); /* col0=(00 10 20 30 40 50 60 70) */ \
|
col##0 = vec_mergeh(col01e, col01o); /* col0=(00 10 20 30 40 50 60 70) */ \
|
||||||
col##1 = vec_mergel(col01e, col01o); /* col1=(01 11 21 31 41 51 61 71} */ \
|
col##1 = vec_mergel(col01e, col01o); /* col1=(01 11 21 31 41 51 61 71) */ \
|
||||||
col##2 = vec_mergeh(col23e, col23o); /* col2=(02 12 22 32 42 52 62 72) */ \
|
col##2 = vec_mergeh(col23e, col23o); /* col2=(02 12 22 32 42 52 62 72) */ \
|
||||||
col##3 = vec_mergel(col23e, col23o); /* col3=(03 13 23 33 43 53 63 73) */ \
|
col##3 = vec_mergel(col23e, col23o); /* col3=(03 13 23 33 43 53 63 73) */ \
|
||||||
col##4 = vec_mergeh(col45e, col45o); /* col4=(04 14 24 34 44 54 64 74) */ \
|
col##4 = vec_mergeh(col45e, col45o); /* col4=(04 14 24 34 44 54 64 74) */ \
|
||||||
@@ -66,125 +74,3 @@
|
|||||||
col##6 = vec_mergeh(col67e, col67o); /* col6=(06 16 26 36 46 56 66 76) */ \
|
col##6 = vec_mergeh(col67e, col67o); /* col6=(06 16 26 36 46 56 66 76) */ \
|
||||||
col##7 = vec_mergel(col67e, col67o); /* col7=(07 17 27 37 47 57 67 77) */ \
|
col##7 = vec_mergel(col67e, col67o); /* col7=(07 17 27 37 47 57 67 77) */ \
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Fixed-point multipliers for DO_DCT().  jsimd_fdct_ifast_altivec() splats
 * elements 0-3 into the vectors PW_0382, PW_0541, PW_0707 and PW_1306; the
 * trailing comment on each entry names the real constant it stands for.
 * Only four initializers are given for the vector.
 * NOTE(review): 98, 139, 181 and 334 look like the named constants scaled by
 * 256, and the extra << 5 presumably compensates for the scaling applied by
 * vec_madds() together with the PRE_MULTIPLY_SCALE_BITS pre-shift -- confirm
 * against the AltiVec documentation. */
static const __vector short constants __attribute__((aligned(16))) =
|
|
||||||
{
|
|
||||||
98 << 5, /* FIX(0.382683433) */
|
|
||||||
139 << 5, /* FIX(0.541196100) */
|
|
||||||
181 << 5, /* FIX(0.707106781) */
|
|
||||||
334 << 5 /* FIX(1.306562965) */
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
 * One 1-D pass of the forward DCT, computed on whole vectors.
 *
 * Names taken from the expansion site:
 *   inputs   tmp0..tmp7 (tmp0-tmp3 feed the even part, tmp4-tmp7 the odd
 *            part), the splatted constants PW_0382, PW_0541, PW_0707 and
 *            PW_1306, the vector `zero`, and the shift count
 *            PRE_MULTIPLY_SCALE_BITS
 *   outputs  out0..out7
 *   scratch  tmp10..tmp13, z1..z5, z11, z13 (all overwritten)
 *
 * Every multiplication by a constant is written as vec_madds(x, PW_c, zero)
 * with x shifted left by PRE_MULTIPLY_SCALE_BITS beforehand.  In the odd part
 * the shifted tmp10 and tmp12 are reused for both the z5 product and the
 * z2/z4 products.
 */
#define DO_DCT() \
|
|
||||||
{ \
|
|
||||||
/* Even part */ \
|
|
||||||
\
|
|
||||||
tmp10 = vec_add(tmp0, tmp3); \
|
|
||||||
tmp13 = vec_sub(tmp0, tmp3); \
|
|
||||||
tmp11 = vec_add(tmp1, tmp2); \
|
|
||||||
tmp12 = vec_sub(tmp1, tmp2); \
|
|
||||||
\
|
|
||||||
out0 = vec_add(tmp10, tmp11); \
|
|
||||||
out4 = vec_sub(tmp10, tmp11); \
|
|
||||||
\
|
|
||||||
z1 = vec_add(tmp12, tmp13); \
|
|
||||||
z1 = vec_sl(z1, PRE_MULTIPLY_SCALE_BITS); \
|
|
||||||
z1 = vec_madds(z1, PW_0707, zero); \
|
|
||||||
\
|
|
||||||
out2 = vec_add(tmp13, z1); \
|
|
||||||
out6 = vec_sub(tmp13, z1); \
|
|
||||||
\
|
|
||||||
/* Odd part */ \
|
|
||||||
\
|
|
||||||
tmp10 = vec_add(tmp4, tmp5); \
|
|
||||||
tmp11 = vec_add(tmp5, tmp6); \
|
|
||||||
tmp12 = vec_add(tmp6, tmp7); \
|
|
||||||
\
|
|
||||||
tmp10 = vec_sl(tmp10, PRE_MULTIPLY_SCALE_BITS); \
|
|
||||||
tmp12 = vec_sl(tmp12, PRE_MULTIPLY_SCALE_BITS); \
|
|
||||||
z5 = vec_sub(tmp10, tmp12); \
|
|
||||||
z5 = vec_madds(z5, PW_0382, zero); \
|
|
||||||
\
|
|
||||||
z2 = vec_madds(tmp10, PW_0541, zero); \
|
|
||||||
z2 = vec_add(z2, z5); \
|
|
||||||
\
|
|
||||||
z4 = vec_madds(tmp12, PW_1306, zero); \
|
|
||||||
z4 = vec_add(z4, z5); \
|
|
||||||
\
|
|
||||||
tmp11 = vec_sl(tmp11, PRE_MULTIPLY_SCALE_BITS); \
|
|
||||||
z3 = vec_madds(tmp11, PW_0707, zero); \
|
|
||||||
\
|
|
||||||
z11 = vec_add(tmp7, z3); \
|
|
||||||
z13 = vec_sub(tmp7, z3); \
|
|
||||||
\
|
|
||||||
out5 = vec_add(z13, z2); \
|
|
||||||
out3 = vec_sub(z13, z2); \
|
|
||||||
out1 = vec_add(z11, z4); \
|
|
||||||
out7 = vec_sub(z11, z4); \
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Fast integer forward DCT of one block, computed in place with AltiVec.
 *
 * data  is read as eight __vector short values starting at elements
 *       0, 8, ..., 56 and overwritten with out0..out7 at the same positions.
 *
 * Each of the two passes transposes its input with TRANSPOSE(), forms the
 * sums and differences tmp0..tmp7 of mirrored vectors (0/7, 1/6, 2/5, 3/4)
 * and then expands DO_DCT(), which produces out0..out7.
 *
 * NOTE(review): data is accessed by dereferencing __vector short pointers, so
 * it is presumably required to be 16-byte aligned -- confirm with the
 * callers.
 */
void
|
|
||||||
jsimd_fdct_ifast_altivec (DCTELEM *data)
|
|
||||||
{
|
|
||||||
__vector short row0, row1, row2, row3, row4, row5, row6, row7,
|
|
||||||
col0, col1, col2, col3, col4, col5, col6, col7,
|
|
||||||
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13,
|
|
||||||
z1, z2, z3, z4, z5, z11, z13,
|
|
||||||
out0, out1, out2, out3, out4, out5, out6, out7;
|
|
||||||
|
|
||||||
/* Constants */
|
|
||||||
/* zero is the addend passed to vec_madds() in DO_DCT(); each PW_* vector
 * holds one element of `constants` replicated across all lanes. */
__vector short zero = vec_splat_s16(0),
|
|
||||||
PW_0382 = vec_splat(constants, 0),
|
|
||||||
PW_0541 = vec_splat(constants, 1),
|
|
||||||
PW_0707 = vec_splat(constants, 2),
|
|
||||||
PW_1306 = vec_splat(constants, 3);
|
|
||||||
/* Left-shift count DO_DCT() applies to an operand before each vec_madds(). */
__vector unsigned short PRE_MULTIPLY_SCALE_BITS = vec_splat_u16(2);
|
|
||||||
|
|
||||||
/* Pass 1: process rows. */
|
|
||||||
|
|
||||||
/* Load the block, eight elements per vector. */
row0 = *(__vector short *)&data[0];
|
|
||||||
row1 = *(__vector short *)&data[8];
|
|
||||||
row2 = *(__vector short *)&data[16];
|
|
||||||
row3 = *(__vector short *)&data[24];
|
|
||||||
row4 = *(__vector short *)&data[32];
|
|
||||||
row5 = *(__vector short *)&data[40];
|
|
||||||
row6 = *(__vector short *)&data[48];
|
|
||||||
row7 = *(__vector short *)&data[56];
|
|
||||||
|
|
||||||
/* row0..row7 -> col0..col7 */
TRANSPOSE(row, col);
|
|
||||||
|
|
||||||
/* Sums and differences of mirrored vectors: the inputs of DO_DCT(). */
tmp0 = vec_add(col0, col7);
|
|
||||||
tmp7 = vec_sub(col0, col7);
|
|
||||||
tmp1 = vec_add(col1, col6);
|
|
||||||
tmp6 = vec_sub(col1, col6);
|
|
||||||
tmp2 = vec_add(col2, col5);
|
|
||||||
tmp5 = vec_sub(col2, col5);
|
|
||||||
tmp3 = vec_add(col3, col4);
|
|
||||||
tmp4 = vec_sub(col3, col4);
|
|
||||||
|
|
||||||
DO_DCT();
|
|
||||||
|
|
||||||
/* Pass 2: process columns. */
|
|
||||||
|
|
||||||
/* out0..out7 from pass 1 -> row0..row7 */
TRANSPOSE(out, row);
|
|
||||||
|
|
||||||
tmp0 = vec_add(row0, row7);
|
|
||||||
tmp7 = vec_sub(row0, row7);
|
|
||||||
tmp1 = vec_add(row1, row6);
|
|
||||||
tmp6 = vec_sub(row1, row6);
|
|
||||||
tmp2 = vec_add(row2, row5);
|
|
||||||
tmp5 = vec_sub(row2, row5);
|
|
||||||
tmp3 = vec_add(row3, row4);
|
|
||||||
tmp4 = vec_sub(row3, row4);
|
|
||||||
|
|
||||||
DO_DCT();
|
|
||||||
|
|
||||||
/* Write the second-pass outputs back over the input block. */
*(__vector short *)&data[0] = out0;
|
|
||||||
*(__vector short *)&data[8] = out1;
|
|
||||||
*(__vector short *)&data[16] = out2;
|
|
||||||
*(__vector short *)&data[24] = out3;
|
|
||||||
*(__vector short *)&data[32] = out4;
|
|
||||||
*(__vector short *)&data[40] = out5;
|
|
||||||
*(__vector short *)&data[48] = out6;
|
|
||||||
*(__vector short *)&data[56] = out7;
|
|
||||||
}
|
|
||||||
@@ -6,6 +6,7 @@
|
|||||||
* Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
* Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||||
* Copyright (C) 2013-2014, Linaro Limited
|
* Copyright (C) 2013-2014, Linaro Limited
|
||||||
* Author: Ragesh Radhakrishnan <ragesh.r@linaro.org>
|
* Author: Ragesh Radhakrishnan <ragesh.r@linaro.org>
|
||||||
|
* Copyright (C) 2014, D. R. Commander. All rights reserved.
|
||||||
*
|
*
|
||||||
* This software is provided 'as-is', without any express or implied
|
* This software is provided 'as-is', without any express or implied
|
||||||
* warranty. In no event will the authors be held liable for any damages
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
@@ -197,21 +198,21 @@ _\fname:
|
|||||||
tmp13 = q1; \
|
tmp13 = q1; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define XFIX_0_899976223 v0.4h[0]
|
#define XFIX_0_899976223 v0.h[0]
|
||||||
#define XFIX_0_541196100 v0.4h[1]
|
#define XFIX_0_541196100 v0.h[1]
|
||||||
#define XFIX_2_562915447 v0.4h[2]
|
#define XFIX_2_562915447 v0.h[2]
|
||||||
#define XFIX_0_298631336_MINUS_0_899976223 v0.4h[3]
|
#define XFIX_0_298631336_MINUS_0_899976223 v0.h[3]
|
||||||
#define XFIX_1_501321110_MINUS_0_899976223 v1.4h[0]
|
#define XFIX_1_501321110_MINUS_0_899976223 v1.h[0]
|
||||||
#define XFIX_2_053119869_MINUS_2_562915447 v1.4h[1]
|
#define XFIX_2_053119869_MINUS_2_562915447 v1.h[1]
|
||||||
#define XFIX_0_541196100_PLUS_0_765366865 v1.4h[2]
|
#define XFIX_0_541196100_PLUS_0_765366865 v1.h[2]
|
||||||
#define XFIX_1_175875602 v1.4h[3]
|
#define XFIX_1_175875602 v1.h[3]
|
||||||
#define XFIX_1_175875602_MINUS_0_390180644 v2.4h[0]
|
#define XFIX_1_175875602_MINUS_0_390180644 v2.h[0]
|
||||||
#define XFIX_0_541196100_MINUS_1_847759065 v2.4h[1]
|
#define XFIX_0_541196100_MINUS_1_847759065 v2.h[1]
|
||||||
#define XFIX_3_072711026_MINUS_2_562915447 v2.4h[2]
|
#define XFIX_3_072711026_MINUS_2_562915447 v2.h[2]
|
||||||
#define XFIX_1_175875602_MINUS_1_961570560 v2.4h[3]
|
#define XFIX_1_175875602_MINUS_1_961570560 v2.h[3]
|
||||||
|
|
||||||
.balign 16
|
.balign 16
|
||||||
jsimd_idct_islow_neon_consts:
|
Ljsimd_idct_islow_neon_consts:
|
||||||
.short FIX_0_899976223 /* d0[0] */
|
.short FIX_0_899976223 /* d0[0] */
|
||||||
.short FIX_0_541196100 /* d0[1] */
|
.short FIX_0_541196100 /* d0[1] */
|
||||||
.short FIX_2_562915447 /* d0[2] */
|
.short FIX_2_562915447 /* d0[2] */
|
||||||
@@ -256,54 +257,54 @@ asm_function jsimd_idct_islow_neon
|
|||||||
/* Save all NEON registers and x15 (32 NEON registers * 8 bytes + 16) */
|
/* Save all NEON registers and x15 (32 NEON registers * 8 bytes + 16) */
|
||||||
sub sp, sp, 272
|
sub sp, sp, 272
|
||||||
str x15, [sp], 16
|
str x15, [sp], 16
|
||||||
adr x15, jsimd_idct_islow_neon_consts
|
adr x15, Ljsimd_idct_islow_neon_consts
|
||||||
st1 {v0.8b - v3.8b}, [sp], 32
|
st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32
|
||||||
st1 {v4.8b - v7.8b}, [sp], 32
|
st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
||||||
st1 {v8.8b - v11.8b}, [sp], 32
|
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
st1 {v12.8b - v15.8b}, [sp], 32
|
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
st1 {v16.8b - v19.8b}, [sp], 32
|
st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
||||||
st1 {v20.8b - v23.8b}, [sp], 32
|
st1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32
|
||||||
st1 {v24.8b - v27.8b}, [sp], 32
|
st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
||||||
st1 {v28.8b - v31.8b}, [sp], 32
|
st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32
|
||||||
ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [COEF_BLOCK], 32
|
ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [COEF_BLOCK], 32
|
||||||
ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32
|
ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32
|
||||||
ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [COEF_BLOCK], 32
|
ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [COEF_BLOCK], 32
|
||||||
mul v16.4h, v16.4h, v0.4h
|
mul v16.4h, v16.4h, v0.4h
|
||||||
mul v17.4h, v17.4h, v1.4h
|
mul v17.4h, v17.4h, v1.4h
|
||||||
ins v16.2d[1], v17.2d[0] /* 128 bit q8 */
|
ins v16.d[1], v17.d[0] /* 128 bit q8 */
|
||||||
ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32
|
ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32
|
||||||
mul v18.4h, v18.4h, v2.4h
|
mul v18.4h, v18.4h, v2.4h
|
||||||
mul v19.4h, v19.4h, v3.4h
|
mul v19.4h, v19.4h, v3.4h
|
||||||
ins v18.2d[1], v19.2d[0] /* 128 bit q9 */
|
ins v18.d[1], v19.d[0] /* 128 bit q9 */
|
||||||
ld1 {v24.4h, v25.4h, v26.4h, v27.4h}, [COEF_BLOCK], 32
|
ld1 {v24.4h, v25.4h, v26.4h, v27.4h}, [COEF_BLOCK], 32
|
||||||
mul v20.4h, v20.4h, v4.4h
|
mul v20.4h, v20.4h, v4.4h
|
||||||
mul v21.4h, v21.4h, v5.4h
|
mul v21.4h, v21.4h, v5.4h
|
||||||
ins v20.2d[1], v21.2d[0] /* 128 bit q10 */
|
ins v20.d[1], v21.d[0] /* 128 bit q10 */
|
||||||
ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32
|
ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32
|
||||||
mul v22.4h, v22.4h, v6.4h
|
mul v22.4h, v22.4h, v6.4h
|
||||||
mul v23.4h, v23.4h, v7.4h
|
mul v23.4h, v23.4h, v7.4h
|
||||||
ins v22.2d[1], v23.2d[0] /* 128 bit q11 */
|
ins v22.d[1], v23.d[0] /* 128 bit q11 */
|
||||||
ld1 {v28.4h, v29.4h, v30.4h, v31.4h}, [COEF_BLOCK]
|
ld1 {v28.4h, v29.4h, v30.4h, v31.4h}, [COEF_BLOCK]
|
||||||
mul v24.4h, v24.4h, v0.4h
|
mul v24.4h, v24.4h, v0.4h
|
||||||
mul v25.4h, v25.4h, v1.4h
|
mul v25.4h, v25.4h, v1.4h
|
||||||
ins v24.2d[1], v25.2d[0] /* 128 bit q12 */
|
ins v24.d[1], v25.d[0] /* 128 bit q12 */
|
||||||
ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32
|
ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32
|
||||||
mul v28.4h, v28.4h, v4.4h
|
mul v28.4h, v28.4h, v4.4h
|
||||||
mul v29.4h, v29.4h, v5.4h
|
mul v29.4h, v29.4h, v5.4h
|
||||||
ins v28.2d[1], v29.2d[0] /* 128 bit q14 */
|
ins v28.d[1], v29.d[0] /* 128 bit q14 */
|
||||||
mul v26.4h, v26.4h, v2.4h
|
mul v26.4h, v26.4h, v2.4h
|
||||||
mul v27.4h, v27.4h, v3.4h
|
mul v27.4h, v27.4h, v3.4h
|
||||||
ins v26.2d[1], v27.2d[0] /* 128 bit q13 */
|
ins v26.d[1], v27.d[0] /* 128 bit q13 */
|
||||||
ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x15] /* load constants */
|
ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x15] /* load constants */
|
||||||
add x15, x15, #16
|
add x15, x15, #16
|
||||||
mul v30.4h, v30.4h, v6.4h
|
mul v30.4h, v30.4h, v6.4h
|
||||||
mul v31.4h, v31.4h, v7.4h
|
mul v31.4h, v31.4h, v7.4h
|
||||||
ins v30.2d[1], v31.2d[0] /* 128 bit q15 */
|
ins v30.d[1], v31.d[0] /* 128 bit q15 */
|
||||||
/* Go to the bottom of the stack */
|
/* Go to the bottom of the stack */
|
||||||
sub sp, sp, 352
|
sub sp, sp, 352
|
||||||
stp x4, x5, [sp], 16
|
stp x4, x5, [sp], 16
|
||||||
st1 {v8.4h - v11.4h}, [sp], 32 /* save NEON registers */
|
st1 {v8.4h, v9.4h, v10.4h, v11.4h}, [sp], 32 /* save NEON registers */
|
||||||
st1 {v12.4h - v15.4h}, [sp], 32
|
st1 {v12.4h, v13.4h, v14.4h, v15.4h}, [sp], 32
|
||||||
/* 1-D IDCT, pass 1, left 4x8 half */
|
/* 1-D IDCT, pass 1, left 4x8 half */
|
||||||
add v4.4h, ROW7L.4h, ROW3L.4h
|
add v4.4h, ROW7L.4h, ROW3L.4h
|
||||||
add v5.4h, ROW5L.4h, ROW1L.4h
|
add v5.4h, ROW5L.4h, ROW1L.4h
|
||||||
@@ -378,7 +379,7 @@ asm_function jsimd_idct_islow_neon
|
|||||||
rshrn ROW0L.4h, v12.4s, #11
|
rshrn ROW0L.4h, v12.4s, #11
|
||||||
rshrn ROW4L.4h, v6.4s, #11
|
rshrn ROW4L.4h, v6.4s, #11
|
||||||
|
|
||||||
beq 3f /* Go to do some special handling for the sparse right 4x8 half */
|
b.eq 3f /* Go to do some special handling for the sparse right 4x8 half */
|
||||||
|
|
||||||
/* 1-D IDCT, pass 1, right 4x8 half */
|
/* 1-D IDCT, pass 1, right 4x8 half */
|
||||||
ld1 {v2.4h}, [x15] /* reload constants */
|
ld1 {v2.4h}, [x15] /* reload constants */
|
||||||
@@ -553,33 +554,33 @@ asm_function jsimd_idct_islow_neon
|
|||||||
shrn ROW4R.4h, v6.4s, #16
|
shrn ROW4R.4h, v6.4s, #16
|
||||||
|
|
||||||
2: /* Descale to 8-bit and range limit */
|
2: /* Descale to 8-bit and range limit */
|
||||||
ins v16.2d[1], v17.2d[0]
|
ins v16.d[1], v17.d[0]
|
||||||
ins v18.2d[1], v19.2d[0]
|
ins v18.d[1], v19.d[0]
|
||||||
ins v20.2d[1], v21.2d[0]
|
ins v20.d[1], v21.d[0]
|
||||||
ins v22.2d[1], v23.2d[0]
|
ins v22.d[1], v23.d[0]
|
||||||
sqrshrn v16.8b, v16.8h, #2
|
sqrshrn v16.8b, v16.8h, #2
|
||||||
sqrshrn2 v16.16b, v18.8h, #2
|
sqrshrn2 v16.16b, v18.8h, #2
|
||||||
sqrshrn v18.8b, v20.8h, #2
|
sqrshrn v18.8b, v20.8h, #2
|
||||||
sqrshrn2 v18.16b, v22.8h, #2
|
sqrshrn2 v18.16b, v22.8h, #2
|
||||||
|
|
||||||
/* vpop {v8.4h - d15.4h} */ /* restore NEON registers */
|
/* vpop {v8.4h - d15.4h} */ /* restore NEON registers */
|
||||||
ld1 {v8.4h - v11.4h}, [sp], 32
|
ld1 {v8.4h, v9.4h, v10.4h, v11.4h}, [sp], 32
|
||||||
ld1 {v12.4h - v15.4h}, [sp], 32
|
ld1 {v12.4h, v13.4h, v14.4h, v15.4h}, [sp], 32
|
||||||
ins v24.2d[1], v25.2d[0]
|
ins v24.d[1], v25.d[0]
|
||||||
|
|
||||||
sqrshrn v20.8b, v24.8h, #2
|
sqrshrn v20.8b, v24.8h, #2
|
||||||
/* Transpose the final 8-bit samples and do signed->unsigned conversion */
|
/* Transpose the final 8-bit samples and do signed->unsigned conversion */
|
||||||
/* trn1 v16.8h, v16.8h, v18.8h */
|
/* trn1 v16.8h, v16.8h, v18.8h */
|
||||||
transpose v16, v18, v3, .16b, .8h
|
transpose v16, v18, v3, .16b, .8h
|
||||||
ins v26.2d[1], v27.2d[0]
|
ins v26.d[1], v27.d[0]
|
||||||
ins v28.2d[1], v29.2d[0]
|
ins v28.d[1], v29.d[0]
|
||||||
ins v30.2d[1], v31.2d[0]
|
ins v30.d[1], v31.d[0]
|
||||||
sqrshrn2 v20.16b, v26.8h, #2
|
sqrshrn2 v20.16b, v26.8h, #2
|
||||||
sqrshrn v22.8b, v28.8h, #2
|
sqrshrn v22.8b, v28.8h, #2
|
||||||
movi v0.16b, #(CENTERJSAMPLE)
|
movi v0.16b, #(CENTERJSAMPLE)
|
||||||
sqrshrn2 v22.16b, v30.8h, #2
|
sqrshrn2 v22.16b, v30.8h, #2
|
||||||
transpose_single v16, v17, v3, .2d, .8b
|
transpose_single v16, v17, v3, .d, .8b
|
||||||
transpose_single v18, v19, v3, .2d, .8b
|
transpose_single v18, v19, v3, .d, .8b
|
||||||
add v16.8b, v16.8b, v0.8b
|
add v16.8b, v16.8b, v0.8b
|
||||||
add v17.8b, v17.8b, v0.8b
|
add v17.8b, v17.8b, v0.8b
|
||||||
add v18.8b, v18.8b, v0.8b
|
add v18.8b, v18.8b, v0.8b
|
||||||
@@ -590,7 +591,7 @@ asm_function jsimd_idct_islow_neon
|
|||||||
add TMP1, TMP1, OUTPUT_COL
|
add TMP1, TMP1, OUTPUT_COL
|
||||||
add TMP2, TMP2, OUTPUT_COL
|
add TMP2, TMP2, OUTPUT_COL
|
||||||
st1 {v16.8b}, [TMP1]
|
st1 {v16.8b}, [TMP1]
|
||||||
transpose_single v20, v21, v3, .2d, .8b
|
transpose_single v20, v21, v3, .d, .8b
|
||||||
st1 {v17.8b}, [TMP2]
|
st1 {v17.8b}, [TMP2]
|
||||||
ldp TMP1, TMP2, [OUTPUT_BUF], 16
|
ldp TMP1, TMP2, [OUTPUT_BUF], 16
|
||||||
add TMP1, TMP1, OUTPUT_COL
|
add TMP1, TMP1, OUTPUT_COL
|
||||||
@@ -605,7 +606,7 @@ asm_function jsimd_idct_islow_neon
|
|||||||
add TMP2, TMP2, OUTPUT_COL
|
add TMP2, TMP2, OUTPUT_COL
|
||||||
add TMP3, TMP3, OUTPUT_COL
|
add TMP3, TMP3, OUTPUT_COL
|
||||||
add TMP4, TMP4, OUTPUT_COL
|
add TMP4, TMP4, OUTPUT_COL
|
||||||
transpose_single v22, v23, v3, .2d, .8b
|
transpose_single v22, v23, v3, .d, .8b
|
||||||
st1 {v20.8b}, [TMP1]
|
st1 {v20.8b}, [TMP1]
|
||||||
add v22.8b, v22.8b, v0.8b
|
add v22.8b, v22.8b, v0.8b
|
||||||
add v23.8b, v23.8b, v0.8b
|
add v23.8b, v23.8b, v0.8b
|
||||||
@@ -613,14 +614,14 @@ asm_function jsimd_idct_islow_neon
|
|||||||
st1 {v22.8b}, [TMP3]
|
st1 {v22.8b}, [TMP3]
|
||||||
st1 {v23.8b}, [TMP4]
|
st1 {v23.8b}, [TMP4]
|
||||||
ldr x15, [sp], 16
|
ldr x15, [sp], 16
|
||||||
ld1 {v0.8b - v3.8b}, [sp], 32
|
ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32
|
||||||
ld1 {v4.8b - v7.8b}, [sp], 32
|
ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
||||||
ld1 {v8.8b - v11.8b}, [sp], 32
|
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
ld1 {v12.8b - v15.8b}, [sp], 32
|
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
ld1 {v16.8b - v19.8b}, [sp], 32
|
ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
||||||
ld1 {v20.8b - v23.8b}, [sp], 32
|
ld1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32
|
||||||
ld1 {v24.8b - v27.8b}, [sp], 32
|
ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
||||||
ld1 {v28.8b - v31.8b}, [sp], 32
|
ld1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32
|
||||||
blr x30
|
blr x30
|
||||||
|
|
||||||
3: /* Left 4x8 half is done, right 4x8 half contains mostly zeros */
|
3: /* Left 4x8 half is done, right 4x8 half contains mostly zeros */
|
||||||
@@ -636,17 +637,17 @@ asm_function jsimd_idct_islow_neon
|
|||||||
transpose ROW0L, ROW2L, v3, .16b, .2s
|
transpose ROW0L, ROW2L, v3, .16b, .2s
|
||||||
transpose ROW5L, ROW7L, v3, .16b, .2s
|
transpose ROW5L, ROW7L, v3, .16b, .2s
|
||||||
cmp x0, #0
|
cmp x0, #0
|
||||||
beq 4f /* Right 4x8 half has all zeros, go to 'sparse' second pass */
|
b.eq 4f /* Right 4x8 half has all zeros, go to 'sparse' second pass */
|
||||||
|
|
||||||
/* Only row 0 is non-zero for the right 4x8 half */
|
/* Only row 0 is non-zero for the right 4x8 half */
|
||||||
dup ROW1R.4h, ROW0R.4h[1]
|
dup ROW1R.4h, ROW0R.h[1]
|
||||||
dup ROW2R.4h, ROW0R.4h[2]
|
dup ROW2R.4h, ROW0R.h[2]
|
||||||
dup ROW3R.4h, ROW0R.4h[3]
|
dup ROW3R.4h, ROW0R.h[3]
|
||||||
dup ROW4R.4h, ROW0R.4h[0]
|
dup ROW4R.4h, ROW0R.h[0]
|
||||||
dup ROW5R.4h, ROW0R.4h[1]
|
dup ROW5R.4h, ROW0R.h[1]
|
||||||
dup ROW6R.4h, ROW0R.4h[2]
|
dup ROW6R.4h, ROW0R.h[2]
|
||||||
dup ROW7R.4h, ROW0R.4h[3]
|
dup ROW7R.4h, ROW0R.h[3]
|
||||||
dup ROW0R.4h, ROW0R.4h[0]
|
dup ROW0R.4h, ROW0R.h[0]
|
||||||
b 1b /* Go to 'normal' second pass */
|
b 1b /* Go to 'normal' second pass */
|
||||||
|
|
||||||
4: /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), left 4x8 half */
|
4: /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), left 4x8 half */
|
||||||
@@ -770,13 +771,13 @@ asm_function jsimd_idct_islow_neon
|
|||||||
* per 1-D IDCT pass, totalling to 5 VQDMULH and 35 VADD/VSUB instructions.
|
* per 1-D IDCT pass, totalling to 5 VQDMULH and 35 VADD/VSUB instructions.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define XFIX_1_082392200 v0.4h[0]
|
#define XFIX_1_082392200 v0.h[0]
|
||||||
#define XFIX_1_414213562 v0.4h[1]
|
#define XFIX_1_414213562 v0.h[1]
|
||||||
#define XFIX_1_847759065 v0.4h[2]
|
#define XFIX_1_847759065 v0.h[2]
|
||||||
#define XFIX_2_613125930 v0.4h[3]
|
#define XFIX_2_613125930 v0.h[3]
|
||||||
|
|
||||||
.balign 16
|
.balign 16
|
||||||
jsimd_idct_ifast_neon_consts:
|
Ljsimd_idct_ifast_neon_consts:
|
||||||
.short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */
|
.short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */
|
||||||
.short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */
|
.short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */
|
||||||
.short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */
|
.short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */
|
||||||
@@ -810,12 +811,12 @@ asm_function jsimd_idct_ifast_neon
|
|||||||
/* Save NEON registers used in fast IDCT */
|
/* Save NEON registers used in fast IDCT */
|
||||||
sub sp, sp, #176
|
sub sp, sp, #176
|
||||||
stp x22, x23, [sp], 16
|
stp x22, x23, [sp], 16
|
||||||
adr x23, jsimd_idct_ifast_neon_consts
|
adr x23, Ljsimd_idct_ifast_neon_consts
|
||||||
st1 {v0.8b - v3.8b}, [sp], 32
|
st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32
|
||||||
st1 {v4.8b - v7.8b}, [sp], 32
|
st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
||||||
st1 {v8.8b - v11.8b}, [sp], 32
|
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
st1 {v12.8b - v15.8b}, [sp], 32
|
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
st1 {v16.8b - v19.8b}, [sp], 32
|
st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
||||||
ld1 {v8.8h, v9.8h}, [COEF_BLOCK], 32
|
ld1 {v8.8h, v9.8h}, [COEF_BLOCK], 32
|
||||||
ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32
|
ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32
|
||||||
ld1 {v10.8h, v11.8h}, [COEF_BLOCK], 32
|
ld1 {v10.8h, v11.8h}, [COEF_BLOCK], 32
|
||||||
@@ -909,24 +910,24 @@ asm_function jsimd_idct_ifast_neon
|
|||||||
trn2 v15.4s, v18.4s, v15.4s
|
trn2 v15.4s, v18.4s, v15.4s
|
||||||
/* vswp v14.4h, v10-MSB.4h */
|
/* vswp v14.4h, v10-MSB.4h */
|
||||||
umov x22, v14.d[0]
|
umov x22, v14.d[0]
|
||||||
ins v14.2d[0], v10.2d[1]
|
ins v14.d[0], v10.d[1]
|
||||||
ins v10.2d[1], x22
|
ins v10.d[1], x22
|
||||||
/* vswp v13.4h, v9MSB.4h */
|
/* vswp v13.4h, v9MSB.4h */
|
||||||
|
|
||||||
umov x22, v13.d[0]
|
umov x22, v13.d[0]
|
||||||
ins v13.2d[0], v9.2d[1]
|
ins v13.d[0], v9.d[1]
|
||||||
ins v9.2d[1], x22
|
ins v9.d[1], x22
|
||||||
/* 1-D IDCT, pass 2 */
|
/* 1-D IDCT, pass 2 */
|
||||||
sub v2.8h, v10.8h, v14.8h
|
sub v2.8h, v10.8h, v14.8h
|
||||||
/* vswp v15.4h, v11MSB.4h */
|
/* vswp v15.4h, v11MSB.4h */
|
||||||
umov x22, v15.d[0]
|
umov x22, v15.d[0]
|
||||||
ins v15.2d[0], v11.2d[1]
|
ins v15.d[0], v11.d[1]
|
||||||
ins v11.2d[1], x22
|
ins v11.d[1], x22
|
||||||
add v14.8h, v10.8h, v14.8h
|
add v14.8h, v10.8h, v14.8h
|
||||||
/* vswp v12.4h, v8-MSB.4h */
|
/* vswp v12.4h, v8-MSB.4h */
|
||||||
umov x22, v12.d[0]
|
umov x22, v12.d[0]
|
||||||
ins v12.2d[0], v8.2d[1]
|
ins v12.d[0], v8.d[1]
|
||||||
ins v8.2d[1], x22
|
ins v8.d[1], x22
|
||||||
sub v1.8h, v11.8h, v13.8h
|
sub v1.8h, v11.8h, v13.8h
|
||||||
add v13.8h, v11.8h, v13.8h
|
add v13.8h, v11.8h, v13.8h
|
||||||
sub v5.8h, v9.8h, v15.8h
|
sub v5.8h, v9.8h, v15.8h
|
||||||
@@ -997,13 +998,13 @@ asm_function jsimd_idct_ifast_neon
|
|||||||
trn1 v9.4s, v9.4s, v11.4s
|
trn1 v9.4s, v9.4s, v11.4s
|
||||||
trn2 v11.4s, v18.4s, v11.4s
|
trn2 v11.4s, v18.4s, v11.4s
|
||||||
/* make copy */
|
/* make copy */
|
||||||
ins v17.2d[0], v8.2d[1]
|
ins v17.d[0], v8.d[1]
|
||||||
/* Transpose d16-d17-msb */
|
/* Transpose d16-d17-msb */
|
||||||
mov v18.16b, v8.16b
|
mov v18.16b, v8.16b
|
||||||
trn1 v8.8b, v8.8b, v17.8b
|
trn1 v8.8b, v8.8b, v17.8b
|
||||||
trn2 v17.8b, v18.8b, v17.8b
|
trn2 v17.8b, v18.8b, v17.8b
|
||||||
/* make copy */
|
/* make copy */
|
||||||
ins v19.2d[0], v9.2d[1]
|
ins v19.d[0], v9.d[1]
|
||||||
mov v18.16b, v9.16b
|
mov v18.16b, v9.16b
|
||||||
trn1 v9.8b, v9.8b, v19.8b
|
trn1 v9.8b, v9.8b, v19.8b
|
||||||
trn2 v19.8b, v18.8b, v19.8b
|
trn2 v19.8b, v18.8b, v19.8b
|
||||||
@@ -1018,7 +1019,7 @@ asm_function jsimd_idct_ifast_neon
|
|||||||
add TMP2, TMP2, OUTPUT_COL
|
add TMP2, TMP2, OUTPUT_COL
|
||||||
st1 {v9.8b}, [TMP1]
|
st1 {v9.8b}, [TMP1]
|
||||||
/* make copy */
|
/* make copy */
|
||||||
ins v7.2d[0], v10.2d[1]
|
ins v7.d[0], v10.d[1]
|
||||||
mov v18.16b, v10.16b
|
mov v18.16b, v10.16b
|
||||||
trn1 v10.8b, v10.8b, v7.8b
|
trn1 v10.8b, v10.8b, v7.8b
|
||||||
trn2 v7.8b, v18.8b, v7.8b
|
trn2 v7.8b, v18.8b, v7.8b
|
||||||
@@ -1031,7 +1032,7 @@ asm_function jsimd_idct_ifast_neon
|
|||||||
add TMP5, TMP5, OUTPUT_COL
|
add TMP5, TMP5, OUTPUT_COL
|
||||||
st1 {v10.8b}, [TMP1]
|
st1 {v10.8b}, [TMP1]
|
||||||
/* make copy */
|
/* make copy */
|
||||||
ins v16.2d[0], v11.2d[1]
|
ins v16.d[0], v11.d[1]
|
||||||
mov v18.16b, v11.16b
|
mov v18.16b, v11.16b
|
||||||
trn1 v11.8b, v11.8b, v16.8b
|
trn1 v11.8b, v11.8b, v16.8b
|
||||||
trn2 v16.8b, v18.8b, v16.8b
|
trn2 v16.8b, v18.8b, v16.8b
|
||||||
@@ -1040,11 +1041,11 @@ asm_function jsimd_idct_ifast_neon
|
|||||||
st1 {v16.8b}, [TMP5]
|
st1 {v16.8b}, [TMP5]
|
||||||
sub sp, sp, #176
|
sub sp, sp, #176
|
||||||
ldp x22, x23, [sp], 16
|
ldp x22, x23, [sp], 16
|
||||||
ld1 {v0.8b - v3.8b}, [sp], 32
|
ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32
|
||||||
ld1 {v4.8b - v7.8b}, [sp], 32
|
ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
||||||
ld1 {v8.8b - v11.8b}, [sp], 32
|
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
ld1 {v12.8b - v15.8b}, [sp], 32
|
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
ld1 {v16.8b - v19.8b}, [sp], 32
|
ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
||||||
blr x30
|
blr x30
|
||||||
|
|
||||||
.unreq DCT_TABLE
|
.unreq DCT_TABLE
|
||||||
@@ -1095,38 +1096,38 @@ asm_function jsimd_idct_ifast_neon
|
|||||||
#define FIX_3_624509785 (29692) /* FIX(3.624509785) */
|
#define FIX_3_624509785 (29692) /* FIX(3.624509785) */
|
||||||
|
|
||||||
.balign 16
|
.balign 16
|
||||||
jsimd_idct_4x4_neon_consts:
|
Ljsimd_idct_4x4_neon_consts:
|
||||||
.short FIX_1_847759065 /* v0.4h[0] */
|
.short FIX_1_847759065 /* v0.h[0] */
|
||||||
.short -FIX_0_765366865 /* v0.4h[1] */
|
.short -FIX_0_765366865 /* v0.h[1] */
|
||||||
.short -FIX_0_211164243 /* v0.4h[2] */
|
.short -FIX_0_211164243 /* v0.h[2] */
|
||||||
.short FIX_1_451774981 /* v0.4h[3] */
|
.short FIX_1_451774981 /* v0.h[3] */
|
||||||
.short -FIX_2_172734803 /* d1[0] */
|
.short -FIX_2_172734803 /* d1[0] */
|
||||||
.short FIX_1_061594337 /* d1[1] */
|
.short FIX_1_061594337 /* d1[1] */
|
||||||
.short -FIX_0_509795579 /* d1[2] */
|
.short -FIX_0_509795579 /* d1[2] */
|
||||||
.short -FIX_0_601344887 /* d1[3] */
|
.short -FIX_0_601344887 /* d1[3] */
|
||||||
.short FIX_0_899976223 /* v2.4h[0] */
|
.short FIX_0_899976223 /* v2.h[0] */
|
||||||
.short FIX_2_562915447 /* v2.4h[1] */
|
.short FIX_2_562915447 /* v2.h[1] */
|
||||||
.short 1 << (CONST_BITS+1) /* v2.4h[2] */
|
.short 1 << (CONST_BITS+1) /* v2.h[2] */
|
||||||
.short 0 /* v2.4h[3] */
|
.short 0 /* v2.h[3] */
|
||||||
|
|
||||||
.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29
|
.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29
|
||||||
smull v28.4s, \x4, v2.4h[2]
|
smull v28.4s, \x4, v2.h[2]
|
||||||
smlal v28.4s, \x8, v0.4h[0]
|
smlal v28.4s, \x8, v0.h[0]
|
||||||
smlal v28.4s, \x14, v0.4h[1]
|
smlal v28.4s, \x14, v0.h[1]
|
||||||
|
|
||||||
smull v26.4s, \x16, v1.4h[2]
|
smull v26.4s, \x16, v1.h[2]
|
||||||
smlal v26.4s, \x12, v1.4h[3]
|
smlal v26.4s, \x12, v1.h[3]
|
||||||
smlal v26.4s, \x10, v2.4h[0]
|
smlal v26.4s, \x10, v2.h[0]
|
||||||
smlal v26.4s, \x6, v2.4h[1]
|
smlal v26.4s, \x6, v2.h[1]
|
||||||
|
|
||||||
smull v30.4s, \x4, v2.4h[2]
|
smull v30.4s, \x4, v2.h[2]
|
||||||
smlsl v30.4s, \x8, v0.4h[0]
|
smlsl v30.4s, \x8, v0.h[0]
|
||||||
smlsl v30.4s, \x14, v0.4h[1]
|
smlsl v30.4s, \x14, v0.h[1]
|
||||||
|
|
||||||
smull v24.4s, \x16, v0.4h[2]
|
smull v24.4s, \x16, v0.h[2]
|
||||||
smlal v24.4s, \x12, v0.4h[3]
|
smlal v24.4s, \x12, v0.h[3]
|
||||||
smlal v24.4s, \x10, v1.4h[0]
|
smlal v24.4s, \x10, v1.h[0]
|
||||||
smlal v24.4s, \x6, v1.4h[1]
|
smlal v24.4s, \x6, v1.h[1]
|
||||||
|
|
||||||
add v20.4s, v28.4s, v26.4s
|
add v20.4s, v28.4s, v26.4s
|
||||||
sub v28.4s, v28.4s, v26.4s
|
sub v28.4s, v28.4s, v26.4s
|
||||||
@@ -1171,15 +1172,15 @@ asm_function jsimd_idct_4x4_neon
|
|||||||
sub sp, sp, 272
|
sub sp, sp, 272
|
||||||
str x15, [sp], 16
|
str x15, [sp], 16
|
||||||
/* Load constants (v3.4h is just used for padding) */
|
/* Load constants (v3.4h is just used for padding) */
|
||||||
adr TMP4, jsimd_idct_4x4_neon_consts
|
adr TMP4, Ljsimd_idct_4x4_neon_consts
|
||||||
st1 {v0.8b - v3.8b}, [sp], 32
|
st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32
|
||||||
st1 {v4.8b - v7.8b}, [sp], 32
|
st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
||||||
st1 {v8.8b - v11.8b}, [sp], 32
|
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
st1 {v12.8b - v15.8b}, [sp], 32
|
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
st1 {v16.8b - v19.8b}, [sp], 32
|
st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
||||||
st1 {v20.8b - v23.8b}, [sp], 32
|
st1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32
|
||||||
st1 {v24.8b - v27.8b}, [sp], 32
|
st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
||||||
st1 {v28.8b - v31.8b}, [sp], 32
|
st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32
|
||||||
ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4]
|
ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4]
|
||||||
|
|
||||||
/* Load all COEF_BLOCK into NEON registers with the following allocation:
|
/* Load all COEF_BLOCK into NEON registers with the following allocation:
|
||||||
@@ -1203,45 +1204,45 @@ asm_function jsimd_idct_4x4_neon
|
|||||||
ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32
|
ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32
|
||||||
mul v4.4h, v4.4h, v18.4h
|
mul v4.4h, v4.4h, v18.4h
|
||||||
mul v5.4h, v5.4h, v19.4h
|
mul v5.4h, v5.4h, v19.4h
|
||||||
ins v4.2d[1], v5.2d[0] /* 128 bit q4 */
|
ins v4.d[1], v5.d[0] /* 128 bit q4 */
|
||||||
ld1 {v22.4h, v23.4h, v24.4h, v25.4h}, [DCT_TABLE], 32
|
ld1 {v22.4h, v23.4h, v24.4h, v25.4h}, [DCT_TABLE], 32
|
||||||
mul v6.4h, v6.4h, v20.4h
|
mul v6.4h, v6.4h, v20.4h
|
||||||
mul v7.4h, v7.4h, v21.4h
|
mul v7.4h, v7.4h, v21.4h
|
||||||
ins v6.2d[1], v7.2d[0] /* 128 bit q6 */
|
ins v6.d[1], v7.d[0] /* 128 bit q6 */
|
||||||
mul v8.4h, v8.4h, v22.4h
|
mul v8.4h, v8.4h, v22.4h
|
||||||
mul v9.4h, v9.4h, v23.4h
|
mul v9.4h, v9.4h, v23.4h
|
||||||
ins v8.2d[1], v9.2d[0] /* 128 bit q8 */
|
ins v8.d[1], v9.d[0] /* 128 bit q8 */
|
||||||
add DCT_TABLE, DCT_TABLE, #16
|
add DCT_TABLE, DCT_TABLE, #16
|
||||||
ld1 {v26.4h, v27.4h, v28.4h, v29.4h}, [DCT_TABLE], 32
|
ld1 {v26.4h, v27.4h, v28.4h, v29.4h}, [DCT_TABLE], 32
|
||||||
mul v10.4h, v10.4h, v24.4h
|
mul v10.4h, v10.4h, v24.4h
|
||||||
mul v11.4h, v11.4h, v25.4h
|
mul v11.4h, v11.4h, v25.4h
|
||||||
ins v10.2d[1], v11.2d[0] /* 128 bit q10 */
|
ins v10.d[1], v11.d[0] /* 128 bit q10 */
|
||||||
mul v12.4h, v12.4h, v26.4h
|
mul v12.4h, v12.4h, v26.4h
|
||||||
mul v13.4h, v13.4h, v27.4h
|
mul v13.4h, v13.4h, v27.4h
|
||||||
ins v12.2d[1], v13.2d[0] /* 128 bit q12 */
|
ins v12.d[1], v13.d[0] /* 128 bit q12 */
|
||||||
ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16
|
ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16
|
||||||
mul v14.4h, v14.4h, v28.4h
|
mul v14.4h, v14.4h, v28.4h
|
||||||
mul v15.4h, v15.4h, v29.4h
|
mul v15.4h, v15.4h, v29.4h
|
||||||
ins v14.2d[1], v15.2d[0] /* 128 bit q14 */
|
ins v14.d[1], v15.d[0] /* 128 bit q14 */
|
||||||
mul v16.4h, v16.4h, v30.4h
|
mul v16.4h, v16.4h, v30.4h
|
||||||
mul v17.4h, v17.4h, v31.4h
|
mul v17.4h, v17.4h, v31.4h
|
||||||
ins v16.2d[1], v17.2d[0] /* 128 bit q16 */
|
ins v16.d[1], v17.d[0] /* 128 bit q16 */
|
||||||
|
|
||||||
/* Pass 1 */
|
/* Pass 1 */
|
||||||
idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v12.4h, v14.4h, v16.4h, 12, v4.4h, v6.4h, v8.4h, v10.4h
|
idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v12.4h, v14.4h, v16.4h, 12, v4.4h, v6.4h, v8.4h, v10.4h
|
||||||
transpose_4x4 v4, v6, v8, v10, v3
|
transpose_4x4 v4, v6, v8, v10, v3
|
||||||
ins v10.2d[1], v11.2d[0]
|
ins v10.d[1], v11.d[0]
|
||||||
idct_helper v5.4h, v7.4h, v9.4h, v11.4h, v13.4h, v15.4h, v17.4h, 12, v5.4h, v7.4h, v9.4h, v11.4h
|
idct_helper v5.4h, v7.4h, v9.4h, v11.4h, v13.4h, v15.4h, v17.4h, 12, v5.4h, v7.4h, v9.4h, v11.4h
|
||||||
transpose_4x4 v5, v7, v9, v11, v3
|
transpose_4x4 v5, v7, v9, v11, v3
|
||||||
ins v10.2d[1], v11.2d[0]
|
ins v10.d[1], v11.d[0]
|
||||||
/* Pass 2 */
|
/* Pass 2 */
|
||||||
idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v7.4h, v9.4h, v11.4h, 19, v26.4h, v27.4h, v28.4h, v29.4h
|
idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v7.4h, v9.4h, v11.4h, 19, v26.4h, v27.4h, v28.4h, v29.4h
|
||||||
transpose_4x4 v26, v27, v28, v29, v3
|
transpose_4x4 v26, v27, v28, v29, v3
|
||||||
|
|
||||||
/* Range limit */
|
/* Range limit */
|
||||||
movi v30.8h, #0x80
|
movi v30.8h, #0x80
|
||||||
ins v26.2d[1], v27.2d[0]
|
ins v26.d[1], v27.d[0]
|
||||||
ins v28.2d[1], v29.2d[0]
|
ins v28.d[1], v29.d[0]
|
||||||
add v26.8h, v26.8h, v30.8h
|
add v26.8h, v26.8h, v30.8h
|
||||||
add v28.8h, v28.8h, v30.8h
|
add v28.8h, v28.8h, v30.8h
|
||||||
sqxtun v26.8b, v26.8h
|
sqxtun v26.8b, v26.8h
|
||||||
@@ -1286,14 +1287,14 @@ asm_function jsimd_idct_4x4_neon
|
|||||||
/* vpop {v8.4h - v15.4h} ;not available */
|
/* vpop {v8.4h - v15.4h} ;not available */
|
||||||
sub sp, sp, #272
|
sub sp, sp, #272
|
||||||
ldr x15, [sp], 16
|
ldr x15, [sp], 16
|
||||||
ld1 {v0.8b - v3.8b}, [sp], 32
|
ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32
|
||||||
ld1 {v4.8b - v7.8b}, [sp], 32
|
ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
||||||
ld1 {v8.8b - v11.8b}, [sp], 32
|
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
ld1 {v12.8b - v15.8b}, [sp], 32
|
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
ld1 {v16.8b - v19.8b}, [sp], 32
|
ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
||||||
ld1 {v20.8b - v23.8b}, [sp], 32
|
ld1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32
|
||||||
ld1 {v24.8b - v27.8b}, [sp], 32
|
ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
||||||
ld1 {v28.8b - v31.8b}, [sp], 32
|
ld1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32
|
||||||
blr x30
|
blr x30
|
||||||
|
|
||||||
.unreq DCT_TABLE
|
.unreq DCT_TABLE
|
||||||
@@ -1325,7 +1326,7 @@ asm_function jsimd_idct_4x4_neon
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
.balign 8
|
.balign 8
|
||||||
jsimd_idct_2x2_neon_consts:
|
Ljsimd_idct_2x2_neon_consts:
|
||||||
.short -FIX_0_720959822 /* v14[0] */
|
.short -FIX_0_720959822 /* v14[0] */
|
||||||
.short FIX_0_850430095 /* v14[1] */
|
.short FIX_0_850430095 /* v14[1] */
|
||||||
.short -FIX_1_272758580 /* v14[2] */
|
.short -FIX_1_272758580 /* v14[2] */
|
||||||
@@ -1333,10 +1334,10 @@ jsimd_idct_2x2_neon_consts:
|
|||||||
|
|
||||||
.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27
|
.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27
|
||||||
sshll v15.4s, \x4, #15
|
sshll v15.4s, \x4, #15
|
||||||
smull v26.4s, \x6, v14.4h[3]
|
smull v26.4s, \x6, v14.h[3]
|
||||||
smlal v26.4s, \x10, v14.4h[2]
|
smlal v26.4s, \x10, v14.h[2]
|
||||||
smlal v26.4s, \x12, v14.4h[1]
|
smlal v26.4s, \x12, v14.h[1]
|
||||||
smlal v26.4s, \x16, v14.4h[0]
|
smlal v26.4s, \x16, v14.h[0]
|
||||||
|
|
||||||
add v20.4s, v15.4s, v26.4s
|
add v20.4s, v15.4s, v26.4s
|
||||||
sub v15.4s, v15.4s, v26.4s
|
sub v15.4s, v15.4s, v26.4s
|
||||||
@@ -1367,14 +1368,14 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
str x15, [sp], 16
|
str x15, [sp], 16
|
||||||
|
|
||||||
/* Load constants */
|
/* Load constants */
|
||||||
adr TMP2, jsimd_idct_2x2_neon_consts
|
adr TMP2, Ljsimd_idct_2x2_neon_consts
|
||||||
st1 {v4.8b - v7.8b}, [sp], 32
|
st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
||||||
st1 {v8.8b - v11.8b}, [sp], 32
|
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
st1 {v12.8b - v15.8b}, [sp], 32
|
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
st1 {v16.8b - v19.8b}, [sp], 32
|
st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
||||||
st1 {v21.8b - v22.8b}, [sp], 16
|
st1 {v21.8b, v22.8b}, [sp], 16
|
||||||
st1 {v24.8b - v27.8b}, [sp], 32
|
st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
||||||
st1 {v30.8b - v31.8b}, [sp], 16
|
st1 {v30.8b, v31.8b}, [sp], 16
|
||||||
ld1 {v14.4h}, [TMP2]
|
ld1 {v14.4h}, [TMP2]
|
||||||
|
|
||||||
/* Load all COEF_BLOCK into NEON registers with the following allocation:
|
/* Load all COEF_BLOCK into NEON registers with the following allocation:
|
||||||
@@ -1400,25 +1401,25 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32
|
ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32
|
||||||
mul v4.4h, v4.4h, v18.4h
|
mul v4.4h, v4.4h, v18.4h
|
||||||
mul v5.4h, v5.4h, v19.4h
|
mul v5.4h, v5.4h, v19.4h
|
||||||
ins v4.2d[1], v5.2d[0]
|
ins v4.d[1], v5.d[0]
|
||||||
mul v6.4h, v6.4h, v20.4h
|
mul v6.4h, v6.4h, v20.4h
|
||||||
mul v7.4h, v7.4h, v21.4h
|
mul v7.4h, v7.4h, v21.4h
|
||||||
ins v6.2d[1], v7.2d[0]
|
ins v6.d[1], v7.d[0]
|
||||||
add DCT_TABLE, DCT_TABLE, #16
|
add DCT_TABLE, DCT_TABLE, #16
|
||||||
ld1 {v24.4h, v25.4h}, [DCT_TABLE], 16
|
ld1 {v24.4h, v25.4h}, [DCT_TABLE], 16
|
||||||
mul v10.4h, v10.4h, v24.4h
|
mul v10.4h, v10.4h, v24.4h
|
||||||
mul v11.4h, v11.4h, v25.4h
|
mul v11.4h, v11.4h, v25.4h
|
||||||
ins v10.2d[1], v11.2d[0]
|
ins v10.d[1], v11.d[0]
|
||||||
add DCT_TABLE, DCT_TABLE, #16
|
add DCT_TABLE, DCT_TABLE, #16
|
||||||
ld1 {v26.4h, v27.4h}, [DCT_TABLE], 16
|
ld1 {v26.4h, v27.4h}, [DCT_TABLE], 16
|
||||||
mul v12.4h, v12.4h, v26.4h
|
mul v12.4h, v12.4h, v26.4h
|
||||||
mul v13.4h, v13.4h, v27.4h
|
mul v13.4h, v13.4h, v27.4h
|
||||||
ins v12.2d[1], v13.2d[0]
|
ins v12.d[1], v13.d[0]
|
||||||
add DCT_TABLE, DCT_TABLE, #16
|
add DCT_TABLE, DCT_TABLE, #16
|
||||||
ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16
|
ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16
|
||||||
mul v16.4h, v16.4h, v30.4h
|
mul v16.4h, v16.4h, v30.4h
|
||||||
mul v17.4h, v17.4h, v31.4h
|
mul v17.4h, v17.4h, v31.4h
|
||||||
ins v16.2d[1], v17.2d[0]
|
ins v16.d[1], v17.d[0]
|
||||||
|
|
||||||
/* Pass 1 */
|
/* Pass 1 */
|
||||||
#if 0
|
#if 0
|
||||||
@@ -1427,14 +1428,14 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
idct_helper v5.4h, v7.4h, v11.4h, v13.4h, v17.4h, 13, v5.4h, v7.4h
|
idct_helper v5.4h, v7.4h, v11.4h, v13.4h, v17.4h, 13, v5.4h, v7.4h
|
||||||
transpose_4x4 v5.4h, v7.4h, v9.4h, v11.4h
|
transpose_4x4 v5.4h, v7.4h, v9.4h, v11.4h
|
||||||
#else
|
#else
|
||||||
smull v26.4s, v6.4h, v14.4h[3]
|
smull v26.4s, v6.4h, v14.h[3]
|
||||||
smlal v26.4s, v10.4h, v14.4h[2]
|
smlal v26.4s, v10.4h, v14.h[2]
|
||||||
smlal v26.4s, v12.4h, v14.4h[1]
|
smlal v26.4s, v12.4h, v14.h[1]
|
||||||
smlal v26.4s, v16.4h, v14.4h[0]
|
smlal v26.4s, v16.4h, v14.h[0]
|
||||||
smull v24.4s, v7.4h, v14.4h[3]
|
smull v24.4s, v7.4h, v14.h[3]
|
||||||
smlal v24.4s, v11.4h, v14.4h[2]
|
smlal v24.4s, v11.4h, v14.h[2]
|
||||||
smlal v24.4s, v13.4h, v14.4h[1]
|
smlal v24.4s, v13.4h, v14.h[1]
|
||||||
smlal v24.4s, v17.4h, v14.4h[0]
|
smlal v24.4s, v17.4h, v14.h[0]
|
||||||
sshll v15.4s, v4.4h, #15
|
sshll v15.4s, v4.4h, #15
|
||||||
sshll v30.4s, v5.4h, #15
|
sshll v30.4s, v5.4h, #15
|
||||||
add v20.4s, v15.4s, v26.4s
|
add v20.4s, v15.4s, v26.4s
|
||||||
@@ -1445,12 +1446,12 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
sub v15.4s, v30.4s, v24.4s
|
sub v15.4s, v30.4s, v24.4s
|
||||||
rshrn v5.4h, v20.4s, #13
|
rshrn v5.4h, v20.4s, #13
|
||||||
rshrn v7.4h, v15.4s, #13
|
rshrn v7.4h, v15.4s, #13
|
||||||
ins v4.2d[1], v5.2d[0]
|
ins v4.d[1], v5.d[0]
|
||||||
ins v6.2d[1], v7.2d[0]
|
ins v6.d[1], v7.d[0]
|
||||||
transpose v4, v6, v3, .16b, .8h
|
transpose v4, v6, v3, .16b, .8h
|
||||||
transpose v6, v10, v3, .16b, .4s
|
transpose v6, v10, v3, .16b, .4s
|
||||||
ins v11.2d[0], v10.2d[1]
|
ins v11.d[0], v10.d[1]
|
||||||
ins v7.2d[0], v6.2d[1]
|
ins v7.d[0], v6.d[1]
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Pass 2 */
|
/* Pass 2 */
|
||||||
@@ -1458,10 +1459,10 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
|
|
||||||
/* Range limit */
|
/* Range limit */
|
||||||
movi v30.8h, #0x80
|
movi v30.8h, #0x80
|
||||||
ins v26.2d[1], v27.2d[0]
|
ins v26.d[1], v27.d[0]
|
||||||
add v26.8h, v26.8h, v30.8h
|
add v26.8h, v26.8h, v30.8h
|
||||||
sqxtun v30.8b, v26.8h
|
sqxtun v30.8b, v26.8h
|
||||||
ins v26.2d[0], v30.2d[0]
|
ins v26.d[0], v30.d[0]
|
||||||
sqxtun v27.8b, v26.8h
|
sqxtun v27.8b, v26.8h
|
||||||
|
|
||||||
/* Store results to the output buffer */
|
/* Store results to the output buffer */
|
||||||
@@ -1476,13 +1477,13 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
|
|
||||||
sub sp, sp, #208
|
sub sp, sp, #208
|
||||||
ldr x15, [sp], 16
|
ldr x15, [sp], 16
|
||||||
ld1 {v4.8b - v7.8b}, [sp], 32
|
ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
||||||
ld1 {v8.8b - v11.8b}, [sp], 32
|
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
ld1 {v12.8b - v15.8b}, [sp], 32
|
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
ld1 {v16.8b - v19.8b}, [sp], 32
|
ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
||||||
ld1 {v21.8b - v22.8b}, [sp], 16
|
ld1 {v21.8b, v22.8b}, [sp], 16
|
||||||
ld1 {v24.8b - v27.8b}, [sp], 32
|
ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
||||||
ld1 {v30.8b - v31.8b}, [sp], 16
|
ld1 {v30.8b, v31.8b}, [sp], 16
|
||||||
blr x30
|
blr x30
|
||||||
|
|
||||||
.unreq DCT_TABLE
|
.unreq DCT_TABLE
|
||||||
@@ -1514,9 +1515,9 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
ld1 {v4.8b}, [U], 8
|
ld1 {v4.8b}, [U], 8
|
||||||
ld1 {v5.8b}, [V], 8
|
ld1 {v5.8b}, [V], 8
|
||||||
ld1 {v0.8b}, [Y], 8
|
ld1 {v0.8b}, [Y], 8
|
||||||
prfm PLDL1KEEP, [U, #64]
|
prfm pldl1keep, [U, #64]
|
||||||
prfm PLDL1KEEP, [V, #64]
|
prfm pldl1keep, [V, #64]
|
||||||
prfm PLDL1KEEP, [Y, #64]
|
prfm pldl1keep, [Y, #64]
|
||||||
.elseif \size == 4
|
.elseif \size == 4
|
||||||
ld1 {v4.b}[0], [U], 1
|
ld1 {v4.b}[0], [U], 1
|
||||||
ld1 {v4.b}[1], [U], 1
|
ld1 {v4.b}[1], [U], 1
|
||||||
@@ -1606,14 +1607,14 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
.macro do_yuv_to_rgb_stage1
|
.macro do_yuv_to_rgb_stage1
|
||||||
uaddw v6.8h, v2.8h, v4.8b /* q3 = u - 128 */
|
uaddw v6.8h, v2.8h, v4.8b /* q3 = u - 128 */
|
||||||
uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */
|
uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */
|
||||||
smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */
|
smull v20.4s, v6.4h, v1.h[1] /* multiply by -11277 */
|
||||||
smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */
|
smlal v20.4s, v8.4h, v1.h[2] /* multiply by -23401 */
|
||||||
smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */
|
smull2 v22.4s, v6.8h, v1.h[1] /* multiply by -11277 */
|
||||||
smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */
|
smlal2 v22.4s, v8.8h, v1.h[2] /* multiply by -23401 */
|
||||||
smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */
|
smull v24.4s, v8.4h, v1.h[0] /* multiply by 22971 */
|
||||||
smull2 v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */
|
smull2 v26.4s, v8.8h, v1.h[0] /* multiply by 22971 */
|
||||||
smull v28.4s, v6.4h, v1.4h[3] /* multiply by 29033 */
|
smull v28.4s, v6.4h, v1.h[3] /* multiply by 29033 */
|
||||||
smull2 v30.4s, v6.8h, v1.4h[3] /* multiply by 29033 */
|
smull2 v30.4s, v6.8h, v1.h[3] /* multiply by 29033 */
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro do_yuv_to_rgb_stage2
|
.macro do_yuv_to_rgb_stage2
|
||||||
@@ -1656,18 +1657,18 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
sqxtun v1\g_offs\defsize, v20.8h
|
sqxtun v1\g_offs\defsize, v20.8h
|
||||||
ld1 {v0.8b}, [Y], 8
|
ld1 {v0.8b}, [Y], 8
|
||||||
sqxtun v1\r_offs\defsize, v24.8h
|
sqxtun v1\r_offs\defsize, v24.8h
|
||||||
prfm PLDL1KEEP, [U, #64]
|
prfm pldl1keep, [U, #64]
|
||||||
prfm PLDL1KEEP, [V, #64]
|
prfm pldl1keep, [V, #64]
|
||||||
prfm PLDL1KEEP, [Y, #64]
|
prfm pldl1keep, [Y, #64]
|
||||||
sqxtun v1\b_offs\defsize, v28.8h
|
sqxtun v1\b_offs\defsize, v28.8h
|
||||||
uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */
|
uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */
|
||||||
uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */
|
uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */
|
||||||
smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */
|
smull v20.4s, v6.4h, v1.h[1] /* multiply by -11277 */
|
||||||
smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */
|
smlal v20.4s, v8.4h, v1.h[2] /* multiply by -23401 */
|
||||||
smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */
|
smull2 v22.4s, v6.8h, v1.h[1] /* multiply by -11277 */
|
||||||
smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */
|
smlal2 v22.4s, v8.8h, v1.h[2] /* multiply by -23401 */
|
||||||
smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */
|
smull v24.4s, v8.4h, v1.h[0] /* multiply by 22971 */
|
||||||
smull2 v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */
|
smull2 v26.4s, v8.8h, v1.h[0] /* multiply by 22971 */
|
||||||
.else /**************************** rgb565 ***********************************/
|
.else /**************************** rgb565 ***********************************/
|
||||||
sqshlu v21.8h, v20.8h, #8
|
sqshlu v21.8h, v20.8h, #8
|
||||||
sqshlu v25.8h, v24.8h, #8
|
sqshlu v25.8h, v24.8h, #8
|
||||||
@@ -1675,21 +1676,21 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */
|
uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */
|
||||||
uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */
|
uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */
|
||||||
ld1 {v0.8b}, [Y], 8
|
ld1 {v0.8b}, [Y], 8
|
||||||
smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */
|
smull v20.4s, v6.4h, v1.h[1] /* multiply by -11277 */
|
||||||
smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */
|
smlal v20.4s, v8.4h, v1.h[2] /* multiply by -23401 */
|
||||||
smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */
|
smull2 v22.4s, v6.8h, v1.h[1] /* multiply by -11277 */
|
||||||
smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */
|
smlal2 v22.4s, v8.8h, v1.h[2] /* multiply by -23401 */
|
||||||
sri v25.8h, v21.8h, #5
|
sri v25.8h, v21.8h, #5
|
||||||
smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */
|
smull v24.4s, v8.4h, v1.h[0] /* multiply by 22971 */
|
||||||
smull2 v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */
|
smull2 v26.4s, v8.8h, v1.h[0] /* multiply by 22971 */
|
||||||
prfm PLDL1KEEP, [U, #64]
|
prfm pldl1keep, [U, #64]
|
||||||
prfm PLDL1KEEP, [V, #64]
|
prfm pldl1keep, [V, #64]
|
||||||
prfm PLDL1KEEP, [Y, #64]
|
prfm pldl1keep, [Y, #64]
|
||||||
sri v25.8h, v29.8h, #11
|
sri v25.8h, v29.8h, #11
|
||||||
.endif
|
.endif
|
||||||
do_store \bpp, 8
|
do_store \bpp, 8
|
||||||
smull v28.4s, v6.4h, v1.4h[3] /* multiply by 29033 */
|
smull v28.4s, v6.4h, v1.h[3] /* multiply by 29033 */
|
||||||
smull2 v30.4s, v6.8h, v1.4h[3] /* multiply by 29033 */
|
smull2 v30.4s, v6.8h, v1.h[3] /* multiply by 29033 */
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro do_yuv_to_rgb
|
.macro do_yuv_to_rgb
|
||||||
@@ -1702,7 +1703,7 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
.balign 16
|
.balign 16
|
||||||
jsimd_ycc_\colorid\()_neon_consts:
|
Ljsimd_ycc_\colorid\()_neon_consts:
|
||||||
.short 0, 0, 0, 0
|
.short 0, 0, 0, 0
|
||||||
.short 22971, -11277, -23401, 29033
|
.short 22971, -11277, -23401, 29033
|
||||||
.short -128, -128, -128, -128
|
.short -128, -128, -128, -128
|
||||||
@@ -1717,7 +1718,7 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
|
|||||||
|
|
||||||
INPUT_BUF0 .req x5
|
INPUT_BUF0 .req x5
|
||||||
INPUT_BUF1 .req x6
|
INPUT_BUF1 .req x6
|
||||||
INPUT_BUF2 .req INPUT_BUF
|
INPUT_BUF2 .req x1
|
||||||
|
|
||||||
RGB .req x7
|
RGB .req x7
|
||||||
Y .req x8
|
Y .req x8
|
||||||
@@ -1728,16 +1729,16 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
|
|||||||
sub sp, sp, 336
|
sub sp, sp, 336
|
||||||
str x15, [sp], 16
|
str x15, [sp], 16
|
||||||
/* Load constants to d1, d2, d3 (v0.4h is just used for padding) */
|
/* Load constants to d1, d2, d3 (v0.4h is just used for padding) */
|
||||||
adr x15, jsimd_ycc_\colorid\()_neon_consts
|
adr x15, Ljsimd_ycc_\colorid\()_neon_consts
|
||||||
/* Save NEON registers */
|
/* Save NEON registers */
|
||||||
st1 {v0.8b - v3.8b}, [sp], 32
|
st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32
|
||||||
st1 {v4.8b - v7.8b}, [sp], 32
|
st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
||||||
st1 {v8.8b - v11.8b}, [sp], 32
|
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
st1 {v12.8b - v15.8b}, [sp], 32
|
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
st1 {v16.8b - v19.8b}, [sp], 32
|
st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
||||||
st1 {v20.8b - v23.8b}, [sp], 32
|
st1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32
|
||||||
st1 {v24.8b - v27.8b}, [sp], 32
|
st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
||||||
st1 {v28.8b - v31.8b}, [sp], 32
|
st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32
|
||||||
ld1 {v0.4h, v1.4h}, [x15], 16
|
ld1 {v0.4h, v1.4h}, [x15], 16
|
||||||
ld1 {v2.8h}, [x15]
|
ld1 {v2.8h}, [x15]
|
||||||
|
|
||||||
@@ -1748,8 +1749,8 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
|
|||||||
stp x8, x9, [sp], 16
|
stp x8, x9, [sp], 16
|
||||||
stp x10, x30, [sp], 16
|
stp x10, x30, [sp], 16
|
||||||
ldr INPUT_BUF0, [INPUT_BUF]
|
ldr INPUT_BUF0, [INPUT_BUF]
|
||||||
ldr INPUT_BUF1, [INPUT_BUF, 8]
|
ldr INPUT_BUF1, [INPUT_BUF, #8]
|
||||||
ldr INPUT_BUF2, [INPUT_BUF, 16]
|
ldr INPUT_BUF2, [INPUT_BUF, #16]
|
||||||
.unreq INPUT_BUF
|
.unreq INPUT_BUF
|
||||||
|
|
||||||
/* Initially set v10, v11.4h, v12.8b, d13 to 0xFF */
|
/* Initially set v10, v11.4h, v12.8b, d13 to 0xFF */
|
||||||
@@ -1758,7 +1759,7 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
|
|||||||
|
|
||||||
/* Outer loop over scanlines */
|
/* Outer loop over scanlines */
|
||||||
cmp NUM_ROWS, #1
|
cmp NUM_ROWS, #1
|
||||||
blt 9f
|
b.lt 9f
|
||||||
0:
|
0:
|
||||||
lsl x16, INPUT_ROW, #3
|
lsl x16, INPUT_ROW, #3
|
||||||
ldr Y, [INPUT_BUF0, x16]
|
ldr Y, [INPUT_BUF0, x16]
|
||||||
@@ -1770,60 +1771,60 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
|
|||||||
|
|
||||||
/* Inner loop over pixels */
|
/* Inner loop over pixels */
|
||||||
subs N, N, #8
|
subs N, N, #8
|
||||||
blt 3f
|
b.lt 3f
|
||||||
do_load 8
|
do_load 8
|
||||||
do_yuv_to_rgb_stage1
|
do_yuv_to_rgb_stage1
|
||||||
subs N, N, #8
|
subs N, N, #8
|
||||||
blt 2f
|
b.lt 2f
|
||||||
1:
|
1:
|
||||||
do_yuv_to_rgb_stage2_store_load_stage1
|
do_yuv_to_rgb_stage2_store_load_stage1
|
||||||
subs N, N, #8
|
subs N, N, #8
|
||||||
bge 1b
|
b.ge 1b
|
||||||
2:
|
2:
|
||||||
do_yuv_to_rgb_stage2
|
do_yuv_to_rgb_stage2
|
||||||
do_store \bpp, 8
|
do_store \bpp, 8
|
||||||
tst N, #7
|
tst N, #7
|
||||||
beq 8f
|
b.eq 8f
|
||||||
3:
|
3:
|
||||||
tst N, #4
|
tst N, #4
|
||||||
beq 3f
|
b.eq 3f
|
||||||
do_load 4
|
do_load 4
|
||||||
3:
|
3:
|
||||||
tst N, #2
|
tst N, #2
|
||||||
beq 4f
|
b.eq 4f
|
||||||
do_load 2
|
do_load 2
|
||||||
4:
|
4:
|
||||||
tst N, #1
|
tst N, #1
|
||||||
beq 5f
|
b.eq 5f
|
||||||
do_load 1
|
do_load 1
|
||||||
5:
|
5:
|
||||||
do_yuv_to_rgb
|
do_yuv_to_rgb
|
||||||
tst N, #4
|
tst N, #4
|
||||||
beq 6f
|
b.eq 6f
|
||||||
do_store \bpp, 4
|
do_store \bpp, 4
|
||||||
6:
|
6:
|
||||||
tst N, #2
|
tst N, #2
|
||||||
beq 7f
|
b.eq 7f
|
||||||
do_store \bpp, 2
|
do_store \bpp, 2
|
||||||
7:
|
7:
|
||||||
tst N, #1
|
tst N, #1
|
||||||
beq 8f
|
b.eq 8f
|
||||||
do_store \bpp, 1
|
do_store \bpp, 1
|
||||||
8:
|
8:
|
||||||
subs NUM_ROWS, NUM_ROWS, #1
|
subs NUM_ROWS, NUM_ROWS, #1
|
||||||
bgt 0b
|
b.gt 0b
|
||||||
9:
|
9:
|
||||||
/* Restore all registers and return */
|
/* Restore all registers and return */
|
||||||
sub sp, sp, #336
|
sub sp, sp, #336
|
||||||
ldr x15, [sp], 16
|
ldr x15, [sp], 16
|
||||||
ld1 {v0.8b - v3.8b}, [sp], 32
|
ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32
|
||||||
ld1 {v4.8b - v7.8b}, [sp], 32
|
ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
||||||
ld1 {v8.8b - v11.8b}, [sp], 32
|
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
ld1 {v12.8b - v15.8b}, [sp], 32
|
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
ld1 {v16.8b - v19.8b}, [sp], 32
|
ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
||||||
ld1 {v20.8b - v23.8b}, [sp], 32
|
ld1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32
|
||||||
ld1 {v24.8b - v27.8b}, [sp], 32
|
ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
||||||
ld1 {v28.8b - v31.8b}, [sp], 32
|
ld1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32
|
||||||
/* pop {r4, r5, r6, r7, r8, r9, r10, pc} */
|
/* pop {r4, r5, r6, r7, r8, r9, r10, pc} */
|
||||||
ldp x4, x5, [sp], 16
|
ldp x4, x5, [sp], 16
|
||||||
ldp x6, x7, [sp], 16
|
ldp x6, x7, [sp], 16
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* jsimd_powerpc64.c
|
* jsimd_powerpc.c
|
||||||
*
|
*
|
||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
* Copyright 2009-2011, 2014 D. R. Commander
|
* Copyright 2009-2011, 2014 D. R. Commander
|
||||||
@@ -42,12 +42,38 @@ init_simd (void)
|
|||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_can_rgb_ycc (void)
|
jsimd_can_rgb_ycc (void)
|
||||||
{
|
{
|
||||||
|
init_simd();
|
||||||
|
|
||||||
|
/* The code is optimised for these values only */
|
||||||
|
if (BITS_IN_JSAMPLE != 8)
|
||||||
|
return 0;
|
||||||
|
if (sizeof(JDIMENSION) != 4)
|
||||||
|
return 0;
|
||||||
|
if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (simd_support & JSIMD_ALTIVEC)
|
||||||
|
return 1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_can_rgb_gray (void)
|
jsimd_can_rgb_gray (void)
|
||||||
{
|
{
|
||||||
|
init_simd();
|
||||||
|
|
||||||
|
/* The code is optimised for these values only */
|
||||||
|
if (BITS_IN_JSAMPLE != 8)
|
||||||
|
return 0;
|
||||||
|
if (sizeof(JDIMENSION) != 4)
|
||||||
|
return 0;
|
||||||
|
if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (simd_support & JSIMD_ALTIVEC)
|
||||||
|
return 1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -68,6 +94,37 @@ jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
|||||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
JDIMENSION output_row, int num_rows)
|
JDIMENSION output_row, int num_rows)
|
||||||
{
|
{
|
||||||
|
void (*altivecfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
|
||||||
|
|
||||||
|
switch(cinfo->in_color_space) {
|
||||||
|
case JCS_EXT_RGB:
|
||||||
|
altivecfct=jsimd_extrgb_ycc_convert_altivec;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_RGBX:
|
||||||
|
case JCS_EXT_RGBA:
|
||||||
|
altivecfct=jsimd_extrgbx_ycc_convert_altivec;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_BGR:
|
||||||
|
altivecfct=jsimd_extbgr_ycc_convert_altivec;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_BGRX:
|
||||||
|
case JCS_EXT_BGRA:
|
||||||
|
altivecfct=jsimd_extbgrx_ycc_convert_altivec;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_XBGR:
|
||||||
|
case JCS_EXT_ABGR:
|
||||||
|
altivecfct=jsimd_extxbgr_ycc_convert_altivec;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_XRGB:
|
||||||
|
case JCS_EXT_ARGB:
|
||||||
|
altivecfct=jsimd_extxrgb_ycc_convert_altivec;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
altivecfct=jsimd_rgb_ycc_convert_altivec;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
altivecfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
@@ -75,6 +132,37 @@ jsimd_rgb_gray_convert (j_compress_ptr cinfo,
|
|||||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
JDIMENSION output_row, int num_rows)
|
JDIMENSION output_row, int num_rows)
|
||||||
{
|
{
|
||||||
|
void (*altivecfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
|
||||||
|
|
||||||
|
switch(cinfo->in_color_space) {
|
||||||
|
case JCS_EXT_RGB:
|
||||||
|
altivecfct=jsimd_extrgb_gray_convert_altivec;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_RGBX:
|
||||||
|
case JCS_EXT_RGBA:
|
||||||
|
altivecfct=jsimd_extrgbx_gray_convert_altivec;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_BGR:
|
||||||
|
altivecfct=jsimd_extbgr_gray_convert_altivec;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_BGRX:
|
||||||
|
case JCS_EXT_BGRA:
|
||||||
|
altivecfct=jsimd_extbgrx_gray_convert_altivec;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_XBGR:
|
||||||
|
case JCS_EXT_ABGR:
|
||||||
|
altivecfct=jsimd_extxbgr_gray_convert_altivec;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_XRGB:
|
||||||
|
case JCS_EXT_ARGB:
|
||||||
|
altivecfct=jsimd_extxrgb_gray_convert_altivec;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
altivecfct=jsimd_rgb_gray_convert_altivec;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
altivecfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
@@ -202,6 +290,21 @@ jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
|
|||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_can_convsamp (void)
|
jsimd_can_convsamp (void)
|
||||||
{
|
{
|
||||||
|
init_simd();
|
||||||
|
|
||||||
|
/* The code is optimised for these values only */
|
||||||
|
if (DCTSIZE != 8)
|
||||||
|
return 0;
|
||||||
|
if (BITS_IN_JSAMPLE != 8)
|
||||||
|
return 0;
|
||||||
|
if (sizeof(JDIMENSION) != 4)
|
||||||
|
return 0;
|
||||||
|
if (sizeof(DCTELEM) != 2)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (simd_support & JSIMD_ALTIVEC)
|
||||||
|
return 1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -215,6 +318,7 @@ GLOBAL(void)
|
|||||||
jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
|
jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
|
||||||
DCTELEM * workspace)
|
DCTELEM * workspace)
|
||||||
{
|
{
|
||||||
|
jsimd_convsamp_altivec(sample_data, start_col, workspace);
|
||||||
}
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
@@ -226,6 +330,17 @@ jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
|
|||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_can_fdct_islow (void)
|
jsimd_can_fdct_islow (void)
|
||||||
{
|
{
|
||||||
|
init_simd();
|
||||||
|
|
||||||
|
/* The code is optimised for these values only */
|
||||||
|
if (DCTSIZE != 8)
|
||||||
|
return 0;
|
||||||
|
if (sizeof(DCTELEM) != 2)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (simd_support & JSIMD_ALTIVEC)
|
||||||
|
return 1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -255,6 +370,7 @@ jsimd_can_fdct_float (void)
|
|||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
jsimd_fdct_islow (DCTELEM * data)
|
jsimd_fdct_islow (DCTELEM * data)
|
||||||
{
|
{
|
||||||
|
jsimd_fdct_islow_altivec(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
@@ -271,6 +387,19 @@ jsimd_fdct_float (FAST_FLOAT * data)
|
|||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_can_quantize (void)
|
jsimd_can_quantize (void)
|
||||||
{
|
{
|
||||||
|
init_simd();
|
||||||
|
|
||||||
|
/* The code is optimised for these values only */
|
||||||
|
if (DCTSIZE != 8)
|
||||||
|
return 0;
|
||||||
|
if (sizeof(JCOEF) != 2)
|
||||||
|
return 0;
|
||||||
|
if (sizeof(DCTELEM) != 2)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (simd_support & JSIMD_ALTIVEC)
|
||||||
|
return 1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -284,6 +413,7 @@ GLOBAL(void)
|
|||||||
jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
|
jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
|
||||||
DCTELEM * workspace)
|
DCTELEM * workspace)
|
||||||
{
|
{
|
||||||
|
jsimd_quantize_altivec(coef_block, divisors, workspace);
|
||||||
}
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
@@ -321,12 +451,34 @@ jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_can_idct_islow (void)
|
jsimd_can_idct_islow (void)
|
||||||
{
|
{
|
||||||
|
init_simd();
|
||||||
|
|
||||||
|
/* The code is optimised for these values only */
|
||||||
|
if (DCTSIZE != 8)
|
||||||
|
return 0;
|
||||||
|
if (sizeof(JCOEF) != 2)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (simd_support & JSIMD_ALTIVEC)
|
||||||
|
return 1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_can_idct_ifast (void)
|
jsimd_can_idct_ifast (void)
|
||||||
{
|
{
|
||||||
|
init_simd();
|
||||||
|
|
||||||
|
/* The code is optimised for these values only */
|
||||||
|
if (DCTSIZE != 8)
|
||||||
|
return 0;
|
||||||
|
if (sizeof(JCOEF) != 2)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (simd_support & JSIMD_ALTIVEC)
|
||||||
|
return 1;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -341,6 +493,8 @@ jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
||||||
JDIMENSION output_col)
|
JDIMENSION output_col)
|
||||||
{
|
{
|
||||||
|
jsimd_idct_islow_altivec(compptr->dct_table, coef_block, output_buf,
|
||||||
|
output_col);
|
||||||
}
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
@@ -348,6 +502,8 @@ jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
||||||
JDIMENSION output_col)
|
JDIMENSION output_col)
|
||||||
{
|
{
|
||||||
|
jsimd_idct_ifast_altivec(compptr->dct_table, coef_block, output_buf,
|
||||||
|
output_col);
|
||||||
}
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
|
|||||||
@@ -50,4 +50,7 @@ TURBOJPEG_1.4
|
|||||||
tjDecompressToYUVPlanes;
|
tjDecompressToYUVPlanes;
|
||||||
tjEncodeYUV3;
|
tjEncodeYUV3;
|
||||||
tjEncodeYUVPlanes;
|
tjEncodeYUVPlanes;
|
||||||
|
tjPlaneHeight;
|
||||||
|
tjPlaneSizeYUV;
|
||||||
|
tjPlaneWidth;
|
||||||
} TURBOJPEG_1.2;
|
} TURBOJPEG_1.2;
|
||||||
|
|||||||
@@ -76,6 +76,9 @@ TURBOJPEG_1.4
|
|||||||
tjDecompressToYUVPlanes;
|
tjDecompressToYUVPlanes;
|
||||||
tjEncodeYUV3;
|
tjEncodeYUV3;
|
||||||
tjEncodeYUVPlanes;
|
tjEncodeYUVPlanes;
|
||||||
|
tjPlaneHeight;
|
||||||
|
tjPlaneSizeYUV;
|
||||||
|
tjPlaneWidth;
|
||||||
Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII;
|
Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII;
|
||||||
Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3_3B_3II_3III_3BII;
|
Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3_3B_3II_3III_3BII;
|
||||||
Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIIIII_3_3B_3I_3III;
|
Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIIIII_3_3B_3I_3III;
|
||||||
|
|||||||
@@ -320,6 +320,14 @@ static int setDecompDefaults(struct jpeg_decompress_struct *dinfo,
|
|||||||
static int getSubsamp(j_decompress_ptr dinfo)
|
static int getSubsamp(j_decompress_ptr dinfo)
|
||||||
{
|
{
|
||||||
int retval=-1, i, k;
|
int retval=-1, i, k;
|
||||||
|
|
||||||
|
/* The sampling factors actually have no meaning with grayscale JPEG files,
|
||||||
|
and in fact it's possible to generate grayscale JPEGs with sampling
|
||||||
|
factors > 1 (even though those sampling factors are ignored by the
|
||||||
|
decompressor.) Thus, we need to treat grayscale as a special case. */
|
||||||
|
if(dinfo->num_components==1 && dinfo->jpeg_color_space==JCS_GRAYSCALE)
|
||||||
|
return TJSAMP_GRAY;
|
||||||
|
|
||||||
for(i=0; i<NUMSUBOPT; i++)
|
for(i=0; i<NUMSUBOPT; i++)
|
||||||
{
|
{
|
||||||
if(dinfo->num_components==pixelsize[i]
|
if(dinfo->num_components==pixelsize[i]
|
||||||
|
|||||||
@@ -196,6 +196,7 @@ int main(int argc, char *argv[]) {
|
|||||||
image_buffer =
|
image_buffer =
|
||||||
malloc(frame_width*frame_height + 2*(frame_width/2)*(frame_height/2));
|
malloc(frame_width*frame_height + 2*(frame_width/2)*(frame_height/2));
|
||||||
if (!image_buffer) {
|
if (!image_buffer) {
|
||||||
|
free(yuv_buffer);
|
||||||
fprintf(stderr, "Memory allocation failure!\n");
|
fprintf(stderr, "Memory allocation failure!\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user