Merge remote-tracking branch 'turbo/master'
* turbo/master: (105 commits) makemacpkg.in: Allow universal DMG w/o ARMv8 arch Remove more unnecessary NULL checks before free() Eliminate unnecessary NULL checks before tjFree() Eliminate unnecessary NULL checks before free() simd/arm64/jsimd_neon.S: Fix checkstyle issue tjTransform(): Use instance err. for bad crop spec README.md, package specs: Various tweaks djpeg.c: Fix compiler warning w/o mem. src manager ARMv8 SIMD: Support execute-only memory (XOM) Travis: Use MacPorts instead of Homebrew Huffman enc.: Fix very rare local buffer overrun TurboJPEG: Fix erroneous subsampling detection ChangeLog.md: List CVE IDs for specific fixes tjDecompressToYUV*(): Fix OOB write/double free 64-bit tjbench: Fix signed int overflow/segfault Fix copyright header formatting buglets example.txt: Avoid undefined setjmp() behavior Mac: Support hiding SIMD fct symbols w/ NASM 2.14+ TJBench: Fix output with -componly -quiet Build: Don't require ASM_NASM if !REQUIRE_SIMD ...
This commit is contained in:
171
BUILDING.md
171
BUILDING.md
@@ -15,13 +15,18 @@ Build Requirements
|
||||
* If using NASM, 2.10 or later is required.
|
||||
* If using NASM, 2.10 or later (except 2.11.08) is required for an x86-64 Mac
|
||||
build (2.11.08 does not work properly with libjpeg-turbo's x86-64 SIMD code
|
||||
when building macho64 objects.) NASM or YASM can be obtained from
|
||||
[MacPorts](http://www.macports.org/) or [Homebrew](http://brew.sh/).
|
||||
when building macho64 objects.)
|
||||
* If using YASM, 1.2.0 or later is required.
|
||||
* If building on macOS, NASM or YASM can be obtained from
|
||||
[MacPorts](http://www.macports.org/) or [Homebrew](http://brew.sh/).
|
||||
- NOTE: Currently, if it is desirable to hide the SIMD function symbols in
|
||||
Mac executables or shared libraries that statically link with
|
||||
libjpeg-turbo, then YASM must be used when building libjpeg-turbo.
|
||||
libjpeg-turbo, then NASM 2.14 or later or YASM must be used when
|
||||
building libjpeg-turbo.
|
||||
* If building on Windows, **nasm.exe**/**yasm.exe** should be in your `PATH`.
|
||||
* NASM and YASM are located in the CRB (Code Ready Builder) repository on
|
||||
Red Hat Enterprise Linux 8 and in the PowerTools repository on CentOS 8,
|
||||
neither of which is enabled by default.
|
||||
|
||||
The binary RPMs released by the NASM project do not work on older Linux
|
||||
systems, such as Red Hat Enterprise Linux 5. On such systems, you can easily
|
||||
@@ -48,8 +53,9 @@ Build Requirements
|
||||
install the Java Developer Package, which can be downloaded from
|
||||
<http://developer.apple.com/downloads> (Apple ID required.) For other
|
||||
systems, you can obtain the Oracle Java Development Kit from
|
||||
<http://www.java.com>.
|
||||
<http://www.oracle.com/technetwork/java/javase/downloads>.
|
||||
|
||||
* If using JDK 11 or later, CMake 3.10.x or later must also be used.
|
||||
|
||||
### Windows
|
||||
|
||||
@@ -83,7 +89,10 @@ Build Requirements
|
||||
appropriate compiler paths automatically set.
|
||||
|
||||
- If building the TurboJPEG Java wrapper, JDK 1.5 or later is required. This
|
||||
can be downloaded from <http://www.java.com>.
|
||||
can be downloaded from
|
||||
<http://www.oracle.com/technetwork/java/javase/downloads>.
|
||||
|
||||
* If using JDK 11 or later, CMake 3.10.x or later must also be used.
|
||||
|
||||
|
||||
Out-of-Tree Builds
|
||||
@@ -521,7 +530,7 @@ a universal library.
|
||||
Building libjpeg-turbo for Android
|
||||
----------------------------------
|
||||
|
||||
Building libjpeg-turbo for Android platforms requires the
|
||||
Building libjpeg-turbo for Android platforms requires v13b or later of the
|
||||
[Android NDK](https://developer.android.com/tools/sdk/ndk).
|
||||
|
||||
|
||||
@@ -531,35 +540,21 @@ The following is a general recipe script that can be modified for your specific
|
||||
needs.
|
||||
|
||||
# Set these variables to suit your needs
|
||||
NDK_PATH={full path to the "ndk" directory-- for example, /opt/android/sdk/ndk-bundle}
|
||||
BUILD_PLATFORM={the platform name for the NDK package you installed--
|
||||
for example, "windows-x86" or "linux-x86_64" or "darwin-x86_64"}
|
||||
TOOLCHAIN_VERSION={"4.8", "4.9", "clang3.5", etc. This corresponds to a
|
||||
toolchain directory under ${NDK_PATH}/toolchains/.}
|
||||
ANDROID_VERSION={The minimum version of Android to support-- for example,
|
||||
NDK_PATH={full path to the NDK directory-- for example,
|
||||
/opt/android/android-ndk-r16b}
|
||||
TOOLCHAIN={"gcc" or "clang"-- "gcc" must be used with NDK r16b and earlier,
|
||||
and "clang" must be used with NDK r17c and later}
|
||||
ANDROID_VERSION={the minimum version of Android to support-- for example,
|
||||
"16", "19", etc.}
|
||||
|
||||
# It should not be necessary to modify the rest
|
||||
HOST=arm-linux-androideabi
|
||||
SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-arm
|
||||
export CFLAGS="-march=armv7-a -mfloat-abi=softfp -fprefetch-loop-arrays \
|
||||
-D__ANDROID_API__=${ANDROID_VERSION} --sysroot=${SYSROOT} \
|
||||
-isystem ${NDK_PATH}/sysroot/usr/include \
|
||||
-isystem ${NDK_PATH}/sysroot/usr/include/${HOST}"
|
||||
export LDFLAGS=-pie
|
||||
TOOLCHAIN=${NDK_PATH}/toolchains/${HOST}-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM}
|
||||
|
||||
cd {build_directory}
|
||||
|
||||
cat <<EOF >toolchain.cmake
|
||||
set(CMAKE_SYSTEM_NAME Linux)
|
||||
set(CMAKE_SYSTEM_PROCESSOR arm)
|
||||
set(CMAKE_C_COMPILER ${TOOLCHAIN}/bin/${HOST}-gcc)
|
||||
set(CMAKE_FIND_ROOT_PATH ${TOOLCHAIN}/${HOST})
|
||||
EOF
|
||||
|
||||
cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=1 \
|
||||
cmake -G"Unix Makefiles" \
|
||||
-DANDROID_ABI=armeabi-v7a \
|
||||
-DANDROID_ARM_MODE=arm \
|
||||
-DANDROID_PLATFORM=android-${ANDROID_VERSION} \
|
||||
-DANDROID_TOOLCHAIN=${TOOLCHAIN} \
|
||||
-DCMAKE_ASM_FLAGS="--target=arm-linux-androideabi${ANDROID_VERSION}" \
|
||||
-DCMAKE_TOOLCHAIN_FILE=${NDK_PATH}/build/cmake/android.toolchain.cmake \
|
||||
[additional CMake flags] {source_directory}
|
||||
make
|
||||
|
||||
@@ -570,34 +565,21 @@ The following is a general recipe script that can be modified for your specific
|
||||
needs.
|
||||
|
||||
# Set these variables to suit your needs
|
||||
NDK_PATH={full path to the "ndk" directory-- for example, /opt/android/sdk/ndk-bundle}
|
||||
BUILD_PLATFORM={the platform name for the NDK package you installed--
|
||||
for example, "windows-x86" or "linux-x86_64" or "darwin-x86_64"}
|
||||
TOOLCHAIN_VERSION={"4.8", "4.9", "clang3.5", etc. This corresponds to a
|
||||
toolchain directory under ${NDK_PATH}/toolchains/.}
|
||||
ANDROID_VERSION={The minimum version of Android to support. "21" or later
|
||||
NDK_PATH={full path to the NDK directory-- for example,
|
||||
/opt/android/android-ndk-r16b}
|
||||
TOOLCHAIN={"gcc" or "clang"-- "gcc" must be used with NDK r14b and earlier,
|
||||
and "clang" must be used with NDK r17c and later}
|
||||
ANDROID_VERSION={the minimum version of Android to support. "21" or later
|
||||
is required for a 64-bit build.}
|
||||
|
||||
# It should not be necessary to modify the rest
|
||||
HOST=aarch64-linux-android
|
||||
SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-arm64
|
||||
export CFLAGS="-D__ANDROID_API__=${ANDROID_VERSION} --sysroot=${SYSROOT} \
|
||||
-isystem ${NDK_PATH}/sysroot/usr/include \
|
||||
-isystem ${NDK_PATH}/sysroot/usr/include/${HOST}"
|
||||
export LDFLAGS=-pie
|
||||
TOOLCHAIN=${NDK_PATH}/toolchains/${HOST}-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM}
|
||||
|
||||
cd {build_directory}
|
||||
|
||||
cat <<EOF >toolchain.cmake
|
||||
set(CMAKE_SYSTEM_NAME Linux)
|
||||
set(CMAKE_SYSTEM_PROCESSOR aarch64)
|
||||
set(CMAKE_C_COMPILER ${TOOLCHAIN}/bin/${HOST}-gcc)
|
||||
set(CMAKE_FIND_ROOT_PATH ${TOOLCHAIN}/${HOST})
|
||||
EOF
|
||||
|
||||
cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=1 \
|
||||
cmake -G"Unix Makefiles" \
|
||||
-DANDROID_ABI=arm64-v8a \
|
||||
-DANDROID_ARM_MODE=arm \
|
||||
-DANDROID_PLATFORM=android-${ANDROID_VERSION} \
|
||||
-DANDROID_TOOLCHAIN=${TOOLCHAIN} \
|
||||
-DCMAKE_ASM_FLAGS="--target=aarch64-linux-android${ANDROID_VERSION}" \
|
||||
-DCMAKE_TOOLCHAIN_FILE=${NDK_PATH}/build/cmake/android.toolchain.cmake \
|
||||
[additional CMake flags] {source_directory}
|
||||
make
|
||||
|
||||
@@ -608,34 +590,19 @@ The following is a general recipe script that can be modified for your specific
|
||||
needs.
|
||||
|
||||
# Set these variables to suit your needs
|
||||
NDK_PATH={full path to the "ndk" directory-- for example, /opt/android/sdk/ndk-bundle}
|
||||
BUILD_PLATFORM={the platform name for the NDK package you installed--
|
||||
for example, "windows-x86" or "linux-x86_64" or "darwin-x86_64"}
|
||||
TOOLCHAIN_VERSION={"4.8", "4.9", "clang3.5", etc. This corresponds to a
|
||||
toolchain directory under ${NDK_PATH}/toolchains/.}
|
||||
NDK_PATH={full path to the NDK directory-- for example,
|
||||
/opt/android/android-ndk-r16b}
|
||||
TOOLCHAIN={"gcc" or "clang"-- "gcc" must be used with NDK r14b and earlier,
|
||||
and "clang" must be used with NDK r17c and later}
|
||||
ANDROID_VERSION={The minimum version of Android to support-- for example,
|
||||
"16", "19", etc.}
|
||||
|
||||
# It should not be necessary to modify the rest
|
||||
HOST=i686-linux-android
|
||||
SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-x86
|
||||
export CFLAGS="-D__ANDROID_API__=${ANDROID_VERSION} --sysroot=${SYSROOT} \
|
||||
-isystem ${NDK_PATH}/sysroot/usr/include \
|
||||
-isystem ${NDK_PATH}/sysroot/usr/include/${HOST}"
|
||||
export LDFLAGS=-pie
|
||||
TOOLCHAIN=${NDK_PATH}/toolchains/x86-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM}
|
||||
|
||||
cd {build_directory}
|
||||
|
||||
cat <<EOF >toolchain.cmake
|
||||
set(CMAKE_SYSTEM_NAME Linux)
|
||||
set(CMAKE_SYSTEM_PROCESSOR i386)
|
||||
set(CMAKE_C_COMPILER ${TOOLCHAIN}/bin/${HOST}-gcc)
|
||||
set(CMAKE_FIND_ROOT_PATH ${TOOLCHAIN}/${HOST})
|
||||
EOF
|
||||
|
||||
cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=1 \
|
||||
cmake -G"Unix Makefiles" \
|
||||
-DANDROID_ABI=x86 \
|
||||
-DANDROID_PLATFORM=android-${ANDROID_VERSION} \
|
||||
-DANDROID_TOOLCHAIN=${TOOLCHAIN} \
|
||||
-DCMAKE_TOOLCHAIN_FILE=${NDK_PATH}/build/cmake/android.toolchain.cmake \
|
||||
[additional CMake flags] {source_directory}
|
||||
make
|
||||
|
||||
@@ -646,45 +613,23 @@ The following is a general recipe script that can be modified for your specific
|
||||
needs.
|
||||
|
||||
# Set these variables to suit your needs
|
||||
NDK_PATH={full path to the "ndk" directory-- for example, /opt/android/sdk/ndk-bundle}
|
||||
BUILD_PLATFORM={the platform name for the NDK package you installed--
|
||||
for example, "windows-x86" or "linux-x86_64" or "darwin-x86_64"}
|
||||
TOOLCHAIN_VERSION={"4.8", "4.9", "clang3.5", etc. This corresponds to a
|
||||
toolchain directory under ${NDK_PATH}/toolchains/.}
|
||||
ANDROID_VERSION={The minimum version of Android to support. "21" or later
|
||||
NDK_PATH={full path to the NDK directory-- for example,
|
||||
/opt/android/android-ndk-r16b}
|
||||
TOOLCHAIN={"gcc" or "clang"-- "gcc" must be used with NDK r14b and earlier,
|
||||
and "clang" must be used with NDK r17c and later}
|
||||
ANDROID_VERSION={the minimum version of Android to support. "21" or later
|
||||
is required for a 64-bit build.}
|
||||
|
||||
# It should not be necessary to modify the rest
|
||||
HOST=x86_64-linux-android
|
||||
SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-x86_64
|
||||
export CFLAGS="-D__ANDROID_API__=${ANDROID_VERSION} --sysroot=${SYSROOT} \
|
||||
-isystem ${NDK_PATH}/sysroot/usr/include \
|
||||
-isystem ${NDK_PATH}/sysroot/usr/include/${HOST}"
|
||||
export LDFLAGS=-pie
|
||||
TOOLCHAIN=${NDK_PATH}/toolchains/x86_64-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM}
|
||||
|
||||
cd {build_directory}
|
||||
|
||||
cat <<EOF >toolchain.cmake
|
||||
set(CMAKE_SYSTEM_NAME Linux)
|
||||
set(CMAKE_SYSTEM_PROCESSOR x86_64)
|
||||
set(CMAKE_C_COMPILER ${TOOLCHAIN}/bin/${HOST}-gcc)
|
||||
set(CMAKE_FIND_ROOT_PATH ${TOOLCHAIN}/${HOST})
|
||||
EOF
|
||||
|
||||
cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=1 \
|
||||
cmake -G"Unix Makefiles" \
|
||||
-DANDROID_ABI=x86_64 \
|
||||
-DANDROID_PLATFORM=android-${ANDROID_VERSION} \
|
||||
-DANDROID_TOOLCHAIN=${TOOLCHAIN} \
|
||||
-DCMAKE_TOOLCHAIN_FILE=${NDK_PATH}/build/cmake/android.toolchain.cmake \
|
||||
[additional CMake flags] {source_directory}
|
||||
make
|
||||
|
||||
|
||||
If building for Android 4.0.x (API level < 16) or earlier, remove
|
||||
`-DCMAKE_POSITION_INDEPENDENT_CODE=1` from the CMake arguments and `-pie` from
|
||||
`LDFLAGS`.
|
||||
|
||||
If building on Windows, add `.exe` to the end of `CMAKE_C_COMPILER`.
|
||||
|
||||
|
||||
Advanced CMake Options
|
||||
----------------------
|
||||
|
||||
|
||||
4
Brewfile
4
Brewfile
@@ -1,4 +0,0 @@
|
||||
brew 'yasm'
|
||||
brew 'gcc@5'
|
||||
brew 'md5sha1sum'
|
||||
cask 'Caskroom/versions/java6'
|
||||
@@ -109,8 +109,6 @@ endif()
|
||||
|
||||
include(cmakescripts/GNUInstallDirs.cmake)
|
||||
|
||||
set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR})
|
||||
|
||||
macro(report_directory var)
|
||||
if(CMAKE_INSTALL_${var} STREQUAL CMAKE_INSTALL_FULL_${var})
|
||||
message(STATUS "CMAKE_INSTALL_${var} = ${CMAKE_INSTALL_${var}}")
|
||||
@@ -193,6 +191,10 @@ endif()
|
||||
report_option(ENABLE_SHARED "Shared libraries")
|
||||
report_option(ENABLE_STATIC "Static libraries")
|
||||
|
||||
if(ENABLE_SHARED)
|
||||
set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR})
|
||||
endif()
|
||||
|
||||
if(WITH_12BIT)
|
||||
set(WITH_ARITH_DEC 0)
|
||||
set(WITH_ARITH_ENC 0)
|
||||
@@ -333,7 +335,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
|
||||
endif()
|
||||
if(${var} MATCHES "-xO2")
|
||||
string(REGEX REPLACE "-xO2" "-xO5" ${var} "${${var}}")
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
endif()
|
||||
@@ -470,8 +472,8 @@ if(UNIX AND NOT APPLE)
|
||||
# still work.
|
||||
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/conftest.map
|
||||
"VERS_1 { global: foo; local: *; }; VERS_2 { global: foo2; } VERS_1;")
|
||||
set(CMAKE_REQUIRED_FLAGS "-Wl,-M,${CMAKE_CURRENT_BINARY_DIR}/conftest.map")
|
||||
check_c_source_compiles("void foo() {} void foo2() {} int main(void) { return 0; }"
|
||||
set(CMAKE_REQUIRED_FLAGS "-Wl,-M,${CMAKE_CURRENT_BINARY_DIR}/conftest.map -shared")
|
||||
check_c_source_compiles("int foo() { return 0; } int foo2() { return 2; }"
|
||||
HAVE_MAPFILE)
|
||||
set(CMAKE_REQUIRED_FLAGS)
|
||||
file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/conftest.map)
|
||||
@@ -536,7 +538,7 @@ elseif(NOT WITH_12BIT)
|
||||
endif()
|
||||
if(WITH_SIMD)
|
||||
message(STATUS "SIMD extensions: ${CPU_TYPE} (WITH_SIMD = ${WITH_SIMD})")
|
||||
if(MSVC_IDE)
|
||||
if(MSVC_IDE OR XCODE)
|
||||
set_source_files_properties(${SIMD_OBJS} PROPERTIES GENERATED 1)
|
||||
endif()
|
||||
else()
|
||||
@@ -565,16 +567,16 @@ if(WITH_TURBOJPEG)
|
||||
turbojpeg.c transupp.c jdatadst-tj.c jdatasrc-tj.c rdbmp.c rdppm.c
|
||||
wrbmp.c wrppm.c)
|
||||
set(TJMAPFILE ${CMAKE_CURRENT_SOURCE_DIR}/turbojpeg-mapfile)
|
||||
if(WITH_JAVA)
|
||||
set(TURBOJPEG_SOURCES ${TURBOJPEG_SOURCES} turbojpeg-jni.c)
|
||||
include_directories(${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2})
|
||||
if(WITH_JAVA)
|
||||
set(TURBOJPEG_SOURCES ${TURBOJPEG_SOURCES} turbojpeg-jni.c)
|
||||
include_directories(${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2})
|
||||
set(TJMAPFILE ${CMAKE_CURRENT_SOURCE_DIR}/turbojpeg-mapfile.jni)
|
||||
endif()
|
||||
endif()
|
||||
add_library(turbojpeg SHARED ${TURBOJPEG_SOURCES})
|
||||
set_property(TARGET turbojpeg PROPERTY COMPILE_FLAGS
|
||||
"-DBMP_SUPPORTED -DPPM_SUPPORTED")
|
||||
if(WIN32)
|
||||
set_target_properties(turbojpeg PROPERTIES DEFINE_SYMBOL DLLDEFINE)
|
||||
set_target_properties(turbojpeg PROPERTIES DEFINE_SYMBOL DLLDEFINE)
|
||||
endif()
|
||||
if(MINGW)
|
||||
set_target_properties(turbojpeg PROPERTIES LINK_FLAGS -Wl,--kill-at)
|
||||
@@ -609,7 +611,7 @@ if(WITH_TURBOJPEG)
|
||||
target_link_libraries(tjexample turbojpeg)
|
||||
if(UNIX)
|
||||
target_link_libraries(tjexample m)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(ENABLE_STATIC)
|
||||
@@ -699,7 +701,7 @@ add_executable(wrjpgcom wrjpgcom.c)
|
||||
|
||||
add_subdirectory(md5)
|
||||
|
||||
if(MSVC_IDE)
|
||||
if(MSVC_IDE OR XCODE)
|
||||
set(OBJDIR "\${CTEST_CONFIGURATION_TYPE}/")
|
||||
else()
|
||||
set(OBJDIR "")
|
||||
@@ -715,7 +717,7 @@ if(WITH_12BIT)
|
||||
set(MD5_JPEG_422_IFAST_OPT 7322e3bd2f127f7de4b40d4480ce60e4)
|
||||
set(MD5_PPM_422_IFAST 79807fa552899e66a04708f533e16950)
|
||||
set(MD5_PPM_422M_IFAST 07737bfe8a7c1c87aaa393a0098d16b0)
|
||||
set(MD5_JPEG_420_IFAST_Q100_PROG a1da220b5604081863a504297ed59e55)
|
||||
set(MD5_JPEG_420_IFAST_Q100_PROG 008ab68d6ddbba04a8f01deee4e0f9f8)
|
||||
set(MD5_PPM_420_Q100_IFAST 1b3730122709f53d007255e8dfd3305e)
|
||||
set(MD5_PPM_420M_Q100_IFAST 980a1a3c5bf9510022869d30b7d26566)
|
||||
set(MD5_JPEG_GRAY_ISLOW 235c90707b16e2e069f37c888b2636d9)
|
||||
@@ -765,7 +767,7 @@ else()
|
||||
set(MD5_PPM_422M_IFAST 8dbc65323d62cca7c91ba02dd1cfa81d)
|
||||
set(MD5_BMP_422M_IFAST_565 3294bd4d9a1f2b3d08ea6020d0db7065)
|
||||
set(MD5_BMP_422M_IFAST_565D da98c9c7b6039511be4a79a878a9abc1)
|
||||
set(MD5_JPEG_420_IFAST_Q100_PROG 990cbe0329c882420a2094da7e5adade)
|
||||
set(MD5_JPEG_420_IFAST_Q100_PROG e59bb462016a8d9a748c330a3474bb55)
|
||||
set(MD5_PPM_420_Q100_IFAST 5a732542015c278ff43635e473a8a294)
|
||||
set(MD5_PPM_420M_Q100_IFAST ff692ee9323a3b424894862557c092f1)
|
||||
set(MD5_JPEG_GRAY_ISLOW 72b51f894b8f4a10b3ee3066770aa38d)
|
||||
@@ -1032,6 +1034,8 @@ foreach(libtype ${TEST_LIBTYPES})
|
||||
|
||||
add_test(djpeg-${libtype}-rgb-islow-icc-cmp
|
||||
${MD5CMP} b06a39d730129122e85c1363ed1bbc9e testout_rgb_islow.icc)
|
||||
set_tests_properties(djpeg-${libtype}-rgb-islow-icc-cmp PROPERTIES
|
||||
DEPENDS djpeg-${libtype}-rgb-islow)
|
||||
|
||||
add_bittest(jpegtran icc "-copy;all;-icc;${TESTIMAGES}/test2.icc"
|
||||
testout_rgb_islow2.jpg testout_rgb_islow.jpg ${MD5_JPEG_RGB_ISLOW2})
|
||||
@@ -1078,7 +1082,7 @@ foreach(libtype ${TEST_LIBTYPES})
|
||||
|
||||
# CC: RGB->YCC SAMP: fullsize/h2v2 FDCT: ifast ENT: prog huff
|
||||
add_bittest(cjpeg 420-q100-ifast-prog
|
||||
"-sample;2x2;-quality;100;-dct;fast;-prog"
|
||||
"-sample;2x2;-quality;100;-dct;fast;-scans;${TESTIMAGES}/test.scan"
|
||||
testout_420_q100_ifast_prog.jpg ${TESTIMAGES}/testorig.ppm
|
||||
${MD5_JPEG_420_IFAST_Q100_PROG})
|
||||
|
||||
@@ -1126,12 +1130,12 @@ foreach(libtype ${TEST_LIBTYPES})
|
||||
${MD5_JPEG_420S_IFAST_OPT})
|
||||
|
||||
if(FLOATTEST)
|
||||
# CC: RGB->YCC SAMP: fullsize/int FDCT: float ENT: prog huff
|
||||
# CC: RGB->YCC SAMP: fullsize/int FDCT: float ENT: prog huff
|
||||
add_bittest(cjpeg 3x2-float-prog "-sample;3x2;-dct;float;-prog"
|
||||
testout_3x2_float_prog.jpg ${TESTIMAGES}/testorig.ppm
|
||||
${MD5_JPEG_3x2_FLOAT_PROG_${FLOATTEST_UC}})
|
||||
|
||||
# CC: YCC->RGB SAMP: fullsize/int IDCT: float ENT: prog huff
|
||||
# CC: YCC->RGB SAMP: fullsize/int IDCT: float ENT: prog huff
|
||||
add_bittest(djpeg 3x2-float-prog "-dct;float"
|
||||
testout_3x2_float.ppm testout_3x2_float_prog.jpg
|
||||
${MD5_PPM_3x2_FLOAT_${FLOATTEST_UC}} cjpeg-${libtype}-3x2-float-prog)
|
||||
@@ -1321,6 +1325,8 @@ if(WITH_TURBOJPEG)
|
||||
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -yuv -alloc
|
||||
COMMAND echo tjbenchtest -progressive
|
||||
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -progressive
|
||||
COMMAND echo tjbenchtest -progressive -yuv
|
||||
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -progressive -yuv
|
||||
COMMAND echo tjexampletest
|
||||
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest
|
||||
COMMAND echo tjbenchtest.java
|
||||
@@ -1329,6 +1335,9 @@ if(WITH_TURBOJPEG)
|
||||
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java -yuv
|
||||
COMMAND echo tjbenchtest.java -progressive
|
||||
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java -progressive
|
||||
COMMAND echo tjexampletest.java -progressive -yuv
|
||||
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java
|
||||
-progressive -yuv
|
||||
COMMAND echo tjexampletest.java
|
||||
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest.java
|
||||
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest
|
||||
@@ -1344,6 +1353,10 @@ if(WITH_TURBOJPEG)
|
||||
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -yuv
|
||||
COMMAND echo tjbenchtest -yuv -alloc
|
||||
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -yuv -alloc
|
||||
COMMAND echo tjbenchtest -progressive
|
||||
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -progressive
|
||||
COMMAND echo tjbenchtest -progressive -yuv
|
||||
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -progressive -yuv
|
||||
COMMAND echo tjexampletest
|
||||
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest
|
||||
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest)
|
||||
@@ -1363,12 +1376,22 @@ if(WITH_TURBOJPEG)
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
if(NOT CMAKE_VERSION VERSION_LESS "3.1" AND MSVC AND
|
||||
CMAKE_C_LINKER_SUPPORTS_PDB)
|
||||
install(FILES "$<TARGET_PDB_FILE:turbojpeg>"
|
||||
DESTINATION ${CMAKE_INSTALL_BINDIR} OPTIONAL)
|
||||
endif()
|
||||
endif()
|
||||
if(ENABLE_STATIC)
|
||||
install(TARGETS turbojpeg-static ARCHIVE
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||
if(NOT ENABLE_SHARED)
|
||||
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/tjbench-static${EXE}
|
||||
if(MSVC_IDE OR XCODE)
|
||||
set(DIR "${CMAKE_CURRENT_BINARY_DIR}/\${CMAKE_INSTALL_CONFIG_NAME}")
|
||||
else()
|
||||
set(DIR ${CMAKE_CURRENT_BINARY_DIR})
|
||||
endif()
|
||||
install(PROGRAMS ${DIR}/tjbench-static${EXE}
|
||||
DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME tjbench${EXE})
|
||||
endif()
|
||||
endif()
|
||||
@@ -1379,11 +1402,16 @@ endif()
|
||||
if(ENABLE_STATIC)
|
||||
install(TARGETS jpeg-static ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||
if(NOT ENABLE_SHARED)
|
||||
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/cjpeg-static${EXE}
|
||||
if(MSVC_IDE OR XCODE)
|
||||
set(DIR "${CMAKE_CURRENT_BINARY_DIR}/\${CMAKE_INSTALL_CONFIG_NAME}")
|
||||
else()
|
||||
set(DIR ${CMAKE_CURRENT_BINARY_DIR})
|
||||
endif()
|
||||
install(PROGRAMS ${DIR}/cjpeg-static${EXE}
|
||||
DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME cjpeg${EXE})
|
||||
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/djpeg-static${EXE}
|
||||
install(PROGRAMS ${DIR}/djpeg-static${EXE}
|
||||
DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME djpeg${EXE})
|
||||
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/jpegtran-static${EXE}
|
||||
install(PROGRAMS ${DIR}/jpegtran-static${EXE}
|
||||
DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME jpegtran${EXE})
|
||||
endif()
|
||||
endif()
|
||||
@@ -1408,10 +1436,10 @@ if(UNIX OR MINGW)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/rdjpgcom.1
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/wrjpgcom.1
|
||||
DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/pkgscripts/libjpeg.pc
|
||||
endif()
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/pkgscripts/libjpeg.pc
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pkgscripts/libturbojpeg.pc
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
|
||||
endif()
|
||||
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/jconfig.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/jerror.h ${CMAKE_CURRENT_SOURCE_DIR}/jmorecfg.h
|
||||
|
||||
195
ChangeLog.md
195
ChangeLog.md
@@ -1,3 +1,152 @@
|
||||
2.0.4
|
||||
=====
|
||||
|
||||
### Significant changes relative to 2.0.3:
|
||||
|
||||
1. Fixed a regression in the Windows packaging system (introduced by
|
||||
2.0 beta1[2]) whereby, if both the 64-bit libjpeg-turbo SDK for GCC and the
|
||||
64-bit libjpeg-turbo SDK for Visual C++ were installed on the same system, only
|
||||
one of them could be uninstalled.
|
||||
|
||||
2. Fixed a signed integer overflow and subsequent segfault that occurred when
|
||||
attempting to decompress images with more than 715827882 pixels using the
|
||||
64-bit C version of TJBench.
|
||||
|
||||
3. Fixed an out-of-bounds write in `tjDecompressToYUV2()` and
|
||||
`tjDecompressToYUVPlanes()` (sometimes manifesting as a double free) that
|
||||
occurred when attempting to decompress grayscale JPEG images that were
|
||||
compressed with a sampling factor other than 1 (for instance, with
|
||||
`cjpeg -grayscale -sample 2x2`).
|
||||
|
||||
4. Fixed a regression introduced by 2.0.2[5] that caused the TurboJPEG API to
|
||||
incorrectly identify some JPEG images with unusual sampling factors as 4:4:4
|
||||
JPEG images. This was known to cause a buffer overflow when attempting to
|
||||
decompress some such images using `tjDecompressToYUV2()` or
|
||||
`tjDecompressToYUVPlanes()`.
|
||||
|
||||
5. Fixed an issue, detected by ASan, whereby attempting to losslessly transform
|
||||
a specially-crafted malformed JPEG image containing an extremely-high-frequency
|
||||
coefficient block (junk image data that could never be generated by a
|
||||
legitimate JPEG compressor) could cause the Huffman encoder's local buffer to
|
||||
be overrun. (Refer to 1.4.0[9] and 1.4beta1[15].) Given that the buffer
|
||||
overrun was fully contained within the stack and did not cause a segfault or
|
||||
other user-visible errant behavior, and given that the lossless transformer
|
||||
(unlike the decompressor) is not generally exposed to arbitrary data exploits,
|
||||
this issue did not likely pose a security risk.
|
||||
|
||||
6. The ARM 64-bit (ARMv8) NEON SIMD assembly code now stores constants in a
|
||||
separate read-only data section rather than in the text section, to support
|
||||
execute-only memory layouts.
|
||||
|
||||
|
||||
2.0.3
|
||||
=====
|
||||
|
||||
### Significant changes relative to 2.0.2:
|
||||
|
||||
1. Fixed "using JNI after critical get" errors that occurred on Android
|
||||
platforms when passing invalid arguments to certain methods in the TurboJPEG
|
||||
Java API.
|
||||
|
||||
2. Fixed a regression in the SIMD feature detection code, introduced by
|
||||
the AVX2 SIMD extensions (2.0 beta1[1]), that was known to cause an illegal
|
||||
instruction exception, in rare cases, on CPUs that lack support for CPUID leaf
|
||||
07H (or on which the maximum CPUID leaf has been limited by way of a BIOS
|
||||
setting.)
|
||||
|
||||
3. The 4:4:0 (h1v2) fancy (smooth) chroma upsampling algorithm in the
|
||||
decompressor now uses a similar bias pattern to that of the 4:2:2 (h2v1) fancy
|
||||
chroma upsampling algorithm, rounding up or down the upsampled result for
|
||||
alternate pixels rather than always rounding down. This ensures that,
|
||||
regardless of whether a 4:2:2 JPEG image is rotated or transposed prior to
|
||||
decompression (in the frequency domain) or after decompression (in the spatial
|
||||
domain), the final image will be similar.
|
||||
|
||||
4. Fixed an integer overflow and subsequent segfault that occurred when
|
||||
attempting to compress or decompress images with more than 1 billion pixels
|
||||
using the TurboJPEG API.
|
||||
|
||||
5. Fixed a regression introduced by 2.0 beta1[15] whereby attempting to
|
||||
generate a progressive JPEG image on an SSE2-capable CPU using a scan script
|
||||
containing one or more scans with lengths divisible by 16 would result in an
|
||||
error ("Missing Huffman code table entry") and an invalid JPEG image.
|
||||
|
||||
6. Fixed an issue whereby `tjDecodeYUV()` and `tjDecodeYUVPlanes()` would throw
|
||||
an error ("Invalid progressive parameters") or a warning ("Inconsistent
|
||||
progression sequence") if passed a TurboJPEG instance that was previously used
|
||||
to decompress a progressive JPEG image.
|
||||
|
||||
|
||||
2.0.2
|
||||
=====
|
||||
|
||||
### Significant changes relative to 2.0.1:
|
||||
|
||||
1. Fixed a regression introduced by 2.0.1[5] that prevented a runtime search
|
||||
path (rpath) from being embedded in the libjpeg-turbo shared libraries and
|
||||
executables for macOS and iOS. This caused a fatal error of the form
|
||||
"dyld: Library not loaded" when attempting to use one of the executables,
|
||||
unless `DYLD_LIBRARY_PATH` was explicitly set to the location of the
|
||||
libjpeg-turbo shared libraries.
|
||||
|
||||
2. Fixed an integer overflow and subsequent segfault (CVE-2018-20330) that
|
||||
occurred when attempting to load a BMP file with more than 1 billion pixels
|
||||
using the `tjLoadImage()` function.
|
||||
|
||||
3. Fixed a buffer overrun (CVE-2018-19664) that occurred when attempting to
|
||||
decompress a specially-crafted malformed JPEG image to a 256-color BMP using
|
||||
djpeg.
|
||||
|
||||
4. Fixed a floating point exception that occurred when attempting to
|
||||
decompress a specially-crafted malformed JPEG image with a specified image
|
||||
width or height of 0 using the C version of TJBench.
|
||||
|
||||
5. The TurboJPEG API will now decompress 4:4:4 JPEG images with 2x1, 1x2, 3x1,
|
||||
or 1x3 luminance and chrominance sampling factors. This is a non-standard way
|
||||
of specifying 1x subsampling (normally 4:4:4 JPEGs have 1x1 luminance and
|
||||
chrominance sampling factors), but the JPEG format and the libjpeg API both
|
||||
allow it.
|
||||
|
||||
6. Fixed a regression introduced by 2.0 beta1[7] that caused djpeg to generate
|
||||
incorrect PPM images when used with the `-colors` option.
|
||||
|
||||
7. Fixed an issue whereby a static build of libjpeg-turbo (a build in which
|
||||
`ENABLE_SHARED` is `0`) could not be installed using the Visual Studio IDE.
|
||||
|
||||
8. Fixed a severe performance issue in the Loongson MMI SIMD extensions that
|
||||
occurred when compressing RGB images whose image rows were not 64-bit-aligned.
|
||||
|
||||
|
||||
2.0.1
|
||||
=====
|
||||
|
||||
### Significant changes relative to 2.0.0:
|
||||
|
||||
1. Fixed a regression introduced with the new CMake-based Un*x build system,
|
||||
whereby jconfig.h could cause compiler warnings of the form
|
||||
`"HAVE_*_H" redefined` if it was included by downstream Autotools-based
|
||||
projects that used `AC_CHECK_HEADERS()` to check for the existence of locale.h,
|
||||
stddef.h, or stdlib.h.
|
||||
|
||||
2. The `jsimd_quantize_float_dspr2()` and `jsimd_convsamp_float_dspr2()`
|
||||
functions in the MIPS DSPr2 SIMD extensions are now disabled at compile time
|
||||
if the soft float ABI is enabled. Those functions use instructions that are
|
||||
incompatible with the soft float ABI.
|
||||
|
||||
3. Fixed a regression in the SIMD feature detection code, introduced by
|
||||
the AVX2 SIMD extensions (2.0 beta1[1]), that caused libjpeg-turbo to crash on
|
||||
Windows 7 if Service Pack 1 was not installed.
|
||||
|
||||
4. Fixed an out-of-bounds read in cjpeg that occurred when attempting to compress
|
||||
a specially-crafted malformed color-index (8-bit-per-sample) Targa file in
|
||||
which some of the samples (color indices) exceeded the bounds of the Targa
|
||||
file's color table.
|
||||
|
||||
5. Fixed an issue whereby installing a fully static build of libjpeg-turbo
|
||||
(a build in which `CFLAGS` contains `-static` and `ENABLE_SHARED` is `0`) would
|
||||
fail with "No valid ELF RPATH or RUNPATH entry exists in the file."
|
||||
|
||||
|
||||
2.0.0
|
||||
=====
|
||||
|
||||
@@ -30,10 +179,11 @@ would produce a "Bogus message code" error message if the underlying bitmap and
|
||||
PPM readers/writers threw an error that was specific to the readers/writers
|
||||
(as opposed to a general libjpeg API error.)
|
||||
|
||||
4. Fixed an issue whereby a specially-crafted malformed BMP file, one in which
|
||||
the header specified an image width of 1073741824 pixels, would trigger a
|
||||
floating point exception (division by zero) in the `tjLoadImage()` function
|
||||
when attempting to load the BMP file into a 4-component image buffer.
|
||||
4. Fixed an issue (CVE-2018-1152) whereby a specially-crafted malformed BMP
|
||||
file, one in which the header specified an image width of 1073741824 pixels,
|
||||
would trigger a floating point exception (division by zero) in the
|
||||
`tjLoadImage()` function when attempting to load the BMP file into a
|
||||
4-component image buffer.
|
||||
|
||||
5. Fixed an issue whereby certain combinations of calls to
|
||||
`jpeg_skip_scanlines()` and `jpeg_read_scanlines()` could trigger an infinite
|
||||
@@ -47,10 +197,10 @@ a 4:2:2 or 4:2:0 JPEG image using the merged (non-fancy) upsampling algorithms
|
||||
7. The new CMake-based build system will now disable the MIPS DSPr2 SIMD
|
||||
extensions if it detects that the compiler does not support DSPr2 instructions.
|
||||
|
||||
8. Fixed out-of-bounds read in cjpeg that occurred when attempting to compress
|
||||
a specially-crafted malformed color-index (8-bit-per-sample) BMP file in which
|
||||
some of the samples (color indices) exceeded the bounds of the BMP file's color
|
||||
table.
|
||||
8. Fixed out-of-bounds read in cjpeg (CVE-2018-14498) that occurred when
|
||||
attempting to compress a specially-crafted malformed color-index
|
||||
(8-bit-per-sample) BMP file in which some of the samples (color indices)
|
||||
exceeded the bounds of the BMP file's color table.
|
||||
|
||||
9. Fixed a signed integer overflow in the progressive Huffman decoder, detected
|
||||
by the Clang and GCC undefined behavior sanitizers, that could be triggered by
|
||||
@@ -210,8 +360,8 @@ write scanlines in bottom-up order.) djpeg will now exit gracefully if an
|
||||
output format other than PPM/PGM, GIF, or Targa is selected along with the
|
||||
`-crop` option.
|
||||
|
||||
4. Fixed an issue whereby `jpeg_skip_scanlines()` would segfault if color
|
||||
quantization was enabled.
|
||||
4. Fixed an issue (CVE-2017-15232) whereby `jpeg_skip_scanlines()` would
|
||||
segfault if color quantization was enabled.
|
||||
|
||||
5. TJBench (both C and Java versions) will now display usage information if any
|
||||
command-line argument is unrecognized. This prevents the program from silently
|
||||
@@ -838,13 +988,13 @@ and IDCT algorithms (both are used during JPEG decompression.) For unknown
|
||||
reasons (probably related to clang), this code cannot currently be compiled for
|
||||
iOS.
|
||||
|
||||
15. Fixed an extremely rare bug that could cause the Huffman encoder's local
|
||||
buffer to overrun when a very high-frequency MCU is compressed using quality
|
||||
100 and no subsampling, and when the JPEG output buffer is being dynamically
|
||||
resized by the destination manager. This issue was so rare that, even with a
|
||||
test program specifically designed to make the bug occur (by injecting random
|
||||
high-frequency YUV data into the compressor), it was reproducible only once in
|
||||
about every 25 million iterations.
|
||||
15. Fixed an extremely rare bug (CVE-2014-9092) that could cause the Huffman
|
||||
encoder's local buffer to overrun when a very high-frequency MCU is compressed
|
||||
using quality 100 and no subsampling, and when the JPEG output buffer is being
|
||||
dynamically resized by the destination manager. This issue was so rare that,
|
||||
even with a test program specifically designed to make the bug occur (by
|
||||
injecting random high-frequency YUV data into the compressor), it was
|
||||
reproducible only once in about every 25 million iterations.
|
||||
|
||||
16. Fixed an oversight in the TurboJPEG C wrapper: if any of the JPEG
|
||||
compression functions was called repeatedly with the same
|
||||
@@ -879,8 +1029,9 @@ entropy coding (by passing arguments of `-progressive -arithmetic` to cjpeg or
|
||||
jpegtran, for instance) would result in an error, `Requested feature was
|
||||
omitted at compile time`.
|
||||
|
||||
4. Fixed a couple of issues whereby malformed JPEG images would cause
|
||||
libjpeg-turbo to use uninitialized memory during decompression.
|
||||
4. Fixed a couple of issues (CVE-2013-6629 and CVE-2013-6630) whereby malformed
|
||||
JPEG images would cause libjpeg-turbo to use uninitialized memory during
|
||||
decompression.
|
||||
|
||||
5. Fixed an error (`Buffer passed to JPEG library is too small`) that occurred
|
||||
when calling the TurboJPEG YUV encoding function with a very small (< 5x5)
|
||||
@@ -1019,9 +1170,9 @@ correct behavior of the colorspace extensions when merged upsampling is used.
|
||||
upper 64 bits of xmm6 and xmm7 on Win64 platforms, which violated the Win64
|
||||
calling conventions.
|
||||
|
||||
4. Fixed a regression caused by 1.2.0[6] whereby decompressing corrupt JPEG
|
||||
images (specifically, images in which the component count was erroneously set
|
||||
to a large value) would cause libjpeg-turbo to segfault.
|
||||
4. Fixed a regression (CVE-2012-2806) caused by 1.2.0[6] whereby decompressing
|
||||
corrupt JPEG images (specifically, images in which the component count was
|
||||
erroneously set to a large value) would cause libjpeg-turbo to segfault.
|
||||
|
||||
5. Worked around a severe performance issue with "Bobcat" (AMD Embedded APU)
|
||||
processors. The `MASKMOVDQU` instruction, which was used by the libjpeg-turbo
|
||||
|
||||
29
LICENSE.md
29
LICENSE.md
@@ -14,7 +14,7 @@ libjpeg-turbo is covered by three compatible BSD-style open source licenses:
|
||||
This license covers the TurboJPEG API library and associated programs, as
|
||||
well as the build system.
|
||||
|
||||
- The zlib License, which is listed below
|
||||
- The [zlib License](https://opensource.org/licenses/Zlib)
|
||||
|
||||
This license is a subset of the other two, and it covers the libjpeg-turbo
|
||||
SIMD extensions.
|
||||
@@ -66,7 +66,7 @@ best of our understanding.
|
||||
|
||||
2. If your binary distribution includes or uses the TurboJPEG API, then
|
||||
your product documentation must include the text of the Modified BSD
|
||||
License.
|
||||
License (see below.)
|
||||
|
||||
**Origin**
|
||||
- Clause 2 of the Modified BSD License
|
||||
@@ -91,7 +91,8 @@ best of our understanding.
|
||||
The Modified (3-clause) BSD License
|
||||
===================================
|
||||
|
||||
Copyright (C)\<YEAR\> \<AUTHOR\>. All Rights Reserved.
|
||||
Copyright (C)2009-2020 D. R. Commander. All Rights Reserved.
|
||||
Copyright (C)2015 Viktor Szathmáry. All Rights Reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
@@ -118,28 +119,6 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
The zlib License
|
||||
================
|
||||
|
||||
Copyright (C) \<YEAR\>, \<AUTHOR\>.
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
|
||||
Why Three Licenses?
|
||||
===================
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ before_build:
|
||||
- cd cmake_build
|
||||
- cmake .. -G "Visual Studio 15 2017" -DPNG_SUPPORTED=NO
|
||||
|
||||
build_script:
|
||||
build_script:
|
||||
- cd %APPVEYOR_BUILD_FOLDER%
|
||||
- msbuild cmake_build\mozjpeg.sln
|
||||
|
||||
|
||||
6
cjpeg.c
6
cjpeg.c
@@ -879,12 +879,10 @@ main(int argc, char **argv)
|
||||
|
||||
if (memdst) {
|
||||
fprintf(stderr, "Compressed size: %lu bytes\n", outsize);
|
||||
if (outbuffer != NULL)
|
||||
free(outbuffer);
|
||||
free(outbuffer);
|
||||
}
|
||||
|
||||
if (icc_profile != NULL)
|
||||
free(icc_profile);
|
||||
free(icc_profile);
|
||||
|
||||
/* All done. */
|
||||
exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS);
|
||||
|
||||
@@ -83,7 +83,7 @@ endif()
|
||||
if(BITS EQUAL 64)
|
||||
set(INST_PLATFORM "${INST_PLATFORM} 64-bit")
|
||||
set(INST_NAME ${INST_NAME}64)
|
||||
set(INST_REG_NAME ${INST_DIR}64)
|
||||
set(INST_REG_NAME ${INST_REG_NAME}64)
|
||||
set(INST_DEFS ${INST_DEFS} -DWIN64)
|
||||
endif()
|
||||
|
||||
@@ -145,6 +145,11 @@ set(DEFAULT_IOS_ARMV8_BUILD ${CMAKE_SOURCE_DIR}/iosarmv8)
|
||||
set(IOS_ARMV8_BUILD ${DEFAULT_IOS_ARMV8_BUILD} CACHE PATH
|
||||
"Directory containing ARMv8 iOS build to include in universal binaries (default: ${DEFAULT_IOS_ARMV8_BUILD})")
|
||||
|
||||
set(OSX_APP_CERT_NAME "" CACHE STRING
|
||||
"Name of the Developer ID Application certificate (in the macOS keychain) that should be used to sign the libjpeg-turbo DMG. Leave this blank to generate an unsigned DMG.")
|
||||
set(OSX_INST_CERT_NAME "" CACHE STRING
|
||||
"Name of the Developer ID Installer certificate (in the macOS keychain) that should be used to sign the libjpeg-turbo installer package. Leave this blank to generate an unsigned package.")
|
||||
|
||||
configure_file(release/makemacpkg.in pkgscripts/makemacpkg)
|
||||
configure_file(release/Distribution.xml.in pkgscripts/Distribution.xml)
|
||||
configure_file(release/uninstall.in pkgscripts/uninstall)
|
||||
|
||||
@@ -118,7 +118,7 @@
|
||||
# absolute paths where necessary, using the same logic.
|
||||
|
||||
#=============================================================================
|
||||
# Copyright 2016 D. R. Commander
|
||||
# Copyright 2016, 2019 D. R. Commander
|
||||
# Copyright 2016 Dmitry Marakasov
|
||||
# Copyright 2016 Roger Leigh
|
||||
# Copyright 2015 Alex Turbov
|
||||
@@ -184,7 +184,7 @@ macro(GNUInstallDirs_set_install_dir var docstring)
|
||||
"${docstring} (Default: ${CMAKE_INSTALL_DEFAULT_${var}})"
|
||||
${_GNUInstallDirs_CMAKE_INSTALL_FORCE_${var}})
|
||||
|
||||
if(NOT "${CMAKE_INSTALL_${var}}" STREQUAL "${CMAKE_INSTALL_DEFAULT_${var}}")
|
||||
if(NOT CMAKE_INSTALL_${var} STREQUAL CMAKE_INSTALL_DEFAULT_${var})
|
||||
unset(_GNUInstallDirs_CMAKE_INSTALL_DEFAULT_${var} CACHE)
|
||||
endif()
|
||||
|
||||
|
||||
4
djpeg.c
4
djpeg.c
@@ -516,7 +516,9 @@ main(int argc, char **argv)
|
||||
FILE *input_file;
|
||||
FILE *output_file;
|
||||
unsigned char *inbuffer = NULL;
|
||||
#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
||||
unsigned long insize = 0;
|
||||
#endif
|
||||
JDIMENSION num_scanlines;
|
||||
|
||||
/* On Mac, fetch a command line. */
|
||||
@@ -811,7 +813,7 @@ main(int argc, char **argv)
|
||||
end_progress_monitor((j_common_ptr)&cinfo);
|
||||
#endif
|
||||
|
||||
if (memsrc && inbuffer != NULL)
|
||||
if (memsrc)
|
||||
free(inbuffer);
|
||||
|
||||
/* All done. */
|
||||
|
||||
@@ -2078,7 +2078,7 @@ If you choose option 1, <code>*jpegSize</code> should be set to the size of your
|
||||
<p>You should always use this function to free JPEG destination buffer(s) that were automatically (re)allocated by the compression and transform functions or that were manually allocated using <a class="el" href="group___turbo_j_p_e_g.html#gaec627dd4c5f30b7a775a7aea3bec5d83" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a>.</p>
|
||||
<dl class="params"><dt>Parameters</dt><dd>
|
||||
<table class="params">
|
||||
<tr><td class="paramname">buffer</td><td>address of the buffer to free</td></tr>
|
||||
<tr><td class="paramname">buffer</td><td>address of the buffer to free. If the address is NULL, then this function has no effect.</td></tr>
|
||||
</table>
|
||||
</dd>
|
||||
</dl>
|
||||
|
||||
47
example.txt
47
example.txt
@@ -288,12 +288,14 @@ my_error_exit(j_common_ptr cinfo)
|
||||
}
|
||||
|
||||
|
||||
METHODDEF(int) do_read_JPEG_file(struct jpeg_decompress_struct *cinfo,
|
||||
char *filename);
|
||||
|
||||
/*
|
||||
* Sample routine for JPEG decompression. We assume that the source file name
|
||||
* is passed in. We want to return 1 on success, 0 on error.
|
||||
*/
|
||||
|
||||
|
||||
GLOBAL(int)
|
||||
read_JPEG_file(char *filename)
|
||||
{
|
||||
@@ -301,6 +303,21 @@ read_JPEG_file(char *filename)
|
||||
* working space (which is allocated as needed by the JPEG library).
|
||||
*/
|
||||
struct jpeg_decompress_struct cinfo;
|
||||
|
||||
return do_read_JPEG_file(&cinfo, filename);
|
||||
}
|
||||
|
||||
/*
|
||||
* We call the libjpeg API from within a separate function, because modifying
|
||||
* the local non-volatile jpeg_decompress_struct instance below the setjmp()
|
||||
* return point and then accessing the instance after setjmp() returns would
|
||||
* result in undefined behavior that may potentially overwrite all or part of
|
||||
* the structure.
|
||||
*/
|
||||
|
||||
METHODDEF(int)
|
||||
do_read_JPEG_file(struct jpeg_decompress_struct *cinfo, char *filename)
|
||||
{
|
||||
/* We use our private extension JPEG error handler.
|
||||
* Note that this struct must live as long as the main JPEG parameter
|
||||
* struct, to avoid dangling-pointer problems.
|
||||
@@ -325,27 +342,27 @@ read_JPEG_file(char *filename)
|
||||
/* Step 1: allocate and initialize JPEG decompression object */
|
||||
|
||||
/* We set up the normal JPEG error routines, then override error_exit. */
|
||||
cinfo.err = jpeg_std_error(&jerr.pub);
|
||||
cinfo->err = jpeg_std_error(&jerr.pub);
|
||||
jerr.pub.error_exit = my_error_exit;
|
||||
/* Establish the setjmp return context for my_error_exit to use. */
|
||||
if (setjmp(jerr.setjmp_buffer)) {
|
||||
/* If we get here, the JPEG code has signaled an error.
|
||||
* We need to clean up the JPEG object, close the input file, and return.
|
||||
*/
|
||||
jpeg_destroy_decompress(&cinfo);
|
||||
jpeg_destroy_decompress(cinfo);
|
||||
fclose(infile);
|
||||
return 0;
|
||||
}
|
||||
/* Now we can initialize the JPEG decompression object. */
|
||||
jpeg_create_decompress(&cinfo);
|
||||
jpeg_create_decompress(cinfo);
|
||||
|
||||
/* Step 2: specify data source (eg, a file) */
|
||||
|
||||
jpeg_stdio_src(&cinfo, infile);
|
||||
jpeg_stdio_src(cinfo, infile);
|
||||
|
||||
/* Step 3: read file parameters with jpeg_read_header() */
|
||||
|
||||
(void)jpeg_read_header(&cinfo, TRUE);
|
||||
(void)jpeg_read_header(cinfo, TRUE);
|
||||
/* We can ignore the return value from jpeg_read_header since
|
||||
* (a) suspension is not possible with the stdio data source, and
|
||||
* (b) we passed TRUE to reject a tables-only JPEG file as an error.
|
||||
@@ -360,7 +377,7 @@ read_JPEG_file(char *filename)
|
||||
|
||||
/* Step 5: Start decompressor */
|
||||
|
||||
(void)jpeg_start_decompress(&cinfo);
|
||||
(void)jpeg_start_decompress(cinfo);
|
||||
/* We can ignore the return value since suspension is not possible
|
||||
* with the stdio data source.
|
||||
*/
|
||||
@@ -372,30 +389,30 @@ read_JPEG_file(char *filename)
|
||||
* In this example, we need to make an output work buffer of the right size.
|
||||
*/
|
||||
/* JSAMPLEs per row in output buffer */
|
||||
row_stride = cinfo.output_width * cinfo.output_components;
|
||||
row_stride = cinfo->output_width * cinfo->output_components;
|
||||
/* Make a one-row-high sample array that will go away when done with image */
|
||||
buffer = (*cinfo.mem->alloc_sarray)
|
||||
((j_common_ptr)&cinfo, JPOOL_IMAGE, row_stride, 1);
|
||||
buffer = (*cinfo->mem->alloc_sarray)
|
||||
((j_common_ptr)cinfo, JPOOL_IMAGE, row_stride, 1);
|
||||
|
||||
/* Step 6: while (scan lines remain to be read) */
|
||||
/* jpeg_read_scanlines(...); */
|
||||
|
||||
/* Here we use the library's state variable cinfo.output_scanline as the
|
||||
/* Here we use the library's state variable cinfo->output_scanline as the
|
||||
* loop counter, so that we don't have to keep track ourselves.
|
||||
*/
|
||||
while (cinfo.output_scanline < cinfo.output_height) {
|
||||
while (cinfo->output_scanline < cinfo->output_height) {
|
||||
/* jpeg_read_scanlines expects an array of pointers to scanlines.
|
||||
* Here the array is only one element long, but you could ask for
|
||||
* more than one scanline at a time if that's more convenient.
|
||||
*/
|
||||
(void)jpeg_read_scanlines(&cinfo, buffer, 1);
|
||||
(void)jpeg_read_scanlines(cinfo, buffer, 1);
|
||||
/* Assume put_scanline_someplace wants a pointer and sample count. */
|
||||
put_scanline_someplace(buffer[0], row_stride);
|
||||
}
|
||||
|
||||
/* Step 7: Finish decompression */
|
||||
|
||||
(void)jpeg_finish_decompress(&cinfo);
|
||||
(void)jpeg_finish_decompress(cinfo);
|
||||
/* We can ignore the return value since suspension is not possible
|
||||
* with the stdio data source.
|
||||
*/
|
||||
@@ -403,7 +420,7 @@ read_JPEG_file(char *filename)
|
||||
/* Step 8: Release JPEG decompression object */
|
||||
|
||||
/* This is an important step since it will release a good deal of memory. */
|
||||
jpeg_destroy_decompress(&cinfo);
|
||||
jpeg_destroy_decompress(cinfo);
|
||||
|
||||
/* After finish_decompress, we can close the input file.
|
||||
* Here we postpone it until after no more JPEG errors are possible,
|
||||
|
||||
@@ -58,11 +58,21 @@ endif()
|
||||
add_custom_target(javadoc COMMAND
|
||||
javadoc -notimestamp -d ${CMAKE_CURRENT_SOURCE_DIR}/doc -sourcepath ${CMAKE_CURRENT_SOURCE_DIR} org.libjpegturbo.turbojpeg)
|
||||
set(JAVACLASSPATH ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/turbojpeg-java.dir)
|
||||
add_custom_target(javah
|
||||
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJ
|
||||
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJCompressor
|
||||
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJDecompressor
|
||||
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJTransformer)
|
||||
if(Java_VERSION_MAJOR GREATER 9)
|
||||
add_custom_target(javah
|
||||
COMMAND javac -h ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH}
|
||||
-d ${CMAKE_CURRENT_BINARY_DIR}/__unused
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/org/libjpegturbo/turbojpeg/TJ.java
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/org/libjpegturbo/turbojpeg/TJCompressor.java
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/org/libjpegturbo/turbojpeg/TJDecompressor.java
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/org/libjpegturbo/turbojpeg/TJTransformer.java)
|
||||
else()
|
||||
add_custom_target(javah
|
||||
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJ
|
||||
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJCompressor
|
||||
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJDecompressor
|
||||
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJTransformer)
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED CMAKE_INSTALL_DEFAULT_JAVADIR)
|
||||
set(CMAKE_INSTALL_DEFAULT_JAVADIR "<CMAKE_INSTALL_DATAROOTDIR>/java")
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C)2009-2014, 2016-2018 D. R. Commander. All Rights Reserved.
|
||||
* Copyright (C)2009-2014, 2016-2019 D. R. Commander. All Rights Reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -121,6 +121,8 @@ final class TJBench {
|
||||
int rindex = TJ.getRedOffset(pixelFormat);
|
||||
int gindex = TJ.getGreenOffset(pixelFormat);
|
||||
int bindex = TJ.getBlueOffset(pixelFormat);
|
||||
if ((long)w[0] * (long)h[0] * (long)ps > (long)Integer.MAX_VALUE)
|
||||
throw new Exception("Image is too large");
|
||||
byte[] dstBuf = new byte[w[0] * h[0] * ps];
|
||||
int pixels = w[0] * h[0], dstPtr = 0, rgbPtr = 0;
|
||||
|
||||
@@ -175,8 +177,11 @@ final class TJBench {
|
||||
|
||||
tjd = new TJDecompressor();
|
||||
|
||||
if (dstBuf == null)
|
||||
if (dstBuf == null) {
|
||||
if ((long)pitch * (long)scaledh > (long)Integer.MAX_VALUE)
|
||||
throw new Exception("Image is too large");
|
||||
dstBuf = new byte[pitch * scaledh];
|
||||
}
|
||||
|
||||
/* Set the destination buffer to gray so we know whether the decompressor
|
||||
attempted to write to it */
|
||||
@@ -202,7 +207,9 @@ final class TJBench {
|
||||
int width = doTile ? Math.min(tilew, w - x) : scaledw;
|
||||
int height = doTile ? Math.min(tileh, h - y) : scaledh;
|
||||
|
||||
tjd.setSourceImage(jpegBuf[tile], jpegSize[tile]);
|
||||
try {
|
||||
tjd.setSourceImage(jpegBuf[tile], jpegSize[tile]);
|
||||
} catch (TJException e) { handleTJException(e); }
|
||||
if (doYUV) {
|
||||
yuvImage.setBuf(yuvImage.getBuf(), width, yuvPad, height, subsamp);
|
||||
try {
|
||||
@@ -329,6 +336,8 @@ final class TJBench {
|
||||
String pfStr = PIXFORMATSTR[pf];
|
||||
YUVImage yuvImage = null;
|
||||
|
||||
if ((long)pitch * (long)h > (long)Integer.MAX_VALUE)
|
||||
throw new Exception("Image is too large");
|
||||
tmpBuf = new byte[pitch * h];
|
||||
|
||||
if (quiet == 0)
|
||||
@@ -469,6 +478,8 @@ final class TJBench {
|
||||
if (!compOnly)
|
||||
decomp(srcBuf, jpegBuf, jpegSize, tmpBuf, w, h, subsamp, jpegQual,
|
||||
fileName, tilew, tileh);
|
||||
else if (quiet == 1)
|
||||
System.out.println("N/A");
|
||||
|
||||
if (tilew == w && tileh == h) break;
|
||||
}
|
||||
@@ -489,6 +500,8 @@ final class TJBench {
|
||||
int tw, th, ttilew, ttileh, tntilesw, tntilesh, tsubsamp;
|
||||
|
||||
FileInputStream fis = new FileInputStream(fileName);
|
||||
if (fis.getChannel().size() > (long)Integer.MAX_VALUE)
|
||||
throw new Exception("Image is too large");
|
||||
int srcSize = (int)fis.getChannel().size();
|
||||
srcBuf = new byte[srcSize];
|
||||
fis.read(srcBuf, 0, srcSize);
|
||||
@@ -500,7 +513,9 @@ final class TJBench {
|
||||
|
||||
tjt = new TJTransformer();
|
||||
|
||||
tjt.setSourceImage(srcBuf, srcSize);
|
||||
try {
|
||||
tjt.setSourceImage(srcBuf, srcSize);
|
||||
} catch (TJException e) { handleTJException(e); }
|
||||
w = tjt.getWidth();
|
||||
h = tjt.getHeight();
|
||||
subsamp = tjt.getSubsamp();
|
||||
@@ -607,7 +622,9 @@ final class TJBench {
|
||||
elapsed = 0.;
|
||||
while (true) {
|
||||
start = getTime();
|
||||
tjt.transform(jpegBuf, t, flags);
|
||||
try {
|
||||
tjt.transform(jpegBuf, t, flags);
|
||||
} catch (TJException e) { handleTJException(e); }
|
||||
jpegSize = tjt.getTransformedSizes();
|
||||
elapsed += getTime() - start;
|
||||
if (iter >= 0) {
|
||||
@@ -705,7 +722,7 @@ final class TJBench {
|
||||
System.out.println(" bytes to which each row of each plane in the intermediate YUV image is");
|
||||
System.out.println(" padded (default = 1)");
|
||||
System.out.println("-scale M/N = Scale down the width/height of the decompressed JPEG image by a");
|
||||
System.out.print (" factor of M/N (M/N = ");
|
||||
System.out.print(" factor of M/N (M/N = ");
|
||||
for (i = 0; i < nsf; i++) {
|
||||
System.out.format("%d/%d", scalingFactors[i].getNum(),
|
||||
scalingFactors[i].getDenom());
|
||||
|
||||
12
jchuff.c
12
jchuff.c
@@ -4,7 +4,7 @@
|
||||
* This file was part of the Independent JPEG Group's software:
|
||||
* Copyright (C) 1991-1997, Thomas G. Lane.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2009-2011, 2014-2016, 2018, D. R. Commander.
|
||||
* Copyright (C) 2009-2011, 2014-2016, 2018-2019, D. R. Commander.
|
||||
* Copyright (C) 2015, Matthieu Darbois.
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
@@ -43,8 +43,8 @@
|
||||
*/
|
||||
|
||||
/* NOTE: Both GCC and Clang define __GNUC__ */
|
||||
#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
|
||||
#if !defined __thumb__ || defined __thumb2__
|
||||
#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
|
||||
#if !defined(__thumb__) || defined(__thumb2__)
|
||||
#define USE_CLZ_INTRINSIC
|
||||
#endif
|
||||
#endif
|
||||
@@ -356,6 +356,8 @@ dump_buffer(working_state *state)
|
||||
put_buffer = (put_buffer << size) | code; \
|
||||
}
|
||||
|
||||
#if SIZEOF_SIZE_T != 8 && !defined(_WIN64)
|
||||
|
||||
#define CHECKBUF15() { \
|
||||
if (put_bits > 15) { \
|
||||
EMIT_BYTE() \
|
||||
@@ -363,6 +365,8 @@ dump_buffer(working_state *state)
|
||||
} \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#define CHECKBUF31() { \
|
||||
if (put_bits > 31) { \
|
||||
EMIT_BYTE() \
|
||||
@@ -428,7 +432,7 @@ dump_buffer(working_state *state)
|
||||
* scanning order-- 1, 8, 16, etc.), then this will produce an encoded block
|
||||
* larger than 200 bytes.
|
||||
*/
|
||||
#define BUFSIZE (DCTSIZE2 * 4)
|
||||
#define BUFSIZE (DCTSIZE2 * 8)
|
||||
|
||||
#define LOAD_BUFFER() { \
|
||||
if (state->free_in_buffer < BUFSIZE) { \
|
||||
|
||||
@@ -507,8 +507,8 @@ prepare_for_pass (j_compress_ptr cinfo)
|
||||
*/
|
||||
master->pass_type = output_pass;
|
||||
master->pass_number++;
|
||||
/*FALLTHROUGH*/
|
||||
#endif
|
||||
/*FALLTHROUGH*/
|
||||
case output_pass:
|
||||
/* Do a data-output pass. */
|
||||
/* We need not repeat per-scan setup if prior optimization pass did it. */
|
||||
|
||||
28
jconfig.h.in
28
jconfig.h.in
@@ -10,16 +10,16 @@
|
||||
#define LIBJPEG_TURBO_VERSION_NUMBER @LIBJPEG_TURBO_VERSION_NUMBER@
|
||||
|
||||
/* Support arithmetic encoding */
|
||||
#cmakedefine C_ARITH_CODING_SUPPORTED
|
||||
#cmakedefine C_ARITH_CODING_SUPPORTED 1
|
||||
|
||||
/* Support arithmetic decoding */
|
||||
#cmakedefine D_ARITH_CODING_SUPPORTED
|
||||
#cmakedefine D_ARITH_CODING_SUPPORTED 1
|
||||
|
||||
/* Support in-memory source/destination managers */
|
||||
#cmakedefine MEM_SRCDST_SUPPORTED
|
||||
#cmakedefine MEM_SRCDST_SUPPORTED 1
|
||||
|
||||
/* Use accelerated SIMD routines. */
|
||||
#cmakedefine WITH_SIMD
|
||||
#cmakedefine WITH_SIMD 1
|
||||
|
||||
/*
|
||||
* Define BITS_IN_JSAMPLE as either
|
||||
@@ -33,37 +33,37 @@
|
||||
#define BITS_IN_JSAMPLE @BITS_IN_JSAMPLE@ /* use 8 or 12 */
|
||||
|
||||
/* Define to 1 if you have the <locale.h> header file. */
|
||||
#cmakedefine HAVE_LOCALE_H
|
||||
#cmakedefine HAVE_LOCALE_H 1
|
||||
|
||||
/* Define to 1 if you have the <stddef.h> header file. */
|
||||
#cmakedefine HAVE_STDDEF_H
|
||||
#cmakedefine HAVE_STDDEF_H 1
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#cmakedefine HAVE_STDLIB_H
|
||||
#cmakedefine HAVE_STDLIB_H 1
|
||||
|
||||
/* Define if you need to include <sys/types.h> to get size_t. */
|
||||
#cmakedefine NEED_SYS_TYPES_H
|
||||
#cmakedefine NEED_SYS_TYPES_H 1
|
||||
|
||||
/* Define if you have BSD-like bzero and bcopy in <strings.h> rather than
|
||||
memset/memcpy in <string.h>. */
|
||||
#cmakedefine NEED_BSD_STRINGS
|
||||
#cmakedefine NEED_BSD_STRINGS 1
|
||||
|
||||
/* Define to 1 if the system has the type `unsigned char'. */
|
||||
#cmakedefine HAVE_UNSIGNED_CHAR
|
||||
#cmakedefine HAVE_UNSIGNED_CHAR 1
|
||||
|
||||
/* Define to 1 if the system has the type `unsigned short'. */
|
||||
#cmakedefine HAVE_UNSIGNED_SHORT
|
||||
#cmakedefine HAVE_UNSIGNED_SHORT 1
|
||||
|
||||
/* Compiler does not support pointers to undefined structures. */
|
||||
#cmakedefine INCOMPLETE_TYPES_BROKEN
|
||||
#cmakedefine INCOMPLETE_TYPES_BROKEN 1
|
||||
|
||||
/* Define if your (broken) compiler shifts signed values as if they were
|
||||
unsigned. */
|
||||
#cmakedefine RIGHT_SHIFT_IS_UNSIGNED
|
||||
#cmakedefine RIGHT_SHIFT_IS_UNSIGNED 1
|
||||
|
||||
/* Define to 1 if type `char' is unsigned and you are not using gcc. */
|
||||
#ifndef __CHAR_UNSIGNED__
|
||||
#cmakedefine __CHAR_UNSIGNED__
|
||||
#cmakedefine __CHAR_UNSIGNED__ 1
|
||||
#endif
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
|
||||
@@ -53,8 +53,8 @@
|
||||
*/
|
||||
|
||||
/* NOTE: Both GCC and Clang define __GNUC__ */
|
||||
#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
|
||||
#if !defined __thumb__ || defined __thumb2__
|
||||
#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
|
||||
#if !defined(__thumb__) || defined(__thumb2__)
|
||||
#define USE_CLZ_INTRINSIC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* Copyright (C) 1994-1996, Thomas G. Lane.
|
||||
* Modified 2009-2012 by Guido Vollbeding.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2011, 2014, 2016, D. R. Commander.
|
||||
* Copyright (C) 2011, 2014, 2016, 2019, D. R. Commander.
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
*
|
||||
@@ -27,6 +27,8 @@
|
||||
extern void *malloc(size_t size);
|
||||
extern void free(void *ptr);
|
||||
#endif
|
||||
void jpeg_mem_dest_tj(j_compress_ptr cinfo, unsigned char **outbuffer,
|
||||
unsigned long *outsize, boolean alloc);
|
||||
|
||||
|
||||
#define OUTPUT_BUF_SIZE 4096 /* choose an efficiently fwrite'able size */
|
||||
@@ -101,8 +103,7 @@ empty_mem_output_buffer(j_compress_ptr cinfo)
|
||||
|
||||
MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
|
||||
|
||||
if (dest->newbuffer != NULL)
|
||||
free(dest->newbuffer);
|
||||
free(dest->newbuffer);
|
||||
|
||||
dest->newbuffer = nextbuffer;
|
||||
|
||||
|
||||
@@ -144,8 +144,7 @@ empty_mem_output_buffer (j_compress_ptr cinfo)
|
||||
|
||||
MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
|
||||
|
||||
if (dest->newbuffer != NULL)
|
||||
free(dest->newbuffer);
|
||||
free(dest->newbuffer);
|
||||
|
||||
dest->newbuffer = nextbuffer;
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* Copyright (C) 1994-1996, Thomas G. Lane.
|
||||
* Modified 2009-2011 by Guido Vollbeding.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2011, 2016, D. R. Commander.
|
||||
* Copyright (C) 2011, 2016, 2019, D. R. Commander.
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
*
|
||||
@@ -23,6 +23,9 @@
|
||||
#include "jpeglib.h"
|
||||
#include "jerror.h"
|
||||
|
||||
void jpeg_mem_src_tj(j_decompress_ptr cinfo, const unsigned char *inbuffer,
|
||||
unsigned long insize);
|
||||
|
||||
|
||||
/*
|
||||
* Initialize source --- called by jpeg_read_header
|
||||
|
||||
@@ -592,7 +592,7 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
||||
/* Declarations for ordered dithering
|
||||
*
|
||||
* We use a 4x4 ordered dither array packed into 32 bits. This array is
|
||||
* sufficent for dithering RGB888 to RGB565.
|
||||
* sufficient for dithering RGB888 to RGB565.
|
||||
*/
|
||||
|
||||
#define DITHER_MASK 0x3
|
||||
|
||||
10
jdhuff.c
10
jdhuff.c
@@ -4,7 +4,7 @@
|
||||
* This file was part of the Independent JPEG Group's software:
|
||||
* Copyright (C) 1991-1997, Thomas G. Lane.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2009-2011, 2016, 2018, D. R. Commander.
|
||||
* Copyright (C) 2009-2011, 2016, 2018-2019, D. R. Commander.
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
*
|
||||
@@ -589,7 +589,11 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
||||
if (entropy->dc_needed[blkn]) {
|
||||
/* Convert DC difference to actual value, update last_dc_val */
|
||||
int ci = cinfo->MCU_membership[blkn];
|
||||
s += state.last_dc_val[ci];
|
||||
/* This is really just
|
||||
* s += state.last_dc_val[ci];
|
||||
* It is written this way in order to shut up UBSan.
|
||||
*/
|
||||
s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
|
||||
state.last_dc_val[ci] = s;
|
||||
if (block) {
|
||||
/* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
|
||||
@@ -684,7 +688,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
||||
|
||||
if (entropy->dc_needed[blkn]) {
|
||||
int ci = cinfo->MCU_membership[blkn];
|
||||
s += state.last_dc_val[ci];
|
||||
s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
|
||||
state.last_dc_val[ci] = s;
|
||||
if (block)
|
||||
(*block)[0] = (JCOEF)s;
|
||||
|
||||
@@ -429,8 +429,6 @@ h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
||||
#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l)
|
||||
#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r)
|
||||
|
||||
#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3)
|
||||
|
||||
#define WRITE_TWO_PIXELS_LE(addr, pixels) { \
|
||||
((INT16 *)(addr))[0] = (INT16)(pixels); \
|
||||
((INT16 *)(addr))[1] = (INT16)((pixels) >> 16); \
|
||||
@@ -448,7 +446,7 @@ h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
||||
/* Declarations for ordered dithering
|
||||
*
|
||||
* We use a 4x4 ordered dither array packed into 32 bits. This array is
|
||||
* sufficent for dithering RGB888 to RGB565.
|
||||
* sufficient for dithering RGB888 to RGB565.
|
||||
*/
|
||||
|
||||
#define DITHER_MASK 0x3
|
||||
|
||||
14
jdsample.c
14
jdsample.c
@@ -8,6 +8,7 @@
|
||||
* Copyright (C) 2010, 2015-2016, D. R. Commander.
|
||||
* Copyright (C) 2014, MIPS Technologies, Inc., California.
|
||||
* Copyright (C) 2015, Google, Inc.
|
||||
* Copyright (C) 2019, Arm Limited.
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
*
|
||||
@@ -315,9 +316,9 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
||||
JSAMPARRAY output_data = *output_data_ptr;
|
||||
JSAMPROW inptr0, inptr1, outptr;
|
||||
#if BITS_IN_JSAMPLE == 8
|
||||
int thiscolsum;
|
||||
int thiscolsum, bias;
|
||||
#else
|
||||
JLONG thiscolsum;
|
||||
JLONG thiscolsum, bias;
|
||||
#endif
|
||||
JDIMENSION colctr;
|
||||
int inrow, outrow, v;
|
||||
@@ -327,15 +328,18 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
||||
for (v = 0; v < 2; v++) {
|
||||
/* inptr0 points to nearest input row, inptr1 points to next nearest */
|
||||
inptr0 = input_data[inrow];
|
||||
if (v == 0) /* next nearest is row above */
|
||||
if (v == 0) { /* next nearest is row above */
|
||||
inptr1 = input_data[inrow - 1];
|
||||
else /* next nearest is row below */
|
||||
bias = 1;
|
||||
} else { /* next nearest is row below */
|
||||
inptr1 = input_data[inrow + 1];
|
||||
bias = 2;
|
||||
}
|
||||
outptr = output_data[outrow++];
|
||||
|
||||
for (colctr = 0; colctr < compptr->downsampled_width; colctr++) {
|
||||
thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
|
||||
*outptr++ = (JSAMPLE)((thiscolsum + 1) >> 2);
|
||||
*outptr++ = (JSAMPLE)((thiscolsum + bias) >> 2);
|
||||
}
|
||||
}
|
||||
inrow++;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* jfdctint.c
|
||||
*
|
||||
* This file was part of the Independent JPEG Group's software.
|
||||
* This file was part of the Independent JPEG Group's software:
|
||||
* Copyright (C) 1991-1996, Thomas G. Lane.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2015, D. R. Commander.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* jidctint.c
|
||||
*
|
||||
* This file was part of the Independent JPEG Group's software.
|
||||
* This file was part of the Independent JPEG Group's software:
|
||||
* Copyright (C) 1991-1998, Thomas G. Lane.
|
||||
* Modification developed 2002-2009 by Guido Vollbeding.
|
||||
* libjpeg-turbo Modifications:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* jidctred.c
|
||||
*
|
||||
* This file was part of the Independent JPEG Group's software.
|
||||
* This file was part of the Independent JPEG Group's software:
|
||||
* Copyright (C) 1994-1998, Thomas G. Lane.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2015, D. R. Commander.
|
||||
|
||||
@@ -129,7 +129,7 @@ select_transform (JXFORM_CODE transform)
|
||||
|
||||
LOCAL(int)
|
||||
parse_switches (j_compress_ptr cinfo, int argc, char **argv,
|
||||
int last_file_arg_seen, boolean for_real)
|
||||
int last_file_arg_seen, boolean for_real)
|
||||
/* Parse optional switches.
|
||||
* Returns argv[] index of first file-name argument (== argc if none).
|
||||
* Any file names with indexes <= last_file_arg_seen are ignored;
|
||||
|
||||
@@ -154,7 +154,7 @@ typedef struct {
|
||||
*/
|
||||
boolean is_padded; /* is the colorindex padded for odither? */
|
||||
|
||||
int Ncolors[MAX_Q_COMPS]; /* # of values alloced to each component */
|
||||
int Ncolors[MAX_Q_COMPS]; /* # of values allocated to each component */
|
||||
|
||||
/* Variables for ordered dithering */
|
||||
int row_index; /* cur row's vertical index in dither matrix */
|
||||
|
||||
20
jversion.h
20
jversion.h
@@ -4,7 +4,7 @@
|
||||
* This file was part of the Independent JPEG Group's software:
|
||||
* Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2010, 2012-2018, D. R. Commander.
|
||||
* Copyright (C) 2010, 2012-2020, D. R. Commander.
|
||||
* mozjpeg Modifications:
|
||||
* Copyright (C) 2014, Mozilla Corporation.
|
||||
* For conditions of distribution and use, see the accompanying README file.
|
||||
@@ -37,18 +37,18 @@
|
||||
*/
|
||||
|
||||
#define JCOPYRIGHT \
|
||||
"Copyright (C) 2009-2018 D. R. Commander\n" \
|
||||
"Copyright (C) 2011-2016 Siarhei Siamashka\n" \
|
||||
"Copyright (C) 2009-2020 D. R. Commander\n" \
|
||||
"Copyright (C) 2011-2016 Siarhei Siamashka\n" \
|
||||
"Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
|
||||
"Copyright (C) 2015 Intel Corporation\n" \
|
||||
"Copyright (C) 2015 Google, Inc.\n" \
|
||||
"Copyright (C) 2015 Google, Inc.\n" \
|
||||
"Copyright (C) 2014 Mozilla Corporation\n" \
|
||||
"Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
|
||||
"Copyright (C) 2013 Linaro Limited\n" \
|
||||
"Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
|
||||
"Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
|
||||
"Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
|
||||
"Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
|
||||
"Copyright (C) 2013 Linaro Limited\n" \
|
||||
"Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
|
||||
"Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
|
||||
"Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
|
||||
"Copyright (C) 1991-2016 Thomas G. Lane, Guido Vollbeding"
|
||||
|
||||
#define JCOPYRIGHT_SHORT \
|
||||
"Copyright (C) 1991-2018 The libjpeg-turbo Project and many others"
|
||||
"Copyright (C) 1991-2020 The libjpeg-turbo Project and many others"
|
||||
|
||||
15
md5/md5hl.c
15
md5/md5hl.c
@@ -6,7 +6,7 @@
|
||||
* this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
|
||||
* ----------------------------------------------------------------------------
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C)2016, 2018 D. R. Commander. All Rights Reserved.
|
||||
* Copyright (C)2016, 2018-2019 D. R. Commander. All Rights Reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -56,7 +56,7 @@
|
||||
|
||||
#include "./md5.h"
|
||||
|
||||
char *MD5End(MD5_CTX *ctx, char *buf)
|
||||
static char *MD5End(MD5_CTX *ctx, char *buf)
|
||||
{
|
||||
int i;
|
||||
unsigned char digest[LENGTH];
|
||||
@@ -89,7 +89,7 @@ char *MD5FileChunk(const char *filename, char *buf, off_t ofs, off_t len)
|
||||
off_t n;
|
||||
|
||||
MD5Init(&ctx);
|
||||
#if _WIN32
|
||||
#ifdef _WIN32
|
||||
f = _open(filename, O_RDONLY | O_BINARY);
|
||||
#else
|
||||
f = open(filename, O_RDONLY);
|
||||
@@ -123,12 +123,3 @@ char *MD5FileChunk(const char *filename, char *buf, off_t ofs, off_t len)
|
||||
return 0;
|
||||
return (MD5End(&ctx, buf));
|
||||
}
|
||||
|
||||
char *MD5Data(const void *data, unsigned int len, char *buf)
|
||||
{
|
||||
MD5_CTX ctx;
|
||||
|
||||
MD5Init(&ctx);
|
||||
MD5Update(&ctx, (unsigned char *)data, len);
|
||||
return (MD5End(&ctx, buf));
|
||||
}
|
||||
|
||||
11
rdtarga.c
11
rdtarga.c
@@ -3,8 +3,9 @@
|
||||
*
|
||||
* This file was part of the Independent JPEG Group's software:
|
||||
* Copyright (C) 1991-1996, Thomas G. Lane.
|
||||
* It was modified by The libjpeg-turbo Project to include only code relevant
|
||||
* to libjpeg-turbo.
|
||||
* Modified 2017 by Guido Vollbeding.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2018, D. R. Commander.
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
*
|
||||
@@ -66,6 +67,7 @@ typedef struct _tga_source_struct {
|
||||
U_CHAR tga_pixel[4];
|
||||
|
||||
int pixel_size; /* Bytes per Targa pixel (1 to 4) */
|
||||
int cmap_length; /* colormap length */
|
||||
|
||||
/* State info for reading RLE-coded pixels; both counts must be init to 0 */
|
||||
int block_count; /* # of pixels remaining in RLE block */
|
||||
@@ -196,11 +198,14 @@ get_8bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
|
||||
register JSAMPROW ptr;
|
||||
register JDIMENSION col;
|
||||
register JSAMPARRAY colormap = source->colormap;
|
||||
int cmaplen = source->cmap_length;
|
||||
|
||||
ptr = source->pub.buffer[0];
|
||||
for (col = cinfo->image_width; col > 0; col--) {
|
||||
(*source->read_pixel) (source); /* Load next pixel into tga_pixel */
|
||||
t = UCH(source->tga_pixel[0]);
|
||||
if (t >= cmaplen)
|
||||
ERREXIT(cinfo, JERR_TGA_BADPARMS);
|
||||
*ptr++ = colormap[0][t];
|
||||
*ptr++ = colormap[1][t];
|
||||
*ptr++ = colormap[2][t];
|
||||
@@ -452,12 +457,14 @@ start_input_tga(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
|
||||
/* Allocate space to store the colormap */
|
||||
source->colormap = (*cinfo->mem->alloc_sarray)
|
||||
((j_common_ptr)cinfo, JPOOL_IMAGE, (JDIMENSION)maplen, (JDIMENSION)3);
|
||||
source->cmap_length = (int)maplen;
|
||||
/* and read it from the file */
|
||||
read_colormap(source, (int)maplen, UCH(targaheader[7]));
|
||||
} else {
|
||||
if (cmaptype) /* but you promised a cmap! */
|
||||
ERREXIT(cinfo, JERR_TGA_BADPARMS);
|
||||
source->colormap = NULL;
|
||||
source->cmap_length = 0;
|
||||
}
|
||||
|
||||
cinfo->input_components = components;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2, AVX2, NEON, AltiVec) to accelerate baseline JPEG compression and decompression on x86, x86-64, ARM, and PowerPC systems, as well as progressive JPEG compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg, all else being equal. On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized Huffman coding routines. In many cases, the performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
|
||||
libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate baseline JPEG compression and decompression on x86, x86-64, ARM, PowerPC, and MIPS systems, as well as progressive JPEG compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg, all else being equal. On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized Huffman coding routines. In many cases, the performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
|
||||
|
||||
libjpeg-turbo implements both the traditional libjpeg API as well as the less powerful but more straightforward TurboJPEG API. libjpeg-turbo also features colorspace extensions that allow it to compress from/decompress to 32-bit and big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java interface.
|
||||
|
||||
|
||||
@@ -62,15 +62,15 @@ Section "@CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@ (required)"
|
||||
File "@CMAKE_CURRENT_BINARY_DIR@\libturbojpeg.a"
|
||||
File "@CMAKE_CURRENT_BINARY_DIR@\libjpeg.dll.a"
|
||||
File "@CMAKE_CURRENT_BINARY_DIR@\libjpeg.a"
|
||||
SetOutPath $INSTDIR\lib\pkgconfig
|
||||
File "@CMAKE_CURRENT_BINARY_DIR@\pkgscripts\libjpeg.pc"
|
||||
File "@CMAKE_CURRENT_BINARY_DIR@\pkgscripts\libturbojpeg.pc"
|
||||
!else
|
||||
File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}turbojpeg.lib"
|
||||
File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}turbojpeg-static.lib"
|
||||
File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}jpeg.lib"
|
||||
File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}jpeg-static.lib"
|
||||
!endif
|
||||
SetOutPath $INSTDIR\lib\pkgconfig
|
||||
File "@CMAKE_CURRENT_BINARY_DIR@\pkgscripts\libjpeg.pc"
|
||||
File "@CMAKE_CURRENT_BINARY_DIR@\pkgscripts\libturbojpeg.pc"
|
||||
!ifdef JAVA
|
||||
SetOutPath $INSTDIR\classes
|
||||
File "@CMAKE_CURRENT_BINARY_DIR@\java\turbojpeg.jar"
|
||||
@@ -130,8 +130,6 @@ Section "Uninstall"
|
||||
Delete $INSTDIR\lib\libturbojpeg.a
|
||||
Delete $INSTDIR\lib\libjpeg.dll.a
|
||||
Delete $INSTDIR\lib\libjpeg.a
|
||||
Delete $INSTDIR\lib\pkgconfig\libjpeg.pc
|
||||
Delete $INSTDIR\lib\pkgconfig\libturbojpeg.pc
|
||||
!else
|
||||
Delete $INSTDIR\bin\jpeg@SO_MAJOR_VERSION@.dll
|
||||
Delete $INSTDIR\bin\turbojpeg.dll
|
||||
@@ -141,6 +139,8 @@ Section "Uninstall"
|
||||
Delete $INSTDIR\lib\turbojpeg.lib
|
||||
Delete $INSTDIR\lib\turbojpeg-static.lib
|
||||
!endif
|
||||
Delete $INSTDIR\lib\pkgconfig\libjpeg.pc
|
||||
Delete $INSTDIR\lib\pkgconfig\libturbojpeg.pc
|
||||
!ifdef JAVA
|
||||
Delete $INSTDIR\classes\turbojpeg.jar
|
||||
!endif
|
||||
@@ -175,9 +175,7 @@ Section "Uninstall"
|
||||
!endif
|
||||
|
||||
RMDir "$INSTDIR\include"
|
||||
!ifdef GCC
|
||||
RMDir "$INSTDIR\lib\pkgconfig"
|
||||
!endif
|
||||
RMDir "$INSTDIR\lib"
|
||||
RMDir "$INSTDIR\doc"
|
||||
!ifdef GCC
|
||||
|
||||
@@ -58,6 +58,8 @@ BUILDDIRARMV7=@IOS_ARMV7_BUILD@
|
||||
BUILDDIRARMV7S=@IOS_ARMV7S_BUILD@
|
||||
BUILDDIRARMV8=@IOS_ARMV8_BUILD@
|
||||
WITH_JAVA=@WITH_JAVA@
|
||||
OSX_APP_CERT_NAME="@OSX_APP_CERT_NAME@"
|
||||
OSX_INST_CERT_NAME="@OSX_INST_CERT_NAME@"
|
||||
LIPO=lipo
|
||||
|
||||
PREFIX=@CMAKE_INSTALL_PREFIX@
|
||||
@@ -228,7 +230,7 @@ if [ $UNIVERSAL = 1 -a "$BUILDDIRARMV7S" != "" ]; then
|
||||
install_ios $BUILDDIRARMV7S ARMv7s armv7s arm
|
||||
fi
|
||||
|
||||
if [ $UNIVERSAL = 1 -a "BUILDDIRARMV8" != "" ]; then
|
||||
if [ $UNIVERSAL = 1 -a "$BUILDDIRARMV8" != "" ]; then
|
||||
install_ios $BUILDDIRARMV8 ARMv8 armv8 arm64
|
||||
fi
|
||||
|
||||
@@ -258,11 +260,25 @@ cp $SRCDIR/release/License.rtf $SRCDIR/release/Welcome.rtf $SRCDIR/release/ReadM
|
||||
mkdir $TMPDIR/dmg
|
||||
pkgbuild --root $PKGROOT --version $VERSION.$BUILD --identifier @PKGID@ \
|
||||
$TMPDIR/pkg/$PKGNAME.pkg
|
||||
SUFFIX=
|
||||
if [ "$OSX_INST_CERT_NAME" != "" ]; then
|
||||
SUFFIX=-unsigned
|
||||
fi
|
||||
productbuild --distribution pkgscripts/Distribution.xml \
|
||||
--package-path $TMPDIR/pkg/ --resources $TMPDIR/pkg/ \
|
||||
$TMPDIR/dmg/$PKGNAME.pkg
|
||||
$TMPDIR/dmg/$PKGNAME$SUFFIX.pkg
|
||||
if [ "$OSX_INST_CERT_NAME" != "" ]; then
|
||||
productsign --sign "$OSX_INST_CERT_NAME" --timestamp \
|
||||
$TMPDIR/dmg/$PKGNAME$SUFFIX.pkg $TMPDIR/dmg/$PKGNAME.pkg
|
||||
rm -r $TMPDIR/dmg/$PKGNAME$SUFFIX.pkg
|
||||
pkgutil --check-signature $TMPDIR/dmg/$PKGNAME.pkg
|
||||
fi
|
||||
hdiutil create -fs HFS+ -volname $PKGNAME-$VERSION \
|
||||
-srcfolder "$TMPDIR/dmg" $TMPDIR/$PKGNAME-$VERSION.dmg
|
||||
if [ "$OSX_APP_CERT_NAME" != "" ]; then
|
||||
codesign -s "$OSX_APP_CERT_NAME" --timestamp $TMPDIR/$PKGNAME-$VERSION.dmg
|
||||
codesign -vv $TMPDIR/$PKGNAME-$VERSION.dmg
|
||||
fi
|
||||
cp $TMPDIR/$PKGNAME-$VERSION.dmg .
|
||||
|
||||
exit
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
%global _docdir %{_defaultdocdir}/%{name}-%{version}
|
||||
%define _prefix @CMAKE_INSTALL_PREFIX@
|
||||
%define _bindir @CMAKE_INSTALL_FULL_BINDIR@
|
||||
%define _datarootdir @CMAKE_INSTALL_FULL_DATAROOTDIR@
|
||||
%define _docdir %{_defaultdocdir}/%{name}-%{version}
|
||||
%define _includedir @CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||
%define _javadir @CMAKE_INSTALL_FULL_JAVADIR@
|
||||
%define _mandir @CMAKE_INSTALL_FULL_MANDIR@
|
||||
@@ -43,7 +43,7 @@ Group: System Environment/Libraries
|
||||
Release: @BUILD@
|
||||
License: BSD-style
|
||||
BuildRoot: %{_blddir}/%{name}-buildroot-%{version}-%{release}
|
||||
Prereq: /sbin/ldconfig
|
||||
Requires: /sbin/ldconfig
|
||||
%if "%{_bits}" == "64"
|
||||
Provides: %{name} = %{version}-%{release}, @CMAKE_PROJECT_NAME@ = %{version}-%{release}, libturbojpeg.so()(64bit)
|
||||
%else
|
||||
@@ -51,14 +51,14 @@ Provides: %{name} = %{version}-%{release}, @CMAKE_PROJECT_NAME@ = %{version}-%{r
|
||||
%endif
|
||||
|
||||
%description
|
||||
libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2,
|
||||
AVX2, NEON, AltiVec) to accelerate baseline JPEG compression and decompression
|
||||
on x86, x86-64, ARM, and PowerPC systems, as well as progressive JPEG
|
||||
compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is
|
||||
generally 2-6x as fast as libjpeg, all else being equal. On other types of
|
||||
systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by
|
||||
virtue of its highly-optimized Huffman coding routines. In many cases, the
|
||||
performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
|
||||
libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate
|
||||
baseline JPEG compression and decompression on x86, x86-64, ARM, PowerPC, and
|
||||
MIPS systems, as well as progressive JPEG compression on x86 and x86-64
|
||||
systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg,
|
||||
all else being equal. On other types of systems, libjpeg-turbo can still
|
||||
outperform libjpeg by a significant amount, by virtue of its highly-optimized
|
||||
Huffman coding routines. In many cases, the performance of libjpeg-turbo
|
||||
rivals that of proprietary high-speed JPEG codecs.
|
||||
|
||||
libjpeg-turbo implements both the traditional libjpeg API as well as the less
|
||||
powerful but more straightforward TurboJPEG API. libjpeg-turbo also features
|
||||
@@ -183,7 +183,7 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%if "%{_enable_static}" == "1"
|
||||
%{_libdir}/libjpeg.a
|
||||
%endif
|
||||
%{_libdir}/pkgconfig
|
||||
%dir %{_libdir}/pkgconfig
|
||||
%{_libdir}/pkgconfig/libjpeg.pc
|
||||
%if "%{_with_turbojpeg}" == "1"
|
||||
%if "%{_enable_shared}" == "1" || "%{_with_java}" == "1"
|
||||
|
||||
@@ -23,7 +23,7 @@ foreach(src ${JPEG_SOURCES})
|
||||
set(JPEG_SRCS ${JPEG_SRCS} ../${src})
|
||||
endforeach()
|
||||
|
||||
if(WITH_SIMD AND MSVC_IDE)
|
||||
if(WITH_SIMD AND (MSVC_IDE OR XCODE))
|
||||
# This tells CMake that the "source" files haven't been generated yet
|
||||
set_source_files_properties(${SIMD_OBJS} PROPERTIES GENERATED 1)
|
||||
endif()
|
||||
@@ -55,7 +55,8 @@ if(MAPFLAG)
|
||||
LINK_FLAGS "${MAPFLAG}${CMAKE_CURRENT_BINARY_DIR}/../libjpeg.map")
|
||||
endif()
|
||||
if(MSVC)
|
||||
set_target_properties(jpeg PROPERTIES SUFFIX ${SO_MAJOR_VERSION}.dll)
|
||||
set_target_properties(jpeg PROPERTIES
|
||||
RUNTIME_OUTPUT_NAME jpeg${SO_MAJOR_VERSION})
|
||||
# The jsimd_*.c file is built using /MT, so this prevents a linker warning.
|
||||
set_target_properties(jpeg PROPERTIES LINK_FLAGS "/NODEFAULTLIB:LIBCMT /NODEFAULTLIB:LIBCMTD")
|
||||
elseif(MINGW)
|
||||
@@ -94,3 +95,8 @@ install(TARGETS jpeg cjpeg djpeg jpegtran
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
if(NOT CMAKE_VERSION VERSION_LESS "3.1" AND MSVC AND
|
||||
CMAKE_C_LINKER_SUPPORTS_PDB)
|
||||
install(FILES "$<TARGET_PDB_FILE:jpeg>"
|
||||
DESTINATION ${CMAKE_INSTALL_BINDIR} OPTIONAL)
|
||||
endif()
|
||||
|
||||
@@ -38,6 +38,14 @@ elseif(CPU_TYPE STREQUAL "i386")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT REQUIRE_SIMD)
|
||||
include(CheckLanguage)
|
||||
check_language(ASM_NASM)
|
||||
if(NOT CMAKE_ASM_NASM_COMPILER)
|
||||
simd_fail("SIMD extensions disabled: could not find NASM compiler")
|
||||
return()
|
||||
endif()
|
||||
endif()
|
||||
enable_language(ASM_NASM)
|
||||
message(STATUS "CMAKE_ASM_NASM_COMPILER = ${CMAKE_ASM_NASM_COMPILER}")
|
||||
|
||||
@@ -74,12 +82,12 @@ if(CMAKE_ASM_NASM_COMPILER_TYPE MATCHES "yasm")
|
||||
if(${var} STREQUAL "-g")
|
||||
if(CMAKE_ASM_NASM_DEBUG_FORMAT)
|
||||
set_property(CACHE ${var} PROPERTY VALUE "-g ${CMAKE_ASM_NASM_DEBUG_FORMAT}")
|
||||
else()
|
||||
else()
|
||||
set_property(CACHE ${var} PROPERTY VALUE "")
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT WIN32 AND (CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED))
|
||||
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DPIC")
|
||||
@@ -135,6 +143,9 @@ endif()
|
||||
if(MSVC_IDE)
|
||||
set(OBJDIR "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}")
|
||||
string(REGEX REPLACE " " ";" CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS}")
|
||||
elseif(XCODE)
|
||||
set(OBJDIR "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
string(REGEX REPLACE " " ";" CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS}")
|
||||
endif()
|
||||
|
||||
file(GLOB INC_FILES nasm/*.inc)
|
||||
@@ -162,25 +173,25 @@ foreach(file ${SIMD_SOURCES})
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE})
|
||||
endif()
|
||||
set(OBJECT_DEPENDS ${OBJECT_DEPENDS} ${INC_FILES})
|
||||
if(MSVC_IDE)
|
||||
if(MSVC_IDE OR XCODE)
|
||||
# The CMake Visual Studio generators do not work properly with the ASM_NASM
|
||||
# language, so we have to go rogue here and use a custom command like we
|
||||
# did in prior versions of libjpeg-turbo. (This is why we can't have nice
|
||||
# things.)
|
||||
string(REGEX REPLACE "${CPU_TYPE}/" "" filename ${file})
|
||||
set(SIMD_OBJ ${OBJDIR}/${filename}.obj)
|
||||
set(SIMD_OBJ ${OBJDIR}/${filename}${CMAKE_C_OUTPUT_EXTENSION})
|
||||
add_custom_command(OUTPUT ${SIMD_OBJ} DEPENDS ${file} ${OBJECT_DEPENDS}
|
||||
COMMAND ${CMAKE_ASM_NASM_COMPILER} -f${CMAKE_ASM_NASM_OBJECT_FORMAT}
|
||||
${CMAKE_ASM_NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}
|
||||
-o${SIMD_OBJ})
|
||||
set(SIMD_OBJS ${SIMD_OBJS} ${SIMD_OBJ})
|
||||
set(SIMD_OBJS ${SIMD_OBJS} ${SIMD_OBJ})
|
||||
else()
|
||||
set_source_files_properties(${file} PROPERTIES OBJECT_DEPENDS
|
||||
"${OBJECT_DEPENDS}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
if(MSVC_IDE)
|
||||
if(MSVC_IDE OR XCODE)
|
||||
set(SIMD_OBJS ${SIMD_OBJS} PARENT_SCOPE)
|
||||
add_library(simd OBJECT ${CPU_TYPE}/jsimd.c)
|
||||
add_custom_target(simd-objs DEPENDS ${SIMD_OBJS})
|
||||
@@ -262,7 +273,7 @@ endif()
|
||||
# MIPS (GAS)
|
||||
###############################################################################
|
||||
|
||||
elseif(CPU_TYPE STREQUAL "mips")
|
||||
elseif(CPU_TYPE STREQUAL "mips" OR CPU_TYPE STREQUAL "mipsel")
|
||||
|
||||
enable_language(ASM)
|
||||
|
||||
@@ -293,7 +304,7 @@ if(NOT HAVE_DSPR2)
|
||||
return()
|
||||
endif()
|
||||
|
||||
add_library(simd OBJECT ${CPU_TYPE}/jsimd_dspr2.S ${CPU_TYPE}/jsimd.c)
|
||||
add_library(simd OBJECT mips/jsimd_dspr2.S mips/jsimd.c)
|
||||
|
||||
if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED)
|
||||
set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1)
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
* Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
|
||||
* Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander.
|
||||
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
|
||||
* Copyright (C) 2019, Google LLC.
|
||||
*
|
||||
* Based on the x86 SIMD extension for IJG JPEG library,
|
||||
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -30,7 +31,7 @@
|
||||
static unsigned int simd_support = ~0;
|
||||
static unsigned int simd_huffman = 1;
|
||||
|
||||
#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
|
||||
#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
|
||||
|
||||
#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
|
||||
|
||||
@@ -105,7 +106,7 @@ init_simd(void)
|
||||
#ifndef NO_GETENV
|
||||
char *env = NULL;
|
||||
#endif
|
||||
#if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
|
||||
#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
|
||||
int bufsize = 1024; /* an initial guess for the line buffer size limit */
|
||||
#endif
|
||||
|
||||
|
||||
@@ -31,6 +31,251 @@
|
||||
.section .note.GNU-stack, "", %progbits /* mark stack as non-executable */
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
.section __DATA, __const
|
||||
#else
|
||||
.section .rodata, "a", %progbits
|
||||
#endif
|
||||
|
||||
/* Constants for jsimd_idct_islow_neon() */
|
||||
|
||||
#define F_0_298 2446 /* FIX(0.298631336) */
|
||||
#define F_0_390 3196 /* FIX(0.390180644) */
|
||||
#define F_0_541 4433 /* FIX(0.541196100) */
|
||||
#define F_0_765 6270 /* FIX(0.765366865) */
|
||||
#define F_0_899 7373 /* FIX(0.899976223) */
|
||||
#define F_1_175 9633 /* FIX(1.175875602) */
|
||||
#define F_1_501 12299 /* FIX(1.501321110) */
|
||||
#define F_1_847 15137 /* FIX(1.847759065) */
|
||||
#define F_1_961 16069 /* FIX(1.961570560) */
|
||||
#define F_2_053 16819 /* FIX(2.053119869) */
|
||||
#define F_2_562 20995 /* FIX(2.562915447) */
|
||||
#define F_3_072 25172 /* FIX(3.072711026) */
|
||||
|
||||
.balign 16
|
||||
Ljsimd_idct_islow_neon_consts:
|
||||
.short F_0_298
|
||||
.short -F_0_390
|
||||
.short F_0_541
|
||||
.short F_0_765
|
||||
.short - F_0_899
|
||||
.short F_1_175
|
||||
.short F_1_501
|
||||
.short - F_1_847
|
||||
.short - F_1_961
|
||||
.short F_2_053
|
||||
.short - F_2_562
|
||||
.short F_3_072
|
||||
.short 0 /* padding */
|
||||
.short 0
|
||||
.short 0
|
||||
.short 0
|
||||
|
||||
#undef F_0_298
|
||||
#undef F_0_390
|
||||
#undef F_0_541
|
||||
#undef F_0_765
|
||||
#undef F_0_899
|
||||
#undef F_1_175
|
||||
#undef F_1_501
|
||||
#undef F_1_847
|
||||
#undef F_1_961
|
||||
#undef F_2_053
|
||||
#undef F_2_562
|
||||
#undef F_3_072
|
||||
|
||||
/* Constants for jsimd_idct_ifast_neon() */
|
||||
|
||||
.balign 16
|
||||
Ljsimd_idct_ifast_neon_consts:
|
||||
.short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */
|
||||
.short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */
|
||||
.short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */
|
||||
.short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */
|
||||
|
||||
/* Constants for jsimd_idct_4x4_neon() and jsimd_idct_2x2_neon() */
|
||||
|
||||
#define CONST_BITS 13
|
||||
|
||||
#define FIX_0_211164243 (1730) /* FIX(0.211164243) */
|
||||
#define FIX_0_509795579 (4176) /* FIX(0.509795579) */
|
||||
#define FIX_0_601344887 (4926) /* FIX(0.601344887) */
|
||||
#define FIX_0_720959822 (5906) /* FIX(0.720959822) */
|
||||
#define FIX_0_765366865 (6270) /* FIX(0.765366865) */
|
||||
#define FIX_0_850430095 (6967) /* FIX(0.850430095) */
|
||||
#define FIX_0_899976223 (7373) /* FIX(0.899976223) */
|
||||
#define FIX_1_061594337 (8697) /* FIX(1.061594337) */
|
||||
#define FIX_1_272758580 (10426) /* FIX(1.272758580) */
|
||||
#define FIX_1_451774981 (11893) /* FIX(1.451774981) */
|
||||
#define FIX_1_847759065 (15137) /* FIX(1.847759065) */
|
||||
#define FIX_2_172734803 (17799) /* FIX(2.172734803) */
|
||||
#define FIX_2_562915447 (20995) /* FIX(2.562915447) */
|
||||
#define FIX_3_624509785 (29692) /* FIX(3.624509785) */
|
||||
|
||||
.balign 16
|
||||
Ljsimd_idct_4x4_neon_consts:
|
||||
.short FIX_1_847759065 /* v0.h[0] */
|
||||
.short -FIX_0_765366865 /* v0.h[1] */
|
||||
.short -FIX_0_211164243 /* v0.h[2] */
|
||||
.short FIX_1_451774981 /* v0.h[3] */
|
||||
.short -FIX_2_172734803 /* d1[0] */
|
||||
.short FIX_1_061594337 /* d1[1] */
|
||||
.short -FIX_0_509795579 /* d1[2] */
|
||||
.short -FIX_0_601344887 /* d1[3] */
|
||||
.short FIX_0_899976223 /* v2.h[0] */
|
||||
.short FIX_2_562915447 /* v2.h[1] */
|
||||
.short 1 << (CONST_BITS + 1) /* v2.h[2] */
|
||||
.short 0 /* v2.h[3] */
|
||||
|
||||
.balign 8
|
||||
Ljsimd_idct_2x2_neon_consts:
|
||||
.short -FIX_0_720959822 /* v14[0] */
|
||||
.short FIX_0_850430095 /* v14[1] */
|
||||
.short -FIX_1_272758580 /* v14[2] */
|
||||
.short FIX_3_624509785 /* v14[3] */
|
||||
|
||||
/* Constants for jsimd_ycc_*_neon() */
|
||||
|
||||
.balign 16
|
||||
Ljsimd_ycc_rgb_neon_consts:
|
||||
.short 0, 0, 0, 0
|
||||
.short 22971, -11277, -23401, 29033
|
||||
.short -128, -128, -128, -128
|
||||
.short -128, -128, -128, -128
|
||||
|
||||
/* Constants for jsimd_*_ycc_neon() */
|
||||
|
||||
.balign 16
|
||||
Ljsimd_rgb_ycc_neon_consts:
|
||||
.short 19595, 38470, 7471, 11059
|
||||
.short 21709, 32768, 27439, 5329
|
||||
.short 32767, 128, 32767, 128
|
||||
.short 32767, 128, 32767, 128
|
||||
|
||||
/* Constants for jsimd_fdct_islow_neon()
 *
 * Each F_* value is the constant named in its comment multiplied by 2^13 and
 * rounded to an integer.  The macros exist only to build the table below and
 * are #undef'd right after it.
 *
 * The table is 12 coefficients plus 4 zero halfwords of padding, 32 bytes in
 * total, so that it can be fetched with a single "ld1 {v0.8h, v1.8h}".  The
 * code refers to the entries by lane (v0.h[0], v0.h[1], ...), so the order
 * and the signs stored here must not change.
 */
|
||||
|
||||
#define F_0_298 2446 /* FIX(0.298631336) */
|
||||
#define F_0_390 3196 /* FIX(0.390180644) */
|
||||
#define F_0_541 4433 /* FIX(0.541196100) */
|
||||
#define F_0_765 6270 /* FIX(0.765366865) */
|
||||
#define F_0_899 7373 /* FIX(0.899976223) */
|
||||
#define F_1_175 9633 /* FIX(1.175875602) */
|
||||
#define F_1_501 12299 /* FIX(1.501321110) */
|
||||
#define F_1_847 15137 /* FIX(1.847759065) */
|
||||
#define F_1_961 16069 /* FIX(1.961570560) */
|
||||
#define F_2_053 16819 /* FIX(2.053119869) */
|
||||
#define F_2_562 20995 /* FIX(2.562915447) */
|
||||
#define F_3_072 25172 /* FIX(3.072711026) */
|
||||
|
||||
.balign 16
|
||||
Ljsimd_fdct_islow_neon_consts:
|
||||
  .short F_0_298
|
||||
  .short -F_0_390
|
||||
  .short F_0_541
|
||||
  .short F_0_765
|
||||
  .short - F_0_899
|
||||
  .short F_1_175
|
||||
  .short F_1_501
|
||||
  .short - F_1_847
|
||||
  .short - F_1_961
|
||||
  .short F_2_053
|
||||
  .short - F_2_562
|
||||
  .short F_3_072
|
||||
  .short 0 /* padding */
|
||||
  .short 0
|
||||
  .short 0
|
||||
  .short 0
|
||||
|
||||
#undef F_0_298
|
||||
#undef F_0_390
|
||||
#undef F_0_541
|
||||
#undef F_0_765
|
||||
#undef F_0_899
|
||||
#undef F_1_175
|
||||
#undef F_1_501
|
||||
#undef F_1_847
|
||||
#undef F_1_961
|
||||
#undef F_2_053
|
||||
#undef F_2_562
|
||||
#undef F_3_072
|
||||
|
||||
/* Constants for jsimd_fdct_ifast_neon()
 *
 * Four halfwords, loaded with "ld1 {v0.4h}" and used by lane (v0.h[n]).
 * Each entry is written as (n * 128), with n / 256 approximating the
 * constant named in its comment.  334 * 128 would not fit in a signed
 * halfword, so the last entry is stored with 256 * 128 subtracted.
 * NOTE(review): the code presumably compensates for that bias -- confirm at
 * the use site.
 */
|
||||
|
||||
.balign 16
|
||||
Ljsimd_fdct_ifast_neon_consts:
|
||||
  .short (98 * 128) /* XFIX_0_382683433 */
|
||||
  .short (139 * 128) /* XFIX_0_541196100 */
|
||||
  .short (181 * 128) /* XFIX_0_707106781 */
|
||||
  .short (334 * 128 - 256 * 128) /* XFIX_1_306562965 */
|
||||
|
||||
/* Constants for jsimd_h2*_downsample_neon()
 *
 * 16 rows of 16 byte indices.  Row "diff d" holds min(i, 15 - d) for
 * i = 0..15, i.e. the identity permutation with its last d entries replaced
 * by the index of the last kept byte.  The downsample functions select the
 * row at byte offset (BLOCK_WIDTH * 16 - IMAGE_WIDTH) * 16 from the start of
 * the table; jsimd_h2v1_downsample_neon loads it into v18.16b.
 * NOTE(review): presumably used as a tbl permutation that replicates the last
 * valid pixel into the padding columns -- confirm at the use site.
 */
|
||||
|
||||
.balign 16
|
||||
Ljsimd_h2_downsample_neon_consts:
|
||||
  .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
        0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F /* diff 0 */
|
||||
  .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
        0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0E /* diff 1 */
|
||||
  .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
        0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0D, 0x0D /* diff 2 */
|
||||
  .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
        0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0C, 0x0C, 0x0C /* diff 3 */
|
||||
  .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
        0x08, 0x09, 0x0A, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B /* diff 4 */
|
||||
  .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
        0x08, 0x09, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A /* diff 5 */
|
||||
  .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
        0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09 /* diff 6 */
|
||||
  .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
        0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 /* diff 7 */
|
||||
  .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
        0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07 /* diff 8 */
|
||||
  .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x06, \
|
||||
        0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06 /* diff 9 */
|
||||
  .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x05, \
|
||||
        0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05 /* diff 10 */
|
||||
  .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x04, \
|
||||
        0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 /* diff 11 */
|
||||
  .byte 0x00, 0x01, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, \
|
||||
        0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 /* diff 12 */
|
||||
  .byte 0x00, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, \
|
||||
        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02 /* diff 13 */
|
||||
  .byte 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, \
|
||||
        0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 /* diff 14 */
|
||||
  .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
|
||||
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* diff 15 */
|
||||
|
||||
/* Constants for jsimd_huff_encode_one_block_neon()
 *
 * Row 0: the bit masks 0x01..0x80, repeated for both 8-byte halves.
 * Rows 1-12: 16-byte vectors of byte indices, some entries being 255; each
 * row's trailing comment names the lines (L0..L7) it draws from.
 * NOTE(review): the index rows look like tbl lookups that gather 16-bit
 * coefficients (byte pairs 2k, 2k+1) into zigzag order, with 255 acting as
 * an out-of-range index that yields zero -- confirm against the encoder code.
 */
|
||||
|
||||
.balign 16
|
||||
Ljsimd_huff_encode_one_block_neon_consts:
|
||||
  .byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, \
|
||||
        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
|
||||
  .byte 0, 1, 2, 3, 16, 17, 32, 33, \
|
||||
        18, 19, 4, 5, 6, 7, 20, 21 /* L0 => L3 : 4 lines OK */
|
||||
  .byte 34, 35, 48, 49, 255, 255, 50, 51, \
|
||||
        36, 37, 22, 23, 8, 9, 10, 11 /* L0 => L3 : 4 lines OK */
|
||||
  .byte 8, 9, 22, 23, 36, 37, 50, 51, \
|
||||
        255, 255, 255, 255, 255, 255, 52, 53 /* L1 => L4 : 4 lines OK */
|
||||
  .byte 54, 55, 40, 41, 26, 27, 12, 13, \
|
||||
        14, 15, 28, 29, 42, 43, 56, 57 /* L0 => L3 : 4 lines OK */
|
||||
  .byte 6, 7, 20, 21, 34, 35, 48, 49, \
|
||||
        50, 51, 36, 37, 22, 23, 8, 9 /* L4 => L7 : 4 lines OK */
|
||||
  .byte 42, 43, 28, 29, 14, 15, 30, 31, \
|
||||
        44, 45, 58, 59, 255, 255, 255, 255 /* L1 => L4 : 4 lines OK */
|
||||
  .byte 255, 255, 255, 255, 56, 57, 42, 43, \
|
||||
        28, 29, 14, 15, 30, 31, 44, 45 /* L3 => L6 : 4 lines OK */
|
||||
  .byte 26, 27, 40, 41, 42, 43, 28, 29, \
|
||||
        14, 15, 30, 31, 44, 45, 46, 47 /* L5 => L7 : 3 lines OK */
|
||||
  .byte 255, 255, 255, 255, 0, 1, 255, 255, \
|
||||
        255, 255, 255, 255, 255, 255, 255, 255 /* L4 : 1 line OK */
|
||||
  .byte 255, 255, 255, 255, 255, 255, 255, 255, \
|
||||
        0, 1, 16, 17, 2, 3, 255, 255 /* L5 => L6 : 2 lines OK */
|
||||
  .byte 255, 255, 255, 255, 255, 255, 255, 255, \
|
||||
        255, 255, 255, 255, 8, 9, 22, 23 /* L5 => L6 : 2 lines OK */
|
||||
  .byte 4, 5, 6, 7, 255, 255, 255, 255, \
|
||||
        255, 255, 255, 255, 255, 255, 255, 255 /* L7 : 1 line OK */
|
||||
|
||||
.text
|
||||
|
||||
|
||||
@@ -55,6 +300,17 @@ _\fname:
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/* Get symbol location
 *
 * Puts the address of \symbol into \reg: adrp yields the address of the
 * symbol's 4 KB page and the add supplies the offset within that page.
 * The Apple assembler spells the two relocations @PAGE / @PAGEOFF; elsewhere
 * the bare symbol and :lo12: are used.
 * Unlike a single adr (limited to +/-1 MB), this pair can reach a symbol
 * placed in a different section from the code.
 */
|
||||
.macro get_symbol_loc reg, symbol
|
||||
#ifdef __APPLE__
|
||||
  adrp \reg, \symbol@PAGE
|
||||
  add \reg, \reg, \symbol@PAGEOFF
|
||||
#else
|
||||
  adrp \reg, \symbol
|
||||
  add \reg, \reg, :lo12:\symbol
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/* Transpose elements of single 128 bit registers */
|
||||
.macro transpose_single x0, x1, xi, xilen, literal
|
||||
ins \xi\xilen[0], \x0\xilen[0]
|
||||
@@ -63,7 +319,7 @@ _\fname:
|
||||
trn2 \x1\literal, \xi\literal, \x1\literal
|
||||
.endm
|
||||
|
||||
/* Transpose elements of 2 differnet registers */
|
||||
/* Transpose elements of 2 different registers */
|
||||
.macro transpose x0, x1, xi, xilen, literal
|
||||
mov \xi\xilen, \x0\xilen
|
||||
trn1 \x0\literal, \x0\literal, \x1\literal
|
||||
@@ -139,51 +395,6 @@ _\fname:
|
||||
#define CONST_BITS 13
|
||||
#define PASS1_BITS 2
|
||||
|
||||
#define F_0_298 2446 /* FIX(0.298631336) */
|
||||
#define F_0_390 3196 /* FIX(0.390180644) */
|
||||
#define F_0_541 4433 /* FIX(0.541196100) */
|
||||
#define F_0_765 6270 /* FIX(0.765366865) */
|
||||
#define F_0_899 7373 /* FIX(0.899976223) */
|
||||
#define F_1_175 9633 /* FIX(1.175875602) */
|
||||
#define F_1_501 12299 /* FIX(1.501321110) */
|
||||
#define F_1_847 15137 /* FIX(1.847759065) */
|
||||
#define F_1_961 16069 /* FIX(1.961570560) */
|
||||
#define F_2_053 16819 /* FIX(2.053119869) */
|
||||
#define F_2_562 20995 /* FIX(2.562915447) */
|
||||
#define F_3_072 25172 /* FIX(3.072711026) */
|
||||
|
||||
.balign 16
|
||||
Ljsimd_idct_islow_neon_consts:
|
||||
.short F_0_298
|
||||
.short -F_0_390
|
||||
.short F_0_541
|
||||
.short F_0_765
|
||||
.short - F_0_899
|
||||
.short F_1_175
|
||||
.short F_1_501
|
||||
.short - F_1_847
|
||||
.short - F_1_961
|
||||
.short F_2_053
|
||||
.short - F_2_562
|
||||
.short F_3_072
|
||||
.short 0 /* padding */
|
||||
.short 0
|
||||
.short 0
|
||||
.short 0
|
||||
|
||||
#undef F_0_298
|
||||
#undef F_0_390
|
||||
#undef F_0_541
|
||||
#undef F_0_765
|
||||
#undef F_0_899
|
||||
#undef F_1_175
|
||||
#undef F_1_501
|
||||
#undef F_1_847
|
||||
#undef F_1_961
|
||||
#undef F_2_053
|
||||
#undef F_2_562
|
||||
#undef F_3_072
|
||||
|
||||
#define XFIX_P_0_298 v0.h[0]
|
||||
#define XFIX_N_0_390 v0.h[1]
|
||||
#define XFIX_P_0_541 v0.h[2]
|
||||
@@ -217,7 +428,7 @@ asm_function jsimd_idct_islow_neon
|
||||
uxtw x3, w3
|
||||
|
||||
sub sp, sp, #64
|
||||
adr x15, Ljsimd_idct_islow_neon_consts
|
||||
get_symbol_loc x15, Ljsimd_idct_islow_neon_consts
|
||||
mov x10, sp
|
||||
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x10], #32
|
||||
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x10], #32
|
||||
@@ -791,13 +1002,6 @@ asm_function jsimd_idct_islow_neon
|
||||
#define XFIX_1_847759065 v0.h[2]
|
||||
#define XFIX_2_613125930 v0.h[3]
|
||||
|
||||
.balign 16
|
||||
Ljsimd_idct_ifast_neon_consts:
|
||||
.short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */
|
||||
.short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */
|
||||
.short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */
|
||||
.short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */
|
||||
|
||||
asm_function jsimd_idct_ifast_neon
|
||||
|
||||
DCT_TABLE .req x0
|
||||
@@ -832,7 +1036,7 @@ asm_function jsimd_idct_ifast_neon
|
||||
* 7 | d30 | d31 ( v23.8h )
|
||||
*/
|
||||
/* Save NEON registers used in fast IDCT */
|
||||
adr TMP5, Ljsimd_idct_ifast_neon_consts
|
||||
get_symbol_loc TMP5, Ljsimd_idct_ifast_neon_consts
|
||||
ld1 {v16.8h, v17.8h}, [COEF_BLOCK], 32
|
||||
ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32
|
||||
ld1 {v18.8h, v19.8h}, [COEF_BLOCK], 32
|
||||
@@ -1023,38 +1227,6 @@ asm_function jsimd_idct_ifast_neon
|
||||
* but readability will suffer somewhat.
|
||||
*/
|
||||
|
||||
#define CONST_BITS 13
|
||||
|
||||
#define FIX_0_211164243 (1730) /* FIX(0.211164243) */
|
||||
#define FIX_0_509795579 (4176) /* FIX(0.509795579) */
|
||||
#define FIX_0_601344887 (4926) /* FIX(0.601344887) */
|
||||
#define FIX_0_720959822 (5906) /* FIX(0.720959822) */
|
||||
#define FIX_0_765366865 (6270) /* FIX(0.765366865) */
|
||||
#define FIX_0_850430095 (6967) /* FIX(0.850430095) */
|
||||
#define FIX_0_899976223 (7373) /* FIX(0.899976223) */
|
||||
#define FIX_1_061594337 (8697) /* FIX(1.061594337) */
|
||||
#define FIX_1_272758580 (10426) /* FIX(1.272758580) */
|
||||
#define FIX_1_451774981 (11893) /* FIX(1.451774981) */
|
||||
#define FIX_1_847759065 (15137) /* FIX(1.847759065) */
|
||||
#define FIX_2_172734803 (17799) /* FIX(2.172734803) */
|
||||
#define FIX_2_562915447 (20995) /* FIX(2.562915447) */
|
||||
#define FIX_3_624509785 (29692) /* FIX(3.624509785) */
|
||||
|
||||
.balign 16
|
||||
Ljsimd_idct_4x4_neon_consts:
|
||||
.short FIX_1_847759065 /* v0.h[0] */
|
||||
.short -FIX_0_765366865 /* v0.h[1] */
|
||||
.short -FIX_0_211164243 /* v0.h[2] */
|
||||
.short FIX_1_451774981 /* v0.h[3] */
|
||||
.short -FIX_2_172734803 /* d1[0] */
|
||||
.short FIX_1_061594337 /* d1[1] */
|
||||
.short -FIX_0_509795579 /* d1[2] */
|
||||
.short -FIX_0_601344887 /* d1[3] */
|
||||
.short FIX_0_899976223 /* v2.h[0] */
|
||||
.short FIX_2_562915447 /* v2.h[1] */
|
||||
.short 1 << (CONST_BITS + 1) /* v2.h[2] */
|
||||
.short 0 /* v2.h[3] */
|
||||
|
||||
.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29
|
||||
smull v28.4s, \x4, v2.h[2]
|
||||
smlal v28.4s, \x8, v0.h[0]
|
||||
@@ -1121,7 +1293,7 @@ asm_function jsimd_idct_4x4_neon
|
||||
sub sp, sp, 64
|
||||
mov x9, sp
|
||||
/* Load constants (v3.4h is just used for padding) */
|
||||
adr TMP4, Ljsimd_idct_4x4_neon_consts
|
||||
get_symbol_loc TMP4, Ljsimd_idct_4x4_neon_consts
|
||||
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32
|
||||
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32
|
||||
ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4]
|
||||
@@ -1264,13 +1436,6 @@ asm_function jsimd_idct_4x4_neon
|
||||
* bit exact compatibility with jpeg-6b.
|
||||
*/
|
||||
|
||||
.balign 8
|
||||
Ljsimd_idct_2x2_neon_consts:
|
||||
.short -FIX_0_720959822 /* v14[0] */
|
||||
.short FIX_0_850430095 /* v14[1] */
|
||||
.short -FIX_1_272758580 /* v14[2] */
|
||||
.short FIX_3_624509785 /* v14[3] */
|
||||
|
||||
.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27
|
||||
sshll v15.4s, \x4, #15
|
||||
smull v26.4s, \x6, v14.h[3]
|
||||
@@ -1311,7 +1476,7 @@ asm_function jsimd_idct_2x2_neon
|
||||
mov x9, sp
|
||||
|
||||
/* Load constants */
|
||||
adr TMP2, Ljsimd_idct_2x2_neon_consts
|
||||
get_symbol_loc TMP2, Ljsimd_idct_2x2_neon_consts
|
||||
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32
|
||||
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32
|
||||
ld1 {v14.4h}, [TMP2]
|
||||
@@ -1663,21 +1828,6 @@ asm_function jsimd_idct_2x2_neon
|
||||
do_yuv_to_rgb_stage2
|
||||
.endm
|
||||
|
||||
/* Apple gas crashes on adrl, work around that by using adr.
|
||||
* But this requires a copy of these constants for each function.
|
||||
*/
|
||||
|
||||
.balign 16
|
||||
.if \fast_st3 == 1
|
||||
Ljsimd_ycc_\colorid\()_neon_consts:
|
||||
.else
|
||||
Ljsimd_ycc_\colorid\()_neon_slowst3_consts:
|
||||
.endif
|
||||
.short 0, 0, 0, 0
|
||||
.short 22971, -11277, -23401, 29033
|
||||
.short -128, -128, -128, -128
|
||||
.short -128, -128, -128, -128
|
||||
|
||||
.if \fast_st3 == 1
|
||||
asm_function jsimd_ycc_\colorid\()_convert_neon
|
||||
.else
|
||||
@@ -1703,11 +1853,7 @@ asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3
|
||||
mov x9, sp
|
||||
|
||||
/* Load constants to d1, d2, d3 (v0.4h is just used for padding) */
|
||||
.if \fast_st3 == 1
|
||||
adr x15, Ljsimd_ycc_\colorid\()_neon_consts
|
||||
.else
|
||||
adr x15, Ljsimd_ycc_\colorid\()_neon_slowst3_consts
|
||||
.endif
|
||||
get_symbol_loc x15, Ljsimd_ycc_rgb_neon_consts
|
||||
|
||||
/* Save NEON registers */
|
||||
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32
|
||||
@@ -2004,17 +2150,6 @@ generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, .4h, 1, .4h, 0, .4h, .8b,
|
||||
do_rgb_to_yuv_stage1
|
||||
.endm
|
||||
|
||||
.balign 16
|
||||
.if \fast_ld3 == 1
|
||||
Ljsimd_\colorid\()_ycc_neon_consts:
|
||||
.else
|
||||
Ljsimd_\colorid\()_ycc_neon_slowld3_consts:
|
||||
.endif
|
||||
.short 19595, 38470, 7471, 11059
|
||||
.short 21709, 32768, 27439, 5329
|
||||
.short 32767, 128, 32767, 128
|
||||
.short 32767, 128, 32767, 128
|
||||
|
||||
.if \fast_ld3 == 1
|
||||
asm_function jsimd_\colorid\()_ycc_convert_neon
|
||||
.else
|
||||
@@ -2037,11 +2172,7 @@ asm_function jsimd_\colorid\()_ycc_convert_neon_slowld3
|
||||
N .req w12
|
||||
|
||||
/* Load constants to d0, d1, d2, d3 */
|
||||
.if \fast_ld3 == 1
|
||||
adr x13, Ljsimd_\colorid\()_ycc_neon_consts
|
||||
.else
|
||||
adr x13, Ljsimd_\colorid\()_ycc_neon_slowld3_consts
|
||||
.endif
|
||||
get_symbol_loc x13, Ljsimd_rgb_ycc_neon_consts
|
||||
ld1 {v0.8h, v1.8h}, [x13]
|
||||
|
||||
ldr OUTPUT_BUF0, [OUTPUT_BUF]
|
||||
@@ -2241,50 +2372,6 @@ asm_function jsimd_convsamp_neon
|
||||
#define DESCALE_P1 (CONST_BITS - PASS1_BITS)
|
||||
#define DESCALE_P2 (CONST_BITS + PASS1_BITS)
|
||||
|
||||
#define F_0_298 2446 /* FIX(0.298631336) */
|
||||
#define F_0_390 3196 /* FIX(0.390180644) */
|
||||
#define F_0_541 4433 /* FIX(0.541196100) */
|
||||
#define F_0_765 6270 /* FIX(0.765366865) */
|
||||
#define F_0_899 7373 /* FIX(0.899976223) */
|
||||
#define F_1_175 9633 /* FIX(1.175875602) */
|
||||
#define F_1_501 12299 /* FIX(1.501321110) */
|
||||
#define F_1_847 15137 /* FIX(1.847759065) */
|
||||
#define F_1_961 16069 /* FIX(1.961570560) */
|
||||
#define F_2_053 16819 /* FIX(2.053119869) */
|
||||
#define F_2_562 20995 /* FIX(2.562915447) */
|
||||
#define F_3_072 25172 /* FIX(3.072711026) */
|
||||
|
||||
.balign 16
|
||||
Ljsimd_fdct_islow_neon_consts:
|
||||
.short F_0_298
|
||||
.short -F_0_390
|
||||
.short F_0_541
|
||||
.short F_0_765
|
||||
.short - F_0_899
|
||||
.short F_1_175
|
||||
.short F_1_501
|
||||
.short - F_1_847
|
||||
.short - F_1_961
|
||||
.short F_2_053
|
||||
.short - F_2_562
|
||||
.short F_3_072
|
||||
.short 0 /* padding */
|
||||
.short 0
|
||||
.short 0
|
||||
.short 0
|
||||
|
||||
#undef F_0_298
|
||||
#undef F_0_390
|
||||
#undef F_0_541
|
||||
#undef F_0_765
|
||||
#undef F_0_899
|
||||
#undef F_1_175
|
||||
#undef F_1_501
|
||||
#undef F_1_847
|
||||
#undef F_1_961
|
||||
#undef F_2_053
|
||||
#undef F_2_562
|
||||
#undef F_3_072
|
||||
#define XFIX_P_0_298 v0.h[0]
|
||||
#define XFIX_N_0_390 v0.h[1]
|
||||
#define XFIX_P_0_541 v0.h[2]
|
||||
@@ -2304,7 +2391,7 @@ asm_function jsimd_fdct_islow_neon
|
||||
TMP .req x9
|
||||
|
||||
/* Load constants */
|
||||
adr TMP, Ljsimd_fdct_islow_neon_consts
|
||||
get_symbol_loc TMP, Ljsimd_fdct_islow_neon_consts
|
||||
ld1 {v0.8h, v1.8h}, [TMP]
|
||||
|
||||
/* Save NEON registers */
|
||||
@@ -2583,20 +2670,13 @@ asm_function jsimd_fdct_islow_neon
|
||||
#define XFIX_0_707106781 v0.h[2]
|
||||
#define XFIX_1_306562965 v0.h[3]
|
||||
|
||||
.balign 16
|
||||
Ljsimd_fdct_ifast_neon_consts:
|
||||
.short (98 * 128) /* XFIX_0_382683433 */
|
||||
.short (139 * 128) /* XFIX_0_541196100 */
|
||||
.short (181 * 128) /* XFIX_0_707106781 */
|
||||
.short (334 * 128 - 256 * 128) /* XFIX_1_306562965 */
|
||||
|
||||
asm_function jsimd_fdct_ifast_neon
|
||||
|
||||
DATA .req x0
|
||||
TMP .req x9
|
||||
|
||||
/* Load constants */
|
||||
adr TMP, Ljsimd_fdct_ifast_neon_consts
|
||||
get_symbol_loc TMP, Ljsimd_fdct_ifast_neon_consts
|
||||
ld1 {v0.4h}, [TMP]
|
||||
|
||||
/* Load all DATA into NEON registers with the following allocation:
|
||||
@@ -2775,41 +2855,6 @@ asm_function jsimd_quantize_neon
|
||||
* JSAMPARRAY input_data, JSAMPARRAY output_data);
|
||||
*/
|
||||
|
||||
.balign 16
|
||||
Ljsimd_h2_downsample_neon_consts:
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F /* diff 0 */
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0E /* diff 1 */
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0D, 0x0D /* diff 2 */
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0C, 0x0C, 0x0C /* diff 3 */
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
0x08, 0x09, 0x0A, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B /* diff 4 */
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
0x08, 0x09, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A /* diff 5 */
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09 /* diff 6 */
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 /* diff 7 */
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
|
||||
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07 /* diff 8 */
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x06, \
|
||||
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06 /* diff 9 */
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x05, \
|
||||
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05 /* diff 10 */
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x04, \
|
||||
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 /* diff 11 */
|
||||
.byte 0x00, 0x01, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, \
|
||||
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 /* diff 12 */
|
||||
.byte 0x00, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, \
|
||||
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02 /* diff 13 */
|
||||
.byte 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, \
|
||||
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 /* diff 14 */
|
||||
.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* diff 15 */
|
||||
|
||||
asm_function jsimd_h2v1_downsample_neon
|
||||
IMAGE_WIDTH .req x0
|
||||
MAX_V_SAMP .req x1
|
||||
@@ -2827,7 +2872,7 @@ asm_function jsimd_h2v1_downsample_neon
|
||||
mov TMPDUP, #0x10000
|
||||
lsl TMP2, BLOCK_WIDTH, #4
|
||||
sub TMP2, TMP2, IMAGE_WIDTH
|
||||
adr TMP3, Ljsimd_h2_downsample_neon_consts
|
||||
get_symbol_loc TMP3, Ljsimd_h2_downsample_neon_consts
|
||||
add TMP3, TMP3, TMP2, lsl #4
|
||||
dup v16.4s, TMPDUP
|
||||
ld1 {v18.16b}, [TMP3]
|
||||
@@ -2906,7 +2951,7 @@ asm_function jsimd_h2v2_downsample_neon
|
||||
lsl TMP2, BLOCK_WIDTH, #4
|
||||
lsl TMPDUP, TMPDUP, #17
|
||||
sub TMP2, TMP2, IMAGE_WIDTH
|
||||
adr TMP3, Ljsimd_h2_downsample_neon_consts
|
||||
get_symbol_loc TMP3, Ljsimd_h2_downsample_neon_consts
|
||||
orr TMPDUP, TMPDUP, #1
|
||||
add TMP3, TMP3, TMP2, lsl #4
|
||||
dup v16.4s, TMPDUP
|
||||
@@ -3012,41 +3057,6 @@ asm_function jsimd_h2v2_downsample_neon
|
||||
|
||||
.macro generate_jsimd_huff_encode_one_block fast_tbl
|
||||
|
||||
.balign 16
|
||||
.if \fast_tbl == 1
|
||||
Ljsimd_huff_encode_one_block_neon_consts:
|
||||
.else
|
||||
Ljsimd_huff_encode_one_block_neon_slowtbl_consts:
|
||||
.endif
|
||||
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, \
|
||||
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
|
||||
.if \fast_tbl == 1
|
||||
.byte 0, 1, 2, 3, 16, 17, 32, 33, \
|
||||
18, 19, 4, 5, 6, 7, 20, 21 /* L0 => L3 : 4 lines OK */
|
||||
.byte 34, 35, 48, 49, 255, 255, 50, 51, \
|
||||
36, 37, 22, 23, 8, 9, 10, 11 /* L0 => L3 : 4 lines OK */
|
||||
.byte 8, 9, 22, 23, 36, 37, 50, 51, \
|
||||
255, 255, 255, 255, 255, 255, 52, 53 /* L1 => L4 : 4 lines OK */
|
||||
.byte 54, 55, 40, 41, 26, 27, 12, 13, \
|
||||
14, 15, 28, 29, 42, 43, 56, 57 /* L0 => L3 : 4 lines OK */
|
||||
.byte 6, 7, 20, 21, 34, 35, 48, 49, \
|
||||
50, 51, 36, 37, 22, 23, 8, 9 /* L4 => L7 : 4 lines OK */
|
||||
.byte 42, 43, 28, 29, 14, 15, 30, 31, \
|
||||
44, 45, 58, 59, 255, 255, 255, 255 /* L1 => L4 : 4 lines OK */
|
||||
.byte 255, 255, 255, 255, 56, 57, 42, 43, \
|
||||
28, 29, 14, 15, 30, 31, 44, 45 /* L3 => L6 : 4 lines OK */
|
||||
.byte 26, 27, 40, 41, 42, 43, 28, 29, \
|
||||
14, 15, 30, 31, 44, 45, 46, 47 /* L5 => L7 : 3 lines OK */
|
||||
.byte 255, 255, 255, 255, 0, 1, 255, 255, \
|
||||
255, 255, 255, 255, 255, 255, 255, 255 /* L4 : 1 lines OK */
|
||||
.byte 255, 255, 255, 255, 255, 255, 255, 255, \
|
||||
0, 1, 16, 17, 2, 3, 255, 255 /* L5 => L6 : 2 lines OK */
|
||||
.byte 255, 255, 255, 255, 255, 255, 255, 255, \
|
||||
255, 255, 255, 255, 8, 9, 22, 23 /* L5 => L6 : 2 lines OK */
|
||||
.byte 4, 5, 6, 7, 255, 255, 255, 255, \
|
||||
255, 255, 255, 255, 255, 255, 255, 255 /* L7 : 1 line OK */
|
||||
.endif
|
||||
|
||||
.if \fast_tbl == 1
|
||||
asm_function jsimd_huff_encode_one_block_neon
|
||||
.else
|
||||
@@ -3056,11 +3066,7 @@ asm_function jsimd_huff_encode_one_block_neon_slowtbl
|
||||
sub BUFFER, BUFFER, #0x1 /* BUFFER=buffer-- */
|
||||
/* Save ARM registers */
|
||||
stp x19, x20, [sp]
|
||||
.if \fast_tbl == 1
|
||||
adr x15, Ljsimd_huff_encode_one_block_neon_consts
|
||||
.else
|
||||
adr x15, Ljsimd_huff_encode_one_block_neon_slowtbl_consts
|
||||
.endif
|
||||
get_symbol_loc x15, Ljsimd_huff_encode_one_block_neon_consts
|
||||
ldr PUT_BUFFER, [x0, #0x10]
|
||||
ldr PUT_BITSw, [x0, #0x18]
|
||||
ldrsh w12, [x2] /* load DC coeff in w12 */
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -110,12 +108,12 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
|
||||
test cl, SIZEOF_BYTE
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_BYTE
|
||||
movzx eax, BYTE [esi+ecx]
|
||||
movzx eax, byte [esi+ecx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
movzx edx, WORD [esi+ecx]
|
||||
movzx edx, word [esi+ecx]
|
||||
shl eax, WORD_BIT
|
||||
or eax, edx
|
||||
.column_ld4:
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -111,13 +109,13 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_BYTE
|
||||
xor eax, eax
|
||||
mov al, BYTE [esi+ecx]
|
||||
mov al, byte [esi+ecx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
xor edx, edx
|
||||
mov dx, WORD [esi+ecx]
|
||||
mov dx, word [esi+ecx]
|
||||
shl eax, WORD_BIT
|
||||
or eax, edx
|
||||
.column_ld4:
|
||||
@@ -127,7 +125,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
|
||||
test cl, SIZEOF_DWORD
|
||||
jz short .column_ld8
|
||||
sub ecx, byte SIZEOF_DWORD
|
||||
movd mmG, DWORD [esi+ecx]
|
||||
movd mmG, dword [esi+ecx]
|
||||
psllq mmA, DWORD_BIT
|
||||
por mmA, mmG
|
||||
.column_ld8:
|
||||
@@ -197,7 +195,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
|
||||
test cl, SIZEOF_MMWORD/8
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_MMWORD/8
|
||||
movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
|
||||
movd mmA, dword [esi+ecx*RGB_PIXELSIZE]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_MMWORD/4
|
||||
jz short .column_ld4
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -109,12 +107,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
|
||||
test cl, SIZEOF_BYTE
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_BYTE
|
||||
movzx eax, BYTE [esi+ecx]
|
||||
movzx eax, byte [esi+ecx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
movzx edx, WORD [esi+ecx]
|
||||
movzx edx, word [esi+ecx]
|
||||
shl eax, WORD_BIT
|
||||
or eax, edx
|
||||
.column_ld4:
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -102,12 +100,12 @@ EXTN(jsimd_rgb_gray_convert_avx2):
|
||||
test cl, SIZEOF_BYTE
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_BYTE
|
||||
movzx eax, BYTE [esi+ecx]
|
||||
movzx eax, byte [esi+ecx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
movzx edx, WORD [esi+ecx]
|
||||
movzx edx, word [esi+ecx]
|
||||
shl eax, WORD_BIT
|
||||
or eax, edx
|
||||
.column_ld4:
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -103,13 +101,13 @@ EXTN(jsimd_rgb_gray_convert_mmx):
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_BYTE
|
||||
xor eax, eax
|
||||
mov al, BYTE [esi+ecx]
|
||||
mov al, byte [esi+ecx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
xor edx, edx
|
||||
mov dx, WORD [esi+ecx]
|
||||
mov dx, word [esi+ecx]
|
||||
shl eax, WORD_BIT
|
||||
or eax, edx
|
||||
.column_ld4:
|
||||
@@ -119,7 +117,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
|
||||
test cl, SIZEOF_DWORD
|
||||
jz short .column_ld8
|
||||
sub ecx, byte SIZEOF_DWORD
|
||||
movd mmG, DWORD [esi+ecx]
|
||||
movd mmG, dword [esi+ecx]
|
||||
psllq mmA, DWORD_BIT
|
||||
por mmA, mmG
|
||||
.column_ld8:
|
||||
@@ -189,7 +187,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
|
||||
test cl, SIZEOF_MMWORD/8
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_MMWORD/8
|
||||
movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
|
||||
movd mmA, dword [esi+ecx*RGB_PIXELSIZE]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_MMWORD/4
|
||||
jz short .column_ld4
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -101,12 +99,12 @@ EXTN(jsimd_rgb_gray_convert_sse2):
|
||||
test cl, SIZEOF_BYTE
|
||||
jz short .column_ld2
|
||||
sub ecx, byte SIZEOF_BYTE
|
||||
movzx eax, BYTE [esi+ecx]
|
||||
movzx eax, byte [esi+ecx]
|
||||
.column_ld2:
|
||||
test cl, SIZEOF_WORD
|
||||
jz short .column_ld4
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
movzx edx, WORD [esi+ecx]
|
||||
movzx edx, word [esi+ecx]
|
||||
shl eax, WORD_BIT
|
||||
or eax, edx
|
||||
.column_ld4:
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
; This file contains an SSE2 implementation for Huffman coding of one block.
|
||||
; The following code is based directly on jchuff.c; see jchuff.c for more
|
||||
; details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
@@ -197,8 +195,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
push ebp
|
||||
|
||||
mov esi, POINTER [eax+8] ; (working_state *state)
|
||||
mov put_buffer, DWORD [esi+8] ; put_buffer = state->cur.put_buffer;
|
||||
mov put_bits, DWORD [esi+12] ; put_bits = state->cur.put_bits;
|
||||
mov put_buffer, dword [esi+8] ; put_buffer = state->cur.put_buffer;
|
||||
mov put_bits, dword [esi+12] ; put_bits = state->cur.put_bits;
|
||||
push esi ; esi is now scratch
|
||||
|
||||
get_GOT edx ; get GOT address
|
||||
@@ -214,7 +212,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
; Encode the DC coefficient difference per section F.1.2.1
|
||||
mov esi, POINTER [esp+block] ; block
|
||||
movsx ecx, word [esi] ; temp = temp2 = block[0] - last_dc_val;
|
||||
sub ecx, DWORD [eax+20]
|
||||
sub ecx, dword [eax+20]
|
||||
mov esi, ecx
|
||||
|
||||
; This is a well-known technique for obtaining the absolute value
|
||||
@@ -229,12 +227,12 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
; For a negative input, want temp2 = bitwise complement of abs(input)
|
||||
; This code assumes we are on a two's complement machine
|
||||
add esi, edx ; temp2 += temp3;
|
||||
mov DWORD [esp+temp], esi ; backup temp2 in temp
|
||||
mov dword [esp+temp], esi ; backup temp2 in temp
|
||||
|
||||
; Find the number of bits needed for the magnitude of the coefficient
|
||||
movpic ebp, POINTER [esp+gotptr] ; load GOT address (ebp)
|
||||
movzx edx, byte [GOTOFF(ebp, jpeg_nbits_table + ecx)] ; nbits = JPEG_NBITS(temp);
|
||||
mov DWORD [esp+temp2], edx ; backup nbits in temp2
|
||||
mov dword [esp+temp2], edx ; backup nbits in temp2
|
||||
|
||||
; Emit the Huffman-coded symbol for the number of bits
|
||||
mov ebp, POINTER [eax+24] ; After this point, arguments are not accessible anymore
|
||||
@@ -242,13 +240,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
movzx ecx, byte [ebp + edx + 1024] ; size = dctbl->ehufsi[nbits];
|
||||
EMIT_BITS eax ; EMIT_BITS(code, size)
|
||||
|
||||
mov ecx, DWORD [esp+temp2] ; restore nbits
|
||||
mov ecx, dword [esp+temp2] ; restore nbits
|
||||
|
||||
; Mask off any extra bits in code
|
||||
mov eax, 1
|
||||
shl eax, cl
|
||||
dec eax
|
||||
and eax, DWORD [esp+temp] ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
||||
and eax, dword [esp+temp] ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
||||
|
||||
; Emit that number of bits of the value, if positive,
|
||||
; or the complement of its magnitude, if negative.
|
||||
@@ -291,22 +289,22 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
jz near .ELOOP
|
||||
lea esi, [esi+ecx*2] ; k += r;
|
||||
shr edx, cl ; index >>= r;
|
||||
mov DWORD [esp+temp3], edx
|
||||
mov dword [esp+temp3], edx
|
||||
.BRLOOP:
|
||||
cmp ecx, 16 ; while (r > 15) {
|
||||
jl near .ERLOOP
|
||||
sub ecx, 16 ; r -= 16;
|
||||
mov DWORD [esp+temp], ecx
|
||||
mov dword [esp+temp], ecx
|
||||
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];
|
||||
movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0];
|
||||
EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0)
|
||||
mov ecx, DWORD [esp+temp]
|
||||
mov ecx, dword [esp+temp]
|
||||
jmp .BRLOOP
|
||||
.ERLOOP:
|
||||
movsx eax, word [esi] ; temp = t1[k];
|
||||
movpic edx, POINTER [esp+gotptr] ; load GOT address (edx)
|
||||
movzx eax, byte [GOTOFF(edx, jpeg_nbits_table + eax)] ; nbits = JPEG_NBITS(temp);
|
||||
mov DWORD [esp+temp2], eax
|
||||
mov dword [esp+temp2], eax
|
||||
; Emit Huffman symbol for run length / number of bits
|
||||
shl ecx, 4 ; temp3 = (r << 4) + nbits;
|
||||
add ecx, eax
|
||||
@@ -316,13 +314,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
|
||||
movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k];
|
||||
; Mask off any extra bits in code
|
||||
mov ecx, DWORD [esp+temp2]
|
||||
mov ecx, dword [esp+temp2]
|
||||
mov eax, 1
|
||||
shl eax, cl
|
||||
dec eax
|
||||
and eax, edx ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
||||
EMIT_BITS eax ; PUT_BITS(temp2, nbits)
|
||||
mov edx, DWORD [esp+temp3]
|
||||
mov edx, dword [esp+temp3]
|
||||
add esi, 2 ; ++k;
|
||||
shr edx, 1 ; index >>= 1;
|
||||
|
||||
@@ -352,29 +350,29 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
shr edx, cl ; index >>= r;
|
||||
add ecx, eax
|
||||
lea esi, [esi+ecx*2] ; k += r;
|
||||
mov DWORD [esp+temp3], edx
|
||||
mov dword [esp+temp3], edx
|
||||
jmp .BRLOOP2
|
||||
.BLOOP2:
|
||||
bsf ecx, edx ; r = __builtin_ctzl(index);
|
||||
jz near .ELOOP2
|
||||
lea esi, [esi+ecx*2] ; k += r;
|
||||
shr edx, cl ; index >>= r;
|
||||
mov DWORD [esp+temp3], edx
|
||||
mov dword [esp+temp3], edx
|
||||
.BRLOOP2:
|
||||
cmp ecx, 16 ; while (r > 15) {
|
||||
jl near .ERLOOP2
|
||||
sub ecx, 16 ; r -= 16;
|
||||
mov DWORD [esp+temp], ecx
|
||||
mov dword [esp+temp], ecx
|
||||
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];
|
||||
movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0];
|
||||
EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0)
|
||||
mov ecx, DWORD [esp+temp]
|
||||
mov ecx, dword [esp+temp]
|
||||
jmp .BRLOOP2
|
||||
.ERLOOP2:
|
||||
movsx eax, word [esi] ; temp = t1[k];
|
||||
bsr eax, eax ; nbits = 32 - __builtin_clz(temp);
|
||||
inc eax
|
||||
mov DWORD [esp+temp2], eax
|
||||
mov dword [esp+temp2], eax
|
||||
; Emit Huffman symbol for run length / number of bits
|
||||
shl ecx, 4 ; temp3 = (r << 4) + nbits;
|
||||
add ecx, eax
|
||||
@@ -384,13 +382,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
|
||||
movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k];
|
||||
; Mask off any extra bits in code
|
||||
mov ecx, DWORD [esp+temp2]
|
||||
mov ecx, dword [esp+temp2]
|
||||
mov eax, 1
|
||||
shl eax, cl
|
||||
dec eax
|
||||
and eax, edx ; temp2 &= (((JLONG)1)<<nbits) - 1;
|
||||
EMIT_BITS eax ; PUT_BITS(temp2, nbits)
|
||||
mov edx, DWORD [esp+temp3]
|
||||
mov edx, dword [esp+temp3]
|
||||
add esi, 2 ; ++k;
|
||||
shr edx, 1 ; index >>= 1;
|
||||
|
||||
@@ -407,8 +405,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
mov eax, [esp+buffer]
|
||||
pop esi
|
||||
; Save put_buffer & put_bits
|
||||
mov DWORD [esi+8], put_buffer ; state->cur.put_buffer = put_buffer;
|
||||
mov DWORD [esi+12], put_bits ; state->cur.put_bits = put_bits;
|
||||
mov dword [esi+8], put_buffer ; state->cur.put_buffer = put_buffer;
|
||||
mov dword [esi+12], put_bits ; state->cur.put_bits = put_bits;
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
|
||||
@@ -15,8 +15,6 @@
|
||||
;
|
||||
; This file contains an SSE2 implementation of data preparation for progressive
|
||||
; Huffman encoding. See jcphuff.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
@@ -329,6 +327,8 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
|
||||
add LUT, 16*SIZEOF_INT
|
||||
dec K
|
||||
jnz .BLOOP16
|
||||
test LEN, 15
|
||||
je .PADDING
|
||||
.ELOOP16:
|
||||
mov LENEND, LEN
|
||||
and LENEND, 7
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -348,7 +346,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
||||
vmovd eax, xmmA
|
||||
cmp ecx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [edi], ax
|
||||
mov word [edi], ax
|
||||
add edi, byte SIZEOF_WORD
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
shr eax, 16
|
||||
@@ -357,7 +355,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
||||
; space.
|
||||
test ecx, ecx
|
||||
jz short .nextrow
|
||||
mov BYTE [edi], al
|
||||
mov byte [edi], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -280,7 +278,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
|
||||
movd eax, mmA
|
||||
cmp ecx, byte SIZEOF_DWORD
|
||||
jb short .column_st2
|
||||
mov DWORD [edi+0*SIZEOF_DWORD], eax
|
||||
mov dword [edi+0*SIZEOF_DWORD], eax
|
||||
psrlq mmA, DWORD_BIT
|
||||
movd eax, mmA
|
||||
sub ecx, byte SIZEOF_DWORD
|
||||
@@ -288,14 +286,14 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
|
||||
.column_st2:
|
||||
cmp ecx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [edi+0*SIZEOF_WORD], ax
|
||||
mov word [edi+0*SIZEOF_WORD], ax
|
||||
shr eax, WORD_BIT
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
add edi, byte SIZEOF_WORD
|
||||
.column_st1:
|
||||
cmp ecx, byte SIZEOF_BYTE
|
||||
jb short .nextrow
|
||||
mov BYTE [edi+0*SIZEOF_BYTE], al
|
||||
mov byte [edi+0*SIZEOF_BYTE], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
@@ -367,7 +365,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
|
||||
.column_st4:
|
||||
cmp ecx, byte SIZEOF_MMWORD/8
|
||||
jb short .nextrow
|
||||
movd DWORD [edi+0*SIZEOF_DWORD], mmA
|
||||
movd dword [edi+0*SIZEOF_DWORD], mmA
|
||||
|
||||
%endif ; RGB_PIXELSIZE ; ---------------
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -320,7 +318,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
||||
movd eax, xmmA
|
||||
cmp ecx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [edi], ax
|
||||
mov word [edi], ax
|
||||
add edi, byte SIZEOF_WORD
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
shr eax, 16
|
||||
@@ -329,7 +327,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
||||
; space.
|
||||
test ecx, ecx
|
||||
jz short .nextrow
|
||||
mov BYTE [edi], al
|
||||
mov byte [edi], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -354,7 +352,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
||||
vmovd eax, xmmA
|
||||
cmp ecx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [edi], ax
|
||||
mov word [edi], ax
|
||||
add edi, byte SIZEOF_WORD
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
shr eax, 16
|
||||
@@ -363,7 +361,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
||||
; space.
|
||||
test ecx, ecx
|
||||
jz short .endcolumn
|
||||
mov BYTE [edi], al
|
||||
mov byte [edi], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -283,7 +281,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
||||
movd eax, mmA
|
||||
cmp ecx, byte SIZEOF_DWORD
|
||||
jb short .column_st2
|
||||
mov DWORD [edi+0*SIZEOF_DWORD], eax
|
||||
mov dword [edi+0*SIZEOF_DWORD], eax
|
||||
psrlq mmA, DWORD_BIT
|
||||
movd eax, mmA
|
||||
sub ecx, byte SIZEOF_DWORD
|
||||
@@ -291,14 +289,14 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
||||
.column_st2:
|
||||
cmp ecx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [edi+0*SIZEOF_WORD], ax
|
||||
mov word [edi+0*SIZEOF_WORD], ax
|
||||
shr eax, WORD_BIT
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
add edi, byte SIZEOF_WORD
|
||||
.column_st1:
|
||||
cmp ecx, byte SIZEOF_BYTE
|
||||
jb short .endcolumn
|
||||
mov BYTE [edi+0*SIZEOF_BYTE], al
|
||||
mov byte [edi+0*SIZEOF_BYTE], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
@@ -373,7 +371,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
||||
.column_st4:
|
||||
cmp ecx, byte SIZEOF_MMWORD/8
|
||||
jb short .endcolumn
|
||||
movd DWORD [edi+0*SIZEOF_DWORD], mmA
|
||||
movd dword [edi+0*SIZEOF_DWORD], mmA
|
||||
|
||||
%endif ; RGB_PIXELSIZE ; ---------------
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jcolsamp.inc"
|
||||
|
||||
@@ -325,7 +323,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
||||
movd eax, xmmA
|
||||
cmp ecx, byte SIZEOF_WORD
|
||||
jb short .column_st1
|
||||
mov WORD [edi], ax
|
||||
mov word [edi], ax
|
||||
add edi, byte SIZEOF_WORD
|
||||
sub ecx, byte SIZEOF_WORD
|
||||
shr eax, 16
|
||||
@@ -334,7 +332,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
||||
; space.
|
||||
test ecx, ecx
|
||||
jz short .endcolumn
|
||||
mov BYTE [edi], al
|
||||
mov byte [edi], al
|
||||
|
||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
; This file contains a floating-point implementation of the forward DCT
|
||||
; (Discrete Cosine Transform). The following code is based directly on
|
||||
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
; This file contains a floating-point implementation of the forward DCT
|
||||
; (Discrete Cosine Transform). The following code is based directly on
|
||||
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; the forward DCT (Discrete Cosine Transform). The following code is
|
||||
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
|
||||
; for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; the forward DCT (Discrete Cosine Transform). The following code is
|
||||
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
|
||||
; for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; forward DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; forward DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; forward DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
; This file contains a floating-point implementation of the inverse DCT
|
||||
; (Discrete Cosine Transform). The following code is based directly on
|
||||
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -92,23 +90,23 @@ EXTN(jsimd_idct_float_3dnow):
|
||||
alignx 16, 7
|
||||
.columnloop:
|
||||
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz short .columnDCT
|
||||
|
||||
pushpic ebx ; save GOT address
|
||||
mov ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
||||
or ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
||||
or ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
|
||||
mov ebx, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
||||
or ebx, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
||||
or ebx, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, ebx
|
||||
poppic ebx ; restore GOT address
|
||||
jnz short .columnDCT
|
||||
|
||||
; -- AC terms all zero
|
||||
|
||||
movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
punpcklwd mm0, mm0
|
||||
psrad mm0, (DWORD_BIT-WORD_BIT)
|
||||
@@ -135,10 +133,10 @@ EXTN(jsimd_idct_float_3dnow):
|
||||
|
||||
; -- Even part
|
||||
|
||||
movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm1, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm2, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm3, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
punpcklwd mm0, mm0
|
||||
punpcklwd mm1, mm1
|
||||
@@ -182,10 +180,10 @@ EXTN(jsimd_idct_float_3dnow):
|
||||
|
||||
; -- Odd part
|
||||
|
||||
movd mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm2, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm3, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm5, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
|
||||
movd mm1, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
punpcklwd mm2, mm2
|
||||
punpcklwd mm3, mm3
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
; This file contains a floating-point implementation of the inverse DCT
|
||||
; (Discrete Cosine Transform). The following code is based directly on
|
||||
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -102,8 +100,8 @@ EXTN(jsimd_idct_float_sse):
|
||||
alignx 16, 7
|
||||
.columnloop:
|
||||
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
; This file contains a floating-point implementation of the inverse DCT
|
||||
; (Discrete Cosine Transform). The following code is based directly on
|
||||
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -102,8 +100,8 @@ EXTN(jsimd_idct_float_sse2):
|
||||
alignx 16, 7
|
||||
.columnloop:
|
||||
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movq xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; the inverse DCT (Discrete Cosine Transform). The following code is
|
||||
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
|
||||
; for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -123,8 +121,8 @@ EXTN(jsimd_idct_ifast_mmx):
|
||||
alignx 16, 7
|
||||
.columnloop:
|
||||
%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz short .columnDCT
|
||||
|
||||
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; the inverse DCT (Discrete Cosine Transform). The following code is
|
||||
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
|
||||
; for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -118,8 +116,8 @@ EXTN(jsimd_idct_ifast_sse2):
|
||||
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
||||
|
||||
%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; inverse DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -320,8 +318,8 @@ EXTN(jsimd_idct_islow_avx2):
|
||||
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
||||
|
||||
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; inverse DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -136,8 +134,8 @@ EXTN(jsimd_idct_islow_mmx):
|
||||
alignx 16, 7
|
||||
.columnloop:
|
||||
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz short .columnDCT
|
||||
|
||||
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; inverse DCT (Discrete Cosine Transform). The following code is based
|
||||
; directly on the IJG's original jidctint.c; see the jidctint.c for
|
||||
; more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -131,8 +129,8 @@ EXTN(jsimd_idct_islow_sse2):
|
||||
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
||||
|
||||
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz near .columnDCT
|
||||
|
||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
|
||||
; The following code is based directly on the IJG's original jidctred.c;
|
||||
; see the jidctred.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -144,8 +142,8 @@ EXTN(jsimd_idct_4x4_mmx):
|
||||
alignx 16, 7
|
||||
.columnloop:
|
||||
%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz short .columnDCT
|
||||
|
||||
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
@@ -464,16 +462,16 @@ EXTN(jsimd_idct_4x4_mmx):
|
||||
|
||||
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
||||
mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
|
||||
movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
|
||||
movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
|
||||
movd dword [edx+eax*SIZEOF_JSAMPLE], mm1
|
||||
movd dword [esi+eax*SIZEOF_JSAMPLE], mm0
|
||||
|
||||
psrlq mm1, 4*BYTE_BIT
|
||||
psrlq mm0, 4*BYTE_BIT
|
||||
|
||||
mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
||||
mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
|
||||
movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
|
||||
movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
|
||||
movd dword [edx+eax*SIZEOF_JSAMPLE], mm1
|
||||
movd dword [esi+eax*SIZEOF_JSAMPLE], mm0
|
||||
|
||||
emms ; empty MMX state
|
||||
|
||||
@@ -688,8 +686,8 @@ EXTN(jsimd_idct_2x2_mmx):
|
||||
|
||||
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
||||
mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
||||
mov WORD [edx+eax*SIZEOF_JSAMPLE], bx
|
||||
mov WORD [esi+eax*SIZEOF_JSAMPLE], cx
|
||||
mov word [edx+eax*SIZEOF_JSAMPLE], bx
|
||||
mov word [esi+eax*SIZEOF_JSAMPLE], cx
|
||||
|
||||
emms ; empty MMX state
|
||||
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
|
||||
; The following code is based directly on the IJG's original jidctred.c;
|
||||
; see the jidctred.c for more details.
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
@@ -139,8 +137,8 @@ EXTN(jsimd_idct_4x4_sse2):
|
||||
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
||||
|
||||
%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
|
||||
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
||||
jnz short .columnDCT
|
||||
|
||||
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
||||
@@ -578,8 +576,8 @@ EXTN(jsimd_idct_2x2_sse2):
|
||||
|
||||
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
||||
mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
||||
mov WORD [edx+eax*SIZEOF_JSAMPLE], bx
|
||||
mov WORD [esi+eax*SIZEOF_JSAMPLE], cx
|
||||
mov word [edx+eax*SIZEOF_JSAMPLE], bx
|
||||
mov word [esi+eax*SIZEOF_JSAMPLE], cx
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
%include "jdct.inc"
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
; assembler (including Borland's Turbo Assembler).
|
||||
; NASM is available from http://nasm.sourceforge.net/ or
|
||||
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||
;
|
||||
; [TAB8]
|
||||
|
||||
%include "jsimdext.inc"
|
||||
|
||||
@@ -51,29 +49,14 @@ EXTN(jpeg_simd_cpu_support):
|
||||
xor eax, edx
|
||||
jz near .return ; CPUID is not supported
|
||||
|
||||
; Check for MMX instruction support
|
||||
; Check whether CPUID leaf 07H is supported
|
||||
; (leaf 07H is used to check for AVX2 instruction support)
|
||||
xor eax, eax
|
||||
cpuid
|
||||
test eax, eax
|
||||
jz near .return
|
||||
|
||||
xor eax, eax
|
||||
inc eax
|
||||
cpuid
|
||||
mov eax, edx ; eax = Standard feature flags
|
||||
|
||||
test eax, 1<<23 ; bit23:MMX
|
||||
jz short .no_mmx
|
||||
or edi, byte JSIMD_MMX
|
||||
.no_mmx:
|
||||
test eax, 1<<25 ; bit25:SSE
|
||||
jz short .no_sse
|
||||
or edi, byte JSIMD_SSE
|
||||
.no_sse:
|
||||
test eax, 1<<26 ; bit26:SSE2
|
||||
jz short .no_sse2
|
||||
or edi, byte JSIMD_SSE2
|
||||
.no_sse2:
|
||||
cmp eax, 7
|
||||
jl short .no_avx2 ; Maximum leaf < 07H
|
||||
|
||||
; Check for AVX2 instruction support
|
||||
mov eax, 7
|
||||
@@ -94,13 +77,34 @@ EXTN(jpeg_simd_cpu_support):
|
||||
|
||||
xor ecx, ecx
|
||||
xgetbv
|
||||
test eax, 6 ; O/S does not manage XMM/YMM state
|
||||
and eax, 6
|
||||
cmp eax, 6 ; O/S does not manage XMM/YMM state
|
||||
; using XSAVE
|
||||
jz short .no_avx2
|
||||
jnz short .no_avx2
|
||||
|
||||
or edi, JSIMD_AVX2
|
||||
.no_avx2:
|
||||
|
||||
; Check CPUID leaf 01H for MMX, SSE, and SSE2 support
|
||||
xor eax, eax
|
||||
inc eax
|
||||
cpuid
|
||||
mov eax, edx ; eax = Standard feature flags
|
||||
|
||||
; Check for MMX instruction support
|
||||
test eax, 1<<23 ; bit23:MMX
|
||||
jz short .no_mmx
|
||||
or edi, byte JSIMD_MMX
|
||||
.no_mmx:
|
||||
test eax, 1<<25 ; bit25:SSE
|
||||
jz short .no_sse
|
||||
or edi, byte JSIMD_SSE
|
||||
.no_sse:
|
||||
test eax, 1<<26 ; bit26:SSE2
|
||||
jz short .no_sse2
|
||||
or edi, byte JSIMD_SSE2
|
||||
.no_sse2:
|
||||
|
||||
; Check for 3DNow! instruction support
|
||||
mov eax, 0x80000000
|
||||
cpuid
|
||||
|
||||
@@ -2,12 +2,13 @@
|
||||
* Loongson MMI optimizations for libjpeg-turbo
|
||||
*
|
||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
* Copyright (C) 2014-2015, D. R. Commander. All Rights Reserved.
|
||||
* Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
|
||||
* Copyright (C) 2014-2015, 2019, D. R. Commander. All Rights Reserved.
|
||||
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
|
||||
* All Rights Reserved.
|
||||
* Authors: ZhuChen <zhuchen@loongson.cn>
|
||||
* SunZhangzhi <sunzhangzhi-cq@loongson.cn>
|
||||
* CaiWanwei <caiwanwei@loongson.cn>
|
||||
* ZhangLixia <zhanglixia-hf@loongson.cn>
|
||||
*
|
||||
* Based on the x86 SIMD extension for IJG JPEG library
|
||||
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -184,9 +185,15 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
|
||||
"$14", "memory"
|
||||
);
|
||||
} else {
|
||||
mmA = _mm_load_si64((__m64 *)&inptr[0]);
|
||||
mmG = _mm_load_si64((__m64 *)&inptr[8]);
|
||||
mmF = _mm_load_si64((__m64 *)&inptr[16]);
|
||||
if (!(((long)inptr) & 7)) {
|
||||
mmA = _mm_load_si64((__m64 *)&inptr[0]);
|
||||
mmG = _mm_load_si64((__m64 *)&inptr[8]);
|
||||
mmF = _mm_load_si64((__m64 *)&inptr[16]);
|
||||
} else {
|
||||
mmA = _mm_loadu_si64((__m64 *)&inptr[0]);
|
||||
mmG = _mm_loadu_si64((__m64 *)&inptr[8]);
|
||||
mmF = _mm_loadu_si64((__m64 *)&inptr[16]);
|
||||
}
|
||||
inptr += RGB_PIXELSIZE * 8;
|
||||
}
|
||||
mmD = mmA;
|
||||
@@ -268,10 +275,17 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
|
||||
: "$f0", "$f2", "$8", "$9", "$10", "$11", "$13", "memory"
|
||||
);
|
||||
} else {
|
||||
mmA = _mm_load_si64((__m64 *)&inptr[0]);
|
||||
mmF = _mm_load_si64((__m64 *)&inptr[8]);
|
||||
mmD = _mm_load_si64((__m64 *)&inptr[16]);
|
||||
mmC = _mm_load_si64((__m64 *)&inptr[24]);
|
||||
if (!(((long)inptr) & 7)) {
|
||||
mmA = _mm_load_si64((__m64 *)&inptr[0]);
|
||||
mmF = _mm_load_si64((__m64 *)&inptr[8]);
|
||||
mmD = _mm_load_si64((__m64 *)&inptr[16]);
|
||||
mmC = _mm_load_si64((__m64 *)&inptr[24]);
|
||||
} else {
|
||||
mmA = _mm_loadu_si64((__m64 *)&inptr[0]);
|
||||
mmF = _mm_loadu_si64((__m64 *)&inptr[8]);
|
||||
mmD = _mm_loadu_si64((__m64 *)&inptr[16]);
|
||||
mmC = _mm_loadu_si64((__m64 *)&inptr[24]);
|
||||
}
|
||||
inptr += RGB_PIXELSIZE * 8;
|
||||
}
|
||||
mmB = mmA;
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
/*
|
||||
* Loongson MMI optimizations for libjpeg-turbo
|
||||
*
|
||||
* Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
|
||||
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
|
||||
* All Rights Reserved.
|
||||
* Copyright (C) 2019, D. R. Commander. All Rights Reserved.
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
@@ -41,7 +42,7 @@ typedef float __m32;
|
||||
|
||||
/********** Set Operations **********/
|
||||
|
||||
extern __inline __m64
|
||||
extern __inline __m64 FUNCTION_ATTRIBS
|
||||
_mm_setzero_si64(void)
|
||||
{
|
||||
return 0.0;
|
||||
@@ -1245,6 +1246,22 @@ _mm_load_si64(const __m64 *src)
|
||||
asm("ldc1 %0, %1\n\t"
|
||||
: "=f" (ret)
|
||||
: "m" (*src)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
extern __inline __m64 FUNCTION_ATTRIBS
|
||||
_mm_loadu_si64(const __m64 *src)
|
||||
{
|
||||
__m64 ret;
|
||||
|
||||
asm("gsldlc1 %0, 7(%1)\n\t"
|
||||
"gsldrc1 %0, 0(%1)\n\t"
|
||||
: "=f" (ret)
|
||||
: "r" (src)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return ret;
|
||||
|
||||
@@ -692,8 +692,10 @@ jsimd_can_convsamp_float(void)
|
||||
if (sizeof(ISLOW_MULT_TYPE) != 2)
|
||||
return 0;
|
||||
|
||||
#ifndef __mips_soft_float
|
||||
if (simd_support & JSIMD_DSPR2)
|
||||
return 1;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -709,7 +711,9 @@ GLOBAL(void)
|
||||
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
|
||||
FAST_FLOAT *workspace)
|
||||
{
|
||||
#ifndef __mips_soft_float
|
||||
jsimd_convsamp_float_dspr2(sample_data, start_col, workspace);
|
||||
#endif
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
@@ -805,8 +809,10 @@ jsimd_can_quantize_float(void)
|
||||
if (sizeof(ISLOW_MULT_TYPE) != 2)
|
||||
return 0;
|
||||
|
||||
#ifndef __mips_soft_float
|
||||
if (simd_support & JSIMD_DSPR2)
|
||||
return 1;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -821,7 +827,9 @@ GLOBAL(void)
|
||||
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
|
||||
FAST_FLOAT *workspace)
|
||||
{
|
||||
#ifndef __mips_soft_float
|
||||
jsimd_quantize_float_dspr2(coef_block, divisors, workspace);
|
||||
#endif
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user