Merge remote-tracking branch 'turbo/master'

* turbo/master: (105 commits)
  makemacpkg.in: Allow universal DMG w/o ARMv8 arch
  Remove more unnecessary NULL checks before free()
  Eliminate unnecessary NULL checks before tjFree()
  Eliminate unnecessary NULL checks before free()
  simd/arm64/jsimd_neon.S: Fix checkstyle issue
  tjTransform(): Use instance err. for bad crop spec
  README.md, package specs: Various tweaks
  djpeg.c: Fix compiler warning w/o mem. src manager
  ARMv8 SIMD: Support execute-only memory (XOM)
  Travis: Use MacPorts instead of Homebrew
  Huffman enc.: Fix very rare local buffer overrun
  TurboJPEG: Fix erroneous subsampling detection
  ChangeLog.md: List CVE IDs for specific fixes
  tjDecompressToYUV*(): Fix OOB write/double free
  64-bit tjbench: Fix signed int overflow/segfault
  Fix copyright header formatting buglets
  example.txt: Avoid undefined setjmp() behavior
  Mac: Support hiding SIMD fct symbols w/ NASM 2.14+
  TJBench: Fix output with -componly -quiet
  Build: Don't require ASM_NASM if !REQUIRE_SIMD
  ...
This commit is contained in:
Kornel Lesiński
2020-02-13 10:45:55 +00:00
150 changed files with 2752 additions and 2523 deletions

View File

@@ -15,13 +15,18 @@ Build Requirements
* If using NASM, 2.10 or later is required.
* If using NASM, 2.10 or later (except 2.11.08) is required for an x86-64 Mac
build (2.11.08 does not work properly with libjpeg-turbo's x86-64 SIMD code
when building macho64 objects.) NASM or YASM can be obtained from
[MacPorts](http://www.macports.org/) or [Homebrew](http://brew.sh/).
when building macho64 objects.)
* If using YASM, 1.2.0 or later is required.
* If building on macOS, NASM or YASM can be obtained from
[MacPorts](http://www.macports.org/) or [Homebrew](http://brew.sh/).
- NOTE: Currently, if it is desirable to hide the SIMD function symbols in
Mac executables or shared libraries that statically link with
libjpeg-turbo, then YASM must be used when building libjpeg-turbo.
libjpeg-turbo, then NASM 2.14 or later or YASM must be used when
building libjpeg-turbo.
* If building on Windows, **nasm.exe**/**yasm.exe** should be in your `PATH`.
* NASM and YASM are located in the CRB (Code Ready Builder) repository on
Red Hat Enterprise Linux 8 and in the PowerTools repository on CentOS 8,
which is not enabled by default.
The binary RPMs released by the NASM project do not work on older Linux
systems, such as Red Hat Enterprise Linux 5. On such systems, you can easily
@@ -48,8 +53,9 @@ Build Requirements
install the Java Developer Package, which can be downloaded from
<http://developer.apple.com/downloads> (Apple ID required.) For other
systems, you can obtain the Oracle Java Development Kit from
<http://www.java.com>.
<http://www.oracle.com/technetwork/java/javase/downloads>.
* If using JDK 11 or later, CMake 3.10.x or later must also be used.
### Windows
@@ -83,7 +89,10 @@ Build Requirements
appropriate compiler paths automatically set.
- If building the TurboJPEG Java wrapper, JDK 1.5 or later is required. This
can be downloaded from <http://www.java.com>.
can be downloaded from
<http://www.oracle.com/technetwork/java/javase/downloads>.
* If using JDK 11 or later, CMake 3.10.x or later must also be used.
Out-of-Tree Builds
@@ -521,7 +530,7 @@ a universal library.
Building libjpeg-turbo for Android
----------------------------------
Building libjpeg-turbo for Android platforms requires the
Building libjpeg-turbo for Android platforms requires v13b or later of the
[Android NDK](https://developer.android.com/tools/sdk/ndk).
@@ -531,35 +540,21 @@ The following is a general recipe script that can be modified for your specific
needs.
# Set these variables to suit your needs
NDK_PATH={full path to the "ndk" directory-- for example, /opt/android/sdk/ndk-bundle}
BUILD_PLATFORM={the platform name for the NDK package you installed--
for example, "windows-x86" or "linux-x86_64" or "darwin-x86_64"}
TOOLCHAIN_VERSION={"4.8", "4.9", "clang3.5", etc. This corresponds to a
toolchain directory under ${NDK_PATH}/toolchains/.}
ANDROID_VERSION={The minimum version of Android to support-- for example,
NDK_PATH={full path to the NDK directory-- for example,
/opt/android/android-ndk-r16b}
TOOLCHAIN={"gcc" or "clang"-- "gcc" must be used with NDK r16b and earlier,
and "clang" must be used with NDK r17c and later}
ANDROID_VERSION={the minimum version of Android to support-- for example,
"16", "19", etc.}
# It should not be necessary to modify the rest
HOST=arm-linux-androideabi
SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-arm
export CFLAGS="-march=armv7-a -mfloat-abi=softfp -fprefetch-loop-arrays \
-D__ANDROID_API__=${ANDROID_VERSION} --sysroot=${SYSROOT} \
-isystem ${NDK_PATH}/sysroot/usr/include \
-isystem ${NDK_PATH}/sysroot/usr/include/${HOST}"
export LDFLAGS=-pie
TOOLCHAIN=${NDK_PATH}/toolchains/${HOST}-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM}
cd {build_directory}
cat <<EOF >toolchain.cmake
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)
set(CMAKE_C_COMPILER ${TOOLCHAIN}/bin/${HOST}-gcc)
set(CMAKE_FIND_ROOT_PATH ${TOOLCHAIN}/${HOST})
EOF
cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \
-DCMAKE_POSITION_INDEPENDENT_CODE=1 \
cmake -G"Unix Makefiles" \
-DANDROID_ABI=armeabi-v7a \
-DANDROID_ARM_MODE=arm \
-DANDROID_PLATFORM=android-${ANDROID_VERSION} \
-DANDROID_TOOLCHAIN=${TOOLCHAIN} \
-DCMAKE_ASM_FLAGS="--target=arm-linux-androideabi${ANDROID_VERSION}" \
-DCMAKE_TOOLCHAIN_FILE=${NDK_PATH}/build/cmake/android.toolchain.cmake \
[additional CMake flags] {source_directory}
make
@@ -570,34 +565,21 @@ The following is a general recipe script that can be modified for your specific
needs.
# Set these variables to suit your needs
NDK_PATH={full path to the "ndk" directory-- for example, /opt/android/sdk/ndk-bundle}
BUILD_PLATFORM={the platform name for the NDK package you installed--
for example, "windows-x86" or "linux-x86_64" or "darwin-x86_64"}
TOOLCHAIN_VERSION={"4.8", "4.9", "clang3.5", etc. This corresponds to a
toolchain directory under ${NDK_PATH}/toolchains/.}
ANDROID_VERSION={The minimum version of Android to support. "21" or later
NDK_PATH={full path to the NDK directory-- for example,
/opt/android/android-ndk-r16b}
TOOLCHAIN={"gcc" or "clang"-- "gcc" must be used with NDK r14b and earlier,
and "clang" must be used with NDK r17c and later}
ANDROID_VERSION={the minimum version of Android to support. "21" or later
is required for a 64-bit build.}
# It should not be necessary to modify the rest
HOST=aarch64-linux-android
SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-arm64
export CFLAGS="-D__ANDROID_API__=${ANDROID_VERSION} --sysroot=${SYSROOT} \
-isystem ${NDK_PATH}/sysroot/usr/include \
-isystem ${NDK_PATH}/sysroot/usr/include/${HOST}"
export LDFLAGS=-pie
TOOLCHAIN=${NDK_PATH}/toolchains/${HOST}-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM}
cd {build_directory}
cat <<EOF >toolchain.cmake
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(CMAKE_C_COMPILER ${TOOLCHAIN}/bin/${HOST}-gcc)
set(CMAKE_FIND_ROOT_PATH ${TOOLCHAIN}/${HOST})
EOF
cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \
-DCMAKE_POSITION_INDEPENDENT_CODE=1 \
cmake -G"Unix Makefiles" \
-DANDROID_ABI=arm64-v8a \
-DANDROID_ARM_MODE=arm \
-DANDROID_PLATFORM=android-${ANDROID_VERSION} \
-DANDROID_TOOLCHAIN=${TOOLCHAIN} \
-DCMAKE_ASM_FLAGS="--target=aarch64-linux-android${ANDROID_VERSION}" \
-DCMAKE_TOOLCHAIN_FILE=${NDK_PATH}/build/cmake/android.toolchain.cmake \
[additional CMake flags] {source_directory}
make
@@ -608,34 +590,19 @@ The following is a general recipe script that can be modified for your specific
needs.
# Set these variables to suit your needs
NDK_PATH={full path to the "ndk" directory-- for example, /opt/android/sdk/ndk-bundle}
BUILD_PLATFORM={the platform name for the NDK package you installed--
for example, "windows-x86" or "linux-x86_64" or "darwin-x86_64"}
TOOLCHAIN_VERSION={"4.8", "4.9", "clang3.5", etc. This corresponds to a
toolchain directory under ${NDK_PATH}/toolchains/.}
NDK_PATH={full path to the NDK directory-- for example,
/opt/android/android-ndk-r16b}
TOOLCHAIN={"gcc" or "clang"-- "gcc" must be used with NDK r14b and earlier,
and "clang" must be used with NDK r17c and later}
ANDROID_VERSION={The minimum version of Android to support-- for example,
"16", "19", etc.}
# It should not be necessary to modify the rest
HOST=i686-linux-android
SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-x86
export CFLAGS="-D__ANDROID_API__=${ANDROID_VERSION} --sysroot=${SYSROOT} \
-isystem ${NDK_PATH}/sysroot/usr/include \
-isystem ${NDK_PATH}/sysroot/usr/include/${HOST}"
export LDFLAGS=-pie
TOOLCHAIN=${NDK_PATH}/toolchains/x86-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM}
cd {build_directory}
cat <<EOF >toolchain.cmake
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR i386)
set(CMAKE_C_COMPILER ${TOOLCHAIN}/bin/${HOST}-gcc)
set(CMAKE_FIND_ROOT_PATH ${TOOLCHAIN}/${HOST})
EOF
cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \
-DCMAKE_POSITION_INDEPENDENT_CODE=1 \
cmake -G"Unix Makefiles" \
-DANDROID_ABI=x86 \
-DANDROID_PLATFORM=android-${ANDROID_VERSION} \
-DANDROID_TOOLCHAIN=${TOOLCHAIN} \
-DCMAKE_TOOLCHAIN_FILE=${NDK_PATH}/build/cmake/android.toolchain.cmake \
[additional CMake flags] {source_directory}
make
@@ -646,45 +613,23 @@ The following is a general recipe script that can be modified for your specific
needs.
# Set these variables to suit your needs
NDK_PATH={full path to the "ndk" directory-- for example, /opt/android/sdk/ndk-bundle}
BUILD_PLATFORM={the platform name for the NDK package you installed--
for example, "windows-x86" or "linux-x86_64" or "darwin-x86_64"}
TOOLCHAIN_VERSION={"4.8", "4.9", "clang3.5", etc. This corresponds to a
toolchain directory under ${NDK_PATH}/toolchains/.}
ANDROID_VERSION={The minimum version of Android to support. "21" or later
NDK_PATH={full path to the NDK directory-- for example,
/opt/android/android-ndk-r16b}
TOOLCHAIN={"gcc" or "clang"-- "gcc" must be used with NDK r14b and earlier,
and "clang" must be used with NDK r17c and later}
ANDROID_VERSION={the minimum version of Android to support. "21" or later
is required for a 64-bit build.}
# It should not be necessary to modify the rest
HOST=x86_64-linux-android
SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-x86_64
export CFLAGS="-D__ANDROID_API__=${ANDROID_VERSION} --sysroot=${SYSROOT} \
-isystem ${NDK_PATH}/sysroot/usr/include \
-isystem ${NDK_PATH}/sysroot/usr/include/${HOST}"
export LDFLAGS=-pie
TOOLCHAIN=${NDK_PATH}/toolchains/x86_64-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM}
cd {build_directory}
cat <<EOF >toolchain.cmake
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR x86_64)
set(CMAKE_C_COMPILER ${TOOLCHAIN}/bin/${HOST}-gcc)
set(CMAKE_FIND_ROOT_PATH ${TOOLCHAIN}/${HOST})
EOF
cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \
-DCMAKE_POSITION_INDEPENDENT_CODE=1 \
cmake -G"Unix Makefiles" \
-DANDROID_ABI=x86_64 \
-DANDROID_PLATFORM=android-${ANDROID_VERSION} \
-DANDROID_TOOLCHAIN=${TOOLCHAIN} \
-DCMAKE_TOOLCHAIN_FILE=${NDK_PATH}/build/cmake/android.toolchain.cmake \
[additional CMake flags] {source_directory}
make
If building for Android 4.0.x (API level < 16) or earlier, remove
`-DCMAKE_POSITION_INDEPENDENT_CODE=1` from the CMake arguments and `-pie` from
`LDFLAGS`.
If building on Windows, add `.exe` to the end of `CMAKE_C_COMPILER`.
Advanced CMake Options
----------------------

View File

@@ -1,4 +0,0 @@
brew 'yasm'
brew 'gcc@5'
brew 'md5sha1sum'
cask 'Caskroom/versions/java6'

View File

@@ -109,8 +109,6 @@ endif()
include(cmakescripts/GNUInstallDirs.cmake)
set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR})
macro(report_directory var)
if(CMAKE_INSTALL_${var} STREQUAL CMAKE_INSTALL_FULL_${var})
message(STATUS "CMAKE_INSTALL_${var} = ${CMAKE_INSTALL_${var}}")
@@ -193,6 +191,10 @@ endif()
report_option(ENABLE_SHARED "Shared libraries")
report_option(ENABLE_STATIC "Static libraries")
if(ENABLE_SHARED)
set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR})
endif()
if(WITH_12BIT)
set(WITH_ARITH_DEC 0)
set(WITH_ARITH_ENC 0)
@@ -333,7 +335,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
endif()
if(${var} MATCHES "-xO2")
string(REGEX REPLACE "-xO2" "-xO5" ${var} "${${var}}")
endif()
endif()
endforeach()
endif()
endif()
@@ -470,8 +472,8 @@ if(UNIX AND NOT APPLE)
# still work.
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/conftest.map
"VERS_1 { global: foo; local: *; }; VERS_2 { global: foo2; } VERS_1;")
set(CMAKE_REQUIRED_FLAGS "-Wl,-M,${CMAKE_CURRENT_BINARY_DIR}/conftest.map")
check_c_source_compiles("void foo() {} void foo2() {} int main(void) { return 0; }"
set(CMAKE_REQUIRED_FLAGS "-Wl,-M,${CMAKE_CURRENT_BINARY_DIR}/conftest.map -shared")
check_c_source_compiles("int foo() { return 0; } int foo2() { return 2; }"
HAVE_MAPFILE)
set(CMAKE_REQUIRED_FLAGS)
file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/conftest.map)
@@ -536,7 +538,7 @@ elseif(NOT WITH_12BIT)
endif()
if(WITH_SIMD)
message(STATUS "SIMD extensions: ${CPU_TYPE} (WITH_SIMD = ${WITH_SIMD})")
if(MSVC_IDE)
if(MSVC_IDE OR XCODE)
set_source_files_properties(${SIMD_OBJS} PROPERTIES GENERATED 1)
endif()
else()
@@ -565,16 +567,16 @@ if(WITH_TURBOJPEG)
turbojpeg.c transupp.c jdatadst-tj.c jdatasrc-tj.c rdbmp.c rdppm.c
wrbmp.c wrppm.c)
set(TJMAPFILE ${CMAKE_CURRENT_SOURCE_DIR}/turbojpeg-mapfile)
if(WITH_JAVA)
set(TURBOJPEG_SOURCES ${TURBOJPEG_SOURCES} turbojpeg-jni.c)
include_directories(${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2})
if(WITH_JAVA)
set(TURBOJPEG_SOURCES ${TURBOJPEG_SOURCES} turbojpeg-jni.c)
include_directories(${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2})
set(TJMAPFILE ${CMAKE_CURRENT_SOURCE_DIR}/turbojpeg-mapfile.jni)
endif()
endif()
add_library(turbojpeg SHARED ${TURBOJPEG_SOURCES})
set_property(TARGET turbojpeg PROPERTY COMPILE_FLAGS
"-DBMP_SUPPORTED -DPPM_SUPPORTED")
if(WIN32)
set_target_properties(turbojpeg PROPERTIES DEFINE_SYMBOL DLLDEFINE)
set_target_properties(turbojpeg PROPERTIES DEFINE_SYMBOL DLLDEFINE)
endif()
if(MINGW)
set_target_properties(turbojpeg PROPERTIES LINK_FLAGS -Wl,--kill-at)
@@ -609,7 +611,7 @@ if(WITH_TURBOJPEG)
target_link_libraries(tjexample turbojpeg)
if(UNIX)
target_link_libraries(tjexample m)
endif()
endif()
endif()
if(ENABLE_STATIC)
@@ -699,7 +701,7 @@ add_executable(wrjpgcom wrjpgcom.c)
add_subdirectory(md5)
if(MSVC_IDE)
if(MSVC_IDE OR XCODE)
set(OBJDIR "\${CTEST_CONFIGURATION_TYPE}/")
else()
set(OBJDIR "")
@@ -715,7 +717,7 @@ if(WITH_12BIT)
set(MD5_JPEG_422_IFAST_OPT 7322e3bd2f127f7de4b40d4480ce60e4)
set(MD5_PPM_422_IFAST 79807fa552899e66a04708f533e16950)
set(MD5_PPM_422M_IFAST 07737bfe8a7c1c87aaa393a0098d16b0)
set(MD5_JPEG_420_IFAST_Q100_PROG a1da220b5604081863a504297ed59e55)
set(MD5_JPEG_420_IFAST_Q100_PROG 008ab68d6ddbba04a8f01deee4e0f9f8)
set(MD5_PPM_420_Q100_IFAST 1b3730122709f53d007255e8dfd3305e)
set(MD5_PPM_420M_Q100_IFAST 980a1a3c5bf9510022869d30b7d26566)
set(MD5_JPEG_GRAY_ISLOW 235c90707b16e2e069f37c888b2636d9)
@@ -765,7 +767,7 @@ else()
set(MD5_PPM_422M_IFAST 8dbc65323d62cca7c91ba02dd1cfa81d)
set(MD5_BMP_422M_IFAST_565 3294bd4d9a1f2b3d08ea6020d0db7065)
set(MD5_BMP_422M_IFAST_565D da98c9c7b6039511be4a79a878a9abc1)
set(MD5_JPEG_420_IFAST_Q100_PROG 990cbe0329c882420a2094da7e5adade)
set(MD5_JPEG_420_IFAST_Q100_PROG e59bb462016a8d9a748c330a3474bb55)
set(MD5_PPM_420_Q100_IFAST 5a732542015c278ff43635e473a8a294)
set(MD5_PPM_420M_Q100_IFAST ff692ee9323a3b424894862557c092f1)
set(MD5_JPEG_GRAY_ISLOW 72b51f894b8f4a10b3ee3066770aa38d)
@@ -1032,6 +1034,8 @@ foreach(libtype ${TEST_LIBTYPES})
add_test(djpeg-${libtype}-rgb-islow-icc-cmp
${MD5CMP} b06a39d730129122e85c1363ed1bbc9e testout_rgb_islow.icc)
set_tests_properties(djpeg-${libtype}-rgb-islow-icc-cmp PROPERTIES
DEPENDS djpeg-${libtype}-rgb-islow)
add_bittest(jpegtran icc "-copy;all;-icc;${TESTIMAGES}/test2.icc"
testout_rgb_islow2.jpg testout_rgb_islow.jpg ${MD5_JPEG_RGB_ISLOW2})
@@ -1078,7 +1082,7 @@ foreach(libtype ${TEST_LIBTYPES})
# CC: RGB->YCC SAMP: fullsize/h2v2 FDCT: ifast ENT: prog huff
add_bittest(cjpeg 420-q100-ifast-prog
"-sample;2x2;-quality;100;-dct;fast;-prog"
"-sample;2x2;-quality;100;-dct;fast;-scans;${TESTIMAGES}/test.scan"
testout_420_q100_ifast_prog.jpg ${TESTIMAGES}/testorig.ppm
${MD5_JPEG_420_IFAST_Q100_PROG})
@@ -1126,12 +1130,12 @@ foreach(libtype ${TEST_LIBTYPES})
${MD5_JPEG_420S_IFAST_OPT})
if(FLOATTEST)
# CC: RGB->YCC SAMP: fullsize/int FDCT: float ENT: prog huff
# CC: RGB->YCC SAMP: fullsize/int FDCT: float ENT: prog huff
add_bittest(cjpeg 3x2-float-prog "-sample;3x2;-dct;float;-prog"
testout_3x2_float_prog.jpg ${TESTIMAGES}/testorig.ppm
${MD5_JPEG_3x2_FLOAT_PROG_${FLOATTEST_UC}})
# CC: YCC->RGB SAMP: fullsize/int IDCT: float ENT: prog huff
# CC: YCC->RGB SAMP: fullsize/int IDCT: float ENT: prog huff
add_bittest(djpeg 3x2-float-prog "-dct;float"
testout_3x2_float.ppm testout_3x2_float_prog.jpg
${MD5_PPM_3x2_FLOAT_${FLOATTEST_UC}} cjpeg-${libtype}-3x2-float-prog)
@@ -1321,6 +1325,8 @@ if(WITH_TURBOJPEG)
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -yuv -alloc
COMMAND echo tjbenchtest -progressive
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -progressive
COMMAND echo tjbenchtest -progressive -yuv
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -progressive -yuv
COMMAND echo tjexampletest
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest
COMMAND echo tjbenchtest.java
@@ -1329,6 +1335,9 @@ if(WITH_TURBOJPEG)
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java -yuv
COMMAND echo tjbenchtest.java -progressive
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java -progressive
COMMAND echo tjexampletest.java -progressive -yuv
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java
-progressive -yuv
COMMAND echo tjexampletest.java
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest.java
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest
@@ -1344,6 +1353,10 @@ if(WITH_TURBOJPEG)
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -yuv
COMMAND echo tjbenchtest -yuv -alloc
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -yuv -alloc
COMMAND echo tjbenchtest -progressive
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -progressive
COMMAND echo tjbenchtest -progressive -yuv
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -progressive -yuv
COMMAND echo tjexampletest
COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest)
@@ -1363,12 +1376,22 @@ if(WITH_TURBOJPEG)
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
if(NOT CMAKE_VERSION VERSION_LESS "3.1" AND MSVC AND
CMAKE_C_LINKER_SUPPORTS_PDB)
install(FILES "$<TARGET_PDB_FILE:turbojpeg>"
DESTINATION ${CMAKE_INSTALL_BINDIR} OPTIONAL)
endif()
endif()
if(ENABLE_STATIC)
install(TARGETS turbojpeg-static ARCHIVE
DESTINATION ${CMAKE_INSTALL_LIBDIR})
if(NOT ENABLE_SHARED)
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/tjbench-static${EXE}
if(MSVC_IDE OR XCODE)
set(DIR "${CMAKE_CURRENT_BINARY_DIR}/\${CMAKE_INSTALL_CONFIG_NAME}")
else()
set(DIR ${CMAKE_CURRENT_BINARY_DIR})
endif()
install(PROGRAMS ${DIR}/tjbench-static${EXE}
DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME tjbench${EXE})
endif()
endif()
@@ -1379,11 +1402,16 @@ endif()
if(ENABLE_STATIC)
install(TARGETS jpeg-static ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
if(NOT ENABLE_SHARED)
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/cjpeg-static${EXE}
if(MSVC_IDE OR XCODE)
set(DIR "${CMAKE_CURRENT_BINARY_DIR}/\${CMAKE_INSTALL_CONFIG_NAME}")
else()
set(DIR ${CMAKE_CURRENT_BINARY_DIR})
endif()
install(PROGRAMS ${DIR}/cjpeg-static${EXE}
DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME cjpeg${EXE})
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/djpeg-static${EXE}
install(PROGRAMS ${DIR}/djpeg-static${EXE}
DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME djpeg${EXE})
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/jpegtran-static${EXE}
install(PROGRAMS ${DIR}/jpegtran-static${EXE}
DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME jpegtran${EXE})
endif()
endif()
@@ -1408,10 +1436,10 @@ if(UNIX OR MINGW)
${CMAKE_CURRENT_SOURCE_DIR}/rdjpgcom.1
${CMAKE_CURRENT_SOURCE_DIR}/wrjpgcom.1
DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/pkgscripts/libjpeg.pc
endif()
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/pkgscripts/libjpeg.pc
${CMAKE_CURRENT_BINARY_DIR}/pkgscripts/libturbojpeg.pc
DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
endif()
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/jconfig.h
${CMAKE_CURRENT_SOURCE_DIR}/jerror.h ${CMAKE_CURRENT_SOURCE_DIR}/jmorecfg.h

View File

@@ -1,3 +1,152 @@
2.0.4
=====
### Significant changes relative to 2.0.3:
1. Fixed a regression in the Windows packaging system (introduced by
2.0 beta1[2]) whereby, if both the 64-bit libjpeg-turbo SDK for GCC and the
64-bit libjpeg-turbo SDK for Visual C++ were installed on the same system, only
one of them could be uninstalled.
2. Fixed a signed integer overflow and subsequent segfault that occurred when
attempting to decompress images with more than 715827882 pixels using the
64-bit C version of TJBench.
3. Fixed out-of-bounds write in `tjDecompressToYUV2()` and
`tjDecompressToYUVPlanes()` (sometimes manifesting as a double free) that
occurred when attempting to decompress grayscale JPEG images that were
compressed with a sampling factor other than 1 (for instance, with
`cjpeg -grayscale -sample 2x2`).
4. Fixed a regression introduced by 2.0.2[5] that caused the TurboJPEG API to
incorrectly identify some JPEG images with unusual sampling factors as 4:4:4
JPEG images. This was known to cause a buffer overflow when attempting to
decompress some such images using `tjDecompressToYUV2()` or
`tjDecompressToYUVPlanes()`.
5. Fixed an issue, detected by ASan, whereby attempting to losslessly transform
a specially-crafted malformed JPEG image containing an extremely-high-frequency
coefficient block (junk image data that could never be generated by a
legitimate JPEG compressor) could cause the Huffman encoder's local buffer to
be overrun. (Refer to 1.4.0[9] and 1.4beta1[15].) Given that the buffer
overrun was fully contained within the stack and did not cause a segfault or
other user-visible errant behavior, and given that the lossless transformer
(unlike the decompressor) is not generally exposed to arbitrary data exploits,
this issue did not likely pose a security risk.
6. The ARM 64-bit (ARMv8) NEON SIMD assembly code now stores constants in a
separate read-only data section rather than in the text section, to support
execute-only memory layouts.
2.0.3
=====
### Significant changes relative to 2.0.2:
1. Fixed "using JNI after critical get" errors that occurred on Android
platforms when passing invalid arguments to certain methods in the TurboJPEG
Java API.
2. Fixed a regression in the SIMD feature detection code, introduced by
the AVX2 SIMD extensions (2.0 beta1[1]), that was known to cause an illegal
instruction exception, in rare cases, on CPUs that lack support for CPUID leaf
07H (or on which the maximum CPUID leaf has been limited by way of a BIOS
setting.)
3. The 4:4:0 (h1v2) fancy (smooth) chroma upsampling algorithm in the
decompressor now uses a similar bias pattern to that of the 4:2:2 (h2v1) fancy
chroma upsampling algorithm, rounding up or down the upsampled result for
alternate pixels rather than always rounding down. This ensures that,
regardless of whether a 4:2:2 JPEG image is rotated or transposed prior to
decompression (in the frequency domain) or after decompression (in the spatial
domain), the final image will be similar.
4. Fixed an integer overflow and subsequent segfault that occurred when
attempting to compress or decompress images with more than 1 billion pixels
using the TurboJPEG API.
5. Fixed a regression introduced by 2.0 beta1[15] whereby attempting to
generate a progressive JPEG image on an SSE2-capable CPU using a scan script
containing one or more scans with lengths divisible by 16 would result in an
error ("Missing Huffman code table entry") and an invalid JPEG image.
6. Fixed an issue whereby `tjDecodeYUV()` and `tjDecodeYUVPlanes()` would throw
an error ("Invalid progressive parameters") or a warning ("Inconsistent
progression sequence") if passed a TurboJPEG instance that was previously used
to decompress a progressive JPEG image.
2.0.2
=====
### Significant changes relative to 2.0.1:
1. Fixed a regression introduced by 2.0.1[5] that prevented a runtime search
path (rpath) from being embedded in the libjpeg-turbo shared libraries and
executables for macOS and iOS. This caused a fatal error of the form
"dyld: Library not loaded" when attempting to use one of the executables,
unless `DYLD_LIBRARY_PATH` was explicitly set to the location of the
libjpeg-turbo shared libraries.
2. Fixed an integer overflow and subsequent segfault (CVE-2018-20330) that
occurred when attempting to load a BMP file with more than 1 billion pixels
using the `tjLoadImage()` function.
3. Fixed a buffer overrun (CVE-2018-19664) that occurred when attempting to
decompress a specially-crafted malformed JPEG image to a 256-color BMP using
djpeg.
4. Fixed a floating point exception that occurred when attempting to
decompress a specially-crafted malformed JPEG image with a specified image
width or height of 0 using the C version of TJBench.
5. The TurboJPEG API will now decompress 4:4:4 JPEG images with 2x1, 1x2, 3x1,
or 1x3 luminance and chrominance sampling factors. This is a non-standard way
of specifying 1x subsampling (normally 4:4:4 JPEGs have 1x1 luminance and
chrominance sampling factors), but the JPEG format and the libjpeg API both
allow it.
6. Fixed a regression introduced by 2.0 beta1[7] that caused djpeg to generate
incorrect PPM images when used with the `-colors` option.
7. Fixed an issue whereby a static build of libjpeg-turbo (a build in which
`ENABLE_SHARED` is `0`) could not be installed using the Visual Studio IDE.
8. Fixed a severe performance issue in the Loongson MMI SIMD extensions that
occurred when compressing RGB images whose image rows were not 64-bit-aligned.
2.0.1
=====
### Significant changes relative to 2.0.0:
1. Fixed a regression introduced with the new CMake-based Un*x build system,
whereby jconfig.h could cause compiler warnings of the form
`"HAVE_*_H" redefined` if it was included by downstream Autotools-based
projects that used `AC_CHECK_HEADERS()` to check for the existence of locale.h,
stddef.h, or stdlib.h.
2. The `jsimd_quantize_float_dspr2()` and `jsimd_convsamp_float_dspr2()`
functions in the MIPS DSPr2 SIMD extensions are now disabled at compile time
if the soft float ABI is enabled. Those functions use instructions that are
incompatible with the soft float ABI.
3. Fixed a regression in the SIMD feature detection code, introduced by
the AVX2 SIMD extensions (2.0 beta1[1]), that caused libjpeg-turbo to crash on
Windows 7 if Service Pack 1 was not installed.
4. Fixed out-of-bounds read in cjpeg that occurred when attempting to compress
a specially-crafted malformed color-index (8-bit-per-sample) Targa file in
which some of the samples (color indices) exceeded the bounds of the Targa
file's color table.
5. Fixed an issue whereby installing a fully static build of libjpeg-turbo
(a build in which `CFLAGS` contains `-static` and `ENABLE_SHARED` is `0`) would
fail with "No valid ELF RPATH or RUNPATH entry exists in the file."
2.0.0
=====
@@ -30,10 +179,11 @@ would produce a "Bogus message code" error message if the underlying bitmap and
PPM readers/writers threw an error that was specific to the readers/writers
(as opposed to a general libjpeg API error.)
4. Fixed an issue whereby a specially-crafted malformed BMP file, one in which
the header specified an image width of 1073741824 pixels, would trigger a
floating point exception (division by zero) in the `tjLoadImage()` function
when attempting to load the BMP file into a 4-component image buffer.
4. Fixed an issue (CVE-2018-1152) whereby a specially-crafted malformed BMP
file, one in which the header specified an image width of 1073741824 pixels,
would trigger a floating point exception (division by zero) in the
`tjLoadImage()` function when attempting to load the BMP file into a
4-component image buffer.
5. Fixed an issue whereby certain combinations of calls to
`jpeg_skip_scanlines()` and `jpeg_read_scanlines()` could trigger an infinite
@@ -47,10 +197,10 @@ a 4:2:2 or 4:2:0 JPEG image using the merged (non-fancy) upsampling algorithms
7. The new CMake-based build system will now disable the MIPS DSPr2 SIMD
extensions if it detects that the compiler does not support DSPr2 instructions.
8. Fixed out-of-bounds read in cjpeg that occurred when attempting to compress
a specially-crafted malformed color-index (8-bit-per-sample) BMP file in which
some of the samples (color indices) exceeded the bounds of the BMP file's color
table.
8. Fixed out-of-bounds read in cjpeg (CVE-2018-14498) that occurred when
attempting to compress a specially-crafted malformed color-index
(8-bit-per-sample) BMP file in which some of the samples (color indices)
exceeded the bounds of the BMP file's color table.
9. Fixed a signed integer overflow in the progressive Huffman decoder, detected
by the Clang and GCC undefined behavior sanitizers, that could be triggered by
@@ -210,8 +360,8 @@ write scanlines in bottom-up order.) djpeg will now exit gracefully if an
output format other than PPM/PGM, GIF, or Targa is selected along with the
`-crop` option.
4. Fixed an issue whereby `jpeg_skip_scanlines()` would segfault if color
quantization was enabled.
4. Fixed an issue (CVE-2017-15232) whereby `jpeg_skip_scanlines()` would
segfault if color quantization was enabled.
5. TJBench (both C and Java versions) will now display usage information if any
command-line argument is unrecognized. This prevents the program from silently
@@ -838,13 +988,13 @@ and IDCT algorithms (both are used during JPEG decompression.) For unknown
reasons (probably related to clang), this code cannot currently be compiled for
iOS.
15. Fixed an extremely rare bug that could cause the Huffman encoder's local
buffer to overrun when a very high-frequency MCU is compressed using quality
100 and no subsampling, and when the JPEG output buffer is being dynamically
resized by the destination manager. This issue was so rare that, even with a
test program specifically designed to make the bug occur (by injecting random
high-frequency YUV data into the compressor), it was reproducible only once in
about every 25 million iterations.
15. Fixed an extremely rare bug (CVE-2014-9092) that could cause the Huffman
encoder's local buffer to overrun when a very high-frequency MCU is compressed
using quality 100 and no subsampling, and when the JPEG output buffer is being
dynamically resized by the destination manager. This issue was so rare that,
even with a test program specifically designed to make the bug occur (by
injecting random high-frequency YUV data into the compressor), it was
reproducible only once in about every 25 million iterations.
16. Fixed an oversight in the TurboJPEG C wrapper: if any of the JPEG
compression functions was called repeatedly with the same
@@ -879,8 +1029,9 @@ entropy coding (by passing arguments of `-progressive -arithmetic` to cjpeg or
jpegtran, for instance) would result in an error, `Requested feature was
omitted at compile time`.
4. Fixed a couple of issues whereby malformed JPEG images would cause
libjpeg-turbo to use uninitialized memory during decompression.
4. Fixed a couple of issues (CVE-2013-6629 and CVE-2013-6630) whereby malformed
JPEG images would cause libjpeg-turbo to use uninitialized memory during
decompression.
5. Fixed an error (`Buffer passed to JPEG library is too small`) that occurred
when calling the TurboJPEG YUV encoding function with a very small (< 5x5)
@@ -1019,9 +1170,9 @@ correct behavior of the colorspace extensions when merged upsampling is used.
upper 64 bits of xmm6 and xmm7 on Win64 platforms, which violated the Win64
calling conventions.
4. Fixed a regression caused by 1.2.0[6] whereby decompressing corrupt JPEG
images (specifically, images in which the component count was erroneously set
to a large value) would cause libjpeg-turbo to segfault.
4. Fixed a regression (CVE-2012-2806) caused by 1.2.0[6] whereby decompressing
corrupt JPEG images (specifically, images in which the component count was
erroneously set to a large value) would cause libjpeg-turbo to segfault.
5. Worked around a severe performance issue with "Bobcat" (AMD Embedded APU)
processors. The `MASKMOVDQU` instruction, which was used by the libjpeg-turbo

View File

@@ -14,7 +14,7 @@ libjpeg-turbo is covered by three compatible BSD-style open source licenses:
This license covers the TurboJPEG API library and associated programs, as
well as the build system.
- The zlib License, which is listed below
- The [zlib License](https://opensource.org/licenses/Zlib)
This license is a subset of the other two, and it covers the libjpeg-turbo
SIMD extensions.
@@ -66,7 +66,7 @@ best of our understanding.
2. If your binary distribution includes or uses the TurboJPEG API, then
your product documentation must include the text of the Modified BSD
License.
License (see below.)
**Origin**
- Clause 2 of the Modified BSD License
@@ -91,7 +91,8 @@ best of our understanding.
The Modified (3-clause) BSD License
===================================
Copyright (C)\<YEAR\> \<AUTHOR\>. All Rights Reserved.
Copyright (C)2009-2020 D. R. Commander. All Rights Reserved.
Copyright (C)2015 Viktor Szathmáry. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@@ -118,28 +119,6 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
The zlib License
================
Copyright (C) \<YEAR\>, \<AUTHOR\>.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Why Three Licenses?
===================

View File

@@ -17,7 +17,7 @@ before_build:
- cd cmake_build
- cmake .. -G "Visual Studio 15 2017" -DPNG_SUPPORTED=NO
build_script:
build_script:
- cd %APPVEYOR_BUILD_FOLDER%
- msbuild cmake_build\mozjpeg.sln

View File

@@ -879,12 +879,10 @@ main(int argc, char **argv)
if (memdst) {
fprintf(stderr, "Compressed size: %lu bytes\n", outsize);
if (outbuffer != NULL)
free(outbuffer);
free(outbuffer);
}
if (icc_profile != NULL)
free(icc_profile);
free(icc_profile);
/* All done. */
exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS);

View File

@@ -83,7 +83,7 @@ endif()
if(BITS EQUAL 64)
set(INST_PLATFORM "${INST_PLATFORM} 64-bit")
set(INST_NAME ${INST_NAME}64)
set(INST_REG_NAME ${INST_DIR}64)
set(INST_REG_NAME ${INST_REG_NAME}64)
set(INST_DEFS ${INST_DEFS} -DWIN64)
endif()
@@ -145,6 +145,11 @@ set(DEFAULT_IOS_ARMV8_BUILD ${CMAKE_SOURCE_DIR}/iosarmv8)
set(IOS_ARMV8_BUILD ${DEFAULT_IOS_ARMV8_BUILD} CACHE PATH
"Directory containing ARMv8 iOS build to include in universal binaries (default: ${DEFAULT_IOS_ARMV8_BUILD})")
set(OSX_APP_CERT_NAME "" CACHE STRING
"Name of the Developer ID Application certificate (in the macOS keychain) that should be used to sign the libjpeg-turbo DMG. Leave this blank to generate an unsigned DMG.")
set(OSX_INST_CERT_NAME "" CACHE STRING
"Name of the Developer ID Installer certificate (in the macOS keychain) that should be used to sign the libjpeg-turbo installer package. Leave this blank to generate an unsigned package.")
configure_file(release/makemacpkg.in pkgscripts/makemacpkg)
configure_file(release/Distribution.xml.in pkgscripts/Distribution.xml)
configure_file(release/uninstall.in pkgscripts/uninstall)

View File

@@ -118,7 +118,7 @@
# absolute paths where necessary, using the same logic.
#=============================================================================
# Copyright 2016 D. R. Commander
# Copyright 2016, 2019 D. R. Commander
# Copyright 2016 Dmitry Marakasov
# Copyright 2016 Roger Leigh
# Copyright 2015 Alex Turbov
@@ -184,7 +184,7 @@ macro(GNUInstallDirs_set_install_dir var docstring)
"${docstring} (Default: ${CMAKE_INSTALL_DEFAULT_${var}})"
${_GNUInstallDirs_CMAKE_INSTALL_FORCE_${var}})
if(NOT "${CMAKE_INSTALL_${var}}" STREQUAL "${CMAKE_INSTALL_DEFAULT_${var}}")
if(NOT CMAKE_INSTALL_${var} STREQUAL CMAKE_INSTALL_DEFAULT_${var})
unset(_GNUInstallDirs_CMAKE_INSTALL_DEFAULT_${var} CACHE)
endif()

View File

@@ -516,7 +516,9 @@ main(int argc, char **argv)
FILE *input_file;
FILE *output_file;
unsigned char *inbuffer = NULL;
#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
unsigned long insize = 0;
#endif
JDIMENSION num_scanlines;
/* On Mac, fetch a command line. */
@@ -811,7 +813,7 @@ main(int argc, char **argv)
end_progress_monitor((j_common_ptr)&cinfo);
#endif
if (memsrc && inbuffer != NULL)
if (memsrc)
free(inbuffer);
/* All done. */

View File

@@ -2078,7 +2078,7 @@ If you choose option 1, <code>*jpegSize</code> should be set to the size of your
<p>You should always use this function to free JPEG destination buffer(s) that were automatically (re)allocated by the compression and transform functions or that were manually allocated using <a class="el" href="group___turbo_j_p_e_g.html#gaec627dd4c5f30b7a775a7aea3bec5d83" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a>.</p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">buffer</td><td>address of the buffer to free</td></tr>
<tr><td class="paramname">buffer</td><td>address of the buffer to free. If the address is NULL, then this function has no effect.</td></tr>
</table>
</dd>
</dl>

View File

@@ -288,12 +288,14 @@ my_error_exit(j_common_ptr cinfo)
}
METHODDEF(int) do_read_JPEG_file(struct jpeg_decompress_struct *cinfo,
char *filename);
/*
* Sample routine for JPEG decompression. We assume that the source file name
* is passed in. We want to return 1 on success, 0 on error.
*/
GLOBAL(int)
read_JPEG_file(char *filename)
{
@@ -301,6 +303,21 @@ read_JPEG_file(char *filename)
* working space (which is allocated as needed by the JPEG library).
*/
struct jpeg_decompress_struct cinfo;
return do_read_JPEG_file(&cinfo, filename);
}
/*
* We call the libjpeg API from within a separate function, because modifying
* the local non-volatile jpeg_decompress_struct instance below the setjmp()
* return point and then accessing the instance after setjmp() returns would
* return in undefined behavior that may potentially overwrite all or part of
* the structure.
*/
METHODDEF(int)
do_read_JPEG_file(struct jpeg_decompress_struct *cinfo, char *filename)
{
/* We use our private extension JPEG error handler.
* Note that this struct must live as long as the main JPEG parameter
* struct, to avoid dangling-pointer problems.
@@ -325,27 +342,27 @@ read_JPEG_file(char *filename)
/* Step 1: allocate and initialize JPEG decompression object */
/* We set up the normal JPEG error routines, then override error_exit. */
cinfo.err = jpeg_std_error(&jerr.pub);
cinfo->err = jpeg_std_error(&jerr.pub);
jerr.pub.error_exit = my_error_exit;
/* Establish the setjmp return context for my_error_exit to use. */
if (setjmp(jerr.setjmp_buffer)) {
/* If we get here, the JPEG code has signaled an error.
* We need to clean up the JPEG object, close the input file, and return.
*/
jpeg_destroy_decompress(&cinfo);
jpeg_destroy_decompress(cinfo);
fclose(infile);
return 0;
}
/* Now we can initialize the JPEG decompression object. */
jpeg_create_decompress(&cinfo);
jpeg_create_decompress(cinfo);
/* Step 2: specify data source (eg, a file) */
jpeg_stdio_src(&cinfo, infile);
jpeg_stdio_src(cinfo, infile);
/* Step 3: read file parameters with jpeg_read_header() */
(void)jpeg_read_header(&cinfo, TRUE);
(void)jpeg_read_header(cinfo, TRUE);
/* We can ignore the return value from jpeg_read_header since
* (a) suspension is not possible with the stdio data source, and
* (b) we passed TRUE to reject a tables-only JPEG file as an error.
@@ -360,7 +377,7 @@ read_JPEG_file(char *filename)
/* Step 5: Start decompressor */
(void)jpeg_start_decompress(&cinfo);
(void)jpeg_start_decompress(cinfo);
/* We can ignore the return value since suspension is not possible
* with the stdio data source.
*/
@@ -372,30 +389,30 @@ read_JPEG_file(char *filename)
* In this example, we need to make an output work buffer of the right size.
*/
/* JSAMPLEs per row in output buffer */
row_stride = cinfo.output_width * cinfo.output_components;
row_stride = cinfo->output_width * cinfo->output_components;
/* Make a one-row-high sample array that will go away when done with image */
buffer = (*cinfo.mem->alloc_sarray)
((j_common_ptr)&cinfo, JPOOL_IMAGE, row_stride, 1);
buffer = (*cinfo->mem->alloc_sarray)
((j_common_ptr)cinfo, JPOOL_IMAGE, row_stride, 1);
/* Step 6: while (scan lines remain to be read) */
/* jpeg_read_scanlines(...); */
/* Here we use the library's state variable cinfo.output_scanline as the
/* Here we use the library's state variable cinfo->output_scanline as the
* loop counter, so that we don't have to keep track ourselves.
*/
while (cinfo.output_scanline < cinfo.output_height) {
while (cinfo->output_scanline < cinfo->output_height) {
/* jpeg_read_scanlines expects an array of pointers to scanlines.
* Here the array is only one element long, but you could ask for
* more than one scanline at a time if that's more convenient.
*/
(void)jpeg_read_scanlines(&cinfo, buffer, 1);
(void)jpeg_read_scanlines(cinfo, buffer, 1);
/* Assume put_scanline_someplace wants a pointer and sample count. */
put_scanline_someplace(buffer[0], row_stride);
}
/* Step 7: Finish decompression */
(void)jpeg_finish_decompress(&cinfo);
(void)jpeg_finish_decompress(cinfo);
/* We can ignore the return value since suspension is not possible
* with the stdio data source.
*/
@@ -403,7 +420,7 @@ read_JPEG_file(char *filename)
/* Step 8: Release JPEG decompression object */
/* This is an important step since it will release a good deal of memory. */
jpeg_destroy_decompress(&cinfo);
jpeg_destroy_decompress(cinfo);
/* After finish_decompress, we can close the input file.
* Here we postpone it until after no more JPEG errors are possible,

View File

@@ -58,11 +58,21 @@ endif()
add_custom_target(javadoc COMMAND
javadoc -notimestamp -d ${CMAKE_CURRENT_SOURCE_DIR}/doc -sourcepath ${CMAKE_CURRENT_SOURCE_DIR} org.libjpegturbo.turbojpeg)
set(JAVACLASSPATH ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/turbojpeg-java.dir)
add_custom_target(javah
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJ
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJCompressor
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJDecompressor
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJTransformer)
if(Java_VERSION_MAJOR GREATER 9)
add_custom_target(javah
COMMAND javac -h ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH}
-d ${CMAKE_CURRENT_BINARY_DIR}/__unused
${CMAKE_CURRENT_SOURCE_DIR}/org/libjpegturbo/turbojpeg/TJ.java
${CMAKE_CURRENT_SOURCE_DIR}/org/libjpegturbo/turbojpeg/TJCompressor.java
${CMAKE_CURRENT_SOURCE_DIR}/org/libjpegturbo/turbojpeg/TJDecompressor.java
${CMAKE_CURRENT_SOURCE_DIR}/org/libjpegturbo/turbojpeg/TJTransformer.java)
else()
add_custom_target(javah
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJ
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJCompressor
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJDecompressor
COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJTransformer)
endif()
if(NOT DEFINED CMAKE_INSTALL_DEFAULT_JAVADIR)
set(CMAKE_INSTALL_DEFAULT_JAVADIR "<CMAKE_INSTALL_DATAROOTDIR>/java")

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C)2009-2014, 2016-2018 D. R. Commander. All Rights Reserved.
* Copyright (C)2009-2014, 2016-2019 D. R. Commander. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -121,6 +121,8 @@ final class TJBench {
int rindex = TJ.getRedOffset(pixelFormat);
int gindex = TJ.getGreenOffset(pixelFormat);
int bindex = TJ.getBlueOffset(pixelFormat);
if ((long)w[0] * (long)h[0] * (long)ps > (long)Integer.MAX_VALUE)
throw new Exception("Image is too large");
byte[] dstBuf = new byte[w[0] * h[0] * ps];
int pixels = w[0] * h[0], dstPtr = 0, rgbPtr = 0;
@@ -175,8 +177,11 @@ final class TJBench {
tjd = new TJDecompressor();
if (dstBuf == null)
if (dstBuf == null) {
if ((long)pitch * (long)scaledh > (long)Integer.MAX_VALUE)
throw new Exception("Image is too large");
dstBuf = new byte[pitch * scaledh];
}
/* Set the destination buffer to gray so we know whether the decompressor
attempted to write to it */
@@ -202,7 +207,9 @@ final class TJBench {
int width = doTile ? Math.min(tilew, w - x) : scaledw;
int height = doTile ? Math.min(tileh, h - y) : scaledh;
tjd.setSourceImage(jpegBuf[tile], jpegSize[tile]);
try {
tjd.setSourceImage(jpegBuf[tile], jpegSize[tile]);
} catch (TJException e) { handleTJException(e); }
if (doYUV) {
yuvImage.setBuf(yuvImage.getBuf(), width, yuvPad, height, subsamp);
try {
@@ -329,6 +336,8 @@ final class TJBench {
String pfStr = PIXFORMATSTR[pf];
YUVImage yuvImage = null;
if ((long)pitch * (long)h > (long)Integer.MAX_VALUE)
throw new Exception("Image is too large");
tmpBuf = new byte[pitch * h];
if (quiet == 0)
@@ -469,6 +478,8 @@ final class TJBench {
if (!compOnly)
decomp(srcBuf, jpegBuf, jpegSize, tmpBuf, w, h, subsamp, jpegQual,
fileName, tilew, tileh);
else if (quiet == 1)
System.out.println("N/A");
if (tilew == w && tileh == h) break;
}
@@ -489,6 +500,8 @@ final class TJBench {
int tw, th, ttilew, ttileh, tntilesw, tntilesh, tsubsamp;
FileInputStream fis = new FileInputStream(fileName);
if (fis.getChannel().size() > (long)Integer.MAX_VALUE)
throw new Exception("Image is too large");
int srcSize = (int)fis.getChannel().size();
srcBuf = new byte[srcSize];
fis.read(srcBuf, 0, srcSize);
@@ -500,7 +513,9 @@ final class TJBench {
tjt = new TJTransformer();
tjt.setSourceImage(srcBuf, srcSize);
try {
tjt.setSourceImage(srcBuf, srcSize);
} catch (TJException e) { handleTJException(e); }
w = tjt.getWidth();
h = tjt.getHeight();
subsamp = tjt.getSubsamp();
@@ -607,7 +622,9 @@ final class TJBench {
elapsed = 0.;
while (true) {
start = getTime();
tjt.transform(jpegBuf, t, flags);
try {
tjt.transform(jpegBuf, t, flags);
} catch (TJException e) { handleTJException(e); }
jpegSize = tjt.getTransformedSizes();
elapsed += getTime() - start;
if (iter >= 0) {
@@ -705,7 +722,7 @@ final class TJBench {
System.out.println(" bytes to which each row of each plane in the intermediate YUV image is");
System.out.println(" padded (default = 1)");
System.out.println("-scale M/N = Scale down the width/height of the decompressed JPEG image by a");
System.out.print (" factor of M/N (M/N = ");
System.out.print(" factor of M/N (M/N = ");
for (i = 0; i < nsf; i++) {
System.out.format("%d/%d", scalingFactors[i].getNum(),
scalingFactors[i].getDenom());

View File

@@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2014-2016, 2018, D. R. Commander.
* Copyright (C) 2009-2011, 2014-2016, 2018-2019, D. R. Commander.
* Copyright (C) 2015, Matthieu Darbois.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
@@ -43,8 +43,8 @@
*/
/* NOTE: Both GCC and Clang define __GNUC__ */
#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
#if !defined __thumb__ || defined __thumb2__
#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif
#endif
@@ -356,6 +356,8 @@ dump_buffer(working_state *state)
put_buffer = (put_buffer << size) | code; \
}
#if SIZEOF_SIZE_T != 8 && !defined(_WIN64)
#define CHECKBUF15() { \
if (put_bits > 15) { \
EMIT_BYTE() \
@@ -363,6 +365,8 @@ dump_buffer(working_state *state)
} \
}
#endif
#define CHECKBUF31() { \
if (put_bits > 31) { \
EMIT_BYTE() \
@@ -428,7 +432,7 @@ dump_buffer(working_state *state)
* scanning order-- 1, 8, 16, etc.), then this will produce an encoded block
* larger than 200 bytes.
*/
#define BUFSIZE (DCTSIZE2 * 4)
#define BUFSIZE (DCTSIZE2 * 8)
#define LOAD_BUFFER() { \
if (state->free_in_buffer < BUFSIZE) { \

View File

@@ -507,8 +507,8 @@ prepare_for_pass (j_compress_ptr cinfo)
*/
master->pass_type = output_pass;
master->pass_number++;
/*FALLTHROUGH*/
#endif
/*FALLTHROUGH*/
case output_pass:
/* Do a data-output pass. */
/* We need not repeat per-scan setup if prior optimization pass did it. */

View File

@@ -10,16 +10,16 @@
#define LIBJPEG_TURBO_VERSION_NUMBER @LIBJPEG_TURBO_VERSION_NUMBER@
/* Support arithmetic encoding */
#cmakedefine C_ARITH_CODING_SUPPORTED
#cmakedefine C_ARITH_CODING_SUPPORTED 1
/* Support arithmetic decoding */
#cmakedefine D_ARITH_CODING_SUPPORTED
#cmakedefine D_ARITH_CODING_SUPPORTED 1
/* Support in-memory source/destination managers */
#cmakedefine MEM_SRCDST_SUPPORTED
#cmakedefine MEM_SRCDST_SUPPORTED 1
/* Use accelerated SIMD routines. */
#cmakedefine WITH_SIMD
#cmakedefine WITH_SIMD 1
/*
* Define BITS_IN_JSAMPLE as either
@@ -33,37 +33,37 @@
#define BITS_IN_JSAMPLE @BITS_IN_JSAMPLE@ /* use 8 or 12 */
/* Define to 1 if you have the <locale.h> header file. */
#cmakedefine HAVE_LOCALE_H
#cmakedefine HAVE_LOCALE_H 1
/* Define to 1 if you have the <stddef.h> header file. */
#cmakedefine HAVE_STDDEF_H
#cmakedefine HAVE_STDDEF_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#cmakedefine HAVE_STDLIB_H
#cmakedefine HAVE_STDLIB_H 1
/* Define if you need to include <sys/types.h> to get size_t. */
#cmakedefine NEED_SYS_TYPES_H
#cmakedefine NEED_SYS_TYPES_H 1
/* Define if you have BSD-like bzero and bcopy in <strings.h> rather than
memset/memcpy in <string.h>. */
#cmakedefine NEED_BSD_STRINGS
#cmakedefine NEED_BSD_STRINGS 1
/* Define to 1 if the system has the type `unsigned char'. */
#cmakedefine HAVE_UNSIGNED_CHAR
#cmakedefine HAVE_UNSIGNED_CHAR 1
/* Define to 1 if the system has the type `unsigned short'. */
#cmakedefine HAVE_UNSIGNED_SHORT
#cmakedefine HAVE_UNSIGNED_SHORT 1
/* Compiler does not support pointers to undefined structures. */
#cmakedefine INCOMPLETE_TYPES_BROKEN
#cmakedefine INCOMPLETE_TYPES_BROKEN 1
/* Define if your (broken) compiler shifts signed values as if they were
unsigned. */
#cmakedefine RIGHT_SHIFT_IS_UNSIGNED
#cmakedefine RIGHT_SHIFT_IS_UNSIGNED 1
/* Define to 1 if type `char' is unsigned and you are not using gcc. */
#ifndef __CHAR_UNSIGNED__
#cmakedefine __CHAR_UNSIGNED__
#cmakedefine __CHAR_UNSIGNED__ 1
#endif
/* Define to empty if `const' does not conform to ANSI C. */

View File

@@ -53,8 +53,8 @@
*/
/* NOTE: Both GCC and Clang define __GNUC__ */
#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
#if !defined __thumb__ || defined __thumb2__
#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif
#endif

View File

@@ -5,7 +5,7 @@
* Copyright (C) 1994-1996, Thomas G. Lane.
* Modified 2009-2012 by Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2011, 2014, 2016, D. R. Commander.
* Copyright (C) 2011, 2014, 2016, 2019, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -27,6 +27,8 @@
extern void *malloc(size_t size);
extern void free(void *ptr);
#endif
void jpeg_mem_dest_tj(j_compress_ptr cinfo, unsigned char **outbuffer,
unsigned long *outsize, boolean alloc);
#define OUTPUT_BUF_SIZE 4096 /* choose an efficiently fwrite'able size */
@@ -101,8 +103,7 @@ empty_mem_output_buffer(j_compress_ptr cinfo)
MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
if (dest->newbuffer != NULL)
free(dest->newbuffer);
free(dest->newbuffer);
dest->newbuffer = nextbuffer;

View File

@@ -144,8 +144,7 @@ empty_mem_output_buffer (j_compress_ptr cinfo)
MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
if (dest->newbuffer != NULL)
free(dest->newbuffer);
free(dest->newbuffer);
dest->newbuffer = nextbuffer;

View File

@@ -5,7 +5,7 @@
* Copyright (C) 1994-1996, Thomas G. Lane.
* Modified 2009-2011 by Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2011, 2016, D. R. Commander.
* Copyright (C) 2011, 2016, 2019, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -23,6 +23,9 @@
#include "jpeglib.h"
#include "jerror.h"
void jpeg_mem_src_tj(j_decompress_ptr cinfo, const unsigned char *inbuffer,
unsigned long insize);
/*
* Initialize source --- called by jpeg_read_header

View File

@@ -592,7 +592,7 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* Declarations for ordered dithering
*
* We use a 4x4 ordered dither array packed into 32 bits. This array is
* sufficent for dithering RGB888 to RGB565.
* sufficient for dithering RGB888 to RGB565.
*/
#define DITHER_MASK 0x3

View File

@@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2016, 2018, D. R. Commander.
* Copyright (C) 2009-2011, 2016, 2018-2019, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -589,7 +589,11 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
if (entropy->dc_needed[blkn]) {
/* Convert DC difference to actual value, update last_dc_val */
int ci = cinfo->MCU_membership[blkn];
s += state.last_dc_val[ci];
/* This is really just
* s += state.last_dc_val[ci];
* It is written this way in order to shut up UBSan.
*/
s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
state.last_dc_val[ci] = s;
if (block) {
/* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
@@ -684,7 +688,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
if (entropy->dc_needed[blkn]) {
int ci = cinfo->MCU_membership[blkn];
s += state.last_dc_val[ci];
s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
state.last_dc_val[ci] = s;
if (block)
(*block)[0] = (JCOEF)s;

View File

@@ -429,8 +429,6 @@ h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l)
#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r)
#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3)
#define WRITE_TWO_PIXELS_LE(addr, pixels) { \
((INT16 *)(addr))[0] = (INT16)(pixels); \
((INT16 *)(addr))[1] = (INT16)((pixels) >> 16); \
@@ -448,7 +446,7 @@ h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* Declarations for ordered dithering
*
* We use a 4x4 ordered dither array packed into 32 bits. This array is
* sufficent for dithering RGB888 to RGB565.
* sufficient for dithering RGB888 to RGB565.
*/
#define DITHER_MASK 0x3

View File

@@ -8,6 +8,7 @@
* Copyright (C) 2010, 2015-2016, D. R. Commander.
* Copyright (C) 2014, MIPS Technologies, Inc., California.
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2019, Arm Limited.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -315,9 +316,9 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY output_data = *output_data_ptr;
JSAMPROW inptr0, inptr1, outptr;
#if BITS_IN_JSAMPLE == 8
int thiscolsum;
int thiscolsum, bias;
#else
JLONG thiscolsum;
JLONG thiscolsum, bias;
#endif
JDIMENSION colctr;
int inrow, outrow, v;
@@ -327,15 +328,18 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
for (v = 0; v < 2; v++) {
/* inptr0 points to nearest input row, inptr1 points to next nearest */
inptr0 = input_data[inrow];
if (v == 0) /* next nearest is row above */
if (v == 0) { /* next nearest is row above */
inptr1 = input_data[inrow - 1];
else /* next nearest is row below */
bias = 1;
} else { /* next nearest is row below */
inptr1 = input_data[inrow + 1];
bias = 2;
}
outptr = output_data[outrow++];
for (colctr = 0; colctr < compptr->downsampled_width; colctr++) {
thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
*outptr++ = (JSAMPLE)((thiscolsum + 1) >> 2);
*outptr++ = (JSAMPLE)((thiscolsum + bias) >> 2);
}
}
inrow++;

View File

@@ -1,7 +1,7 @@
/*
* jfdctint.c
*
* This file was part of the Independent JPEG Group's software.
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2015, D. R. Commander.

View File

@@ -1,7 +1,7 @@
/*
* jidctint.c
*
* This file was part of the Independent JPEG Group's software.
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1998, Thomas G. Lane.
* Modification developed 2002-2009 by Guido Vollbeding.
* libjpeg-turbo Modifications:

View File

@@ -1,7 +1,7 @@
/*
* jidctred.c
*
* This file was part of the Independent JPEG Group's software.
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1994-1998, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2015, D. R. Commander.

View File

@@ -129,7 +129,7 @@ select_transform (JXFORM_CODE transform)
LOCAL(int)
parse_switches (j_compress_ptr cinfo, int argc, char **argv,
int last_file_arg_seen, boolean for_real)
int last_file_arg_seen, boolean for_real)
/* Parse optional switches.
* Returns argv[] index of first file-name argument (== argc if none).
* Any file names with indexes <= last_file_arg_seen are ignored;

View File

@@ -154,7 +154,7 @@ typedef struct {
*/
boolean is_padded; /* is the colorindex padded for odither? */
int Ncolors[MAX_Q_COMPS]; /* # of values alloced to each component */
int Ncolors[MAX_Q_COMPS]; /* # of values allocated to each component */
/* Variables for ordered dithering */
int row_index; /* cur row's vertical index in dither matrix */

View File

@@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2010, 2012-2018, D. R. Commander.
* Copyright (C) 2010, 2012-2020, D. R. Commander.
* mozjpeg Modifications:
* Copyright (C) 2014, Mozilla Corporation.
* For conditions of distribution and use, see the accompanying README file.
@@ -37,18 +37,18 @@
*/
#define JCOPYRIGHT \
"Copyright (C) 2009-2018 D. R. Commander\n" \
"Copyright (C) 2011-2016 Siarhei Siamashka\n" \
"Copyright (C) 2009-2020 D. R. Commander\n" \
"Copyright (C) 2011-2016 Siarhei Siamashka\n" \
"Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
"Copyright (C) 2015 Intel Corporation\n" \
"Copyright (C) 2015 Google, Inc.\n" \
"Copyright (C) 2015 Google, Inc.\n" \
"Copyright (C) 2014 Mozilla Corporation\n" \
"Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
"Copyright (C) 2013 Linaro Limited\n" \
"Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
"Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
"Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
"Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
"Copyright (C) 2013 Linaro Limited\n" \
"Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
"Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
"Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
"Copyright (C) 1991-2016 Thomas G. Lane, Guido Vollbeding"
#define JCOPYRIGHT_SHORT \
"Copyright (C) 1991-2018 The libjpeg-turbo Project and many others"
"Copyright (C) 1991-2020 The libjpeg-turbo Project and many others"

View File

@@ -6,7 +6,7 @@
* this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
* ----------------------------------------------------------------------------
* libjpeg-turbo Modifications:
* Copyright (C)2016, 2018 D. R. Commander. All Rights Reserved.
* Copyright (C)2016, 2018-2019 D. R. Commander. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -56,7 +56,7 @@
#include "./md5.h"
char *MD5End(MD5_CTX *ctx, char *buf)
static char *MD5End(MD5_CTX *ctx, char *buf)
{
int i;
unsigned char digest[LENGTH];
@@ -89,7 +89,7 @@ char *MD5FileChunk(const char *filename, char *buf, off_t ofs, off_t len)
off_t n;
MD5Init(&ctx);
#if _WIN32
#ifdef _WIN32
f = _open(filename, O_RDONLY | O_BINARY);
#else
f = open(filename, O_RDONLY);
@@ -123,12 +123,3 @@ char *MD5FileChunk(const char *filename, char *buf, off_t ofs, off_t len)
return 0;
return (MD5End(&ctx, buf));
}
char *MD5Data(const void *data, unsigned int len, char *buf)
{
MD5_CTX ctx;
MD5Init(&ctx);
MD5Update(&ctx, (unsigned char *)data, len);
return (MD5End(&ctx, buf));
}

View File

@@ -3,8 +3,9 @@
*
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1996, Thomas G. Lane.
* It was modified by The libjpeg-turbo Project to include only code relevant
* to libjpeg-turbo.
* Modified 2017 by Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2018, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -66,6 +67,7 @@ typedef struct _tga_source_struct {
U_CHAR tga_pixel[4];
int pixel_size; /* Bytes per Targa pixel (1 to 4) */
int cmap_length; /* colormap length */
/* State info for reading RLE-coded pixels; both counts must be init to 0 */
int block_count; /* # of pixels remaining in RLE block */
@@ -196,11 +198,14 @@ get_8bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
register JSAMPROW ptr;
register JDIMENSION col;
register JSAMPARRAY colormap = source->colormap;
int cmaplen = source->cmap_length;
ptr = source->pub.buffer[0];
for (col = cinfo->image_width; col > 0; col--) {
(*source->read_pixel) (source); /* Load next pixel into tga_pixel */
t = UCH(source->tga_pixel[0]);
if (t >= cmaplen)
ERREXIT(cinfo, JERR_TGA_BADPARMS);
*ptr++ = colormap[0][t];
*ptr++ = colormap[1][t];
*ptr++ = colormap[2][t];
@@ -452,12 +457,14 @@ start_input_tga(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
/* Allocate space to store the colormap */
source->colormap = (*cinfo->mem->alloc_sarray)
((j_common_ptr)cinfo, JPOOL_IMAGE, (JDIMENSION)maplen, (JDIMENSION)3);
source->cmap_length = (int)maplen;
/* and read it from the file */
read_colormap(source, (int)maplen, UCH(targaheader[7]));
} else {
if (cmaptype) /* but you promised a cmap! */
ERREXIT(cinfo, JERR_TGA_BADPARMS);
source->colormap = NULL;
source->cmap_length = 0;
}
cinfo->input_components = components;

View File

@@ -1,4 +1,4 @@
libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2, AVX2, NEON, AltiVec) to accelerate baseline JPEG compression and decompression on x86, x86-64, ARM, and PowerPC systems, as well as progressive JPEG compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg, all else being equal. On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized Huffman coding routines. In many cases, the performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate baseline JPEG compression and decompression on x86, x86-64, ARM, PowerPC, and MIPS systems, as well as progressive JPEG compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg, all else being equal. On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized Huffman coding routines. In many cases, the performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
libjpeg-turbo implements both the traditional libjpeg API as well as the less powerful but more straightforward TurboJPEG API. libjpeg-turbo also features colorspace extensions that allow it to compress from/decompress to 32-bit and big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java interface.

View File

@@ -62,15 +62,15 @@ Section "@CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@ (required)"
File "@CMAKE_CURRENT_BINARY_DIR@\libturbojpeg.a"
File "@CMAKE_CURRENT_BINARY_DIR@\libjpeg.dll.a"
File "@CMAKE_CURRENT_BINARY_DIR@\libjpeg.a"
SetOutPath $INSTDIR\lib\pkgconfig
File "@CMAKE_CURRENT_BINARY_DIR@\pkgscripts\libjpeg.pc"
File "@CMAKE_CURRENT_BINARY_DIR@\pkgscripts\libturbojpeg.pc"
!else
File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}turbojpeg.lib"
File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}turbojpeg-static.lib"
File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}jpeg.lib"
File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}jpeg-static.lib"
!endif
SetOutPath $INSTDIR\lib\pkgconfig
File "@CMAKE_CURRENT_BINARY_DIR@\pkgscripts\libjpeg.pc"
File "@CMAKE_CURRENT_BINARY_DIR@\pkgscripts\libturbojpeg.pc"
!ifdef JAVA
SetOutPath $INSTDIR\classes
File "@CMAKE_CURRENT_BINARY_DIR@\java\turbojpeg.jar"
@@ -130,8 +130,6 @@ Section "Uninstall"
Delete $INSTDIR\lib\libturbojpeg.a
Delete $INSTDIR\lib\libjpeg.dll.a
Delete $INSTDIR\lib\libjpeg.a
Delete $INSTDIR\lib\pkgconfig\libjpeg.pc
Delete $INSTDIR\lib\pkgconfig\libturbojpeg.pc
!else
Delete $INSTDIR\bin\jpeg@SO_MAJOR_VERSION@.dll
Delete $INSTDIR\bin\turbojpeg.dll
@@ -141,6 +139,8 @@ Section "Uninstall"
Delete $INSTDIR\lib\turbojpeg.lib
Delete $INSTDIR\lib\turbojpeg-static.lib
!endif
Delete $INSTDIR\lib\pkgconfig\libjpeg.pc
Delete $INSTDIR\lib\pkgconfig\libturbojpeg.pc
!ifdef JAVA
Delete $INSTDIR\classes\turbojpeg.jar
!endif
@@ -175,9 +175,7 @@ Section "Uninstall"
!endif
RMDir "$INSTDIR\include"
!ifdef GCC
RMDir "$INSTDIR\lib\pkgconfig"
!endif
RMDir "$INSTDIR\lib"
RMDir "$INSTDIR\doc"
!ifdef GCC

View File

@@ -58,6 +58,8 @@ BUILDDIRARMV7=@IOS_ARMV7_BUILD@
BUILDDIRARMV7S=@IOS_ARMV7S_BUILD@
BUILDDIRARMV8=@IOS_ARMV8_BUILD@
WITH_JAVA=@WITH_JAVA@
OSX_APP_CERT_NAME="@OSX_APP_CERT_NAME@"
OSX_INST_CERT_NAME="@OSX_INST_CERT_NAME@"
LIPO=lipo
PREFIX=@CMAKE_INSTALL_PREFIX@
@@ -228,7 +230,7 @@ if [ $UNIVERSAL = 1 -a "$BUILDDIRARMV7S" != "" ]; then
install_ios $BUILDDIRARMV7S ARMv7s armv7s arm
fi
if [ $UNIVERSAL = 1 -a "BUILDDIRARMV8" != "" ]; then
if [ $UNIVERSAL = 1 -a "$BUILDDIRARMV8" != "" ]; then
install_ios $BUILDDIRARMV8 ARMv8 armv8 arm64
fi
@@ -258,11 +260,25 @@ cp $SRCDIR/release/License.rtf $SRCDIR/release/Welcome.rtf $SRCDIR/release/ReadM
mkdir $TMPDIR/dmg
pkgbuild --root $PKGROOT --version $VERSION.$BUILD --identifier @PKGID@ \
$TMPDIR/pkg/$PKGNAME.pkg
SUFFIX=
if [ "$OSX_INST_CERT_NAME" != "" ]; then
SUFFIX=-unsigned
fi
productbuild --distribution pkgscripts/Distribution.xml \
--package-path $TMPDIR/pkg/ --resources $TMPDIR/pkg/ \
$TMPDIR/dmg/$PKGNAME.pkg
$TMPDIR/dmg/$PKGNAME$SUFFIX.pkg
if [ "$OSX_INST_CERT_NAME" != "" ]; then
productsign --sign "$OSX_INST_CERT_NAME" --timestamp \
$TMPDIR/dmg/$PKGNAME$SUFFIX.pkg $TMPDIR/dmg/$PKGNAME.pkg
rm -r $TMPDIR/dmg/$PKGNAME$SUFFIX.pkg
pkgutil --check-signature $TMPDIR/dmg/$PKGNAME.pkg
fi
hdiutil create -fs HFS+ -volname $PKGNAME-$VERSION \
-srcfolder "$TMPDIR/dmg" $TMPDIR/$PKGNAME-$VERSION.dmg
if [ "$OSX_APP_CERT_NAME" != "" ]; then
codesign -s "$OSX_APP_CERT_NAME" --timestamp $TMPDIR/$PKGNAME-$VERSION.dmg
codesign -vv $TMPDIR/$PKGNAME-$VERSION.dmg
fi
cp $TMPDIR/$PKGNAME-$VERSION.dmg .
exit

View File

@@ -1,7 +1,7 @@
%global _docdir %{_defaultdocdir}/%{name}-%{version}
%define _prefix @CMAKE_INSTALL_PREFIX@
%define _bindir @CMAKE_INSTALL_FULL_BINDIR@
%define _datarootdir @CMAKE_INSTALL_FULL_DATAROOTDIR@
%define _docdir %{_defaultdocdir}/%{name}-%{version}
%define _includedir @CMAKE_INSTALL_FULL_INCLUDEDIR@
%define _javadir @CMAKE_INSTALL_FULL_JAVADIR@
%define _mandir @CMAKE_INSTALL_FULL_MANDIR@
@@ -43,7 +43,7 @@ Group: System Environment/Libraries
Release: @BUILD@
License: BSD-style
BuildRoot: %{_blddir}/%{name}-buildroot-%{version}-%{release}
Prereq: /sbin/ldconfig
Requires: /sbin/ldconfig
%if "%{_bits}" == "64"
Provides: %{name} = %{version}-%{release}, @CMAKE_PROJECT_NAME@ = %{version}-%{release}, libturbojpeg.so()(64bit)
%else
@@ -51,14 +51,14 @@ Provides: %{name} = %{version}-%{release}, @CMAKE_PROJECT_NAME@ = %{version}-%{r
%endif
%description
libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2,
AVX2, NEON, AltiVec) to accelerate baseline JPEG compression and decompression
on x86, x86-64, ARM, and PowerPC systems, as well as progressive JPEG
compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is
generally 2-6x as fast as libjpeg, all else being equal. On other types of
systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by
virtue of its highly-optimized Huffman coding routines. In many cases, the
performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate
baseline JPEG compression and decompression on x86, x86-64, ARM, PowerPC, and
MIPS systems, as well as progressive JPEG compression on x86 and x86-64
systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg,
all else being equal. On other types of systems, libjpeg-turbo can still
outperform libjpeg by a significant amount, by virtue of its highly-optimized
Huffman coding routines. In many cases, the performance of libjpeg-turbo
rivals that of proprietary high-speed JPEG codecs.
libjpeg-turbo implements both the traditional libjpeg API as well as the less
powerful but more straightforward TurboJPEG API. libjpeg-turbo also features
@@ -183,7 +183,7 @@ rm -rf $RPM_BUILD_ROOT
%if "%{_enable_static}" == "1"
%{_libdir}/libjpeg.a
%endif
%{_libdir}/pkgconfig
%dir %{_libdir}/pkgconfig
%{_libdir}/pkgconfig/libjpeg.pc
%if "%{_with_turbojpeg}" == "1"
%if "%{_enable_shared}" == "1" || "%{_with_java}" == "1"

View File

@@ -23,7 +23,7 @@ foreach(src ${JPEG_SOURCES})
set(JPEG_SRCS ${JPEG_SRCS} ../${src})
endforeach()
if(WITH_SIMD AND MSVC_IDE)
if(WITH_SIMD AND (MSVC_IDE OR XCODE))
# This tells CMake that the "source" files haven't been generated yet
set_source_files_properties(${SIMD_OBJS} PROPERTIES GENERATED 1)
endif()
@@ -55,7 +55,8 @@ if(MAPFLAG)
LINK_FLAGS "${MAPFLAG}${CMAKE_CURRENT_BINARY_DIR}/../libjpeg.map")
endif()
if(MSVC)
set_target_properties(jpeg PROPERTIES SUFFIX ${SO_MAJOR_VERSION}.dll)
set_target_properties(jpeg PROPERTIES
RUNTIME_OUTPUT_NAME jpeg${SO_MAJOR_VERSION})
# The jsimd_*.c file is built using /MT, so this prevents a linker warning.
set_target_properties(jpeg PROPERTIES LINK_FLAGS "/NODEFAULTLIB:LIBCMT /NODEFAULTLIB:LIBCMTD")
elseif(MINGW)
@@ -94,3 +95,8 @@ install(TARGETS jpeg cjpeg djpeg jpegtran
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
if(NOT CMAKE_VERSION VERSION_LESS "3.1" AND MSVC AND
CMAKE_C_LINKER_SUPPORTS_PDB)
install(FILES "$<TARGET_PDB_FILE:jpeg>"
DESTINATION ${CMAKE_INSTALL_BINDIR} OPTIONAL)
endif()

View File

@@ -38,6 +38,14 @@ elseif(CPU_TYPE STREQUAL "i386")
endif()
endif()
if(NOT REQUIRE_SIMD)
include(CheckLanguage)
check_language(ASM_NASM)
if(NOT CMAKE_ASM_NASM_COMPILER)
simd_fail("SIMD extensions disabled: could not find NASM compiler")
return()
endif()
endif()
enable_language(ASM_NASM)
message(STATUS "CMAKE_ASM_NASM_COMPILER = ${CMAKE_ASM_NASM_COMPILER}")
@@ -74,12 +82,12 @@ if(CMAKE_ASM_NASM_COMPILER_TYPE MATCHES "yasm")
if(${var} STREQUAL "-g")
if(CMAKE_ASM_NASM_DEBUG_FORMAT)
set_property(CACHE ${var} PROPERTY VALUE "-g ${CMAKE_ASM_NASM_DEBUG_FORMAT}")
else()
else()
set_property(CACHE ${var} PROPERTY VALUE "")
endif()
endif()
endforeach()
endif()
endif()
if(NOT WIN32 AND (CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED))
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DPIC")
@@ -135,6 +143,9 @@ endif()
if(MSVC_IDE)
set(OBJDIR "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}")
string(REGEX REPLACE " " ";" CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS}")
elseif(XCODE)
set(OBJDIR "${CMAKE_CURRENT_BINARY_DIR}")
string(REGEX REPLACE " " ";" CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS}")
endif()
file(GLOB INC_FILES nasm/*.inc)
@@ -162,25 +173,25 @@ foreach(file ${SIMD_SOURCES})
${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE})
endif()
set(OBJECT_DEPENDS ${OBJECT_DEPENDS} ${INC_FILES})
if(MSVC_IDE)
if(MSVC_IDE OR XCODE)
# The CMake Visual Studio generators do not work properly with the ASM_NASM
# language, so we have to go rogue here and use a custom command like we
# did in prior versions of libjpeg-turbo. (This is why we can't have nice
# things.)
string(REGEX REPLACE "${CPU_TYPE}/" "" filename ${file})
set(SIMD_OBJ ${OBJDIR}/${filename}.obj)
set(SIMD_OBJ ${OBJDIR}/${filename}${CMAKE_C_OUTPUT_EXTENSION})
add_custom_command(OUTPUT ${SIMD_OBJ} DEPENDS ${file} ${OBJECT_DEPENDS}
COMMAND ${CMAKE_ASM_NASM_COMPILER} -f${CMAKE_ASM_NASM_OBJECT_FORMAT}
${CMAKE_ASM_NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}
-o${SIMD_OBJ})
set(SIMD_OBJS ${SIMD_OBJS} ${SIMD_OBJ})
set(SIMD_OBJS ${SIMD_OBJS} ${SIMD_OBJ})
else()
set_source_files_properties(${file} PROPERTIES OBJECT_DEPENDS
"${OBJECT_DEPENDS}")
endif()
endforeach()
if(MSVC_IDE)
if(MSVC_IDE OR XCODE)
set(SIMD_OBJS ${SIMD_OBJS} PARENT_SCOPE)
add_library(simd OBJECT ${CPU_TYPE}/jsimd.c)
add_custom_target(simd-objs DEPENDS ${SIMD_OBJS})
@@ -262,7 +273,7 @@ endif()
# MIPS (GAS)
###############################################################################
elseif(CPU_TYPE STREQUAL "mips")
elseif(CPU_TYPE STREQUAL "mips" OR CPU_TYPE STREQUAL "mipsel")
enable_language(ASM)
@@ -293,7 +304,7 @@ if(NOT HAVE_DSPR2)
return()
endif()
add_library(simd OBJECT ${CPU_TYPE}/jsimd_dspr2.S ${CPU_TYPE}/jsimd.c)
add_library(simd OBJECT mips/jsimd_dspr2.S mips/jsimd.c)
if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED)
set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1)

View File

@@ -5,6 +5,7 @@
* Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
* Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
* Copyright (C) 2019, Google LLC.
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -30,7 +31,7 @@
static unsigned int simd_support = ~0;
static unsigned int simd_huffman = 1;
#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
@@ -105,7 +106,7 @@ init_simd(void)
#ifndef NO_GETENV
char *env = NULL;
#endif
#if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
int bufsize = 1024; /* an initial guess for the line buffer size limit */
#endif

View File

@@ -31,6 +31,251 @@
.section .note.GNU-stack, "", %progbits /* mark stack as non-executable */
#endif
#if defined(__APPLE__)
.section __DATA, __const
#else
.section .rodata, "a", %progbits
#endif
/* Constants for jsimd_idct_islow_neon() */
#define F_0_298 2446 /* FIX(0.298631336) */
#define F_0_390 3196 /* FIX(0.390180644) */
#define F_0_541 4433 /* FIX(0.541196100) */
#define F_0_765 6270 /* FIX(0.765366865) */
#define F_0_899 7373 /* FIX(0.899976223) */
#define F_1_175 9633 /* FIX(1.175875602) */
#define F_1_501 12299 /* FIX(1.501321110) */
#define F_1_847 15137 /* FIX(1.847759065) */
#define F_1_961 16069 /* FIX(1.961570560) */
#define F_2_053 16819 /* FIX(2.053119869) */
#define F_2_562 20995 /* FIX(2.562915447) */
#define F_3_072 25172 /* FIX(3.072711026) */
.balign 16
Ljsimd_idct_islow_neon_consts:
.short F_0_298
.short -F_0_390
.short F_0_541
.short F_0_765
.short - F_0_899
.short F_1_175
.short F_1_501
.short - F_1_847
.short - F_1_961
.short F_2_053
.short - F_2_562
.short F_3_072
.short 0 /* padding */
.short 0
.short 0
.short 0
#undef F_0_298
#undef F_0_390
#undef F_0_541
#undef F_0_765
#undef F_0_899
#undef F_1_175
#undef F_1_501
#undef F_1_847
#undef F_1_961
#undef F_2_053
#undef F_2_562
#undef F_3_072
/* Constants for jsimd_idct_ifast_neon() */
.balign 16
Ljsimd_idct_ifast_neon_consts:
.short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */
.short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */
.short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */
.short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */
/* Constants for jsimd_idct_4x4_neon() and jsimd_idct_2x2_neon() */
#define CONST_BITS 13
#define FIX_0_211164243 (1730) /* FIX(0.211164243) */
#define FIX_0_509795579 (4176) /* FIX(0.509795579) */
#define FIX_0_601344887 (4926) /* FIX(0.601344887) */
#define FIX_0_720959822 (5906) /* FIX(0.720959822) */
#define FIX_0_765366865 (6270) /* FIX(0.765366865) */
#define FIX_0_850430095 (6967) /* FIX(0.850430095) */
#define FIX_0_899976223 (7373) /* FIX(0.899976223) */
#define FIX_1_061594337 (8697) /* FIX(1.061594337) */
#define FIX_1_272758580 (10426) /* FIX(1.272758580) */
#define FIX_1_451774981 (11893) /* FIX(1.451774981) */
#define FIX_1_847759065 (15137) /* FIX(1.847759065) */
#define FIX_2_172734803 (17799) /* FIX(2.172734803) */
#define FIX_2_562915447 (20995) /* FIX(2.562915447) */
#define FIX_3_624509785 (29692) /* FIX(3.624509785) */
.balign 16
Ljsimd_idct_4x4_neon_consts:
.short FIX_1_847759065 /* v0.h[0] */
.short -FIX_0_765366865 /* v0.h[1] */
.short -FIX_0_211164243 /* v0.h[2] */
.short FIX_1_451774981 /* v0.h[3] */
.short -FIX_2_172734803 /* d1[0] */
.short FIX_1_061594337 /* d1[1] */
.short -FIX_0_509795579 /* d1[2] */
.short -FIX_0_601344887 /* d1[3] */
.short FIX_0_899976223 /* v2.h[0] */
.short FIX_2_562915447 /* v2.h[1] */
.short 1 << (CONST_BITS + 1) /* v2.h[2] */
.short 0 /* v2.h[3] */
.balign 8
Ljsimd_idct_2x2_neon_consts:
.short -FIX_0_720959822 /* v14[0] */
.short FIX_0_850430095 /* v14[1] */
.short -FIX_1_272758580 /* v14[2] */
.short FIX_3_624509785 /* v14[3] */
/* Constants for jsimd_ycc_*_neon() */
.balign 16
Ljsimd_ycc_rgb_neon_consts:
.short 0, 0, 0, 0
.short 22971, -11277, -23401, 29033
.short -128, -128, -128, -128
.short -128, -128, -128, -128
/* Constants for jsimd_*_ycc_neon() */
.balign 16
Ljsimd_rgb_ycc_neon_consts:
.short 19595, 38470, 7471, 11059
.short 21709, 32768, 27439, 5329
.short 32767, 128, 32767, 128
.short 32767, 128, 32767, 128
/* Constants for jsimd_fdct_islow_neon() */
#define F_0_298 2446 /* FIX(0.298631336) */
#define F_0_390 3196 /* FIX(0.390180644) */
#define F_0_541 4433 /* FIX(0.541196100) */
#define F_0_765 6270 /* FIX(0.765366865) */
#define F_0_899 7373 /* FIX(0.899976223) */
#define F_1_175 9633 /* FIX(1.175875602) */
#define F_1_501 12299 /* FIX(1.501321110) */
#define F_1_847 15137 /* FIX(1.847759065) */
#define F_1_961 16069 /* FIX(1.961570560) */
#define F_2_053 16819 /* FIX(2.053119869) */
#define F_2_562 20995 /* FIX(2.562915447) */
#define F_3_072 25172 /* FIX(3.072711026) */
.balign 16
Ljsimd_fdct_islow_neon_consts:
.short F_0_298
.short -F_0_390
.short F_0_541
.short F_0_765
.short - F_0_899
.short F_1_175
.short F_1_501
.short - F_1_847
.short - F_1_961
.short F_2_053
.short - F_2_562
.short F_3_072
.short 0 /* padding */
.short 0
.short 0
.short 0
#undef F_0_298
#undef F_0_390
#undef F_0_541
#undef F_0_765
#undef F_0_899
#undef F_1_175
#undef F_1_501
#undef F_1_847
#undef F_1_961
#undef F_2_053
#undef F_2_562
#undef F_3_072
/* Constants for jsimd_fdct_ifast_neon() */
.balign 16
Ljsimd_fdct_ifast_neon_consts:
.short (98 * 128) /* XFIX_0_382683433 */
.short (139 * 128) /* XFIX_0_541196100 */
.short (181 * 128) /* XFIX_0_707106781 */
.short (334 * 128 - 256 * 128) /* XFIX_1_306562965 */
/* Constants for jsimd_h2*_downsample_neon() */
.balign 16
Ljsimd_h2_downsample_neon_consts:
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F /* diff 0 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0E /* diff 1 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0D, 0x0D /* diff 2 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0C, 0x0C, 0x0C /* diff 3 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x0A, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B /* diff 4 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A /* diff 5 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09 /* diff 6 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 /* diff 7 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07 /* diff 8 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x06, \
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06 /* diff 9 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x05, \
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05 /* diff 10 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x04, \
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 /* diff 11 */
.byte 0x00, 0x01, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, \
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 /* diff 12 */
.byte 0x00, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, \
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02 /* diff 13 */
.byte 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, \
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 /* diff 14 */
.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* diff 15 */
/* Constants for jsimd_huff_encode_one_block_neon() */
.balign 16
Ljsimd_huff_encode_one_block_neon_consts:
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, \
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
.byte 0, 1, 2, 3, 16, 17, 32, 33, \
18, 19, 4, 5, 6, 7, 20, 21 /* L0 => L3 : 4 lines OK */
.byte 34, 35, 48, 49, 255, 255, 50, 51, \
36, 37, 22, 23, 8, 9, 10, 11 /* L0 => L3 : 4 lines OK */
.byte 8, 9, 22, 23, 36, 37, 50, 51, \
255, 255, 255, 255, 255, 255, 52, 53 /* L1 => L4 : 4 lines OK */
.byte 54, 55, 40, 41, 26, 27, 12, 13, \
14, 15, 28, 29, 42, 43, 56, 57 /* L0 => L3 : 4 lines OK */
.byte 6, 7, 20, 21, 34, 35, 48, 49, \
50, 51, 36, 37, 22, 23, 8, 9 /* L4 => L7 : 4 lines OK */
.byte 42, 43, 28, 29, 14, 15, 30, 31, \
44, 45, 58, 59, 255, 255, 255, 255 /* L1 => L4 : 4 lines OK */
.byte 255, 255, 255, 255, 56, 57, 42, 43, \
28, 29, 14, 15, 30, 31, 44, 45 /* L3 => L6 : 4 lines OK */
.byte 26, 27, 40, 41, 42, 43, 28, 29, \
14, 15, 30, 31, 44, 45, 46, 47 /* L5 => L7 : 3 lines OK */
.byte 255, 255, 255, 255, 0, 1, 255, 255, \
255, 255, 255, 255, 255, 255, 255, 255 /* L4 : 1 lines OK */
.byte 255, 255, 255, 255, 255, 255, 255, 255, \
0, 1, 16, 17, 2, 3, 255, 255 /* L5 => L6 : 2 lines OK */
.byte 255, 255, 255, 255, 255, 255, 255, 255, \
255, 255, 255, 255, 8, 9, 22, 23 /* L5 => L6 : 2 lines OK */
.byte 4, 5, 6, 7, 255, 255, 255, 255, \
255, 255, 255, 255, 255, 255, 255, 255 /* L7 : 1 line OK */
.text
@@ -55,6 +300,17 @@ _\fname:
#endif
.endm
/* Get symbol location */
.macro get_symbol_loc reg, symbol
#ifdef __APPLE__
adrp \reg, \symbol@PAGE
add \reg, \reg, \symbol@PAGEOFF
#else
adrp \reg, \symbol
add \reg, \reg, :lo12:\symbol
#endif
.endm
/* Transpose elements of single 128 bit registers */
.macro transpose_single x0, x1, xi, xilen, literal
ins \xi\xilen[0], \x0\xilen[0]
@@ -63,7 +319,7 @@ _\fname:
trn2 \x1\literal, \xi\literal, \x1\literal
.endm
/* Transpose elements of 2 differnet registers */
/* Transpose elements of 2 different registers */
.macro transpose x0, x1, xi, xilen, literal
mov \xi\xilen, \x0\xilen
trn1 \x0\literal, \x0\literal, \x1\literal
@@ -139,51 +395,6 @@ _\fname:
#define CONST_BITS 13
#define PASS1_BITS 2
#define F_0_298 2446 /* FIX(0.298631336) */
#define F_0_390 3196 /* FIX(0.390180644) */
#define F_0_541 4433 /* FIX(0.541196100) */
#define F_0_765 6270 /* FIX(0.765366865) */
#define F_0_899 7373 /* FIX(0.899976223) */
#define F_1_175 9633 /* FIX(1.175875602) */
#define F_1_501 12299 /* FIX(1.501321110) */
#define F_1_847 15137 /* FIX(1.847759065) */
#define F_1_961 16069 /* FIX(1.961570560) */
#define F_2_053 16819 /* FIX(2.053119869) */
#define F_2_562 20995 /* FIX(2.562915447) */
#define F_3_072 25172 /* FIX(3.072711026) */
.balign 16
Ljsimd_idct_islow_neon_consts:
.short F_0_298
.short -F_0_390
.short F_0_541
.short F_0_765
.short - F_0_899
.short F_1_175
.short F_1_501
.short - F_1_847
.short - F_1_961
.short F_2_053
.short - F_2_562
.short F_3_072
.short 0 /* padding */
.short 0
.short 0
.short 0
#undef F_0_298
#undef F_0_390
#undef F_0_541
#undef F_0_765
#undef F_0_899
#undef F_1_175
#undef F_1_501
#undef F_1_847
#undef F_1_961
#undef F_2_053
#undef F_2_562
#undef F_3_072
#define XFIX_P_0_298 v0.h[0]
#define XFIX_N_0_390 v0.h[1]
#define XFIX_P_0_541 v0.h[2]
@@ -217,7 +428,7 @@ asm_function jsimd_idct_islow_neon
uxtw x3, w3
sub sp, sp, #64
adr x15, Ljsimd_idct_islow_neon_consts
get_symbol_loc x15, Ljsimd_idct_islow_neon_consts
mov x10, sp
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x10], #32
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x10], #32
@@ -791,13 +1002,6 @@ asm_function jsimd_idct_islow_neon
#define XFIX_1_847759065 v0.h[2]
#define XFIX_2_613125930 v0.h[3]
.balign 16
Ljsimd_idct_ifast_neon_consts:
.short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */
.short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */
.short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */
.short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */
asm_function jsimd_idct_ifast_neon
DCT_TABLE .req x0
@@ -832,7 +1036,7 @@ asm_function jsimd_idct_ifast_neon
* 7 | d30 | d31 ( v23.8h )
*/
/* Save NEON registers used in fast IDCT */
adr TMP5, Ljsimd_idct_ifast_neon_consts
get_symbol_loc TMP5, Ljsimd_idct_ifast_neon_consts
ld1 {v16.8h, v17.8h}, [COEF_BLOCK], 32
ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32
ld1 {v18.8h, v19.8h}, [COEF_BLOCK], 32
@@ -1023,38 +1227,6 @@ asm_function jsimd_idct_ifast_neon
* but readability will suffer somewhat.
*/
#define CONST_BITS 13
#define FIX_0_211164243 (1730) /* FIX(0.211164243) */
#define FIX_0_509795579 (4176) /* FIX(0.509795579) */
#define FIX_0_601344887 (4926) /* FIX(0.601344887) */
#define FIX_0_720959822 (5906) /* FIX(0.720959822) */
#define FIX_0_765366865 (6270) /* FIX(0.765366865) */
#define FIX_0_850430095 (6967) /* FIX(0.850430095) */
#define FIX_0_899976223 (7373) /* FIX(0.899976223) */
#define FIX_1_061594337 (8697) /* FIX(1.061594337) */
#define FIX_1_272758580 (10426) /* FIX(1.272758580) */
#define FIX_1_451774981 (11893) /* FIX(1.451774981) */
#define FIX_1_847759065 (15137) /* FIX(1.847759065) */
#define FIX_2_172734803 (17799) /* FIX(2.172734803) */
#define FIX_2_562915447 (20995) /* FIX(2.562915447) */
#define FIX_3_624509785 (29692) /* FIX(3.624509785) */
.balign 16
Ljsimd_idct_4x4_neon_consts:
.short FIX_1_847759065 /* v0.h[0] */
.short -FIX_0_765366865 /* v0.h[1] */
.short -FIX_0_211164243 /* v0.h[2] */
.short FIX_1_451774981 /* v0.h[3] */
.short -FIX_2_172734803 /* d1[0] */
.short FIX_1_061594337 /* d1[1] */
.short -FIX_0_509795579 /* d1[2] */
.short -FIX_0_601344887 /* d1[3] */
.short FIX_0_899976223 /* v2.h[0] */
.short FIX_2_562915447 /* v2.h[1] */
.short 1 << (CONST_BITS + 1) /* v2.h[2] */
.short 0 /* v2.h[3] */
.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29
smull v28.4s, \x4, v2.h[2]
smlal v28.4s, \x8, v0.h[0]
@@ -1121,7 +1293,7 @@ asm_function jsimd_idct_4x4_neon
sub sp, sp, 64
mov x9, sp
/* Load constants (v3.4h is just used for padding) */
adr TMP4, Ljsimd_idct_4x4_neon_consts
get_symbol_loc TMP4, Ljsimd_idct_4x4_neon_consts
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32
ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4]
@@ -1264,13 +1436,6 @@ asm_function jsimd_idct_4x4_neon
* bit exact compatibility with jpeg-6b.
*/
.balign 8
Ljsimd_idct_2x2_neon_consts:
.short -FIX_0_720959822 /* v14[0] */
.short FIX_0_850430095 /* v14[1] */
.short -FIX_1_272758580 /* v14[2] */
.short FIX_3_624509785 /* v14[3] */
.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27
sshll v15.4s, \x4, #15
smull v26.4s, \x6, v14.h[3]
@@ -1311,7 +1476,7 @@ asm_function jsimd_idct_2x2_neon
mov x9, sp
/* Load constants */
adr TMP2, Ljsimd_idct_2x2_neon_consts
get_symbol_loc TMP2, Ljsimd_idct_2x2_neon_consts
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32
ld1 {v14.4h}, [TMP2]
@@ -1663,21 +1828,6 @@ asm_function jsimd_idct_2x2_neon
do_yuv_to_rgb_stage2
.endm
/* Apple gas crashes on adrl, work around that by using adr.
* But this requires a copy of these constants for each function.
*/
.balign 16
.if \fast_st3 == 1
Ljsimd_ycc_\colorid\()_neon_consts:
.else
Ljsimd_ycc_\colorid\()_neon_slowst3_consts:
.endif
.short 0, 0, 0, 0
.short 22971, -11277, -23401, 29033
.short -128, -128, -128, -128
.short -128, -128, -128, -128
.if \fast_st3 == 1
asm_function jsimd_ycc_\colorid\()_convert_neon
.else
@@ -1703,11 +1853,7 @@ asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3
mov x9, sp
/* Load constants to d1, d2, d3 (v0.4h is just used for padding) */
.if \fast_st3 == 1
adr x15, Ljsimd_ycc_\colorid\()_neon_consts
.else
adr x15, Ljsimd_ycc_\colorid\()_neon_slowst3_consts
.endif
get_symbol_loc x15, Ljsimd_ycc_rgb_neon_consts
/* Save NEON registers */
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32
@@ -2004,17 +2150,6 @@ generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, .4h, 1, .4h, 0, .4h, .8b,
do_rgb_to_yuv_stage1
.endm
.balign 16
.if \fast_ld3 == 1
Ljsimd_\colorid\()_ycc_neon_consts:
.else
Ljsimd_\colorid\()_ycc_neon_slowld3_consts:
.endif
.short 19595, 38470, 7471, 11059
.short 21709, 32768, 27439, 5329
.short 32767, 128, 32767, 128
.short 32767, 128, 32767, 128
.if \fast_ld3 == 1
asm_function jsimd_\colorid\()_ycc_convert_neon
.else
@@ -2037,11 +2172,7 @@ asm_function jsimd_\colorid\()_ycc_convert_neon_slowld3
N .req w12
/* Load constants to d0, d1, d2, d3 */
.if \fast_ld3 == 1
adr x13, Ljsimd_\colorid\()_ycc_neon_consts
.else
adr x13, Ljsimd_\colorid\()_ycc_neon_slowld3_consts
.endif
get_symbol_loc x13, Ljsimd_rgb_ycc_neon_consts
ld1 {v0.8h, v1.8h}, [x13]
ldr OUTPUT_BUF0, [OUTPUT_BUF]
@@ -2241,50 +2372,6 @@ asm_function jsimd_convsamp_neon
#define DESCALE_P1 (CONST_BITS - PASS1_BITS)
#define DESCALE_P2 (CONST_BITS + PASS1_BITS)
#define F_0_298 2446 /* FIX(0.298631336) */
#define F_0_390 3196 /* FIX(0.390180644) */
#define F_0_541 4433 /* FIX(0.541196100) */
#define F_0_765 6270 /* FIX(0.765366865) */
#define F_0_899 7373 /* FIX(0.899976223) */
#define F_1_175 9633 /* FIX(1.175875602) */
#define F_1_501 12299 /* FIX(1.501321110) */
#define F_1_847 15137 /* FIX(1.847759065) */
#define F_1_961 16069 /* FIX(1.961570560) */
#define F_2_053 16819 /* FIX(2.053119869) */
#define F_2_562 20995 /* FIX(2.562915447) */
#define F_3_072 25172 /* FIX(3.072711026) */
.balign 16
Ljsimd_fdct_islow_neon_consts:
.short F_0_298
.short -F_0_390
.short F_0_541
.short F_0_765
.short - F_0_899
.short F_1_175
.short F_1_501
.short - F_1_847
.short - F_1_961
.short F_2_053
.short - F_2_562
.short F_3_072
.short 0 /* padding */
.short 0
.short 0
.short 0
#undef F_0_298
#undef F_0_390
#undef F_0_541
#undef F_0_765
#undef F_0_899
#undef F_1_175
#undef F_1_501
#undef F_1_847
#undef F_1_961
#undef F_2_053
#undef F_2_562
#undef F_3_072
#define XFIX_P_0_298 v0.h[0]
#define XFIX_N_0_390 v0.h[1]
#define XFIX_P_0_541 v0.h[2]
@@ -2304,7 +2391,7 @@ asm_function jsimd_fdct_islow_neon
TMP .req x9
/* Load constants */
adr TMP, Ljsimd_fdct_islow_neon_consts
get_symbol_loc TMP, Ljsimd_fdct_islow_neon_consts
ld1 {v0.8h, v1.8h}, [TMP]
/* Save NEON registers */
@@ -2583,20 +2670,13 @@ asm_function jsimd_fdct_islow_neon
#define XFIX_0_707106781 v0.h[2]
#define XFIX_1_306562965 v0.h[3]
.balign 16
Ljsimd_fdct_ifast_neon_consts:
.short (98 * 128) /* XFIX_0_382683433 */
.short (139 * 128) /* XFIX_0_541196100 */
.short (181 * 128) /* XFIX_0_707106781 */
.short (334 * 128 - 256 * 128) /* XFIX_1_306562965 */
asm_function jsimd_fdct_ifast_neon
DATA .req x0
TMP .req x9
/* Load constants */
adr TMP, Ljsimd_fdct_ifast_neon_consts
get_symbol_loc TMP, Ljsimd_fdct_ifast_neon_consts
ld1 {v0.4h}, [TMP]
/* Load all DATA into NEON registers with the following allocation:
@@ -2775,41 +2855,6 @@ asm_function jsimd_quantize_neon
* JSAMPARRAY input_data, JSAMPARRAY output_data);
*/
.balign 16
Ljsimd_h2_downsample_neon_consts:
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F /* diff 0 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0E /* diff 1 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0D, 0x0D /* diff 2 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0C, 0x0C, 0x0C /* diff 3 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x0A, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B /* diff 4 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A /* diff 5 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09 /* diff 6 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 /* diff 7 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07 /* diff 8 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x06, \
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06 /* diff 9 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x05, \
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05 /* diff 10 */
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x04, \
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 /* diff 11 */
.byte 0x00, 0x01, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, \
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 /* diff 12 */
.byte 0x00, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, \
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02 /* diff 13 */
.byte 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, \
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 /* diff 14 */
.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* diff 15 */
asm_function jsimd_h2v1_downsample_neon
IMAGE_WIDTH .req x0
MAX_V_SAMP .req x1
@@ -2827,7 +2872,7 @@ asm_function jsimd_h2v1_downsample_neon
mov TMPDUP, #0x10000
lsl TMP2, BLOCK_WIDTH, #4
sub TMP2, TMP2, IMAGE_WIDTH
adr TMP3, Ljsimd_h2_downsample_neon_consts
get_symbol_loc TMP3, Ljsimd_h2_downsample_neon_consts
add TMP3, TMP3, TMP2, lsl #4
dup v16.4s, TMPDUP
ld1 {v18.16b}, [TMP3]
@@ -2906,7 +2951,7 @@ asm_function jsimd_h2v2_downsample_neon
lsl TMP2, BLOCK_WIDTH, #4
lsl TMPDUP, TMPDUP, #17
sub TMP2, TMP2, IMAGE_WIDTH
adr TMP3, Ljsimd_h2_downsample_neon_consts
get_symbol_loc TMP3, Ljsimd_h2_downsample_neon_consts
orr TMPDUP, TMPDUP, #1
add TMP3, TMP3, TMP2, lsl #4
dup v16.4s, TMPDUP
@@ -3012,41 +3057,6 @@ asm_function jsimd_h2v2_downsample_neon
.macro generate_jsimd_huff_encode_one_block fast_tbl
.balign 16
.if \fast_tbl == 1
Ljsimd_huff_encode_one_block_neon_consts:
.else
Ljsimd_huff_encode_one_block_neon_slowtbl_consts:
.endif
.byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, \
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
.if \fast_tbl == 1
.byte 0, 1, 2, 3, 16, 17, 32, 33, \
18, 19, 4, 5, 6, 7, 20, 21 /* L0 => L3 : 4 lines OK */
.byte 34, 35, 48, 49, 255, 255, 50, 51, \
36, 37, 22, 23, 8, 9, 10, 11 /* L0 => L3 : 4 lines OK */
.byte 8, 9, 22, 23, 36, 37, 50, 51, \
255, 255, 255, 255, 255, 255, 52, 53 /* L1 => L4 : 4 lines OK */
.byte 54, 55, 40, 41, 26, 27, 12, 13, \
14, 15, 28, 29, 42, 43, 56, 57 /* L0 => L3 : 4 lines OK */
.byte 6, 7, 20, 21, 34, 35, 48, 49, \
50, 51, 36, 37, 22, 23, 8, 9 /* L4 => L7 : 4 lines OK */
.byte 42, 43, 28, 29, 14, 15, 30, 31, \
44, 45, 58, 59, 255, 255, 255, 255 /* L1 => L4 : 4 lines OK */
.byte 255, 255, 255, 255, 56, 57, 42, 43, \
28, 29, 14, 15, 30, 31, 44, 45 /* L3 => L6 : 4 lines OK */
.byte 26, 27, 40, 41, 42, 43, 28, 29, \
14, 15, 30, 31, 44, 45, 46, 47 /* L5 => L7 : 3 lines OK */
.byte 255, 255, 255, 255, 0, 1, 255, 255, \
255, 255, 255, 255, 255, 255, 255, 255 /* L4 : 1 lines OK */
.byte 255, 255, 255, 255, 255, 255, 255, 255, \
0, 1, 16, 17, 2, 3, 255, 255 /* L5 => L6 : 2 lines OK */
.byte 255, 255, 255, 255, 255, 255, 255, 255, \
255, 255, 255, 255, 8, 9, 22, 23 /* L5 => L6 : 2 lines OK */
.byte 4, 5, 6, 7, 255, 255, 255, 255, \
255, 255, 255, 255, 255, 255, 255, 255 /* L7 : 1 line OK */
.endif
.if \fast_tbl == 1
asm_function jsimd_huff_encode_one_block_neon
.else
@@ -3056,11 +3066,7 @@ asm_function jsimd_huff_encode_one_block_neon_slowtbl
sub BUFFER, BUFFER, #0x1 /* BUFFER=buffer-- */
/* Save ARM registers */
stp x19, x20, [sp]
.if \fast_tbl == 1
adr x15, Ljsimd_huff_encode_one_block_neon_consts
.else
adr x15, Ljsimd_huff_encode_one_block_neon_slowtbl_consts
.endif
get_symbol_loc x15, Ljsimd_huff_encode_one_block_neon_consts
ldr PUT_BUFFER, [x0, #0x10]
ldr PUT_BITSw, [x0, #0x18]
ldrsh w12, [x2] /* load DC coeff in w12 */

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
@@ -110,12 +108,12 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
test cl, SIZEOF_BYTE
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
movzx eax, BYTE [esi+ecx]
movzx eax, byte [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
movzx edx, WORD [esi+ecx]
movzx edx, word [esi+ecx]
shl eax, WORD_BIT
or eax, edx
.column_ld4:

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
@@ -111,13 +109,13 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
xor eax, eax
mov al, BYTE [esi+ecx]
mov al, byte [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
xor edx, edx
mov dx, WORD [esi+ecx]
mov dx, word [esi+ecx]
shl eax, WORD_BIT
or eax, edx
.column_ld4:
@@ -127,7 +125,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
test cl, SIZEOF_DWORD
jz short .column_ld8
sub ecx, byte SIZEOF_DWORD
movd mmG, DWORD [esi+ecx]
movd mmG, dword [esi+ecx]
psllq mmA, DWORD_BIT
por mmA, mmG
.column_ld8:
@@ -197,7 +195,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
test cl, SIZEOF_MMWORD/8
jz short .column_ld2
sub ecx, byte SIZEOF_MMWORD/8
movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
movd mmA, dword [esi+ecx*RGB_PIXELSIZE]
.column_ld2:
test cl, SIZEOF_MMWORD/4
jz short .column_ld4

View File

@@ -12,8 +12,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
@@ -109,12 +107,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
test cl, SIZEOF_BYTE
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
movzx eax, BYTE [esi+ecx]
movzx eax, byte [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
movzx edx, WORD [esi+ecx]
movzx edx, word [esi+ecx]
shl eax, WORD_BIT
or eax, edx
.column_ld4:

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -12,8 +12,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -12,8 +12,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
@@ -102,12 +100,12 @@ EXTN(jsimd_rgb_gray_convert_avx2):
test cl, SIZEOF_BYTE
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
movzx eax, BYTE [esi+ecx]
movzx eax, byte [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
movzx edx, WORD [esi+ecx]
movzx edx, word [esi+ecx]
shl eax, WORD_BIT
or eax, edx
.column_ld4:

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
@@ -103,13 +101,13 @@ EXTN(jsimd_rgb_gray_convert_mmx):
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
xor eax, eax
mov al, BYTE [esi+ecx]
mov al, byte [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
xor edx, edx
mov dx, WORD [esi+ecx]
mov dx, word [esi+ecx]
shl eax, WORD_BIT
or eax, edx
.column_ld4:
@@ -119,7 +117,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
test cl, SIZEOF_DWORD
jz short .column_ld8
sub ecx, byte SIZEOF_DWORD
movd mmG, DWORD [esi+ecx]
movd mmG, dword [esi+ecx]
psllq mmA, DWORD_BIT
por mmA, mmG
.column_ld8:
@@ -189,7 +187,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
test cl, SIZEOF_MMWORD/8
jz short .column_ld2
sub ecx, byte SIZEOF_MMWORD/8
movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
movd mmA, dword [esi+ecx*RGB_PIXELSIZE]
.column_ld2:
test cl, SIZEOF_MMWORD/4
jz short .column_ld4

View File

@@ -12,8 +12,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
@@ -101,12 +99,12 @@ EXTN(jsimd_rgb_gray_convert_sse2):
test cl, SIZEOF_BYTE
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
movzx eax, BYTE [esi+ecx]
movzx eax, byte [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
movzx edx, WORD [esi+ecx]
movzx edx, word [esi+ecx]
shl eax, WORD_BIT
or eax, edx
.column_ld4:

View File

@@ -17,8 +17,6 @@
; This file contains an SSE2 implementation for Huffman coding of one block.
; The following code is based directly on jchuff.c; see jchuff.c for more
; details.
;
; [TAB8]
%include "jsimdext.inc"
@@ -197,8 +195,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
push ebp
mov esi, POINTER [eax+8] ; (working_state *state)
mov put_buffer, DWORD [esi+8] ; put_buffer = state->cur.put_buffer;
mov put_bits, DWORD [esi+12] ; put_bits = state->cur.put_bits;
mov put_buffer, dword [esi+8] ; put_buffer = state->cur.put_buffer;
mov put_bits, dword [esi+12] ; put_bits = state->cur.put_bits;
push esi ; esi is now scratch
get_GOT edx ; get GOT address
@@ -214,7 +212,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
; Encode the DC coefficient difference per section F.1.2.1
mov esi, POINTER [esp+block] ; block
movsx ecx, word [esi] ; temp = temp2 = block[0] - last_dc_val;
sub ecx, DWORD [eax+20]
sub ecx, dword [eax+20]
mov esi, ecx
; This is a well-known technique for obtaining the absolute value
@@ -229,12 +227,12 @@ EXTN(jsimd_huff_encode_one_block_sse2):
; For a negative input, want temp2 = bitwise complement of abs(input)
; This code assumes we are on a two's complement machine
add esi, edx ; temp2 += temp3;
mov DWORD [esp+temp], esi ; backup temp2 in temp
mov dword [esp+temp], esi ; backup temp2 in temp
; Find the number of bits needed for the magnitude of the coefficient
movpic ebp, POINTER [esp+gotptr] ; load GOT address (ebp)
movzx edx, byte [GOTOFF(ebp, jpeg_nbits_table + ecx)] ; nbits = JPEG_NBITS(temp);
mov DWORD [esp+temp2], edx ; backup nbits in temp2
mov dword [esp+temp2], edx ; backup nbits in temp2
; Emit the Huffman-coded symbol for the number of bits
mov ebp, POINTER [eax+24] ; After this point, arguments are not accessible anymore
@@ -242,13 +240,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
movzx ecx, byte [ebp + edx + 1024] ; size = dctbl->ehufsi[nbits];
EMIT_BITS eax ; EMIT_BITS(code, size)
mov ecx, DWORD [esp+temp2] ; restore nbits
mov ecx, dword [esp+temp2] ; restore nbits
; Mask off any extra bits in code
mov eax, 1
shl eax, cl
dec eax
and eax, DWORD [esp+temp] ; temp2 &= (((JLONG)1)<<nbits) - 1;
and eax, dword [esp+temp] ; temp2 &= (((JLONG)1)<<nbits) - 1;
; Emit that number of bits of the value, if positive,
; or the complement of its magnitude, if negative.
@@ -291,22 +289,22 @@ EXTN(jsimd_huff_encode_one_block_sse2):
jz near .ELOOP
lea esi, [esi+ecx*2] ; k += r;
shr edx, cl ; index >>= r;
mov DWORD [esp+temp3], edx
mov dword [esp+temp3], edx
.BRLOOP:
cmp ecx, 16 ; while (r > 15) {
jl near .ERLOOP
sub ecx, 16 ; r -= 16;
mov DWORD [esp+temp], ecx
mov dword [esp+temp], ecx
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];
movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0];
EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0)
mov ecx, DWORD [esp+temp]
mov ecx, dword [esp+temp]
jmp .BRLOOP
.ERLOOP:
movsx eax, word [esi] ; temp = t1[k];
movpic edx, POINTER [esp+gotptr] ; load GOT address (edx)
movzx eax, byte [GOTOFF(edx, jpeg_nbits_table + eax)] ; nbits = JPEG_NBITS(temp);
mov DWORD [esp+temp2], eax
mov dword [esp+temp2], eax
; Emit Huffman symbol for run length / number of bits
shl ecx, 4 ; temp3 = (r << 4) + nbits;
add ecx, eax
@@ -316,13 +314,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k];
; Mask off any extra bits in code
mov ecx, DWORD [esp+temp2]
mov ecx, dword [esp+temp2]
mov eax, 1
shl eax, cl
dec eax
and eax, edx ; temp2 &= (((JLONG)1)<<nbits) - 1;
EMIT_BITS eax ; PUT_BITS(temp2, nbits)
mov edx, DWORD [esp+temp3]
mov edx, dword [esp+temp3]
add esi, 2 ; ++k;
shr edx, 1 ; index >>= 1;
@@ -352,29 +350,29 @@ EXTN(jsimd_huff_encode_one_block_sse2):
shr edx, cl ; index >>= r;
add ecx, eax
lea esi, [esi+ecx*2] ; k += r;
mov DWORD [esp+temp3], edx
mov dword [esp+temp3], edx
jmp .BRLOOP2
.BLOOP2:
bsf ecx, edx ; r = __builtin_ctzl(index);
jz near .ELOOP2
lea esi, [esi+ecx*2] ; k += r;
shr edx, cl ; index >>= r;
mov DWORD [esp+temp3], edx
mov dword [esp+temp3], edx
.BRLOOP2:
cmp ecx, 16 ; while (r > 15) {
jl near .ERLOOP2
sub ecx, 16 ; r -= 16;
mov DWORD [esp+temp], ecx
mov dword [esp+temp], ecx
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];
movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0];
EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0)
mov ecx, DWORD [esp+temp]
mov ecx, dword [esp+temp]
jmp .BRLOOP2
.ERLOOP2:
movsx eax, word [esi] ; temp = t1[k];
bsr eax, eax ; nbits = 32 - __builtin_clz(temp);
inc eax
mov DWORD [esp+temp2], eax
mov dword [esp+temp2], eax
; Emit Huffman symbol for run length / number of bits
shl ecx, 4 ; temp3 = (r << 4) + nbits;
add ecx, eax
@@ -384,13 +382,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k];
; Mask off any extra bits in code
mov ecx, DWORD [esp+temp2]
mov ecx, dword [esp+temp2]
mov eax, 1
shl eax, cl
dec eax
and eax, edx ; temp2 &= (((JLONG)1)<<nbits) - 1;
EMIT_BITS eax ; PUT_BITS(temp2, nbits)
mov edx, DWORD [esp+temp3]
mov edx, dword [esp+temp3]
add esi, 2 ; ++k;
shr edx, 1 ; index >>= 1;
@@ -407,8 +405,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
mov eax, [esp+buffer]
pop esi
; Save put_buffer & put_bits
mov DWORD [esi+8], put_buffer ; state->cur.put_buffer = put_buffer;
mov DWORD [esi+12], put_bits ; state->cur.put_bits = put_bits;
mov dword [esi+8], put_buffer ; state->cur.put_buffer = put_buffer;
mov dword [esi+12], put_bits ; state->cur.put_bits = put_bits;
pop ebp
pop edi

View File

@@ -15,8 +15,6 @@
;
; This file contains an SSE2 implementation of data preparation for progressive
; Huffman encoding. See jcphuff.c for more details.
;
; [TAB8]
%include "jsimdext.inc"
@@ -329,6 +327,8 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
add LUT, 16*SIZEOF_INT
dec K
jnz .BLOOP16
test LEN, 15
je .PADDING
.ELOOP16:
mov LENEND, LEN
and LENEND, 7

View File

@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
@@ -348,7 +346,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
vmovd eax, xmmA
cmp ecx, byte SIZEOF_WORD
jb short .column_st1
mov WORD [edi], ax
mov word [edi], ax
add edi, byte SIZEOF_WORD
sub ecx, byte SIZEOF_WORD
shr eax, 16
@@ -357,7 +355,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
; space.
test ecx, ecx
jz short .nextrow
mov BYTE [edi], al
mov byte [edi], al
%else ; RGB_PIXELSIZE == 4 ; -----------

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
@@ -280,7 +278,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
movd eax, mmA
cmp ecx, byte SIZEOF_DWORD
jb short .column_st2
mov DWORD [edi+0*SIZEOF_DWORD], eax
mov dword [edi+0*SIZEOF_DWORD], eax
psrlq mmA, DWORD_BIT
movd eax, mmA
sub ecx, byte SIZEOF_DWORD
@@ -288,14 +286,14 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
.column_st2:
cmp ecx, byte SIZEOF_WORD
jb short .column_st1
mov WORD [edi+0*SIZEOF_WORD], ax
mov word [edi+0*SIZEOF_WORD], ax
shr eax, WORD_BIT
sub ecx, byte SIZEOF_WORD
add edi, byte SIZEOF_WORD
.column_st1:
cmp ecx, byte SIZEOF_BYTE
jb short .nextrow
mov BYTE [edi+0*SIZEOF_BYTE], al
mov byte [edi+0*SIZEOF_BYTE], al
%else ; RGB_PIXELSIZE == 4 ; -----------
@@ -367,7 +365,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
.column_st4:
cmp ecx, byte SIZEOF_MMWORD/8
jb short .nextrow
movd DWORD [edi+0*SIZEOF_DWORD], mmA
movd dword [edi+0*SIZEOF_DWORD], mmA
%endif ; RGB_PIXELSIZE ; ---------------

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
@@ -320,7 +318,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
movd eax, xmmA
cmp ecx, byte SIZEOF_WORD
jb short .column_st1
mov WORD [edi], ax
mov word [edi], ax
add edi, byte SIZEOF_WORD
sub ecx, byte SIZEOF_WORD
shr eax, 16
@@ -329,7 +327,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
; space.
test ecx, ecx
jz short .nextrow
mov BYTE [edi], al
mov byte [edi], al
%else ; RGB_PIXELSIZE == 4 ; -----------

View File

@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
@@ -354,7 +352,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
vmovd eax, xmmA
cmp ecx, byte SIZEOF_WORD
jb short .column_st1
mov WORD [edi], ax
mov word [edi], ax
add edi, byte SIZEOF_WORD
sub ecx, byte SIZEOF_WORD
shr eax, 16
@@ -363,7 +361,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
; space.
test ecx, ecx
jz short .endcolumn
mov BYTE [edi], al
mov byte [edi], al
%else ; RGB_PIXELSIZE == 4 ; -----------

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
@@ -283,7 +281,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
movd eax, mmA
cmp ecx, byte SIZEOF_DWORD
jb short .column_st2
mov DWORD [edi+0*SIZEOF_DWORD], eax
mov dword [edi+0*SIZEOF_DWORD], eax
psrlq mmA, DWORD_BIT
movd eax, mmA
sub ecx, byte SIZEOF_DWORD
@@ -291,14 +289,14 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
.column_st2:
cmp ecx, byte SIZEOF_WORD
jb short .column_st1
mov WORD [edi+0*SIZEOF_WORD], ax
mov word [edi+0*SIZEOF_WORD], ax
shr eax, WORD_BIT
sub ecx, byte SIZEOF_WORD
add edi, byte SIZEOF_WORD
.column_st1:
cmp ecx, byte SIZEOF_BYTE
jb short .endcolumn
mov BYTE [edi+0*SIZEOF_BYTE], al
mov byte [edi+0*SIZEOF_BYTE], al
%else ; RGB_PIXELSIZE == 4 ; -----------
@@ -373,7 +371,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
.column_st4:
cmp ecx, byte SIZEOF_MMWORD/8
jb short .endcolumn
movd DWORD [edi+0*SIZEOF_DWORD], mmA
movd dword [edi+0*SIZEOF_DWORD], mmA
%endif ; RGB_PIXELSIZE ; ---------------

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jcolsamp.inc"
@@ -325,7 +323,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
movd eax, xmmA
cmp ecx, byte SIZEOF_WORD
jb short .column_st1
mov WORD [edi], ax
mov word [edi], ax
add edi, byte SIZEOF_WORD
sub ecx, byte SIZEOF_WORD
shr eax, 16
@@ -334,7 +332,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
; space.
test ecx, ecx
jz short .endcolumn
mov BYTE [edi], al
mov byte [edi], al
%else ; RGB_PIXELSIZE == 4 ; -----------

View File

@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"

View File

@@ -17,8 +17,6 @@
; This file contains a floating-point implementation of the forward DCT
; (Discrete Cosine Transform). The following code is based directly on
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"

View File

@@ -17,8 +17,6 @@
; This file contains a floating-point implementation of the forward DCT
; (Discrete Cosine Transform). The following code is based directly on
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"

View File

@@ -18,8 +18,6 @@
; the forward DCT (Discrete Cosine Transform). The following code is
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
; for more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"

View File

@@ -18,8 +18,6 @@
; the forward DCT (Discrete Cosine Transform). The following code is
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
; for more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"

View File

@@ -18,8 +18,6 @@
; forward DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
; more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"

View File

@@ -18,8 +18,6 @@
; forward DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
; more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"

View File

@@ -18,8 +18,6 @@
; forward DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
; more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"

View File

@@ -17,8 +17,6 @@
; This file contains a floating-point implementation of the inverse DCT
; (Discrete Cosine Transform). The following code is based directly on
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -92,23 +90,23 @@ EXTN(jsimd_idct_float_3dnow):
alignx 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz short .columnDCT
pushpic ebx ; save GOT address
mov ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
mov eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
or ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
or eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
or ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
mov ebx, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
or ebx, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
or ebx, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
or eax, ebx
poppic ebx ; restore GOT address
jnz short .columnDCT
; -- AC terms all zero
movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
punpcklwd mm0, mm0
psrad mm0, (DWORD_BIT-WORD_BIT)
@@ -135,10 +133,10 @@ EXTN(jsimd_idct_float_3dnow):
; -- Even part
movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
movd mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
movd mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
movd mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
movd mm1, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
movd mm2, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
movd mm3, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
punpcklwd mm0, mm0
punpcklwd mm1, mm1
@@ -182,10 +180,10 @@ EXTN(jsimd_idct_float_3dnow):
; -- Odd part
movd mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
movd mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
movd mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
movd mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
movd mm2, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
movd mm3, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
movd mm5, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
movd mm1, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
punpcklwd mm2, mm2
punpcklwd mm3, mm3

View File

@@ -17,8 +17,6 @@
; This file contains a floating-point implementation of the inverse DCT
; (Discrete Cosine Transform). The following code is based directly on
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -102,8 +100,8 @@ EXTN(jsimd_idct_float_sse):
alignx 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz near .columnDCT
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]

View File

@@ -17,8 +17,6 @@
; This file contains a floating-point implementation of the inverse DCT
; (Discrete Cosine Transform). The following code is based directly on
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -102,8 +100,8 @@ EXTN(jsimd_idct_float_sse2):
alignx 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz near .columnDCT
movq xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]

View File

@@ -18,8 +18,6 @@
; the inverse DCT (Discrete Cosine Transform). The following code is
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
; for more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -123,8 +121,8 @@ EXTN(jsimd_idct_ifast_mmx):
alignx 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz short .columnDCT
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]

View File

@@ -18,8 +18,6 @@
; the inverse DCT (Discrete Cosine Transform). The following code is
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
; for more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -118,8 +116,8 @@ EXTN(jsimd_idct_ifast_sse2):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz near .columnDCT
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]

View File

@@ -18,8 +18,6 @@
; inverse DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jidctint.c; see the jidctint.c for
; more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -320,8 +318,8 @@ EXTN(jsimd_idct_islow_avx2):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz near .columnDCT
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]

View File

@@ -18,8 +18,6 @@
; inverse DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jidctint.c; see the jidctint.c for
; more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -136,8 +134,8 @@ EXTN(jsimd_idct_islow_mmx):
alignx 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz short .columnDCT
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]

View File

@@ -18,8 +18,6 @@
; inverse DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jidctint.c; see the jidctint.c for
; more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -131,8 +129,8 @@ EXTN(jsimd_idct_islow_sse2):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz near .columnDCT
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]

View File

@@ -18,8 +18,6 @@
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
; The following code is based directly on the IJG's original jidctred.c;
; see the jidctred.c for more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -144,8 +142,8 @@ EXTN(jsimd_idct_4x4_mmx):
alignx 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz short .columnDCT
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -464,16 +462,16 @@ EXTN(jsimd_idct_4x4_mmx):
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
movd dword [edx+eax*SIZEOF_JSAMPLE], mm1
movd dword [esi+eax*SIZEOF_JSAMPLE], mm0
psrlq mm1, 4*BYTE_BIT
psrlq mm0, 4*BYTE_BIT
mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
movd dword [edx+eax*SIZEOF_JSAMPLE], mm1
movd dword [esi+eax*SIZEOF_JSAMPLE], mm0
emms ; empty MMX state
@@ -688,8 +686,8 @@ EXTN(jsimd_idct_2x2_mmx):
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
mov WORD [edx+eax*SIZEOF_JSAMPLE], bx
mov WORD [esi+eax*SIZEOF_JSAMPLE], cx
mov word [edx+eax*SIZEOF_JSAMPLE], bx
mov word [esi+eax*SIZEOF_JSAMPLE], cx
emms ; empty MMX state

View File

@@ -18,8 +18,6 @@
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
; The following code is based directly on the IJG's original jidctred.c;
; see the jidctred.c for more details.
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -139,8 +137,8 @@ EXTN(jsimd_idct_4x4_sse2):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz short .columnDCT
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -578,8 +576,8 @@ EXTN(jsimd_idct_2x2_sse2):
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
mov WORD [edx+eax*SIZEOF_JSAMPLE], bx
mov WORD [esi+eax*SIZEOF_JSAMPLE], cx
mov word [edx+eax*SIZEOF_JSAMPLE], bx
mov word [esi+eax*SIZEOF_JSAMPLE], cx
pop edi
pop esi

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"

View File

@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"

View File

@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
%include "jsimdext.inc"
@@ -51,29 +49,14 @@ EXTN(jpeg_simd_cpu_support):
xor eax, edx
jz near .return ; CPUID is not supported
; Check for MMX instruction support
; Check whether CPUID leaf 07H is supported
; (leaf 07H is used to check for AVX2 instruction support)
xor eax, eax
cpuid
test eax, eax
jz near .return
xor eax, eax
inc eax
cpuid
mov eax, edx ; eax = Standard feature flags
test eax, 1<<23 ; bit23:MMX
jz short .no_mmx
or edi, byte JSIMD_MMX
.no_mmx:
test eax, 1<<25 ; bit25:SSE
jz short .no_sse
or edi, byte JSIMD_SSE
.no_sse:
test eax, 1<<26 ; bit26:SSE2
jz short .no_sse2
or edi, byte JSIMD_SSE2
.no_sse2:
cmp eax, 7
jl short .no_avx2 ; Maximum leaf < 07H
; Check for AVX2 instruction support
mov eax, 7
@@ -94,13 +77,34 @@ EXTN(jpeg_simd_cpu_support):
xor ecx, ecx
xgetbv
test eax, 6 ; O/S does not manage XMM/YMM state
and eax, 6
cmp eax, 6 ; O/S does not manage XMM/YMM state
; using XSAVE
jz short .no_avx2
jnz short .no_avx2
or edi, JSIMD_AVX2
.no_avx2:
; Check CPUID leaf 01H for MMX, SSE, and SSE2 support
xor eax, eax
inc eax
cpuid
mov eax, edx ; eax = Standard feature flags
; Check for MMX instruction support
test eax, 1<<23 ; bit23:MMX
jz short .no_mmx
or edi, byte JSIMD_MMX
.no_mmx:
test eax, 1<<25 ; bit25:SSE
jz short .no_sse
or edi, byte JSIMD_SSE
.no_sse:
test eax, 1<<26 ; bit26:SSE2
jz short .no_sse2
or edi, byte JSIMD_SSE2
.no_sse2:
; Check for 3DNow! instruction support
mov eax, 0x80000000
cpuid

View File

@@ -2,12 +2,13 @@
* Loongson MMI optimizations for libjpeg-turbo
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2014-2015, D. R. Commander. All Rights Reserved.
* Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
* Copyright (C) 2014-2015, 2019, D. R. Commander. All Rights Reserved.
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
* All Rights Reserved.
* Authors: ZhuChen <zhuchen@loongson.cn>
* SunZhangzhi <sunzhangzhi-cq@loongson.cn>
* CaiWanwei <caiwanwei@loongson.cn>
* ZhangLixia <zhanglixia-hf@loongson.cn>
*
* Based on the x86 SIMD extension for IJG JPEG library
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -184,9 +185,15 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
"$14", "memory"
);
} else {
mmA = _mm_load_si64((__m64 *)&inptr[0]);
mmG = _mm_load_si64((__m64 *)&inptr[8]);
mmF = _mm_load_si64((__m64 *)&inptr[16]);
if (!(((long)inptr) & 7)) {
mmA = _mm_load_si64((__m64 *)&inptr[0]);
mmG = _mm_load_si64((__m64 *)&inptr[8]);
mmF = _mm_load_si64((__m64 *)&inptr[16]);
} else {
mmA = _mm_loadu_si64((__m64 *)&inptr[0]);
mmG = _mm_loadu_si64((__m64 *)&inptr[8]);
mmF = _mm_loadu_si64((__m64 *)&inptr[16]);
}
inptr += RGB_PIXELSIZE * 8;
}
mmD = mmA;
@@ -268,10 +275,17 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
: "$f0", "$f2", "$8", "$9", "$10", "$11", "$13", "memory"
);
} else {
mmA = _mm_load_si64((__m64 *)&inptr[0]);
mmF = _mm_load_si64((__m64 *)&inptr[8]);
mmD = _mm_load_si64((__m64 *)&inptr[16]);
mmC = _mm_load_si64((__m64 *)&inptr[24]);
if (!(((long)inptr) & 7)) {
mmA = _mm_load_si64((__m64 *)&inptr[0]);
mmF = _mm_load_si64((__m64 *)&inptr[8]);
mmD = _mm_load_si64((__m64 *)&inptr[16]);
mmC = _mm_load_si64((__m64 *)&inptr[24]);
} else {
mmA = _mm_loadu_si64((__m64 *)&inptr[0]);
mmF = _mm_loadu_si64((__m64 *)&inptr[8]);
mmD = _mm_loadu_si64((__m64 *)&inptr[16]);
mmC = _mm_loadu_si64((__m64 *)&inptr[24]);
}
inptr += RGB_PIXELSIZE * 8;
}
mmB = mmA;

View File

@@ -1,8 +1,9 @@
/*
* Loongson MMI optimizations for libjpeg-turbo
*
* Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
* All Rights Reserved.
* Copyright (C) 2019, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -41,7 +42,7 @@ typedef float __m32;
/********** Set Operations **********/
extern __inline __m64
extern __inline __m64 FUNCTION_ATTRIBS
_mm_setzero_si64(void)
{
return 0.0;
@@ -1245,6 +1246,22 @@ _mm_load_si64(const __m64 *src)
asm("ldc1 %0, %1\n\t"
: "=f" (ret)
: "m" (*src)
: "memory"
);
return ret;
}
extern __inline __m64 FUNCTION_ATTRIBS
_mm_loadu_si64(const __m64 *src)
{
__m64 ret;
asm("gsldlc1 %0, 7(%1)\n\t"
"gsldrc1 %0, 0(%1)\n\t"
: "=f" (ret)
: "r" (src)
: "memory"
);
return ret;

View File

@@ -692,8 +692,10 @@ jsimd_can_convsamp_float(void)
if (sizeof(ISLOW_MULT_TYPE) != 2)
return 0;
#ifndef __mips_soft_float
if (simd_support & JSIMD_DSPR2)
return 1;
#endif
return 0;
}
@@ -709,7 +711,9 @@ GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
FAST_FLOAT *workspace)
{
#ifndef __mips_soft_float
jsimd_convsamp_float_dspr2(sample_data, start_col, workspace);
#endif
}
GLOBAL(int)
@@ -805,8 +809,10 @@ jsimd_can_quantize_float(void)
if (sizeof(ISLOW_MULT_TYPE) != 2)
return 0;
#ifndef __mips_soft_float
if (simd_support & JSIMD_DSPR2)
return 1;
#endif
return 0;
}
@@ -821,7 +827,9 @@ GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
FAST_FLOAT *workspace)
{
#ifndef __mips_soft_float
jsimd_quantize_float_dspr2(coef_block, divisors, workspace);
#endif
}
GLOBAL(int)

Some files were not shown because too many files have changed in this diff Show More