Compare commits
14 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb88e5da80 | ||
|
|
e9d9c31fd2 | ||
|
|
077e5bb4e0 | ||
|
|
a1dd35680d | ||
|
|
a09ba29a55 | ||
|
|
8ce2c9119a | ||
|
|
db04435165 | ||
|
|
7723d7f7d0 | ||
|
|
628c168c86 | ||
|
|
1120ff29a1 | ||
|
|
1945ad961b | ||
|
|
6e9d43e085 | ||
|
|
9055fb408d | ||
|
|
9e6c6a14f8 |
34
BUILDING.md
34
BUILDING.md
@@ -323,11 +323,6 @@ Set the following shell variables for simplicity:
|
|||||||
IOS_SYSROOT=$IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk
|
IOS_SYSROOT=$IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk
|
||||||
IOS_GCC=$IOS_PLATFORMDIR/Developer/usr/bin/arm-apple-darwin10-llvm-gcc-4.2
|
IOS_GCC=$IOS_PLATFORMDIR/Developer/usr/bin/arm-apple-darwin10-llvm-gcc-4.2
|
||||||
|
|
||||||
*ARMv6 (code will run on all iOS devices, not SIMD-accelerated)*
|
|
||||||
[NOTE: Requires Xcode 4.4.x or earlier]
|
|
||||||
|
|
||||||
IOS_CFLAGS="-march=armv6 -mcpu=arm1176jzf-s -mfpu=vfp"
|
|
||||||
|
|
||||||
*ARMv7 (code will run on iPhone 3GS-4S/iPad 1st-3rd Generation and newer)*
|
*ARMv7 (code will run on iPhone 3GS-4S/iPad 1st-3rd Generation and newer)*
|
||||||
|
|
||||||
IOS_CFLAGS="-march=armv7 -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon"
|
IOS_CFLAGS="-march=armv7 -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon"
|
||||||
@@ -399,8 +394,8 @@ NOTE: You can also add `-miphoneos-version-min={version}` to `$IOS_CFLAGS`
|
|||||||
above in order to support older versions of iOS than the default version
|
above in order to support older versions of iOS than the default version
|
||||||
supported by the SDK.
|
supported by the SDK.
|
||||||
|
|
||||||
Once built, lipo can be used to combine the ARMv6, v7, v7s, and/or v8 variants
|
Once built, lipo can be used to combine the ARMv7, v7s, and/or v8 variants into
|
||||||
into a universal library.
|
a universal library.
|
||||||
|
|
||||||
|
|
||||||
### Building libjpeg-turbo for Android
|
### Building libjpeg-turbo for Android
|
||||||
@@ -782,7 +777,6 @@ default, but you can override this by setting the `BUILDDIR32` variable on the
|
|||||||
make command line as shown above.
|
make command line as shown above.
|
||||||
|
|
||||||
make iosdmg [BUILDDIR32={32-bit build directory}] \
|
make iosdmg [BUILDDIR32={32-bit build directory}] \
|
||||||
[BUILDDIRARMV6={ARMv6 build directory}] \
|
|
||||||
[BUILDDIRARMV7={ARMv7 build directory}] \
|
[BUILDDIRARMV7={ARMv7 build directory}] \
|
||||||
[BUILDDIRARMV7S={ARMv7s build directory}] \
|
[BUILDDIRARMV7S={ARMv7s build directory}] \
|
||||||
[BUILDDIRARMV8={ARMv8 build directory}]
|
[BUILDDIRARMV8={ARMv8 build directory}]
|
||||||
@@ -791,19 +785,17 @@ On OS X systems, this creates a Macintosh package and disk image in which the
|
|||||||
libjpeg-turbo static libraries contain ARM architectures necessary to build
|
libjpeg-turbo static libraries contain ARM architectures necessary to build
|
||||||
iOS applications. If building on an x86-64 system, the binaries will also
|
iOS applications. If building on an x86-64 system, the binaries will also
|
||||||
contain the i386 architecture, as with `make udmg` above. You should first
|
contain the i386 architecture, as with `make udmg` above. You should first
|
||||||
configure ARMv6, ARMv7, ARMv7s, and/or ARMv8 out-of-tree builds of
|
configure ARMv7, ARMv7s, and/or ARMv8 out-of-tree builds of libjpeg-turbo (see
|
||||||
libjpeg-turbo (see "Building libjpeg-turbo for iOS" above.) If you are
|
"Building libjpeg-turbo for iOS" above.) If you are building an x86-64 version
|
||||||
building an x86-64 version of libjpeg-turbo, you should configure a 32-bit
|
of libjpeg-turbo, you should configure a 32-bit out-of-tree build as well.
|
||||||
out-of-tree build as well. Next, build libjpeg-turbo as you would normally,
|
Next, build libjpeg-turbo as you would normally, using an out-of-tree build.
|
||||||
using an out-of-tree build. When it is built, run `make iosdmg` from the
|
When it is built, run `make iosdmg` from the build directory. The build system
|
||||||
build directory. The build system will look for the ARMv6 build under
|
will look for the ARMv7 build under *{source_directory}*/iosarmv7 by default,
|
||||||
*{source_directory}*/iosarmv6 by default, the ARMv7 build under
|
the ARMv7s build under *{source_directory}*/iosarmv7s by default, the ARMv8
|
||||||
*{source_directory}*/iosarmv7 by default, the ARMv7s build under
|
build under *{source_directory}*/iosarmv8 by default, and (if applicable) the
|
||||||
*{source_directory}*/iosarmv7s by default, the ARMv8 build under
|
32-bit build under *{source_directory}*/osxx86 by default, but you can override
|
||||||
*{source_directory}*/iosarmv8 by default, and (if applicable) the 32-bit build
|
this by setting the `BUILDDIR32`, `BUILDDIRARMV7`, `BUILDDIRARMV7S`, and/or
|
||||||
under *{source_directory}*/osxx86 by default, but you can override this by
|
`BUILDDIRARMV8` variables on the `make` command line as shown above.
|
||||||
setting the `BUILDDIR32`, `BUILDDIRARMV6`, `BUILDDIRARMV7`, `BUILDDIRARMV7S`,
|
|
||||||
and/or `BUILDDIRARMV8` variables on the `make` command line as shown above.
|
|
||||||
|
|
||||||
NOTE: If including an ARMv8 build in the package, then you may need to use
|
NOTE: If including an ARMv8 build in the package, then you may need to use
|
||||||
Xcode's version of lipo instead of the operating system's. To do this, pass
|
Xcode's version of lipo instead of the operating system's. To do this, pass
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ if(POLICY CMP0022)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
project(libjpeg-turbo C)
|
project(libjpeg-turbo C)
|
||||||
set(VERSION 1.5.0)
|
set(VERSION 1.5.1)
|
||||||
string(REPLACE "." ";" VERSION_TRIPLET ${VERSION})
|
string(REPLACE "." ";" VERSION_TRIPLET ${VERSION})
|
||||||
list(GET VERSION_TRIPLET 0 VERSION_MAJOR)
|
list(GET VERSION_TRIPLET 0 VERSION_MAJOR)
|
||||||
list(GET VERSION_TRIPLET 1 VERSION_MINOR)
|
list(GET VERSION_TRIPLET 1 VERSION_MINOR)
|
||||||
|
|||||||
91
ChangeLog.md
91
ChangeLog.md
@@ -1,3 +1,94 @@
|
|||||||
|
1.5.1
|
||||||
|
=====
|
||||||
|
|
||||||
|
### Significant changes relative to 1.5.0:
|
||||||
|
|
||||||
|
1. Previously, the undocumented `JSIMD_FORCE*` environment variables could be
|
||||||
|
used to force-enable a particular SIMD instruction set if multiple instruction
|
||||||
|
sets were available on a particular platform. On x86 platforms, where CPU
|
||||||
|
feature detection is bulletproof and multiple SIMD instruction sets are
|
||||||
|
available, it makes sense for those environment variables to allow forcing the
|
||||||
|
use of an instruction set only if that instruction set is available. However,
|
||||||
|
since the ARM implementations of libjpeg-turbo can only use one SIMD
|
||||||
|
instruction set, and since their feature detection code is less bulletproof
|
||||||
|
(parsing /proc/cpuinfo), it makes sense for the `JSIMD_FORCENEON` environment
|
||||||
|
variable to bypass the feature detection code and really force the use of NEON
|
||||||
|
instructions. A new environment variable (`JSIMD_FORCEDSPR2`) was introduced
|
||||||
|
in the MIPS implementation for the same reasons, and the existing
|
||||||
|
`JSIMD_FORCENONE` environment variable was extended to that implementation.
|
||||||
|
These environment variables provide a workaround for those attempting to test
|
||||||
|
ARM and MIPS builds of libjpeg-turbo in QEMU, which passes through
|
||||||
|
/proc/cpuinfo from the host system.
|
||||||
|
|
||||||
|
2. libjpeg-turbo previously assumed that AltiVec instructions were always
|
||||||
|
available on PowerPC platforms, which led to "illegal instruction" errors when
|
||||||
|
running on PowerPC chips that lack AltiVec support (such as the older 7xx/G3
|
||||||
|
and newer e5500 series). libjpeg-turbo now examines /proc/cpuinfo on
|
||||||
|
Linux/Android systems and enables AltiVec instructions only if the CPU supports
|
||||||
|
them. It also now provides two environment variables, `JSIMD_FORCEALTIVEC` and
|
||||||
|
`JSIMD_FORCENONE`, to force-enable and force-disable AltiVec instructions in
|
||||||
|
environments where /proc/cpuinfo is an unreliable means of CPU feature
|
||||||
|
detection (such as when running in QEMU). On OS X, libjpeg-turbo continues to
|
||||||
|
assume that AltiVec support is always available, which means that libjpeg-turbo
|
||||||
|
cannot be used with G3 Macs unless you set the environment variable
|
||||||
|
`JSIMD_FORCENONE` to `1`.
|
||||||
|
|
||||||
|
3. Fixed an issue whereby 64-bit ARM (AArch64) builds of libjpeg-turbo would
|
||||||
|
crash when built with recent releases of the Clang/LLVM compiler. This was
|
||||||
|
caused by an ABI conformance issue in some of libjpeg-turbo's 64-bit NEON SIMD
|
||||||
|
routines. Those routines were incorrectly using 64-bit instructions to
|
||||||
|
transfer a 32-bit JDIMENSION argument, whereas the ABI allows the upper
|
||||||
|
(unused) 32 bits of a 32-bit argument's register to be undefined. The new
|
||||||
|
Clang/LLVM optimizer uses load combining to transfer multiple adjacent 32-bit
|
||||||
|
structure members into a single 64-bit register, and this exposed the ABI
|
||||||
|
conformance issue.
|
||||||
|
|
||||||
|
4. Fancy upsampling is now supported when decompressing JPEG images that use
|
||||||
|
4:4:0 (h1v2) chroma subsampling. These images are generated when losslessly
|
||||||
|
rotating or transposing JPEG images that use 4:2:2 (h2v1) chroma subsampling.
|
||||||
|
The h1v2 fancy upsampling algorithm is not currently SIMD-accelerated.
|
||||||
|
|
||||||
|
5. If merged upsampling isn't SIMD-accelerated but YCbCr-to-RGB conversion is,
|
||||||
|
then libjpeg-turbo will now disable merged upsampling when decompressing YCbCr
|
||||||
|
JPEG images into RGB or extended RGB output images. This significantly speeds
|
||||||
|
up the decompression of 4:2:0 and 4:2:2 JPEGs on ARM platforms if fancy
|
||||||
|
upsampling is not used (for example, if the `-nosmooth` option to djpeg is
|
||||||
|
specified).
|
||||||
|
|
||||||
|
6. The TurboJPEG API will now decompress 4:2:2 and 4:4:0 JPEG images with
|
||||||
|
2x2 luminance sampling factors and 2x1 or 1x2 chrominance sampling factors.
|
||||||
|
This is a non-standard way of specifying 2x subsampling (normally 4:2:2 JPEGs
|
||||||
|
have 2x1 luminance and 1x1 chrominance sampling factors, and 4:4:0 JPEGs have
|
||||||
|
1x2 luminance and 1x1 chrominance sampling factors), but the JPEG specification
|
||||||
|
and the libjpeg API both allow it.
|
||||||
|
|
||||||
|
7. Fixed an unsigned integer overflow in the libjpeg memory manager, detected
|
||||||
|
by the Clang undefined behavior sanitizer, that could be triggered by
|
||||||
|
attempting to decompress a specially-crafted malformed JPEG image. This issue
|
||||||
|
affected only 32-bit code and did not pose a security threat, but removing the
|
||||||
|
warning makes it easier to detect actual security issues, should they arise in
|
||||||
|
the future.
|
||||||
|
|
||||||
|
8. Fixed additional negative left shifts and other issues reported by the GCC
|
||||||
|
and Clang undefined behavior sanitizers when attempting to decompress
|
||||||
|
specially-crafted malformed JPEG images. None of these issues posed a security
|
||||||
|
threat, but removing the warnings makes it easier to detect actual security
|
||||||
|
issues, should they arise in the future.
|
||||||
|
|
||||||
|
9. Fixed an out-of-bounds array reference, introduced by 1.4.90[2] (partial
|
||||||
|
image decompression) and detected by the Clang undefined behavior sanitizer,
|
||||||
|
that could be triggered by a specially-crafted malformed JPEG image with more
|
||||||
|
than four components. Because the out-of-bounds reference was still within the
|
||||||
|
same structure, it was not known to pose a security threat, but removing the
|
||||||
|
warning makes it easier to detect actual security issues, should they arise in
|
||||||
|
the future.
|
||||||
|
|
||||||
|
10. Fixed another ABI conformance issue in the 64-bit ARM (AArch64) NEON SIMD
|
||||||
|
code. Some of the routines were incorrectly reading and storing data below the
|
||||||
|
stack pointer, which caused segfaults in certain applications under specific
|
||||||
|
circumstances.
|
||||||
|
|
||||||
|
|
||||||
1.5.0
|
1.5.0
|
||||||
=====
|
=====
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,10 @@ endif
|
|||||||
nodist_include_HEADERS = jconfig.h
|
nodist_include_HEADERS = jconfig.h
|
||||||
|
|
||||||
pkgconfigdir = $(libdir)/pkgconfig
|
pkgconfigdir = $(libdir)/pkgconfig
|
||||||
pkgconfig_DATA = pkgscripts/libjpeg.pc pkgscripts/libturbojpeg.pc
|
pkgconfig_DATA = pkgscripts/libjpeg.pc
|
||||||
|
if WITH_TURBOJPEG
|
||||||
|
pkgconfig_DATA += pkgscripts/libturbojpeg.pc
|
||||||
|
endif
|
||||||
|
|
||||||
HDRS = jchuff.h jdct.h jdhuff.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
|
HDRS = jchuff.h jdct.h jdhuff.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
|
||||||
jpegint.h jpeglib.h jversion.h jsimd.h jsimddct.h jpegcomp.h \
|
jpegint.h jpeglib.h jversion.h jsimd.h jsimddct.h jpegcomp.h \
|
||||||
@@ -757,12 +760,12 @@ udmg: all pkgscripts/makemacpkg pkgscripts/uninstall
|
|||||||
sh pkgscripts/makemacpkg -build32 ${BUILDDIR32}
|
sh pkgscripts/makemacpkg -build32 ${BUILDDIR32}
|
||||||
|
|
||||||
iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall
|
iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall
|
||||||
sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}"
|
sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}"
|
||||||
|
|
||||||
else
|
else
|
||||||
|
|
||||||
iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall
|
iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall
|
||||||
sh pkgscripts/makemacpkg -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}"
|
sh pkgscripts/makemacpkg -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}"
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|||||||
21
bmp.c
21
bmp.c
@@ -108,10 +108,14 @@ static void pixelconvert(unsigned char *srcbuf, int srcpf, int srcbottomup,
|
|||||||
m=(m-k)/(1.0-k);
|
m=(m-k)/(1.0-k);
|
||||||
y=(y-k)/(1.0-k);
|
y=(y-k)/(1.0-k);
|
||||||
}
|
}
|
||||||
if(c>1.0) c=1.0; if(c<0.) c=0.;
|
if(c>1.0) c=1.0;
|
||||||
if(m>1.0) m=1.0; if(m<0.) m=0.;
|
if(c<0.) c=0.;
|
||||||
if(y>1.0) y=1.0; if(y<0.) y=0.;
|
if(m>1.0) m=1.0;
|
||||||
if(k>1.0) k=1.0; if(k<0.) k=0.;
|
if(m<0.) m=0.;
|
||||||
|
if(y>1.0) y=1.0;
|
||||||
|
if(y<0.) y=0.;
|
||||||
|
if(k>1.0) k=1.0;
|
||||||
|
if(k<0.) k=0.;
|
||||||
*dstcolptr++=(unsigned char)(255.0-c*255.0+0.5);
|
*dstcolptr++=(unsigned char)(255.0-c*255.0+0.5);
|
||||||
*dstcolptr++=(unsigned char)(255.0-m*255.0+0.5);
|
*dstcolptr++=(unsigned char)(255.0-m*255.0+0.5);
|
||||||
*dstcolptr++=(unsigned char)(255.0-y*255.0+0.5);
|
*dstcolptr++=(unsigned char)(255.0-y*255.0+0.5);
|
||||||
@@ -133,9 +137,12 @@ static void pixelconvert(unsigned char *srcbuf, int srcpf, int srcbottomup,
|
|||||||
double r=c*k/255.;
|
double r=c*k/255.;
|
||||||
double g=m*k/255.;
|
double g=m*k/255.;
|
||||||
double b=y*k/255.;
|
double b=y*k/255.;
|
||||||
if(r>255.0) r=255.0; if(r<0.) r=0.;
|
if(r>255.0) r=255.0;
|
||||||
if(g>255.0) g=255.0; if(g<0.) g=0.;
|
if(r<0.) r=0.;
|
||||||
if(b>255.0) b=255.0; if(b<0.) b=0.;
|
if(g>255.0) g=255.0;
|
||||||
|
if(g<0.) g=0.;
|
||||||
|
if(b>255.0) b=255.0;
|
||||||
|
if(b<0.) b=0.;
|
||||||
dstcolptr[tjRedOffset[dstpf]]=(unsigned char)(r+0.5);
|
dstcolptr[tjRedOffset[dstpf]]=(unsigned char)(r+0.5);
|
||||||
dstcolptr[tjGreenOffset[dstpf]]=(unsigned char)(g+0.5);
|
dstcolptr[tjGreenOffset[dstpf]]=(unsigned char)(g+0.5);
|
||||||
dstcolptr[tjBlueOffset[dstpf]]=(unsigned char)(b+0.5);
|
dstcolptr[tjBlueOffset[dstpf]]=(unsigned char)(b+0.5);
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
# Process this file with autoconf to produce a configure script.
|
# Process this file with autoconf to produce a configure script.
|
||||||
|
|
||||||
AC_PREREQ([2.56])
|
AC_PREREQ([2.56])
|
||||||
AC_INIT([libjpeg-turbo], [1.5.0])
|
AC_INIT([libjpeg-turbo], [1.5.1])
|
||||||
|
|
||||||
AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2])
|
AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2])
|
||||||
AC_PREFIX_DEFAULT(/opt/libjpeg-turbo)
|
AC_PREFIX_DEFAULT(/opt/libjpeg-turbo)
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
* This file was part of the Independent JPEG Group's software:
|
* This file was part of the Independent JPEG Group's software:
|
||||||
* Developed 1997-2015 by Guido Vollbeding.
|
* Developed 1997-2015 by Guido Vollbeding.
|
||||||
* libjpeg-turbo Modifications:
|
* libjpeg-turbo Modifications:
|
||||||
* Copyright (C) 2015, D. R. Commander.
|
* Copyright (C) 2015-2016, D. R. Commander.
|
||||||
* For conditions of distribution and use, see the accompanying README.ijg
|
* For conditions of distribution and use, see the accompanying README.ijg
|
||||||
* file.
|
* file.
|
||||||
*
|
*
|
||||||
@@ -382,7 +382,7 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
|||||||
if (arith_decode(cinfo, st)) v |= m;
|
if (arith_decode(cinfo, st)) v |= m;
|
||||||
v += 1; if (sign) v = -v;
|
v += 1; if (sign) v = -v;
|
||||||
/* Scale and output coefficient in natural (dezigzagged) order */
|
/* Scale and output coefficient in natural (dezigzagged) order */
|
||||||
(*block)[jpeg_natural_order[k]] = (JCOEF) (v << cinfo->Al);
|
(*block)[jpeg_natural_order[k]] = (JCOEF) ((unsigned)v << cinfo->Al);
|
||||||
}
|
}
|
||||||
|
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|||||||
4
jdhuff.c
4
jdhuff.c
@@ -109,9 +109,9 @@ start_pass_huff_decoder (j_decompress_ptr cinfo)
|
|||||||
actbl = compptr->ac_tbl_no;
|
actbl = compptr->ac_tbl_no;
|
||||||
/* Compute derived values for Huffman tables */
|
/* Compute derived values for Huffman tables */
|
||||||
/* We may do this more than once for a table, but it's not expensive */
|
/* We may do this more than once for a table, but it's not expensive */
|
||||||
pdtbl = entropy->dc_derived_tbls + dctbl;
|
pdtbl = (d_derived_tbl **)(entropy->dc_derived_tbls) + dctbl;
|
||||||
jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl, pdtbl);
|
jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl, pdtbl);
|
||||||
pdtbl = entropy->ac_derived_tbls + actbl;
|
pdtbl = (d_derived_tbl **)(entropy->ac_derived_tbls) + actbl;
|
||||||
jpeg_make_d_derived_tbl(cinfo, FALSE, actbl, pdtbl);
|
jpeg_make_d_derived_tbl(cinfo, FALSE, actbl, pdtbl);
|
||||||
/* Initialize DC predictions to 0 */
|
/* Initialize DC predictions to 0 */
|
||||||
entropy->saved.last_dc_val[ci] = 0;
|
entropy->saved.last_dc_val[ci] = 0;
|
||||||
|
|||||||
12
jdmaster.c
12
jdmaster.c
@@ -22,6 +22,7 @@
|
|||||||
#include "jpeglib.h"
|
#include "jpeglib.h"
|
||||||
#include "jpegcomp.h"
|
#include "jpegcomp.h"
|
||||||
#include "jdmaster.h"
|
#include "jdmaster.h"
|
||||||
|
#include "jsimd.h"
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -69,6 +70,17 @@ use_merged_upsample (j_decompress_ptr cinfo)
|
|||||||
cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
|
cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
|
||||||
cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size)
|
cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
#ifdef WITH_SIMD
|
||||||
|
/* If YCbCr-to-RGB color conversion is SIMD-accelerated but merged upsampling
|
||||||
|
isn't, then disabling merged upsampling is likely to be faster when
|
||||||
|
decompressing YCbCr JPEG images. */
|
||||||
|
if (!jsimd_can_h2v2_merged_upsample() && !jsimd_can_h2v1_merged_upsample() &&
|
||||||
|
jsimd_can_ycc_rgb() && cinfo->jpeg_color_space == JCS_YCbCr &&
|
||||||
|
(cinfo->out_color_space == JCS_RGB ||
|
||||||
|
(cinfo->out_color_space >= JCS_EXT_RGB &&
|
||||||
|
cinfo->out_color_space <= JCS_EXT_ARGB)))
|
||||||
|
return FALSE;
|
||||||
|
#endif
|
||||||
/* ??? also need to test for upsample-time rescaling, when & if supported */
|
/* ??? also need to test for upsample-time rescaling, when & if supported */
|
||||||
return TRUE; /* by golly, it'll work... */
|
return TRUE; /* by golly, it'll work... */
|
||||||
#else
|
#else
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
* This file was part of the Independent JPEG Group's software:
|
* This file was part of the Independent JPEG Group's software:
|
||||||
* Copyright (C) 1995-1997, Thomas G. Lane.
|
* Copyright (C) 1995-1997, Thomas G. Lane.
|
||||||
* libjpeg-turbo Modifications:
|
* libjpeg-turbo Modifications:
|
||||||
* Copyright (C) 2015, D. R. Commander.
|
* Copyright (C) 2015-2016, D. R. Commander.
|
||||||
* For conditions of distribution and use, see the accompanying README.ijg
|
* For conditions of distribution and use, see the accompanying README.ijg
|
||||||
* file.
|
* file.
|
||||||
*
|
*
|
||||||
@@ -170,12 +170,12 @@ start_pass_phuff_decoder (j_decompress_ptr cinfo)
|
|||||||
if (is_DC_band) {
|
if (is_DC_band) {
|
||||||
if (cinfo->Ah == 0) { /* DC refinement needs no table */
|
if (cinfo->Ah == 0) { /* DC refinement needs no table */
|
||||||
tbl = compptr->dc_tbl_no;
|
tbl = compptr->dc_tbl_no;
|
||||||
pdtbl = entropy->derived_tbls + tbl;
|
pdtbl = (d_derived_tbl **)(entropy->derived_tbls) + tbl;
|
||||||
jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, pdtbl);
|
jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, pdtbl);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
tbl = compptr->ac_tbl_no;
|
tbl = compptr->ac_tbl_no;
|
||||||
pdtbl = entropy->derived_tbls + tbl;
|
pdtbl = (d_derived_tbl **)(entropy->derived_tbls) + tbl;
|
||||||
jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, pdtbl);
|
jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, pdtbl);
|
||||||
/* remember the single active table */
|
/* remember the single active table */
|
||||||
entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
|
entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
|
||||||
|
|||||||
47
jdsample.c
47
jdsample.c
@@ -303,6 +303,48 @@ h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fancy processing for 1:1 horizontal and 2:1 vertical (4:4:0 subsampling).
|
||||||
|
*
|
||||||
|
* This is a less common case, but it can be encountered when losslessly
|
||||||
|
* rotating/transposing a JPEG file that uses 4:2:2 chroma subsampling.
|
||||||
|
*/
|
||||||
|
|
||||||
|
METHODDEF(void)
|
||||||
|
h1v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
||||||
|
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
||||||
|
{
|
||||||
|
JSAMPARRAY output_data = *output_data_ptr;
|
||||||
|
JSAMPROW inptr0, inptr1, outptr;
|
||||||
|
/* The column accumulator is a plain int for 8-bit samples and a JLONG for
   any other sample width. */
#if BITS_IN_JSAMPLE == 8
|
||||||
|
int thiscolsum;
|
||||||
|
#else
|
||||||
|
JLONG thiscolsum;
|
||||||
|
#endif
|
||||||
|
JDIMENSION colctr;
|
||||||
|
int inrow, outrow, v;
|
||||||
|
|
||||||
|
/* Each pass of the outer loop consumes one input row (inrow) and writes two
   output rows (v = 0, then v = 1), stopping once max_v_samp_factor output
   rows have been produced. */
inrow = outrow = 0;
|
||||||
|
while (outrow < cinfo->max_v_samp_factor) {
|
||||||
|
for (v = 0; v < 2; v++) {
|
||||||
|
/* inptr0 points to nearest input row, inptr1 points to next nearest */
|
||||||
|
inptr0 = input_data[inrow];
|
||||||
|
if (v == 0) /* next nearest is row above */
|
||||||
|
/* NOTE(review): when inrow is 0 this reads input_data[-1], so the caller
   presumably supplies valid rows on either side of the row group
   (jinit_upsampler sets need_context_rows = TRUE when it selects this
   method) -- confirm against the caller. */
inptr1 = input_data[inrow-1];
|
||||||
|
else /* next nearest is row below */
|
||||||
|
inptr1 = input_data[inrow+1];
|
||||||
|
outptr = output_data[outrow++];
|
||||||
|
|
||||||
|
/* No horizontal expansion: one output sample per input column, for
   downsampled_width columns. */
for(colctr = 0; colctr < compptr->downsampled_width; colctr++) {
|
||||||
|
/* Weight the nearest row by 3 and the next nearest row by 1 ... */
thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
|
||||||
|
/* ... then add 1 and shift right by 2 (divide by 4) to get the sample. */
*outptr++ = (JSAMPLE) ((thiscolsum + 1) >> 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inrow++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
|
* Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
|
||||||
* Again a triangle filter; see comments for h2v1 case, above.
|
* Again a triangle filter; see comments for h2v1 case, above.
|
||||||
@@ -431,6 +473,11 @@ jinit_upsampler (j_decompress_ptr cinfo)
|
|||||||
else
|
else
|
||||||
upsample->methods[ci] = h2v1_upsample;
|
upsample->methods[ci] = h2v1_upsample;
|
||||||
}
|
}
|
||||||
|
} else if (h_in_group == h_out_group &&
|
||||||
|
v_in_group * 2 == v_out_group && do_fancy) {
|
||||||
|
/* Non-fancy upsampling is handled by the generic method */
|
||||||
|
upsample->methods[ci] = h1v2_fancy_upsample;
|
||||||
|
upsample->pub.need_context_rows = TRUE;
|
||||||
} else if (h_in_group * 2 == h_out_group &&
|
} else if (h_in_group * 2 == h_out_group &&
|
||||||
v_in_group * 2 == v_out_group) {
|
v_in_group * 2 == v_out_group) {
|
||||||
/* Special cases for 2h2v upsampling */
|
/* Special cases for 2h2v upsampling */
|
||||||
|
|||||||
17
jmemmgr.c
17
jmemmgr.c
@@ -32,6 +32,7 @@
|
|||||||
#include "jinclude.h"
|
#include "jinclude.h"
|
||||||
#include "jpeglib.h"
|
#include "jpeglib.h"
|
||||||
#include "jmemsys.h" /* import the system-dependent declarations */
|
#include "jmemsys.h" /* import the system-dependent declarations */
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
#ifndef NO_GETENV
|
#ifndef NO_GETENV
|
||||||
#ifndef HAVE_STDLIB_H /* <stdlib.h> should declare getenv() */
|
#ifndef HAVE_STDLIB_H /* <stdlib.h> should declare getenv() */
|
||||||
@@ -650,18 +651,26 @@ realize_virt_arrays (j_common_ptr cinfo)
|
|||||||
maximum_space = 0;
|
maximum_space = 0;
|
||||||
for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
|
for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
|
||||||
if (sptr->mem_buffer == NULL) { /* if not realized yet */
|
if (sptr->mem_buffer == NULL) { /* if not realized yet */
|
||||||
|
size_t new_space = (long) sptr->rows_in_array *
|
||||||
|
(long) sptr->samplesperrow * sizeof(JSAMPLE);
|
||||||
|
|
||||||
space_per_minheight += (long) sptr->maxaccess *
|
space_per_minheight += (long) sptr->maxaccess *
|
||||||
(long) sptr->samplesperrow * sizeof(JSAMPLE);
|
(long) sptr->samplesperrow * sizeof(JSAMPLE);
|
||||||
maximum_space += (long) sptr->rows_in_array *
|
if (SIZE_MAX - maximum_space < new_space)
|
||||||
(long) sptr->samplesperrow * sizeof(JSAMPLE);
|
out_of_memory(cinfo, 10);
|
||||||
|
maximum_space += new_space;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
|
for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
|
||||||
if (bptr->mem_buffer == NULL) { /* if not realized yet */
|
if (bptr->mem_buffer == NULL) { /* if not realized yet */
|
||||||
|
size_t new_space = (long) bptr->rows_in_array *
|
||||||
|
(long) bptr->blocksperrow * sizeof(JBLOCK);
|
||||||
|
|
||||||
space_per_minheight += (long) bptr->maxaccess *
|
space_per_minheight += (long) bptr->maxaccess *
|
||||||
(long) bptr->blocksperrow * sizeof(JBLOCK);
|
(long) bptr->blocksperrow * sizeof(JBLOCK);
|
||||||
maximum_space += (long) bptr->rows_in_array *
|
if (SIZE_MAX - maximum_space < new_space)
|
||||||
(long) bptr->blocksperrow * sizeof(JBLOCK);
|
out_of_memory(cinfo, 11);
|
||||||
|
maximum_space += new_space;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -155,8 +155,8 @@ struct jpeg_decomp_master {
|
|||||||
/* Partial decompression variables */
|
/* Partial decompression variables */
|
||||||
JDIMENSION first_iMCU_col;
|
JDIMENSION first_iMCU_col;
|
||||||
JDIMENSION last_iMCU_col;
|
JDIMENSION last_iMCU_col;
|
||||||
JDIMENSION first_MCU_col[MAX_COMPS_IN_SCAN];
|
JDIMENSION first_MCU_col[MAX_COMPONENTS];
|
||||||
JDIMENSION last_MCU_col[MAX_COMPS_IN_SCAN];
|
JDIMENSION last_MCU_col[MAX_COMPONENTS];
|
||||||
boolean jinit_upsampler_no_alloc;
|
boolean jinit_upsampler_no_alloc;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -73,19 +73,24 @@ endif
|
|||||||
|
|
||||||
if SIMD_POWERPC
|
if SIMD_POWERPC
|
||||||
|
|
||||||
libsimd_la_SOURCES = jsimd_powerpc.c jsimd_altivec.h jcsample.h \
|
noinst_LTLIBRARIES += libsimd_altivec.la
|
||||||
|
|
||||||
|
libsimd_altivec_la_SOURCES = \
|
||||||
jccolor-altivec.c jcgray-altivec.c jcsample-altivec.c \
|
jccolor-altivec.c jcgray-altivec.c jcsample-altivec.c \
|
||||||
jdcolor-altivec.c jdmerge-altivec.c jdsample-altivec.c \
|
jdcolor-altivec.c jdmerge-altivec.c jdsample-altivec.c \
|
||||||
jfdctfst-altivec.c jfdctint-altivec.c \
|
jfdctfst-altivec.c jfdctint-altivec.c \
|
||||||
jidctfst-altivec.c jidctint-altivec.c \
|
jidctfst-altivec.c jidctint-altivec.c \
|
||||||
jquanti-altivec.c
|
jquanti-altivec.c
|
||||||
libsimd_la_CFLAGS = -maltivec
|
libsimd_altivec_la_CFLAGS = -maltivec
|
||||||
|
|
||||||
jccolor-altivec.lo: jccolext-altivec.c
|
jccolor-altivec.lo: jccolext-altivec.c
|
||||||
jcgray-altivec.lo: jcgryext-altivec.c
|
jcgray-altivec.lo: jcgryext-altivec.c
|
||||||
jdcolor-altivec.lo: jdcolext-altivec.c
|
jdcolor-altivec.lo: jdcolext-altivec.c
|
||||||
jdmerge-altivec.lo: jdmrgext-altivec.c
|
jdmerge-altivec.lo: jdmrgext-altivec.c
|
||||||
|
|
||||||
|
libsimd_la_SOURCES = jsimd_powerpc.c jsimd_altivec.h jcsample.h
|
||||||
|
libsimd_la_LIBADD = libsimd_altivec.la
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
AM_CPPFLAGS = -I$(top_srcdir)
|
AM_CPPFLAGS = -I$(top_srcdir)
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ init_simd (void)
|
|||||||
/* Force different settings through environment variables */
|
/* Force different settings through environment variables */
|
||||||
env = getenv("JSIMD_FORCENEON");
|
env = getenv("JSIMD_FORCENEON");
|
||||||
if ((env != NULL) && (strcmp(env, "1") == 0))
|
if ((env != NULL) && (strcmp(env, "1") == 0))
|
||||||
simd_support &= JSIMD_ARM_NEON;
|
simd_support = JSIMD_ARM_NEON;
|
||||||
env = getenv("JSIMD_FORCENONE");
|
env = getenv("JSIMD_FORCENONE");
|
||||||
if ((env != NULL) && (strcmp(env, "1") == 0))
|
if ((env != NULL) && (strcmp(env, "1") == 0))
|
||||||
simd_support = 0;
|
simd_support = 0;
|
||||||
|
|||||||
@@ -142,7 +142,7 @@ init_simd (void)
|
|||||||
/* Force different settings through environment variables */
|
/* Force different settings through environment variables */
|
||||||
env = getenv("JSIMD_FORCENEON");
|
env = getenv("JSIMD_FORCENEON");
|
||||||
if ((env != NULL) && (strcmp(env, "1") == 0))
|
if ((env != NULL) && (strcmp(env, "1") == 0))
|
||||||
simd_support &= JSIMD_ARM_NEON;
|
simd_support = JSIMD_ARM_NEON;
|
||||||
env = getenv("JSIMD_FORCENONE");
|
env = getenv("JSIMD_FORCENONE");
|
||||||
if ((env != NULL) && (strcmp(env, "1") == 0))
|
if ((env != NULL) && (strcmp(env, "1") == 0))
|
||||||
simd_support = 0;
|
simd_support = 0;
|
||||||
|
|||||||
@@ -210,10 +210,16 @@ asm_function jsimd_idct_islow_neon
|
|||||||
TMP7 .req x13
|
TMP7 .req x13
|
||||||
TMP8 .req x14
|
TMP8 .req x14
|
||||||
|
|
||||||
|
/* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
|
||||||
|
guarantee that the upper (unused) 32 bits of x3 are valid. This
|
||||||
|
instruction ensures that those bits are set to zero. */
|
||||||
|
uxtw x3, w3
|
||||||
|
|
||||||
sub sp, sp, #64
|
sub sp, sp, #64
|
||||||
adr x15, Ljsimd_idct_islow_neon_consts
|
adr x15, Ljsimd_idct_islow_neon_consts
|
||||||
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], #32
|
mov x10, sp
|
||||||
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], #32
|
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x10], #32
|
||||||
|
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x10], #32
|
||||||
ld1 {v0.8h, v1.8h}, [x15]
|
ld1 {v0.8h, v1.8h}, [x15]
|
||||||
ld1 {v2.8h, v3.8h, v4.8h, v5.8h}, [COEF_BLOCK], #64
|
ld1 {v2.8h, v3.8h, v4.8h, v5.8h}, [COEF_BLOCK], #64
|
||||||
ld1 {v18.8h, v19.8h, v20.8h, v21.8h}, [DCT_TABLE], #64
|
ld1 {v18.8h, v19.8h, v20.8h, v21.8h}, [DCT_TABLE], #64
|
||||||
@@ -238,7 +244,6 @@ asm_function jsimd_idct_islow_neon
|
|||||||
shl v10.8h, v2.8h, #(PASS1_BITS)
|
shl v10.8h, v2.8h, #(PASS1_BITS)
|
||||||
sqxtn v16.8b, v15.8h
|
sqxtn v16.8b, v15.8h
|
||||||
mov TMP1, v16.d[0]
|
mov TMP1, v16.d[0]
|
||||||
sub sp, sp, #64
|
|
||||||
mvn TMP2, TMP1
|
mvn TMP2, TMP1
|
||||||
|
|
||||||
cbnz TMP2, 2f
|
cbnz TMP2, 2f
|
||||||
@@ -807,6 +812,11 @@ asm_function jsimd_idct_ifast_neon
|
|||||||
TMP7 .req x13
|
TMP7 .req x13
|
||||||
TMP8 .req x14
|
TMP8 .req x14
|
||||||
|
|
||||||
|
/* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
|
||||||
|
guarantee that the upper (unused) 32 bits of x3 are valid. This
|
||||||
|
instruction ensures that those bits are set to zero. */
|
||||||
|
uxtw x3, w3
|
||||||
|
|
||||||
/* Load and dequantize coefficients into NEON registers
|
/* Load and dequantize coefficients into NEON registers
|
||||||
* with the following allocation:
|
* with the following allocation:
|
||||||
* 0 1 2 3 | 4 5 6 7
|
* 0 1 2 3 | 4 5 6 7
|
||||||
@@ -1101,19 +1111,18 @@ asm_function jsimd_idct_4x4_neon
|
|||||||
TMP3 .req x2
|
TMP3 .req x2
|
||||||
TMP4 .req x15
|
TMP4 .req x15
|
||||||
|
|
||||||
|
/* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
|
||||||
|
guarantee that the upper (unused) 32 bits of x3 are valid. This
|
||||||
|
instruction ensures that those bits are set to zero. */
|
||||||
|
uxtw x3, w3
|
||||||
|
|
||||||
/* Save all used NEON registers */
|
/* Save all used NEON registers */
|
||||||
sub sp, sp, 272
|
sub sp, sp, 64
|
||||||
str x15, [sp], 16
|
mov x9, sp
|
||||||
/* Load constants (v3.4h is just used for padding) */
|
/* Load constants (v3.4h is just used for padding) */
|
||||||
adr TMP4, Ljsimd_idct_4x4_neon_consts
|
adr TMP4, Ljsimd_idct_4x4_neon_consts
|
||||||
st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32
|
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32
|
||||||
st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32
|
||||||
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
|
||||||
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
|
||||||
st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
|
||||||
st1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32
|
|
||||||
st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
|
||||||
st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32
|
|
||||||
ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4]
|
ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4]
|
||||||
|
|
||||||
/* Load all COEF_BLOCK into NEON registers with the following allocation:
|
/* Load all COEF_BLOCK into NEON registers with the following allocation:
|
||||||
@@ -1222,16 +1231,8 @@ asm_function jsimd_idct_4x4_neon
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* vpop {v8.4h - v15.4h} ;not available */
|
/* vpop {v8.4h - v15.4h} ;not available */
|
||||||
sub sp, sp, #272
|
|
||||||
ldr x15, [sp], 16
|
|
||||||
ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32
|
|
||||||
ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
|
||||||
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
|
||||||
ld1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32
|
|
||||||
ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
|
||||||
ld1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32
|
|
||||||
blr x30
|
blr x30
|
||||||
|
|
||||||
.unreq DCT_TABLE
|
.unreq DCT_TABLE
|
||||||
@@ -1299,19 +1300,19 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
TMP1 .req x0
|
TMP1 .req x0
|
||||||
TMP2 .req x15
|
TMP2 .req x15
|
||||||
|
|
||||||
|
/* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
|
||||||
|
guarantee that the upper (unused) 32 bits of x3 are valid. This
|
||||||
|
instruction ensures that those bits are set to zero. */
|
||||||
|
uxtw x3, w3
|
||||||
|
|
||||||
/* vpush {v8.4h - v15.4h} ; not available */
|
/* vpush {v8.4h - v15.4h} ; not available */
|
||||||
sub sp, sp, 208
|
sub sp, sp, 64
|
||||||
str x15, [sp], 16
|
mov x9, sp
|
||||||
|
|
||||||
/* Load constants */
|
/* Load constants */
|
||||||
adr TMP2, Ljsimd_idct_2x2_neon_consts
|
adr TMP2, Ljsimd_idct_2x2_neon_consts
|
||||||
st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32
|
||||||
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32
|
||||||
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
|
||||||
st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
|
||||||
st1 {v21.8b, v22.8b}, [sp], 16
|
|
||||||
st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
|
||||||
st1 {v30.8b, v31.8b}, [sp], 16
|
|
||||||
ld1 {v14.4h}, [TMP2]
|
ld1 {v14.4h}, [TMP2]
|
||||||
|
|
||||||
/* Load all COEF_BLOCK into NEON registers with the following allocation:
|
/* Load all COEF_BLOCK into NEON registers with the following allocation:
|
||||||
@@ -1411,15 +1412,8 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
st1 {v26.b}[1], [TMP2], 1
|
st1 {v26.b}[1], [TMP2], 1
|
||||||
st1 {v27.b}[5], [TMP2], 1
|
st1 {v27.b}[5], [TMP2], 1
|
||||||
|
|
||||||
sub sp, sp, #208
|
|
||||||
ldr x15, [sp], 16
|
|
||||||
ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
|
||||||
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
|
||||||
ld1 {v21.8b, v22.8b}, [sp], 16
|
|
||||||
ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
|
||||||
ld1 {v30.8b, v31.8b}, [sp], 16
|
|
||||||
blr x30
|
blr x30
|
||||||
|
|
||||||
.unreq DCT_TABLE
|
.unreq DCT_TABLE
|
||||||
@@ -1688,24 +1682,24 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
|
|||||||
.else
|
.else
|
||||||
asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3
|
asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3
|
||||||
.endif
|
.endif
|
||||||
OUTPUT_WIDTH .req x0
|
OUTPUT_WIDTH .req w0
|
||||||
INPUT_BUF .req x1
|
INPUT_BUF .req x1
|
||||||
INPUT_ROW .req x2
|
INPUT_ROW .req w2
|
||||||
OUTPUT_BUF .req x3
|
OUTPUT_BUF .req x3
|
||||||
NUM_ROWS .req x4
|
NUM_ROWS .req w4
|
||||||
|
|
||||||
INPUT_BUF0 .req x5
|
INPUT_BUF0 .req x5
|
||||||
INPUT_BUF1 .req x6
|
INPUT_BUF1 .req x6
|
||||||
INPUT_BUF2 .req x1
|
INPUT_BUF2 .req x1
|
||||||
|
|
||||||
RGB .req x7
|
RGB .req x7
|
||||||
Y .req x8
|
Y .req x9
|
||||||
U .req x9
|
U .req x10
|
||||||
V .req x10
|
V .req x11
|
||||||
N .req x15
|
N .req w15
|
||||||
|
|
||||||
sub sp, sp, 336
|
sub sp, sp, 64
|
||||||
str x15, [sp], 16
|
mov x9, sp
|
||||||
|
|
||||||
/* Load constants to d1, d2, d3 (v0.4h is just used for padding) */
|
/* Load constants to d1, d2, d3 (v0.4h is just used for padding) */
|
||||||
.if \fast_st3 == 1
|
.if \fast_st3 == 1
|
||||||
@@ -1715,23 +1709,11 @@ asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3
|
|||||||
.endif
|
.endif
|
||||||
|
|
||||||
/* Save NEON registers */
|
/* Save NEON registers */
|
||||||
st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32
|
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32
|
||||||
st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32
|
||||||
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
|
||||||
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
|
||||||
st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
|
||||||
st1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32
|
|
||||||
st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
|
||||||
st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32
|
|
||||||
ld1 {v0.4h, v1.4h}, [x15], 16
|
ld1 {v0.4h, v1.4h}, [x15], 16
|
||||||
ld1 {v2.8h}, [x15]
|
ld1 {v2.8h}, [x15]
|
||||||
|
|
||||||
/* Save ARM registers and handle input arguments */
|
|
||||||
/* push {x4, x5, x6, x7, x8, x9, x10, x30} */
|
|
||||||
stp x4, x5, [sp], 16
|
|
||||||
stp x6, x7, [sp], 16
|
|
||||||
stp x8, x9, [sp], 16
|
|
||||||
stp x10, x30, [sp], 16
|
|
||||||
ldr INPUT_BUF0, [INPUT_BUF]
|
ldr INPUT_BUF0, [INPUT_BUF]
|
||||||
ldr INPUT_BUF1, [INPUT_BUF, #8]
|
ldr INPUT_BUF1, [INPUT_BUF, #8]
|
||||||
ldr INPUT_BUF2, [INPUT_BUF, #16]
|
ldr INPUT_BUF2, [INPUT_BUF, #16]
|
||||||
@@ -1745,11 +1727,10 @@ asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3
|
|||||||
cmp NUM_ROWS, #1
|
cmp NUM_ROWS, #1
|
||||||
b.lt 9f
|
b.lt 9f
|
||||||
0:
|
0:
|
||||||
lsl x16, INPUT_ROW, #3
|
ldr Y, [INPUT_BUF0, INPUT_ROW, uxtw #3]
|
||||||
ldr Y, [INPUT_BUF0, x16]
|
ldr U, [INPUT_BUF1, INPUT_ROW, uxtw #3]
|
||||||
ldr U, [INPUT_BUF1, x16]
|
|
||||||
mov N, OUTPUT_WIDTH
|
mov N, OUTPUT_WIDTH
|
||||||
ldr V, [INPUT_BUF2, x16]
|
ldr V, [INPUT_BUF2, INPUT_ROW, uxtw #3]
|
||||||
add INPUT_ROW, INPUT_ROW, #1
|
add INPUT_ROW, INPUT_ROW, #1
|
||||||
ldr RGB, [OUTPUT_BUF], #8
|
ldr RGB, [OUTPUT_BUF], #8
|
||||||
|
|
||||||
@@ -1799,21 +1780,8 @@ asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3
|
|||||||
b.gt 0b
|
b.gt 0b
|
||||||
9:
|
9:
|
||||||
/* Restore all registers and return */
|
/* Restore all registers and return */
|
||||||
sub sp, sp, #336
|
|
||||||
ldr x15, [sp], 16
|
|
||||||
ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32
|
|
||||||
ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32
|
|
||||||
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32
|
|
||||||
ld1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32
|
|
||||||
ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32
|
|
||||||
ld1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32
|
|
||||||
/* pop {r4, r5, r6, r7, r8, r9, r10, pc} */
|
|
||||||
ldp x4, x5, [sp], 16
|
|
||||||
ldp x6, x7, [sp], 16
|
|
||||||
ldp x8, x9, [sp], 16
|
|
||||||
ldp x10, x30, [sp], 16
|
|
||||||
br x30
|
br x30
|
||||||
.unreq OUTPUT_WIDTH
|
.unreq OUTPUT_WIDTH
|
||||||
.unreq INPUT_ROW
|
.unreq INPUT_ROW
|
||||||
@@ -2054,8 +2022,8 @@ asm_function jsimd_\colorid\()_ycc_convert_neon_slowld3
|
|||||||
OUTPUT_WIDTH .req w0
|
OUTPUT_WIDTH .req w0
|
||||||
INPUT_BUF .req x1
|
INPUT_BUF .req x1
|
||||||
OUTPUT_BUF .req x2
|
OUTPUT_BUF .req x2
|
||||||
OUTPUT_ROW .req x3
|
OUTPUT_ROW .req w3
|
||||||
NUM_ROWS .req x4
|
NUM_ROWS .req w4
|
||||||
|
|
||||||
OUTPUT_BUF0 .req x5
|
OUTPUT_BUF0 .req x5
|
||||||
OUTPUT_BUF1 .req x6
|
OUTPUT_BUF1 .req x6
|
||||||
@@ -2082,17 +2050,18 @@ asm_function jsimd_\colorid\()_ycc_convert_neon_slowld3
|
|||||||
|
|
||||||
/* Save NEON registers */
|
/* Save NEON registers */
|
||||||
sub sp, sp, #64
|
sub sp, sp, #64
|
||||||
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
mov x9, sp
|
||||||
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32
|
||||||
|
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32
|
||||||
|
|
||||||
/* Outer loop over scanlines */
|
/* Outer loop over scanlines */
|
||||||
cmp NUM_ROWS, #1
|
cmp NUM_ROWS, #1
|
||||||
b.lt 9f
|
b.lt 9f
|
||||||
0:
|
0:
|
||||||
ldr Y, [OUTPUT_BUF0, OUTPUT_ROW, lsl #3]
|
ldr Y, [OUTPUT_BUF0, OUTPUT_ROW, uxtw #3]
|
||||||
ldr U, [OUTPUT_BUF1, OUTPUT_ROW, lsl #3]
|
ldr U, [OUTPUT_BUF1, OUTPUT_ROW, uxtw #3]
|
||||||
mov N, OUTPUT_WIDTH
|
mov N, OUTPUT_WIDTH
|
||||||
ldr V, [OUTPUT_BUF2, OUTPUT_ROW, lsl #3]
|
ldr V, [OUTPUT_BUF2, OUTPUT_ROW, uxtw #3]
|
||||||
add OUTPUT_ROW, OUTPUT_ROW, #1
|
add OUTPUT_ROW, OUTPUT_ROW, #1
|
||||||
ldr RGB, [INPUT_BUF], #8
|
ldr RGB, [INPUT_BUF], #8
|
||||||
|
|
||||||
@@ -2136,7 +2105,6 @@ asm_function jsimd_\colorid\()_ycc_convert_neon_slowld3
|
|||||||
b.gt 0b
|
b.gt 0b
|
||||||
9:
|
9:
|
||||||
/* Restore all registers and return */
|
/* Restore all registers and return */
|
||||||
sub sp, sp, #64
|
|
||||||
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
br x30
|
br x30
|
||||||
@@ -2199,6 +2167,11 @@ asm_function jsimd_convsamp_neon
|
|||||||
TMP8 .req x4
|
TMP8 .req x4
|
||||||
TMPDUP .req w3
|
TMPDUP .req w3
|
||||||
|
|
||||||
|
/* START_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't
|
||||||
|
guarantee that the upper (unused) 32 bits of x1 are valid. This
|
||||||
|
instruction ensures that those bits are set to zero. */
|
||||||
|
uxtw x1, w1
|
||||||
|
|
||||||
mov TMPDUP, #128
|
mov TMPDUP, #128
|
||||||
ldp TMP1, TMP2, [SAMPLE_DATA], 16
|
ldp TMP1, TMP2, [SAMPLE_DATA], 16
|
||||||
ldp TMP3, TMP4, [SAMPLE_DATA], 16
|
ldp TMP3, TMP4, [SAMPLE_DATA], 16
|
||||||
@@ -2335,8 +2308,9 @@ asm_function jsimd_fdct_islow_neon
|
|||||||
|
|
||||||
/* Save NEON registers */
|
/* Save NEON registers */
|
||||||
sub sp, sp, #64
|
sub sp, sp, #64
|
||||||
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
mov x10, sp
|
||||||
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x10], 32
|
||||||
|
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x10], 32
|
||||||
|
|
||||||
/* Load all DATA into NEON registers with the following allocation:
|
/* Load all DATA into NEON registers with the following allocation:
|
||||||
* 0 1 2 3 | 4 5 6 7
|
* 0 1 2 3 | 4 5 6 7
|
||||||
@@ -2566,7 +2540,6 @@ asm_function jsimd_fdct_islow_neon
|
|||||||
st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [DATA]
|
st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [DATA]
|
||||||
|
|
||||||
/* Restore NEON registers */
|
/* Restore NEON registers */
|
||||||
sub sp, sp, #64
|
|
||||||
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||||
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||||
|
|
||||||
@@ -3080,7 +3053,7 @@ asm_function jsimd_huff_encode_one_block_neon_slowtbl
|
|||||||
sub sp, sp, 272
|
sub sp, sp, 272
|
||||||
sub BUFFER, BUFFER, #0x1 /* BUFFER=buffer-- */
|
sub BUFFER, BUFFER, #0x1 /* BUFFER=buffer-- */
|
||||||
/* Save ARM registers */
|
/* Save ARM registers */
|
||||||
stp x19, x20, [sp], 16
|
stp x19, x20, [sp]
|
||||||
.if \fast_tbl == 1
|
.if \fast_tbl == 1
|
||||||
adr x15, Ljsimd_huff_encode_one_block_neon_consts
|
adr x15, Ljsimd_huff_encode_one_block_neon_consts
|
||||||
.else
|
.else
|
||||||
@@ -3294,7 +3267,7 @@ asm_function jsimd_huff_encode_one_block_neon_slowtbl
|
|||||||
and v18.16b, v18.16b, v23.16b
|
and v18.16b, v18.16b, v23.16b
|
||||||
add x3, x4, #0x400 /* r1 = dctbl->ehufsi */
|
add x3, x4, #0x400 /* r1 = dctbl->ehufsi */
|
||||||
and v20.16b, v20.16b, v23.16b
|
and v20.16b, v20.16b, v23.16b
|
||||||
add x15, sp, #0x80 /* x15 = t2 */
|
add x15, sp, #0x90 /* x15 = t2 */
|
||||||
and v22.16b, v22.16b, v23.16b
|
and v22.16b, v22.16b, v23.16b
|
||||||
ldr w10, [x4, x12, lsl #2]
|
ldr w10, [x4, x12, lsl #2]
|
||||||
addp v16.16b, v16.16b, v18.16b
|
addp v16.16b, v16.16b, v18.16b
|
||||||
@@ -3317,7 +3290,7 @@ asm_function jsimd_huff_encode_one_block_neon_slowtbl
|
|||||||
rbit x9, x9 /* x9 = index0 */
|
rbit x9, x9 /* x9 = index0 */
|
||||||
ldrb w14, [x4, #0xf0] /* x14 = actbl->ehufsi[0xf0] */
|
ldrb w14, [x4, #0xf0] /* x14 = actbl->ehufsi[0xf0] */
|
||||||
cmp w12, #(64-8)
|
cmp w12, #(64-8)
|
||||||
mov x11, sp
|
add x11, sp, #16
|
||||||
b.lt 4f
|
b.lt 4f
|
||||||
cbz x9, 6f
|
cbz x9, 6f
|
||||||
st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x11], #64
|
st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x11], #64
|
||||||
@@ -3421,7 +3394,7 @@ asm_function jsimd_huff_encode_one_block_neon_slowtbl
|
|||||||
put_bits x3, x11
|
put_bits x3, x11
|
||||||
cbnz x9, 1b
|
cbnz x9, 1b
|
||||||
6:
|
6:
|
||||||
add x13, sp, #0xfe
|
add x13, sp, #0x10e
|
||||||
cmp x15, x13
|
cmp x15, x13
|
||||||
b.hs 1f
|
b.hs 1f
|
||||||
ldr w12, [x5]
|
ldr w12, [x5]
|
||||||
@@ -3429,7 +3402,6 @@ asm_function jsimd_huff_encode_one_block_neon_slowtbl
|
|||||||
checkbuf47
|
checkbuf47
|
||||||
put_bits x12, x14
|
put_bits x12, x14
|
||||||
1:
|
1:
|
||||||
sub sp, sp, 16
|
|
||||||
str PUT_BUFFER, [x0, #0x10]
|
str PUT_BUFFER, [x0, #0x10]
|
||||||
str PUT_BITSw, [x0, #0x18]
|
str PUT_BITSw, [x0, #0x18]
|
||||||
ldp x19, x20, [sp], 16
|
ldp x19, x20, [sp], 16
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
* jsimd_mips.c
|
* jsimd_mips.c
|
||||||
*
|
*
|
||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
* Copyright (C) 2009-2011, 2014, D. R. Commander.
|
* Copyright (C) 2009-2011, 2014, 2016, D. R. Commander.
|
||||||
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
|
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
|
||||||
* Copyright (C) 2015, Matthieu Darbois.
|
* Copyright (C) 2015, Matthieu Darbois.
|
||||||
*
|
*
|
||||||
@@ -77,6 +77,14 @@ init_simd (void)
|
|||||||
if (!parse_proc_cpuinfo("MIPS 74K"))
|
if (!parse_proc_cpuinfo("MIPS 74K"))
|
||||||
return;
|
return;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Force different settings through environment variables */
|
||||||
|
env = getenv("JSIMD_FORCEDSPR2");
|
||||||
|
if ((env != NULL) && (strcmp(env, "1") == 0))
|
||||||
|
simd_support = JSIMD_MIPS_DSPR2;
|
||||||
|
env = getenv("JSIMD_FORCENONE");
|
||||||
|
if ((env != NULL) && (strcmp(env, "1") == 0))
|
||||||
|
simd_support = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const int mips_idct_ifast_coefs[4] = {
|
static const int mips_idct_ifast_coefs[4] = {
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
* jsimd_powerpc.c
|
* jsimd_powerpc.c
|
||||||
*
|
*
|
||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
* Copyright (C) 2009-2011, 2014-2015, D. R. Commander.
|
* Copyright (C) 2009-2011, 2014-2016, D. R. Commander.
|
||||||
* Copyright (C) 2015, Matthieu Darbois.
|
* Copyright (C) 2015, Matthieu Darbois.
|
||||||
*
|
*
|
||||||
* Based on the x86 SIMD extension for IJG JPEG library,
|
* Based on the x86 SIMD extension for IJG JPEG library,
|
||||||
@@ -22,19 +22,106 @@
|
|||||||
#include "../jsimddct.h"
|
#include "../jsimddct.h"
|
||||||
#include "jsimd.h"
|
#include "jsimd.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
static unsigned int simd_support = ~0;
|
static unsigned int simd_support = ~0;
|
||||||
|
|
||||||
|
#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
|
||||||
|
|
||||||
|
#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
|
||||||
|
|
||||||
|
LOCAL(int)
|
||||||
|
check_feature (char *buffer, char *feature)
|
||||||
|
{
|
||||||
|
char *p;
|
||||||
|
if (*feature == 0)
|
||||||
|
return 0;
|
||||||
|
if (strncmp(buffer, "cpu", 3) != 0)
|
||||||
|
return 0;
|
||||||
|
buffer += 3;
|
||||||
|
while (isspace(*buffer))
|
||||||
|
buffer++;
|
||||||
|
|
||||||
|
/* Check if 'feature' is present in the buffer as a separate word */
|
||||||
|
while ((p = strstr(buffer, feature))) {
|
||||||
|
if (p > buffer && !isspace(*(p - 1))) {
|
||||||
|
buffer++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
p += strlen(feature);
|
||||||
|
if (*p != 0 && !isspace(*p)) {
|
||||||
|
buffer++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOCAL(int)
|
||||||
|
parse_proc_cpuinfo (int bufsize)
|
||||||
|
{
|
||||||
|
char *buffer = (char *)malloc(bufsize);
|
||||||
|
FILE *fd;
|
||||||
|
simd_support = 0;
|
||||||
|
|
||||||
|
if (!buffer)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
fd = fopen("/proc/cpuinfo", "r");
|
||||||
|
if (fd) {
|
||||||
|
while (fgets(buffer, bufsize, fd)) {
|
||||||
|
if (!strchr(buffer, '\n') && !feof(fd)) {
|
||||||
|
/* "impossible" happened - insufficient size of the buffer! */
|
||||||
|
fclose(fd);
|
||||||
|
free(buffer);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (check_feature(buffer, "altivec"))
|
||||||
|
simd_support |= JSIMD_ALTIVEC;
|
||||||
|
}
|
||||||
|
fclose(fd);
|
||||||
|
}
|
||||||
|
free(buffer);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check what SIMD accelerations are supported.
|
||||||
|
*
|
||||||
|
* FIXME: This code is racy under a multi-threaded environment.
|
||||||
|
*/
|
||||||
LOCAL(void)
|
LOCAL(void)
|
||||||
init_simd (void)
|
init_simd (void)
|
||||||
{
|
{
|
||||||
char *env = NULL;
|
char *env = NULL;
|
||||||
|
#if !defined(__ALTIVEC__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
|
||||||
|
int bufsize = 1024; /* an initial guess for the line buffer size limit */
|
||||||
|
#endif
|
||||||
|
|
||||||
if (simd_support != ~0U)
|
if (simd_support != ~0U)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
simd_support = JSIMD_ALTIVEC;
|
simd_support = 0;
|
||||||
|
|
||||||
|
#if defined(__ALTIVEC__) || defined(__APPLE__)
|
||||||
|
simd_support |= JSIMD_ALTIVEC;
|
||||||
|
#elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
|
||||||
|
while (!parse_proc_cpuinfo(bufsize)) {
|
||||||
|
bufsize *= 2;
|
||||||
|
if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Force different settings through environment variables */
|
/* Force different settings through environment variables */
|
||||||
|
env = getenv("JSIMD_FORCEALTIVEC");
|
||||||
|
if ((env != NULL) && (strcmp(env, "1") == 0))
|
||||||
|
simd_support = JSIMD_ALTIVEC;
|
||||||
env = getenv("JSIMD_FORCENONE");
|
env = getenv("JSIMD_FORCENONE");
|
||||||
if ((env != NULL) && (strcmp(env, "1") == 0))
|
if ((env != NULL) && (strcmp(env, "1") == 0))
|
||||||
simd_support = 0;
|
simd_support = 0;
|
||||||
|
|||||||
18
tjbench.c
18
tjbench.c
@@ -248,7 +248,8 @@ int decomp(unsigned char *srcbuf, unsigned char **jpegbuf,
|
|||||||
int y=(int)((double)srcbuf[rindex]*0.299
|
int y=(int)((double)srcbuf[rindex]*0.299
|
||||||
+ (double)srcbuf[gindex]*0.587
|
+ (double)srcbuf[gindex]*0.587
|
||||||
+ (double)srcbuf[bindex]*0.114 + 0.5);
|
+ (double)srcbuf[bindex]*0.114 + 0.5);
|
||||||
if(y>255) y=255; if(y<0) y=0;
|
if(y>255) y=255;
|
||||||
|
if(y<0) y=0;
|
||||||
dstbuf[rindex]=abs(dstbuf[rindex]-y);
|
dstbuf[rindex]=abs(dstbuf[rindex]-y);
|
||||||
dstbuf[gindex]=abs(dstbuf[gindex]-y);
|
dstbuf[gindex]=abs(dstbuf[gindex]-y);
|
||||||
dstbuf[bindex]=abs(dstbuf[bindex]-y);
|
dstbuf[bindex]=abs(dstbuf[bindex]-y);
|
||||||
@@ -300,7 +301,8 @@ int fullTest(unsigned char *srcbuf, int w, int h, int subsamp, int jpegqual,
|
|||||||
|
|
||||||
for(tilew=dotile? 8:w, tileh=dotile? 8:h; ; tilew*=2, tileh*=2)
|
for(tilew=dotile? 8:w, tileh=dotile? 8:h; ; tilew*=2, tileh*=2)
|
||||||
{
|
{
|
||||||
if(tilew>w) tilew=w; if(tileh>h) tileh=h;
|
if(tilew>w) tilew=w;
|
||||||
|
if(tileh>h) tileh=h;
|
||||||
ntilesw=(w+tilew-1)/tilew; ntilesh=(h+tileh-1)/tileh;
|
ntilesw=(w+tilew-1)/tilew; ntilesh=(h+tileh-1)/tileh;
|
||||||
|
|
||||||
if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *)
|
if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *)
|
||||||
@@ -447,7 +449,8 @@ int fullTest(unsigned char *srcbuf, int w, int h, int subsamp, int jpegqual,
|
|||||||
|
|
||||||
for(i=0; i<ntilesw*ntilesh; i++)
|
for(i=0; i<ntilesw*ntilesh; i++)
|
||||||
{
|
{
|
||||||
if(jpegbuf[i]) tjFree(jpegbuf[i]); jpegbuf[i]=NULL;
|
if(jpegbuf[i]) tjFree(jpegbuf[i]);
|
||||||
|
jpegbuf[i]=NULL;
|
||||||
}
|
}
|
||||||
free(jpegbuf); jpegbuf=NULL;
|
free(jpegbuf); jpegbuf=NULL;
|
||||||
free(jpegsize); jpegsize=NULL;
|
free(jpegsize); jpegsize=NULL;
|
||||||
@@ -465,7 +468,8 @@ int fullTest(unsigned char *srcbuf, int w, int h, int subsamp, int jpegqual,
|
|||||||
{
|
{
|
||||||
for(i=0; i<ntilesw*ntilesh; i++)
|
for(i=0; i<ntilesw*ntilesh; i++)
|
||||||
{
|
{
|
||||||
if(jpegbuf[i]) tjFree(jpegbuf[i]); jpegbuf[i]=NULL;
|
if(jpegbuf[i]) tjFree(jpegbuf[i]);
|
||||||
|
jpegbuf[i]=NULL;
|
||||||
}
|
}
|
||||||
free(jpegbuf); jpegbuf=NULL;
|
free(jpegbuf); jpegbuf=NULL;
|
||||||
}
|
}
|
||||||
@@ -532,7 +536,8 @@ int decompTest(char *filename)
|
|||||||
|
|
||||||
for(tilew=dotile? 16:w, tileh=dotile? 16:h; ; tilew*=2, tileh*=2)
|
for(tilew=dotile? 16:w, tileh=dotile? 16:h; ; tilew*=2, tileh*=2)
|
||||||
{
|
{
|
||||||
if(tilew>w) tilew=w; if(tileh>h) tileh=h;
|
if(tilew>w) tilew=w;
|
||||||
|
if(tileh>h) tileh=h;
|
||||||
ntilesw=(w+tilew-1)/tilew; ntilesh=(h+tileh-1)/tileh;
|
ntilesw=(w+tilew-1)/tilew; ntilesh=(h+tileh-1)/tileh;
|
||||||
|
|
||||||
if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *)
|
if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *)
|
||||||
@@ -692,7 +697,8 @@ int decompTest(char *filename)
|
|||||||
{
|
{
|
||||||
for(i=0; i<ntilesw*ntilesh; i++)
|
for(i=0; i<ntilesw*ntilesh; i++)
|
||||||
{
|
{
|
||||||
if(jpegbuf[i]) tjFree(jpegbuf[i]); jpegbuf[i]=NULL;
|
if(jpegbuf[i]) tjFree(jpegbuf[i]);
|
||||||
|
jpegbuf[i]=NULL;
|
||||||
}
|
}
|
||||||
free(jpegbuf); jpegbuf=NULL;
|
free(jpegbuf); jpegbuf=NULL;
|
||||||
}
|
}
|
||||||
|
|||||||
29
turbojpeg.c
29
turbojpeg.c
@@ -368,6 +368,29 @@ static int getSubsamp(j_decompress_ptr dinfo)
|
|||||||
retval=i; break;
|
retval=i; break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* Handle 4:2:2 and 4:4:0 images whose sampling factors are specified
|
||||||
|
in non-standard ways. */
|
||||||
|
if(dinfo->comp_info[0].h_samp_factor==2 &&
|
||||||
|
dinfo->comp_info[0].v_samp_factor==2 &&
|
||||||
|
(i==TJSAMP_422 || i==TJSAMP_440))
|
||||||
|
{
|
||||||
|
int match=0;
|
||||||
|
for(k=1; k<dinfo->num_components; k++)
|
||||||
|
{
|
||||||
|
int href=tjMCUHeight[i]/8, vref=tjMCUWidth[i]/8;
|
||||||
|
if(dinfo->jpeg_color_space==JCS_YCCK && k==3)
|
||||||
|
{
|
||||||
|
href=vref=2;
|
||||||
|
}
|
||||||
|
if(dinfo->comp_info[k].h_samp_factor==href
|
||||||
|
&& dinfo->comp_info[k].v_samp_factor==vref)
|
||||||
|
match++;
|
||||||
|
}
|
||||||
|
if(match==dinfo->num_components-1)
|
||||||
|
{
|
||||||
|
retval=i; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return retval;
|
return retval;
|
||||||
@@ -570,7 +593,8 @@ static tjhandle _tjInitCompress(tjinstance *this)
|
|||||||
if(setjmp(this->jerr.setjmp_buffer))
|
if(setjmp(this->jerr.setjmp_buffer))
|
||||||
{
|
{
|
||||||
/* If we get here, the JPEG code has signaled an error. */
|
/* If we get here, the JPEG code has signaled an error. */
|
||||||
if(this) free(this); return NULL;
|
if(this) free(this);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
jpeg_create_compress(&this->cinfo);
|
jpeg_create_compress(&this->cinfo);
|
||||||
@@ -1231,7 +1255,8 @@ static tjhandle _tjInitDecompress(tjinstance *this)
|
|||||||
if(setjmp(this->jerr.setjmp_buffer))
|
if(setjmp(this->jerr.setjmp_buffer))
|
||||||
{
|
{
|
||||||
/* If we get here, the JPEG code has signaled an error. */
|
/* If we get here, the JPEG code has signaled an error. */
|
||||||
if(this) free(this); return NULL;
|
if(this) free(this);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
jpeg_create_decompress(&this->dinfo);
|
jpeg_create_decompress(&this->dinfo);
|
||||||
|
|||||||
Reference in New Issue
Block a user