From 6916f8795542e43e322ee5df88858665f24bd505 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 3 Feb 2012 08:59:21 +0000 Subject: [PATCH 01/26] Several clarifications regarding v7/v8 API/ABI support git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@776 632fc199-4ca6-4c93-a231-07263d6284db --- README-turbo.txt | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/README-turbo.txt b/README-turbo.txt index 24490e39..bdf2e10b 100755 --- a/README-turbo.txt +++ b/README-turbo.txt @@ -256,24 +256,17 @@ for the existence of the colorspace extensions at compile time and run time. libjpeg v7 and v8 API/ABI support ================================= -libjpeg v7 and v8 added new features to the API/ABI, and, unfortunately, the -compression and decompression structures were extended in a backward- -incompatible manner to accommodate these features. Thus, programs that are +With libjpeg v7 and v8, new features were added that necessitated extending the +compression and decompression structures. Unfortunately, due to the exposed +nature of those structures, extending them also necessitated breaking backward +ABI compatibility with previous libjpeg releases. Thus, programs that are built to use libjpeg v7 or v8 did not work with libjpeg-turbo, since it is based on the libjpeg v6b code base. Although libjpeg v7 and v8 are still not as widely used as v6b, enough programs (including a few Linux distros) have made the switch that it was desirable to provide support for the libjpeg v7/v8 -API/ABI in libjpeg-turbo. - -Some of the libjpeg v7 and v8 features -- DCT scaling, to name one -- involve -deep modifications to the code that cannot be accommodated by libjpeg-turbo -without either breaking compatibility with libjpeg v6b or producing an -unsupportable mess. 
In order to fully support libjpeg v8 with all of its -features, we would have to essentially port the SIMD extensions to the libjpeg -v8 code base and maintain two separate code trees. We are hesitant to do this -until/unless the newer libjpeg code bases garner more community support and -involvement and until/unless we have some notion of whether future libjpeg -releases will also be backward-incompatible. +API/ABI in libjpeg-turbo. Although libjpeg-turbo can now be configured as a +drop-in replacement for libjpeg v7 or v8, it should be noted that not all of +the features in libjpeg v7 and v8 are supported (see below.) By passing an argument of --with-jpeg7 or --with-jpeg8 to configure, or an argument of -DWITH_JPEG7=1 or -DWITH_JPEG8=1 to cmake, you can build a version @@ -312,6 +305,16 @@ Not supported: -- libjpeg: DCT scaling in compressor cinfo.scale_num and cinfo.scale_denom are silently ignored. + There is no technical reason why DCT scaling cannot be supported, but + without the SmartScale extension (see below), it would only be able to + down-scale using ratios of 1/2, 8/15, 4/7, 8/13, 2/3, 8/11, 4/5, and 8/9, + which is of limited usefulness. + +-- libjpeg: SmartScale + cinfo.block_size is silently ignored. + SmartScale is an extension to the JPEG format that allows for DCT block + sizes other than 8x8. It would be difficult to support this feature while + retaining backward compatibility with libjpeg v6b. -- libjpeg: IDCT scaling extensions in decompressor libjpeg-turbo still supports IDCT scaling with scaling factors of 1/2, 1/4, @@ -319,9 +322,14 @@ Not supported: -- libjpeg: Fancy downsampling in compressor cinfo.do_fancy_downsampling is silently ignored. + This requires the DCT scaling feature, which is not supported. -- jpegtran: Scaling - Seems to depend on the DCT scaling feature, which isn't supported. + This requires both the DCT scaling and SmartScale features, which are not + supported. 
+ +-- Lossless RGB JPEG files + This requires the SmartScale feature, which is not supported. ******************************************************************************* From 11a122b2d66faeaa0e55b5cceecc60ce6049c073 Mon Sep 17 00:00:00 2001 From: DRC Date: Tue, 7 Feb 2012 00:14:53 +0000 Subject: [PATCH 02/26] Wordsmithing and grammar git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@777 632fc199-4ca6-4c93-a231-07263d6284db --- README-turbo.txt | 71 ++++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/README-turbo.txt b/README-turbo.txt index bdf2e10b..899a3681 100755 --- a/README-turbo.txt +++ b/README-turbo.txt @@ -3,31 +3,31 @@ ******************************************************************************* libjpeg-turbo is a derivative of libjpeg that uses SIMD instructions (MMX, -SSE2, etc.) to accelerate baseline JPEG compression and decompression on x86 -and x86-64 systems. On such systems, libjpeg-turbo is generally 2-4x as fast -as the unmodified version of libjpeg, all else being equal. +SSE2, NEON) to accelerate baseline JPEG compression and decompression on x86, +x86-64, and ARM systems. On such systems, libjpeg-turbo is generally 2-4x as +fast as the unmodified version of libjpeg, all else being equal. libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but the TigerVNC and VirtualGL projects made numerous enhancements to the codec in 2009, including improved support for Mac OS X, 64-bit support, support for -32-bit and big endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman -encoding/decoding, and various bug fixes. The goal was to produce a fully open -source codec that could replace the partially closed source TurboJPEG/IPP codec -used by VirtualGL and TurboVNC. libjpeg-turbo generally performs in the range -of 80-120% of TurboJPEG/IPP. It is faster in some areas but slower in others. 
+32-bit and big-endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman +encoding/decoding, and various bug fixes. The goal was to produce a fully +open-source codec that could replace the partially closed-source TurboJPEG/IPP +codec used by VirtualGL and TurboVNC. libjpeg-turbo generally achieves 80-120% +of the performance of TurboJPEG/IPP. It is faster in some areas but slower in +others. In early 2010, libjpeg-turbo spun off into its own independent project, with the goal of making high-speed JPEG compression/decompression technology -available to a broader range of users and developers. The libjpeg-turbo shared -libraries can be used as drop-in replacements for libjpeg on most systems. +available to a broader range of users and developers. ******************************************************************************* ** License ******************************************************************************* -libjpeg-turbo is licensed under a non-restrictive, BSD-style license -(see README.) The TurboJPEG/OSS wrapper (both C and Java versions) and +Most of libjpeg-turbo inherits the non-restrictive, BSD-style license used by +libjpeg (see README.) The TurboJPEG/OSS wrapper (both C and Java versions) and associated test programs bear a similar license, which is reproduced below: Redistribution and use in source and binary forms, with or without @@ -62,14 +62,14 @@ POSSIBILITY OF SUCH DAMAGE. libjpeg-turbo includes two APIs that can be used to compress and decompress JPEG images: - TurboJPEG API: This API wraps libjpeg-turbo and provides an easy-to-use - interface for compressing and decompressing JPEG images in memory. It also - provides some features that would not be straightforward to implement using - the underlying libjpeg API, such as generating planar YUV images and - performing multiple simultaneous lossless transforms on an image. The Java - interface for libjpeg-turbo is written on top of the TurboJPEG API. 
+ TurboJPEG API: This API provides an easy-to-use interface for compressing + and decompressing JPEG images in memory. It also provides some functionality + that would not be straightforward to achieve using the underlying libjpeg + API, such as generating planar YUV images and performing multiple + simultaneous lossless transforms on an image. The Java interface for + libjpeg-turbo is written on top of the TurboJPEG API. - libjpeg API: This is the industry standard API for compressing and + libjpeg API: This is the de facto industry-standard API for compressing and decompressing JPEG images. It is more difficult to use than the TurboJPEG API but also more powerful. libjpeg-turbo is both API/ABI-compatible and mathematically compatible with libjpeg v6b. It can also optionally be @@ -101,13 +101,13 @@ NOTE: {lib} can be lib, lib32, lib64, or lib/64, depending on the O/S and architecture. System administrators can also replace the libjpeg sym links in /usr/{lib} with -links to the libjpeg dynamic library located in /opt/libjpeg-turbo/{lib}. This -will effectively accelerate every dynamically linked libjpeg application on the -system. +links to the libjpeg-turbo dynamic library located in /opt/libjpeg-turbo/{lib}. +This will effectively accelerate every application that uses the libjpeg +dynamic library on the system. The libjpeg-turbo SDK for Visual C++ installs the libjpeg-turbo DLL -(jpeg62.dll, jpeg7.dll, or jpeg8.dll, depending on whether libjpeg v6b, v7, or -v8 emulation is enabled) into c:\libjpeg-turbo[64]\bin, and the PATH +(jpeg62.dll, jpeg7.dll, or jpeg8.dll, depending on whether it was built with +libjpeg v6b, v7, or v8 emulation) into c:\libjpeg-turbo[64]\bin, and the PATH environment variable can be modified such that this directory is searched before any others that might contain a libjpeg DLL. 
However, if a libjpeg DLL exists in an application's install directory, then Windows will load this @@ -117,16 +117,16 @@ version of this DLL and copy c:\libjpeg-turbo[64]\bin\jpeg*.dll into the application's install directory to accelerate it. The version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for -Visual C++ requires the Visual C++ 2008 C run time DLL (msvcr90.dll). +Visual C++ requires the Visual C++ 2008 C run-time DLL (msvcr90.dll). msvcr90.dll ships with more recent versions of Windows, but users of older Windows releases can obtain it from the Visual C++ 2008 Redistributable Package, which is available as a free download from Microsoft's web site. -NOTE: Features of libjpeg that require passing a C run time structure, such +NOTE: Features of libjpeg that require passing a C run-time structure, such as a file handle, from an application to libjpeg will probably not work with the version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for Visual C++, unless the application is also built to use the Visual C++ 2008 C -run time DLL. In particular, this affects jpeg_stdio_dest() and +run-time DLL. In particular, this affects jpeg_stdio_dest() and jpeg_stdio_src(). Mac applications typically embed their own copies of the libjpeg dylib inside @@ -146,7 +146,7 @@ Replacing TurboJPEG/IPP libjpeg-turbo is a drop-in replacement for the TurboJPEG/IPP SDK used by VirtualGL 2.1.x and TurboVNC 0.6 (and prior.) libjpeg-turbo contains a wrapper library (TurboJPEG/OSS) that emulates the TurboJPEG API using libjpeg-turbo -instead of the closed source Intel Performance Primitives. You can replace the +instead of the closed-source Intel Performance Primitives. You can replace the TurboJPEG/IPP package on Linux systems with the libjpeg-turbo package in order to make existing releases of VirtualGL 2.1.x and TurboVNC 0.x use the new codec at run time. 
Note that the 64-bit libjpeg-turbo packages contain only 64-bit @@ -157,7 +157,7 @@ both the 64-bit and 32-bit versions of libjpeg-turbo. You can also build the VirtualGL 2.1.x and TurboVNC 0.6 source code with the libjpeg-turbo SDK instead of TurboJPEG/IPP. It should work identically. libjpeg-turbo also includes static library versions of TurboJPEG/OSS, which -are used to build TurboVNC 1.0 and later. +are used to build VirtualGL 2.2 and TurboVNC 1.0 and later. ======================================== Using libjpeg-turbo in Your Own Programs @@ -341,12 +341,13 @@ Restart Markers =============== The optimized Huffman decoder in libjpeg-turbo does not handle restart markers -in a way that makes libjpeg happy, so it is necessary to use the slow Huffman -decoder when decompressing a JPEG image that has restart markers. This can -cause the decompression performance to drop by as much as 20%, but the -performance will still be much much greater than that of libjpeg v6b. Many -consumer packages, such as PhotoShop, use restart markers when generating JPEG -images, so images generated by those programs will experience this issue. +in a way that makes the rest of the libjpeg infrastructure happy, so it is +necessary to use the slow Huffman decoder when decompressing a JPEG image that +has restart markers. This can cause the decompression performance to drop by +as much as 20%, but the performance will still be much greater than that of +libjpeg. Many consumer packages, such as Photoshop, use restart markers when +generating JPEG images, so images generated by those programs will experience +this issue.
=============================================== Fast Integer Forward DCT at High Quality Levels From 760ea8dfb38cea7925b663cd392ed737a40bf078 Mon Sep 17 00:00:00 2001 From: DRC Date: Tue, 7 Feb 2012 23:25:19 +0000 Subject: [PATCH 03/26] Merge documentation and wordsmithing changes from 1.2, including promotion of -arithmetic to a "switch for advanced users" git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.1.x@783 632fc199-4ca6-4c93-a231-07263d6284db --- BUILDING.txt | 24 +++++++------- CMakeLists.txt | 4 ++- README | 19 ++++++----- README-turbo.txt | 85 ++++++++++++++++++++++++++---------------------- cjpeg.1 | 16 ++++----- cjpeg.c | 6 ++-- install.txt | 10 +++--- jpegtran.1 | 4 +-- jpegtran.c | 6 ++-- libjpeg.txt | 23 +++++++------ structure.txt | 16 ++++----- usage.txt | 16 ++++----- 12 files changed, 122 insertions(+), 107 deletions(-) diff --git a/BUILDING.txt b/BUILDING.txt index 70fa4005..01f67c16 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -99,14 +99,14 @@ This will generate the following files under .libs/ 62, 7, or 8. -libjpeg v7 or v8 Emulation --------------------------- +libjpeg v7 or v8 API/ABI Emulation +---------------------------------- Add --with-jpeg7 to the configure command line to build a version of -libjpeg-turbo that is compatible with libjpeg v7. Add --with-jpeg8 to the -configure command to build a version of libjpeg-turbo that is compatible with -libjpeg v8. See README-turbo.txt for more information on libjpeg v7 and v8 -emulation. +libjpeg-turbo that is API/ABI-compatible with libjpeg v7. Add --with-jpeg8 to +the configure command to build a version of libjpeg-turbo that is +API/ABI-compatible with libjpeg v8. See README-turbo.txt for more information +on libjpeg v7 and v8 emulation. Arithmetic Coding Support @@ -416,14 +416,14 @@ NMake, remove "-DCMAKE_BUILD_TYPE=Release" (Debug builds are the default with NMake.) 
-libjpeg v7 or v8 Emulation --------------------------- +libjpeg v7 or v8 API/ABI Emulation +----------------------------------- Add "-DWITH_JPEG7=1" to the cmake command line to build a version of -libjpeg-turbo that is compatible with libjpeg v7. Add "-DWITH_JPEG8=1" to the -cmake command to build a version of libjpeg-turbo that is compatible with -libjpeg v8. See README-turbo.txt for more information on libjpeg v7 and v8 -emulation. +libjpeg-turbo that is API/ABI-compatible with libjpeg v7. Add "-DWITH_JPEG8=1" +to the cmake command to build a version of libjpeg-turbo that is +API/ABI-compatible with libjpeg v8. See README-turbo.txt for more information +on libjpeg v7 and v8 emulation. Arithmetic Coding Support diff --git a/CMakeLists.txt b/CMakeLists.txt index f703acf3..28f8e000 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,8 @@ else() message(FATAL_ERROR "Platform not supported by this build system. Use autotools instead.") endif() +# This does nothing except when using MinGW. CMAKE_BUILD_TYPE has no meaning +# in Visual Studio, and it always defaults to Debug when using NMake. if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() @@ -25,7 +27,7 @@ endif() message(STATUS "CMAKE_BUILD_TYPE = ${CMAKE_BUILD_TYPE}") # This only works if building from the command line. There is currently no way -# to set a variable's value based on the build type when using the MSVC IDE. +# to set a variable's value based on the build type when using Visual Studio. if(CMAKE_BUILD_TYPE STREQUAL "Debug") set(BUILD "${BUILD}d") endif() diff --git a/README b/README index 2ead09e6..0e9b4295 100644 --- a/README +++ b/README @@ -1,7 +1,8 @@ -libjpeg-turbo note: This file is mostly taken from the libjpeg v8b README -file, and it is included only for reference. Some parts of it may not apply to -libjpeg-turbo. Please see README-turbo.txt for information specific to the -turbo version. 
+libjpeg-turbo note: This file contains portions of the libjpeg v6b and v8 +README files, with additional wordsmithing by The libjpeg-turbo Project. +It is included only for reference, as some parts of it may not apply to +libjpeg-turbo. Please see README-turbo.txt for information specific to +libjpeg-turbo. The Independent JPEG Group's JPEG software @@ -62,7 +63,7 @@ OVERVIEW This package contains C software to implement JPEG image encoding, decoding, and transcoding. JPEG (pronounced "jay-peg") is a standardized compression method for full-color and gray-scale images. JPEG's strong suit is compressing -photographic images or other types of images which have smooth color and +photographic images or other types of images that have smooth color and brightness transitions between neighboring pixels. Images with sharp lines or other abrupt features may not compress well with JPEG, and a higher JPEG quality may have to be used to avoid visible compression artifacts with such @@ -256,8 +257,8 @@ ARCHIVE LOCATIONS The "official" archive site for this software is www.ijg.org. The most recent released version can always be found there in directory "files". This particular version will be archived as -http://www.ijg.org/files/jpegsrc.v8b.tar.gz, and in Windows-compatible -"zip" archive format as http://www.ijg.org/files/jpegsr8b.zip. +http://www.ijg.org/files/jpegsrc.v8d.tar.gz, and in Windows-compatible +"zip" archive format as http://www.ijg.org/files/jpegsr8d.zip. The JPEG FAQ (Frequently Asked Questions) article is a source of some general information about JPEG. @@ -274,7 +275,7 @@ FILE FORMAT WARS ================ The ISO JPEG standards committee actually promotes different formats like -"JPEG 2000" or "JPEG XR" which are incompatible with original DCT-based +"JPEG 2000" or "JPEG XR", which are incompatible with original DCT-based JPEG. IJG therefore does not support these formats (see REFERENCES). 
Indeed, one of the original reasons for developing this free software was to help force convergence on common, interoperable format standards for JPEG files. @@ -286,4 +287,4 @@ image files indefinitely.) TO DO ===== -Please send bug reports, offers of help, etc. to jpeg-info@uc.ag. +Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org. diff --git a/README-turbo.txt b/README-turbo.txt index a385270c..fcfd27e5 100755 --- a/README-turbo.txt +++ b/README-turbo.txt @@ -10,16 +10,16 @@ as the unmodified version of libjpeg, all else being equal. libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but the TigerVNC and VirtualGL projects made numerous enhancements to the codec in 2009, including improved support for Mac OS X, 64-bit support, support for -32-bit and big endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman -encoding/decoding, and various bug fixes. The goal was to produce a fully open -source codec that could replace the partially closed source TurboJPEG/IPP codec -used by VirtualGL and TurboVNC. libjpeg-turbo generally performs in the range -of 80-120% of TurboJPEG/IPP. It is faster in some areas but slower in others. +32-bit and big-endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman +encoding/decoding, and various bug fixes. The goal was to produce a fully +open-source codec that could replace the partially closed-source TurboJPEG/IPP +codec used by VirtualGL and TurboVNC. libjpeg-turbo generally achieves 80-120% +of the performance of TurboJPEG/IPP. It is faster in some areas but slower in +others. In early 2010, libjpeg-turbo spun off into its own independent project, with the goal of making high-speed JPEG compression/decompression technology -available to a broader range of users and developers. The libjpeg-turbo shared -libraries can be used as drop-in replacements for libjpeg on most systems. +available to a broader range of users and developers. 
******************************************************************************* @@ -72,13 +72,13 @@ NOTE: {lib} can be lib, lib32, lib64, or lib/64, depending on the O/S and architecture. System administrators can also replace the libjpeg sym links in /usr/{lib} with -links to the libjpeg dynamic library located in /opt/libjpeg-turbo/{lib}. This -will effectively accelerate every dynamically linked libjpeg application on the -system. +links to the libjpeg-turbo dynamic library located in /opt/libjpeg-turbo/{lib}. +This will effectively accelerate every application that uses the libjpeg +dynamic library on the system. The libjpeg-turbo SDK for Visual C++ installs the libjpeg-turbo DLL -(jpeg62.dll, jpeg7.dll, or jpeg8.dll, depending on whether libjpeg v6b, v7, or -v8 emulation is enabled) into c:\libjpeg-turbo[64]\bin, and the PATH +(jpeg62.dll, jpeg7.dll, or jpeg8.dll, depending on whether it was built with +libjpeg v6b, v7, or v8 emulation) into c:\libjpeg-turbo[64]\bin, and the PATH environment variable can be modified such that this directory is searched before any others that might contain a libjpeg DLL. However, if a libjpeg DLL exists in an application's install directory, then Windows will load this @@ -88,16 +88,16 @@ version of this DLL and copy c:\libjpeg-turbo[64]\bin\jpeg*.dll into the application's install directory to accelerate it. The version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for -Visual C++ requires the Visual C++ 2008 C run time DLL (msvcr90.dll). +Visual C++ requires the Visual C++ 2008 C run-time DLL (msvcr90.dll). msvcr90.dll ships with more recent versions of Windows, but users of older Windows releases can obtain it from the Visual C++ 2008 Redistributable Package, which is available as a free download from Microsoft's web site. 
-NOTE: Features of libjpeg that require passing a C run time structure, such +NOTE: Features of libjpeg that require passing a C run-time structure, such as a file handle, from an application to libjpeg will probably not work with the version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for Visual C++, unless the application is also built to use the Visual C++ 2008 C -run time DLL. In particular, this affects jpeg_stdio_dest() and +run-time DLL. In particular, this affects jpeg_stdio_dest() and jpeg_stdio_src(). Mac applications typically embed their own copies of the libjpeg dylib inside @@ -117,7 +117,7 @@ Replacing TurboJPEG/IPP libjpeg-turbo is a drop-in replacement for the TurboJPEG/IPP SDK used by VirtualGL 2.1.x and TurboVNC 0.6 (and prior.) libjpeg-turbo contains a wrapper library (TurboJPEG/OSS) that emulates the TurboJPEG API using libjpeg-turbo -instead of the closed source Intel Performance Primitives. You can replace the +instead of the closed-source Intel Performance Primitives. You can replace the TurboJPEG/IPP package on Linux systems with the libjpeg-turbo package in order to make existing releases of VirtualGL 2.1.x and TurboVNC 0.x use the new codec at run time. Note that the 64-bit libjpeg-turbo packages contain only 64-bit @@ -128,7 +128,7 @@ both the 64-bit and 32-bit versions of libjpeg-turbo. You can also build the VirtualGL 2.1.x and TurboVNC 0.6 source code with the libjpeg-turbo SDK instead of TurboJPEG/IPP. It should work identically. libjpeg-turbo also includes static library versions of TurboJPEG/OSS, which -are used to build TurboVNC 1.0 and later. +are used to build VirtualGL 2.2 and TurboVNC 1.0 and later. ======================================== Using libjpeg-turbo in Your Own Programs @@ -208,24 +208,17 @@ that doesn't support them will result in a "Bogus input colorspace" error. 
libjpeg v7 and v8 API/ABI support ================================= -libjpeg v7 and v8 added new features to the API/ABI, and, unfortunately, the -compression and decompression structures were extended in a backward- -incompatible manner to accommodate these features. Thus, programs that are +With libjpeg v7 and v8, new features were added that necessitated extending the +compression and decompression structures. Unfortunately, due to the exposed +nature of those structures, extending them also necessitated breaking backward +ABI compatibility with previous libjpeg releases. Thus, programs that are built to use libjpeg v7 or v8 did not work with libjpeg-turbo, since it is based on the libjpeg v6b code base. Although libjpeg v7 and v8 are still not as widely used as v6b, enough programs (including a few Linux distros) have made the switch that it was desirable to provide support for the libjpeg v7/v8 -API/ABI in libjpeg-turbo. - -Some of the libjpeg v7 and v8 features -- DCT scaling, to name one -- involve -deep modifications to the code that cannot be accommodated by libjpeg-turbo -without either breaking compatibility with libjpeg v6b or producing an -unsupportable mess. In order to fully support libjpeg v8 with all of its -features, we would have to essentially port the SIMD extensions to the libjpeg -v8 code base and maintain two separate code trees. We are hesitant to do this -until/unless the newer libjpeg code bases garner more community support and -involvement and until/unless we have some notion of whether future libjpeg -releases will also be backward-incompatible. +API/ABI in libjpeg-turbo. Although libjpeg-turbo can now be configured as a +drop-in replacement for libjpeg v7 or v8, it should be noted that not all of +the features in libjpeg v7 and v8 are supported (see below.) 
By passing an argument of --with-jpeg7 or --with-jpeg8 to configure, or an argument of -DWITH_JPEG7=1 or -DWITH_JPEG8=1 to cmake, you can build a version @@ -264,6 +257,16 @@ Not supported: -- libjpeg: DCT scaling in compressor cinfo.scale_num and cinfo.scale_denom are silently ignored. + There is no technical reason why DCT scaling cannot be supported, but + without the SmartScale extension (see below), it would only be able to + down-scale using ratios of 1/2, 8/15, 4/7, 8/13, 2/3, 8/11, 4/5, and 8/9, + which is of limited usefulness. + +-- libjpeg: SmartScale + cinfo.block_size is silently ignored. + SmartScale is an extension to the JPEG format that allows for DCT block + sizes other than 8x8. It would be difficult to support this feature while + retaining backward compatibility with libjpeg v6b. -- libjpeg: IDCT scaling extensions in decompressor libjpeg-turbo still supports IDCT scaling with scaling factors of 1/2, 1/4, @@ -271,9 +274,14 @@ Not supported: -- libjpeg: Fancy downsampling in compressor cinfo.do_fancy_downsampling is silently ignored. + This requires the DCT scaling feature, which is not supported. -- jpegtran: Scaling - Seems to depend on the DCT scaling feature, which isn't supported. + This requires both the DCT scaling and SmartScale features, which are not + supported. + +-- Lossless RGB JPEG files + This requires the SmartScale feature, which is not supported. ******************************************************************************* @@ -285,12 +293,13 @@ Restart Markers =============== The optimized Huffman decoder in libjpeg-turbo does not handle restart markers -in a way that makes libjpeg happy, so it is necessary to use the slow Huffman -decoder when decompressing a JPEG image that has restart markers. This can -cause the decompression performance to drop by as much as 20%, but the -performance will still be much much greater than that of libjpeg v6b. 
Many -consumer packages, such as PhotoShop, use restart markers when generating JPEG -images, so images generated by those programs will experience this issue. +in a way that makes the rest of the libjpeg infrastructure happy, so it is +necessary to use the slow Huffman decoder when decompressing a JPEG image that +has restart markers. This can cause the decompression performance to drop by +as much as 20%, but the performance will still be much greater than that of +libjpeg. Many consumer packages, such as Photoshop, use restart markers when +generating JPEG images, so images generated by those programs will experience +this issue. =============================================== Fast Integer Forward DCT at High Quality Levels diff --git a/cjpeg.1 b/cjpeg.1 index e4d95ee8..6fb72993 100644 --- a/cjpeg.1 +++ b/cjpeg.1 @@ -1,4 +1,4 @@ -.TH CJPEG 1 "11 October 2010" +.TH CJPEG 1 "31 January 2012" .SH NAME cjpeg \- compress an image file to a JPEG file .SH SYNOPSIS @@ -121,7 +121,7 @@ quality-sensitive applications, for which the artifacts generated by subsampling may be unacceptable. .PP The \fB-quality\fR option accepts a comma-separated list of parameters, which -respectively refer to the quality levels which should be assigned to the +respectively refer to the quality levels that should be assigned to the quantization table slots. If there are more q-table slots than parameters, then the last parameter is replicated. Thus, if only one quality parameter is given, this is used for both luminance and chrominance (slots 0 and 1, @@ -150,6 +150,12 @@ about the same --- often a little smaller. .PP Switches for advanced users: .TP +.B \-arithmetic +Use arithmetic coding. +.B Caution: +arithmetic coded JPEG is not yet widely implemented, so many decoders will be +unable to view an arithmetic coded JPEG file at all. +.TP .B \-dct int Use integer DCT method (default). .TP @@ -214,12 +220,6 @@ visibly blur the image, however.
.PP Switches for wizards: .TP -.B \-arithmetic -Use arithmetic coding. -.B Caution: -arithmetic coded JPEG is not yet widely implemented, so many decoders will be -unable to view an arithmetic coded JPEG file at all. -.TP .B \-baseline Force baseline-compatible quantization tables to be generated. This clamps quantization values to 8 bits even at low quality settings. (This switch is diff --git a/cjpeg.c b/cjpeg.c index 6f407f83..0475c023 100644 --- a/cjpeg.c +++ b/cjpeg.c @@ -164,6 +164,9 @@ usage (void) fprintf(stderr, " -targa Input file is Targa format (usually not needed)\n"); #endif fprintf(stderr, "Switches for advanced users:\n"); +#ifdef C_ARITH_CODING_SUPPORTED + fprintf(stderr, " -arithmetic Use arithmetic coding\n"); +#endif #ifdef DCT_ISLOW_SUPPORTED fprintf(stderr, " -dct int Use integer DCT method%s\n", (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : "")); @@ -184,9 +187,6 @@ usage (void) fprintf(stderr, " -outfile name Specify name for output file\n"); fprintf(stderr, " -verbose or -debug Emit debug output\n"); fprintf(stderr, "Switches for wizards:\n"); -#ifdef C_ARITH_CODING_SUPPORTED - fprintf(stderr, " -arithmetic Use arithmetic coding\n"); -#endif fprintf(stderr, " -baseline Force baseline quantization tables\n"); fprintf(stderr, " -qtables file Use quantization tables given in file\n"); fprintf(stderr, " -qslots N[,...] Set component quantization tables\n"); diff --git a/install.txt b/install.txt index 2ee86adf..1327dc48 100644 --- a/install.txt +++ b/install.txt @@ -534,17 +534,17 @@ In general, it's worth trying the maximum optimization level of your compiler, and experimenting with any optional optimizations such as loop unrolling. (Unfortunately, far too many compilers have optimizer bugs ... be prepared to back off if the code fails self-test.) If you do any experimentation along -these lines, please report the optimal settings to jpeg-info@uc.ag so we -can mention them in future releases. 
Be sure to specify your machine -and compiler version. +these lines, please report the optimal settings to jpeg-info@jpegclub.org so +we can mention them in future releases. Be sure to specify your machine and +compiler version. HINTS FOR SPECIFIC SYSTEMS ========================== We welcome reports on changes needed for systems not mentioned here. Submit -'em to jpeg-info@uc.ag. Also, if configure or ckconfig.c is wrong about how -to configure the JPEG software for your system, please let us know. +'em to jpeg-info@jpegclub.org. Also, if configure or ckconfig.c is wrong +about how to configure the JPEG software for your system, please let us know. Acorn RISC OS: diff --git a/jpegtran.1 b/jpegtran.1 index f10cdbb6..160b47da 100644 --- a/jpegtran.1 +++ b/jpegtran.1 @@ -1,4 +1,4 @@ -.TH JPEGTRAN 1 "11 October 2010" +.TH JPEGTRAN 1 "31 January 2012" .SH NAME jpegtran \- lossless transformation of JPEG files .SH SYNOPSIS @@ -184,7 +184,7 @@ comments and other excess baggage present in the source file. .TP .B \-copy comments Copy only comment markers. This setting copies comments from the source file -but discards any other data which is inessential for image display. +but discards any other data that is inessential for image display. .TP .B \-copy all Copy all extra markers. 
This setting preserves miscellaneous markers diff --git a/jpegtran.c b/jpegtran.c index b2a31adb..40d4b6cb 100644 --- a/jpegtran.c +++ b/jpegtran.c @@ -78,14 +78,14 @@ usage (void) fprintf(stderr, " -trim Drop non-transformable edge blocks\n"); #endif fprintf(stderr, "Switches for advanced users:\n"); +#ifdef C_ARITH_CODING_SUPPORTED + fprintf(stderr, " -arithmetic Use arithmetic coding\n"); +#endif fprintf(stderr, " -restart N Set restart interval in rows, or in blocks with B\n"); fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n"); fprintf(stderr, " -outfile name Specify name for output file\n"); fprintf(stderr, " -verbose or -debug Emit debug output\n"); fprintf(stderr, "Switches for wizards:\n"); -#ifdef C_ARITH_CODING_SUPPORTED - fprintf(stderr, " -arithmetic Use arithmetic coding\n"); -#endif #ifdef C_MULTISCAN_FILES_SUPPORTED fprintf(stderr, " -scans file Create multi-scan JPEG per script file\n"); #endif diff --git a/libjpeg.txt b/libjpeg.txt index 191b35e8..d350fc73 100644 --- a/libjpeg.txt +++ b/libjpeg.txt @@ -850,8 +850,9 @@ int jpeg_quality_scaling (int quality) premise of this routine collapses. Caveat user. jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline) - [libjpeg v7/v8 only] Set default quantization tables with linear - q_scale_factor[] values (see below). + [libjpeg v7+ API/ABI emulation only] + Set default quantization tables with linear q_scale_factor[] values + (see below). jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl, const unsigned int *basic_table, @@ -976,12 +977,13 @@ JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS] slot 1 for chrominance. int q_scale_factor[NUM_QUANT_TBLS] - [libjpeg v7+ only] Linear quantization scaling factors (0-100, default - 100) for use with jpeg_default_qtables(). + [libjpeg v7+ API/ABI emulation only] + Linear quantization scaling factors (0-100, default 100) + for use with jpeg_default_qtables(). See rdswitch.c and cjpeg.c for an example of usage. 
Note that the q_scale_factor[] values use "linear" scales, so JPEG quality levels chosen by the user must be converted to these scales - using jpeg_quality_scaling(). Here is an example which corresponds to + using jpeg_quality_scaling(). Here is an example that corresponds to cjpeg -quality 90,70: jpeg_set_defaults(cinfo); @@ -1012,11 +1014,12 @@ JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS] any need to mess with providing your own Huffman tables. -[libjpeg v7+ only] The actual dimensions of the JPEG image that will be written -to the file are given by the following fields. These are computed from the -input image dimensions and the compression parameters by jpeg_start_compress(). -You can also call jpeg_calc_jpeg_dimensions() to obtain the values that will -result from the current parameter settings. +[libjpeg v7+ API/ABI emulation only] +The actual dimensions of the JPEG image that will be written to the file are +given by the following fields. These are computed from the input image +dimensions and the compression parameters by jpeg_start_compress(). You can +also call jpeg_calc_jpeg_dimensions() to obtain the values that will result +from the current parameter settings. JDIMENSION jpeg_width Actual dimensions of output image. JDIMENSION jpeg_height diff --git a/structure.txt b/structure.txt index 779233a8..6a9266ba 100644 --- a/structure.txt +++ b/structure.txt @@ -1,6 +1,6 @@ IJG JPEG LIBRARY: SYSTEM ARCHITECTURE -Copyright (C) 1991-2009, Thomas G. Lane, Guido Vollbeding. +Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding. This file is part of the Independent JPEG Group's software. For conditions of distribution and use, see the accompanying README file. @@ -385,8 +385,9 @@ objects: * Data destination manager: writes the output JPEG datastream to its final destination (e.g., a file). 
The destination manager supplied with the - library knows how to write to a stdio stream; for other behaviors, the - surrounding application may provide its own destination manager. + library knows how to write to a stdio stream or to a memory buffer; + for other behaviors, the surrounding application may provide its own + destination manager. * Memory manager: allocates and releases memory, controls virtual arrays (with backing store management, where required). @@ -504,9 +505,9 @@ objects: * Marker reading: decodes JPEG markers (except for RSTn). * Data source manager: supplies the input JPEG datastream. The source - manager supplied with the library knows how to read from a stdio stream; - for other behaviors, the surrounding application may provide its own source - manager. + manager supplied with the library knows how to read from a stdio stream + or from a memory buffer; for other behaviors, the surrounding application + may provide its own source manager. * Memory manager: same as for compression library. @@ -586,8 +587,7 @@ as "((value) & 0xFF)" on signed-char machines and "((int) (value))" elsewhere. With these conventions, JSAMPLE values can be assumed to be >= 0. This helps simplify correct rounding during downsampling, etc. The JPEG standard's specification that sample values run from -128..127 is accommodated by -subtracting 128 just as the sample value is copied into the source array for -the DCT step (this will be an array of signed ints). Similarly, during +subtracting 128 from the sample value in the DCT step. Similarly, during decompression the output of the IDCT step will be immediately shifted back to 0..255. (NB: different values are required when 12-bit samples are in use. 
The code is written in terms of MAXJSAMPLE and CENTERJSAMPLE, which will be diff --git a/usage.txt b/usage.txt index 7af75a8c..2abfbeab 100644 --- a/usage.txt +++ b/usage.txt @@ -131,7 +131,7 @@ quality-sensitive applications, for which the artifacts generated by subsampling may be unacceptable. The -quality option accepts a comma-separated list of parameters, which -respectively refer to the quality levels which should be assigned to the +respectively refer to the quality levels that should be assigned to the quantization table slots. If there are more q-table slots than parameters, then the last parameter is replicated. Thus, if only one quality parameter is given, this is used for both luminance and chrominance (slots 0 and 1, @@ -157,6 +157,11 @@ file size is about the same --- often a little smaller. Switches for advanced users: + -arithmetic Use arithmetic coding. CAUTION: arithmetic coded JPEG + is not yet widely implemented, so many decoders will + be unable to view an arithmetic coded JPEG file at + all. + -dct int Use integer DCT method (default). -dct fast Use fast integer DCT (less accurate). -dct float Use floating-point DCT method. @@ -201,11 +206,6 @@ factor will visibly blur the image, however. Switches for wizards: - -arithmetic Use arithmetic coding. CAUTION: arithmetic coded JPEG - is not yet widely implemented, so many decoders will - be unable to view an arithmetic coded JPEG file at - all. - -baseline Force baseline-compatible quantization tables to be generated. This clamps quantization values to 8 bits even at low quality settings. (This switch is poorly @@ -447,9 +447,9 @@ To specify the coded JPEG representation used in the output file, jpegtran accepts a subset of the switches recognized by cjpeg: -optimize Perform optimization of entropy encoding parameters. -progressive Create progressive JPEG file. + -arithmetic Use arithmetic coding. 
-restart N Emit a JPEG restart marker every N MCU rows, or every N MCU blocks if "B" is attached to the number. - -arithmetic Use arithmetic coding. -scans file Use the scan script given in the specified text file. See the previous discussion of cjpeg for more details about these switches. If you specify none of these switches, you get a plain baseline-JPEG output @@ -527,7 +527,7 @@ markers, such as comment blocks: present in the source file. -copy comments Copy only comment markers. This setting copies comments from the source file but discards - any other data which is inessential for image display. + any other data that is inessential for image display. -copy all Copy all extra markers. This setting preserves miscellaneous markers found in the source file, such as JFIF thumbnails, Exif data, and Photoshop settings. From 0f0fd75125bad97bafb3661d17a21049c75772fc Mon Sep 17 00:00:00 2001 From: DRC Date: Tue, 7 Feb 2012 23:27:14 +0000 Subject: [PATCH 04/26] Compiler warnings git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.1.x@784 632fc199-4ca6-4c93-a231-07263d6284db --- jcdctmgr.c | 2 +- simd/jsimd_i386.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jcdctmgr.c b/jcdctmgr.c index 711f9dab..12f88725 100644 --- a/jcdctmgr.c +++ b/jcdctmgr.c @@ -182,7 +182,7 @@ compute_reciprocal (UINT16 divisor, DCTELEM * dtbl) /* fq will be one bit too large to fit in DCTELEM, so adjust */ fq >>= 1; r--; - } else if (fr <= (divisor / 2)) { /* fractional part is < 0.5 */ + } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */ c++; } else { /* fractional part is > 0.5 */ fq++; diff --git a/simd/jsimd_i386.c b/simd/jsimd_i386.c index d9bb7743..021bcb25 100644 --- a/simd/jsimd_i386.c +++ b/simd/jsimd_i386.c @@ -41,7 +41,7 @@ init_simd (void) { char *env = NULL; - if (simd_support != ~0) + if (simd_support != ~0U) return; simd_support = jpeg_simd_cpu_support(); From be6d424626a91cc909b06b57133721a3804cfdb7 Mon Sep 17 00:00:00 2001 
From: DRC Date: Tue, 7 Feb 2012 23:41:10 +0000 Subject: [PATCH 05/26] Compiler warning git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.1.x@785 632fc199-4ca6-4c93-a231-07263d6284db --- jdhuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jdhuff.c b/jdhuff.c index b795462f..12db1242 100644 --- a/jdhuff.c +++ b/jdhuff.c @@ -784,7 +784,7 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) usefast = 0; } - if (cinfo->src->bytes_in_buffer < BUFSIZE * cinfo->blocks_in_MCU + if (cinfo->src->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU || cinfo->unread_marker != 0) usefast = 0; From f832eae5c2f4837e59c9450ef1d8587884994ea7 Mon Sep 17 00:00:00 2001 From: DRC Date: Wed, 8 Feb 2012 09:56:04 +0000 Subject: [PATCH 06/26] Merge description from README-turbo.txt into packages git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.1.x@788 632fc199-4ca6-4c93-a231-07263d6284db --- release/Description.plist.in | 2 +- release/ReadMe.rtf | 6 +++--- release/deb-control.tmpl | 18 ++++++++---------- release/libjpeg-turbo.spec.in | 23 +++++++++++------------ release/pkginfo.in | 2 +- 5 files changed, 24 insertions(+), 27 deletions(-) diff --git a/release/Description.plist.in b/release/Description.plist.in index adca3ece..46a5bceb 100644 --- a/release/Description.plist.in +++ b/release/Description.plist.in @@ -3,7 +3,7 @@ IFPkgDescriptionDescription - A SIMD-accelerated JPEG codec which provides both the libjpeg and TurboJPEG APIs + A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs IFPkgDescriptionTitle @PACKAGE_NAME@ IFPkgDescriptionVersion diff --git a/release/ReadMe.rtf b/release/ReadMe.rtf index 6d344c80..06c94abe 100644 --- a/release/ReadMe.rtf +++ b/release/ReadMe.rtf @@ -1,13 +1,13 @@ -{\rtf1\ansi\ansicpg1252\cocoartf1038\cocoasubrtf350 +{\rtf1\ansi\ansicpg1252\cocoartf1038\cocoasubrtf360 {\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} 
\margl1440\margr1440\vieww15200\viewh9600\viewkind0 \deftab720 \pard\pardeftab720\ql\qnatural -\f0\fs24 \cf0 libjpeg-turbo is a derivative of libjpeg which uses SIMD instructions (MMX, SSE2, etc.) to accelerate baseline JPEG compression and decompression on x86 and x86-64 systems. On such systems, libjpeg-turbo is generally 2-4x as fast as the unmodified version of libjpeg, all else being equal. libjpeg-turbo also includes a wrapper library which implements the TurboJPEG API used by VirtualGL and TurboVNC.\ +\f0\fs24 \cf0 libjpeg-turbo is a derivative of libjpeg that uses SIMD instructions (MMX, SSE2, NEON) to accelerate baseline JPEG compression and decompression on x86, x86-64, and ARM systems. On such systems, libjpeg-turbo is generally 2-4x as fast as the unmodified version of libjpeg, all else being equal.\ \ -libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but the TigerVNC and VirtualGL projects made numerous enhancements to the codec in 2009, including improved support for Mac OS X, 64-bit support, support for 32-bit and big endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman encoding/decoding, and various bug fixes. The goal was to produce a fully open source codec that could replace the partially closed source TurboJPEG/IPP codec used by VirtualGL and TurboVNC. libjpeg-turbo generally performs in the range of 80-120% of TurboJPEG/IPP. It is faster in some areas but slower in others.\ +libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but the TigerVNC and VirtualGL projects made numerous enhancements to the codec in 2009, including improved support for Mac OS X, 64-bit support, support for 32-bit and big-endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman encoding/decoding, and various bug fixes. The goal was to produce a fully open-source codec that could replace the partially closed-source TurboJPEG/IPP codec used by VirtualGL and TurboVNC. 
libjpeg-turbo generally achieves 80-120% of the performance of TurboJPEG/IPP. It is faster in some areas but slower in others.\ \ In early 2010, libjpeg-turbo spun off into its own independent project, with the goal of making high-speed JPEG compression/decompression technology available to a broader range of users and developers.\ } \ No newline at end of file diff --git a/release/deb-control.tmpl b/release/deb-control.tmpl index 5a7377ec..9cf7c46e 100644 --- a/release/deb-control.tmpl +++ b/release/deb-control.tmpl @@ -5,22 +5,20 @@ Priority: optional Architecture: {__ARCH} Essential: no Maintainer: The libjpeg-turbo Project [http://www.libjpeg-turbo.org] -Description: A SIMD-accelerated JPEG codec which provides both the libjpeg and TurboJPEG APIs - libjpeg-turbo is a derivative of libjpeg which uses SIMD instructions (MMX, - SSE2, etc.) to accelerate baseline JPEG compression and decompression on x86 - and x86-64 systems. On such systems, libjpeg-turbo is generally 2-4x as fast - as the unmodified version of libjpeg, all else being equal. libjpeg-turbo - also includes a wrapper library which implements the TurboJPEG API used by - VirtualGL and TurboVNC. +Description: A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs + libjpeg-turbo is a derivative of libjpeg that uses SIMD instructions (MMX, + SSE2, NEON) to accelerate baseline JPEG compression and decompression on x86, + x86-64, and ARM systems. On such systems, libjpeg-turbo is generally 2-4x as + fast as the unmodified version of libjpeg, all else being equal. . libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but the TigerVNC and VirtualGL projects made numerous enhancements to the codec in 2009, including improved support for Mac OS X, 64-bit support, support - for 32-bit and big endian pixel formats (RGBX, XBGR, etc.), accelerated + for 32-bit and big-endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman encoding/decoding, and various bug fixes. 
The goal was to produce a - fully open source codec that could replace the partially closed source + fully open-source codec that could replace the partially closed-source TurboJPEG/IPP codec used by VirtualGL and TurboVNC. libjpeg-turbo generally - performs in the range of 80-120% of TurboJPEG/IPP. It is faster in some + achieves 80-120% of the performance of TurboJPEG/IPP. It is faster in some areas but slower in others. . In early 2010, libjpeg-turbo spun off into its own independent project, with diff --git a/release/libjpeg-turbo.spec.in b/release/libjpeg-turbo.spec.in index 645be70f..fdb3e980 100644 --- a/release/libjpeg-turbo.spec.in +++ b/release/libjpeg-turbo.spec.in @@ -4,7 +4,7 @@ %define __lib lib %endif -Summary: A SIMD-accelerated JPEG codec which provides both the libjpeg and TurboJPEG APIs +Summary: A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs Name: @PACKAGE_NAME@ Version: @VERSION@ Vendor: The libjpeg-turbo Project @@ -19,21 +19,20 @@ Provides: %{name} = %{version}-%{release}, turbojpeg = 2.00 Obsoletes: turbojpeg %description -libjpeg-turbo is a derivative of libjpeg which uses SIMD instructions (MMX, -SSE2, etc.) to accelerate baseline JPEG compression and decompression on x86 -and x86-64 systems. On such systems, libjpeg-turbo is generally 2-4x as fast -as the unmodified version of libjpeg, all else being equal. libjpeg-turbo also -includes a wrapper library which implements the TurboJPEG API used by VirtualGL -and TurboVNC. +libjpeg-turbo is a derivative of libjpeg that uses SIMD instructions (MMX, +SSE2, NEON) to accelerate baseline JPEG compression and decompression on x86, +x86-64, and ARM systems. On such systems, libjpeg-turbo is generally 2-4x as +fast as the unmodified version of libjpeg, all else being equal. 
libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but the TigerVNC and VirtualGL projects made numerous enhancements to the codec in 2009, including improved support for Mac OS X, 64-bit support, support for -32-bit and big endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman -encoding/decoding, and various bug fixes. The goal was to produce a fully open -source codec that could replace the partially closed source TurboJPEG/IPP codec -used by VirtualGL and TurboVNC. libjpeg-turbo generally performs in the range -of 80-120% of TurboJPEG/IPP. It is faster in some areas but slower in others. +32-bit and big-endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman +encoding/decoding, and various bug fixes. The goal was to produce a fully +open-source codec that could replace the partially closed-source TurboJPEG/IPP +codec used by VirtualGL and TurboVNC. libjpeg-turbo generally achieves 80-120% +of the performance of TurboJPEG/IPP. It is faster in some areas but slower in +others. 
In early 2010, libjpeg-turbo spun off into its own independent project, with the goal of making high-speed JPEG compression/decompression technology diff --git a/release/pkginfo.in b/release/pkginfo.in index 7801dfb7..772ad0f6 100644 --- a/release/pkginfo.in +++ b/release/pkginfo.in @@ -3,7 +3,7 @@ PKG=@PACKAGE_NAME@ NAME=@PACKAGE_NAME@ SDK and run time libraries VERSION=@VERSION@,REV=@BUILD@ SUNW_PKGVERS=1.0 -DESC=A SIMD-accelerated JPEG codec which provides both the libjpeg and TurboJPEG APIs +DESC=A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs VENDOR=The libjpeg-turbo Project HOTLINE=http://www.libjpeg-turbo.org EMAIL=information@libjpeg-turbo.org From a1647c84128ecd840024a8579cd672c4556d9ff7 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 10 Feb 2012 00:39:05 +0000 Subject: [PATCH 07/26] Install docs when doing 'make install' on Unix; Fix 'install' target on Windows; Include wizard.txt, example.c, and structure.txt in installed docs git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@790 632fc199-4ca6-4c93-a231-07263d6284db --- CMakeLists.txt | 7 ++++--- Makefile.am | 12 +++++++++--- release/libjpeg-turbo.nsi.in | 6 ++++++ release/libjpeg-turbo.spec.in | 7 +++++-- release/makecygwinpkg.in | 10 +++------- release/makedpkg.in | 10 ++++------ release/makemacpkg.in | 10 +++------- release/makesunpkg.in | 13 ++++++++----- 8 files changed, 42 insertions(+), 33 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a166730a..d95c9e68 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -419,9 +419,10 @@ install(TARGETS jpeg-static turbojpeg turbojpeg-static rdjpgcom wrjpgcom tjbench RUNTIME DESTINATION bin ) -install(FILES ${CMAKE_SOURCE_DIR}/LGPL.txt ${CMAKE_SOURCE_DIR}/LICENSE.txt - ${CMAKE_SOURCE_DIR}/README ${CMAKE_SOURCE_DIR}/README-turbo.txt - ${CMAKE_SOURCE_DIR}/libjpeg.txt ${CMAKE_SOURCE_DIR}/usage.txt +install(FILES ${CMAKE_SOURCE_DIR}/README ${CMAKE_SOURCE_DIR}/README-turbo.txt + 
${CMAKE_SOURCE_DIR}/example.c ${CMAKE_SOURCE_DIR}/libjpeg.txt + ${CMAKE_SOURCE_DIR}/structure.txt ${CMAKE_SOURCE_DIR}/usage.txt + ${CMAKE_SOURCE_DIR}/wizard.txt DESTINATION doc) install(FILES ${CMAKE_BINARY_DIR}/jconfig.h ${CMAKE_SOURCE_DIR}/jerror.h diff --git a/Makefile.am b/Makefile.am index e8b25702..791fdd2e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -120,9 +120,15 @@ jcstest_LDADD = libjpeg.la dist_man1_MANS = cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 wrjpgcom.1 -DOCS= README install.txt usage.txt wizard.txt example.c libjpeg.txt \ - structure.txt coderules.txt filelist.txt jconfig.txt change.log \ - README-turbo.txt rdrle.c wrrle.c BUILDING.txt ChangeLog.txt +DOCS= install.txt coderules.txt filelist.txt jconfig.txt change.log \ + rdrle.c wrrle.c BUILDING.txt ChangeLog.txt + +docdir = $(datadir)/doc +doc_DATA = README README-turbo.txt libjpeg.txt structure.txt usage.txt \ + wizard.txt + +exampledir = $(datadir)/doc +example_DATA = example.c EXTRA_DIST = win release $(DOCS) testimages CMakeLists.txt \ diff --git a/release/libjpeg-turbo.nsi.in b/release/libjpeg-turbo.nsi.in index 39773f6a..f4643fd1 100755 --- a/release/libjpeg-turbo.nsi.in +++ b/release/libjpeg-turbo.nsi.in @@ -76,8 +76,11 @@ Section "@CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@ (required)" SetOutPath $INSTDIR\doc File "@CMAKE_SOURCE_DIR@\README" File "@CMAKE_SOURCE_DIR@\README-turbo.txt" + File "@CMAKE_SOURCE_DIR@\example.c" File "@CMAKE_SOURCE_DIR@\libjpeg.txt" + File "@CMAKE_SOURCE_DIR@\structure.txt" File "@CMAKE_SOURCE_DIR@\usage.txt" + File "@CMAKE_SOURCE_DIR@\wizard.txt" WriteRegStr HKLM "SOFTWARE\@INST_DIR@ @VERSION@" "Install_Dir" "$INSTDIR" @@ -132,8 +135,11 @@ Section "Uninstall" Delete $INSTDIR\uninstall_@VERSION@.exe Delete $INSTDIR\doc\README Delete $INSTDIR\doc\README-turbo.txt + Delete $INSTDIR\doc\example.c Delete $INSTDIR\doc\libjpeg.txt + Delete $INSTDIR\doc\structure.txt Delete $INSTDIR\doc\usage.txt + Delete $INSTDIR\doc\wizard.txt RMDir "$INSTDIR\include" RMDir 
"$INSTDIR\lib" diff --git a/release/libjpeg-turbo.spec.in b/release/libjpeg-turbo.spec.in index 5d40fede..9e0ff883 100644 --- a/release/libjpeg-turbo.spec.in +++ b/release/libjpeg-turbo.spec.in @@ -48,7 +48,7 @@ available to a broader range of users and developers. %install rm -rf $RPM_BUILD_ROOT -make install DESTDIR=$RPM_BUILD_ROOT libdir=/opt/%{name}/%{__lib} mandir=/opt/%{name}/man +make install DESTDIR=$RPM_BUILD_ROOT libdir=/opt/%{name}/%{__lib} mandir=/opt/%{name}/man docdir=/opt/%{name}/doc exampledir=/opt/%{name}/doc rm -f $RPM_BUILD_ROOT/opt/%{name}/%{__lib}/*.la mkdir -p $RPM_BUILD_ROOT/usr/%{__lib} mv $RPM_BUILD_ROOT/opt/%{name}/%{__lib}/libturbojpeg.* $RPM_BUILD_ROOT/usr/%{__lib} @@ -58,6 +58,8 @@ mkdir -p $RPM_BUILD_ROOT/usr/include mv $RPM_BUILD_ROOT/opt/%{name}/include/turbojpeg.h $RPM_BUILD_ROOT/usr/include ln -fs /usr/include/turbojpeg.h $RPM_BUILD_ROOT/opt/%{name}/include/ ln -fs /usr/%{__lib}/libturbojpeg.a $RPM_BUILD_ROOT/opt/%{name}/%{__lib}/ +mkdir -p $RPM_BUILD_ROOT%{_defaultdocdir} +mv $RPM_BUILD_ROOT/opt/%{name}/doc $RPM_BUILD_ROOT%{_defaultdocdir}/%{name}-%{version} %post -p /sbin/ldconfig @@ -68,7 +70,8 @@ rm -rf $RPM_BUILD_ROOT %files %defattr(-,root,root) -%doc %{_srcdir}/README-turbo.txt %{_srcdir}/README %{_srcdir}/libjpeg.txt %{_srcdir}/usage.txt +%dir %{_defaultdocdir}/%{name}-%{version} +%doc %{_defaultdocdir}/%{name}-%{version}/* %dir /opt/%{name} %dir /opt/%{name}/bin /opt/%{name}/bin/cjpeg diff --git a/release/makecygwinpkg.in b/release/makecygwinpkg.in index 2dbd650c..5fac864c 100755 --- a/release/makecygwinpkg.in +++ b/release/makecygwinpkg.in @@ -23,14 +23,10 @@ umask 022 rm -f $PACKAGE_NAME-$VERSION-cygwin.tar.bz2 TMPDIR=`mktemp -d /tmp/ljtbuild.XXXXXX` __PWD=`pwd` -make install DESTDIR=$TMPDIR/pkg mandir=/opt/$PACKAGE_NAME/man +make install DESTDIR=$TMPDIR/pkg mandir=/opt/$PACKAGE_NAME/man \ + docdir=/usr/share/doc/$PACKAGE_NAME-$VERSION \ + exampledir=/usr/share/doc/$PACKAGE_NAME-$VERSION rm
$TMPDIR/pkg/opt/$PACKAGE_NAME/lib/*.la -DOCDIR=$TMPDIR/pkg/usr/share/doc/$PACKAGE_NAME-$VERSION -mkdir -p $DOCDIR -install -m 644 $SRCDIR/README-turbo.txt $DOCDIR -install -m 644 $SRCDIR/README $DOCDIR -install -m 644 $SRCDIR/libjpeg.txt $DOCDIR -install -m 644 $SRCDIR/usage.txt $DOCDIR ln -fs lib $TMPDIR/pkg/opt/$PACKAGE_NAME/lib32 cd $TMPDIR/pkg tar cfj ../$PACKAGE_NAME-$VERSION-cygwin.tar.bz2 * diff --git a/release/makedpkg.in b/release/makedpkg.in index 7a15518d..936151b6 100644 --- a/release/makedpkg.in +++ b/release/makedpkg.in @@ -36,7 +36,10 @@ makedeb() | sed s/{__VERSION}/$VERSION/g | sed s/{__BUILD}/$BUILD/g \ | sed s/{__ARCH}/$DEBARCH/g > $TMPDIR/DEBIAN/control) - make install prefix=$TMPDIR/opt/$DIRNAME libdir=$TMPDIR/opt/$DIRNAME/$__LIB mandir=$TMPDIR/opt/$DIRNAME/man + make install prefix=$TMPDIR/opt/$DIRNAME libdir=$TMPDIR/opt/$DIRNAME/$__LIB \ + mandir=$TMPDIR/opt/$DIRNAME/man \ + docdir=$TMPDIR/usr/share/doc/$DIRNAME-$VERSION \ + exampledir=$TMPDIR/usr/share/doc/$DIRNAME-$VERSION rm -f $TMPDIR/opt/$DIRNAME/$__LIB/*.la if [ $SUPPLEMENT = 1 ]; then @@ -63,11 +66,6 @@ makedeb() mkdir -p $TMPDIR/usr/include mv $TMPDIR/opt/$DIRNAME/include/turbojpeg.h $TMPDIR/usr/include ln -fs /usr/include/turbojpeg.h $TMPDIR/opt/$DIRNAME/include/ - mkdir -p $TMPDIR/usr/share/doc/$DIRNAME-$VERSION - install -m 644 $SRCDIR/README-turbo.txt $TMPDIR/usr/share/doc/$DIRNAME-$VERSION - install -m 644 $SRCDIR/README $TMPDIR/usr/share/doc/$DIRNAME-$VERSION - install -m 644 $SRCDIR/libjpeg.txt $TMPDIR/usr/share/doc/$DIRNAME-$VERSION - install -m 644 $SRCDIR/usage.txt $TMPDIR/usr/share/doc/$DIRNAME-$VERSION fi sudo chown -Rh root:root $TMPDIR/* diff --git a/release/makemacpkg.in b/release/makemacpkg.in index 2ee08053..b0f45ab9 100644 --- a/release/makemacpkg.in +++ b/release/makemacpkg.in @@ -46,7 +46,9 @@ TMPDIR=`mktemp -d /tmp/$PACKAGE_NAME-build.XXXXXX` PKGROOT=$TMPDIR/pkg/Package_Root mkdir -p $PKGROOT mkdir -p $PKGROOT/opt/$PACKAGE_NAME/bin -make install DESTDIR=$PKGROOT 
mandir=/opt/$PACKAGE_NAME/man +make install DESTDIR=$PKGROOT mandir=/opt/$PACKAGE_NAME/man \ + docdir=/Library/Documentation/$PACKAGE_NAME \ + exampledir=/Library/Documentation/$PACKAGE_NAME rm -f $PKGROOT/opt/$PACKAGE_NAME/lib/*.la mkdir -p $PKGROOT/usr/lib mv $PKGROOT/opt/$PACKAGE_NAME/lib/libturbojpeg.* $PKGROOT/usr/lib @@ -133,7 +135,6 @@ if [ ! -h $PKGROOT/opt/$PACKAGE_NAME/lib64 ]; then ln -fs lib $PKGROOT/opt/$PACKAGE_NAME/lib64 fi -mkdir -p $PKGROOT/Library/Documentation/$PACKAGE_NAME chmod 1775 $PKGROOT/Library chmod 775 $PKGROOT/Library/Documentation mkdir -p $TMPDIR/pkg/Resources @@ -142,11 +143,6 @@ cp pkgscripts/Description.plist $TMPDIR/pkg/ cp pkgscripts/Info.plist $TMPDIR/pkg/ install -m 755 pkgscripts/uninstall $PKGROOT/opt/$PACKAGE_NAME/bin/ -install -m 644 $SRCDIR/README-turbo.txt $PKGROOT/Library/Documentation/$PACKAGE_NAME/README-turbo.txt -install -m 644 $SRCDIR/README $PKGROOT/Library/Documentation/$PACKAGE_NAME/README -install -m 644 $SRCDIR/libjpeg.txt $PKGROOT/Library/Documentation/$PACKAGE_NAME/libjpeg.txt -install -m 644 $SRCDIR/usage.txt $PKGROOT/Library/Documentation/$PACKAGE_NAME/usage.txt - sudo chown -R root:admin $PKGROOT sudo chown -R root:0 $PKGROOT/usr cp $SRCDIR/release/License.rtf $SRCDIR/release/Welcome.rtf $SRCDIR/release/ReadMe.rtf $TMPDIR/pkg/Resources/ diff --git a/release/makesunpkg.in b/release/makesunpkg.in index 3331dab0..7cf36a88 100644 --- a/release/makesunpkg.in +++ b/release/makesunpkg.in @@ -65,11 +65,11 @@ if [ $COMBINED = 1 ]; then cd $PWD fi # This mess is to work around a bug in /usr/ccs/bin/make -make install DESTDIR=$TMPDIR libdir=/opt/$PACKAGE_NAME/$__LIB mandir=/opt/$PACKAGE_NAME/man AM_MAKEFLAGS="libdir=/opt/$PACKAGE_NAME/$__LIB mandir=/opt/$PACKAGE_NAME/man" +make install DESTDIR=$TMPDIR libdir=/opt/$PACKAGE_NAME/$__LIB \ + mandir=/opt/$PACKAGE_NAME/man docdir=/opt/$PACKAGE_NAME/doc \ + exampledir=/opt/$PACKAGE_NAME/doc \ + AM_MAKEFLAGS="libdir=/opt/$PACKAGE_NAME/$__LIB mandir=/opt/$PACKAGE_NAME/man 
docdir=/opt/$PACKAGE_NAME/doc exampledir=/opt/$PACKAGE_NAME/doc" rm -f $TMPDIR/opt/$PACKAGE_NAME/$__LIB/*.la -mkdir -p $TMPDIR/opt/$PACKAGE_NAME/doc -cp $SRCDIR/README-turbo.txt $SRCDIR/README $SRCDIR/libjpeg.txt $SRCDIR/usage.txt $TMPDIR/opt/$PACKAGE_NAME/doc -chmod 644 $TMPDIR/opt/$PACKAGE_NAME/doc/* cat >$TMPDIR/proto <>$TMPDIR/proto From 20b734e1c4a84054c5cbdbd8f6ac0e75d39545e2 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 10 Feb 2012 01:30:37 +0000 Subject: [PATCH 08/26] Infrastructure for producing a universal x86-64/i386/ARM version of libjpeg.a and libturbojpeg.a on OS X, so that the same library can be used to build OS X and iOS apps git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@793 632fc199-4ca6-4c93-a231-07263d6284db --- BUILDING.txt | 33 +++++++++++++++----- ChangeLog.txt | 5 +++ Makefile.am | 14 ++++++++- release/makemacpkg.in | 71 ++++++++++++++++++++++++++++++++++++------- 4 files changed, 104 insertions(+), 19 deletions(-) diff --git a/BUILDING.txt b/BUILDING.txt index 0b3e055f..fc583ee3 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -644,13 +644,32 @@ make dmg make udmg [BUILDDIR32={32-bit build directory}] - On 64-bit OS X systems, this creates a version of the Macintosh package and - disk image that contains universal i386/x86-64 binaries. You should first - configure a 32-bit out-of-tree build of libjpeg-turbo, then configure a - 64-bit out-of-tree build, then run 'make udmg' from the 64-bit build - directory. The build system will look for the 32-bit build under - {source_directory}/osxx86 by default, but you can override this by setting - the BUILDDIR32 variable on the make command line as shown above. + On 64-bit OS X systems, this creates a Macintosh package and disk image that + contains universal i386/x86-64 binaries. You should first configure a 32-bit + out-of-tree build of libjpeg-turbo, then configure a 64-bit out-of-tree + build, then run 'make udmg' from the 64-bit build directory. 
The build + system will look for the 32-bit build under {source_directory}/osxx86 by + default, but you can override this by setting the BUILDDIR32 variable on the + make command line as shown above. + +make iosdmg [BUILDDIR32={32-bit build directory}] \ + [BUILDDIRARMV6={ARM v6 build directory}] \ + [BUILDDIRARMV7={ARM v7 build directory}] + + On OS X systems, this creates a Macintosh package and disk image in which the + libjpeg-turbo static libraries contain ARM architectures necessary to build + iOS applications. If building on an x86-64 system, the binaries will also + contain the i386 architecture, as with 'make udmg' above. You should first + configure ARM v6 and ARM v7 out-of-tree builds of libjpeg-turbo (see + "Building libjpeg-turbo for iOS" above.) If you are building an x86-64 + version of libjpeg-turbo, you should configure a 32-bit out-of-tree build as + well. Next, build libjpeg-turbo as you would normally, using an out-of-tree + build. When it is built, run 'make iosdmg' from the build directory. The + build system will look for the ARM v6 build under {source_directory}/iosarmv6 + by default, the ARM v7 build under {source_directory}/iosarmv7 by default, + and (if applicable) the 32-bit build under {source_directory}/osxx86 by + default, but you can override this by setting the BUILDDIR32, BUILDDIRARMV6, + and/or BUILDDIRARMV7 variables on the make command line as shown above. make sunpkg diff --git a/ChangeLog.txt b/ChangeLog.txt index f6b38d7b..529bd7e9 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -29,6 +29,11 @@ K component is assigned a component ID of 1 instead of 4. Although these files are in violation of the spec, other JPEG implementations handle them correctly. +[7] Added ARM v6 and ARM v7 architectures to libjpeg.a and libturbojpeg.a in +the official OS X distribution package, so that those libraries can be used to +build both OS X and iOS applications.
+ + 1.1.90 (1.2 beta1) ================== diff --git a/Makefile.am b/Makefile.am index 791fdd2e..ffd5bb2f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -299,10 +299,22 @@ srpm: dist-gzip deb: all sh pkgscripts/makedpkg +BUILDDIR32=@abs_top_srcdir@/osxx86 +BUILDDIRARMV6=@abs_top_srcdir@/iosarmv6 +BUILDDIRARMV7=@abs_top_srcdir@/iosarmv7 + if X86_64 udmg: all - sh pkgscripts/makemacpkg universal ${BUILDDIR32} + sh pkgscripts/makemacpkg -builddir32 ${BUILDDIR32} + +iosdmg: all + sh pkgscripts/makemacpkg -builddir32 ${BUILDDIR32} -builddirarmv6 ${BUILDDIRARMV6} -builddirarmv7 ${BUILDDIRARMV7} + +else + +iosdmg: all + sh pkgscripts/makemacpkg -builddirarmv6 ${BUILDDIRARMV6} -builddirarmv7 ${BUILDDIRARMV7} endif diff --git a/release/makemacpkg.in b/release/makemacpkg.in index b0f45ab9..89a3fd7d 100644 --- a/release/makemacpkg.in +++ b/release/makemacpkg.in @@ -17,24 +17,27 @@ onexit() usage() { - echo "$0 [universal [32-bit build dir]]" + echo "$0 [-builddir32 [32-bit build dir]] [-builddirarmv6 [ARM v6 build dir]] [-builddirarmv7 [ARM v7 build dir]]" exit 1 } -UNIVERSAL=0 - PACKAGE_NAME=@PACKAGE_NAME@ VERSION=@VERSION@ BUILD=@BUILD@ SRCDIR=@abs_top_srcdir@ -BUILDDIR32=@abs_top_srcdir@/osxx86 +BUILDDIR32= +BUILDDIRARMV6= +BUILDDIRARMV7= WITH_JAVA=@WITH_JAVA@ -if [ $# -gt 0 ]; then - if [ "$1" = "universal" ]; then - UNIVERSAL=1 - if [ $# -gt 1 ]; then BUILDDIR32=$2; fi - fi -fi +while [ $# -gt 0 ]; do + case $1 in + -h*) usage 0 ;; + -builddir32) BUILDDIR32=$2; shift ;; + -builddirarmv6) BUILDDIRARMV6=$2; shift ;; + -builddirarmv7) BUILDDIRARMV7=$2; shift ;; + esac + shift +done PACKAGEMAKER=/Developer/Applications/Utilities/PackageMaker.app/Contents/MacOS/PackageMaker if [ -f $PACKAGE_NAME-$VERSION.dmg ]; then @@ -55,7 +58,7 @@ mv $PKGROOT/opt/$PACKAGE_NAME/lib/libturbojpeg.* $PKGROOT/usr/lib mkdir -p $PKGROOT/usr/include mv $PKGROOT/opt/$PACKAGE_NAME/include/turbojpeg.h $PKGROOT/usr/include -if [ $UNIVERSAL = 1 ]; then +if [ ! "$BUILDDIR32" = "" ]; then if [ ! 
-d $BUILDDIR32 ]; then echo ERROR: 32-bit build directory $BUILDDIR32 does not exist exit 1 @@ -120,6 +123,52 @@ if [ $UNIVERSAL = 1 ]; then fi +if [ ! "$BUILDDIRARMV6" = "" ]; then + if [ ! -d $BUILDDIRARMV6 ]; then + echo ERROR: ARM v6 build directory $BUILDDIRARMV6 does not exist + exit 1 + fi + if [ ! -f $BUILDDIRARMV6/Makefile ]; then + echo ERROR: ARM v6 build directory $BUILDDIRARMV6 is not configured + exit 1 + fi + mkdir -p $TMPDIR/dist.armv6 + pushd $BUILDDIRARMV6 + make install DESTDIR=$TMPDIR/dist.armv6 + popd + lipo -create \ + $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.a \ + -arch arm $TMPDIR/dist.armv6/opt/$PACKAGE_NAME/lib/libjpeg.a \ + -output $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.a + lipo -create \ + $PKGROOT/usr/lib/libturbojpeg.a \ + -arch arm $TMPDIR/dist.armv6/opt/$PACKAGE_NAME/lib/libturbojpeg.a \ + -output $PKGROOT/usr/lib/libturbojpeg.a +fi + +if [ ! "$BUILDDIRARMV7" = "" ]; then + if [ ! -d $BUILDDIRARMV7 ]; then + echo ERROR: ARM v7 build directory $BUILDDIRARMV7 does not exist + exit 1 + fi + if [ ! 
-f $BUILDDIRARMV7/Makefile ]; then + echo ERROR: ARM v7 build directory $BUILDDIRARMV7 is not configured + exit 1 + fi + mkdir -p $TMPDIR/dist.armv7 + pushd $BUILDDIRARMV7 + make install DESTDIR=$TMPDIR/dist.armv7 + popd + lipo -create \ + $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.a \ + -arch arm $TMPDIR/dist.armv7/opt/$PACKAGE_NAME/lib/libjpeg.a \ + -output $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.a + lipo -create \ + $PKGROOT/usr/lib/libturbojpeg.a \ + -arch arm $TMPDIR/dist.armv7/opt/$PACKAGE_NAME/lib/libturbojpeg.a \ + -output $PKGROOT/usr/lib/libturbojpeg.a +fi + install_name_tool -id /opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.dylib $PKGROOT/opt/$PACKAGE_NAME/lib/libjpeg.@SO_MAJOR_VERSION@.dylib install_name_tool -id libturbojpeg.dylib $PKGROOT/usr/lib/libturbojpeg.dylib From 57bd84f4355131ac9762f78b71f80264b94a4654 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 10 Feb 2012 01:40:29 +0000 Subject: [PATCH 09/26] RPM spec no longer uses %{_srcdir} git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@796 632fc199-4ca6-4c93-a231-07263d6284db --- Makefile.am | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.am b/Makefile.am index ffd5bb2f..8d8bc3a2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -273,7 +273,7 @@ rpm: all ln -fs `pwd` $$TMPDIR/BUILD; \ rm -f ${PACKAGE_NAME}-${VERSION}.${RPMARCH}.rpm; \ rpmbuild -bb --define "_blddir $$TMPDIR/buildroot" \ - --define "_topdir $$TMPDIR" --define "_srcdir ${srcdir}" \ + --define "_topdir $$TMPDIR" \ --target ${RPMARCH} pkgscripts/libjpeg-turbo.spec; \ cp $$TMPDIR/RPMS/${RPMARCH}/${PACKAGE_NAME}-${VERSION}-${BUILD}.${RPMARCH}.rpm \ ${PACKAGE_NAME}-${VERSION}.${RPMARCH}.rpm; \ @@ -289,7 +289,7 @@ srpm: dist-gzip rm -f ${PACKAGE_NAME}-${VERSION}.src.rpm; \ cp ${PACKAGE_NAME}-${VERSION}.tar.gz $$TMPDIR/SOURCES; \ cat pkgscripts/libjpeg-turbo.spec | sed s/%{_blddir}/%{_tmppath}/g \ - | sed s@%{_srcdir}/@@g | sed s/#--\>//g \ + | sed s/#--\>//g \ > 
$$TMPDIR/SPECS/libjpeg-turbo.spec; \ rpmbuild -bs --define "_topdir $$TMPDIR" $$TMPDIR/SPECS/libjpeg-turbo.spec; \ cp $$TMPDIR/SRPMS/${PACKAGE_NAME}-${VERSION}-${BUILD}.src.rpm \ From ef49f790bdebd25ff84d2909b757967352f54f98 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 10 Feb 2012 01:44:23 +0000 Subject: [PATCH 10/26] git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@797 632fc199-4ca6-4c93-a231-07263d6284db --- ChangeLog.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/ChangeLog.txt b/ChangeLog.txt index 529bd7e9..7cd5b59b 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -34,7 +34,6 @@ the official OS X distribution package, so that those libraries can be used to build both OS X and iOS applications. - 1.1.90 (1.2 beta1) ================== From 01bf9d9ccd5afd18ceffa7dcefe543d282742343 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 10 Feb 2012 01:52:31 +0000 Subject: [PATCH 11/26] Fix 'make dist' git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@799 632fc199-4ca6-4c93-a231-07263d6284db --- Makefile.am | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.am b/Makefile.am index 8d8bc3a2..b21acc24 100644 --- a/Makefile.am +++ b/Makefile.am @@ -124,11 +124,11 @@ DOCS= install.txt coderules.txt filelist.txt jconfig.txt change.log \ rdrle.c wrrle.c BUILDING.txt ChangeLog.txt docdir = $(datadir)/doc -doc_DATA = README README-turbo.txt libjpeg.txt structure.txt usage.txt \ +dist_doc_DATA = README README-turbo.txt libjpeg.txt structure.txt usage.txt \ wizard.txt exampledir = $(datadir)/doc -example_DATA = example.c +dist_example_DATA = example.c EXTRA_DIST = win release $(DOCS) testimages CMakeLists.txt \ From 51d626ff9d32dc67294575460c5401e763ae8ca9 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 10 Feb 2012 02:51:40 +0000 Subject: [PATCH 12/26] Don't include documentation in 32-bit supplemental package git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@801 
632fc199-4ca6-4c93-a231-07263d6284db --- release/makedpkg.in | 1 + 1 file changed, 1 insertion(+) diff --git a/release/makedpkg.in b/release/makedpkg.in index 936151b6..6d081bb6 100644 --- a/release/makedpkg.in +++ b/release/makedpkg.in @@ -47,6 +47,7 @@ makedeb() rm -rf $TMPDIR/opt/$DIRNAME/man rm -rf $TMPDIR/opt/$DIRNAME/bin rm -rf $TMPDIR/opt/$DIRNAME/classes + rm -rf $TMPDIR/usr fi mkdir -p $TMPDIR/usr/$__LIB From 9e7eb2768638bd655e17a13e90b98c84aa927443 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 10 Feb 2012 03:41:20 +0000 Subject: [PATCH 13/26] Move build dir. variables back into makemacpkg to avoid messing up the Solaris packaging system. git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@804 632fc199-4ca6-4c93-a231-07263d6284db --- Makefile.am | 4 ---- release/makemacpkg.in | 30 ++++++++++++++++++++++++------ 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/Makefile.am b/Makefile.am index b21acc24..0fef9fa5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -299,10 +299,6 @@ srpm: dist-gzip deb: all sh pkgscripts/makedpkg -BUILDDIR32=@abs_top_srcdir@/osxx86 -BUILDDIRARMV6=@abs_top_srcdir@/iosarmv6 -BUILDDIRARMV7=@abs_top_srcdir@/iosarmv7 - if X86_64 udmg: all diff --git a/release/makemacpkg.in b/release/makemacpkg.in index 89a3fd7d..6cb4069d 100644 --- a/release/makemacpkg.in +++ b/release/makemacpkg.in @@ -25,16 +25,34 @@ PACKAGE_NAME=@PACKAGE_NAME@ VERSION=@VERSION@ BUILD=@BUILD@ SRCDIR=@abs_top_srcdir@ -BUILDDIR32= -BUILDDIRARMV6= -BUILDDIRARMV7= +BUILDDIR32=@abs_top_srcdir@/osxx86 +BUILDDIRARMV6=@abs_top_srcdir@/iosarmv6 +BUILDDIRARMV7=@abs_top_srcdir@/iosarmv7 WITH_JAVA=@WITH_JAVA@ while [ $# -gt 0 ]; do case $1 in -h*) usage 0 ;; - -builddir32) BUILDDIR32=$2; shift ;; - -builddirarmv6) BUILDDIRARMV6=$2; shift ;; - -builddirarmv7) BUILDDIRARMV7=$2; shift ;; + -builddir32) + if [ $# -gt 1 ]; then + if [[ ! "$2" =~ -.* ]]; then + BUILDDIR32=$2; shift + fi + fi + ;; + -builddirarmv6) + if [ $# -gt 1 ]; then + if [[ ! 
"$2" =~ -.* ]]; then + BUILDDIRARMV6=$2; shift + fi + fi + ;; + -builddirarmv7) + if [ $# -gt 1 ]; then + if [[ ! "$2" =~ -.* ]]; then + BUILDDIRARMV7=$2; shift + fi + fi + ;; esac shift done From 69c1408e9d5ea1db45ae537b911c60088d813ad5 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 10 Feb 2012 03:47:18 +0000 Subject: [PATCH 14/26] Oops git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@805 632fc199-4ca6-4c93-a231-07263d6284db --- release/makecygwinpkg.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release/makecygwinpkg.in b/release/makecygwinpkg.in index 5fac864c..32cecddb 100755 --- a/release/makecygwinpkg.in +++ b/release/makecygwinpkg.in @@ -25,7 +25,7 @@ TMPDIR=`mktemp -d /tmp/ljtbuild.XXXXXX` __PWD=`pwd` make install DESTDIR=$TMPDIR/pkg mandir=/opt/$PACKAGE_NAME/man \ docdir=/usr/share/doc/$PACKAGE_NAME-$VERSION \ - exampledir=/usr/share/doc/$PACKAGE_NAME-$VERSION \ + exampledir=/usr/share/doc/$PACKAGE_NAME-$VERSION rm $TMPDIR/pkg/opt/$PACKAGE_NAME/lib/*.la ln -fs lib $TMPDIR/pkg/opt/$PACKAGE_NAME/lib32 cd $TMPDIR/pkg From a9b646c202a2a154a125545b4347cbfd70598c9e Mon Sep 17 00:00:00 2001 From: DRC Date: Sun, 11 Mar 2012 22:06:54 +0000 Subject: [PATCH 15/26] Allow RGB JPEG files to be created/decoded when using the LJT colorspace extensions git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@809 632fc199-4ca6-4c93-a231-07263d6284db --- ChangeLog.txt | 8 +++++ jccolext.c | 34 +++++++++++++++++++- jccolor.c | 88 +++++++++++++++++++++++++++++++++++++++++++-------- jdcolext.c | 37 ++++++++++++++++++++++ jdcolor.c | 70 +++++++++++++++++++++++++++++++++++++--- 5 files changed, 219 insertions(+), 18 deletions(-) diff --git a/ChangeLog.txt b/ChangeLog.txt index 7cd5b59b..e27ac09a 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -1,3 +1,11 @@ +1.2.1 +===== + +[1] Creating or decoding a JPEG file that uses the RGB colorspace should now +properly work when the input or output colorspace is one of the 
libjpeg-turbo +colorspace extensions. + + 1.2.0 ===== diff --git a/jccolext.c b/jccolext.c index acbfa235..dbac84a9 100644 --- a/jccolext.c +++ b/jccolext.c @@ -2,7 +2,7 @@ * jccolext.c * * Copyright (C) 1991-1996, Thomas G. Lane. - * Copyright (C) 2009-2011, D. R. Commander. + * Copyright (C) 2009-2012, D. R. Commander. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -112,3 +112,35 @@ rgb_gray_convert_internal (j_compress_ptr cinfo, } } } + + +/* + * Convert some rows of samples to the JPEG colorspace. + * This version handles extended RGB->plain RGB conversion + */ + +INLINE +LOCAL(void) +rgb_rgb_convert_internal (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + register JSAMPROW inptr; + register JSAMPROW outptr0, outptr1, outptr2; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->image_width; + + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { + outptr0[col] = GETJSAMPLE(inptr[RGB_RED]); + outptr1[col] = GETJSAMPLE(inptr[RGB_GREEN]); + outptr2[col] = GETJSAMPLE(inptr[RGB_BLUE]); + inptr += RGB_PIXELSIZE; + } + } +} diff --git a/jccolor.c b/jccolor.c index 97305557..3a0772bb 100644 --- a/jccolor.c +++ b/jccolor.c @@ -3,7 +3,7 @@ * * Copyright (C) 1991-1996, Thomas G. Lane. * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009-2011, D. R. Commander. + * Copyright (C) 2009-2012, D. R. Commander. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. 
* @@ -96,6 +96,7 @@ typedef my_color_converter * my_cconvert_ptr; #define RGB_PIXELSIZE EXT_RGB_PIXELSIZE #define rgb_ycc_convert_internal extrgb_ycc_convert_internal #define rgb_gray_convert_internal extrgb_gray_convert_internal +#define rgb_rgb_convert_internal extrgb_rgb_convert_internal #include "jccolext.c" #undef RGB_RED #undef RGB_GREEN @@ -103,6 +104,7 @@ typedef my_color_converter * my_cconvert_ptr; #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_RGBX_RED #define RGB_GREEN EXT_RGBX_GREEN @@ -110,6 +112,7 @@ typedef my_color_converter * my_cconvert_ptr; #define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE #define rgb_ycc_convert_internal extrgbx_ycc_convert_internal #define rgb_gray_convert_internal extrgbx_gray_convert_internal +#define rgb_rgb_convert_internal extrgbx_rgb_convert_internal #include "jccolext.c" #undef RGB_RED #undef RGB_GREEN @@ -117,6 +120,7 @@ typedef my_color_converter * my_cconvert_ptr; #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_BGR_RED #define RGB_GREEN EXT_BGR_GREEN @@ -124,6 +128,7 @@ typedef my_color_converter * my_cconvert_ptr; #define RGB_PIXELSIZE EXT_BGR_PIXELSIZE #define rgb_ycc_convert_internal extbgr_ycc_convert_internal #define rgb_gray_convert_internal extbgr_gray_convert_internal +#define rgb_rgb_convert_internal extbgr_rgb_convert_internal #include "jccolext.c" #undef RGB_RED #undef RGB_GREEN @@ -131,6 +136,7 @@ typedef my_color_converter * my_cconvert_ptr; #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_BGRX_RED #define RGB_GREEN EXT_BGRX_GREEN @@ -138,6 +144,7 @@ typedef my_color_converter * my_cconvert_ptr; #define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE #define rgb_ycc_convert_internal extbgrx_ycc_convert_internal #define rgb_gray_convert_internal 
extbgrx_gray_convert_internal +#define rgb_rgb_convert_internal extbgrx_rgb_convert_internal #include "jccolext.c" #undef RGB_RED #undef RGB_GREEN @@ -145,6 +152,7 @@ typedef my_color_converter * my_cconvert_ptr; #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_XBGR_RED #define RGB_GREEN EXT_XBGR_GREEN @@ -152,6 +160,7 @@ typedef my_color_converter * my_cconvert_ptr; #define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE #define rgb_ycc_convert_internal extxbgr_ycc_convert_internal #define rgb_gray_convert_internal extxbgr_gray_convert_internal +#define rgb_rgb_convert_internal extxbgr_rgb_convert_internal #include "jccolext.c" #undef RGB_RED #undef RGB_GREEN @@ -159,6 +168,7 @@ typedef my_color_converter * my_cconvert_ptr; #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_XRGB_RED #define RGB_GREEN EXT_XRGB_GREEN @@ -166,6 +176,7 @@ typedef my_color_converter * my_cconvert_ptr; #define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE #define rgb_ycc_convert_internal extxrgb_ycc_convert_internal #define rgb_gray_convert_internal extxrgb_gray_convert_internal +#define rgb_rgb_convert_internal extxrgb_rgb_convert_internal #include "jccolext.c" #undef RGB_RED #undef RGB_GREEN @@ -173,6 +184,7 @@ typedef my_color_converter * my_cconvert_ptr; #undef RGB_PIXELSIZE #undef rgb_ycc_convert_internal #undef rgb_gray_convert_internal +#undef rgb_rgb_convert_internal /* @@ -306,6 +318,52 @@ rgb_gray_convert (j_compress_ptr cinfo, } +/* + * Extended RGB to plain RGB conversion + */ + +METHODDEF(void) +rgb_rgb_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + extrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + 
extrgbx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_BGR: + extbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + extbgrx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + extxbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + extxrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + default: + rgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + } +} + + /* * Convert some rows of samples to the JPEG colorspace. * This version handles Adobe-style CMYK->YCCK conversion, @@ -523,21 +581,25 @@ jinit_color_converter (j_compress_ptr cinfo) break; case JCS_RGB: - case JCS_EXT_RGB: - case JCS_EXT_RGBX: - case JCS_EXT_BGR: - case JCS_EXT_BGRX: - case JCS_EXT_XBGR: - case JCS_EXT_XRGB: - case JCS_EXT_RGBA: - case JCS_EXT_BGRA: - case JCS_EXT_ABGR: - case JCS_EXT_ARGB: if (cinfo->num_components != 3) ERREXIT(cinfo, JERR_BAD_J_COLORSPACE); - if (cinfo->in_color_space == cinfo->jpeg_color_space && - rgb_pixelsize[cinfo->in_color_space] == 3) + if (rgb_red[cinfo->in_color_space] == 0 && + rgb_green[cinfo->in_color_space] == 1 && + rgb_blue[cinfo->in_color_space] == 2 && + rgb_pixelsize[cinfo->in_color_space] == 3) cconvert->pub.color_convert = null_convert; + else if (cinfo->in_color_space == JCS_RGB || + cinfo->in_color_space == JCS_EXT_RGB || + cinfo->in_color_space == JCS_EXT_RGBX || + cinfo->in_color_space == JCS_EXT_BGR || + cinfo->in_color_space == JCS_EXT_BGRX || + cinfo->in_color_space == JCS_EXT_XBGR || + cinfo->in_color_space == JCS_EXT_XRGB || + cinfo->in_color_space == JCS_EXT_RGBA || + cinfo->in_color_space == JCS_EXT_BGRA || + cinfo->in_color_space == JCS_EXT_ABGR || + cinfo->in_color_space == 
JCS_EXT_ARGB) + cconvert->pub.color_convert = rgb_rgb_convert; else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; diff --git a/jdcolext.c b/jdcolext.c index 07da949f..3b8aeffc 100644 --- a/jdcolext.c +++ b/jdcolext.c @@ -102,3 +102,40 @@ gray_rgb_convert_internal (j_decompress_ptr cinfo, } } } + + +/* + * Convert RGB to extended RGB: just swap the order of source pixels + */ + +INLINE +LOCAL(void) +rgb_rgb_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + register JSAMPROW inptr0, inptr1, inptr2; + register JSAMPROW outptr; + register JDIMENSION col; + JDIMENSION num_cols = cinfo->output_width; + + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + /* We can dispense with GETJSAMPLE() here */ + outptr[RGB_RED] = inptr0[col]; + outptr[RGB_GREEN] = inptr1[col]; + outptr[RGB_BLUE] = inptr2[col]; + /* Set unused byte to 0xFF so it can be interpreted as an opaque */ + /* alpha channel value */ +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif + outptr += RGB_PIXELSIZE; + } + } +} diff --git a/jdcolor.c b/jdcolor.c index d9268dbb..694de9b6 100644 --- a/jdcolor.c +++ b/jdcolor.c @@ -3,7 +3,7 @@ * * Copyright (C) 1991-1997, Thomas G. Lane. * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009, 2011, D. R. Commander. + * Copyright (C) 2009, 2011-2012, D. R. Commander. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. 
* @@ -80,6 +80,7 @@ typedef my_color_deconverter * my_cconvert_ptr; #define RGB_PIXELSIZE EXT_RGB_PIXELSIZE #define ycc_rgb_convert_internal ycc_extrgb_convert_internal #define gray_rgb_convert_internal gray_extrgb_convert_internal +#define rgb_rgb_convert_internal rgb_extrgb_convert_internal #include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN @@ -87,6 +88,7 @@ typedef my_color_deconverter * my_cconvert_ptr; #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_RGBX_RED #define RGB_GREEN EXT_RGBX_GREEN @@ -95,6 +97,7 @@ typedef my_color_deconverter * my_cconvert_ptr; #define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE #define ycc_rgb_convert_internal ycc_extrgbx_convert_internal #define gray_rgb_convert_internal gray_extrgbx_convert_internal +#define rgb_rgb_convert_internal rgb_extrgbx_convert_internal #include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN @@ -103,6 +106,7 @@ typedef my_color_deconverter * my_cconvert_ptr; #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_BGR_RED #define RGB_GREEN EXT_BGR_GREEN @@ -110,6 +114,7 @@ typedef my_color_deconverter * my_cconvert_ptr; #define RGB_PIXELSIZE EXT_BGR_PIXELSIZE #define ycc_rgb_convert_internal ycc_extbgr_convert_internal #define gray_rgb_convert_internal gray_extbgr_convert_internal +#define rgb_rgb_convert_internal rgb_extbgr_convert_internal #include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN @@ -117,6 +122,7 @@ typedef my_color_deconverter * my_cconvert_ptr; #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_BGRX_RED #define RGB_GREEN EXT_BGRX_GREEN @@ -125,6 +131,7 @@ typedef my_color_deconverter * my_cconvert_ptr; #define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE #define ycc_rgb_convert_internal ycc_extbgrx_convert_internal #define gray_rgb_convert_internal 
gray_extbgrx_convert_internal +#define rgb_rgb_convert_internal rgb_extbgrx_convert_internal #include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN @@ -133,6 +140,7 @@ typedef my_color_deconverter * my_cconvert_ptr; #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_XBGR_RED #define RGB_GREEN EXT_XBGR_GREEN @@ -141,6 +149,7 @@ typedef my_color_deconverter * my_cconvert_ptr; #define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE #define ycc_rgb_convert_internal ycc_extxbgr_convert_internal #define gray_rgb_convert_internal gray_extxbgr_convert_internal +#define rgb_rgb_convert_internal rgb_extxbgr_convert_internal #include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN @@ -149,6 +158,7 @@ typedef my_color_deconverter * my_cconvert_ptr; #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal #define RGB_RED EXT_XRGB_RED #define RGB_GREEN EXT_XRGB_GREEN @@ -157,6 +167,7 @@ typedef my_color_deconverter * my_cconvert_ptr; #define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE #define ycc_rgb_convert_internal ycc_extxrgb_convert_internal #define gray_rgb_convert_internal gray_extxrgb_convert_internal +#define rgb_rgb_convert_internal rgb_extxrgb_convert_internal #include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN @@ -165,6 +176,7 @@ typedef my_color_deconverter * my_cconvert_ptr; #undef RGB_PIXELSIZE #undef ycc_rgb_convert_internal #undef gray_rgb_convert_internal +#undef rgb_rgb_convert_internal /* @@ -352,6 +364,51 @@ gray_rgb_convert (j_decompress_ptr cinfo, } +/* + * Convert plain RGB to extended RGB + */ + +METHODDEF(void) +rgb_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + rgb_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + 
rgb_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_BGR: + rgb_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + rgb_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + rgb_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + rgb_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + default: + rgb_rgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + } +} + /* * Adobe-style YCCK->CMYK conversion. * We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same @@ -494,9 +551,14 @@ jinit_color_deconverter (j_decompress_ptr cinfo) } } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { cconvert->pub.color_convert = gray_rgb_convert; - } else if (cinfo->jpeg_color_space == cinfo->out_color_space && - rgb_pixelsize[cinfo->out_color_space] == 3) { - cconvert->pub.color_convert = null_convert; + } else if (cinfo->jpeg_color_space == JCS_RGB) { + if (rgb_red[cinfo->out_color_space] == 0 && + rgb_green[cinfo->out_color_space] == 1 && + rgb_blue[cinfo->out_color_space] == 2 && + rgb_pixelsize[cinfo->out_color_space] == 3) + cconvert->pub.color_convert = null_convert; + else + cconvert->pub.color_convert = rgb_rgb_convert; } else ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); break; From cac105133e75a52fa5d57a2abccf242bb7b820d0 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 16 Mar 2012 14:37:36 +0000 Subject: [PATCH 16/26] Fix the behavior of the alpha-enabled colorspace constants whenever libjpeg-turbo is built without SIMD support and merged upsampling is used. 
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@811 632fc199-4ca6-4c93-a231-07263d6284db --- ChangeLog.txt | 6 ++++++ java/TJUnitTest.java | 31 ++++++++++++++++--------------- jdmerge.c | 8 ++++++++ jdmrgext.c | 28 ++++++++++++++++++++++++++++ tjbench.c | 2 +- tjunittest.c | 21 ++++++++++----------- 6 files changed, 69 insertions(+), 27 deletions(-) diff --git a/ChangeLog.txt b/ChangeLog.txt index e27ac09a..b0d06a76 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -5,6 +5,12 @@ properly work when the input or output colorspace is one of the libjpeg-turbo colorspace extensions. +[2] When libjpeg-turbo was built without SIMD support and merged (non-fancy) +upsampling was used along with an alpha-enabled colorspace during +decompression, the unused byte of the decompressed pixels was not being set to +0xFF. This has been fixed. TJUnitTest has also been extended to test for the +correct behavior of the colorspace extensions when merged upsampling is used. + 1.2.0 ===== diff --git a/java/TJUnitTest.java b/java/TJUnitTest.java index 557a85a9..b88b28ed 100644 --- a/java/TJUnitTest.java +++ b/java/TJUnitTest.java @@ -1,5 +1,5 @@ /* - * Copyright (C)2011 D. R. Commander. All Rights Reserved. + * Copyright (C)2011-2012 D. R. Commander. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -765,6 +765,9 @@ public class TJUnitTest { for(int pf : formats) { for(int i = 0; i < 2; i++) { int flags = 0; + if (subsamp == TJ.SAMP_422 || subsamp == TJ.SAMP_420 + || subsamp == TJ.SAMP_440) + flags |= TJ.FLAG_FASTUPSAMPLE; if(i == 1) { if(yuv == YUVDECODE) { tjc.close(); tjd.close(); return; @@ -850,20 +853,18 @@ public class TJUnitTest { if(doyuv) yuv = YUVENCODE; doTest(35, 39, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_444, testName); doTest(39, 41, bi ? 
_4byteFormatsBI : _4byteFormats, TJ.SAMP_444, testName); - if(doyuv) { - doTest(41, 35, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_422, - testName); - doTest(35, 39, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_422, - testName); - doTest(39, 41, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_420, - testName); - doTest(41, 35, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_420, - testName); - doTest(35, 39, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_440, - testName); - doTest(39, 41, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_440, - testName); - } + doTest(41, 35, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_422, + testName); + doTest(35, 39, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_422, + testName); + doTest(39, 41, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_420, + testName); + doTest(41, 35, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_420, + testName); + doTest(35, 39, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_440, + testName); + doTest(39, 41, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_440, + testName); doTest(35, 39, bi ? onlyGrayBI : onlyGray, TJ.SAMP_GRAY, testName); doTest(39, 41, bi ? 
_3byteFormatsBI : _3byteFormats, TJ.SAMP_GRAY, testName); diff --git a/jdmerge.c b/jdmerge.c index cfa3bb92..53361252 100644 --- a/jdmerge.c +++ b/jdmerge.c @@ -103,6 +103,7 @@ typedef my_upsampler * my_upsample_ptr; #define RGB_RED EXT_RGBX_RED #define RGB_GREEN EXT_RGBX_GREEN #define RGB_BLUE EXT_RGBX_BLUE +#define RGB_ALPHA 3 #define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE #define h2v1_merged_upsample_internal extrgbx_h2v1_merged_upsample_internal #define h2v2_merged_upsample_internal extrgbx_h2v2_merged_upsample_internal @@ -110,6 +111,7 @@ typedef my_upsampler * my_upsample_ptr; #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE +#undef RGB_ALPHA #undef RGB_PIXELSIZE #undef h2v1_merged_upsample_internal #undef h2v2_merged_upsample_internal @@ -131,6 +133,7 @@ typedef my_upsampler * my_upsample_ptr; #define RGB_RED EXT_BGRX_RED #define RGB_GREEN EXT_BGRX_GREEN #define RGB_BLUE EXT_BGRX_BLUE +#define RGB_ALPHA 3 #define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE #define h2v1_merged_upsample_internal extbgrx_h2v1_merged_upsample_internal #define h2v2_merged_upsample_internal extbgrx_h2v2_merged_upsample_internal @@ -138,6 +141,7 @@ typedef my_upsampler * my_upsample_ptr; #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE +#undef RGB_ALPHA #undef RGB_PIXELSIZE #undef h2v1_merged_upsample_internal #undef h2v2_merged_upsample_internal @@ -145,6 +149,7 @@ typedef my_upsampler * my_upsample_ptr; #define RGB_RED EXT_XBGR_RED #define RGB_GREEN EXT_XBGR_GREEN #define RGB_BLUE EXT_XBGR_BLUE +#define RGB_ALPHA 0 #define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE #define h2v1_merged_upsample_internal extxbgr_h2v1_merged_upsample_internal #define h2v2_merged_upsample_internal extxbgr_h2v2_merged_upsample_internal @@ -152,6 +157,7 @@ typedef my_upsampler * my_upsample_ptr; #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE +#undef RGB_ALPHA #undef RGB_PIXELSIZE #undef h2v1_merged_upsample_internal #undef h2v2_merged_upsample_internal @@ -159,6 +165,7 @@ typedef my_upsampler * my_upsample_ptr; #define RGB_RED 
EXT_XRGB_RED #define RGB_GREEN EXT_XRGB_GREEN #define RGB_BLUE EXT_XRGB_BLUE +#define RGB_ALPHA 0 #define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE #define h2v1_merged_upsample_internal extxrgb_h2v1_merged_upsample_internal #define h2v2_merged_upsample_internal extxrgb_h2v2_merged_upsample_internal @@ -166,6 +173,7 @@ typedef my_upsampler * my_upsample_ptr; #undef RGB_RED #undef RGB_GREEN #undef RGB_BLUE +#undef RGB_ALPHA #undef RGB_PIXELSIZE #undef h2v1_merged_upsample_internal #undef h2v2_merged_upsample_internal diff --git a/jdmrgext.c b/jdmrgext.c index 95ddd556..2b932655 100644 --- a/jdmrgext.c +++ b/jdmrgext.c @@ -2,6 +2,7 @@ * jdmrgext.c * * Copyright (C) 1994-1996, Thomas G. Lane. + * Copyright (C) 2011, D. R. Commander. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -54,11 +55,17 @@ h2v1_merged_upsample_internal (j_decompress_ptr cinfo, outptr[RGB_RED] = range_limit[y + cred]; outptr[RGB_GREEN] = range_limit[y + cgreen]; outptr[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif outptr += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr0++); outptr[RGB_RED] = range_limit[y + cred]; outptr[RGB_GREEN] = range_limit[y + cgreen]; outptr[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif outptr += RGB_PIXELSIZE; } /* If image width is odd, do the last output column separately */ @@ -72,6 +79,9 @@ h2v1_merged_upsample_internal (j_decompress_ptr cinfo, outptr[RGB_RED] = range_limit[y + cred]; outptr[RGB_GREEN] = range_limit[y + cgreen]; outptr[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr[RGB_ALPHA] = 0xFF; +#endif } } @@ -120,21 +130,33 @@ h2v2_merged_upsample_internal (j_decompress_ptr cinfo, outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr0[RGB_ALPHA] = 0xFF; +#endif outptr0 += 
RGB_PIXELSIZE; y = GETJSAMPLE(*inptr00++); outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr0[RGB_ALPHA] = 0xFF; +#endif outptr0 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr01++); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr1[RGB_ALPHA] = 0xFF; +#endif outptr1 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr01++); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr1[RGB_ALPHA] = 0xFF; +#endif outptr1 += RGB_PIXELSIZE; } /* If image width is odd, do the last output column separately */ @@ -148,9 +170,15 @@ h2v2_merged_upsample_internal (j_decompress_ptr cinfo, outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr0[RGB_ALPHA] = 0xFF; +#endif y = GETJSAMPLE(*inptr01); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; +#ifdef RGB_ALPHA + outptr1[RGB_ALPHA] = 0xFF; +#endif } } diff --git a/tjbench.c b/tjbench.c index f298732c..5f5b9e94 100644 --- a/tjbench.c +++ b/tjbench.c @@ -681,7 +681,7 @@ void usage(char *progname) printf("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =\n"); printf(" Test the specified color conversion path in the codec (default: BGR)\n"); printf("-fastupsample = Use fast, inaccurate upsampling code to perform 4:2:2 and 4:2:0\n"); - printf(" YUV decoding in libjpeg decompressor\n"); + printf(" YUV decoding\n"); printf("-quiet = Output results in tabular rather than verbose format\n"); printf("-yuvencode = Encode RGB input as planar YUV rather than compressing as JPEG\n"); printf("-yuvdecode = Decode JPEG image to planar YUV rather than RGB\n"); diff --git 
a/tjunittest.c b/tjunittest.c index d14ec528..89a6d1db 100644 --- a/tjunittest.c +++ b/tjunittest.c @@ -1,5 +1,5 @@ /* - * Copyright (C)2009-2011 D. R. Commander. All Rights Reserved. + * Copyright (C)2009-2012 D. R. Commander. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -77,7 +77,7 @@ const int _onlyGray[]={TJPF_GRAY}; const int _onlyRGB[]={TJPF_RGB}; enum {YUVENCODE=1, YUVDECODE}; -int yuv=0, alloc=0, alpha=0; +int yuv=0, alloc=0; int exitStatus=0; #define bailout() {exitStatus=-1; goto bailout;} @@ -502,6 +502,8 @@ void doTest(int w, int h, const int *formats, int nformats, int subsamp, for(i=0; i<2; i++) { int flags=0; + if(subsamp==TJSAMP_422 || subsamp==TJSAMP_420 || subsamp==TJSAMP_440) + flags|=TJFLAG_FASTUPSAMPLE; if(i==1) { if(yuv==YUVDECODE) goto bailout; @@ -617,15 +619,12 @@ int main(int argc, char *argv[]) if(doyuv) {yuv=YUVENCODE; alloc=0;} doTest(35, 39, _3byteFormats, 2, TJSAMP_444, "test"); doTest(39, 41, _4byteFormats, 4, TJSAMP_444, "test"); - if(doyuv) - { - doTest(41, 35, _3byteFormats, 2, TJSAMP_422, "test"); - doTest(35, 39, _4byteFormats, 4, TJSAMP_422, "test"); - doTest(39, 41, _3byteFormats, 2, TJSAMP_420, "test"); - doTest(41, 35, _4byteFormats, 4, TJSAMP_420, "test"); - doTest(35, 39, _3byteFormats, 2, TJSAMP_440, "test"); - doTest(39, 41, _4byteFormats, 4, TJSAMP_440, "test"); - } + doTest(41, 35, _3byteFormats, 2, TJSAMP_422, "test"); + doTest(35, 39, _4byteFormats, 4, TJSAMP_422, "test"); + doTest(39, 41, _3byteFormats, 2, TJSAMP_420, "test"); + doTest(41, 35, _4byteFormats, 4, TJSAMP_420, "test"); + doTest(35, 39, _3byteFormats, 2, TJSAMP_440, "test"); + doTest(39, 41, _4byteFormats, 4, TJSAMP_440, "test"); doTest(35, 39, _onlyGray, 1, TJSAMP_GRAY, "test"); doTest(39, 41, _3byteFormats, 2, TJSAMP_GRAY, "test"); doTest(41, 35, _4byteFormats, 4, TJSAMP_GRAY, "test"); From 
8015a303086599719b89c1d3a2c3c56405451ef0 Mon Sep 17 00:00:00 2001 From: DRC Date: Sat, 17 Mar 2012 14:32:38 +0000 Subject: [PATCH 17/26] Visual Studio 2010 doesn't like the wildcard at compile time, so let CMake expand it instead. git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@813 632fc199-4ca6-4c93-a231-07263d6284db --- simd/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/simd/CMakeLists.txt b/simd/CMakeLists.txt index 0eacebe1..06bbae4f 100755 --- a/simd/CMakeLists.txt +++ b/simd/CMakeLists.txt @@ -37,6 +37,8 @@ else() set(OBJDIR ${CMAKE_CURRENT_BINARY_DIR}) endif() +file(GLOB INC_FILES *.inc) + foreach(file ${SIMD_BASENAMES}) set(DEPFILE "") set(SIMD_SRC ${CMAKE_CURRENT_SOURCE_DIR}/${file}.asm) @@ -53,7 +55,8 @@ foreach(file ${SIMD_BASENAMES}) string(REGEX REPLACE "gra" "gry" DEPFILE ${DEPFILE}) endif() set(SIMD_OBJ ${OBJDIR}/${file}.obj) - add_custom_command(OUTPUT ${SIMD_OBJ} DEPENDS ${SIMD_SRC} ${DEPFILE} *.inc + add_custom_command(OUTPUT ${SIMD_OBJ} + DEPENDS ${SIMD_SRC} ${DEPFILE} ${INC_FILES} COMMAND ${NASM} ${NAFLAGS} ${SIMD_SRC} -o${SIMD_OBJ}) set(SIMD_OBJS ${SIMD_OBJS} ${SIMD_OBJ}) endforeach() From 68071bd2e427cf1f68c40dc771b23a0046ddb4e1 Mon Sep 17 00:00:00 2001 From: DRC Date: Thu, 22 Mar 2012 22:05:09 +0000 Subject: [PATCH 18/26] 1.2.1 git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@814 632fc199-4ca6-4c93-a231-07263d6284db --- CMakeLists.txt | 2 +- configure.ac | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d95c9e68..e7cbbf95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ cmake_minimum_required(VERSION 2.6) project(libjpeg-turbo C) -set(VERSION 1.2.0) +set(VERSION 1.2.1) if(MINGW OR CYGWIN) execute_process(COMMAND "date" "+%Y%m%d" OUTPUT_VARIABLE BUILD) diff --git a/configure.ac b/configure.ac index 2930c27c..59f50ff5 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ # 
Process this file with autoconf to produce a configure script. AC_PREREQ([2.56]) -AC_INIT([libjpeg-turbo], [1.2.0]) +AC_INIT([libjpeg-turbo], [1.2.1]) BUILD=`date +%Y%m%d` AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2]) From 7c6ae034da54989dbe3e6254492e68efa5ec9179 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 23 Mar 2012 00:51:56 +0000 Subject: [PATCH 19/26] Fix universal DMG build git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@816 632fc199-4ca6-4c93-a231-07263d6284db --- Makefile.am | 6 +++--- release/makemacpkg.in | 20 +++++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/Makefile.am b/Makefile.am index 0fef9fa5..aa2426be 100644 --- a/Makefile.am +++ b/Makefile.am @@ -302,15 +302,15 @@ deb: all if X86_64 udmg: all - sh pkgscripts/makemacpkg -builddir32 ${BUILDDIR32} + sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} iosdmg: all - sh pkgscripts/makemacpkg -builddir32 ${BUILDDIR32} -builddirarmv6 ${BUILDDIRARMV6} -builddirarmv7 ${BUILDDIRARMV7} + sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} else iosdmg: all - sh pkgscripts/makemacpkg -builddirarmv6 ${BUILDDIRARMV6} -builddirarmv7 ${BUILDDIRARMV7} + sh pkgscripts/makemacpkg -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} endif diff --git a/release/makemacpkg.in b/release/makemacpkg.in index 6cb4069d..7b43aa37 100644 --- a/release/makemacpkg.in +++ b/release/makemacpkg.in @@ -17,7 +17,7 @@ onexit() usage() { - echo "$0 [-builddir32 [32-bit build dir]] [-builddirarmv6 [ARM v6 build dir]] [-builddirarmv7 [ARM v7 build dir]]" + echo "$0 [-build32 [32-bit build dir]] [-buildarmv6 [ARM v6 build dir]] [-buildarmv7 [ARM v7 build dir]]" exit 1 } @@ -26,27 +26,33 @@ VERSION=@VERSION@ BUILD=@BUILD@ SRCDIR=@abs_top_srcdir@ BUILDDIR32=@abs_top_srcdir@/osxx86 +BUILD32=0 BUILDDIRARMV6=@abs_top_srcdir@/iosarmv6 +BUILDARMV6=0 BUILDDIRARMV7=@abs_top_srcdir@/iosarmv7 +BUILDARMV7=0 WITH_JAVA=@WITH_JAVA@ while [ 
$# -gt 0 ]; do case $1 in -h*) usage 0 ;; - -builddir32) + -build32) + BUILD32=1 if [ $# -gt 1 ]; then if [[ ! "$2" =~ -.* ]]; then BUILDDIR32=$2; shift fi fi ;; - -builddirarmv6) + -buildarmv6) + BUILDARMV6=1 if [ $# -gt 1 ]; then if [[ ! "$2" =~ -.* ]]; then BUILDDIRARMV6=$2; shift fi fi ;; - -builddirarmv7) + -buildarmv7) + BUILDARMV7=1 if [ $# -gt 1 ]; then if [[ ! "$2" =~ -.* ]]; then BUILDDIRARMV7=$2; shift @@ -76,7 +82,7 @@ mv $PKGROOT/opt/$PACKAGE_NAME/lib/libturbojpeg.* $PKGROOT/usr/lib mkdir -p $PKGROOT/usr/include mv $PKGROOT/opt/$PACKAGE_NAME/include/turbojpeg.h $PKGROOT/usr/include -if [ ! "$BUILDDIR32" = "" ]; then +if [ $BUILD32 = 1 ]; then if [ ! -d $BUILDDIR32 ]; then echo ERROR: 32-bit build directory $BUILDDIR32 does not exist exit 1 @@ -141,7 +147,7 @@ if [ ! "$BUILDDIR32" = "" ]; then fi -if [ ! "$BUILDDIRARMV6" = "" ]; then +if [ $BUILDARMV6 = 1 ]; then if [ ! -d $BUILDDIRARMV6 ]; then echo ERROR: ARM v6 build directory $BUILDDIRARMV6 does not exist exit 1 @@ -164,7 +170,7 @@ if [ ! "$BUILDDIRARMV6" = "" ]; then -output $PKGROOT/usr/lib/libturbojpeg.a fi -if [ ! "$BUILDDIRARMV7" = "" ]; then +if [ $BUILDARMV7 = 1 ]; then if [ ! -d $BUILDDIRARMV7 ]; then echo ERROR: ARM v7 build directory $BUILDDIRARMV7 does not exist exit 1 From 0559e94c725648b25bb07fdcde89513225de14ab Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 23 Mar 2012 03:12:35 +0000 Subject: [PATCH 20/26] "Sun Studio"="Oracle Solaris Studio" git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@818 632fc199-4ca6-4c93-a231-07263d6284db --- BUILDING.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/BUILDING.txt b/BUILDING.txt index fc583ee3..8f19edc1 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -258,8 +258,8 @@ to the configure command line. NASM 2.07 or later from FreeBSD ports must be installed. -Sun Studio ----------- +Oracle Solaris Studio +--------------------- Add @@ -268,7 +268,7 @@ Add to the configure command line. 
libjpeg-turbo will automatically be built with the maximum optimization level (-xO5) unless you override CFLAGS. -To build a 64-bit version of libjpeg-turbo using Sun Studio, add +To build a 64-bit version of libjpeg-turbo using Oracle Solaris Studio, add --host x86_64-pc-solaris CC=cc CFLAGS='-xO5 -m64' LDFLAGS=-m64 From 2eda8212e4b01c9b4d343dd0eaa579f0bba036e7 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 23 Mar 2012 19:32:38 +0000 Subject: [PATCH 21/26] Ensure that tjDecompress2() exits cleanly if setDecompDefaults() fails git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@821 632fc199-4ca6-4c93-a231-07263d6284db --- turbojpeg.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/turbojpeg.c b/turbojpeg.c index 2d5959a3..37ad5a37 100644 --- a/turbojpeg.c +++ b/turbojpeg.c @@ -1,5 +1,5 @@ /* - * Copyright (C)2009-2011 D. R. Commander. All Rights Reserved. + * Copyright (C)2009-2012 D. R. Commander. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -756,7 +756,10 @@ DLLEXPORT int DLLCALL tjDecompress2(tjhandle handle, unsigned char *jpegBuf, jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); jpeg_read_header(dinfo, TRUE); - if(setDecompDefaults(dinfo, pixelFormat)==-1) return -1; + if(setDecompDefaults(dinfo, pixelFormat)==-1) + { + retval=-1; goto bailout; + } if(flags&TJFLAG_FASTUPSAMPLE) dinfo->do_fancy_upsampling=FALSE; From d4c15e103cc91dcb6e8ca5291e490a9c138026b2 Mon Sep 17 00:00:00 2001 From: DRC Date: Fri, 23 Mar 2012 19:39:14 +0000 Subject: [PATCH 22/26] Whitespace tweak git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@823 632fc199-4ca6-4c93-a231-07263d6284db --- turbojpeg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/turbojpeg.c b/turbojpeg.c index 37ad5a37..e08c767f 100644 --- a/turbojpeg.c +++ b/turbojpeg.c @@ -498,7 +498,7 @@ DLLEXPORT int DLLCALL 
tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, JSAMPROW *outbuf[MAX_COMPONENTS]; int row, pw, ph, cw[MAX_COMPONENTS], ch[MAX_COMPONENTS]; JSAMPLE *ptr=dstBuf; - unsigned long yuvsize=0; + unsigned long yuvsize=0; jpeg_component_info *compptr; getinstance(handle); From 4f24016bde11aa4282900ff071a6c8481027e063 Mon Sep 17 00:00:00 2001 From: DRC Date: Thu, 26 Apr 2012 19:50:37 +0000 Subject: [PATCH 23/26] Preserve all 128 bits of xmm6 and xmm7 git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@829 632fc199-4ca6-4c93-a231-07263d6284db --- ChangeLog.txt | 4 ++++ simd/jsimdext.inc | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/ChangeLog.txt b/ChangeLog.txt index b0d06a76..8a8a29b5 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -11,6 +11,10 @@ decompression, the unused byte of the decompressed pixels was not being set to 0xFF. This has been fixed. TJUnitTest has also been extended to test for the correct behavior of the colorspace extensions when merged upsampling is used. +[3] Fixed a bug whereby the libjpeg-turbo SSE2 SIMD code would not preserve the +upper 64 bits of xmm6 and xmm7 on Win64 platforms, which violated the Win64 +calling conventions. 
+ 1.2.0 ===== diff --git a/simd/jsimdext.inc b/simd/jsimdext.inc index 4ab9bc0f..1d4d3e2d 100644 --- a/simd/jsimdext.inc +++ b/simd/jsimdext.inc @@ -322,15 +322,15 @@ const_base: push rsi push rdi sub rsp, SIZEOF_XMMWORD - movlpd XMMWORD [rsp], xmm6 + movaps XMMWORD [rsp], xmm6 sub rsp, SIZEOF_XMMWORD - movlpd XMMWORD [rsp], xmm7 + movaps XMMWORD [rsp], xmm7 %endmacro %imacro uncollect_args 0 - movlpd xmm7, XMMWORD [rsp] + movaps xmm7, XMMWORD [rsp] add rsp, SIZEOF_XMMWORD - movlpd xmm6, XMMWORD [rsp] + movaps xmm6, XMMWORD [rsp] add rsp, SIZEOF_XMMWORD pop rdi pop rsi From dd2b651243125701dca2ed2f31b3d34056719b9c Mon Sep 17 00:00:00 2001 From: DRC Date: Wed, 30 May 2012 20:36:42 +0000 Subject: [PATCH 24/26] Guard against num_components being a ridiculous value due to a corrupt header git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@831 632fc199-4ca6-4c93-a231-07263d6284db --- ChangeLog.txt | 4 ++++ jdmarker.c | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ChangeLog.txt b/ChangeLog.txt index 8a8a29b5..3775d543 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -15,6 +15,10 @@ correct behavior of the colorspace extensions when merged upsampling is used. upper 64 bits of xmm6 and xmm7 on Win64 platforms, which violated the Win64 calling conventions. +[4] Fixed a regression caused by 1.2.0[6] whereby decompressing corrupt JPEG +images (specifically, images in which the component count was erroneously set +to a large value) would cause libjpeg-turbo to segfault. 
+ 1.2.0 ===== diff --git a/jdmarker.c b/jdmarker.c index d8dcba98..6fc0f7dc 100644 --- a/jdmarker.c +++ b/jdmarker.c @@ -323,14 +323,15 @@ get_sos (j_decompress_ptr cinfo) /* Collect the component-spec parameters */ - for (i = 0; i < cinfo->num_components; i++) + for (i = 0; i < MAX_COMPS_IN_SCAN; i++) cinfo->cur_comp_info[i] = NULL; for (i = 0; i < n; i++) { INPUT_BYTE(cinfo, cc, return FALSE); INPUT_BYTE(cinfo, c, return FALSE); - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; + for (ci = 0, compptr = cinfo->comp_info; + ci < cinfo->num_components && ci < MAX_COMPS_IN_SCAN; ci++, compptr++) { if (cc == compptr->component_id && !cinfo->cur_comp_info[ci]) goto id_found; From 69799275be895de4963bb22c975081cb53a147a0 Mon Sep 17 00:00:00 2001 From: DRC Date: Wed, 13 Jun 2012 01:21:29 +0000 Subject: [PATCH 25/26] Eliminate the use of the MASKMOVDQU instruction, to speed up decompression performance by 10x on AMD Bobcat embedded processors (and ~5% on AMD desktop processors.) git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@835 632fc199-4ca6-4c93-a231-07263d6284db --- ChangeLog.txt | 7 +++++++ simd/jdclrss2-64.asm | 49 +++++++++++++++++-------------------------- simd/jdclrss2.asm | 49 +++++++++++++++++-------------------------- simd/jdmrgss2-64.asm | 50 +++++++++++++++++--------------------------- simd/jdmrgss2.asm | 49 +++++++++++++++++-------------------------- 5 files changed, 83 insertions(+), 121 deletions(-) diff --git a/ChangeLog.txt b/ChangeLog.txt index 3775d543..e80ac6c3 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -19,6 +19,13 @@ calling conventions. images (specifically, images in which the component count was erroneously set to a large value) would cause libjpeg-turbo to segfault. +[5] Worked around a severe performance issue with "Bobcat" (AMD Embedded APU) +processors. 
The MASKMOVDQU instruction, which was used by the libjpeg-turbo +SSE2 SIMD code, is apparently implemented in microcode on AMD processors, and +it is painfully slow on Bobcat processors in particular. Eliminating the use +of this instruction improved performance by an order of magnitude on Bobcat +processors and by a small amount (typically 5%) on AMD desktop processors. + 1.2.0 ===== diff --git a/simd/jdclrss2-64.asm b/simd/jdclrss2-64.asm index 696a383b..06cb213c 100644 --- a/simd/jdclrss2-64.asm +++ b/simd/jdclrss2-64.asm @@ -251,17 +251,13 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF - add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr jmp short .out0 .out1: ; --(unaligned)----------------- - pcmpeqb xmmH,xmmH ; xmmH=(all 1's) - maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [rdi], xmmF - add rdi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF .out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr sub rcx, byte SIZEOF_XMMWORD jz near .nextrow @@ -275,17 +271,16 @@ EXTN(jsimd_ycc_rgb_convert_sse2): lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE cmp rcx, byte 2*SIZEOF_XMMWORD jb short .column_st16 - maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD - add rdi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmF sub rcx, byte 2*SIZEOF_XMMWORD jmp short .column_st15 .column_st16: cmp rcx, byte SIZEOF_XMMWORD jb short 
.column_st15 - maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA add rdi, byte SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD @@ -363,7 +358,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmA,xmmG por xmmE,xmmC .adj0: ; ---------------- - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA + movdqu XMMWORD [rdi], xmmA %endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -409,19 +404,14 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH - add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr jmp short .out0 .out1: ; --(unaligned)----------------- - pcmpeqb xmmE,xmmE ; xmmE=(all 1's) - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [rdi], xmmC - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [rdi], xmmH - add rdi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH .out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr sub rcx, byte SIZEOF_XMMWORD jz near .nextrow @@ -434,17 +424,16 @@ EXTN(jsimd_ycc_rgb_convert_sse2): pcmpeqb xmmE,xmmE ; xmmE=(all 1's) cmp rcx, byte SIZEOF_XMMWORD/2 jb short .column_st16 - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD - add rdi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmC movdqa xmmD,xmmH sub rcx, byte 
SIZEOF_XMMWORD/2 .column_st16: cmp rcx, byte SIZEOF_XMMWORD/4 jb short .column_st15 - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA add rdi, byte SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD/4 @@ -503,7 +492,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmA,xmmB por xmmE,xmmG .adj0: ; ---------------- - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA + movdqu XMMWORD [rdi], xmmA %endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jdclrss2.asm b/simd/jdclrss2.asm index 7f519e6f..1354c3dc 100644 --- a/simd/jdclrss2.asm +++ b/simd/jdclrss2.asm @@ -262,17 +262,13 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF - add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr jmp short .out0 .out1: ; --(unaligned)----------------- - pcmpeqb xmmH,xmmH ; xmmH=(all 1's) - maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [edi], xmmF - add edi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF .out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr sub ecx, byte SIZEOF_XMMWORD jz near .nextrow @@ -287,17 +283,16 @@ EXTN(jsimd_ycc_rgb_convert_sse2): lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE cmp ecx, byte 2*SIZEOF_XMMWORD jb short .column_st16 - maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD - add edi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 
2*SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmF sub ecx, byte 2*SIZEOF_XMMWORD jmp short .column_st15 .column_st16: cmp ecx, byte SIZEOF_XMMWORD jb short .column_st15 - maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA add edi, byte SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD @@ -375,7 +370,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmA,xmmG por xmmE,xmmC .adj0: ; ---------------- - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + movdqu XMMWORD [edi], xmmA %endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -421,19 +416,14 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH - add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr jmp short .out0 .out1: ; --(unaligned)----------------- - pcmpeqb xmmE,xmmE ; xmmE=(all 1's) - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [edi], xmmC - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [edi], xmmH - add edi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH .out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr sub ecx, byte SIZEOF_XMMWORD jz near .nextrow @@ -447,17 +437,16 @@ EXTN(jsimd_ycc_rgb_convert_sse2): pcmpeqb xmmE,xmmE ; xmmE=(all 1's) cmp ecx, byte SIZEOF_XMMWORD/2 jb short .column_st16 - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD - add edi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD 
[edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 2*SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmC movdqa xmmD,xmmH sub ecx, byte SIZEOF_XMMWORD/2 .column_st16: cmp ecx, byte SIZEOF_XMMWORD/4 jb short .column_st15 - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA add edi, byte SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD/4 @@ -516,7 +505,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmA,xmmB por xmmE,xmmG .adj0: ; ---------------- - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + movdqu XMMWORD [edi], xmmA %endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jdmrgss2-64.asm b/simd/jdmrgss2-64.asm index a64a6b33..ffe02882 100644 --- a/simd/jdmrgss2-64.asm +++ b/simd/jdmrgss2-64.asm @@ -252,17 +252,13 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF - add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr jmp short .out0 .out1: ; --(unaligned)----------------- - pcmpeqb xmmH,xmmH ; xmmH=(all 1's) - maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [rdi], xmmF - add rdi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF .out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr sub rcx, byte SIZEOF_XMMWORD jz near .endcolumn @@ -275,21 +271,19 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): jmp near .columnloop .column_st32: - pcmpeqb xmmH,xmmH ; xmmH=(all 1's) lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE cmp rcx, byte 2*SIZEOF_XMMWORD jb short .column_st16 - maskmovdqu xmmA,xmmH ; movntdqu 
XMMWORD [rdi], xmmA - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD - add rdi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmF sub rcx, byte 2*SIZEOF_XMMWORD jmp short .column_st15 .column_st16: cmp rcx, byte SIZEOF_XMMWORD jb short .column_st15 - maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA add rdi, byte SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD @@ -367,7 +361,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmA,xmmG por xmmE,xmmC .adj0: ; ---------------- - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + movdqu XMMWORD [rdi],xmmA %endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -413,19 +407,14 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH - add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr jmp short .out0 .out1: ; --(unaligned)----------------- - pcmpeqb xmmE,xmmE ; xmmE=(all 1's) - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [rdi], xmmC - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [rdi], xmmH - add rdi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH .out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr sub rcx, byte SIZEOF_XMMWORD jz near .endcolumn @@ -441,17 +430,16 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): pcmpeqb xmmE,xmmE ; xmmE=(all 1's) 
cmp rcx, byte SIZEOF_XMMWORD/2 jb short .column_st16 - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA - add rdi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD - add rdi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmC movdqa xmmD,xmmH sub rcx, byte SIZEOF_XMMWORD/2 .column_st16: cmp rcx, byte SIZEOF_XMMWORD/4 jb short .column_st15 - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA add rdi, byte SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD/4 @@ -510,7 +498,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmA,xmmB por xmmE,xmmG .adj0: ; ---------------- - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + movdqu XMMWORD [rdi],xmmA %endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jdmrgss2.asm b/simd/jdmrgss2.asm index 04089aa3..556a4906 100644 --- a/simd/jdmrgss2.asm +++ b/simd/jdmrgss2.asm @@ -264,17 +264,13 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF - add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr jmp short .out0 .out1: ; --(unaligned)----------------- - pcmpeqb xmmH,xmmH ; xmmH=(all 1's) - maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [edi], xmmF - add edi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF .out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr sub ecx, byte SIZEOF_XMMWORD jz near .endcolumn @@ -292,17 +288,16 
@@ EXTN(jsimd_h2v1_merged_upsample_sse2): lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE cmp ecx, byte 2*SIZEOF_XMMWORD jb short .column_st16 - maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD - add edi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 2*SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmF sub ecx, byte 2*SIZEOF_XMMWORD jmp short .column_st15 .column_st16: cmp ecx, byte SIZEOF_XMMWORD jb short .column_st15 - maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA add edi, byte SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD @@ -380,7 +375,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmA,xmmG por xmmE,xmmC .adj0: ; ---------------- - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + movdqu XMMWORD [edi], xmmA %endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -426,19 +421,14 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH - add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr jmp short .out0 .out1: ; --(unaligned)----------------- - pcmpeqb xmmE,xmmE ; xmmE=(all 1's) - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [edi], xmmC - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [edi], xmmH - add edi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH .out0: + add edi, byte 
RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr sub ecx, byte SIZEOF_XMMWORD jz near .endcolumn @@ -455,17 +445,16 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): pcmpeqb xmmE,xmmE ; xmmE=(all 1's) cmp ecx, byte SIZEOF_XMMWORD/2 jb short .column_st16 - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA - add edi, byte SIZEOF_XMMWORD ; outptr - maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD - add edi, byte SIZEOF_XMMWORD ; outptr + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 2*SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmC movdqa xmmD,xmmH sub ecx, byte SIZEOF_XMMWORD/2 .column_st16: cmp ecx, byte SIZEOF_XMMWORD/4 jb short .column_st15 - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA add edi, byte SIZEOF_XMMWORD ; outptr movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD/4 @@ -524,7 +513,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmA,xmmB por xmmE,xmmG .adj0: ; ---------------- - maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA + movdqu XMMWORD [edi], xmmA %endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- From 316617faf4a9c5f00bf76f4a0e9c9864d65ec97f Mon Sep 17 00:00:00 2001 From: DRC Date: Wed, 13 Jun 2012 05:17:03 +0000 Subject: [PATCH 26/26] Accelerated 4:2:2 upsampling routine for ARM (improves performance ~20-30% when decompressing 4:2:2 JPEGs using fancy upsampling) git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/branches/1.2.x@837 632fc199-4ca6-4c93-a231-07263d6284db --- ChangeLog.txt | 4 + simd/jsimd.h | 4 + simd/jsimd_arm.c | 12 +++ simd/jsimd_arm_neon.S | 238 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 258 insertions(+) diff --git a/ChangeLog.txt b/ChangeLog.txt index e80ac6c3..7e4b4ea1 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -26,6 +26,10 @@ it is painfully slow on Bobcat processors in particular. 
Eliminating the use of this instruction improved performance by an order of magnitude on Bobcat processors and by a small amount (typically 5%) on AMD desktop processors. +[6] Added SIMD acceleration for performing 4:2:2 upsampling on NEON-capable ARM +platforms. This speeds up the decompression of 4:2:2 JPEGs by 20-25% on such +platforms. + 1.2.0 ===== diff --git a/simd/jsimd.h b/simd/jsimd.h index 6ee99cc6..3d4751ff 100644 --- a/simd/jsimd.h +++ b/simd/jsimd.h @@ -522,6 +522,10 @@ EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_sse2 JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); +EXTERN(void) jsimd_h2v1_fancy_upsample_neon + JPP((int max_v_samp_factor, JDIMENSION downsampled_width, + JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); + /* SIMD Sample Conversion */ EXTERN(void) jsimd_convsamp_mmx JPP((JSAMPARRAY sample_data, JDIMENSION start_col, diff --git a/simd/jsimd_arm.c b/simd/jsimd_arm.c index af0c2c8a..cae84df0 100644 --- a/simd/jsimd_arm.c +++ b/simd/jsimd_arm.c @@ -338,6 +338,15 @@ jsimd_can_h2v1_fancy_upsample (void) { init_simd(); + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_ARM_NEON) + return 1; + return 0; } @@ -355,6 +364,9 @@ jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) { + if (simd_support & JSIMD_ARM_NEON) + jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, output_data_ptr); } GLOBAL(int) diff --git a/simd/jsimd_arm_neon.S b/simd/jsimd_arm_neon.S index b2f9c2ae..9962b8a1 100644 --- a/simd/jsimd_arm_neon.S +++ b/simd/jsimd_arm_neon.S @@ -2157,3 +2157,241 @@ asm_function jsimd_quantize_neon .unreq SHIFT .unreq LOOP_COUNT .endfunc + +/*****************************************************************************/ + +/* + * GLOBAL(void) + * 
jsimd_h2v1_fancy_upsample_neon (int max_v_samp_factor, + * JDIMENSION downsampled_width, + * JSAMPARRAY input_data, + * JSAMPARRAY * output_data_ptr); + * + * Note: the use of unaligned writes is the main remaining bottleneck in + * this code, which can potentially be solved to get up to tens + * of percent performance improvement on Cortex-A8/Cortex-A9. + */ + +/* + * Upsample 16 source pixels to 32 destination pixels. The new 16 source + * pixels are loaded to q0. The previous 16 source pixels are in q1. The + * shifted-by-one source pixels are constructed in q2 by using q0 and q1. + * Register d28 is used for multiplication by 3. Register q15 is used + * for adding +1 bias. + */ +.macro upsample16 OUTPTR, INPTR + vld1.8 {q0}, [\INPTR]! + vmovl.u8 q8, d0 + vext.8 q2, q1, q0, #15 + vmovl.u8 q9, d1 + vaddw.u8 q10, q15, d4 + vaddw.u8 q11, q15, d5 + vmlal.u8 q8, d4, d28 + vmlal.u8 q9, d5, d28 + vmlal.u8 q10, d0, d28 + vmlal.u8 q11, d1, d28 + vmov q1, q0 /* backup source pixels to q1 */ + vrshrn.u16 d6, q8, #2 + vrshrn.u16 d7, q9, #2 + vshrn.u16 d8, q10, #2 + vshrn.u16 d9, q11, #2 + vst2.8 {d6, d7, d8, d9}, [\OUTPTR]! +.endm + +/* + * Upsample 32 source pixels to 64 destination pixels. Compared to 'upsample16' + * macro, the roles of q0 and q1 registers are reversed for even and odd + * groups of 16 pixels, that's why "vmov q1, q0" instructions are not needed. + * Also this unrolling allows reordering loads and stores to compensate + * for multiplication latency and reduce stalls. + */ +.macro upsample32 OUTPTR, INPTR + /* even 16 pixels group */ + vld1.8 {q0}, [\INPTR]! + vmovl.u8 q8, d0 + vext.8 q2, q1, q0, #15 + vmovl.u8 q9, d1 + vaddw.u8 q10, q15, d4 + vaddw.u8 q11, q15, d5 + vmlal.u8 q8, d4, d28 + vmlal.u8 q9, d5, d28 + vmlal.u8 q10, d0, d28 + vmlal.u8 q11, d1, d28 + /* odd 16 pixels group */ + vld1.8 {q1}, [\INPTR]!
+ vrshrn.u16 d6, q8, #2 + vrshrn.u16 d7, q9, #2 + vshrn.u16 d8, q10, #2 + vshrn.u16 d9, q11, #2 + vmovl.u8 q8, d2 + vext.8 q2, q0, q1, #15 + vmovl.u8 q9, d3 + vaddw.u8 q10, q15, d4 + vaddw.u8 q11, q15, d5 + vmlal.u8 q8, d4, d28 + vmlal.u8 q9, d5, d28 + vmlal.u8 q10, d2, d28 + vmlal.u8 q11, d3, d28 + vst2.8 {d6, d7, d8, d9}, [\OUTPTR]! + vrshrn.u16 d6, q8, #2 + vrshrn.u16 d7, q9, #2 + vshrn.u16 d8, q10, #2 + vshrn.u16 d9, q11, #2 + vst2.8 {d6, d7, d8, d9}, [\OUTPTR]! +.endm + +/* + * Upsample a row of WIDTH pixels from INPTR to OUTPTR. + */ +.macro upsample_row OUTPTR, INPTR, WIDTH, TMP1 + /* special case for the first and last pixels */ + sub \WIDTH, \WIDTH, #1 + add \OUTPTR, \OUTPTR, #1 + ldrb \TMP1, [\INPTR, \WIDTH] + strb \TMP1, [\OUTPTR, \WIDTH, asl #1] + ldrb \TMP1, [\INPTR], #1 + strb \TMP1, [\OUTPTR, #-1] + vmov.8 d3[7], \TMP1 + + subs \WIDTH, \WIDTH, #32 + blt 5f +0: /* process 32 pixels per iteration */ + upsample32 \OUTPTR, \INPTR + subs \WIDTH, \WIDTH, #32 + bge 0b +5: + adds \WIDTH, \WIDTH, #16 + blt 1f +0: /* process 16 pixels if needed */ + upsample16 \OUTPTR, \INPTR + subs \WIDTH, \WIDTH, #16 +1: + adds \WIDTH, \WIDTH, #16 + beq 9f + + /* load the remaining 1-15 pixels */ + add \INPTR, \INPTR, \WIDTH + tst \WIDTH, #1 + beq 2f + sub \INPTR, \INPTR, #1 + vld1.8 {d0[0]}, [\INPTR] +2: + tst \WIDTH, #2 + beq 2f + vext.8 d0, d0, d0, #6 + sub \INPTR, \INPTR, #1 + vld1.8 {d0[1]}, [\INPTR] + sub \INPTR, \INPTR, #1 + vld1.8 {d0[0]}, [\INPTR] +2: + tst \WIDTH, #4 + beq 2f + vrev64.32 d0, d0 + sub \INPTR, \INPTR, #1 + vld1.8 {d0[3]}, [\INPTR] + sub \INPTR, \INPTR, #1 + vld1.8 {d0[2]}, [\INPTR] + sub \INPTR, \INPTR, #1 + vld1.8 {d0[1]}, [\INPTR] + sub \INPTR, \INPTR, #1 + vld1.8 {d0[0]}, [\INPTR] +2: + tst \WIDTH, #8 + beq 2f + vmov d1, d0 + sub \INPTR, \INPTR, #8 + vld1.8 {d0}, [\INPTR] +2: /* upsample the remaining pixels */ + vmovl.u8 q8, d0 + vext.8 q2, q1, q0, #15 + vmovl.u8 q9, d1 + vaddw.u8 q10, q15, d4 + vaddw.u8 q11, q15, d5 + vmlal.u8 q8, d4, d28 + 
vmlal.u8 q9, d5, d28 + vmlal.u8 q10, d0, d28 + vmlal.u8 q11, d1, d28 + vrshrn.u16 d10, q8, #2 + vrshrn.u16 d12, q9, #2 + vshrn.u16 d11, q10, #2 + vshrn.u16 d13, q11, #2 + vzip.8 d10, d11 + vzip.8 d12, d13 + /* store the remaining pixels */ + tst \WIDTH, #8 + beq 2f + vst1.8 {d10, d11}, [\OUTPTR]! + vmov q5, q6 +2: + tst \WIDTH, #4 + beq 2f + vst1.8 {d10}, [\OUTPTR]! + vmov d10, d11 +2: + tst \WIDTH, #2 + beq 2f + vst1.8 {d10[0]}, [\OUTPTR]! + vst1.8 {d10[1]}, [\OUTPTR]! + vst1.8 {d10[2]}, [\OUTPTR]! + vst1.8 {d10[3]}, [\OUTPTR]! + vext.8 d10, d10, d10, #4 +2: + tst \WIDTH, #1 + beq 2f + vst1.8 {d10[0]}, [\OUTPTR]! + vst1.8 {d10[1]}, [\OUTPTR]! +2: +9: +.endm + +asm_function jsimd_h2v1_fancy_upsample_neon + + MAX_V_SAMP_FACTOR .req r0 + DOWNSAMPLED_WIDTH .req r1 + INPUT_DATA .req r2 + OUTPUT_DATA_PTR .req r3 + OUTPUT_DATA .req OUTPUT_DATA_PTR + + OUTPTR .req r4 + INPTR .req r5 + WIDTH .req ip + TMP .req lr + + push {r4, r5, r6, lr} + vpush {d8-d15} + + ldr OUTPUT_DATA, [OUTPUT_DATA_PTR] + cmp MAX_V_SAMP_FACTOR, #0 + ble 99f + + /* initialize constants */ + vmov.u8 d28, #3 + vmov.u16 q15, #1 +11: + ldr INPTR, [INPUT_DATA], #4 + ldr OUTPTR, [OUTPUT_DATA], #4 + mov WIDTH, DOWNSAMPLED_WIDTH + upsample_row OUTPTR, INPTR, WIDTH, TMP + subs MAX_V_SAMP_FACTOR, MAX_V_SAMP_FACTOR, #1 + bgt 11b + +99: + vpop {d8-d15} + pop {r4, r5, r6, pc} + + .unreq MAX_V_SAMP_FACTOR + .unreq DOWNSAMPLED_WIDTH + .unreq INPUT_DATA + .unreq OUTPUT_DATA_PTR + .unreq OUTPUT_DATA + + .unreq OUTPTR + .unreq INPTR + .unreq WIDTH + .unreq TMP + +.endfunc + +.purgem upsample16 +.purgem upsample32 +.purgem upsample_row