Eliminate duplicate copies of jpeg_nbits_table
ef9a4e05ba (libjpeg-turbo 1.4.x), which was based on https://bug815473.bmoattachments.org/attachment.cgi?id=692126 (https://bugzilla.mozilla.org/show_bug.cgi?id=815473), modified the C baseline Huffman encoder so that it precomputes jpeg_nbits_table, in order to facilitate sharing the table among multiple processes. However, libjpeg-turbo never shared the table, and because the table was implemented as a static array, f3a8684cd1 (libjpeg-turbo 1.5.x) and 37bae1a0e9 (libjpeg-turbo 2.0.x) each introduced a duplicate copy of the table for (respectively) the SSE2 baseline Huffman encoder and the C progressive Huffman encoder. This commit does the following: - Move the duplicated code in jchuff.c and jcphuff.c, originally introduced in 0cfc4c17b7 and 37bae1a0e9, into a header (jpeg_nbits.h). - Credit the co-author of 0cfc4c17b7. (Refer to https://sourceforge.net/p/libjpeg-turbo/patches/57). - Modify the SSE2 baseline Huffman encoder so that the C Huffman encoders can share its definition of jpeg_nbits_table. - Move the definition of jpeg_nbits_table into a C source file (jpeg_nbits.c) rather than a header, and define the table only if USE_CLZ_INTRINSIC is undefined and the SSE2 baseline Huffman encoder will not be built. - Apply hidden symbol visibility to the shared definition of jpeg_nbits_table, if the compiler supports the necessary attribute. (In practice, only Visual C++ doesn't.) Closes #114 See also: https://bugzilla.mozilla.org/show_bug.cgi?id=1501523
This commit is contained in:
@@ -468,6 +468,15 @@ if(UNIX)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT MSVC OR CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
||||
check_c_source_compiles("const int __attribute__((visibility(\"hidden\"))) table[1] = { 0 }; int main(void) { return table[0]; }"
|
||||
HIDDEN_WORKS)
|
||||
if(HIDDEN_WORKS)
|
||||
set(HIDDEN "__attribute__((visibility(\"hidden\")))")
|
||||
message(STATUS "HIDDEN = ${HIDDEN}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
set(INLINE_OPTIONS "__inline;inline")
|
||||
else()
|
||||
@@ -572,7 +581,7 @@ set(JPEG_SOURCES ${JPEG12_SOURCES} jcapimin.c jchuff.c jcicc.c jcinit.c
|
||||
jclhuff.c jcmarker.c jcmaster.c jcomapi.c jcparam.c jcphuff.c jctrans.c
|
||||
jdapimin.c jdatadst.c jdatasrc.c jdhuff.c jdicc.c jdinput.c jdlhuff.c
|
||||
jdmarker.c jdmaster.c jdphuff.c jdtrans.c jerror.c jfdctflt.c jmemmgr.c
|
||||
jmemnobs.c)
|
||||
jmemnobs.c jpeg_nbits.c)
|
||||
|
||||
if(WITH_ARITH_ENC OR WITH_ARITH_DEC)
|
||||
set(JPEG_SOURCES ${JPEG_SOURCES} jaricom.c)
|
||||
|
||||
38
jchuff.c
38
jchuff.c
@@ -6,7 +6,7 @@
|
||||
* Lossless JPEG Modifications:
|
||||
* Copyright (C) 1999, Ken Murchison.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2009-2011, 2014-2016, 2018-2023, D. R. Commander.
|
||||
* Copyright (C) 2009-2011, 2014-2016, 2018-2024, D. R. Commander.
|
||||
* Copyright (C) 2015, Matthieu Darbois.
|
||||
* Copyright (C) 2018, Matthias Räncker.
|
||||
* Copyright (C) 2020, Arm Limited.
|
||||
@@ -35,41 +35,7 @@
|
||||
#include "jchuff.h" /* Declarations shared with jc*huff.c */
|
||||
#endif
|
||||
#include <limits.h>
|
||||
|
||||
/*
|
||||
* NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
|
||||
* used for bit counting rather than the lookup table. This will reduce the
|
||||
* memory footprint by 64k, which is important for some mobile applications
|
||||
* that create many isolated instances of libjpeg-turbo (web browsers, for
|
||||
* instance.) This may improve performance on some mobile platforms as well.
|
||||
* This feature is enabled by default only on Arm processors, because some x86
|
||||
* chips have a slow implementation of bsr, and the use of clz/bsr cannot be
|
||||
* shown to have a significant performance impact even on the x86 chips that
|
||||
* have a fast implementation of it. When building for Armv6, you can
|
||||
* explicitly disable the use of clz/bsr by adding -mthumb to the compiler
|
||||
* flags (this defines __thumb__).
|
||||
*/
|
||||
|
||||
/* NOTE: Both GCC and Clang define __GNUC__ */
|
||||
#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
|
||||
defined(_M_ARM) || defined(_M_ARM64)
|
||||
#if !defined(__thumb__) || defined(__thumb2__)
|
||||
#define USE_CLZ_INTRINSIC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_CLZ_INTRINSIC
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x))
|
||||
#else
|
||||
#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
|
||||
#endif
|
||||
#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
|
||||
#else
|
||||
#include "jpeg_nbits_table.h"
|
||||
#define JPEG_NBITS(x) (jpeg_nbits_table[x])
|
||||
#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x)
|
||||
#endif
|
||||
#include "jpeg_nbits.h"
|
||||
|
||||
|
||||
/* Expanded entropy encoder object for Huffman encoding.
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
/* libjpeg-turbo build number */
|
||||
#define BUILD "@BUILD@"
|
||||
|
||||
/* How to hide global symbols. */
|
||||
#define HIDDEN @HIDDEN@
|
||||
|
||||
/* Compiler's inline keyword */
|
||||
#undef inline
|
||||
|
||||
|
||||
37
jcphuff.c
37
jcphuff.c
@@ -6,7 +6,7 @@
|
||||
* Lossless JPEG Modifications:
|
||||
* Copyright (C) 1999, Ken Murchison.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander.
|
||||
* Copyright (C) 2011, 2015, 2018, 2021-2022, 2024, D. R. Commander.
|
||||
* Copyright (C) 2016, 2018, 2022, Matthieu Darbois.
|
||||
* Copyright (C) 2020, Arm Limited.
|
||||
* Copyright (C) 2021, Alex Richardson.
|
||||
@@ -44,40 +44,7 @@
|
||||
|
||||
#ifdef C_PROGRESSIVE_SUPPORTED
|
||||
|
||||
/*
|
||||
* NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
|
||||
* used for bit counting rather than the lookup table. This will reduce the
|
||||
* memory footprint by 64k, which is important for some mobile applications
|
||||
* that create many isolated instances of libjpeg-turbo (web browsers, for
|
||||
* instance.) This may improve performance on some mobile platforms as well.
|
||||
* This feature is enabled by default only on Arm processors, because some x86
|
||||
* chips have a slow implementation of bsr, and the use of clz/bsr cannot be
|
||||
* shown to have a significant performance impact even on the x86 chips that
|
||||
* have a fast implementation of it. When building for Armv6, you can
|
||||
* explicitly disable the use of clz/bsr by adding -mthumb to the compiler
|
||||
* flags (this defines __thumb__).
|
||||
*/
|
||||
|
||||
/* NOTE: Both GCC and Clang define __GNUC__ */
|
||||
#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
|
||||
defined(_M_ARM) || defined(_M_ARM64)
|
||||
#if !defined(__thumb__) || defined(__thumb2__)
|
||||
#define USE_CLZ_INTRINSIC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_CLZ_INTRINSIC
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x))
|
||||
#else
|
||||
#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
|
||||
#endif
|
||||
#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
|
||||
#else
|
||||
#include "jpeg_nbits_table.h"
|
||||
#define JPEG_NBITS(x) (jpeg_nbits_table[x])
|
||||
#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x)
|
||||
#endif
|
||||
#include "jpeg_nbits.h"
|
||||
|
||||
|
||||
/* Expanded entropy encoder object for progressive Huffman encoding. */
|
||||
|
||||
@@ -1,4 +1,32 @@
|
||||
static const unsigned char jpeg_nbits_table[65536] = {
|
||||
/*
|
||||
* Copyright (C) 2024, D. R. Commander.
|
||||
*
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
*/
|
||||
|
||||
#include "jpeg_nbits.h"
|
||||
#include "jconfigint.h"
|
||||
|
||||
|
||||
#ifndef USE_CLZ_INTRINSIC
|
||||
|
||||
#define INCLUDE_JPEG_NBITS_TABLE
|
||||
|
||||
/* When building for x86[-64] with the SIMD extensions enabled, the C Huffman
|
||||
* encoders can reuse jpeg_nbits_table from the SSE2 baseline Huffman encoder.
|
||||
*/
|
||||
#if (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || \
|
||||
defined(_M_X64)) && defined(WITH_SIMD)
|
||||
#undef INCLUDE_JPEG_NBITS_TABLE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef INCLUDE_JPEG_NBITS_TABLE
|
||||
|
||||
const unsigned char HIDDEN jpeg_nbits_table[65536] = {
|
||||
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
@@ -4096,3 +4124,11 @@ static const unsigned char jpeg_nbits_table[65536] = {
|
||||
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
||||
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
/* Suppress compiler warnings about empty translation unit. */
|
||||
|
||||
typedef int dummy_jpeg_nbits_table;
|
||||
|
||||
#endif
|
||||
43
jpeg_nbits.h
Normal file
43
jpeg_nbits.h
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (C) 2014, 2021, 2024, D. R. Commander.
|
||||
* Copyright (C) 2014, Olle Liljenzin.
|
||||
* Copyright (C) 2020, Arm Limited.
|
||||
*
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
*/
|
||||
|
||||
/*
|
||||
* NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
|
||||
* used for bit counting rather than the lookup table. This will reduce the
|
||||
* memory footprint by 64k, which is important for some mobile applications
|
||||
* that create many isolated instances of libjpeg-turbo (web browsers, for
|
||||
* instance.) This may improve performance on some mobile platforms as well.
|
||||
* This feature is enabled by default only on Arm processors, because some x86
|
||||
* chips have a slow implementation of bsr, and the use of clz/bsr cannot be
|
||||
* shown to have a significant performance impact even on the x86 chips that
|
||||
* have a fast implementation of it. When building for Armv6, you can
|
||||
* explicitly disable the use of clz/bsr by adding -mthumb to the compiler
|
||||
* flags (this defines __thumb__).
|
||||
*/
|
||||
|
||||
/* NOTE: Both GCC and Clang define __GNUC__ */
|
||||
#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
|
||||
defined(_M_ARM) || defined(_M_ARM64)
|
||||
#if !defined(__thumb__) || defined(__thumb2__)
|
||||
#define USE_CLZ_INTRINSIC
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_CLZ_INTRINSIC
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x))
|
||||
#else
|
||||
#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
|
||||
#endif
|
||||
#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
|
||||
#else
|
||||
extern const unsigned char jpeg_nbits_table[65536];
|
||||
#define JPEG_NBITS(x) (jpeg_nbits_table[x])
|
||||
#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x)
|
||||
#endif
|
||||
@@ -1,7 +1,7 @@
|
||||
;
|
||||
; jchuff-sse2.asm - Huffman entropy encoding (SSE2)
|
||||
;
|
||||
; Copyright (C) 2009-2011, 2014-2017, 2019, D. R. Commander.
|
||||
; Copyright (C) 2009-2011, 2014-2017, 2019, 2024, D. R. Commander.
|
||||
; Copyright (C) 2015, Matthieu Darbois.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
@@ -65,7 +65,8 @@ times 1 << 2 db 3
|
||||
times 1 << 1 db 2
|
||||
times 1 << 0 db 1
|
||||
times 1 db 0
|
||||
jpeg_nbits_table:
|
||||
GLOBAL_DATA(jpeg_nbits_table)
|
||||
EXTN(jpeg_nbits_table):
|
||||
times 1 db 0
|
||||
times 1 << 0 db 1
|
||||
times 1 << 1 db 2
|
||||
@@ -88,9 +89,9 @@ times 1 << 14 db 15
|
||||
%ifdef PIC
|
||||
%define NBITS(x) nbits_base + x
|
||||
%else
|
||||
%define NBITS(x) jpeg_nbits_table + x
|
||||
%define NBITS(x) EXTN(jpeg_nbits_table) + x
|
||||
%endif
|
||||
%define MASK_BITS(x) NBITS((x) * 8) + (jpeg_mask_bits - jpeg_nbits_table)
|
||||
%define MASK_BITS(x) NBITS((x) * 8) + (jpeg_mask_bits - EXTN(jpeg_nbits_table))
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
SECTION SEG_TEXT
|
||||
@@ -469,7 +470,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
pcmpeqw mm_all_0xff, mm_all_0xff ;Z: all_0xff[i] = 0xFF;
|
||||
%endmacro
|
||||
|
||||
GET_SYM nbits_base, jpeg_nbits_table, GET_SYM_BEFORE, GET_SYM_AFTER
|
||||
GET_SYM nbits_base, EXTN(jpeg_nbits_table), GET_SYM_BEFORE, GET_SYM_AFTER
|
||||
|
||||
psrldq xmm4, 1 * SIZEOF_WORD ;G: w4 = 37 44 45 38 39 46 47 --
|
||||
shufpd xmm1, xmm5, 10b ;F: w1 = 36 37 44 45 50 51 58 59
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
;
|
||||
; jchuff-sse2.asm - Huffman entropy encoding (64-bit SSE2)
|
||||
;
|
||||
; Copyright (C) 2009-2011, 2014-2016, 2019, 2021, 2023, D. R. Commander.
|
||||
; Copyright (C) 2009-2011, 2014-2016, 2019, 2021, 2023-2024, D. R. Commander.
|
||||
; Copyright (C) 2015, Matthieu Darbois.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
; Copyright (C) 2023, Aliaksiej Kandracienka.
|
||||
@@ -67,7 +67,8 @@ times 1 << 2 db 3
|
||||
times 1 << 1 db 2
|
||||
times 1 << 0 db 1
|
||||
times 1 db 0
|
||||
jpeg_nbits_table:
|
||||
GLOBAL_DATA(jpeg_nbits_table)
|
||||
EXTN(jpeg_nbits_table):
|
||||
times 1 db 0
|
||||
times 1 << 0 db 1
|
||||
times 1 << 1 db 2
|
||||
@@ -89,7 +90,7 @@ times 1 << 15 db 16
|
||||
alignz 32
|
||||
|
||||
%define NBITS(x) nbits_base + x
|
||||
%define MASK_BITS(x) NBITS((x) * 4) + (jpeg_mask_bits - jpeg_nbits_table)
|
||||
%define MASK_BITS(x) NBITS((x) * 4) + (jpeg_mask_bits - EXTN(jpeg_nbits_table))
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
SECTION SEG_TEXT
|
||||
@@ -290,7 +291,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
mov dctbl, POINTER [rbp+48]
|
||||
mov actbl, POINTER [rbp+56]
|
||||
punpckldq xmm0, xmm1 ;A: w0 = xx 01 08 09 02 03 10 11
|
||||
lea nbits_base, [rel jpeg_nbits_table]
|
||||
lea nbits_base, [rel EXTN(jpeg_nbits_table)]
|
||||
|
||||
%else
|
||||
|
||||
@@ -312,7 +313,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
mov buffer, rsi
|
||||
movups xmm1, XMMWORD [block + 8 * SIZEOF_WORD] ;B: w1 = 08 09 10 11 12 13 14 15
|
||||
movsx codeq, word [block] ;Z: code = block[0];
|
||||
lea nbits_base, [rel jpeg_nbits_table]
|
||||
lea nbits_base, [rel EXTN(jpeg_nbits_table)]
|
||||
pxor xmm4, xmm4 ;A: w4[i] = 0;
|
||||
sub codeq, rcx ;Z: code -= last_dc_val;
|
||||
punpckldq xmm0, xmm1 ;A: w0 = xx 01 08 09 02 03 10 11
|
||||
|
||||
Reference in New Issue
Block a user