Merge branch 'main' into dev

This commit is contained in:
DRC
2022-11-15 21:28:01 -06:00
16 changed files with 166 additions and 149 deletions

View File

@@ -46,6 +46,17 @@ CMake generator, a static build of libjpeg-turbo (a build in which
`ENABLE_SHARED` is `0`) could not be installed, a Windows installer could not
be built, and the Java regression tests failed.
2. Fixed a regression introduced by 2.0 beta1[15] that caused a buffer overrun
in the progressive Huffman encoder when attempting to transform a
specially-crafted malformed 12-bit-per-component JPEG image into a progressive
12-bit-per-component JPEG image using a 12-bit-per-component build of
libjpeg-turbo (`-DWITH_12BIT=1`). Given that the buffer overrun was fully
contained within the progressive Huffman encoder structure and did not cause a
segfault or other user-visible errant behavior, given that the lossless
transformer (unlike the decompressor) is not generally exposed to arbitrary
data exploits, and given that 12-bit-per-component builds of libjpeg-turbo are
uncommon, this issue was unlikely to pose a security risk.
2.1.4
=====

View File

@@ -3,8 +3,8 @@
*
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane.
* It was modified by The libjpeg-turbo Project to include only code relevant
* to libjpeg-turbo.
* libjpeg-turbo Modifications:
* Copyright (C) 2022, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -19,6 +19,14 @@
* Hence the magnitude should always fit in 10 or 14 bits respectively.
*/
/* The progressive Huffman encoder uses an unsigned 16-bit data type to store
* absolute values of coefficients, because it is possible to inject a
* coefficient value of -32768 into the encoder by attempting to transform a
* malformed 12-bit JPEG image, and the absolute value of -32768 would overflow
* a signed 16-bit integer.
*
* unsigned short is used because the C standard guarantees that it can hold
* values up to at least 65535, so the magnitude 32768 is always representable.
*/
typedef unsigned short UJCOEF;
/* Derived data constructed for each Huffman table */
typedef struct {

View File

@@ -7,7 +7,7 @@
* Copyright (C) 1999, Ken Murchison.
* libjpeg-turbo Modifications:
* Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander.
* Copyright (C) 2016, 2018, Matthieu Darbois.
* Copyright (C) 2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2020, Arm Limited.
* Copyright (C) 2021, Alex Richardson.
* For conditions of distribution and use, see the accompanying README.ijg
@@ -88,11 +88,11 @@ typedef struct {
/* Pointer to routine to prepare data for encode_mcu_AC_first() */
void (*AC_first_prepare) (const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits);
int Al, UJCOEF *values, size_t *zerobits);
/* Pointer to routine to prepare data for encode_mcu_AC_refine() */
int (*AC_refine_prepare) (const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits);
int Al, UJCOEF *absvalues, size_t *bits);
/* Mode flag: TRUE for optimization, FALSE for actual data output */
boolean gather_statistics;
@@ -162,14 +162,14 @@ METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo,
JBLOCKROW *MCU_data);
METHODDEF(void) encode_mcu_AC_first_prepare
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *values, size_t *zerobits);
UJCOEF *values, size_t *zerobits);
METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,
JBLOCKROW *MCU_data);
METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo,
JBLOCKROW *MCU_data);
METHODDEF(int) encode_mcu_AC_refine_prepare
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *absvalues, size_t *bits);
UJCOEF *absvalues, size_t *bits);
METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,
JBLOCKROW *MCU_data);
METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo);
@@ -594,8 +594,8 @@ encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
continue; \
/* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \
temp2 ^= temp; \
values[k] = (JCOEF)temp; \
values[k + DCTSIZE2] = (JCOEF)temp2; \
values[k] = (UJCOEF)temp; \
values[k + DCTSIZE2] = (UJCOEF)temp2; \
zerobits |= ((size_t)1U) << k; \
} \
}
@@ -603,7 +603,7 @@ encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
METHODDEF(void)
encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *bits)
int Al, UJCOEF *values, size_t *bits)
{
register int k, temp, temp2;
size_t zerobits = 0U;
@@ -676,9 +676,9 @@ encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
register int nbits, r;
int Sl = cinfo->Se - cinfo->Ss + 1;
int Al = cinfo->Al;
JCOEF values_unaligned[2 * DCTSIZE2 + 15];
JCOEF *values;
const JCOEF *cvalue;
UJCOEF values_unaligned[2 * DCTSIZE2 + 15];
UJCOEF *values;
const UJCOEF *cvalue;
size_t zerobits;
size_t bits[8 / SIZEOF_SIZE_T];
int max_coef_bits = cinfo->data_precision + 2;
@@ -692,7 +692,7 @@ encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
emit_restart(entropy, entropy->next_restart_num);
#ifdef WITH_SIMD
cvalue = values = (JCOEF *)PAD((JUINTPTR)values_unaligned, 16);
cvalue = values = (UJCOEF *)PAD((JUINTPTR)values_unaligned, 16);
#else
/* Not using SIMD, so alignment is not needed */
cvalue = values = values_unaligned;
@@ -826,7 +826,7 @@ encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
zerobits |= ((size_t)1U) << k; \
signbits |= ((size_t)(temp2 + 1)) << k; \
} \
absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \
absvalues[k] = (UJCOEF)temp; /* save abs value for main pass */ \
if (temp == 1) \
EOB = k + koffset; /* EOB = index of last newly-nonzero coef */ \
} \
@@ -835,7 +835,7 @@ encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
METHODDEF(int)
encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits)
int Al, UJCOEF *absvalues, size_t *bits)
{
register int k, temp, temp2;
int EOB = 0;
@@ -942,9 +942,9 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
unsigned int BR;
int Sl = cinfo->Se - cinfo->Ss + 1;
int Al = cinfo->Al;
JCOEF absvalues_unaligned[DCTSIZE2 + 15];
JCOEF *absvalues;
const JCOEF *cabsvalue, *EOBPTR;
UJCOEF absvalues_unaligned[DCTSIZE2 + 15];
UJCOEF *absvalues;
const UJCOEF *cabsvalue, *EOBPTR;
size_t zerobits, signbits;
size_t bits[16 / SIZEOF_SIZE_T];
@@ -957,7 +957,7 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
emit_restart(entropy, entropy->next_restart_num);
#ifdef WITH_SIMD
cabsvalue = absvalues = (JCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16);
cabsvalue = absvalues = (UJCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16);
#else
/* Not using SIMD, so alignment is not needed */
cabsvalue = absvalues = absvalues_unaligned;

View File

@@ -3,7 +3,7 @@
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2011, 2014, 2022, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
* Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2020, Arm Limited.
*
* Based on the x86 SIMD extension for IJG JPEG library,
@@ -116,12 +116,12 @@ EXTERN(int) jsimd_can_encode_mcu_AC_first_prepare(void);
EXTERN(void) jsimd_encode_mcu_AC_first_prepare
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *values, size_t *zerobits);
UJCOEF *values, size_t *zerobits);
EXTERN(int) jsimd_can_encode_mcu_AC_refine_prepare(void);
EXTERN(int) jsimd_encode_mcu_AC_refine_prepare
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *absvalues, size_t *bits);
UJCOEF *absvalues, size_t *bits);
#endif /* WITH_SIMD */

View File

@@ -4,7 +4,7 @@
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
* Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
* Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2019, Google LLC.
* Copyright (C) 2020, Arm Limited.
*
@@ -943,7 +943,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits)
int Al, UJCOEF *values, size_t *zerobits)
{
jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
Sl, Al, values, zerobits);
@@ -968,7 +968,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits)
int Al, UJCOEF *absvalues, size_t *bits)
{
return jsimd_encode_mcu_AC_refine_prepare_neon(block,
jpeg_natural_order_start, Sl,

View File

@@ -4,7 +4,7 @@
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
* Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
* Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2020, Arm Limited.
*
* Based on the x86 SIMD extension for IJG JPEG library,
@@ -1018,7 +1018,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits)
int Al, UJCOEF *values, size_t *zerobits)
{
jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
Sl, Al, values, zerobits);
@@ -1045,7 +1045,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits)
int Al, UJCOEF *absvalues, size_t *bits)
{
return jsimd_encode_mcu_AC_refine_prepare_neon(block,
jpeg_natural_order_start,

View File

@@ -2,6 +2,8 @@
* jcphuff-neon.c - prepare data for progressive Huffman encoding (Arm Neon)
*
* Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
* Copyright (C) 2022, Matthieu Darbois. All Rights Reserved.
* Copyright (C) 2022, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -40,10 +42,10 @@
void jsimd_encode_mcu_AC_first_prepare_neon
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *values, size_t *zerobits)
UJCOEF *values, size_t *zerobits)
{
JCOEF *values_ptr = values;
JCOEF *diff_values_ptr = values + DCTSIZE2;
UJCOEF *values_ptr = values;
UJCOEF *diff_values_ptr = values + DCTSIZE2;
/* Rows of coefficients to zero (since they haven't been processed) */
int i, rows_to_zero = 8;
@@ -67,23 +69,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[15], coefs2, 7);
/* Isolate sign of coefficients. */
int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15);
int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15);
uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15));
uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15));
/* Compute absolute value of coefficients and apply point transform Al. */
int16x8_t abs_coefs1 = vabsq_s16(coefs1);
int16x8_t abs_coefs2 = vabsq_s16(coefs2);
coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
/* Compute diff values. */
int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1);
int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2);
uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1);
uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2);
/* Store transformed coefficients and diff values. */
vst1q_s16(values_ptr, coefs1);
vst1q_s16(values_ptr + DCTSIZE, coefs2);
vst1q_s16(diff_values_ptr, diff1);
vst1q_s16(diff_values_ptr + DCTSIZE, diff2);
vst1q_u16(values_ptr, abs_coefs1);
vst1q_u16(values_ptr + DCTSIZE, abs_coefs2);
vst1q_u16(diff_values_ptr, diff1);
vst1q_u16(diff_values_ptr + DCTSIZE, diff2);
values_ptr += 16;
diff_values_ptr += 16;
jpeg_natural_order_start += 16;
@@ -129,23 +131,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
}
/* Isolate sign of coefficients. */
int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15);
int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15);
uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15));
uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15));
/* Compute absolute value of coefficients and apply point transform Al. */
int16x8_t abs_coefs1 = vabsq_s16(coefs1);
int16x8_t abs_coefs2 = vabsq_s16(coefs2);
coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
/* Compute diff values. */
int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1);
int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2);
uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1);
uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2);
/* Store transformed coefficients and diff values. */
vst1q_s16(values_ptr, coefs1);
vst1q_s16(values_ptr + DCTSIZE, coefs2);
vst1q_s16(diff_values_ptr, diff1);
vst1q_s16(diff_values_ptr + DCTSIZE, diff2);
vst1q_u16(values_ptr, abs_coefs1);
vst1q_u16(values_ptr + DCTSIZE, abs_coefs2);
vst1q_u16(diff_values_ptr, diff1);
vst1q_u16(diff_values_ptr + DCTSIZE, diff2);
values_ptr += 16;
diff_values_ptr += 16;
rows_to_zero -= 2;
@@ -183,17 +185,17 @@ void jsimd_encode_mcu_AC_first_prepare_neon
}
/* Isolate sign of coefficients. */
int16x8_t sign_coefs = vshrq_n_s16(coefs, 15);
uint16x8_t sign_coefs = vreinterpretq_u16_s16(vshrq_n_s16(coefs, 15));
/* Compute absolute value of coefficients and apply point transform Al. */
int16x8_t abs_coefs = vabsq_s16(coefs);
coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al));
uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs));
abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al));
/* Compute diff values. */
int16x8_t diff = veorq_s16(coefs, sign_coefs);
uint16x8_t diff = veorq_u16(abs_coefs, sign_coefs);
/* Store transformed coefficients and diff values. */
vst1q_s16(values_ptr, coefs);
vst1q_s16(diff_values_ptr, diff);
vst1q_u16(values_ptr, abs_coefs);
vst1q_u16(diff_values_ptr, diff);
values_ptr += 8;
diff_values_ptr += 8;
rows_to_zero--;
@@ -201,8 +203,8 @@ void jsimd_encode_mcu_AC_first_prepare_neon
/* Zero remaining memory in the values and diff_values blocks. */
for (i = 0; i < rows_to_zero; i++) {
vst1q_s16(values_ptr, vdupq_n_s16(0));
vst1q_s16(diff_values_ptr, vdupq_n_s16(0));
vst1q_u16(values_ptr, vdupq_n_u16(0));
vst1q_u16(diff_values_ptr, vdupq_n_u16(0));
values_ptr += 8;
diff_values_ptr += 8;
}
@@ -210,23 +212,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
/* Construct zerobits bitmap. A set bit means that the corresponding
* coefficient != 0.
*/
int16x8_t row0 = vld1q_s16(values + 0 * DCTSIZE);
int16x8_t row1 = vld1q_s16(values + 1 * DCTSIZE);
int16x8_t row2 = vld1q_s16(values + 2 * DCTSIZE);
int16x8_t row3 = vld1q_s16(values + 3 * DCTSIZE);
int16x8_t row4 = vld1q_s16(values + 4 * DCTSIZE);
int16x8_t row5 = vld1q_s16(values + 5 * DCTSIZE);
int16x8_t row6 = vld1q_s16(values + 6 * DCTSIZE);
int16x8_t row7 = vld1q_s16(values + 7 * DCTSIZE);
uint16x8_t row0 = vld1q_u16(values + 0 * DCTSIZE);
uint16x8_t row1 = vld1q_u16(values + 1 * DCTSIZE);
uint16x8_t row2 = vld1q_u16(values + 2 * DCTSIZE);
uint16x8_t row3 = vld1q_u16(values + 3 * DCTSIZE);
uint16x8_t row4 = vld1q_u16(values + 4 * DCTSIZE);
uint16x8_t row5 = vld1q_u16(values + 5 * DCTSIZE);
uint16x8_t row6 = vld1q_u16(values + 6 * DCTSIZE);
uint16x8_t row7 = vld1q_u16(values + 7 * DCTSIZE);
uint8x8_t row0_eq0 = vmovn_u16(vceqq_s16(row0, vdupq_n_s16(0)));
uint8x8_t row1_eq0 = vmovn_u16(vceqq_s16(row1, vdupq_n_s16(0)));
uint8x8_t row2_eq0 = vmovn_u16(vceqq_s16(row2, vdupq_n_s16(0)));
uint8x8_t row3_eq0 = vmovn_u16(vceqq_s16(row3, vdupq_n_s16(0)));
uint8x8_t row4_eq0 = vmovn_u16(vceqq_s16(row4, vdupq_n_s16(0)));
uint8x8_t row5_eq0 = vmovn_u16(vceqq_s16(row5, vdupq_n_s16(0)));
uint8x8_t row6_eq0 = vmovn_u16(vceqq_s16(row6, vdupq_n_s16(0)));
uint8x8_t row7_eq0 = vmovn_u16(vceqq_s16(row7, vdupq_n_s16(0)));
uint8x8_t row0_eq0 = vmovn_u16(vceqq_u16(row0, vdupq_n_u16(0)));
uint8x8_t row1_eq0 = vmovn_u16(vceqq_u16(row1, vdupq_n_u16(0)));
uint8x8_t row2_eq0 = vmovn_u16(vceqq_u16(row2, vdupq_n_u16(0)));
uint8x8_t row3_eq0 = vmovn_u16(vceqq_u16(row3, vdupq_n_u16(0)));
uint8x8_t row4_eq0 = vmovn_u16(vceqq_u16(row4, vdupq_n_u16(0)));
uint8x8_t row5_eq0 = vmovn_u16(vceqq_u16(row5, vdupq_n_u16(0)));
uint8x8_t row6_eq0 = vmovn_u16(vceqq_u16(row6, vdupq_n_u16(0)));
uint8x8_t row7_eq0 = vmovn_u16(vceqq_u16(row7, vdupq_n_u16(0)));
/* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */
const uint8x8_t bitmap_mask =
@@ -273,7 +275,7 @@ void jsimd_encode_mcu_AC_first_prepare_neon
int jsimd_encode_mcu_AC_refine_prepare_neon
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *absvalues, size_t *bits)
UJCOEF *absvalues, size_t *bits)
{
/* Temporary storage buffers for data used to compute the signbits bitmap and
* the end-of-block (EOB) position
@@ -281,7 +283,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
uint8_t coef_sign_bits[64];
uint8_t coef_eq1_bits[64];
JCOEF *absvalues_ptr = absvalues;
UJCOEF *absvalues_ptr = absvalues;
uint8_t *coef_sign_bits_ptr = coef_sign_bits;
uint8_t *eq1_bits_ptr = coef_eq1_bits;
@@ -315,18 +317,18 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2);
/* Compute absolute value of coefficients and apply point transform Al. */
int16x8_t abs_coefs1 = vabsq_s16(coefs1);
int16x8_t abs_coefs2 = vabsq_s16(coefs2);
coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
vst1q_s16(absvalues_ptr, coefs1);
vst1q_s16(absvalues_ptr + DCTSIZE, coefs2);
uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
vst1q_u16(absvalues_ptr, abs_coefs1);
vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2);
/* Test whether transformed coefficient values == 1 (used to find EOB
* position.)
*/
uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1)));
uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1)));
uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1)));
uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1)));
vst1_u8(eq1_bits_ptr, coefs_eq11);
vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12);
@@ -384,18 +386,18 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2);
/* Compute absolute value of coefficients and apply point transform Al. */
int16x8_t abs_coefs1 = vabsq_s16(coefs1);
int16x8_t abs_coefs2 = vabsq_s16(coefs2);
coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
vst1q_s16(absvalues_ptr, coefs1);
vst1q_s16(absvalues_ptr + DCTSIZE, coefs2);
uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
vst1q_u16(absvalues_ptr, abs_coefs1);
vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2);
/* Test whether transformed coefficient values == 1 (used to find EOB
* position.)
*/
uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1)));
uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1)));
uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1)));
uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1)));
vst1_u8(eq1_bits_ptr, coefs_eq11);
vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12);
@@ -443,14 +445,14 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
vst1_u8(coef_sign_bits_ptr, sign_coefs);
/* Compute absolute value of coefficients and apply point transform Al. */
int16x8_t abs_coefs = vabsq_s16(coefs);
coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al));
vst1q_s16(absvalues_ptr, coefs);
uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs));
abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al));
vst1q_u16(absvalues_ptr, abs_coefs);
/* Test whether transformed coefficient values == 1 (used to find EOB
* position.)
*/
uint8x8_t coefs_eq1 = vmovn_u16(vceqq_s16(coefs, vdupq_n_s16(1)));
uint8x8_t coefs_eq1 = vmovn_u16(vceqq_u16(abs_coefs, vdupq_n_u16(1)));
vst1_u8(eq1_bits_ptr, coefs_eq1);
absvalues_ptr += 8;
@@ -461,7 +463,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
/* Zero remaining memory in blocks. */
for (i = 0; i < rows_to_zero; i++) {
vst1q_s16(absvalues_ptr, vdupq_n_s16(0));
vst1q_u16(absvalues_ptr, vdupq_n_u16(0));
vst1_u8(coef_sign_bits_ptr, vdup_n_u8(0));
vst1_u8(eq1_bits_ptr, vdup_n_u8(0));
absvalues_ptr += 8;
@@ -470,23 +472,23 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
}
/* Construct zerobits bitmap. */
int16x8_t abs_row0 = vld1q_s16(absvalues + 0 * DCTSIZE);
int16x8_t abs_row1 = vld1q_s16(absvalues + 1 * DCTSIZE);
int16x8_t abs_row2 = vld1q_s16(absvalues + 2 * DCTSIZE);
int16x8_t abs_row3 = vld1q_s16(absvalues + 3 * DCTSIZE);
int16x8_t abs_row4 = vld1q_s16(absvalues + 4 * DCTSIZE);
int16x8_t abs_row5 = vld1q_s16(absvalues + 5 * DCTSIZE);
int16x8_t abs_row6 = vld1q_s16(absvalues + 6 * DCTSIZE);
int16x8_t abs_row7 = vld1q_s16(absvalues + 7 * DCTSIZE);
uint16x8_t abs_row0 = vld1q_u16(absvalues + 0 * DCTSIZE);
uint16x8_t abs_row1 = vld1q_u16(absvalues + 1 * DCTSIZE);
uint16x8_t abs_row2 = vld1q_u16(absvalues + 2 * DCTSIZE);
uint16x8_t abs_row3 = vld1q_u16(absvalues + 3 * DCTSIZE);
uint16x8_t abs_row4 = vld1q_u16(absvalues + 4 * DCTSIZE);
uint16x8_t abs_row5 = vld1q_u16(absvalues + 5 * DCTSIZE);
uint16x8_t abs_row6 = vld1q_u16(absvalues + 6 * DCTSIZE);
uint16x8_t abs_row7 = vld1q_u16(absvalues + 7 * DCTSIZE);
uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_s16(abs_row0, vdupq_n_s16(0)));
uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_s16(abs_row1, vdupq_n_s16(0)));
uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_s16(abs_row2, vdupq_n_s16(0)));
uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_s16(abs_row3, vdupq_n_s16(0)));
uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_s16(abs_row4, vdupq_n_s16(0)));
uint8x8_t abs_row5_eq0 = vmovn_u16(vceqq_s16(abs_row5, vdupq_n_s16(0)));
uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_s16(abs_row6, vdupq_n_s16(0)));
uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_s16(abs_row7, vdupq_n_s16(0)));
uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_u16(abs_row0, vdupq_n_u16(0)));
uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_u16(abs_row1, vdupq_n_u16(0)));
uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_u16(abs_row2, vdupq_n_u16(0)));
uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_u16(abs_row3, vdupq_n_u16(0)));
uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_u16(abs_row4, vdupq_n_u16(0)));
uint8x8_t abs_row5_eq0 = vmovn_u16(vceqq_u16(abs_row5, vdupq_n_u16(0)));
uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_u16(abs_row6, vdupq_n_u16(0)));
uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_u16(abs_row7, vdupq_n_u16(0)));
/* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */
const uint8x8_t bitmap_mask =

View File

@@ -3,7 +3,7 @@
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
* Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -1209,7 +1209,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits)
int Al, UJCOEF *values, size_t *zerobits)
{
jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
Sl, Al, values, zerobits);
@@ -1235,7 +1235,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits)
int Al, UJCOEF *absvalues, size_t *bits)
{
return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
jpeg_natural_order_start,

View File

@@ -2,10 +2,10 @@
* simd/jsimd.h
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2011, 2014-2016, 2018, 2020, D. R. Commander.
* Copyright (C) 2011, 2014-2016, 2018, 2020, 2022, D. R. Commander.
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
* Copyright (C) 2014, Linaro Limited.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
* Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
* Copyright (C) 2020, Arm Limited.
*
@@ -1243,16 +1243,16 @@ EXTERN(JOCTET *) jsimd_huff_encode_one_block_neon_slowtbl
/* Progressive Huffman encoding */
EXTERN(void) jsimd_encode_mcu_AC_first_prepare_sse2
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *values, size_t *zerobits);
UJCOEF *values, size_t *zerobits);
EXTERN(void) jsimd_encode_mcu_AC_first_prepare_neon
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *values, size_t *zerobits);
UJCOEF *values, size_t *zerobits);
EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_sse2
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *absvalues, size_t *bits);
UJCOEF *absvalues, size_t *bits);
EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_neon
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *absvalues, size_t *bits);
UJCOEF *absvalues, size_t *bits);

View File

@@ -4,7 +4,7 @@
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009-2011, 2014, 2016, 2018, 2020, 2022, D. R. Commander.
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
* Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -1124,7 +1124,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits)
int Al, UJCOEF *values, size_t *zerobits)
{
}
@@ -1137,7 +1137,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits)
int Al, UJCOEF *absvalues, size_t *bits)
{
return 0;
}

View File

@@ -4,7 +4,7 @@
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
* Copyright (C) 2015, 2018, Matthieu Darbois.
* Copyright (C) 2015, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
*
* Based on the x86 SIMD extension for IJG JPEG library,
@@ -847,7 +847,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits)
int Al, UJCOEF *values, size_t *zerobits)
{
}
@@ -860,7 +860,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits)
int Al, UJCOEF *absvalues, size_t *bits)
{
return 0;
}

View File

@@ -3,7 +3,7 @@
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009-2011, 2014-2016, 2018, 2022, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
* Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -865,7 +865,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits)
int Al, UJCOEF *values, size_t *zerobits)
{
}
@@ -878,7 +878,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits)
int Al, UJCOEF *absvalues, size_t *bits)
{
return 0;
}

View File

@@ -3,7 +3,7 @@
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
* Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -1033,7 +1033,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits)
int Al, UJCOEF *values, size_t *zerobits)
{
jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
Sl, Al, values, zerobits);
@@ -1057,7 +1057,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits)
int Al, UJCOEF *absvalues, size_t *bits)
{
return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
jpeg_natural_order_start,

View File

@@ -1392,8 +1392,8 @@ DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf,
}
static int setDecodeDefaults(struct jpeg_decompress_struct *dinfo,
int pixelFormat, int subsamp, int flags)
static void setDecodeDefaults(struct jpeg_decompress_struct *dinfo,
int pixelFormat, int subsamp, int flags)
{
int i;
@@ -1428,8 +1428,6 @@ static int setDecodeDefaults(struct jpeg_decompress_struct *dinfo,
if (dinfo->quant_tbl_ptrs[i] == NULL)
dinfo->quant_tbl_ptrs[i] = jpeg_alloc_quant_table((j_common_ptr)dinfo);
}
return 0;
}
@@ -1495,9 +1493,7 @@ DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle,
dinfo->progressive_mode = dinfo->inputctl->has_multiple_scans = FALSE;
dinfo->Ss = dinfo->Ah = dinfo->Al = 0;
dinfo->Se = DCTSIZE2 - 1;
if (setDecodeDefaults(dinfo, pixelFormat, subsamp, flags) == -1) {
retval = -1; goto bailout;
}
setDecodeDefaults(dinfo, pixelFormat, subsamp, flags);
old_read_markers = dinfo->marker->read_markers;
dinfo->marker->read_markers = my_read_markers;
old_reset_marker_reader = dinfo->marker->reset_marker_reader;

View File

@@ -24,7 +24,7 @@ BEGIN
VALUE "ProductVersion", "@VERSION@"
VALUE "ProductName", "@CMAKE_PROJECT_NAME@"
VALUE "InternalName", "jpeg@SO_MAJOR_VERSION@"
VALUE "LegalCopyright", "Copyright \xA9 @COPYRIGHT_YEAR@ The libjpeg-turbo Project and many others"
VALUE "LegalCopyright", L"Copyright \xA9 @COPYRIGHT_YEAR@ The libjpeg-turbo Project and many others"
VALUE "OriginalFilename", "jpeg@SO_MAJOR_VERSION@.dll"
END
END

View File

@@ -24,7 +24,7 @@ BEGIN
VALUE "ProductVersion", "@VERSION@"
VALUE "ProductName", "@CMAKE_PROJECT_NAME@"
VALUE "InternalName", "turbojpeg"
VALUE "LegalCopyright", "Copyright \xA9 @COPYRIGHT_YEAR@ The libjpeg-turbo Project and many others"
VALUE "LegalCopyright", L"Copyright \xA9 @COPYRIGHT_YEAR@ The libjpeg-turbo Project and many others"
VALUE "OriginalFilename", "turbojpeg.dll"
END
END