Fix buffer overrun in 12-bit progressive Huffman encoder

Regression introduced by 16bd984557 and
5b177b3cab

The pre-computed absolute values used in encode_mcu_AC_first() and
encode_mcu_AC_refine() were stored in a JCOEF (signed short) array.
When attempting to losslessly transform a specially-crafted malformed
12-bit JPEG image with a coefficient value of -32768 into a progressive
12-bit JPEG image, the progressive Huffman encoder attempted to store
the absolute value of -32768 in the JCOEF array, thus overflowing the
16-bit signed data type.  Therefore, at this point in the code
(8c5e78ce29/jcphuff.c, L889),
the absolute value was read as -32768, which caused the test at
8c5e78ce29/jcphuff.c (L896)
to fail, falling through to
8c5e78ce29/jcphuff.c (L908)
with an overly large value of r (46) that, when shifted left four
places, incremented, and passed to emit_symbol(), exceeded the maximum
index (255) for the derived code tables.  Fortunately, the buffer
overrun was fully contained within phuff_entropy_encoder, so the issue
did not generate a segfault or other user-visible errant behavior, but
it did cause a UBSan failure that was detected by OSS-Fuzz.

This commit introduces an unsigned JCOEF (UJCOEF) data type and uses it
to store the absolute values of DCT coefficients computed by the
AC_first_prepare() and AC_refine_prepare() methods.

Note that the changes to the Arm Neon progressive Huffman encoder
extensions cause signed 16-bit instructions to be replaced with
equivalent unsigned 16-bit instructions, so the changes should be
performance-neutral.

Based on:
bbf61c0382

Closes #628
This commit is contained in:
DRC
2022-11-15 17:01:17 -06:00
parent aa3dd0bd29
commit 78a36f6dc3
14 changed files with 166 additions and 145 deletions

View File

@@ -8,6 +8,17 @@ CMake generator, a static build of libjpeg-turbo (a build in which
`ENABLE_SHARED` is `0`) could not be installed, a Windows installer could not `ENABLE_SHARED` is `0`) could not be installed, a Windows installer could not
be built, and the Java regression tests failed. be built, and the Java regression tests failed.
2. Fixed a regression introduced by 2.0 beta1[15] that caused a buffer overrun
in the progressive Huffman encoder when attempting to transform a
specially-crafted malformed 12-bit-per-component JPEG image into a progressive
12-bit-per-component JPEG image using a 12-bit-per-component build of
libjpeg-turbo (`-DWITH_12BIT=1`.) Given that the buffer overrun was fully
contained within the progressive Huffman encoder structure and did not cause a
segfault or other user-visible errant behavior, given that the lossless
transformer (unlike the decompressor) is not generally exposed to arbitrary
data exploits, and given that 12-bit-per-component builds of libjpeg-turbo are
uncommon, this issue likely did not pose a security risk.
2.1.4 2.1.4
===== =====

View File

@@ -3,8 +3,8 @@
* *
* This file was part of the Independent JPEG Group's software: * This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane. * Copyright (C) 1991-1997, Thomas G. Lane.
* It was modified by The libjpeg-turbo Project to include only code relevant * libjpeg-turbo Modifications:
* to libjpeg-turbo. * Copyright (C) 2022, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg * For conditions of distribution and use, see the accompanying README.ijg
* file. * file.
* *
@@ -25,6 +25,14 @@
#define MAX_COEF_BITS 14 #define MAX_COEF_BITS 14
#endif #endif
/* The progressive Huffman encoder uses an unsigned 16-bit data type to store
* absolute values of coefficients, because it is possible to inject a
* coefficient value of -32768 into the encoder by attempting to transform a
* malformed 12-bit JPEG image, and the absolute value of -32768 would overflow
* a signed 16-bit integer.
*
* UJCOEF is the element type of the output arrays filled by the
* AC_first_prepare() and AC_refine_prepare() methods.
*/
typedef unsigned short UJCOEF;
/* Derived data constructed for each Huffman table */ /* Derived data constructed for each Huffman table */
typedef struct { typedef struct {

View File

@@ -5,7 +5,7 @@
* Copyright (C) 1995-1997, Thomas G. Lane. * Copyright (C) 1995-1997, Thomas G. Lane.
* libjpeg-turbo Modifications: * libjpeg-turbo Modifications:
* Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander. * Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander.
* Copyright (C) 2016, 2018, Matthieu Darbois. * Copyright (C) 2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2020, Arm Limited. * Copyright (C) 2020, Arm Limited.
* Copyright (C) 2021, Alex Richardson. * Copyright (C) 2021, Alex Richardson.
* For conditions of distribution and use, see the accompanying README.ijg * For conditions of distribution and use, see the accompanying README.ijg
@@ -82,11 +82,11 @@ typedef struct {
/* Pointer to routine to prepare data for encode_mcu_AC_first() */ /* Pointer to routine to prepare data for encode_mcu_AC_first() */
void (*AC_first_prepare) (const JCOEF *block, void (*AC_first_prepare) (const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits); int Al, UJCOEF *values, size_t *zerobits);
/* Pointer to routine to prepare data for encode_mcu_AC_refine() */ /* Pointer to routine to prepare data for encode_mcu_AC_refine() */
int (*AC_refine_prepare) (const JCOEF *block, int (*AC_refine_prepare) (const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits); int Al, UJCOEF *absvalues, size_t *bits);
/* Mode flag: TRUE for optimization, FALSE for actual data output */ /* Mode flag: TRUE for optimization, FALSE for actual data output */
boolean gather_statistics; boolean gather_statistics;
@@ -156,14 +156,14 @@ METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo,
JBLOCKROW *MCU_data); JBLOCKROW *MCU_data);
METHODDEF(void) encode_mcu_AC_first_prepare METHODDEF(void) encode_mcu_AC_first_prepare
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *values, size_t *zerobits); UJCOEF *values, size_t *zerobits);
METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo, METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,
JBLOCKROW *MCU_data); JBLOCKROW *MCU_data);
METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo, METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo,
JBLOCKROW *MCU_data); JBLOCKROW *MCU_data);
METHODDEF(int) encode_mcu_AC_refine_prepare METHODDEF(int) encode_mcu_AC_refine_prepare
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *absvalues, size_t *bits); UJCOEF *absvalues, size_t *bits);
METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo, METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,
JBLOCKROW *MCU_data); JBLOCKROW *MCU_data);
METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo); METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo);
@@ -583,8 +583,8 @@ encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
continue; \ continue; \
/* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \ /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \
temp2 ^= temp; \ temp2 ^= temp; \
values[k] = (JCOEF)temp; \ values[k] = (UJCOEF)temp; \
values[k + DCTSIZE2] = (JCOEF)temp2; \ values[k + DCTSIZE2] = (UJCOEF)temp2; \
zerobits |= ((size_t)1U) << k; \ zerobits |= ((size_t)1U) << k; \
} \ } \
} }
@@ -592,7 +592,7 @@ encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
METHODDEF(void) METHODDEF(void)
encode_mcu_AC_first_prepare(const JCOEF *block, encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *bits) int Al, UJCOEF *values, size_t *bits)
{ {
register int k, temp, temp2; register int k, temp, temp2;
size_t zerobits = 0U; size_t zerobits = 0U;
@@ -665,9 +665,9 @@ encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
register int nbits, r; register int nbits, r;
int Sl = cinfo->Se - cinfo->Ss + 1; int Sl = cinfo->Se - cinfo->Ss + 1;
int Al = cinfo->Al; int Al = cinfo->Al;
JCOEF values_unaligned[2 * DCTSIZE2 + 15]; UJCOEF values_unaligned[2 * DCTSIZE2 + 15];
JCOEF *values; UJCOEF *values;
const JCOEF *cvalue; const UJCOEF *cvalue;
size_t zerobits; size_t zerobits;
size_t bits[8 / SIZEOF_SIZE_T]; size_t bits[8 / SIZEOF_SIZE_T];
@@ -680,7 +680,7 @@ encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
emit_restart(entropy, entropy->next_restart_num); emit_restart(entropy, entropy->next_restart_num);
#ifdef WITH_SIMD #ifdef WITH_SIMD
cvalue = values = (JCOEF *)PAD((JUINTPTR)values_unaligned, 16); cvalue = values = (UJCOEF *)PAD((JUINTPTR)values_unaligned, 16);
#else #else
/* Not using SIMD, so alignment is not needed */ /* Not using SIMD, so alignment is not needed */
cvalue = values = values_unaligned; cvalue = values = values_unaligned;
@@ -814,7 +814,7 @@ encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
zerobits |= ((size_t)1U) << k; \ zerobits |= ((size_t)1U) << k; \
signbits |= ((size_t)(temp2 + 1)) << k; \ signbits |= ((size_t)(temp2 + 1)) << k; \
} \ } \
absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \ absvalues[k] = (UJCOEF)temp; /* save abs value for main pass */ \
if (temp == 1) \ if (temp == 1) \
EOB = k + koffset; /* EOB = index of last newly-nonzero coef */ \ EOB = k + koffset; /* EOB = index of last newly-nonzero coef */ \
} \ } \
@@ -823,7 +823,7 @@ encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
METHODDEF(int) METHODDEF(int)
encode_mcu_AC_refine_prepare(const JCOEF *block, encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits) int Al, UJCOEF *absvalues, size_t *bits)
{ {
register int k, temp, temp2; register int k, temp, temp2;
int EOB = 0; int EOB = 0;
@@ -930,9 +930,9 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
unsigned int BR; unsigned int BR;
int Sl = cinfo->Se - cinfo->Ss + 1; int Sl = cinfo->Se - cinfo->Ss + 1;
int Al = cinfo->Al; int Al = cinfo->Al;
JCOEF absvalues_unaligned[DCTSIZE2 + 15]; UJCOEF absvalues_unaligned[DCTSIZE2 + 15];
JCOEF *absvalues; UJCOEF *absvalues;
const JCOEF *cabsvalue, *EOBPTR; const UJCOEF *cabsvalue, *EOBPTR;
size_t zerobits, signbits; size_t zerobits, signbits;
size_t bits[16 / SIZEOF_SIZE_T]; size_t bits[16 / SIZEOF_SIZE_T];
@@ -945,7 +945,7 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
emit_restart(entropy, entropy->next_restart_num); emit_restart(entropy, entropy->next_restart_num);
#ifdef WITH_SIMD #ifdef WITH_SIMD
cabsvalue = absvalues = (JCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16); cabsvalue = absvalues = (UJCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16);
#else #else
/* Not using SIMD, so alignment is not needed */ /* Not using SIMD, so alignment is not needed */
cabsvalue = absvalues = absvalues_unaligned; cabsvalue = absvalues = absvalues_unaligned;

View File

@@ -2,8 +2,8 @@
* jsimd.h * jsimd.h
* *
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2011, 2014, D. R. Commander. * Copyright (C) 2011, 2014, 2022, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois. * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2020, Arm Limited. * Copyright (C) 2020, Arm Limited.
* *
* Based on the x86 SIMD extension for IJG JPEG library, * Based on the x86 SIMD extension for IJG JPEG library,
@@ -114,10 +114,10 @@ EXTERN(int) jsimd_can_encode_mcu_AC_first_prepare(void);
EXTERN(void) jsimd_encode_mcu_AC_first_prepare EXTERN(void) jsimd_encode_mcu_AC_first_prepare
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *values, size_t *zerobits); UJCOEF *values, size_t *zerobits);
EXTERN(int) jsimd_can_encode_mcu_AC_refine_prepare(void); EXTERN(int) jsimd_can_encode_mcu_AC_refine_prepare(void);
EXTERN(int) jsimd_encode_mcu_AC_refine_prepare EXTERN(int) jsimd_encode_mcu_AC_refine_prepare
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *absvalues, size_t *bits); UJCOEF *absvalues, size_t *bits);

View File

@@ -2,8 +2,8 @@
* jsimd_none.c * jsimd_none.c
* *
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009-2011, 2014, D. R. Commander. * Copyright (C) 2009-2011, 2014, 2022, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois. * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2020, Arm Limited. * Copyright (C) 2020, Arm Limited.
* *
* Based on the x86 SIMD extension for IJG JPEG library, * Based on the x86 SIMD extension for IJG JPEG library,
@@ -412,7 +412,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void) GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits) int Al, UJCOEF *values, size_t *zerobits)
{ {
} }
@@ -425,7 +425,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int) GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits) int Al, UJCOEF *absvalues, size_t *bits)
{ {
return 0; return 0;
} }

View File

@@ -4,7 +4,7 @@
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies). * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
* Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander. * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois. * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2019, Google LLC. * Copyright (C) 2019, Google LLC.
* Copyright (C) 2020, Arm Limited. * Copyright (C) 2020, Arm Limited.
* *
@@ -943,7 +943,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void) GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits) int Al, UJCOEF *values, size_t *zerobits)
{ {
jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start, jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
Sl, Al, values, zerobits); Sl, Al, values, zerobits);
@@ -968,7 +968,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int) GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits) int Al, UJCOEF *absvalues, size_t *bits)
{ {
return jsimd_encode_mcu_AC_refine_prepare_neon(block, return jsimd_encode_mcu_AC_refine_prepare_neon(block,
jpeg_natural_order_start, Sl, jpeg_natural_order_start, Sl,

View File

@@ -4,7 +4,7 @@
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies). * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
* Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander. * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois. * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2020, Arm Limited. * Copyright (C) 2020, Arm Limited.
* *
* Based on the x86 SIMD extension for IJG JPEG library, * Based on the x86 SIMD extension for IJG JPEG library,
@@ -1018,7 +1018,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void) GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits) int Al, UJCOEF *values, size_t *zerobits)
{ {
jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start, jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
Sl, Al, values, zerobits); Sl, Al, values, zerobits);
@@ -1045,7 +1045,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int) GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits) int Al, UJCOEF *absvalues, size_t *bits)
{ {
return jsimd_encode_mcu_AC_refine_prepare_neon(block, return jsimd_encode_mcu_AC_refine_prepare_neon(block,
jpeg_natural_order_start, jpeg_natural_order_start,

View File

@@ -2,6 +2,8 @@
* jcphuff-neon.c - prepare data for progressive Huffman encoding (Arm Neon) * jcphuff-neon.c - prepare data for progressive Huffman encoding (Arm Neon)
* *
* Copyright (C) 2020-2021, Arm Limited. All Rights Reserved. * Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
* Copyright (C) 2022, Matthieu Darbois. All Rights Reserved.
* Copyright (C) 2022, D. R. Commander. All Rights Reserved.
* *
* This software is provided 'as-is', without any express or implied * This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages * warranty. In no event will the authors be held liable for any damages
@@ -40,10 +42,10 @@
void jsimd_encode_mcu_AC_first_prepare_neon void jsimd_encode_mcu_AC_first_prepare_neon
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *values, size_t *zerobits) UJCOEF *values, size_t *zerobits)
{ {
JCOEF *values_ptr = values; UJCOEF *values_ptr = values;
JCOEF *diff_values_ptr = values + DCTSIZE2; UJCOEF *diff_values_ptr = values + DCTSIZE2;
/* Rows of coefficients to zero (since they haven't been processed) */ /* Rows of coefficients to zero (since they haven't been processed) */
int i, rows_to_zero = 8; int i, rows_to_zero = 8;
@@ -67,23 +69,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[15], coefs2, 7); coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[15], coefs2, 7);
/* Isolate sign of coefficients. */ /* Isolate sign of coefficients. */
int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15); uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15));
int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15); uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15));
/* Compute absolute value of coefficients and apply point transform Al. */ /* Compute absolute value of coefficients and apply point transform Al. */
int16x8_t abs_coefs1 = vabsq_s16(coefs1); uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
int16x8_t abs_coefs2 = vabsq_s16(coefs2); uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al)); abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al)); abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
/* Compute diff values. */ /* Compute diff values. */
int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1); uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1);
int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2); uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2);
/* Store transformed coefficients and diff values. */ /* Store transformed coefficients and diff values. */
vst1q_s16(values_ptr, coefs1); vst1q_u16(values_ptr, abs_coefs1);
vst1q_s16(values_ptr + DCTSIZE, coefs2); vst1q_u16(values_ptr + DCTSIZE, abs_coefs2);
vst1q_s16(diff_values_ptr, diff1); vst1q_u16(diff_values_ptr, diff1);
vst1q_s16(diff_values_ptr + DCTSIZE, diff2); vst1q_u16(diff_values_ptr + DCTSIZE, diff2);
values_ptr += 16; values_ptr += 16;
diff_values_ptr += 16; diff_values_ptr += 16;
jpeg_natural_order_start += 16; jpeg_natural_order_start += 16;
@@ -129,23 +131,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
} }
/* Isolate sign of coefficients. */ /* Isolate sign of coefficients. */
int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15); uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15));
int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15); uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15));
/* Compute absolute value of coefficients and apply point transform Al. */ /* Compute absolute value of coefficients and apply point transform Al. */
int16x8_t abs_coefs1 = vabsq_s16(coefs1); uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
int16x8_t abs_coefs2 = vabsq_s16(coefs2); uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al)); abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al)); abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
/* Compute diff values. */ /* Compute diff values. */
int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1); uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1);
int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2); uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2);
/* Store transformed coefficients and diff values. */ /* Store transformed coefficients and diff values. */
vst1q_s16(values_ptr, coefs1); vst1q_u16(values_ptr, abs_coefs1);
vst1q_s16(values_ptr + DCTSIZE, coefs2); vst1q_u16(values_ptr + DCTSIZE, abs_coefs2);
vst1q_s16(diff_values_ptr, diff1); vst1q_u16(diff_values_ptr, diff1);
vst1q_s16(diff_values_ptr + DCTSIZE, diff2); vst1q_u16(diff_values_ptr + DCTSIZE, diff2);
values_ptr += 16; values_ptr += 16;
diff_values_ptr += 16; diff_values_ptr += 16;
rows_to_zero -= 2; rows_to_zero -= 2;
@@ -183,17 +185,17 @@ void jsimd_encode_mcu_AC_first_prepare_neon
} }
/* Isolate sign of coefficients. */ /* Isolate sign of coefficients. */
int16x8_t sign_coefs = vshrq_n_s16(coefs, 15); uint16x8_t sign_coefs = vreinterpretq_u16_s16(vshrq_n_s16(coefs, 15));
/* Compute absolute value of coefficients and apply point transform Al. */ /* Compute absolute value of coefficients and apply point transform Al. */
int16x8_t abs_coefs = vabsq_s16(coefs); uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs));
coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al)); abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al));
/* Compute diff values. */ /* Compute diff values. */
int16x8_t diff = veorq_s16(coefs, sign_coefs); uint16x8_t diff = veorq_u16(abs_coefs, sign_coefs);
/* Store transformed coefficients and diff values. */ /* Store transformed coefficients and diff values. */
vst1q_s16(values_ptr, coefs); vst1q_u16(values_ptr, abs_coefs);
vst1q_s16(diff_values_ptr, diff); vst1q_u16(diff_values_ptr, diff);
values_ptr += 8; values_ptr += 8;
diff_values_ptr += 8; diff_values_ptr += 8;
rows_to_zero--; rows_to_zero--;
@@ -201,8 +203,8 @@ void jsimd_encode_mcu_AC_first_prepare_neon
/* Zero remaining memory in the values and diff_values blocks. */ /* Zero remaining memory in the values and diff_values blocks. */
for (i = 0; i < rows_to_zero; i++) { for (i = 0; i < rows_to_zero; i++) {
vst1q_s16(values_ptr, vdupq_n_s16(0)); vst1q_u16(values_ptr, vdupq_n_u16(0));
vst1q_s16(diff_values_ptr, vdupq_n_s16(0)); vst1q_u16(diff_values_ptr, vdupq_n_u16(0));
values_ptr += 8; values_ptr += 8;
diff_values_ptr += 8; diff_values_ptr += 8;
} }
@@ -210,23 +212,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
/* Construct zerobits bitmap. A set bit means that the corresponding /* Construct zerobits bitmap. A set bit means that the corresponding
* coefficient != 0. * coefficient != 0.
*/ */
int16x8_t row0 = vld1q_s16(values + 0 * DCTSIZE); uint16x8_t row0 = vld1q_u16(values + 0 * DCTSIZE);
int16x8_t row1 = vld1q_s16(values + 1 * DCTSIZE); uint16x8_t row1 = vld1q_u16(values + 1 * DCTSIZE);
int16x8_t row2 = vld1q_s16(values + 2 * DCTSIZE); uint16x8_t row2 = vld1q_u16(values + 2 * DCTSIZE);
int16x8_t row3 = vld1q_s16(values + 3 * DCTSIZE); uint16x8_t row3 = vld1q_u16(values + 3 * DCTSIZE);
int16x8_t row4 = vld1q_s16(values + 4 * DCTSIZE); uint16x8_t row4 = vld1q_u16(values + 4 * DCTSIZE);
int16x8_t row5 = vld1q_s16(values + 5 * DCTSIZE); uint16x8_t row5 = vld1q_u16(values + 5 * DCTSIZE);
int16x8_t row6 = vld1q_s16(values + 6 * DCTSIZE); uint16x8_t row6 = vld1q_u16(values + 6 * DCTSIZE);
int16x8_t row7 = vld1q_s16(values + 7 * DCTSIZE); uint16x8_t row7 = vld1q_u16(values + 7 * DCTSIZE);
uint8x8_t row0_eq0 = vmovn_u16(vceqq_s16(row0, vdupq_n_s16(0))); uint8x8_t row0_eq0 = vmovn_u16(vceqq_u16(row0, vdupq_n_u16(0)));
uint8x8_t row1_eq0 = vmovn_u16(vceqq_s16(row1, vdupq_n_s16(0))); uint8x8_t row1_eq0 = vmovn_u16(vceqq_u16(row1, vdupq_n_u16(0)));
uint8x8_t row2_eq0 = vmovn_u16(vceqq_s16(row2, vdupq_n_s16(0))); uint8x8_t row2_eq0 = vmovn_u16(vceqq_u16(row2, vdupq_n_u16(0)));
uint8x8_t row3_eq0 = vmovn_u16(vceqq_s16(row3, vdupq_n_s16(0))); uint8x8_t row3_eq0 = vmovn_u16(vceqq_u16(row3, vdupq_n_u16(0)));
uint8x8_t row4_eq0 = vmovn_u16(vceqq_s16(row4, vdupq_n_s16(0))); uint8x8_t row4_eq0 = vmovn_u16(vceqq_u16(row4, vdupq_n_u16(0)));
uint8x8_t row5_eq0 = vmovn_u16(vceqq_s16(row5, vdupq_n_s16(0))); uint8x8_t row5_eq0 = vmovn_u16(vceqq_u16(row5, vdupq_n_u16(0)));
uint8x8_t row6_eq0 = vmovn_u16(vceqq_s16(row6, vdupq_n_s16(0))); uint8x8_t row6_eq0 = vmovn_u16(vceqq_u16(row6, vdupq_n_u16(0)));
uint8x8_t row7_eq0 = vmovn_u16(vceqq_s16(row7, vdupq_n_s16(0))); uint8x8_t row7_eq0 = vmovn_u16(vceqq_u16(row7, vdupq_n_u16(0)));
/* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */ /* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */
const uint8x8_t bitmap_mask = const uint8x8_t bitmap_mask =
@@ -273,7 +275,7 @@ void jsimd_encode_mcu_AC_first_prepare_neon
int jsimd_encode_mcu_AC_refine_prepare_neon int jsimd_encode_mcu_AC_refine_prepare_neon
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *absvalues, size_t *bits) UJCOEF *absvalues, size_t *bits)
{ {
/* Temporary storage buffers for data used to compute the signbits bitmap and /* Temporary storage buffers for data used to compute the signbits bitmap and
* the end-of-block (EOB) position * the end-of-block (EOB) position
@@ -281,7 +283,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
uint8_t coef_sign_bits[64]; uint8_t coef_sign_bits[64];
uint8_t coef_eq1_bits[64]; uint8_t coef_eq1_bits[64];
JCOEF *absvalues_ptr = absvalues; UJCOEF *absvalues_ptr = absvalues;
uint8_t *coef_sign_bits_ptr = coef_sign_bits; uint8_t *coef_sign_bits_ptr = coef_sign_bits;
uint8_t *eq1_bits_ptr = coef_eq1_bits; uint8_t *eq1_bits_ptr = coef_eq1_bits;
@@ -315,18 +317,18 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2); vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2);
/* Compute absolute value of coefficients and apply point transform Al. */ /* Compute absolute value of coefficients and apply point transform Al. */
int16x8_t abs_coefs1 = vabsq_s16(coefs1); uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
int16x8_t abs_coefs2 = vabsq_s16(coefs2); uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al)); abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al)); abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
vst1q_s16(absvalues_ptr, coefs1); vst1q_u16(absvalues_ptr, abs_coefs1);
vst1q_s16(absvalues_ptr + DCTSIZE, coefs2); vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2);
/* Test whether transformed coefficient values == 1 (used to find EOB /* Test whether transformed coefficient values == 1 (used to find EOB
* position.) * position.)
*/ */
uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1))); uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1)));
uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1))); uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1)));
vst1_u8(eq1_bits_ptr, coefs_eq11); vst1_u8(eq1_bits_ptr, coefs_eq11);
vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12); vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12);
@@ -384,18 +386,18 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2); vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2);
/* Compute absolute value of coefficients and apply point transform Al. */ /* Compute absolute value of coefficients and apply point transform Al. */
int16x8_t abs_coefs1 = vabsq_s16(coefs1); uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
int16x8_t abs_coefs2 = vabsq_s16(coefs2); uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al)); abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al)); abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
vst1q_s16(absvalues_ptr, coefs1); vst1q_u16(absvalues_ptr, abs_coefs1);
vst1q_s16(absvalues_ptr + DCTSIZE, coefs2); vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2);
/* Test whether transformed coefficient values == 1 (used to find EOB /* Test whether transformed coefficient values == 1 (used to find EOB
* position.) * position.)
*/ */
uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1))); uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1)));
uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1))); uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1)));
vst1_u8(eq1_bits_ptr, coefs_eq11); vst1_u8(eq1_bits_ptr, coefs_eq11);
vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12); vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12);
@@ -443,14 +445,14 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
vst1_u8(coef_sign_bits_ptr, sign_coefs); vst1_u8(coef_sign_bits_ptr, sign_coefs);
/* Compute absolute value of coefficients and apply point transform Al. */ /* Compute absolute value of coefficients and apply point transform Al. */
int16x8_t abs_coefs = vabsq_s16(coefs); uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs));
coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al)); abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al));
vst1q_s16(absvalues_ptr, coefs); vst1q_u16(absvalues_ptr, abs_coefs);
/* Test whether transformed coefficient values == 1 (used to find EOB /* Test whether transformed coefficient values == 1 (used to find EOB
* position.) * position.)
*/ */
uint8x8_t coefs_eq1 = vmovn_u16(vceqq_s16(coefs, vdupq_n_s16(1))); uint8x8_t coefs_eq1 = vmovn_u16(vceqq_u16(abs_coefs, vdupq_n_u16(1)));
vst1_u8(eq1_bits_ptr, coefs_eq1); vst1_u8(eq1_bits_ptr, coefs_eq1);
absvalues_ptr += 8; absvalues_ptr += 8;
@@ -461,7 +463,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
/* Zero remaining memory in blocks. */ /* Zero remaining memory in blocks. */
for (i = 0; i < rows_to_zero; i++) { for (i = 0; i < rows_to_zero; i++) {
vst1q_s16(absvalues_ptr, vdupq_n_s16(0)); vst1q_u16(absvalues_ptr, vdupq_n_u16(0));
vst1_u8(coef_sign_bits_ptr, vdup_n_u8(0)); vst1_u8(coef_sign_bits_ptr, vdup_n_u8(0));
vst1_u8(eq1_bits_ptr, vdup_n_u8(0)); vst1_u8(eq1_bits_ptr, vdup_n_u8(0));
absvalues_ptr += 8; absvalues_ptr += 8;
@@ -470,23 +472,23 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
} }
/* Construct zerobits bitmap. */ /* Construct zerobits bitmap. */
int16x8_t abs_row0 = vld1q_s16(absvalues + 0 * DCTSIZE); uint16x8_t abs_row0 = vld1q_u16(absvalues + 0 * DCTSIZE);
int16x8_t abs_row1 = vld1q_s16(absvalues + 1 * DCTSIZE); uint16x8_t abs_row1 = vld1q_u16(absvalues + 1 * DCTSIZE);
int16x8_t abs_row2 = vld1q_s16(absvalues + 2 * DCTSIZE); uint16x8_t abs_row2 = vld1q_u16(absvalues + 2 * DCTSIZE);
int16x8_t abs_row3 = vld1q_s16(absvalues + 3 * DCTSIZE); uint16x8_t abs_row3 = vld1q_u16(absvalues + 3 * DCTSIZE);
int16x8_t abs_row4 = vld1q_s16(absvalues + 4 * DCTSIZE); uint16x8_t abs_row4 = vld1q_u16(absvalues + 4 * DCTSIZE);
int16x8_t abs_row5 = vld1q_s16(absvalues + 5 * DCTSIZE); uint16x8_t abs_row5 = vld1q_u16(absvalues + 5 * DCTSIZE);
int16x8_t abs_row6 = vld1q_s16(absvalues + 6 * DCTSIZE); uint16x8_t abs_row6 = vld1q_u16(absvalues + 6 * DCTSIZE);
int16x8_t abs_row7 = vld1q_s16(absvalues + 7 * DCTSIZE); uint16x8_t abs_row7 = vld1q_u16(absvalues + 7 * DCTSIZE);
uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_s16(abs_row0, vdupq_n_s16(0))); uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_u16(abs_row0, vdupq_n_u16(0)));
uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_s16(abs_row1, vdupq_n_s16(0))); uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_u16(abs_row1, vdupq_n_u16(0)));
uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_s16(abs_row2, vdupq_n_s16(0))); uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_u16(abs_row2, vdupq_n_u16(0)));
uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_s16(abs_row3, vdupq_n_s16(0))); uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_u16(abs_row3, vdupq_n_u16(0)));
uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_s16(abs_row4, vdupq_n_s16(0))); uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_u16(abs_row4, vdupq_n_u16(0)));
uint8x8_t abs_row5_eq0 = vmovn_u16(vceqq_s16(abs_row5, vdupq_n_s16(0))); uint8x8_t abs_row5_eq0 = vmovn_u16(vceqq_u16(abs_row5, vdupq_n_u16(0)));
uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_s16(abs_row6, vdupq_n_s16(0))); uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_u16(abs_row6, vdupq_n_u16(0)));
uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_s16(abs_row7, vdupq_n_s16(0))); uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_u16(abs_row7, vdupq_n_u16(0)));
/* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */ /* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */
const uint8x8_t bitmap_mask = const uint8x8_t bitmap_mask =

View File

@@ -3,7 +3,7 @@
* *
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander. * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois. * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* *
* Based on the x86 SIMD extension for IJG JPEG library, * Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru. * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -1209,7 +1209,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void) GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits) int Al, UJCOEF *values, size_t *zerobits)
{ {
jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start, jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
Sl, Al, values, zerobits); Sl, Al, values, zerobits);
@@ -1235,7 +1235,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int) GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits) int Al, UJCOEF *absvalues, size_t *bits)
{ {
return jsimd_encode_mcu_AC_refine_prepare_sse2(block, return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
jpeg_natural_order_start, jpeg_natural_order_start,

View File

@@ -2,10 +2,10 @@
* simd/jsimd.h * simd/jsimd.h
* *
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2011, 2014-2016, 2018, 2020, D. R. Commander. * Copyright (C) 2011, 2014-2016, 2018, 2020, 2022, D. R. Commander.
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California. * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
* Copyright (C) 2014, Linaro Limited. * Copyright (C) 2014, Linaro Limited.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois. * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing. * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
* Copyright (C) 2020, Arm Limited. * Copyright (C) 2020, Arm Limited.
* *
@@ -1243,16 +1243,16 @@ EXTERN(JOCTET *) jsimd_huff_encode_one_block_neon_slowtbl
/* Progressive Huffman encoding */ /* Progressive Huffman encoding */
EXTERN(void) jsimd_encode_mcu_AC_first_prepare_sse2 EXTERN(void) jsimd_encode_mcu_AC_first_prepare_sse2
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *values, size_t *zerobits); UJCOEF *values, size_t *zerobits);
EXTERN(void) jsimd_encode_mcu_AC_first_prepare_neon EXTERN(void) jsimd_encode_mcu_AC_first_prepare_neon
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *values, size_t *zerobits); UJCOEF *values, size_t *zerobits);
EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_sse2 EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_sse2
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *absvalues, size_t *bits); UJCOEF *absvalues, size_t *bits);
EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_neon EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_neon
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
JCOEF *absvalues, size_t *bits); UJCOEF *absvalues, size_t *bits);

View File

@@ -4,7 +4,7 @@
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009-2011, 2014, 2016, 2018, 2020, 2022, D. R. Commander. * Copyright (C) 2009-2011, 2014, 2016, 2018, 2020, 2022, D. R. Commander.
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California. * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois. * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* *
* Based on the x86 SIMD extension for IJG JPEG library, * Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru. * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -1124,7 +1124,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void) GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits) int Al, UJCOEF *values, size_t *zerobits)
{ {
} }
@@ -1137,7 +1137,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int) GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits) int Al, UJCOEF *absvalues, size_t *bits)
{ {
return 0; return 0;
} }

View File

@@ -4,7 +4,7 @@
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander. * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California. * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
* Copyright (C) 2015, 2018, Matthieu Darbois. * Copyright (C) 2015, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing. * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
* *
* Based on the x86 SIMD extension for IJG JPEG library, * Based on the x86 SIMD extension for IJG JPEG library,
@@ -847,7 +847,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void) GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits) int Al, UJCOEF *values, size_t *zerobits)
{ {
} }
@@ -860,7 +860,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int) GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits) int Al, UJCOEF *absvalues, size_t *bits)
{ {
return 0; return 0;
} }

View File

@@ -3,7 +3,7 @@
* *
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009-2011, 2014-2016, 2018, 2022, D. R. Commander. * Copyright (C) 2009-2011, 2014-2016, 2018, 2022, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois. * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* *
* Based on the x86 SIMD extension for IJG JPEG library, * Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru. * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -865,7 +865,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void) GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits) int Al, UJCOEF *values, size_t *zerobits)
{ {
} }
@@ -878,7 +878,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int) GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits) int Al, UJCOEF *absvalues, size_t *bits)
{ {
return 0; return 0;
} }

View File

@@ -3,7 +3,7 @@
* *
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander. * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois. * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* *
* Based on the x86 SIMD extension for IJG JPEG library, * Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru. * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -1033,7 +1033,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void) GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *values, size_t *zerobits) int Al, UJCOEF *values, size_t *zerobits)
{ {
jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start, jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
Sl, Al, values, zerobits); Sl, Al, values, zerobits);
@@ -1057,7 +1057,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int) GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl, const int *jpeg_natural_order_start, int Sl,
int Al, JCOEF *absvalues, size_t *bits) int Al, UJCOEF *absvalues, size_t *bits)
{ {
return jsimd_encode_mcu_AC_refine_prepare_sse2(block, return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
jpeg_natural_order_start, jpeg_natural_order_start,