Merge branch 'master' into dev
This commit is contained in:
1
cdjpeg.h
1
cdjpeg.h
@@ -127,7 +127,6 @@ EXTERN(void) read_color_map(j_decompress_ptr cinfo, FILE *infile);
|
||||
|
||||
/* common support routines (in cdjpeg.c) */
|
||||
|
||||
EXTERN(void) enable_signal_catcher(j_common_ptr cinfo);
|
||||
EXTERN(void) start_progress_monitor(j_common_ptr cinfo,
|
||||
cd_progress_ptr progress);
|
||||
EXTERN(void) end_progress_monitor(j_common_ptr cinfo);
|
||||
|
||||
@@ -571,11 +571,10 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
||||
* RGB565 conversion
|
||||
*/
|
||||
|
||||
#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \
|
||||
(((g) << 3) & 0x7E0) | ((b) >> 3))
|
||||
#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \
|
||||
(((g) << 11) & 0xE000) | \
|
||||
(((b) << 5) & 0x1F00))
|
||||
#define PACK_SHORT_565_LE(r, g, b) \
|
||||
((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3))
|
||||
#define PACK_SHORT_565_BE(r, g, b) \
|
||||
(((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00))
|
||||
|
||||
#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l)
|
||||
#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r)
|
||||
|
||||
@@ -392,11 +392,10 @@ h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
||||
* RGB565 conversion
|
||||
*/
|
||||
|
||||
#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \
|
||||
(((g) << 3) & 0x7E0) | ((b) >> 3))
|
||||
#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \
|
||||
(((g) << 11) & 0xE000) | \
|
||||
(((b) << 5) & 0x1F00))
|
||||
#define PACK_SHORT_565_LE(r, g, b) \
|
||||
((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3))
|
||||
#define PACK_SHORT_565_BE(r, g, b) \
|
||||
(((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00))
|
||||
|
||||
#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l)
|
||||
#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r)
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
* This file was part of the Independent JPEG Group's software:
|
||||
* Copyright (C) 1995-2019, Thomas G. Lane, Guido Vollbeding.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2010, 2014, 2017, 2019, D. R. Commander.
|
||||
* Copyright (C) 2010, 2014, 2017, 2019-2020, D. R. Commander.
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
*
|
||||
|
||||
7
rdppm.c
7
rdppm.c
@@ -649,11 +649,12 @@ start_input_ppm(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
|
||||
if (maxval > 255) {
|
||||
source->pub.get_pixel_rows = get_word_rgb_row;
|
||||
} else if (maxval == MAXJSAMPLE && sizeof(JSAMPLE) == sizeof(U_CHAR) &&
|
||||
(cinfo->in_color_space == JCS_EXT_RGB
|
||||
#if RGB_RED == 0 && RGB_GREEN == 1 && RGB_BLUE == 2 && RGB_PIXELSIZE == 3
|
||||
|| cinfo->in_color_space == JCS_RGB
|
||||
(cinfo->in_color_space == JCS_EXT_RGB ||
|
||||
cinfo->in_color_space == JCS_RGB)) {
|
||||
#else
|
||||
cinfo->in_color_space == JCS_EXT_RGB) {
|
||||
#endif
|
||||
)) {
|
||||
source->pub.get_pixel_rows = get_raw_row;
|
||||
use_raw_buffer = TRUE;
|
||||
need_rescale = FALSE;
|
||||
|
||||
@@ -334,8 +334,9 @@ start_input_tga(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
|
||||
unsigned int width, height, maplen;
|
||||
boolean is_bottom_up;
|
||||
|
||||
#define GET_2B(offset) ((unsigned int)UCH(targaheader[offset]) + \
|
||||
(((unsigned int)UCH(targaheader[offset + 1])) << 8))
|
||||
#define GET_2B(offset) \
|
||||
((unsigned int)UCH(targaheader[offset]) + \
|
||||
(((unsigned int)UCH(targaheader[offset + 1])) << 8))
|
||||
|
||||
if (!ReadOK(source->pub.input_file, targaheader, 18))
|
||||
ERREXIT(cinfo, JERR_INPUT_EOF);
|
||||
|
||||
@@ -1,36 +1,36 @@
|
||||
%global _docdir %{_defaultdocdir}/%{name}-%{version}
|
||||
%define _prefix @CMAKE_INSTALL_PREFIX@
|
||||
%define _bindir @CMAKE_INSTALL_FULL_BINDIR@
|
||||
%define _datarootdir @CMAKE_INSTALL_FULL_DATAROOTDIR@
|
||||
%define _includedir @CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||
%define _javadir @CMAKE_INSTALL_FULL_JAVADIR@
|
||||
%define _mandir @CMAKE_INSTALL_FULL_MANDIR@
|
||||
%define _enable_static @ENABLE_STATIC@
|
||||
%define _enable_shared @ENABLE_SHARED@
|
||||
%define _with_turbojpeg @WITH_TURBOJPEG@
|
||||
%define _with_java @WITH_JAVA@
|
||||
%define _prefix @CMAKE_INSTALL_PREFIX@
|
||||
%define _bindir @CMAKE_INSTALL_FULL_BINDIR@
|
||||
%define _datarootdir @CMAKE_INSTALL_FULL_DATAROOTDIR@
|
||||
%define _includedir @CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||
%define _javadir @CMAKE_INSTALL_FULL_JAVADIR@
|
||||
%define _mandir @CMAKE_INSTALL_FULL_MANDIR@
|
||||
%define _enable_static @ENABLE_STATIC@
|
||||
%define _enable_shared @ENABLE_SHARED@
|
||||
%define _with_turbojpeg @WITH_TURBOJPEG@
|
||||
%define _with_java @WITH_JAVA@
|
||||
|
||||
%if "%{?__isa_bits:1}" == "1"
|
||||
%define _bits %{__isa_bits}
|
||||
%define _bits %{__isa_bits}
|
||||
%else
|
||||
# RPM < 4.6
|
||||
%if "%{_lib}" == "lib64"
|
||||
%define _bits 64
|
||||
%define _bits 64
|
||||
%else
|
||||
%define _bits 32
|
||||
%define _bits 32
|
||||
%endif
|
||||
%endif
|
||||
|
||||
#-->%if 1
|
||||
%if "%{_bits}" == "64"
|
||||
%define _libdir %{_exec_prefix}/lib64
|
||||
%define _libdir %{_exec_prefix}/lib64
|
||||
%else
|
||||
%if "%{_prefix}" == "/opt/libjpeg-turbo"
|
||||
%define _libdir %{_exec_prefix}/lib32
|
||||
%define _libdir %{_exec_prefix}/lib32
|
||||
%endif
|
||||
%endif
|
||||
#-->%else
|
||||
%define _libdir @CMAKE_INSTALL_FULL_LIBDIR@
|
||||
%define _libdir @CMAKE_INSTALL_FULL_LIBDIR@
|
||||
#-->%endif
|
||||
|
||||
Summary: A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs
|
||||
@@ -101,7 +101,6 @@ broader range of users and developers.
|
||||
#-->make DESTDIR=$RPM_BUILD_ROOT
|
||||
|
||||
%install
|
||||
|
||||
rm -rf $RPM_BUILD_ROOT
|
||||
make install DESTDIR=$RPM_BUILD_ROOT
|
||||
/sbin/ldconfig -n $RPM_BUILD_ROOT%{_libdir}
|
||||
@@ -163,25 +162,25 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%doc %{_docdir}/*
|
||||
%dir %{_prefix}
|
||||
%if "%{_prefix}" == "@CMAKE_INSTALL_DEFAULT_PREFIX@" && "%{_docdir}" != "%{_prefix}/doc"
|
||||
%{_prefix}/doc
|
||||
%{_prefix}/doc
|
||||
%endif
|
||||
%dir %{_bindir}
|
||||
%{_bindir}/cjpeg
|
||||
%{_bindir}/djpeg
|
||||
%{_bindir}/jpegtran
|
||||
%if "%{_with_turbojpeg}" == "1"
|
||||
%{_bindir}/tjbench
|
||||
%{_bindir}/tjbench
|
||||
%endif
|
||||
%{_bindir}/rdjpgcom
|
||||
%{_bindir}/wrjpgcom
|
||||
%dir %{_libdir}
|
||||
%if "%{_enable_shared}" == "1"
|
||||
%{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@.@SO_AGE@.@SO_MINOR_VERSION@
|
||||
%{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@
|
||||
%{_libdir}/libjpeg.so
|
||||
%{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@.@SO_AGE@.@SO_MINOR_VERSION@
|
||||
%{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@
|
||||
%{_libdir}/libjpeg.so
|
||||
%endif
|
||||
%if "%{_enable_static}" == "1"
|
||||
%{_libdir}/libjpeg.a
|
||||
%{_libdir}/libjpeg.a
|
||||
%endif
|
||||
%dir %{_libdir}/pkgconfig
|
||||
%{_libdir}/pkgconfig/libjpeg.pc
|
||||
@@ -189,15 +188,15 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%dir %{_libdir}/cmake/@CMAKE_PROJECT_NAME@
|
||||
%{_libdir}/cmake/@CMAKE_PROJECT_NAME@
|
||||
%if "%{_with_turbojpeg}" == "1"
|
||||
%if "%{_enable_shared}" == "1" || "%{_with_java}" == "1"
|
||||
%{_libdir}/libturbojpeg.so.@TURBOJPEG_SO_VERSION@
|
||||
%{_libdir}/libturbojpeg.so.@TURBOJPEG_SO_MAJOR_VERSION@
|
||||
%{_libdir}/libturbojpeg.so
|
||||
%endif
|
||||
%if "%{_enable_static}" == "1"
|
||||
%{_libdir}/libturbojpeg.a
|
||||
%endif
|
||||
%{_libdir}/pkgconfig/libturbojpeg.pc
|
||||
%if "%{_enable_shared}" == "1" || "%{_with_java}" == "1"
|
||||
%{_libdir}/libturbojpeg.so.@TURBOJPEG_SO_VERSION@
|
||||
%{_libdir}/libturbojpeg.so.@TURBOJPEG_SO_MAJOR_VERSION@
|
||||
%{_libdir}/libturbojpeg.so
|
||||
%endif
|
||||
%if "%{_enable_static}" == "1"
|
||||
%{_libdir}/libturbojpeg.a
|
||||
%endif
|
||||
%{_libdir}/pkgconfig/libturbojpeg.pc
|
||||
%endif
|
||||
%dir %{_includedir}
|
||||
%{_includedir}/jconfig.h
|
||||
@@ -205,7 +204,7 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%{_includedir}/jmorecfg.h
|
||||
%{_includedir}/jpeglib.h
|
||||
%if "%{_with_turbojpeg}" == "1"
|
||||
%{_includedir}/turbojpeg.h
|
||||
%{_includedir}/turbojpeg.h
|
||||
%endif
|
||||
%dir %{_mandir}
|
||||
%dir %{_mandir}/man1
|
||||
@@ -215,10 +214,11 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%{_mandir}/man1/rdjpgcom.1*
|
||||
%{_mandir}/man1/wrjpgcom.1*
|
||||
%if "%{_prefix}" != "%{_datarootdir}"
|
||||
%dir %{_datarootdir}
|
||||
%dir %{_datarootdir}
|
||||
%endif
|
||||
%if "%{_with_java}" == "1"
|
||||
%dir %{_javadir}
|
||||
%{_javadir}/turbojpeg.jar
|
||||
%dir %{_javadir}
|
||||
%{_javadir}/turbojpeg.jar
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
|
||||
@@ -107,69 +107,69 @@ _\fname:
|
||||
* Uses some ideas from the comments in 'simd/jiss2int-64.asm'
|
||||
*/
|
||||
#define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7) { \
|
||||
DCTELEM row0, row1, row2, row3, row4, row5, row6, row7; \
|
||||
JLONG q1, q2, q3, q4, q5, q6, q7; \
|
||||
JLONG tmp11_plus_tmp2, tmp11_minus_tmp2; \
|
||||
\
|
||||
/* 1-D iDCT input data */ \
|
||||
row0 = xrow0; \
|
||||
row1 = xrow1; \
|
||||
row2 = xrow2; \
|
||||
row3 = xrow3; \
|
||||
row4 = xrow4; \
|
||||
row5 = xrow5; \
|
||||
row6 = xrow6; \
|
||||
row7 = xrow7; \
|
||||
\
|
||||
q5 = row7 + row3; \
|
||||
q4 = row5 + row1; \
|
||||
q6 = MULTIPLY(q5, FIX_1_175875602_MINUS_1_961570560) + \
|
||||
MULTIPLY(q4, FIX_1_175875602); \
|
||||
q7 = MULTIPLY(q5, FIX_1_175875602) + \
|
||||
MULTIPLY(q4, FIX_1_175875602_MINUS_0_390180644); \
|
||||
q2 = MULTIPLY(row2, FIX_0_541196100) + \
|
||||
MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065); \
|
||||
q4 = q6; \
|
||||
q3 = ((JLONG)row0 - (JLONG)row4) << 13; \
|
||||
q6 += MULTIPLY(row5, -FIX_2_562915447) + \
|
||||
MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447); \
|
||||
/* now we can use q1 (reloadable constants have been used up) */ \
|
||||
q1 = q3 + q2; \
|
||||
q4 += MULTIPLY(row7, FIX_0_298631336_MINUS_0_899976223) + \
|
||||
MULTIPLY(row1, -FIX_0_899976223); \
|
||||
q5 = q7; \
|
||||
q1 = q1 + q6; \
|
||||
q7 += MULTIPLY(row7, -FIX_0_899976223) + \
|
||||
MULTIPLY(row1, FIX_1_501321110_MINUS_0_899976223); \
|
||||
\
|
||||
/* (tmp11 + tmp2) has been calculated (out_row1 before descale) */ \
|
||||
tmp11_plus_tmp2 = q1; \
|
||||
row1 = 0; \
|
||||
\
|
||||
q1 = q1 - q6; \
|
||||
q5 += MULTIPLY(row5, FIX_2_053119869_MINUS_2_562915447) + \
|
||||
MULTIPLY(row3, -FIX_2_562915447); \
|
||||
q1 = q1 - q6; \
|
||||
q6 = MULTIPLY(row2, FIX_0_541196100_PLUS_0_765366865) + \
|
||||
MULTIPLY(row6, FIX_0_541196100); \
|
||||
q3 = q3 - q2; \
|
||||
\
|
||||
/* (tmp11 - tmp2) has been calculated (out_row6 before descale) */ \
|
||||
tmp11_minus_tmp2 = q1; \
|
||||
\
|
||||
q1 = ((JLONG)row0 + (JLONG)row4) << 13; \
|
||||
q2 = q1 + q6; \
|
||||
q1 = q1 - q6; \
|
||||
\
|
||||
/* pick up the results */ \
|
||||
tmp0 = q4; \
|
||||
tmp1 = q5; \
|
||||
tmp2 = (tmp11_plus_tmp2 - tmp11_minus_tmp2) / 2; \
|
||||
tmp3 = q7; \
|
||||
tmp10 = q2; \
|
||||
tmp11 = (tmp11_plus_tmp2 + tmp11_minus_tmp2) / 2; \
|
||||
tmp12 = q3; \
|
||||
tmp13 = q1; \
|
||||
DCTELEM row0, row1, row2, row3, row4, row5, row6, row7; \
|
||||
JLONG q1, q2, q3, q4, q5, q6, q7; \
|
||||
JLONG tmp11_plus_tmp2, tmp11_minus_tmp2; \
|
||||
\
|
||||
/* 1-D iDCT input data */ \
|
||||
row0 = xrow0; \
|
||||
row1 = xrow1; \
|
||||
row2 = xrow2; \
|
||||
row3 = xrow3; \
|
||||
row4 = xrow4; \
|
||||
row5 = xrow5; \
|
||||
row6 = xrow6; \
|
||||
row7 = xrow7; \
|
||||
\
|
||||
q5 = row7 + row3; \
|
||||
q4 = row5 + row1; \
|
||||
q6 = MULTIPLY(q5, FIX_1_175875602_MINUS_1_961570560) + \
|
||||
MULTIPLY(q4, FIX_1_175875602); \
|
||||
q7 = MULTIPLY(q5, FIX_1_175875602) + \
|
||||
MULTIPLY(q4, FIX_1_175875602_MINUS_0_390180644); \
|
||||
q2 = MULTIPLY(row2, FIX_0_541196100) + \
|
||||
MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065); \
|
||||
q4 = q6; \
|
||||
q3 = ((JLONG)row0 - (JLONG)row4) << 13; \
|
||||
q6 += MULTIPLY(row5, -FIX_2_562915447) + \
|
||||
MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447); \
|
||||
/* now we can use q1 (reloadable constants have been used up) */ \
|
||||
q1 = q3 + q2; \
|
||||
q4 += MULTIPLY(row7, FIX_0_298631336_MINUS_0_899976223) + \
|
||||
MULTIPLY(row1, -FIX_0_899976223); \
|
||||
q5 = q7; \
|
||||
q1 = q1 + q6; \
|
||||
q7 += MULTIPLY(row7, -FIX_0_899976223) + \
|
||||
MULTIPLY(row1, FIX_1_501321110_MINUS_0_899976223); \
|
||||
\
|
||||
/* (tmp11 + tmp2) has been calculated (out_row1 before descale) */ \
|
||||
tmp11_plus_tmp2 = q1; \
|
||||
row1 = 0; \
|
||||
\
|
||||
q1 = q1 - q6; \
|
||||
q5 += MULTIPLY(row5, FIX_2_053119869_MINUS_2_562915447) + \
|
||||
MULTIPLY(row3, -FIX_2_562915447); \
|
||||
q1 = q1 - q6; \
|
||||
q6 = MULTIPLY(row2, FIX_0_541196100_PLUS_0_765366865) + \
|
||||
MULTIPLY(row6, FIX_0_541196100); \
|
||||
q3 = q3 - q2; \
|
||||
\
|
||||
/* (tmp11 - tmp2) has been calculated (out_row6 before descale) */ \
|
||||
tmp11_minus_tmp2 = q1; \
|
||||
\
|
||||
q1 = ((JLONG)row0 + (JLONG)row4) << 13; \
|
||||
q2 = q1 + q6; \
|
||||
q1 = q1 - q6; \
|
||||
\
|
||||
/* pick up the results */ \
|
||||
tmp0 = q4; \
|
||||
tmp1 = q5; \
|
||||
tmp2 = (tmp11_plus_tmp2 - tmp11_minus_tmp2) / 2; \
|
||||
tmp3 = q7; \
|
||||
tmp10 = q2; \
|
||||
tmp11 = (tmp11_plus_tmp2 + tmp11_minus_tmp2) / 2; \
|
||||
tmp12 = q3; \
|
||||
tmp13 = q1; \
|
||||
}
|
||||
|
||||
#define XFIX_0_899976223 d0[0]
|
||||
@@ -261,7 +261,7 @@ asm_function jsimd_idct_islow_neon
|
||||
vld1.16 {d0, d1, d2, d3}, [ip, :128] /* load constants */
|
||||
add ip, ip, #16
|
||||
vmul.s16 q15, q15, q3
|
||||
vpush {d8-d15} /* save Neon registers */
|
||||
vpush {d8 - d15} /* save Neon registers */
|
||||
/* 1-D IDCT, pass 1, left 4x8 half */
|
||||
vadd.s16 d4, ROW7L, ROW3L
|
||||
vadd.s16 d5, ROW5L, ROW1L
|
||||
@@ -507,7 +507,7 @@ asm_function jsimd_idct_islow_neon
|
||||
vqrshrn.s16 d17, q9, #2
|
||||
vqrshrn.s16 d18, q10, #2
|
||||
vqrshrn.s16 d19, q11, #2
|
||||
vpop {d8-d15} /* restore Neon registers */
|
||||
vpop {d8 - d15} /* restore Neon registers */
|
||||
vqrshrn.s16 d20, q12, #2
|
||||
/* Transpose the final 8-bit samples and do signed->unsigned conversion */
|
||||
vtrn.16 q8, q9
|
||||
@@ -749,7 +749,7 @@ asm_function jsimd_idct_ifast_neon
|
||||
vmul.s16 q13, q13, q1
|
||||
vld1.16 {d0}, [ip, :64] /* load constants */
|
||||
vmul.s16 q15, q15, q3
|
||||
vpush {d8-d13} /* save Neon registers */
|
||||
vpush {d8 - d13} /* save Neon registers */
|
||||
/* 1-D IDCT, pass 1 */
|
||||
vsub.s16 q2, q10, q14
|
||||
vadd.s16 q14, q10, q14
|
||||
@@ -842,7 +842,7 @@ asm_function jsimd_idct_ifast_neon
|
||||
vadd.s16 q14, q5, q3
|
||||
vsub.s16 q9, q5, q3
|
||||
vsub.s16 q13, q10, q2
|
||||
vpop {d8-d13} /* restore Neon registers */
|
||||
vpop {d8 - d13} /* restore Neon registers */
|
||||
vadd.s16 q10, q10, q2
|
||||
vsub.s16 q11, q12, q1
|
||||
vadd.s16 q12, q12, q1
|
||||
@@ -1010,7 +1010,7 @@ asm_function jsimd_idct_4x4_neon
|
||||
TMP3 .req r2
|
||||
TMP4 .req ip
|
||||
|
||||
vpush {d8-d15}
|
||||
vpush {d8 - d15}
|
||||
|
||||
/* Load constants (d3 is just used for padding) */
|
||||
adr TMP4, jsimd_idct_4x4_neon_consts
|
||||
@@ -1099,7 +1099,7 @@ asm_function jsimd_idct_4x4_neon
|
||||
vst1.8 {d27[7]}, [TMP4]!
|
||||
#endif
|
||||
|
||||
vpop {d8-d15}
|
||||
vpop {d8 - d15}
|
||||
bx lr
|
||||
|
||||
.unreq DCT_TABLE
|
||||
@@ -1167,7 +1167,7 @@ asm_function jsimd_idct_2x2_neon
|
||||
TMP1 .req r0
|
||||
TMP2 .req ip
|
||||
|
||||
vpush {d8-d15}
|
||||
vpush {d8 - d15}
|
||||
|
||||
/* Load constants */
|
||||
adr TMP2, jsimd_idct_2x2_neon_consts
|
||||
@@ -1254,7 +1254,7 @@ asm_function jsimd_idct_2x2_neon
|
||||
vst1.8 {d26[1]}, [TMP2]!
|
||||
vst1.8 {d27[5]}, [TMP2]!
|
||||
|
||||
vpop {d8-d15}
|
||||
vpop {d8 - d15}
|
||||
bx lr
|
||||
|
||||
.unreq DCT_TABLE
|
||||
@@ -1508,7 +1508,7 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
|
||||
.unreq INPUT_BUF
|
||||
|
||||
/* Save Neon registers */
|
||||
vpush {d8-d15}
|
||||
vpush {d8 - d15}
|
||||
|
||||
/* Initially set d10, d11, d12, d13 to 0xFF */
|
||||
vmov.u8 q5, #255
|
||||
@@ -1571,7 +1571,7 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
|
||||
bgt 0b
|
||||
9:
|
||||
/* Restore all registers and return */
|
||||
vpop {d8-d15}
|
||||
vpop {d8 - d15}
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
|
||||
.unreq OUTPUT_WIDTH
|
||||
@@ -1823,7 +1823,7 @@ asm_function jsimd_\colorid\()_ycc_convert_neon
|
||||
.unreq OUTPUT_BUF
|
||||
|
||||
/* Save Neon registers */
|
||||
vpush {d8-d15}
|
||||
vpush {d8 - d15}
|
||||
|
||||
/* Outer loop over scanlines */
|
||||
cmp NUM_ROWS, #1
|
||||
@@ -1882,7 +1882,7 @@ asm_function jsimd_\colorid\()_ycc_convert_neon
|
||||
bgt 0b
|
||||
9:
|
||||
/* Restore all registers and return */
|
||||
vpop {d8-d15}
|
||||
vpop {d8 - d15}
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, pc}
|
||||
|
||||
.unreq OUTPUT_WIDTH
|
||||
@@ -2011,7 +2011,7 @@ asm_function jsimd_fdct_ifast_neon
|
||||
DATA .req r0
|
||||
TMP .req ip
|
||||
|
||||
vpush {d8-d15}
|
||||
vpush {d8 - d15}
|
||||
|
||||
/* Load constants */
|
||||
adr TMP, jsimd_fdct_ifast_neon_consts
|
||||
@@ -2096,7 +2096,7 @@ asm_function jsimd_fdct_ifast_neon
|
||||
vst1.16 {d24, d25, d26, d27}, [DATA, :128]!
|
||||
vst1.16 {d28, d29, d30, d31}, [DATA, :128]
|
||||
|
||||
vpop {d8-d15}
|
||||
vpop {d8 - d15}
|
||||
bx lr
|
||||
|
||||
.unreq DATA
|
||||
@@ -2404,7 +2404,7 @@ asm_function jsimd_h2v1_fancy_upsample_neon
|
||||
TMP .req lr
|
||||
|
||||
push {r4, r5, r6, lr}
|
||||
vpush {d8-d15}
|
||||
vpush {d8 - d15}
|
||||
|
||||
ldr OUTPUT_DATA, [OUTPUT_DATA_PTR]
|
||||
cmp MAX_V_SAMP_FACTOR, #0
|
||||
@@ -2422,7 +2422,7 @@ asm_function jsimd_h2v1_fancy_upsample_neon
|
||||
bgt 11b
|
||||
|
||||
99:
|
||||
vpop {d8-d15}
|
||||
vpop {d8 - d15}
|
||||
pop {r4, r5, r6, pc}
|
||||
|
||||
.unreq MAX_V_SAMP_FACTOR
|
||||
|
||||
@@ -627,21 +627,21 @@ asm_function jsimd_idct_islow_neon
|
||||
movi v0.16b, #(CENTERJSAMPLE)
|
||||
/* Prepare pointers (dual-issue with Neon instructions) */
|
||||
ldp TMP1, TMP2, [OUTPUT_BUF], 16
|
||||
sqrshrn v28.8b, v2.8h, #(CONST_BITS+PASS1_BITS+3-16)
|
||||
sqrshrn v28.8b, v2.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
|
||||
ldp TMP3, TMP4, [OUTPUT_BUF], 16
|
||||
sqrshrn v29.8b, v3.8h, #(CONST_BITS+PASS1_BITS+3-16)
|
||||
sqrshrn v29.8b, v3.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
|
||||
add TMP1, TMP1, OUTPUT_COL
|
||||
sqrshrn v30.8b, v4.8h, #(CONST_BITS+PASS1_BITS+3-16)
|
||||
sqrshrn v30.8b, v4.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
|
||||
add TMP2, TMP2, OUTPUT_COL
|
||||
sqrshrn v31.8b, v5.8h, #(CONST_BITS+PASS1_BITS+3-16)
|
||||
sqrshrn v31.8b, v5.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
|
||||
add TMP3, TMP3, OUTPUT_COL
|
||||
sqrshrn2 v28.16b, v6.8h, #(CONST_BITS+PASS1_BITS+3-16)
|
||||
sqrshrn2 v28.16b, v6.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
|
||||
add TMP4, TMP4, OUTPUT_COL
|
||||
sqrshrn2 v29.16b, v7.8h, #(CONST_BITS+PASS1_BITS+3-16)
|
||||
sqrshrn2 v29.16b, v7.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
|
||||
ldp TMP5, TMP6, [OUTPUT_BUF], 16
|
||||
sqrshrn2 v30.16b, v8.8h, #(CONST_BITS+PASS1_BITS+3-16)
|
||||
sqrshrn2 v30.16b, v8.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
|
||||
ldp TMP7, TMP8, [OUTPUT_BUF], 16
|
||||
sqrshrn2 v31.16b, v9.8h, #(CONST_BITS+PASS1_BITS+3-16)
|
||||
sqrshrn2 v31.16b, v9.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
|
||||
add TMP5, TMP5, OUTPUT_COL
|
||||
add v16.16b, v28.16b, v0.16b
|
||||
add TMP6, TMP6, OUTPUT_COL
|
||||
@@ -753,14 +753,14 @@ asm_function jsimd_idct_islow_neon
|
||||
add v14.4s, v6.4s, v10.4s /* tmp13 + tmp0 */
|
||||
sub v16.4s, v6.4s, v10.4s /* tmp13 - tmp0 */
|
||||
|
||||
rshrn v2.4h, v18.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v3.4h, v22.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v4.4h, v26.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v5.4h, v14.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v2.8h, v16.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v3.8h, v28.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v4.8h, v24.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v5.8h, v20.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v2.4h, v18.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v3.4h, v22.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v4.4h, v26.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v5.4h, v14.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v2.8h, v16.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v3.8h, v28.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v4.8h, v24.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v5.8h, v20.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
|
||||
mov v6.16b, v15.16b
|
||||
mov v7.16b, v15.16b
|
||||
mov v8.16b, v15.16b
|
||||
@@ -837,14 +837,14 @@ asm_function jsimd_idct_islow_neon
|
||||
mov v3.16b, v14.16b
|
||||
mov v4.16b, v14.16b
|
||||
mov v5.16b, v14.16b
|
||||
rshrn v6.4h, v19.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v7.4h, v23.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v8.4h, v27.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v9.4h, v15.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v6.8h, v17.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v7.8h, v29.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v8.8h, v25.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v9.8h, v21.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v6.4h, v19.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v7.4h, v23.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v8.4h, v27.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v9.4h, v15.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v6.8h, v17.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v7.8h, v29.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v8.8h, v25.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v9.8h, v21.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
|
||||
b 1b
|
||||
|
||||
.balign 16
|
||||
@@ -947,22 +947,22 @@ asm_function jsimd_idct_islow_neon
|
||||
sub v16.4s, v6.4s, v10.4s /* tmp13 - tmp0 */
|
||||
sub v17.4s, v31.4s, v11.4s /* tmp13 - tmp0 */
|
||||
|
||||
rshrn v2.4h, v18.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v3.4h, v22.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v4.4h, v26.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v5.4h, v14.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v6.4h, v19.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v7.4h, v23.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v8.4h, v27.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v9.4h, v15.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v2.8h, v16.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v3.8h, v28.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v4.8h, v24.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v5.8h, v20.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v6.8h, v17.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v7.8h, v29.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v8.8h, v25.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v9.8h, v21.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v2.4h, v18.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v3.4h, v22.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v4.4h, v26.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v5.4h, v14.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v6.4h, v19.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v7.4h, v23.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v8.4h, v27.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn v9.4h, v15.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v2.8h, v16.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v3.8h, v28.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v4.8h, v24.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v5.8h, v20.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v6.8h, v17.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v7.8h, v29.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v8.8h, v25.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
|
||||
rshrn2 v9.8h, v21.4s, #(CONST_BITS - PASS1_BITS) /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
|
||||
b 1b
|
||||
|
||||
.unreq DCT_TABLE
|
||||
@@ -1419,7 +1419,7 @@ asm_function jsimd_idct_4x4_neon
|
||||
st1 {v27.b}[7], [TMP4], 1
|
||||
#endif
|
||||
|
||||
/* vpop {v8.4h - v15.4h} ;not available */
|
||||
/* vpop {v8.4h - v15.4h} (not available) */
|
||||
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
|
||||
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
|
||||
blr x30
|
||||
@@ -1487,7 +1487,7 @@ asm_function jsimd_idct_2x2_neon
|
||||
instruction ensures that those bits are set to zero. */
|
||||
uxtw x3, w3
|
||||
|
||||
/* vpush {v8.4h - v15.4h} ; not available */
|
||||
/* vpush {v8.4h - v15.4h} (not available) */
|
||||
sub sp, sp, 64
|
||||
mov x9, sp
|
||||
|
||||
@@ -3457,136 +3457,136 @@ generate_jsimd_huff_encode_one_block 0
|
||||
*/
|
||||
|
||||
.macro LOAD16
|
||||
ldr T0d, [LUT, #(0*4)]
|
||||
ldr T1d, [LUT, #(8*4)]
|
||||
ldr T0d, [LUT, #(0 * 4)]
|
||||
ldr T1d, [LUT, #(8 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[0], [T0]
|
||||
ld1 {Y1.h}[0], [T1]
|
||||
|
||||
ldr T0d, [LUT, #(1*4)]
|
||||
ldr T1d, [LUT, #(9*4)]
|
||||
ldr T0d, [LUT, #(1 * 4)]
|
||||
ldr T1d, [LUT, #(9 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[1], [T0]
|
||||
ld1 {Y1.h}[1], [T1]
|
||||
|
||||
ldr T0d, [LUT, #(2*4)]
|
||||
ldr T1d, [LUT, #(10*4)]
|
||||
ldr T0d, [LUT, #(2 * 4)]
|
||||
ldr T1d, [LUT, #(10 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[2], [T0]
|
||||
ld1 {Y1.h}[2], [T1]
|
||||
|
||||
ldr T0d, [LUT, #(3*4)]
|
||||
ldr T1d, [LUT, #(11*4)]
|
||||
ldr T0d, [LUT, #(3 * 4)]
|
||||
ldr T1d, [LUT, #(11 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[3], [T0]
|
||||
ld1 {Y1.h}[3], [T1]
|
||||
|
||||
ldr T0d, [LUT, #(4*4)]
|
||||
ldr T1d, [LUT, #(12*4)]
|
||||
ldr T0d, [LUT, #(4 * 4)]
|
||||
ldr T1d, [LUT, #(12 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[4], [T0]
|
||||
ld1 {Y1.h}[4], [T1]
|
||||
|
||||
ldr T0d, [LUT, #(5*4)]
|
||||
ldr T1d, [LUT, #(13*4)]
|
||||
ldr T0d, [LUT, #(5 * 4)]
|
||||
ldr T1d, [LUT, #(13 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[5], [T0]
|
||||
ld1 {Y1.h}[5], [T1]
|
||||
|
||||
ldr T0d, [LUT, #(6*4)]
|
||||
ldr T1d, [LUT, #(14*4)]
|
||||
ldr T0d, [LUT, #(6 * 4)]
|
||||
ldr T1d, [LUT, #(14 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[6], [T0]
|
||||
ld1 {Y1.h}[6], [T1]
|
||||
|
||||
ldr T0d, [LUT, #(7*4)]
|
||||
ldr T1d, [LUT, #(15*4)]
|
||||
ldr T0d, [LUT, #(7 * 4)]
|
||||
ldr T1d, [LUT, #(15 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[7], [T0]
|
||||
ld1 {Y1.h}[7], [T1]
|
||||
|
||||
add LUT, LUT, #(16*4)
|
||||
add LUT, LUT, #(16 * 4)
|
||||
.endm
|
||||
|
||||
.macro LOAD15
|
||||
eor Y1.16b, Y1.16b, Y1.16b
|
||||
|
||||
ldr T0d, [LUT, #(0*4)]
|
||||
ldr T1d, [LUT, #(8*4)]
|
||||
ldr T0d, [LUT, #(0 * 4)]
|
||||
ldr T1d, [LUT, #(8 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[0], [T0]
|
||||
ld1 {Y1.h}[0], [T1]
|
||||
|
||||
ldr T0d, [LUT, #(1*4)]
|
||||
ldr T0d, [LUT, #(1 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[1], [T0]
|
||||
|
||||
ldr T0d, [LUT, #(2*4)]
|
||||
ldr T0d, [LUT, #(2 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[2], [T0]
|
||||
|
||||
ldr T0d, [LUT, #(3*4)]
|
||||
ldr T0d, [LUT, #(3 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[3], [T0]
|
||||
|
||||
ldr T0d, [LUT, #(4*4)]
|
||||
ldr T0d, [LUT, #(4 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[4], [T0]
|
||||
|
||||
ldr T0d, [LUT, #(5*4)]
|
||||
ldr T0d, [LUT, #(5 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[5], [T0]
|
||||
|
||||
ldr T0d, [LUT, #(6*4)]
|
||||
ldr T0d, [LUT, #(6 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[6], [T0]
|
||||
|
||||
ldr T0d, [LUT, #(7*4)]
|
||||
ldr T0d, [LUT, #(7 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[7], [T0]
|
||||
|
||||
cmp LENEND, #2
|
||||
b.lt 1515f
|
||||
ldr T1d, [LUT, #(9*4)]
|
||||
ldr T1d, [LUT, #(9 * 4)]
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y1.h}[1], [T1]
|
||||
|
||||
cmp LENEND, #3
|
||||
b.lt 1515f
|
||||
ldr T1d, [LUT, #(10*4)]
|
||||
ldr T1d, [LUT, #(10 * 4)]
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y1.h}[2], [T1]
|
||||
|
||||
cmp LENEND, #4
|
||||
b.lt 1515f
|
||||
ldr T1d, [LUT, #(11*4)]
|
||||
ldr T1d, [LUT, #(11 * 4)]
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y1.h}[3], [T1]
|
||||
|
||||
cmp LENEND, #5
|
||||
b.lt 1515f
|
||||
ldr T1d, [LUT, #(12*4)]
|
||||
ldr T1d, [LUT, #(12 * 4)]
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y1.h}[4], [T1]
|
||||
|
||||
cmp LENEND, #6
|
||||
b.lt 1515f
|
||||
ldr T1d, [LUT, #(13*4)]
|
||||
ldr T1d, [LUT, #(13 * 4)]
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y1.h}[5], [T1]
|
||||
|
||||
cmp LENEND, #7
|
||||
b.lt 1515f
|
||||
ldr T1d, [LUT, #(14*4)]
|
||||
ldr T1d, [LUT, #(14 * 4)]
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y1.h}[6], [T1]
|
||||
|
||||
@@ -3594,35 +3594,35 @@ generate_jsimd_huff_encode_one_block 0
|
||||
.endm
|
||||
|
||||
.macro LOAD8
|
||||
ldr T0d, [LUT, #(0*4)]
|
||||
ldr T0d, [LUT, #(0 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[0], [T0]
|
||||
|
||||
ldr T0d, [LUT, #(1*4)]
|
||||
ldr T0d, [LUT, #(1 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[1], [T0]
|
||||
|
||||
ldr T0d, [LUT, #(2*4)]
|
||||
ldr T0d, [LUT, #(2 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[2], [T0]
|
||||
|
||||
ldr T0d, [LUT, #(3*4)]
|
||||
ldr T0d, [LUT, #(3 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[3], [T0]
|
||||
|
||||
ldr T0d, [LUT, #(4*4)]
|
||||
ldr T0d, [LUT, #(4 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[4], [T0]
|
||||
|
||||
ldr T0d, [LUT, #(5*4)]
|
||||
ldr T0d, [LUT, #(5 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[5], [T0]
|
||||
|
||||
ldr T0d, [LUT, #(6*4)]
|
||||
ldr T0d, [LUT, #(6 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[6], [T0]
|
||||
|
||||
ldr T0d, [LUT, #(7*4)]
|
||||
ldr T0d, [LUT, #(7 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[7], [T0]
|
||||
.endm
|
||||
@@ -3630,43 +3630,43 @@ generate_jsimd_huff_encode_one_block 0
|
||||
.macro LOAD7
|
||||
eor Y0.16b, Y0.16b, Y0.16b
|
||||
|
||||
ldr T0d, [LUT, #(0*4)]
|
||||
ldr T0d, [LUT, #(0 * 4)]
|
||||
add T0, BLOCK, T0, lsl #1
|
||||
ld1 {Y0.h}[0], [T0]
|
||||
|
||||
cmp LENEND, #2
|
||||
b.lt 77f
|
||||
ldr T1d, [LUT, #(1*4)]
|
||||
ldr T1d, [LUT, #(1 * 4)]
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[1], [T1]
|
||||
|
||||
cmp LENEND, #3
|
||||
b.lt 77f
|
||||
ldr T1d, [LUT, #(2*4)]
|
||||
ldr T1d, [LUT, #(2 * 4)]
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[2], [T1]
|
||||
|
||||
cmp LENEND, #4
|
||||
b.lt 77f
|
||||
ldr T1d, [LUT, #(3*4)]
|
||||
ldr T1d, [LUT, #(3 * 4)]
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[3], [T1]
|
||||
|
||||
cmp LENEND, #5
|
||||
b.lt 77f
|
||||
ldr T1d, [LUT, #(4*4)]
|
||||
ldr T1d, [LUT, #(4 * 4)]
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[4], [T1]
|
||||
|
||||
cmp LENEND, #6
|
||||
b.lt 77f
|
||||
ldr T1d, [LUT, #(5*4)]
|
||||
ldr T1d, [LUT, #(5 * 4)]
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[5], [T1]
|
||||
|
||||
cmp LENEND, #7
|
||||
b.lt 77f
|
||||
ldr T1d, [LUT, #(6*4)]
|
||||
ldr T1d, [LUT, #(6 * 4)]
|
||||
add T1, BLOCK, T1, lsl #1
|
||||
ld1 {Y0.h}[6], [T1]
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -56,12 +56,14 @@
|
||||
((uint64_t)(uint8_t)g << 8) | \
|
||||
((uint64_t)(uint8_t)h))
|
||||
#define _uint64_set1_pi8(a) _uint64_set_pi8(a, a, a, a, a, a, a, a)
|
||||
#define _uint64_set_pi16(a, b, c, d) (((uint64_t)(uint16_t)a << 48) | \
|
||||
((uint64_t)(uint16_t)b << 32) | \
|
||||
((uint64_t)(uint16_t)c << 16) | \
|
||||
((uint64_t)(uint16_t)d))
|
||||
#define _uint64_set_pi16(a, b, c, d) \
|
||||
(((uint64_t)(uint16_t)a << 48) | \
|
||||
((uint64_t)(uint16_t)b << 32) | \
|
||||
((uint64_t)(uint16_t)c << 16) | \
|
||||
((uint64_t)(uint16_t)d))
|
||||
#define _uint64_set1_pi16(a) _uint64_set_pi16(a, a, a, a)
|
||||
#define _uint64_set_pi32(a, b) (((uint64_t)(uint32_t)a << 32) | \
|
||||
((uint64_t)(uint32_t)b))
|
||||
#define _uint64_set_pi32(a, b) \
|
||||
(((uint64_t)(uint32_t)a << 32) | \
|
||||
((uint64_t)(uint32_t)b))
|
||||
|
||||
#define get_const_value(index) (*(__m64 *)&const_value[index])
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
// This file generates the include file for the assembly
|
||||
// implementations by abusing the C preprocessor.
|
||||
//
|
||||
// Note: Some things are manually defined as they need to
|
||||
// be mapped to NASM types.
|
||||
/*
|
||||
* This file generates the include file for the assembly
|
||||
* implementations by abusing the C preprocessor.
|
||||
*
|
||||
* Note: Some things are manually defined as they need to
|
||||
* be mapped to NASM types.
|
||||
*/
|
||||
|
||||
;
|
||||
; Automatically generated include file from jsimdcfg.inc.h
|
||||
|
||||
11
transupp.c
11
transupp.c
@@ -816,8 +816,7 @@ do_flip_h(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
|
||||
((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
|
||||
(JDIMENSION)compptr->v_samp_factor, TRUE);
|
||||
src_buffer = (*srcinfo->mem->access_virt_barray)
|
||||
((j_common_ptr)srcinfo, src_coef_arrays[ci],
|
||||
dst_blk_y + y_crop_blocks,
|
||||
((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_y + y_crop_blocks,
|
||||
(JDIMENSION)compptr->v_samp_factor, FALSE);
|
||||
for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
|
||||
dst_row_ptr = dst_buffer[offset_y];
|
||||
@@ -830,8 +829,9 @@ do_flip_h(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
|
||||
src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
|
||||
/* this unrolled loop doesn't need to know which row it's on... */
|
||||
for (k = 0; k < DCTSIZE2; k += 2) {
|
||||
*dst_ptr++ = *src_ptr++; /* copy even column */
|
||||
*dst_ptr++ = -(*src_ptr++); /* copy odd column with sign change */
|
||||
*dst_ptr++ = *src_ptr++; /* copy even column */
|
||||
*dst_ptr++ = -(*src_ptr++); /* copy odd column with sign
|
||||
change */
|
||||
}
|
||||
} else {
|
||||
/* Copy last partial block(s) verbatim */
|
||||
@@ -916,8 +916,7 @@ do_flip_v(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
|
||||
} else {
|
||||
/* Just copy row verbatim. */
|
||||
jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
|
||||
dst_buffer[offset_y],
|
||||
compptr->width_in_blocks);
|
||||
dst_buffer[offset_y], compptr->width_in_blocks);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
9
wrppm.c
9
wrppm.c
@@ -5,7 +5,7 @@
|
||||
* Copyright (C) 1991-1996, Thomas G. Lane.
|
||||
* Modified 2009 by Guido Vollbeding.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2017, 2019, D. R. Commander.
|
||||
* Copyright (C) 2017, 2019-2020, D. R. Commander.
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
*
|
||||
@@ -326,11 +326,12 @@ jinit_write_ppm(j_decompress_ptr cinfo)
|
||||
|
||||
if (cinfo->quantize_colors || BITS_IN_JSAMPLE != 8 ||
|
||||
sizeof(JSAMPLE) != sizeof(char) ||
|
||||
(cinfo->out_color_space != JCS_EXT_RGB
|
||||
#if RGB_RED == 0 && RGB_GREEN == 1 && RGB_BLUE == 2 && RGB_PIXELSIZE == 3
|
||||
&& cinfo->out_color_space != JCS_RGB
|
||||
(cinfo->out_color_space != JCS_EXT_RGB &&
|
||||
cinfo->out_color_space != JCS_RGB)) {
|
||||
#else
|
||||
cinfo->out_color_space != JCS_EXT_RGB) {
|
||||
#endif
|
||||
)) {
|
||||
/* When quantizing, we need an output buffer for colormap indexes
|
||||
* that's separate from the physical I/O buffer. We also need a
|
||||
* separate buffer if pixel format translation must take place.
|
||||
|
||||
Reference in New Issue
Block a user