Merge branch 'master' into dev

2020-10-27 16:42:14 -05:00
parent 88ae60986e c3bfbde21d
commit cd342acf7f
14 changed files with 1390 additions and 1323 deletions
--- a/cdjpeg.h
+++ b/cdjpeg.h
@@ -127,7 +127,6 @@ EXTERN(void) read_color_map(j_decompress_ptr cinfo, FILE *infile);

 /* common support routines (in cdjpeg.c) */

-EXTERN(void) enable_signal_catcher(j_common_ptr cinfo);
 EXTERN(void) start_progress_monitor(j_common_ptr cinfo,
                                    cd_progress_ptr progress);
 EXTERN(void) end_progress_monitor(j_common_ptr cinfo);
--- a/jdcolor.c
+++ b/jdcolor.c
@@ -571,11 +571,10 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 * RGB565 conversion
 */

-#define PACK_SHORT_565_LE(r, g, b)  ((((r) << 8) & 0xF800) | \
-                                     (((g) << 3) & 0x7E0) | ((b) >> 3))
-#define PACK_SHORT_565_BE(r, g, b)  (((r) & 0xF8) | ((g) >> 5) | \
-                                     (((g) << 11) & 0xE000) | \
-                                     (((b) << 5) & 0x1F00))
+#define PACK_SHORT_565_LE(r, g, b) \
+  ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3))
+#define PACK_SHORT_565_BE(r, g, b) \
+  (((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00))

 #define PACK_TWO_PIXELS_LE(l, r)    ((r << 16) | l)
 #define PACK_TWO_PIXELS_BE(l, r)    ((l << 16) | r)
--- a/jdmerge.c
+++ b/jdmerge.c
@@ -392,11 +392,10 @@ h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 * RGB565 conversion
 */

-#define PACK_SHORT_565_LE(r, g, b)  ((((r) << 8) & 0xF800) | \
-                                     (((g) << 3) & 0x7E0) | ((b) >> 3))
-#define PACK_SHORT_565_BE(r, g, b)  (((r) & 0xF8) | ((g) >> 5) | \
-                                     (((g) << 11) & 0xE000) | \
-                                     (((b) << 5) & 0x1F00))
+#define PACK_SHORT_565_LE(r, g, b) \
+  ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3))
+#define PACK_SHORT_565_BE(r, g, b) \
+  (((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00))

 #define PACK_TWO_PIXELS_LE(l, r)    ((r << 16) | l)
 #define PACK_TWO_PIXELS_BE(l, r)    ((l << 16) | r)
--- a/jpegtran.c
+++ b/jpegtran.c
@@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1995-2019, Thomas G. Lane, Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2014, 2017, 2019, D. R. Commander.
+ * Copyright (C) 2010, 2014, 2017, 2019-2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
--- a/rdppm.c
+++ b/rdppm.c
@@ -649,11 +649,12 @@ start_input_ppm(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
    if (maxval > 255) {
      source->pub.get_pixel_rows = get_word_rgb_row;
    } else if (maxval == MAXJSAMPLE && sizeof(JSAMPLE) == sizeof(U_CHAR) &&
-               (cinfo->in_color_space == JCS_EXT_RGB
 #if RGB_RED == 0 && RGB_GREEN == 1 && RGB_BLUE == 2 && RGB_PIXELSIZE == 3
-                || cinfo->in_color_space == JCS_RGB
+               (cinfo->in_color_space == JCS_EXT_RGB ||
+                cinfo->in_color_space == JCS_RGB)) {
+#else
+               cinfo->in_color_space == JCS_EXT_RGB) {
 #endif
-               )) {
      source->pub.get_pixel_rows = get_raw_row;
      use_raw_buffer = TRUE;
      need_rescale = FALSE;
--- a/rdtarga.c
+++ b/rdtarga.c
@@ -334,8 +334,9 @@ start_input_tga(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
  unsigned int width, height, maplen;
  boolean is_bottom_up;

-#define GET_2B(offset)  ((unsigned int)UCH(targaheader[offset]) + \
-                         (((unsigned int)UCH(targaheader[offset + 1])) << 8))
+#define GET_2B(offset) \
+  ((unsigned int)UCH(targaheader[offset]) + \
+   (((unsigned int)UCH(targaheader[offset + 1])) << 8))

  if (!ReadOK(source->pub.input_file, targaheader, 18))
    ERREXIT(cinfo, JERR_INPUT_EOF);
--- a/release/rpm.spec.in
+++ b/release/rpm.spec.in
@@ -1,36 +1,36 @@
 %global _docdir %{_defaultdocdir}/%{name}-%{version}
-%define _prefix @CMAKE_INSTALL_PREFIX@
-%define _bindir @CMAKE_INSTALL_FULL_BINDIR@
-%define _datarootdir @CMAKE_INSTALL_FULL_DATAROOTDIR@
-%define _includedir @CMAKE_INSTALL_FULL_INCLUDEDIR@
-%define _javadir @CMAKE_INSTALL_FULL_JAVADIR@
-%define _mandir @CMAKE_INSTALL_FULL_MANDIR@
-%define _enable_static @ENABLE_STATIC@
-%define _enable_shared @ENABLE_SHARED@
-%define _with_turbojpeg @WITH_TURBOJPEG@
-%define _with_java @WITH_JAVA@
+%define _prefix  @CMAKE_INSTALL_PREFIX@
+%define _bindir  @CMAKE_INSTALL_FULL_BINDIR@
+%define _datarootdir  @CMAKE_INSTALL_FULL_DATAROOTDIR@
+%define _includedir  @CMAKE_INSTALL_FULL_INCLUDEDIR@
+%define _javadir  @CMAKE_INSTALL_FULL_JAVADIR@
+%define _mandir  @CMAKE_INSTALL_FULL_MANDIR@
+%define _enable_static  @ENABLE_STATIC@
+%define _enable_shared  @ENABLE_SHARED@
+%define _with_turbojpeg  @WITH_TURBOJPEG@
+%define _with_java  @WITH_JAVA@

 %if "%{?__isa_bits:1}" == "1"
-%define _bits %{__isa_bits}
+%define _bits  %{__isa_bits}
 %else
 # RPM < 4.6
 %if "%{_lib}" == "lib64"
-%define _bits 64
+%define _bits  64
 %else
-%define _bits 32
+%define _bits  32
 %endif
 %endif

 #-->%if 1
 %if "%{_bits}" == "64"
-%define _libdir %{_exec_prefix}/lib64
+%define _libdir  %{_exec_prefix}/lib64
 %else
 %if "%{_prefix}" == "/opt/libjpeg-turbo"
-%define _libdir %{_exec_prefix}/lib32
+%define _libdir  %{_exec_prefix}/lib32
 %endif
 %endif
 #-->%else
-%define _libdir @CMAKE_INSTALL_FULL_LIBDIR@
+%define _libdir  @CMAKE_INSTALL_FULL_LIBDIR@
 #-->%endif

 Summary: A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs
@@ -101,7 +101,6 @@ broader range of users and developers.
 #-->make DESTDIR=$RPM_BUILD_ROOT

 %install
-
 rm -rf $RPM_BUILD_ROOT
 make install DESTDIR=$RPM_BUILD_ROOT
 /sbin/ldconfig -n $RPM_BUILD_ROOT%{_libdir}
@@ -163,25 +162,25 @@ rm -rf $RPM_BUILD_ROOT
 %doc %{_docdir}/*
 %dir %{_prefix}
 %if "%{_prefix}" == "@CMAKE_INSTALL_DEFAULT_PREFIX@" && "%{_docdir}" != "%{_prefix}/doc"
- %{_prefix}/doc
+	%{_prefix}/doc
 %endif
 %dir %{_bindir}
 %{_bindir}/cjpeg
 %{_bindir}/djpeg
 %{_bindir}/jpegtran
 %if "%{_with_turbojpeg}" == "1"
- %{_bindir}/tjbench
+	%{_bindir}/tjbench
 %endif
 %{_bindir}/rdjpgcom
 %{_bindir}/wrjpgcom
 %dir %{_libdir}
 %if "%{_enable_shared}" == "1"
- %{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@.@SO_AGE@.@SO_MINOR_VERSION@
- %{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@
- %{_libdir}/libjpeg.so
+	%{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@.@SO_AGE@.@SO_MINOR_VERSION@
+	%{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@
+	%{_libdir}/libjpeg.so
 %endif
 %if "%{_enable_static}" == "1"
- %{_libdir}/libjpeg.a
+	%{_libdir}/libjpeg.a
 %endif
 %dir %{_libdir}/pkgconfig
 %{_libdir}/pkgconfig/libjpeg.pc
@@ -189,15 +188,15 @@ rm -rf $RPM_BUILD_ROOT
 %dir %{_libdir}/cmake/@CMAKE_PROJECT_NAME@
 %{_libdir}/cmake/@CMAKE_PROJECT_NAME@
 %if "%{_with_turbojpeg}" == "1"
- %if "%{_enable_shared}" == "1" || "%{_with_java}" == "1"
-  %{_libdir}/libturbojpeg.so.@TURBOJPEG_SO_VERSION@
-  %{_libdir}/libturbojpeg.so.@TURBOJPEG_SO_MAJOR_VERSION@
-  %{_libdir}/libturbojpeg.so
- %endif
- %if "%{_enable_static}" == "1"
-  %{_libdir}/libturbojpeg.a
- %endif
- %{_libdir}/pkgconfig/libturbojpeg.pc
+	%if "%{_enable_shared}" == "1" || "%{_with_java}" == "1"
+		%{_libdir}/libturbojpeg.so.@TURBOJPEG_SO_VERSION@
+		%{_libdir}/libturbojpeg.so.@TURBOJPEG_SO_MAJOR_VERSION@
+		%{_libdir}/libturbojpeg.so
+	%endif
+	%if "%{_enable_static}" == "1"
+		%{_libdir}/libturbojpeg.a
+	%endif
+	%{_libdir}/pkgconfig/libturbojpeg.pc
 %endif
 %dir %{_includedir}
 %{_includedir}/jconfig.h
@@ -205,7 +204,7 @@ rm -rf $RPM_BUILD_ROOT
 %{_includedir}/jmorecfg.h
 %{_includedir}/jpeglib.h
 %if "%{_with_turbojpeg}" == "1"
- %{_includedir}/turbojpeg.h
+	%{_includedir}/turbojpeg.h
 %endif
 %dir %{_mandir}
 %dir %{_mandir}/man1
@@ -215,10 +214,11 @@ rm -rf $RPM_BUILD_ROOT
 %{_mandir}/man1/rdjpgcom.1*
 %{_mandir}/man1/wrjpgcom.1*
 %if "%{_prefix}" != "%{_datarootdir}"
- %dir %{_datarootdir}
+	%dir %{_datarootdir}
 %endif
 %if "%{_with_java}" == "1"
- %dir %{_javadir}
- %{_javadir}/turbojpeg.jar
+	%dir %{_javadir}
+	%{_javadir}/turbojpeg.jar
 %endif
+
 %changelog
--- a/simd/arm/jsimd_neon.S
+++ b/simd/arm/jsimd_neon.S
@@ -107,69 +107,69 @@ _\fname:
 * Uses some ideas from the comments in 'simd/jiss2int-64.asm'
 */
 #define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7) { \
-    DCTELEM row0, row1, row2, row3, row4, row5, row6, row7; \
-    JLONG   q1, q2, q3, q4, q5, q6, q7; \
-    JLONG   tmp11_plus_tmp2, tmp11_minus_tmp2; \
-    \
-    /* 1-D iDCT input data */ \
-    row0 = xrow0; \
-    row1 = xrow1; \
-    row2 = xrow2; \
-    row3 = xrow3; \
-    row4 = xrow4; \
-    row5 = xrow5; \
-    row6 = xrow6; \
-    row7 = xrow7; \
-    \
-    q5 = row7 + row3; \
-    q4 = row5 + row1; \
-    q6 = MULTIPLY(q5, FIX_1_175875602_MINUS_1_961570560) + \
-         MULTIPLY(q4, FIX_1_175875602); \
-    q7 = MULTIPLY(q5, FIX_1_175875602) + \
-         MULTIPLY(q4, FIX_1_175875602_MINUS_0_390180644); \
-    q2 = MULTIPLY(row2, FIX_0_541196100) + \
-         MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065); \
-    q4 = q6; \
-    q3 = ((JLONG)row0 - (JLONG)row4) << 13; \
-    q6 += MULTIPLY(row5, -FIX_2_562915447) + \
-          MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447); \
-    /* now we can use q1 (reloadable constants have been used up) */ \
-    q1 = q3 + q2; \
-    q4 += MULTIPLY(row7, FIX_0_298631336_MINUS_0_899976223) + \
-          MULTIPLY(row1, -FIX_0_899976223); \
-    q5 = q7; \
-    q1 = q1 + q6; \
-    q7 += MULTIPLY(row7, -FIX_0_899976223) + \
-          MULTIPLY(row1, FIX_1_501321110_MINUS_0_899976223); \
-    \
-    /* (tmp11 + tmp2) has been calculated (out_row1 before descale) */ \
-    tmp11_plus_tmp2 = q1; \
-    row1 = 0; \
-    \
-    q1 = q1 - q6; \
-    q5 += MULTIPLY(row5, FIX_2_053119869_MINUS_2_562915447) + \
-          MULTIPLY(row3, -FIX_2_562915447); \
-    q1 = q1 - q6; \
-    q6 = MULTIPLY(row2, FIX_0_541196100_PLUS_0_765366865) + \
-         MULTIPLY(row6, FIX_0_541196100); \
-    q3 = q3 - q2; \
-    \
-    /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */ \
-    tmp11_minus_tmp2 = q1; \
-    \
-    q1 = ((JLONG)row0 + (JLONG)row4) << 13; \
-    q2 = q1 + q6; \
-    q1 = q1 - q6; \
-    \
-    /* pick up the results */ \
-    tmp0  = q4; \
-    tmp1  = q5; \
-    tmp2  = (tmp11_plus_tmp2 - tmp11_minus_tmp2) / 2; \
-    tmp3  = q7; \
-    tmp10 = q2; \
-    tmp11 = (tmp11_plus_tmp2 + tmp11_minus_tmp2) / 2; \
-    tmp12 = q3; \
-    tmp13 = q1; \
+  DCTELEM row0, row1, row2, row3, row4, row5, row6, row7; \
+  JLONG   q1, q2, q3, q4, q5, q6, q7; \
+  JLONG   tmp11_plus_tmp2, tmp11_minus_tmp2; \
+  \
+  /* 1-D iDCT input data */ \
+  row0 = xrow0; \
+  row1 = xrow1; \
+  row2 = xrow2; \
+  row3 = xrow3; \
+  row4 = xrow4; \
+  row5 = xrow5; \
+  row6 = xrow6; \
+  row7 = xrow7; \
+  \
+  q5 = row7 + row3; \
+  q4 = row5 + row1; \
+  q6 = MULTIPLY(q5, FIX_1_175875602_MINUS_1_961570560) + \
+       MULTIPLY(q4, FIX_1_175875602); \
+  q7 = MULTIPLY(q5, FIX_1_175875602) + \
+       MULTIPLY(q4, FIX_1_175875602_MINUS_0_390180644); \
+  q2 = MULTIPLY(row2, FIX_0_541196100) + \
+       MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065); \
+  q4 = q6; \
+  q3 = ((JLONG)row0 - (JLONG)row4) << 13; \
+  q6 += MULTIPLY(row5, -FIX_2_562915447) + \
+        MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447); \
+  /* now we can use q1 (reloadable constants have been used up) */ \
+  q1 = q3 + q2; \
+  q4 += MULTIPLY(row7, FIX_0_298631336_MINUS_0_899976223) + \
+        MULTIPLY(row1, -FIX_0_899976223); \
+  q5 = q7; \
+  q1 = q1 + q6; \
+  q7 += MULTIPLY(row7, -FIX_0_899976223) + \
+        MULTIPLY(row1, FIX_1_501321110_MINUS_0_899976223); \
+  \
+  /* (tmp11 + tmp2) has been calculated (out_row1 before descale) */ \
+  tmp11_plus_tmp2 = q1; \
+  row1 = 0; \
+  \
+  q1 = q1 - q6; \
+  q5 += MULTIPLY(row5, FIX_2_053119869_MINUS_2_562915447) + \
+        MULTIPLY(row3, -FIX_2_562915447); \
+  q1 = q1 - q6; \
+  q6 = MULTIPLY(row2, FIX_0_541196100_PLUS_0_765366865) + \
+       MULTIPLY(row6, FIX_0_541196100); \
+  q3 = q3 - q2; \
+  \
+  /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */ \
+  tmp11_minus_tmp2 = q1; \
+  \
+  q1 = ((JLONG)row0 + (JLONG)row4) << 13; \
+  q2 = q1 + q6; \
+  q1 = q1 - q6; \
+  \
+  /* pick up the results */ \
+  tmp0  = q4; \
+  tmp1  = q5; \
+  tmp2  = (tmp11_plus_tmp2 - tmp11_minus_tmp2) / 2; \
+  tmp3  = q7; \
+  tmp10 = q2; \
+  tmp11 = (tmp11_plus_tmp2 + tmp11_minus_tmp2) / 2; \
+  tmp12 = q3; \
+  tmp13 = q1; \
 }

 #define XFIX_0_899976223                    d0[0]
@@ -261,7 +261,7 @@ asm_function jsimd_idct_islow_neon
    vld1.16         {d0, d1, d2, d3}, [ip, :128]  /* load constants */
    add             ip, ip, #16
    vmul.s16        q15, q15, q3
-    vpush           {d8-d15}                      /* save Neon registers */
+    vpush           {d8 - d15}                    /* save Neon registers */
    /* 1-D IDCT, pass 1, left 4x8 half */
    vadd.s16        d4, ROW7L, ROW3L
    vadd.s16        d5, ROW5L, ROW1L
@@ -507,7 +507,7 @@ asm_function jsimd_idct_islow_neon
    vqrshrn.s16     d17, q9, #2
    vqrshrn.s16     d18, q10, #2
    vqrshrn.s16     d19, q11, #2
-    vpop            {d8-d15}                      /* restore Neon registers */
+    vpop            {d8 - d15}                    /* restore Neon registers */
    vqrshrn.s16     d20, q12, #2
      /* Transpose the final 8-bit samples and do signed->unsigned conversion */
      vtrn.16         q8, q9
@@ -749,7 +749,7 @@ asm_function jsimd_idct_ifast_neon
    vmul.s16        q13, q13, q1
    vld1.16         {d0}, [ip, :64]  /* load constants */
    vmul.s16        q15, q15, q3
-    vpush           {d8-d13}         /* save Neon registers */
+    vpush           {d8 - d13}       /* save Neon registers */
    /* 1-D IDCT, pass 1 */
    vsub.s16        q2, q10, q14
    vadd.s16        q14, q10, q14
@@ -842,7 +842,7 @@ asm_function jsimd_idct_ifast_neon
    vadd.s16        q14, q5, q3
    vsub.s16        q9, q5, q3
    vsub.s16        q13, q10, q2
-    vpop            {d8-d13}      /* restore Neon registers */
+    vpop            {d8 - d13}    /* restore Neon registers */
    vadd.s16        q10, q10, q2
    vsub.s16        q11, q12, q1
    vadd.s16        q12, q12, q1
@@ -1010,7 +1010,7 @@ asm_function jsimd_idct_4x4_neon
    TMP3            .req r2
    TMP4            .req ip

-    vpush           {d8-d15}
+    vpush           {d8 - d15}

    /* Load constants (d3 is just used for padding) */
    adr             TMP4, jsimd_idct_4x4_neon_consts
@@ -1099,7 +1099,7 @@ asm_function jsimd_idct_4x4_neon
    vst1.8          {d27[7]}, [TMP4]!
 #endif

-    vpop            {d8-d15}
+    vpop            {d8 - d15}
    bx              lr

    .unreq          DCT_TABLE
@@ -1167,7 +1167,7 @@ asm_function jsimd_idct_2x2_neon
    TMP1            .req r0
    TMP2            .req ip

-    vpush           {d8-d15}
+    vpush           {d8 - d15}

    /* Load constants */
    adr             TMP2, jsimd_idct_2x2_neon_consts
@@ -1254,7 +1254,7 @@ asm_function jsimd_idct_2x2_neon
    vst1.8          {d26[1]}, [TMP2]!
    vst1.8          {d27[5]}, [TMP2]!

-    vpop            {d8-d15}
+    vpop            {d8 - d15}
    bx              lr

    .unreq          DCT_TABLE
@@ -1508,7 +1508,7 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
    .unreq          INPUT_BUF

    /* Save Neon registers */
-    vpush           {d8-d15}
+    vpush           {d8 - d15}

    /* Initially set d10, d11, d12, d13 to 0xFF */
    vmov.u8         q5, #255
@@ -1571,7 +1571,7 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
    bgt             0b
 9:
    /* Restore all registers and return */
-    vpop            {d8-d15}
+    vpop            {d8 - d15}
    pop             {r4, r5, r6, r7, r8, r9, r10, pc}

    .unreq          OUTPUT_WIDTH
@@ -1823,7 +1823,7 @@ asm_function jsimd_\colorid\()_ycc_convert_neon
    .unreq          OUTPUT_BUF

    /* Save Neon registers */
-    vpush           {d8-d15}
+    vpush           {d8 - d15}

    /* Outer loop over scanlines */
    cmp             NUM_ROWS, #1
@@ -1882,7 +1882,7 @@ asm_function jsimd_\colorid\()_ycc_convert_neon
    bgt             0b
 9:
    /* Restore all registers and return */
-    vpop            {d8-d15}
+    vpop            {d8 - d15}
    pop             {r4, r5, r6, r7, r8, r9, r10, pc}

    .unreq          OUTPUT_WIDTH
@@ -2011,7 +2011,7 @@ asm_function jsimd_fdct_ifast_neon
    DATA            .req r0
    TMP             .req ip

-    vpush           {d8-d15}
+    vpush           {d8 - d15}

    /* Load constants */
    adr             TMP, jsimd_fdct_ifast_neon_consts
@@ -2096,7 +2096,7 @@ asm_function jsimd_fdct_ifast_neon
    vst1.16         {d24, d25, d26, d27}, [DATA, :128]!
    vst1.16         {d28, d29, d30, d31}, [DATA, :128]

-    vpop            {d8-d15}
+    vpop            {d8 - d15}
    bx              lr

    .unreq          DATA
@@ -2404,7 +2404,7 @@ asm_function jsimd_h2v1_fancy_upsample_neon
    TMP               .req lr

    push            {r4, r5, r6, lr}
-    vpush           {d8-d15}
+    vpush           {d8 - d15}

    ldr             OUTPUT_DATA, [OUTPUT_DATA_PTR]
    cmp             MAX_V_SAMP_FACTOR, #0
@@ -2422,7 +2422,7 @@ asm_function jsimd_h2v1_fancy_upsample_neon
    bgt             11b

 99:
-    vpop            {d8-d15}
+    vpop            {d8 - d15}
    pop             {r4, r5, r6, pc}

    .unreq          MAX_V_SAMP_FACTOR
--- a/simd/arm64/jsimd_neon.S
+++ b/simd/arm64/jsimd_neon.S
@@ -627,21 +627,21 @@ asm_function jsimd_idct_islow_neon
    movi            v0.16b, #(CENTERJSAMPLE)
    /* Prepare pointers (dual-issue with Neon instructions) */
      ldp             TMP1, TMP2, [OUTPUT_BUF], 16
-    sqrshrn         v28.8b, v2.8h, #(CONST_BITS+PASS1_BITS+3-16)
+    sqrshrn         v28.8b, v2.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
      ldp             TMP3, TMP4, [OUTPUT_BUF], 16
-    sqrshrn         v29.8b, v3.8h, #(CONST_BITS+PASS1_BITS+3-16)
+    sqrshrn         v29.8b, v3.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
      add             TMP1, TMP1, OUTPUT_COL
-    sqrshrn         v30.8b, v4.8h, #(CONST_BITS+PASS1_BITS+3-16)
+    sqrshrn         v30.8b, v4.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
      add             TMP2, TMP2, OUTPUT_COL
-    sqrshrn         v31.8b, v5.8h, #(CONST_BITS+PASS1_BITS+3-16)
+    sqrshrn         v31.8b, v5.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
      add             TMP3, TMP3, OUTPUT_COL
-    sqrshrn2        v28.16b, v6.8h, #(CONST_BITS+PASS1_BITS+3-16)
+    sqrshrn2        v28.16b, v6.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
      add             TMP4, TMP4, OUTPUT_COL
-    sqrshrn2        v29.16b, v7.8h, #(CONST_BITS+PASS1_BITS+3-16)
+    sqrshrn2        v29.16b, v7.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
      ldp             TMP5, TMP6, [OUTPUT_BUF], 16
-    sqrshrn2        v30.16b, v8.8h, #(CONST_BITS+PASS1_BITS+3-16)
+    sqrshrn2        v30.16b, v8.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
      ldp             TMP7, TMP8, [OUTPUT_BUF], 16
-    sqrshrn2        v31.16b, v9.8h, #(CONST_BITS+PASS1_BITS+3-16)
+    sqrshrn2        v31.16b, v9.8h, #(CONST_BITS + PASS1_BITS + 3 - 16)
      add             TMP5, TMP5, OUTPUT_COL
    add             v16.16b, v28.16b, v0.16b
      add             TMP6, TMP6, OUTPUT_COL
@@ -753,14 +753,14 @@ asm_function jsimd_idct_islow_neon
    add             v14.4s, v6.4s, v10.4s  /* tmp13 + tmp0 */
    sub             v16.4s, v6.4s, v10.4s  /* tmp13 - tmp0 */

-    rshrn           v2.4h, v18.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
-    rshrn           v3.4h, v22.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
-    rshrn           v4.4h, v26.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
-    rshrn           v5.4h, v14.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
-    rshrn2          v2.8h, v16.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
-    rshrn2          v3.8h, v28.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
-    rshrn2          v4.8h, v24.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
-    rshrn2          v5.8h, v20.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
+    rshrn           v2.4h, v18.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
+    rshrn           v3.4h, v22.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
+    rshrn           v4.4h, v26.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
+    rshrn           v5.4h, v14.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
+    rshrn2          v2.8h, v16.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
+    rshrn2          v3.8h, v28.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
+    rshrn2          v4.8h, v24.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
+    rshrn2          v5.8h, v20.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
    mov             v6.16b, v15.16b
    mov             v7.16b, v15.16b
    mov             v8.16b, v15.16b
@@ -837,14 +837,14 @@ asm_function jsimd_idct_islow_neon
    mov             v3.16b, v14.16b
    mov             v4.16b, v14.16b
    mov             v5.16b, v14.16b
-    rshrn           v6.4h, v19.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
-    rshrn           v7.4h, v23.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
-    rshrn           v8.4h, v27.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
-    rshrn           v9.4h, v15.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
-    rshrn2          v6.8h, v17.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
-    rshrn2          v7.8h, v29.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
-    rshrn2          v8.8h, v25.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
-    rshrn2          v9.8h, v21.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
+    rshrn           v6.4h, v19.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
+    rshrn           v7.4h, v23.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
+    rshrn           v8.4h, v27.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
+    rshrn           v9.4h, v15.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
+    rshrn2          v6.8h, v17.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
+    rshrn2          v7.8h, v29.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
+    rshrn2          v8.8h, v25.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
+    rshrn2          v9.8h, v21.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
    b               1b

 .balign 16
@@ -947,22 +947,22 @@ asm_function jsimd_idct_islow_neon
    sub             v16.4s, v6.4s, v10.4s   /* tmp13 - tmp0 */
    sub             v17.4s, v31.4s, v11.4s  /* tmp13 - tmp0 */

-    rshrn           v2.4h, v18.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
-    rshrn           v3.4h, v22.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
-    rshrn           v4.4h, v26.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
-    rshrn           v5.4h, v14.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
-    rshrn           v6.4h, v19.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
-    rshrn           v7.4h, v23.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
-    rshrn           v8.4h, v27.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
-    rshrn           v9.4h, v15.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
-    rshrn2          v2.8h, v16.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
-    rshrn2          v3.8h, v28.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
-    rshrn2          v4.8h, v24.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
-    rshrn2          v5.8h, v20.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
-    rshrn2          v6.8h, v17.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
-    rshrn2          v7.8h, v29.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
-    rshrn2          v8.8h, v25.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
-    rshrn2          v9.8h, v21.4s, #(CONST_BITS-PASS1_BITS)  /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
+    rshrn           v2.4h, v18.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
+    rshrn           v3.4h, v22.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
+    rshrn           v4.4h, v26.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
+    rshrn           v5.4h, v14.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
+    rshrn           v6.4h, v19.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */
+    rshrn           v7.4h, v23.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */
+    rshrn           v8.4h, v27.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */
+    rshrn           v9.4h, v15.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */
+    rshrn2          v2.8h, v16.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
+    rshrn2          v3.8h, v28.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
+    rshrn2          v4.8h, v24.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
+    rshrn2          v5.8h, v20.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
+    rshrn2          v6.8h, v17.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */
+    rshrn2          v7.8h, v29.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */
+    rshrn2          v8.8h, v25.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */
+    rshrn2          v9.8h, v21.4s, #(CONST_BITS - PASS1_BITS)  /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */
    b               1b

    .unreq          DCT_TABLE
@@ -1419,7 +1419,7 @@ asm_function jsimd_idct_4x4_neon
    st1             {v27.b}[7], [TMP4], 1
 #endif

-    /* vpop            {v8.4h - v15.4h}    ;not available */
+    /* vpop            {v8.4h - v15.4h}    (not available) */
    ld1             {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32
    ld1             {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32
    blr             x30
@@ -1487,7 +1487,7 @@ asm_function jsimd_idct_2x2_neon
       instruction ensures that those bits are set to zero. */
    uxtw x3, w3

-    /* vpush           {v8.4h - v15.4h}            ; not available */
+    /* vpush           {v8.4h - v15.4h}    (not available) */
    sub             sp, sp, 64
    mov             x9, sp

@@ -3457,136 +3457,136 @@ generate_jsimd_huff_encode_one_block 0
 */

 .macro LOAD16
-    ldr             T0d, [LUT, #(0*4)]
-    ldr             T1d, [LUT, #(8*4)]
+    ldr             T0d, [LUT, #(0 * 4)]
+    ldr             T1d, [LUT, #(8 * 4)]
    add             T0, BLOCK, T0, lsl #1
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[0], [T0]
    ld1             {Y1.h}[0], [T1]

-    ldr             T0d, [LUT, #(1*4)]
-    ldr             T1d, [LUT, #(9*4)]
+    ldr             T0d, [LUT, #(1 * 4)]
+    ldr             T1d, [LUT, #(9 * 4)]
    add             T0, BLOCK, T0, lsl #1
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[1], [T0]
    ld1             {Y1.h}[1], [T1]

-    ldr             T0d, [LUT, #(2*4)]
-    ldr             T1d, [LUT, #(10*4)]
+    ldr             T0d, [LUT, #(2 * 4)]
+    ldr             T1d, [LUT, #(10 * 4)]
    add             T0, BLOCK, T0, lsl #1
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[2], [T0]
    ld1             {Y1.h}[2], [T1]

-    ldr             T0d, [LUT, #(3*4)]
-    ldr             T1d, [LUT, #(11*4)]
+    ldr             T0d, [LUT, #(3 * 4)]
+    ldr             T1d, [LUT, #(11 * 4)]
    add             T0, BLOCK, T0, lsl #1
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[3], [T0]
    ld1             {Y1.h}[3], [T1]

-    ldr             T0d, [LUT, #(4*4)]
-    ldr             T1d, [LUT, #(12*4)]
+    ldr             T0d, [LUT, #(4 * 4)]
+    ldr             T1d, [LUT, #(12 * 4)]
    add             T0, BLOCK, T0, lsl #1
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[4], [T0]
    ld1             {Y1.h}[4], [T1]

-    ldr             T0d, [LUT, #(5*4)]
-    ldr             T1d, [LUT, #(13*4)]
+    ldr             T0d, [LUT, #(5 * 4)]
+    ldr             T1d, [LUT, #(13 * 4)]
    add             T0, BLOCK, T0, lsl #1
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[5], [T0]
    ld1             {Y1.h}[5], [T1]

-    ldr             T0d, [LUT, #(6*4)]
-    ldr             T1d, [LUT, #(14*4)]
+    ldr             T0d, [LUT, #(6 * 4)]
+    ldr             T1d, [LUT, #(14 * 4)]
    add             T0, BLOCK, T0, lsl #1
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[6], [T0]
    ld1             {Y1.h}[6], [T1]

-    ldr             T0d, [LUT, #(7*4)]
-    ldr             T1d, [LUT, #(15*4)]
+    ldr             T0d, [LUT, #(7 * 4)]
+    ldr             T1d, [LUT, #(15 * 4)]
    add             T0, BLOCK, T0, lsl #1
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[7], [T0]
    ld1             {Y1.h}[7], [T1]

-    add             LUT, LUT, #(16*4)
+    add             LUT, LUT, #(16 * 4)
 .endm

 .macro LOAD15
    eor             Y1.16b, Y1.16b, Y1.16b

-    ldr             T0d, [LUT, #(0*4)]
-    ldr             T1d, [LUT, #(8*4)]
+    ldr             T0d, [LUT, #(0 * 4)]
+    ldr             T1d, [LUT, #(8 * 4)]
    add             T0, BLOCK, T0, lsl #1
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[0], [T0]
    ld1             {Y1.h}[0], [T1]

-    ldr             T0d, [LUT, #(1*4)]
+    ldr             T0d, [LUT, #(1 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[1], [T0]

-    ldr             T0d, [LUT, #(2*4)]
+    ldr             T0d, [LUT, #(2 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[2], [T0]

-    ldr             T0d, [LUT, #(3*4)]
+    ldr             T0d, [LUT, #(3 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[3], [T0]

-    ldr             T0d, [LUT, #(4*4)]
+    ldr             T0d, [LUT, #(4 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[4], [T0]

-    ldr             T0d, [LUT, #(5*4)]
+    ldr             T0d, [LUT, #(5 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[5], [T0]

-    ldr             T0d, [LUT, #(6*4)]
+    ldr             T0d, [LUT, #(6 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[6], [T0]

-    ldr             T0d, [LUT, #(7*4)]
+    ldr             T0d, [LUT, #(7 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[7], [T0]

    cmp             LENEND, #2
    b.lt            1515f
-    ldr             T1d, [LUT, #(9*4)]
+    ldr             T1d, [LUT, #(9 * 4)]
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y1.h}[1], [T1]

    cmp             LENEND, #3
    b.lt            1515f
-    ldr             T1d, [LUT, #(10*4)]
+    ldr             T1d, [LUT, #(10 * 4)]
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y1.h}[2], [T1]

    cmp             LENEND, #4
    b.lt            1515f
-    ldr             T1d, [LUT, #(11*4)]
+    ldr             T1d, [LUT, #(11 * 4)]
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y1.h}[3], [T1]

    cmp             LENEND, #5
    b.lt            1515f
-    ldr             T1d, [LUT, #(12*4)]
+    ldr             T1d, [LUT, #(12 * 4)]
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y1.h}[4], [T1]

    cmp             LENEND, #6
    b.lt            1515f
-    ldr             T1d, [LUT, #(13*4)]
+    ldr             T1d, [LUT, #(13 * 4)]
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y1.h}[5], [T1]

    cmp             LENEND, #7
    b.lt            1515f
-    ldr             T1d, [LUT, #(14*4)]
+    ldr             T1d, [LUT, #(14 * 4)]
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y1.h}[6], [T1]

@@ -3594,35 +3594,35 @@ generate_jsimd_huff_encode_one_block 0
 .endm

 .macro LOAD8
-    ldr             T0d, [LUT, #(0*4)]
+    ldr             T0d, [LUT, #(0 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[0], [T0]

-    ldr             T0d, [LUT, #(1*4)]
+    ldr             T0d, [LUT, #(1 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[1], [T0]

-    ldr             T0d, [LUT, #(2*4)]
+    ldr             T0d, [LUT, #(2 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[2], [T0]

-    ldr             T0d, [LUT, #(3*4)]
+    ldr             T0d, [LUT, #(3 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[3], [T0]

-    ldr             T0d, [LUT, #(4*4)]
+    ldr             T0d, [LUT, #(4 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[4], [T0]

-    ldr             T0d, [LUT, #(5*4)]
+    ldr             T0d, [LUT, #(5 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[5], [T0]

-    ldr             T0d, [LUT, #(6*4)]
+    ldr             T0d, [LUT, #(6 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[6], [T0]

-    ldr             T0d, [LUT, #(7*4)]
+    ldr             T0d, [LUT, #(7 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[7], [T0]
 .endm
@@ -3630,43 +3630,43 @@ generate_jsimd_huff_encode_one_block 0
 .macro LOAD7
    eor             Y0.16b, Y0.16b, Y0.16b

-    ldr             T0d, [LUT, #(0*4)]
+    ldr             T0d, [LUT, #(0 * 4)]
    add             T0, BLOCK, T0, lsl #1
    ld1             {Y0.h}[0], [T0]

    cmp             LENEND, #2
    b.lt            77f
-    ldr             T1d, [LUT, #(1*4)]
+    ldr             T1d, [LUT, #(1 * 4)]
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[1], [T1]

    cmp             LENEND, #3
    b.lt            77f
-    ldr             T1d, [LUT, #(2*4)]
+    ldr             T1d, [LUT, #(2 * 4)]
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[2], [T1]

    cmp             LENEND, #4
    b.lt            77f
-    ldr             T1d, [LUT, #(3*4)]
+    ldr             T1d, [LUT, #(3 * 4)]
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[3], [T1]

    cmp             LENEND, #5
    b.lt            77f
-    ldr             T1d, [LUT, #(4*4)]
+    ldr             T1d, [LUT, #(4 * 4)]
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[4], [T1]

    cmp             LENEND, #6
    b.lt            77f
-    ldr             T1d, [LUT, #(5*4)]
+    ldr             T1d, [LUT, #(5 * 4)]
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[5], [T1]

    cmp             LENEND, #7
    b.lt            77f
-    ldr             T1d, [LUT, #(6*4)]
+    ldr             T1d, [LUT, #(6 * 4)]
    add             T1, BLOCK, T1, lsl #1
    ld1             {Y0.h}[6], [T1]

--- a/simd/mips/jsimd_dspr2.S
+++ b/simd/mips/jsimd_dspr2.S
--- a/simd/mips64/jsimd_mmi.h
+++ b/simd/mips64/jsimd_mmi.h
@@ -56,12 +56,14 @@
   ((uint64_t)(uint8_t)g << 8)  | \
   ((uint64_t)(uint8_t)h))
 #define _uint64_set1_pi8(a)  _uint64_set_pi8(a, a, a, a, a, a, a, a)
-#define _uint64_set_pi16(a, b, c, d)  (((uint64_t)(uint16_t)a << 48) | \
-                                       ((uint64_t)(uint16_t)b << 32) | \
-                                       ((uint64_t)(uint16_t)c << 16) | \
-                                       ((uint64_t)(uint16_t)d))
+#define _uint64_set_pi16(a, b, c, d) \
+  (((uint64_t)(uint16_t)a << 48) | \
+   ((uint64_t)(uint16_t)b << 32) | \
+   ((uint64_t)(uint16_t)c << 16) | \
+   ((uint64_t)(uint16_t)d))
 #define _uint64_set1_pi16(a)  _uint64_set_pi16(a, a, a, a)
-#define _uint64_set_pi32(a, b)  (((uint64_t)(uint32_t)a << 32) | \
-                                 ((uint64_t)(uint32_t)b))
+#define _uint64_set_pi32(a, b) \
+  (((uint64_t)(uint32_t)a << 32) | \
+   ((uint64_t)(uint32_t)b))

 #define get_const_value(index)  (*(__m64 *)&const_value[index])
--- a/simd/nasm/jsimdcfg.inc.h
+++ b/simd/nasm/jsimdcfg.inc.h
@@ -1,8 +1,10 @@
-// This file generates the include file for the assembly
-// implementations by abusing the C preprocessor.
-//
-// Note: Some things are manually defined as they need to
-// be mapped to NASM types.
+/*
+ * This file generates the include file for the assembly
+ * implementations by abusing the C preprocessor.
+ *
+ * Note: Some things are manually defined as they need to
+ * be mapped to NASM types.
+ */

 ;
 ; Automatically generated include file from jsimdcfg.inc.h
--- a/transupp.c
+++ b/transupp.c
@@ -816,8 +816,7 @@ do_flip_h(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
         (JDIMENSION)compptr->v_samp_factor, TRUE);
      src_buffer = (*srcinfo->mem->access_virt_barray)
-        ((j_common_ptr)srcinfo, src_coef_arrays[ci],
-         dst_blk_y + y_crop_blocks,
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_y + y_crop_blocks,
         (JDIMENSION)compptr->v_samp_factor, FALSE);
      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
        dst_row_ptr = dst_buffer[offset_y];
@@ -830,8 +829,9 @@ do_flip_h(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
            src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
            /* this unrolled loop doesn't need to know which row it's on... */
            for (k = 0; k < DCTSIZE2; k += 2) {
-              *dst_ptr++ = *src_ptr++;   /* copy even column */
-              *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign change */
+              *dst_ptr++ = *src_ptr++;    /* copy even column */
+              *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign
+                                             change */
            }
          } else {
            /* Copy last partial block(s) verbatim */
@@ -916,8 +916,7 @@ do_flip_v(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
        } else {
          /* Just copy row verbatim. */
          jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
-                          dst_buffer[offset_y],
-                          compptr->width_in_blocks);
+                          dst_buffer[offset_y], compptr->width_in_blocks);
        }
      }
    }
--- a/wrppm.c
+++ b/wrppm.c
@@ -5,7 +5,7 @@
 * Copyright (C) 1991-1996, Thomas G. Lane.
 * Modified 2009 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2017, 2019, D. R. Commander.
+ * Copyright (C) 2017, 2019-2020, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -326,11 +326,12 @@ jinit_write_ppm(j_decompress_ptr cinfo)

  if (cinfo->quantize_colors || BITS_IN_JSAMPLE != 8 ||
      sizeof(JSAMPLE) != sizeof(char) ||
-      (cinfo->out_color_space != JCS_EXT_RGB
 #if RGB_RED == 0 && RGB_GREEN == 1 && RGB_BLUE == 2 && RGB_PIXELSIZE == 3
-       && cinfo->out_color_space != JCS_RGB
+      (cinfo->out_color_space != JCS_EXT_RGB &&
+       cinfo->out_color_space != JCS_RGB)) {
+#else
+      cinfo->out_color_space != JCS_EXT_RGB) {
 #endif
-      )) {
    /* When quantizing, we need an output buffer for colormap indexes
     * that's separate from the physical I/O buffer.  We also need a
     * separate buffer if pixel format translation must take place.