diff --git a/ChangeLog.md b/ChangeLog.md index dfd11b44..7ab1bdc3 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -48,6 +48,13 @@ conformance issue. rotating or transposing JPEG images that use 4:2:2 (h2v1) chroma subsampling. The h1v2 fancy upsampling algorithm is not currently SIMD-accelerated. +5. If merged upsampling isn't SIMD-accelerated but YCbCr-to-RGB conversion is, +then libjpeg-turbo will now disable merged upsampling when decompressing YCbCr +JPEG images into RGB or extended RGB output images. This significantly speeds +up the decompression of 4:2:0 and 4:2:2 JPEGs on ARM platforms if fancy +upsampling is not used (for example, if the `-nosmooth` option to djpeg is +specified). + 1.5.0 ===== diff --git a/jdmaster.c b/jdmaster.c index 7908849b..9079dda6 100644 --- a/jdmaster.c +++ b/jdmaster.c @@ -22,6 +22,7 @@ #include "jpeglib.h" #include "jpegcomp.h" #include "jdmaster.h" +#include "jsimd.h" /* @@ -69,6 +70,17 @@ use_merged_upsample (j_decompress_ptr cinfo) cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size || cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size) return FALSE; +#ifdef WITH_SIMD + /* If YCbCr-to-RGB color conversion is SIMD-accelerated but merged upsampling + isn't, then disabling merged upsampling is likely to be faster when + decompressing YCbCr JPEG images. */ + if (!jsimd_can_h2v2_merged_upsample() && !jsimd_can_h2v1_merged_upsample() && + jsimd_can_ycc_rgb() && cinfo->jpeg_color_space == JCS_YCbCr && + (cinfo->out_color_space == JCS_RGB || + (cinfo->out_color_space >= JCS_EXT_RGB && + cinfo->out_color_space <= JCS_EXT_ARGB))) + return FALSE; +#endif /* ??? also need to test for upsample-time rescaling, when & if supported */ return TRUE; /* by golly, it'll work... */ #else