diff --git a/ChangeLog.txt b/ChangeLog.txt index 885a3268..5e05a384 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -22,6 +22,11 @@ See the comments in Makefile.am for information on how to re-enable the tests and to specify an expected result for them based on the particulars of your platform. +[4] The NULL color conversion routines have been significantly optimized, +which speeds up the compression of RGB and CMYK JPEGs by 5-20% when using +64-bit code and 0-3% when using 32-bit code, and the decompression of those +images by 10-30% when using 64-bit code and 3-12% when using 32-bit code. + 1.4.0 ===== diff --git a/jccolor.c b/jccolor.c index 4be75f71..34ea23b8 100644 --- a/jccolor.c +++ b/jccolor.c @@ -5,7 +5,7 @@ * Copyright (C) 1991-1996, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009-2012, D. R. Commander. + * Copyright (C) 2009-2012, 2015 D. R. Commander. * Copyright (C) 2014, MIPS Technologies, Inc., California * For conditions of distribution and use, see the accompanying README file. * @@ -464,24 +464,54 @@ null_convert (j_compress_ptr cinfo, JDIMENSION output_row, int num_rows) { register JSAMPROW inptr; - register JSAMPROW outptr; + register JSAMPROW outptr, outptr0, outptr1, outptr2, outptr3; register JDIMENSION col; register int ci; int nc = cinfo->num_components; JDIMENSION num_cols = cinfo->image_width; - while (--num_rows >= 0) { - /* It seems fastest to make a separate pass for each component. */ - for (ci = 0; ci < nc; ci++) { - inptr = *input_buf; - outptr = output_buf[ci][output_row]; + if (nc == 3) { + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + output_row++; for (col = 0; col < num_cols; col++) { - outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */ - inptr += nc; + outptr0[col] = *inptr++; + outptr1[col] = *inptr++; + outptr2[col] = *inptr++; } } - input_buf++; - output_row++; + } else if (nc == 4) { + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + outptr3 = output_buf[3][output_row]; + output_row++; + for (col = 0; col < num_cols; col++) { + outptr0[col] = *inptr++; + outptr1[col] = *inptr++; + outptr2[col] = *inptr++; + outptr3[col] = *inptr++; + } + } + } else { + while (--num_rows >= 0) { + /* It seems fastest to make a separate pass for each component. */ + for (ci = 0; ci < nc; ci++) { + inptr = *input_buf; + outptr = output_buf[ci][output_row]; + for (col = 0; col < num_cols; col++) { + outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */ + inptr += nc; + } + } + input_buf++; + output_row++; + } } } diff --git a/jdcolor.c b/jdcolor.c index 779fa51f..38db90f2 100644 --- a/jdcolor.c +++ b/jdcolor.c @@ -6,7 +6,7 @@ * Modified 2011 by Guido Vollbeding. * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009, 2011-2012, 2014, D. R. Commander. + * Copyright (C) 2009, 2011-2012, 2014-2015, D. R. Commander. * Copyright (C) 2013, Linaro Limited. * For conditions of distribution and use, see the accompanying README file. * @@ -364,23 +364,53 @@ null_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { - register JSAMPROW inptr, outptr; - register JDIMENSION count; + register JSAMPROW inptr, inptr0, inptr1, inptr2, inptr3, outptr; + register JDIMENSION col; register int num_components = cinfo->num_components; JDIMENSION num_cols = cinfo->output_width; int ci; - while (--num_rows >= 0) { - for (ci = 0; ci < num_components; ci++) { - inptr = input_buf[ci][input_row]; - outptr = output_buf[0] + ci; - for (count = num_cols; count > 0; count--) { - *outptr = *inptr++; /* needn't bother with GETJSAMPLE() here */ - outptr += num_components; + if (num_components == 3) { + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + *outptr++ = inptr0[col]; + *outptr++ = inptr1[col]; + *outptr++ = inptr2[col]; } } - input_row++; - output_buf++; + } else if (num_components == 4) { + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + inptr3 = input_buf[3][input_row]; + input_row++; + outptr = *output_buf++; + for (col = 0; col < num_cols; col++) { + *outptr++ = inptr0[col]; + *outptr++ = inptr1[col]; + *outptr++ = inptr2[col]; + *outptr++ = inptr3[col]; + } + } + } else { + while (--num_rows >= 0) { + for (ci = 0; ci < num_components; ci++) { + inptr = input_buf[ci][input_row]; + outptr = *output_buf; + for (col = 0; col < num_cols; col++) { + outptr[ci] = inptr[col]; + outptr += num_components; + } + } + output_buf++; + input_row++; + } } }