diff --git a/ChangeLog.txt b/ChangeLog.txt index a1ad2e4d..9b1c317c 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -3,13 +3,21 @@ [1] Added further protections against invalid Huffman codes. -[2] Since the fast integer forward DCT seems to degrade for JPEG qualities -greater than 95, TurboJPEG/OSS will now automatically use the slow integer -forward DCT when generating JPEG images of quality 96 or greater. This -reduces compression performance by as much as 15% for these high-quality images -but is necessary to ensure that the images are perceptually lossless. +[2] The algorithm used by the SIMD quantization function cannot produce correct +results when the JPEG quality is >= 98 and the fast integer forward DCT is +used. Thus, the non-SIMD quantization function is now used for those cases, +and libjpeg-turbo should now produce identical output to libjpeg v6b in all +cases. -[3] Fixed visual artifacts in grayscale JPEG compression caused by a typo in +[3] Despite the above, the fast integer forward DCT still degrades somewhat for +JPEG qualities greater than 95, so TurboJPEG/OSS will now automatically use the +slow integer forward DCT when generating JPEG images of quality 96 or greater. +This reduces compression performance by as much as 15% for these high-quality +images but is necessary to ensure that the images are perceptually lossless. +It also ensures that the library can avoid the performance pitfall created by +[2]. + +[4] Fixed visual artifacts in grayscale JPEG compression caused by a typo in the RGB-to-chrominance lookup tables. 
diff --git a/Makefile.am b/Makefile.am index f8552ea1..a22ecda0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -102,6 +102,8 @@ test: testclean all ./jpegut ./cjpeg -dct int -outfile testoutint.jpg $(srcdir)/testorig.ppm ./cjpeg -dct fast -opt -outfile testoutfst.jpg $(srcdir)/testorig.ppm + ./cjpeg -dct fast -quality 100 -opt -outfile testoutfst100.jpg $(srcdir)/testorig.ppm + cmp $(srcdir)/testimgfst100.jpg testoutfst100.jpg ./cjpeg -dct float -outfile testoutflt.jpg $(srcdir)/testorig.ppm cmp $(srcdir)/testimgint.jpg testoutint.jpg cmp $(srcdir)/testimgfst.jpg testoutfst.jpg diff --git a/README-turbo.txt b/README-turbo.txt index 40350d04..8fb002be 100755 --- a/README-turbo.txt +++ b/README-turbo.txt @@ -42,6 +42,34 @@ replacing the optimized jchuff.c and jdhuff.c with their unoptimized counterparts from the libjpeg v6b source. +******************************************************************************* +** Performance pitfalls +******************************************************************************* + +=============== +Restart Markers +=============== + +The optimized Huffman decoder in libjpeg-turbo does not handle restart markers +in a way that makes libjpeg happy, so it is necessary to use the slow Huffman +decoder when decompressing a JPEG image that has restart markers. This can +cause the decompression performance to drop by as much as 20%, but the +performance will still be much greater than that of libjpeg v6b. Many +consumer packages, such as Photoshop, use restart markers when generating JPEG +images, so images generated by those programs will experience this issue. + +=============================================== +Fast Integer Forward DCT at High Quality Levels +=============================================== + +The algorithm used by the SIMD-accelerated quantization function cannot produce +correct results whenever the fast integer forward DCT is used along with a JPEG +quality of 98-100. 
Thus, libjpeg-turbo must use the non-SIMD quantization +function in those cases. This causes performance to drop by as much as 40%. +It is therefore strongly advised that you use the slow integer forward DCT +whenever encoding images with a JPEG quality of 98 or higher. + + ******************************************************************************* ** Using libjpeg-turbo ******************************************************************************* diff --git a/jcdctmgr.c b/jcdctmgr.c index 156957ab..711f9dab 100644 --- a/jcdctmgr.c +++ b/jcdctmgr.c @@ -4,6 +4,7 @@ * Copyright (C) 1994-1996, Thomas G. Lane. * Copyright (C) 1999-2006, MIYASAKA Masaru. * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright (C) 2011 D. R. Commander * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -39,6 +40,8 @@ typedef JMETHOD(void, float_quantize_method_ptr, (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)); +METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *); + typedef struct { struct jpeg_forward_dct pub; /* public fields */ @@ -160,7 +163,7 @@ flss (UINT16 val) * of in a consecutive manner, yet again in order to allow SIMD * routines. 
*/ -LOCAL(void) +LOCAL(int) compute_reciprocal (UINT16 divisor, DCTELEM * dtbl) { UDCTELEM2 fq, fr; @@ -189,6 +192,9 @@ compute_reciprocal (UINT16 divisor, DCTELEM * dtbl) dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */ dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */ dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */ + + if(r <= 16) return 0; + else return 1; } /* @@ -232,7 +238,9 @@ start_pass_fdctmgr (j_compress_ptr cinfo) } dtbl = fdct->divisors[qtblno]; for (i = 0; i < DCTSIZE2; i++) { - compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]); + if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) + && fdct->quantize == jsimd_quantize) + fdct->quantize = quantize; } break; #endif @@ -266,10 +274,12 @@ start_pass_fdctmgr (j_compress_ptr cinfo) } dtbl = fdct->divisors[qtblno]; for (i = 0; i < DCTSIZE2; i++) { - compute_reciprocal( + if(!compute_reciprocal( DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i], (INT32) aanscales[i]), - CONST_BITS-3), &dtbl[i]); + CONST_BITS-3), &dtbl[i]) + && fdct->quantize == jsimd_quantize) + fdct->quantize = quantize; } } break; diff --git a/testimgfst100.jpg b/testimgfst100.jpg new file mode 100644 index 00000000..36d9b75e Binary files /dev/null and b/testimgfst100.jpg differ diff --git a/win/Makefile b/win/Makefile index 9101c790..a63faf58 100755 --- a/win/Makefile +++ b/win/Makefile @@ -248,9 +248,11 @@ test: testclean $(ODIR)/cjpeg.exe $(ODIR)/djpeg.exe $(ODIR)/jpegtran.exe \ cd $(ODIR); ./jpegut $(ODIR)/cjpeg -dct int -outfile $(ODIR)/testoutint.jpg testorig.ppm $(ODIR)/cjpeg -dct fast -opt -outfile $(ODIR)/testoutfst.jpg testorig.ppm + $(ODIR)/cjpeg -dct fast -quality 100 -opt -outfile $(ODIR)/testoutfst100.jpg testorig.ppm $(ODIR)/cjpeg -dct float -outfile $(ODIR)/testoutflt.jpg testorig.ppm cmp testimgint.jpg $(ODIR)/testoutint.jpg cmp testimgfst.jpg $(ODIR)/testoutfst.jpg + cmp testimgfst100.jpg $(ODIR)/testoutfst100.jpg cmp testimgflt.jpg $(ODIR)/testoutflt.jpg 
$(ODIR)/djpeg -dct int -fast -ppm -outfile $(ODIR)/testoutint.ppm testorig.jpg $(ODIR)/djpeg -dct fast -ppm -outfile $(ODIR)/testoutfst.ppm testorig.jpg