From 36b2fecd0cbc5ce66c0c1e990f6ad9ac4cea1bed Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 30 Sep 2014 10:57:09 -0400 Subject: [PATCH] Merge branch 'floatovershoot' of https://github.com/pornel/mozjpeg into pornel-floatovershoot Conflicts: jcdctmgr.c (resolved) --- cjpeg.c | 5 ++- jcdctmgr.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++------ jcparam.c | 2 + jpeglib.h | 5 ++- 4 files changed, 106 insertions(+), 14 deletions(-) diff --git a/cjpeg.c b/cjpeg.c index 35224b86..49203efa 100644 --- a/cjpeg.c +++ b/cjpeg.c @@ -188,6 +188,7 @@ usage (void) fprintf(stderr, " -tune-ssim Tune trellis optimization for SSIM\n"); fprintf(stderr, " -tune-ms-ssim Tune trellis optimization for MS-SSIM\n"); fprintf(stderr, "Switches for advanced users:\n"); + fprintf(stderr, " -noovershoot Disable black-on-white deringing via overshoot\n"); #ifdef C_ARITH_CODING_SUPPORTED fprintf(stderr, " -arithmetic Use arithmetic coding\n"); #endif @@ -534,7 +535,9 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, cinfo->lambda_log_scale2 = 15.5; cinfo->use_lambda_weight_tbl = TRUE; jpeg_set_quality(cinfo, 75, TRUE); - + + } else if (keymatch(arg, "noovershoot", 11)) { + cinfo->overshoot_deringing = FALSE; } else { fprintf(stderr, "%s: unknown option '%s'\n", progname, arg); usage(); /* bogus switch */ diff --git a/jcdctmgr.c b/jcdctmgr.c index 543ee079..2b63cc37 100644 --- a/jcdctmgr.c +++ b/jcdctmgr.c @@ -31,6 +31,7 @@ typedef void (*forward_DCT_method_ptr) (DCTELEM * data); typedef void (*float_DCT_method_ptr) (FAST_FLOAT * data); typedef void (*preprocess_method_ptr)(DCTELEM*, const JQUANT_TBL*); +typedef void (*float_preprocess_method_ptr)(FAST_FLOAT*, const JQUANT_TBL*); typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data, JDIMENSION start_col, @@ -69,6 +70,7 @@ typedef struct { /* Same as above for the floating-point case. */ float_DCT_method_ptr float_dct; float_convsamp_method_ptr float_convsamp; + float_preprocess_method_ptr float_preprocess; float_quantize_method_ptr float_quantize; FAST_FLOAT * float_divisors[NUM_QUANT_TBLS]; FAST_FLOAT * float_workspace; @@ -352,7 +354,7 @@ start_pass_fdctmgr (j_compress_ptr cinfo) } } -METHODDEF(DCTELEM) +METHODDEF(float) catmull_rom(const DCTELEM value1, const DCTELEM value2, const DCTELEM value3, const DCTELEM value4, const float t, int size) { const int tan1 = (value3 - value1) * size; @@ -366,8 +368,8 @@ catmull_rom(const DCTELEM value1, const DCTELEM value2, const DCTELEM value3, co const float f3 = t3 - 2.f * t2 + t; const float f4 = t3 - t2; - return ceilf(value2 * f1 + tan1 * f3 + - value3 * f2 + tan2 * f4); + return value2 * f1 + tan1 * f3 + + value3 * f2 + tan2 * f4; } /** Prevents visible ringing artifacts near hard edges on white backgrounds. @@ -406,7 +408,7 @@ preprocess_deringing(DCTELEM *data, const JQUANT_TBL *quantization_table) } /* Too much overshoot is not good: increased amplitude will cost bits, and the cost is proportional to quantization (here using DC quant as a rough guide). */ - const int maxovershoot = maxsample + MIN(MIN(31, 2*quantization_table->quantval[0]), (maxsample * size - sum) / maxsample_count); + const DCTELEM maxovershoot = maxsample + MIN(MIN(31, 2*quantization_table->quantval[0]), (maxsample * size - sum) / maxsample_count); int n = 0; do { @@ -450,7 +452,72 @@ preprocess_deringing(DCTELEM *data, const JQUANT_TBL *quantization_table) float position = step; for(i = start; i < end; i++, position += step) { - DCTELEM tmp = catmull_rom(maxsample - fslope, maxsample, maxsample, maxsample - lslope, position, size); + DCTELEM tmp = ceilf(catmull_rom(maxsample - fslope, maxsample, maxsample, maxsample - lslope, position, size)); + data[jpeg_natural_order[i]] = MIN(tmp, maxovershoot); + } + n++; + } + while(n < size); +} + +/* + Float version of preprocess_deringing() + */ +METHODDEF(void) +float_preprocess_deringing(FAST_FLOAT *data, const JQUANT_TBL *quantization_table) +{ + const FAST_FLOAT maxsample = 255 - CENTERJSAMPLE; + const int size = DCTSIZE * DCTSIZE; + + FAST_FLOAT sum = 0; + int maxsample_count = 0; + int i; + for(i=0; i < size; i++) { + sum += data[i]; + if (data[i] >= maxsample) { + maxsample_count++; + } + } + + if (!maxsample_count || maxsample_count == size) { + return; + } + + const FAST_FLOAT maxovershoot = maxsample + MIN(MIN(31, 2*quantization_table->quantval[0]), (maxsample * size - sum) / maxsample_count); + + int n = 0; + do { + if (data[jpeg_natural_order[n]] < maxsample) { + n++; + continue; + } + + int start = n; + while(++n < size && data[jpeg_natural_order[n]] >= maxsample) {} + int end = n; + + const FAST_FLOAT f1 = data[jpeg_natural_order[start >= 1 ? start-1 : 0]]; + const FAST_FLOAT f2 = data[jpeg_natural_order[start >= 2 ? start-2 : 0]]; + + const FAST_FLOAT l1 = data[jpeg_natural_order[end < size-1 ? end : size-1]]; + const FAST_FLOAT l2 = data[jpeg_natural_order[end < size-2 ? end+1 : size-1]]; + + FAST_FLOAT fslope = MAX(f1-f2, maxsample-f1); + FAST_FLOAT lslope = MAX(l1-l2, maxsample-l1); + + if (start == 0) { + fslope = lslope; + } + if (end == size) { + lslope = fslope; + } + + const int size = end - start; + const float step = 1.f/(float)(size + 1); + float position = step; + + for(i = start; i < end; i++, position += step) { + FAST_FLOAT tmp = catmull_rom(maxsample - fslope, maxsample, maxsample, maxsample - lslope, position, size); data[jpeg_natural_order[i]] = MIN(tmp, maxovershoot); } n++; @@ -557,7 +624,7 @@ quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace) temp = -temp; temp += qval>>1; /* for rounding */ DIVIDE_BY(temp, qval); - temp = -temp; + temp = -temp; } else { temp += qval>>1; /* for rounding */ DIVIDE_BY(temp, qval); @@ -605,7 +672,9 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, /* Load data into workspace, applying unsigned->signed conversion */ (*do_convsamp) (sample_data, start_col, workspace); - (*do_preprocess) (workspace, qtbl); + if (do_preprocess) { + (*do_preprocess) (workspace, qtbl); + } /* Perform the DCT */ (*do_dct) (workspace); @@ -712,6 +781,7 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, /* This routine is heavily used, so it's worth coding it tightly. */ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no]; + JQUANT_TBL *qtbl = cinfo->quant_tbl_ptrs[compptr->quant_tbl_no]; FAST_FLOAT * workspace; JDIMENSION bi; float v; @@ -721,6 +791,7 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, /* Make sure the compiler doesn't look up these every pass */ float_DCT_method_ptr do_dct = fdct->float_dct; float_convsamp_method_ptr do_convsamp = fdct->float_convsamp; + float_preprocess_method_ptr do_preprocess = fdct->float_preprocess; float_quantize_method_ptr do_quantize = fdct->float_quantize; workspace = fdct->float_workspace; @@ -730,13 +801,17 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, /* Load data into workspace, applying unsigned->signed conversion */ (*do_convsamp) (sample_data, start_col, workspace); + if (do_preprocess) { + (*do_preprocess) (workspace, qtbl); + } + /* Perform the DCT */ (*do_dct) (workspace); /* Save unquantized transform coefficients for later trellis quantization */ /* Currently save as integer values. Could save float values but would require */ /* modifications to memory allocation and trellis quantization */ - + if (dst) { int i; static const double aanscalefactor[DCTSIZE] = { @@ -926,14 +1001,14 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb dc_cost_backtrack[k][bi] = l; } } - } + } } } /* Do AC coefficients */ for (i = Ss; i <= Se; i++) { int z = jpeg_natural_order[i]; - + int sign = src[bi][z] >> 31; int x = abs(src[bi][z]); int q = 8 * qtbl->quantval[z]; @@ -1202,7 +1277,11 @@ jinit_forward_dct (j_compress_ptr cinfo) else fdct->convsamp = convsamp; - fdct->preprocess = preprocess_deringing; + if (cinfo->overshoot_deringing) { + fdct->preprocess = preprocess_deringing; + } else { + fdct->preprocess = NULL; + } if (jsimd_can_quantize()) fdct->quantize = jsimd_quantize; @@ -1216,6 +1295,13 @@ jinit_forward_dct (j_compress_ptr cinfo) fdct->float_convsamp = jsimd_convsamp_float; else fdct->float_convsamp = convsamp_float; + + if (cinfo->overshoot_deringing) { + fdct->float_preprocess = float_preprocess_deringing; + } else { + fdct->float_preprocess = NULL; + } + if (jsimd_can_quantize_float()) fdct->float_quantize = jsimd_quantize_float; else diff --git a/jcparam.c b/jcparam.c index d3b8ea15..a299e86e 100644 --- a/jcparam.c +++ b/jcparam.c @@ -293,6 +293,8 @@ jpeg_set_defaults (j_compress_ptr cinfo) cinfo->do_fancy_downsampling = TRUE; #endif + cinfo->overshoot_deringing = cinfo->use_moz_defaults; + /* No input smoothing */ cinfo->smoothing_factor = 0; diff --git a/jpeglib.h b/jpeglib.h index e5d78ebd..7d1b90bc 100644 --- a/jpeglib.h +++ b/jpeglib.h @@ -386,13 +386,14 @@ struct jpeg_compress_struct { boolean use_scans_in_trellis; /* TRUE=use scans in trellis optimization */ boolean trellis_passes; /* TRUE=currently doing trellis-related passes */ boolean trellis_q_opt; /* TRUE=optimize quant table in trellis loop */ - + boolean overshoot_deringing; /* TRUE=preprocess input to reduce ringing of edges on white background */ + double norm_src[NUM_QUANT_TBLS][DCTSIZE2]; double norm_coef[NUM_QUANT_TBLS][DCTSIZE2]; int trellis_freq_split; /* splitting point for frequency in trellis quantization */ int trellis_num_loops; /* number of trellis loops */ - + int num_scans_luma; /* # of entries in scan_info array pertaining to luma (used when optimize_scans is TRUE */ int num_scans_luma_dc; int num_scans_chroma_dc;