Merge branch 'floatovershoot' of https://github.com/pornel/mozjpeg into pornel-floatovershoot

Conflicts: jcdctmgr.c (resolved)
2014-09-30 10:57:09 -04:00
parent 9de0e03f2f
commit 36b2fecd0c
4 changed files with 106 additions and 14 deletions
--- a/cjpeg.c
+++ b/cjpeg.c
@@ -188,6 +188,7 @@ usage (void)
  fprintf(stderr, "  -tune-ssim     Tune trellis optimization for SSIM\n");
  fprintf(stderr, "  -tune-ms-ssim  Tune trellis optimization for MS-SSIM\n");
  fprintf(stderr, "Switches for advanced users:\n");
+  fprintf(stderr, "  -noovershoot   Disable black-on-white deringing via overshoot\n");
 #ifdef C_ARITH_CODING_SUPPORTED
  fprintf(stderr, "  -arithmetic    Use arithmetic coding\n");
 #endif
@@ -534,7 +535,9 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
      cinfo->lambda_log_scale2 = 15.5;
      cinfo->use_lambda_weight_tbl = TRUE;
      jpeg_set_quality(cinfo, 75, TRUE);
-      
+
+    } else if (keymatch(arg, "noovershoot", 11)) {
+      cinfo->overshoot_deringing = FALSE;
    } else {
      fprintf(stderr, "%s: unknown option '%s'\n", progname, arg);
      usage();                  /* bogus switch */
--- a/jcdctmgr.c
+++ b/jcdctmgr.c
@@ -31,6 +31,7 @@ typedef void (*forward_DCT_method_ptr) (DCTELEM * data);
 typedef void (*float_DCT_method_ptr) (FAST_FLOAT * data);

 typedef void (*preprocess_method_ptr)(DCTELEM*, const JQUANT_TBL*);
+typedef void (*float_preprocess_method_ptr)(FAST_FLOAT*, const JQUANT_TBL*);

 typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
                                     JDIMENSION start_col,
@@ -69,6 +70,7 @@ typedef struct {
  /* Same as above for the floating-point case. */
  float_DCT_method_ptr float_dct;
  float_convsamp_method_ptr float_convsamp;
+  float_preprocess_method_ptr float_preprocess;
  float_quantize_method_ptr float_quantize;
  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
  FAST_FLOAT * float_workspace;
@@ -352,7 +354,7 @@ start_pass_fdctmgr (j_compress_ptr cinfo)
  }
 }

-METHODDEF(DCTELEM)
+METHODDEF(float)
 catmull_rom(const DCTELEM value1, const DCTELEM value2, const DCTELEM value3, const DCTELEM value4, const float t, int size)
 {
  const int tan1 = (value3 - value1) * size;
@@ -366,8 +368,8 @@ catmull_rom(const DCTELEM value1, const DCTELEM value2, const DCTELEM value3, co
  const float f3 = t3 - 2.f * t2 + t;
  const float f4 = t3 - t2;

-  return ceilf(value2 * f1 + tan1 * f3 +
-               value3 * f2 + tan2 * f4);
+  return value2 * f1 + tan1 * f3 +
+         value3 * f2 + tan2 * f4;
 }

 /** Prevents visible ringing artifacts near hard edges on white backgrounds.
@@ -406,7 +408,7 @@ preprocess_deringing(DCTELEM *data, const JQUANT_TBL *quantization_table)
  }

  /* Too much overshoot is not good: increased amplitude will cost bits, and the cost is proportional to quantization (here using DC quant as a rough guide). */
-  const int maxovershoot = maxsample + MIN(MIN(31, 2*quantization_table->quantval[0]), (maxsample * size - sum) / maxsample_count);
+  const DCTELEM maxovershoot = maxsample + MIN(MIN(31, 2*quantization_table->quantval[0]), (maxsample * size - sum) / maxsample_count);

  int n = 0;
  do {
@@ -450,7 +452,72 @@ preprocess_deringing(DCTELEM *data, const JQUANT_TBL *quantization_table)
    float position = step;

    for(i = start; i < end; i++, position += step) {
-      DCTELEM tmp = catmull_rom(maxsample - fslope, maxsample, maxsample, maxsample - lslope, position, size);
+      DCTELEM tmp = ceilf(catmull_rom(maxsample - fslope, maxsample, maxsample, maxsample - lslope, position, size));
+      data[jpeg_natural_order[i]] = MIN(tmp, maxovershoot);
+    }
+    n++;
+  }
+  while(n < size);
+}
+
+/*
+  Float version of preprocess_deringing()
+ */
+METHODDEF(void)
+float_preprocess_deringing(FAST_FLOAT *data, const JQUANT_TBL *quantization_table)
+{
+  const FAST_FLOAT maxsample = 255 - CENTERJSAMPLE;
+  const int size = DCTSIZE * DCTSIZE;
+
+  FAST_FLOAT sum = 0;
+  int maxsample_count = 0;
+  int i;
+  for(i=0; i < size; i++) {
+    sum += data[i];
+    if (data[i] >= maxsample) {
+      maxsample_count++;
+    }
+  }
+
+  if (!maxsample_count || maxsample_count == size) {
+    return;
+  }
+
+  const FAST_FLOAT maxovershoot = maxsample + MIN(MIN(31, 2*quantization_table->quantval[0]), (maxsample * size - sum) / maxsample_count);
+
+  int n = 0;
+  do {
+    if (data[jpeg_natural_order[n]] < maxsample) {
+      n++;
+      continue;
+    }
+
+    int start = n;
+    while(++n < size && data[jpeg_natural_order[n]] >= maxsample) {}
+    int end = n;
+
+    const FAST_FLOAT f1 = data[jpeg_natural_order[start >= 1 ? start-1 : 0]];
+    const FAST_FLOAT f2 = data[jpeg_natural_order[start >= 2 ? start-2 : 0]];
+
+    const FAST_FLOAT l1 = data[jpeg_natural_order[end < size-1 ? end : size-1]];
+    const FAST_FLOAT l2 = data[jpeg_natural_order[end < size-2 ? end+1 : size-1]];
+
+    FAST_FLOAT fslope = MAX(f1-f2, maxsample-f1);
+    FAST_FLOAT lslope = MAX(l1-l2, maxsample-l1);
+
+    if (start == 0) {
+      fslope = lslope;
+    }
+    if (end == size) {
+      lslope = fslope;
+    }
+
+    const int size = end - start;
+    const float step = 1.f/(float)(size + 1);
+    float position = step;
+
+    for(i = start; i < end; i++, position += step) {
+      FAST_FLOAT tmp = catmull_rom(maxsample - fslope, maxsample, maxsample, maxsample - lslope, position, size);
      data[jpeg_natural_order[i]] = MIN(tmp, maxovershoot);
    }
    n++;
@@ -557,7 +624,7 @@ quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
      temp = -temp;
      temp += qval>>1;  /* for rounding */
      DIVIDE_BY(temp, qval);
-      temp = -temp;    
+      temp = -temp;
    } else {
      temp += qval>>1;  /* for rounding */
      DIVIDE_BY(temp, qval);
@@ -605,7 +672,9 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
    /* Load data into workspace, applying unsigned->signed conversion */
    (*do_convsamp) (sample_data, start_col, workspace);

-    (*do_preprocess) (workspace, qtbl);
+    if (do_preprocess) {
+      (*do_preprocess) (workspace, qtbl);
+    }

    /* Perform the DCT */
    (*do_dct) (workspace);
@@ -712,6 +781,7 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
  /* This routine is heavily used, so it's worth coding it tightly. */
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
+  JQUANT_TBL *qtbl = cinfo->quant_tbl_ptrs[compptr->quant_tbl_no];
  FAST_FLOAT * workspace;
  JDIMENSION bi;
  float v;
@@ -721,6 +791,7 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
  /* Make sure the compiler doesn't look up these every pass */
  float_DCT_method_ptr do_dct = fdct->float_dct;
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
+  float_preprocess_method_ptr do_preprocess = fdct->float_preprocess;
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
  workspace = fdct->float_workspace;

@@ -730,13 +801,17 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
    /* Load data into workspace, applying unsigned->signed conversion */
    (*do_convsamp) (sample_data, start_col, workspace);

+    if (do_preprocess) {
+      (*do_preprocess) (workspace, qtbl);
+    }
+
    /* Perform the DCT */
    (*do_dct) (workspace);

    /* Save unquantized transform coefficients for later trellis quantization */
    /* Currently save as integer values. Could save float values but would require */
    /* modifications to memory allocation and trellis quantization */
-    
+
    if (dst) {
      int i;
      static const double aanscalefactor[DCTSIZE] = {
@@ -926,14 +1001,14 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb
              dc_cost_backtrack[k][bi] = l;
            }
          }
-        }        
+        }
      }
    }

    /* Do AC coefficients */
    for (i = Ss; i <= Se; i++) {
      int z = jpeg_natural_order[i];
-      
+
      int sign = src[bi][z] >> 31;
      int x = abs(src[bi][z]);
      int q = 8 * qtbl->quantval[z];
@@ -1202,7 +1277,11 @@ jinit_forward_dct (j_compress_ptr cinfo)
    else
      fdct->convsamp = convsamp;

-    fdct->preprocess = preprocess_deringing;
+    if (cinfo->overshoot_deringing) {
+      fdct->preprocess = preprocess_deringing;
+    } else {
+      fdct->preprocess = NULL;
+    }

    if (jsimd_can_quantize())
      fdct->quantize = jsimd_quantize;
@@ -1216,6 +1295,13 @@ jinit_forward_dct (j_compress_ptr cinfo)
      fdct->float_convsamp = jsimd_convsamp_float;
    else
      fdct->float_convsamp = convsamp_float;
+
+    if (cinfo->overshoot_deringing) {
+      fdct->float_preprocess = float_preprocess_deringing;
+    } else {
+      fdct->float_preprocess = NULL;
+    }
+
    if (jsimd_can_quantize_float())
      fdct->float_quantize = jsimd_quantize_float;
    else
--- a/jcparam.c
+++ b/jcparam.c
@@ -293,6 +293,8 @@ jpeg_set_defaults (j_compress_ptr cinfo)
  cinfo->do_fancy_downsampling = TRUE;
 #endif

+  cinfo->overshoot_deringing = cinfo->use_moz_defaults;
+
  /* No input smoothing */
  cinfo->smoothing_factor = 0;

--- a/jpeglib.h
+++ b/jpeglib.h
@@ -386,13 +386,14 @@ struct jpeg_compress_struct {
  boolean use_scans_in_trellis; /* TRUE=use scans in trellis optimization */
  boolean trellis_passes; /* TRUE=currently doing trellis-related passes */
  boolean trellis_q_opt; /* TRUE=optimize quant table in trellis loop */
-  
+  boolean overshoot_deringing; /* TRUE=preprocess input to reduce ringing of edges on white background */
+
  double norm_src[NUM_QUANT_TBLS][DCTSIZE2];
  double norm_coef[NUM_QUANT_TBLS][DCTSIZE2];

  int trellis_freq_split; /* splitting point for frequency in trellis quantization */
  int trellis_num_loops; /* number of trellis loops */
-  
+
  int num_scans_luma; /* # of entries in scan_info array pertaining to luma (used when optimize_scans is TRUE */
  int num_scans_luma_dc;
  int num_scans_chroma_dc;