Merge branch 'floatovershoot' of https://github.com/pornel/mozjpeg into pornel-floatovershoot
Conflicts: jcdctmgr.c (resolved)
This commit is contained in:
5
cjpeg.c
5
cjpeg.c
@@ -188,6 +188,7 @@ usage (void)
|
|||||||
fprintf(stderr, " -tune-ssim Tune trellis optimization for SSIM\n");
|
fprintf(stderr, " -tune-ssim Tune trellis optimization for SSIM\n");
|
||||||
fprintf(stderr, " -tune-ms-ssim Tune trellis optimization for MS-SSIM\n");
|
fprintf(stderr, " -tune-ms-ssim Tune trellis optimization for MS-SSIM\n");
|
||||||
fprintf(stderr, "Switches for advanced users:\n");
|
fprintf(stderr, "Switches for advanced users:\n");
|
||||||
|
fprintf(stderr, " -noovershoot Disable black-on-white deringing via overshoot\n");
|
||||||
#ifdef C_ARITH_CODING_SUPPORTED
|
#ifdef C_ARITH_CODING_SUPPORTED
|
||||||
fprintf(stderr, " -arithmetic Use arithmetic coding\n");
|
fprintf(stderr, " -arithmetic Use arithmetic coding\n");
|
||||||
#endif
|
#endif
|
||||||
@@ -534,7 +535,9 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
|
|||||||
cinfo->lambda_log_scale2 = 15.5;
|
cinfo->lambda_log_scale2 = 15.5;
|
||||||
cinfo->use_lambda_weight_tbl = TRUE;
|
cinfo->use_lambda_weight_tbl = TRUE;
|
||||||
jpeg_set_quality(cinfo, 75, TRUE);
|
jpeg_set_quality(cinfo, 75, TRUE);
|
||||||
|
|
||||||
|
} else if (keymatch(arg, "noovershoot", 11)) {
|
||||||
|
cinfo->overshoot_deringing = FALSE;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "%s: unknown option '%s'\n", progname, arg);
|
fprintf(stderr, "%s: unknown option '%s'\n", progname, arg);
|
||||||
usage(); /* bogus switch */
|
usage(); /* bogus switch */
|
||||||
|
|||||||
108
jcdctmgr.c
108
jcdctmgr.c
@@ -31,6 +31,7 @@ typedef void (*forward_DCT_method_ptr) (DCTELEM * data);
|
|||||||
typedef void (*float_DCT_method_ptr) (FAST_FLOAT * data);
|
typedef void (*float_DCT_method_ptr) (FAST_FLOAT * data);
|
||||||
|
|
||||||
typedef void (*preprocess_method_ptr)(DCTELEM*, const JQUANT_TBL*);
|
typedef void (*preprocess_method_ptr)(DCTELEM*, const JQUANT_TBL*);
|
||||||
|
typedef void (*float_preprocess_method_ptr)(FAST_FLOAT*, const JQUANT_TBL*);
|
||||||
|
|
||||||
typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
|
typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
|
||||||
JDIMENSION start_col,
|
JDIMENSION start_col,
|
||||||
@@ -69,6 +70,7 @@ typedef struct {
|
|||||||
/* Same as above for the floating-point case. */
|
/* Same as above for the floating-point case. */
|
||||||
float_DCT_method_ptr float_dct;
|
float_DCT_method_ptr float_dct;
|
||||||
float_convsamp_method_ptr float_convsamp;
|
float_convsamp_method_ptr float_convsamp;
|
||||||
|
float_preprocess_method_ptr float_preprocess;
|
||||||
float_quantize_method_ptr float_quantize;
|
float_quantize_method_ptr float_quantize;
|
||||||
FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
|
FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
|
||||||
FAST_FLOAT * float_workspace;
|
FAST_FLOAT * float_workspace;
|
||||||
@@ -352,7 +354,7 @@ start_pass_fdctmgr (j_compress_ptr cinfo)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
METHODDEF(DCTELEM)
|
METHODDEF(float)
|
||||||
catmull_rom(const DCTELEM value1, const DCTELEM value2, const DCTELEM value3, const DCTELEM value4, const float t, int size)
|
catmull_rom(const DCTELEM value1, const DCTELEM value2, const DCTELEM value3, const DCTELEM value4, const float t, int size)
|
||||||
{
|
{
|
||||||
const int tan1 = (value3 - value1) * size;
|
const int tan1 = (value3 - value1) * size;
|
||||||
@@ -366,8 +368,8 @@ catmull_rom(const DCTELEM value1, const DCTELEM value2, const DCTELEM value3, co
|
|||||||
const float f3 = t3 - 2.f * t2 + t;
|
const float f3 = t3 - 2.f * t2 + t;
|
||||||
const float f4 = t3 - t2;
|
const float f4 = t3 - t2;
|
||||||
|
|
||||||
return ceilf(value2 * f1 + tan1 * f3 +
|
return value2 * f1 + tan1 * f3 +
|
||||||
value3 * f2 + tan2 * f4);
|
value3 * f2 + tan2 * f4;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Prevents visible ringing artifacts near hard edges on white backgrounds.
|
/** Prevents visible ringing artifacts near hard edges on white backgrounds.
|
||||||
@@ -406,7 +408,7 @@ preprocess_deringing(DCTELEM *data, const JQUANT_TBL *quantization_table)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Too much overshoot is not good: increased amplitude will cost bits, and the cost is proportional to quantization (here using DC quant as a rough guide). */
|
/* Too much overshoot is not good: increased amplitude will cost bits, and the cost is proportional to quantization (here using DC quant as a rough guide). */
|
||||||
const int maxovershoot = maxsample + MIN(MIN(31, 2*quantization_table->quantval[0]), (maxsample * size - sum) / maxsample_count);
|
const DCTELEM maxovershoot = maxsample + MIN(MIN(31, 2*quantization_table->quantval[0]), (maxsample * size - sum) / maxsample_count);
|
||||||
|
|
||||||
int n = 0;
|
int n = 0;
|
||||||
do {
|
do {
|
||||||
@@ -450,7 +452,72 @@ preprocess_deringing(DCTELEM *data, const JQUANT_TBL *quantization_table)
|
|||||||
float position = step;
|
float position = step;
|
||||||
|
|
||||||
for(i = start; i < end; i++, position += step) {
|
for(i = start; i < end; i++, position += step) {
|
||||||
DCTELEM tmp = catmull_rom(maxsample - fslope, maxsample, maxsample, maxsample - lslope, position, size);
|
DCTELEM tmp = ceilf(catmull_rom(maxsample - fslope, maxsample, maxsample, maxsample - lslope, position, size));
|
||||||
|
data[jpeg_natural_order[i]] = MIN(tmp, maxovershoot);
|
||||||
|
}
|
||||||
|
n++;
|
||||||
|
}
|
||||||
|
while(n < size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Float version of preprocess_deringing()
|
||||||
|
*/
|
||||||
|
METHODDEF(void)
|
||||||
|
float_preprocess_deringing(FAST_FLOAT *data, const JQUANT_TBL *quantization_table)
|
||||||
|
{
|
||||||
|
const FAST_FLOAT maxsample = 255 - CENTERJSAMPLE;
|
||||||
|
const int size = DCTSIZE * DCTSIZE;
|
||||||
|
|
||||||
|
FAST_FLOAT sum = 0;
|
||||||
|
int maxsample_count = 0;
|
||||||
|
int i;
|
||||||
|
for(i=0; i < size; i++) {
|
||||||
|
sum += data[i];
|
||||||
|
if (data[i] >= maxsample) {
|
||||||
|
maxsample_count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!maxsample_count || maxsample_count == size) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const FAST_FLOAT maxovershoot = maxsample + MIN(MIN(31, 2*quantization_table->quantval[0]), (maxsample * size - sum) / maxsample_count);
|
||||||
|
|
||||||
|
int n = 0;
|
||||||
|
do {
|
||||||
|
if (data[jpeg_natural_order[n]] < maxsample) {
|
||||||
|
n++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
int start = n;
|
||||||
|
while(++n < size && data[jpeg_natural_order[n]] >= maxsample) {}
|
||||||
|
int end = n;
|
||||||
|
|
||||||
|
const FAST_FLOAT f1 = data[jpeg_natural_order[start >= 1 ? start-1 : 0]];
|
||||||
|
const FAST_FLOAT f2 = data[jpeg_natural_order[start >= 2 ? start-2 : 0]];
|
||||||
|
|
||||||
|
const FAST_FLOAT l1 = data[jpeg_natural_order[end < size-1 ? end : size-1]];
|
||||||
|
const FAST_FLOAT l2 = data[jpeg_natural_order[end < size-2 ? end+1 : size-1]];
|
||||||
|
|
||||||
|
FAST_FLOAT fslope = MAX(f1-f2, maxsample-f1);
|
||||||
|
FAST_FLOAT lslope = MAX(l1-l2, maxsample-l1);
|
||||||
|
|
||||||
|
if (start == 0) {
|
||||||
|
fslope = lslope;
|
||||||
|
}
|
||||||
|
if (end == size) {
|
||||||
|
lslope = fslope;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int size = end - start;
|
||||||
|
const float step = 1.f/(float)(size + 1);
|
||||||
|
float position = step;
|
||||||
|
|
||||||
|
for(i = start; i < end; i++, position += step) {
|
||||||
|
FAST_FLOAT tmp = catmull_rom(maxsample - fslope, maxsample, maxsample, maxsample - lslope, position, size);
|
||||||
data[jpeg_natural_order[i]] = MIN(tmp, maxovershoot);
|
data[jpeg_natural_order[i]] = MIN(tmp, maxovershoot);
|
||||||
}
|
}
|
||||||
n++;
|
n++;
|
||||||
@@ -557,7 +624,7 @@ quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
|
|||||||
temp = -temp;
|
temp = -temp;
|
||||||
temp += qval>>1; /* for rounding */
|
temp += qval>>1; /* for rounding */
|
||||||
DIVIDE_BY(temp, qval);
|
DIVIDE_BY(temp, qval);
|
||||||
temp = -temp;
|
temp = -temp;
|
||||||
} else {
|
} else {
|
||||||
temp += qval>>1; /* for rounding */
|
temp += qval>>1; /* for rounding */
|
||||||
DIVIDE_BY(temp, qval);
|
DIVIDE_BY(temp, qval);
|
||||||
@@ -605,7 +672,9 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
/* Load data into workspace, applying unsigned->signed conversion */
|
/* Load data into workspace, applying unsigned->signed conversion */
|
||||||
(*do_convsamp) (sample_data, start_col, workspace);
|
(*do_convsamp) (sample_data, start_col, workspace);
|
||||||
|
|
||||||
(*do_preprocess) (workspace, qtbl);
|
if (do_preprocess) {
|
||||||
|
(*do_preprocess) (workspace, qtbl);
|
||||||
|
}
|
||||||
|
|
||||||
/* Perform the DCT */
|
/* Perform the DCT */
|
||||||
(*do_dct) (workspace);
|
(*do_dct) (workspace);
|
||||||
@@ -712,6 +781,7 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
/* This routine is heavily used, so it's worth coding it tightly. */
|
/* This routine is heavily used, so it's worth coding it tightly. */
|
||||||
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
|
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
|
||||||
FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
|
FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
|
||||||
|
JQUANT_TBL *qtbl = cinfo->quant_tbl_ptrs[compptr->quant_tbl_no];
|
||||||
FAST_FLOAT * workspace;
|
FAST_FLOAT * workspace;
|
||||||
JDIMENSION bi;
|
JDIMENSION bi;
|
||||||
float v;
|
float v;
|
||||||
@@ -721,6 +791,7 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
/* Make sure the compiler doesn't look up these every pass */
|
/* Make sure the compiler doesn't look up these every pass */
|
||||||
float_DCT_method_ptr do_dct = fdct->float_dct;
|
float_DCT_method_ptr do_dct = fdct->float_dct;
|
||||||
float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
|
float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
|
||||||
|
float_preprocess_method_ptr do_preprocess = fdct->float_preprocess;
|
||||||
float_quantize_method_ptr do_quantize = fdct->float_quantize;
|
float_quantize_method_ptr do_quantize = fdct->float_quantize;
|
||||||
workspace = fdct->float_workspace;
|
workspace = fdct->float_workspace;
|
||||||
|
|
||||||
@@ -730,13 +801,17 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
/* Load data into workspace, applying unsigned->signed conversion */
|
/* Load data into workspace, applying unsigned->signed conversion */
|
||||||
(*do_convsamp) (sample_data, start_col, workspace);
|
(*do_convsamp) (sample_data, start_col, workspace);
|
||||||
|
|
||||||
|
if (do_preprocess) {
|
||||||
|
(*do_preprocess) (workspace, qtbl);
|
||||||
|
}
|
||||||
|
|
||||||
/* Perform the DCT */
|
/* Perform the DCT */
|
||||||
(*do_dct) (workspace);
|
(*do_dct) (workspace);
|
||||||
|
|
||||||
/* Save unquantized transform coefficients for later trellis quantization */
|
/* Save unquantized transform coefficients for later trellis quantization */
|
||||||
/* Currently save as integer values. Could save float values but would require */
|
/* Currently save as integer values. Could save float values but would require */
|
||||||
/* modifications to memory allocation and trellis quantization */
|
/* modifications to memory allocation and trellis quantization */
|
||||||
|
|
||||||
if (dst) {
|
if (dst) {
|
||||||
int i;
|
int i;
|
||||||
static const double aanscalefactor[DCTSIZE] = {
|
static const double aanscalefactor[DCTSIZE] = {
|
||||||
@@ -926,14 +1001,14 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb
|
|||||||
dc_cost_backtrack[k][bi] = l;
|
dc_cost_backtrack[k][bi] = l;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Do AC coefficients */
|
/* Do AC coefficients */
|
||||||
for (i = Ss; i <= Se; i++) {
|
for (i = Ss; i <= Se; i++) {
|
||||||
int z = jpeg_natural_order[i];
|
int z = jpeg_natural_order[i];
|
||||||
|
|
||||||
int sign = src[bi][z] >> 31;
|
int sign = src[bi][z] >> 31;
|
||||||
int x = abs(src[bi][z]);
|
int x = abs(src[bi][z]);
|
||||||
int q = 8 * qtbl->quantval[z];
|
int q = 8 * qtbl->quantval[z];
|
||||||
@@ -1202,7 +1277,11 @@ jinit_forward_dct (j_compress_ptr cinfo)
|
|||||||
else
|
else
|
||||||
fdct->convsamp = convsamp;
|
fdct->convsamp = convsamp;
|
||||||
|
|
||||||
fdct->preprocess = preprocess_deringing;
|
if (cinfo->overshoot_deringing) {
|
||||||
|
fdct->preprocess = preprocess_deringing;
|
||||||
|
} else {
|
||||||
|
fdct->preprocess = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
if (jsimd_can_quantize())
|
if (jsimd_can_quantize())
|
||||||
fdct->quantize = jsimd_quantize;
|
fdct->quantize = jsimd_quantize;
|
||||||
@@ -1216,6 +1295,13 @@ jinit_forward_dct (j_compress_ptr cinfo)
|
|||||||
fdct->float_convsamp = jsimd_convsamp_float;
|
fdct->float_convsamp = jsimd_convsamp_float;
|
||||||
else
|
else
|
||||||
fdct->float_convsamp = convsamp_float;
|
fdct->float_convsamp = convsamp_float;
|
||||||
|
|
||||||
|
if (cinfo->overshoot_deringing) {
|
||||||
|
fdct->float_preprocess = float_preprocess_deringing;
|
||||||
|
} else {
|
||||||
|
fdct->float_preprocess = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
if (jsimd_can_quantize_float())
|
if (jsimd_can_quantize_float())
|
||||||
fdct->float_quantize = jsimd_quantize_float;
|
fdct->float_quantize = jsimd_quantize_float;
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -293,6 +293,8 @@ jpeg_set_defaults (j_compress_ptr cinfo)
|
|||||||
cinfo->do_fancy_downsampling = TRUE;
|
cinfo->do_fancy_downsampling = TRUE;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
cinfo->overshoot_deringing = cinfo->use_moz_defaults;
|
||||||
|
|
||||||
/* No input smoothing */
|
/* No input smoothing */
|
||||||
cinfo->smoothing_factor = 0;
|
cinfo->smoothing_factor = 0;
|
||||||
|
|
||||||
|
|||||||
@@ -386,13 +386,14 @@ struct jpeg_compress_struct {
|
|||||||
boolean use_scans_in_trellis; /* TRUE=use scans in trellis optimization */
|
boolean use_scans_in_trellis; /* TRUE=use scans in trellis optimization */
|
||||||
boolean trellis_passes; /* TRUE=currently doing trellis-related passes */
|
boolean trellis_passes; /* TRUE=currently doing trellis-related passes */
|
||||||
boolean trellis_q_opt; /* TRUE=optimize quant table in trellis loop */
|
boolean trellis_q_opt; /* TRUE=optimize quant table in trellis loop */
|
||||||
|
boolean overshoot_deringing; /* TRUE=preprocess input to reduce ringing of edges on white background */
|
||||||
|
|
||||||
double norm_src[NUM_QUANT_TBLS][DCTSIZE2];
|
double norm_src[NUM_QUANT_TBLS][DCTSIZE2];
|
||||||
double norm_coef[NUM_QUANT_TBLS][DCTSIZE2];
|
double norm_coef[NUM_QUANT_TBLS][DCTSIZE2];
|
||||||
|
|
||||||
int trellis_freq_split; /* splitting point for frequency in trellis quantization */
|
int trellis_freq_split; /* splitting point for frequency in trellis quantization */
|
||||||
int trellis_num_loops; /* number of trellis loops */
|
int trellis_num_loops; /* number of trellis loops */
|
||||||
|
|
||||||
int num_scans_luma; /* # of entries in scan_info array pertaining to luma (used when optimize_scans is TRUE) */
|
int num_scans_luma; /* # of entries in scan_info array pertaining to luma (used when optimize_scans is TRUE) */
|
||||||
int num_scans_luma_dc;
|
int num_scans_luma_dc;
|
||||||
int num_scans_chroma_dc;
|
int num_scans_chroma_dc;
|
||||||
|
|||||||
Reference in New Issue
Block a user