diff --git a/cjpeg.c b/cjpeg.c index 82b1d73e..9ad6c7e6 100644 --- a/cjpeg.c +++ b/cjpeg.c @@ -174,7 +174,8 @@ usage (void) #endif fprintf(stderr, " -revert Revert to standard defaults (instead of mozjpeg defaults)\n"); fprintf(stderr, " -fastcrush Disable progressive scan optimization\n"); - fprintf(stderr, " -multidcscan Use multiple DC scans (may be incompatible with some JPEG decoders)\n"); + fprintf(stderr, " -opt-dc-scan Optimize DC scans (may be incompatible with some JPEG decoders)\n"); + fprintf(stderr, " -split-dc-scan Use one DC scan per component (may be incompatible with some JPEG decoders?)\n"); fprintf(stderr, " -notrellis Disable trellis optimization\n"); fprintf(stderr, " -tune-psnr Tune trellis optimization for PSNR\n"); fprintf(stderr, " -tune-hvs-psnr Tune trellis optimization for PSNR-HVS (default)\n"); @@ -353,7 +354,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, lval *= 1000L; cinfo->mem->max_memory_to_use = lval * 1000L; - } else if (keymatch(arg, "multidcscan", 3)) { + } else if (keymatch(arg, "opt-dc-scan", 6)) { cinfo->one_dc_scan = FALSE; } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) { @@ -479,6 +480,10 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, usage(); cinfo->smoothing_factor = val; + } else if (keymatch(arg, "split-dc-scans", 3)) { + cinfo->one_dc_scan = FALSE; + cinfo->sep_dc_scan = TRUE; + } else if (keymatch(arg, "targa", 1)) { /* Input file is Targa format. */ is_targa = TRUE; @@ -487,6 +492,10 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, /* disable trellis quantization */ cinfo->trellis_quant = FALSE; + } else if (keymatch(arg, "trellis-dc", 9)) { + /* enable DC trellis quantization */ + cinfo->trellis_quant_dc = TRUE; + } else if (keymatch(arg, "tune-psnr", 6)) { cinfo->use_flat_quant_tbl = TRUE; cinfo->lambda_log_scale1 = 9.0; diff --git a/jccoefct.c b/jccoefct.c index 0861d036..401812f5 100644 --- a/jccoefct.c +++ b/jccoefct.c @@ -361,10 +361,13 @@ compress_trellis_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) JBLOCKARRAY buffer_dst; for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + c_derived_tbl dctbl_data; + c_derived_tbl *dctbl = &dctbl_data; c_derived_tbl actbl_data; c_derived_tbl *actbl = &actbl_data; compptr = cinfo->cur_comp_info[ci]; + jpeg_make_c_derived_tbl(cinfo, TRUE, compptr->dc_tbl_no, &dctbl); jpeg_make_c_derived_tbl(cinfo, FALSE, compptr->ac_tbl_no, &actbl); /* Align the virtual buffer for this component. */ @@ -392,12 +395,15 @@ compress_trellis_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) ndummy = (int) (blocks_across % h_samp_factor); if (ndummy > 0) ndummy = h_samp_factor - ndummy; + + lastDC = 0; + /* Perform DCT for all non-dummy blocks in this iMCU row. Each call * on forward_DCT processes a complete horizontal row of DCT blocks. */ for (block_row = 0; block_row < block_rows; block_row++) { thisblockrow = buffer[block_row]; - quantize_trellis(cinfo, actbl, thisblockrow, buffer_dst[block_row], blocks_across, cinfo->quant_tbl_ptrs[compptr->quant_tbl_no], cinfo->norm_src[compptr->quant_tbl_no], cinfo->norm_coef[compptr->quant_tbl_no]); + quantize_trellis(cinfo, dctbl, actbl, thisblockrow, buffer_dst[block_row], blocks_across, cinfo->quant_tbl_ptrs[compptr->quant_tbl_no], cinfo->norm_src[compptr->quant_tbl_no], cinfo->norm_coef[compptr->quant_tbl_no], &lastDC); if (ndummy > 0) { /* Create dummy blocks at the right edge of the image. */ diff --git a/jcdctmgr.c b/jcdctmgr.c index d819fcc5..f31b8ee6 100644 --- a/jcdctmgr.c +++ b/jcdctmgr.c @@ -615,10 +615,10 @@ static const float jpeg_lambda_weights_csf_luma[64] = { }; GLOBAL(void) -quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_blocks, JBLOCKROW src, JDIMENSION num_blocks, - JQUANT_TBL * qtbl, double *norm_src, double *norm_coef) +quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actbl, JBLOCKROW coef_blocks, JBLOCKROW src, JDIMENSION num_blocks, + JQUANT_TBL * qtbl, double *norm_src, double *norm_coef, JCOEF *last_dc_val) { - int i, j, k; + int i, j, k, l; float accumulated_zero_dist[DCTSIZE2]; float accumulated_cost[DCTSIZE2]; int run_start[DCTSIZE2]; @@ -628,6 +628,7 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_bloc float norm = 0.0; float lambda_base; float lambda; + float lambda_dc; const float *lambda_tbl = (cinfo->use_lambda_weight_tbl) ? jpeg_lambda_weights_csf_luma : jpeg_lambda_weights_flat; int Ss, Se; float *accumulated_zero_block_cost = NULL; @@ -641,6 +642,9 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_bloc int zero_run; int run_bits; int rate; + float *accumulated_dc_cost[3]; + int *dc_cost_backtrack[3]; + JCOEF *dc_candidate[3]; Ss = cinfo->Ss; Se = cinfo->Se; @@ -664,7 +668,13 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_bloc accumulated_block_cost[0] = 0; requires_eob[0] = 0; } - + if (cinfo->trellis_quant_dc) { + for (i = 0; i < 3; i++) { + accumulated_dc_cost[i] = (float *)malloc(num_blocks * SIZEOF(float)); + dc_cost_backtrack[i] = (int *)malloc(num_blocks * SIZEOF(int)); + dc_candidate[i] = (JCOEF *)malloc(num_blocks * SIZEOF(JCOEF)); + } + } norm = 0.0; for (i = 1; i < DCTSIZE2; i++) { norm += qtbl->quantval[i] * qtbl->quantval[i]; @@ -686,9 +696,65 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_bloc else lambda = pow(2.0, cinfo->lambda_log_scale1-12.0) * lambda_base; + lambda_dc = lambda * lambda_tbl[0]; + accumulated_zero_dist[Ss-1] = 0.0; accumulated_cost[Ss-1] = 0.0; + // Do DC coefficient + if (cinfo->trellis_quant_dc) { + int sign = src[bi][0] >> 31; + int x = abs(src[bi][0]); + int q = 8 * qtbl->quantval[0]; + int qval; + float dc_candidate_dist; + + qval = (x + q/2) / q; /* quantized value (round nearest) */ + for (k = 0; k < 3; k++) { + int delta; + int dc_delta; + int bits; + + dc_candidate[k][bi] = qval - 1 + k; + delta = dc_candidate[k][bi] * q - x; + dc_candidate_dist = delta * delta * lambda_dc; + dc_candidate[k][bi] *= 1 + 2*sign; + + if (bi == 0) { + dc_delta = dc_candidate[k][bi] - *last_dc_val; + + // Derive number of suffix bits + bits = 0; + dc_delta = abs(dc_delta); + while (dc_delta) { + dc_delta >>= 1; + bits++; + } + cost = bits + dctbl->ehufsi[bits] + dc_candidate_dist; + accumulated_dc_cost[k][0] = cost; + dc_cost_backtrack[k][0] = -1; + } else { + for (l = 0; l < 3; l++) { + dc_delta = dc_candidate[k][bi] - dc_candidate[l][bi-1]; + + // Derive number of suffix bits + bits = 0; + dc_delta = abs(dc_delta); + while (dc_delta) { + dc_delta >>= 1; + bits++; + } + cost = bits + dctbl->ehufsi[bits] + dc_candidate_dist + accumulated_dc_cost[l][bi-1]; + if (l == 0 || cost < accumulated_dc_cost[k][bi]) { + accumulated_dc_cost[k][bi] = cost; + dc_cost_backtrack[k][bi] = l; + } + } + } + } + } + + // Do AC coefficients for (i = Ss; i <= Se; i++) { int z = jpeg_natural_order[i]; @@ -872,6 +938,28 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_bloc } } } + + if (cinfo->trellis_quant_dc) { + j = 0; + for (i = 1; i < 3; i++) { + if (accumulated_dc_cost[i][num_blocks-1] < accumulated_dc_cost[j][num_blocks-1]) + j = i; + } + for (bi = num_blocks-1; bi >= 0; bi--) { + coef_blocks[bi][0] = dc_candidate[j][bi]; + j = dc_cost_backtrack[j][bi]; + } + + // Save DC predictor + *last_dc_val = coef_blocks[num_blocks-1][0]; + + for (i = 0; i < 3; i++) { + free(accumulated_dc_cost[i]); + free(dc_cost_backtrack[i]); + free(dc_candidate[i]); + } + } + } /* diff --git a/jchuff.h b/jchuff.h index ac469dab..f6195284 100644 --- a/jchuff.h +++ b/jchuff.h @@ -49,5 +49,5 @@ EXTERN(void) jpeg_gen_optimal_table JPP((j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])); EXTERN(void) quantize_trellis - JPP((j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_blocks, JBLOCKROW src, JDIMENSION num_blocks, - JQUANT_TBL * qtbl, double *norm_src, double *norm_coef)); + JPP((j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actbl, JBLOCKROW coef_blocks, JBLOCKROW src, JDIMENSION num_blocks, + JQUANT_TBL * qtbl, double *norm_src, double *norm_coef, JCOEF *last_dc_val)); diff --git a/jcmaster.c b/jcmaster.c index 724e146b..52414a6d 100644 --- a/jcmaster.c +++ b/jcmaster.c @@ -45,6 +45,7 @@ typedef struct { int pass_number_scan_opt_base; /* pass number where scan optimization begins */ unsigned char * scan_buffer[64]; /* buffer for a given scan */ unsigned long scan_size[64]; /* size for a given scan */ + int actual_Al[64]; /* actual value of Al used for a scan */ unsigned long best_cost; /* bit count for best frequency split */ int best_freq_split_idx_luma; /* index for best frequency split (luma) */ int best_freq_split_idx_chroma; /* index for best frequency split (chroma) */ @@ -364,6 +365,8 @@ select_scan_parameters (j_compress_ptr cinfo) master->scan_number < cinfo->num_scans) cinfo->Al = master->best_Al_chroma; } + /* save value for later retrieval during printout of scans */ + master->actual_Al[master->scan_number] = cinfo->Al; } else #endif @@ -613,7 +616,7 @@ copy_buffer (j_compress_ptr cinfo, int scan_idx) for (i = 0; i < cinfo->scan_info[scan_idx].comps_in_scan; i++) fprintf(stderr, "%s%d", (i==0)?"":",", cinfo->scan_info[scan_idx].component_index[i]); fprintf(stderr, ": %d %d", cinfo->scan_info[scan_idx].Ss, cinfo->scan_info[scan_idx].Se); - fprintf(stderr, " %d %d", cinfo->scan_info[scan_idx].Ah, cinfo->scan_info[scan_idx].Al); + fprintf(stderr, " %d %d", cinfo->scan_info[scan_idx].Ah, master->actual_Al[scan_idx]); fprintf(stderr, "\n"); } @@ -757,7 +760,7 @@ select_scans (j_compress_ptr cinfo, int next_scan_number) if (cinfo->num_scans > cinfo->num_scans_luma && !cinfo->one_dc_scan) { base_scan_idx = cinfo->num_scans_luma; - if (master->interleave_chroma_dc) + if (master->interleave_chroma_dc && !cinfo->sep_dc_scan) copy_buffer(cinfo, base_scan_idx); else { copy_buffer(cinfo, base_scan_idx+1); diff --git a/jcparam.c b/jcparam.c index 4151bbe0..f03b2e6f 100644 --- a/jcparam.c +++ b/jcparam.c @@ -444,6 +444,7 @@ jpeg_set_defaults (j_compress_ptr cinfo) cinfo->trellis_freq_split = 8; cinfo->trellis_num_loops = 1; cinfo->trellis_q_opt = FALSE; + cinfo->trellis_quant_dc = FALSE; } @@ -839,6 +840,11 @@ jpeg_simple_progression (j_compress_ptr cinfo) /* Initial DC scan */ if (cinfo->one_dc_scan) scanptr = fill_dc_scans(scanptr, ncomps, 0, 0); + else if (cinfo->sep_dc_scan) { + scanptr = fill_a_scan(scanptr, 0, 0, 0, 0, 0); + scanptr = fill_a_scan(scanptr, 1, 0, 0, 0, 0); + scanptr = fill_a_scan(scanptr, 2, 0, 0, 0, 0); + } else { scanptr = fill_dc_scans(scanptr, 1, 0, 0); scanptr = fill_a_scan_pair(scanptr, 1, 0, 0, 0, 0); diff --git a/jpeglib.h b/jpeglib.h index 00a4658d..43f49924 100644 --- a/jpeglib.h +++ b/jpeglib.h @@ -379,7 +379,9 @@ struct jpeg_compress_struct { boolean use_moz_defaults; /* TRUE=use Mozilla defaults */ boolean optimize_scans; /* TRUE=optimize progressive coding scans */ boolean one_dc_scan; /* TRUE=use a single DC scan interleaving all components */ + boolean sep_dc_scan; /* TRUE=each DC scan is separate */ boolean trellis_quant; /* TRUE=use trellis quantization */ + boolean trellis_quant_dc; /* TRUE=use trellis quant for DC coefficient */ boolean trellis_eob_opt; /* TRUE=optimize for sequences of EOB */ boolean use_flat_quant_tbl; /* TRUE=use flat quantization table */ boolean use_lambda_weight_tbl; /* TRUE=use lambda weighting table */