From 2012e32f1933331abcd7802c8cf876061f3ddf6d Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 1 Apr 2014 20:14:37 +0200 Subject: [PATCH] Update trellis quantization to support progressive coding mode Trellis quantization is modified: - to work on the configurable spectral range Ss to Se - to optionally optimize runs of EOBs - to optionally split optimization between 2 spectral ranges In trellis quantization passes, Huffman table code optimization is modified so as to generate a valid code length for each possible symbol by resetting frequency counters to 1 instead of 0 --- jccoefct.c | 15 +++--- jcdctmgr.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++------- jcmaster.c | 29 ++++++++--- jcparam.c | 3 ++ jcphuff.c | 8 +++ jpeglib.h | 5 ++ 6 files changed, 176 insertions(+), 30 deletions(-) diff --git a/jccoefct.c b/jccoefct.c index 68368edc..9c7162f9 100644 --- a/jccoefct.c +++ b/jccoefct.c @@ -360,21 +360,22 @@ compress_trellis_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) JBLOCKROW thisblockrow, lastblockrow; JBLOCKARRAY buffer_dst; - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { c_derived_tbl actbl_data; c_derived_tbl *actbl = &actbl_data; - jpeg_make_c_derived_tbl(cinfo, FALSE, cinfo->comp_info[ci].ac_tbl_no, &actbl); + + compptr = cinfo->cur_comp_info[ci]; + + jpeg_make_c_derived_tbl(cinfo, FALSE, compptr->ac_tbl_no, &actbl); /* Align the virtual buffer for this component. 
*/ buffer = (*cinfo->mem->access_virt_barray) - ((j_common_ptr) cinfo, coef->whole_image[ci], + ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index], coef->iMCU_row_num * compptr->v_samp_factor, (JDIMENSION) compptr->v_samp_factor, TRUE); buffer_dst = (*cinfo->mem->access_virt_barray) - ((j_common_ptr) cinfo, coef->whole_image_uq[ci], + ((j_common_ptr) cinfo, coef->whole_image_uq[compptr->component_index], coef->iMCU_row_num * compptr->v_samp_factor, (JDIMENSION) compptr->v_samp_factor, TRUE); @@ -397,7 +398,7 @@ compress_trellis_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) */ for (block_row = 0; block_row < block_rows; block_row++) { thisblockrow = buffer[block_row]; - quantize_trellis(cinfo, actbl, thisblockrow, buffer_dst[block_row], blocks_across, cinfo->quant_tbl_ptrs[cinfo->comp_info[ci].quant_tbl_no]); + quantize_trellis(cinfo, actbl, thisblockrow, buffer_dst[block_row], blocks_across, cinfo->quant_tbl_ptrs[compptr->quant_tbl_no]); if (ndummy > 0) { /* Create dummy blocks at the right edge of the image. */ diff --git a/jcdctmgr.c b/jcdctmgr.c index be3438a5..629b0053 100644 --- a/jcdctmgr.c +++ b/jcdctmgr.c @@ -584,12 +584,36 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_bloc int run_start[DCTSIZE2]; int bi; float best_cost; - int last_coeff_idx; // position of last nonzero coefficient + int last_coeff_idx; /* position of last nonzero coefficient */ float norm = 0.0; float lambda_base; float lambda; const float *lambda_tbl = (cinfo->use_lambda_weight_tbl) ? 
jpeg_lambda_weights_csf_luma : jpeg_lambda_weights_flat; + int Ss, Se; + float *accumulated_zero_block_cost; + float *accumulated_block_cost; + int *block_run_start; + int *requires_eob; + int has_eob; + float cost_all_zeros; + float best_cost_skip; + Ss = cinfo->Ss; + Se = cinfo->Se; + if (Ss == 0) + Ss = 1; + if (Se < Ss) + return; + if (cinfo->trellis_eob_opt) { + accumulated_zero_block_cost = (float *)malloc((num_blocks + 1) * SIZEOF(float)); + accumulated_block_cost = (float *)malloc((num_blocks + 1) * SIZEOF(float)); + block_run_start = (int *)malloc(num_blocks * SIZEOF(int)); + requires_eob = (int *)malloc((num_blocks + 1) * SIZEOF(int)); + accumulated_zero_block_cost[0] = 0; + accumulated_block_cost[0] = 0; + requires_eob[0] = 0; + } + if(!jpeg_nbits_table_init) { for(i = 0; i < 65536; i++) { int nbits = 0, temp = i; @@ -620,10 +644,10 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_bloc else lambda = pow(2.0, cinfo->lambda_log_scale1-12.0) * lambda_base; - accumulated_zero_dist[0] = 0.0; - accumulated_cost[0] = 0.0; + accumulated_zero_dist[Ss-1] = 0.0; + accumulated_cost[Ss-1] = 0.0; - for (i = 1; i < DCTSIZE2; i++) { + for (i = Ss; i <= Se; i++) { int z = jpeg_natural_order[i]; int sign = src[bi][z] >> 31; @@ -637,11 +661,11 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_bloc accumulated_zero_dist[i] = x * x * lambda * lambda_tbl[z] + accumulated_zero_dist[i-1]; - qval = (x + q/2) / q; // quantized value (round nearest) + qval = (x + q/2) / q; /* quantized value (round nearest) */ if (qval == 0) { coef_blocks[bi][z] = 0; - accumulated_cost[i] = 1e38; // Shouldn't be needed + accumulated_cost[i] = 1e38; /* Shouldn't be needed */ continue; } @@ -656,17 +680,24 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_bloc accumulated_cost[i] = 1e38; - for (j = 0; j < i; j++) { + for (j = Ss-1; j < i; j++) { int zz = jpeg_natural_order[j]; - if (j != 0 && coef_blocks[bi][zz] 
== 0) + if (j != Ss-1 && coef_blocks[bi][zz] == 0) continue; int zero_run = i - 1 - j; + if ((zero_run >> 4) && actbl->ehufsi[0xf0] == 0) + continue; + int run_bits = (zero_run >> 4) * actbl->ehufsi[0xf0]; zero_run &= 15; for (k = 0; k < num_candidates; k++) { - int rate = actbl->ehufsi[16 * zero_run + candidate_bits[k]] + candidate_bits[k] + run_bits; + int coef_bits = actbl->ehufsi[16 * zero_run + candidate_bits[k]]; + if (coef_bits == 0) + continue; + + int rate = coef_bits + candidate_bits[k] + run_bits; float cost = rate + candidate_dist[k]; cost += accumulated_zero_dist[i-1] - accumulated_zero_dist[j] + accumulated_cost[j]; @@ -679,26 +710,33 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_bloc } } - last_coeff_idx = 0; - best_cost = accumulated_zero_dist[DCTSIZE2-1] + actbl->ehufsi[0]; + last_coeff_idx = Ss-1; + best_cost = accumulated_zero_dist[Se] + actbl->ehufsi[0]; + cost_all_zeros = accumulated_zero_dist[Se]; + best_cost_skip = cost_all_zeros; - for (i = 1; i < DCTSIZE2; i++) { + for (i = Ss; i <= Se; i++) { int z = jpeg_natural_order[i]; if (coef_blocks[bi][z] != 0) { - float cost = accumulated_cost[i] + accumulated_zero_dist[DCTSIZE2-1] - accumulated_zero_dist[i]; - if (i < DCTSIZE2-1) + float cost = accumulated_cost[i] + accumulated_zero_dist[Se] - accumulated_zero_dist[i]; + float cost_wo_eob = cost; + + if (i < Se) cost += actbl->ehufsi[0]; if (cost < best_cost) { best_cost = cost; last_coeff_idx = i; + best_cost_skip = cost_wo_eob; } } } - // Zero out coefficients that are part of runs - i = DCTSIZE2 - 1; - while (i > 0) + has_eob = (last_coeff_idx < Se) + (last_coeff_idx == Ss-1); + + /* Zero out coefficients that are part of runs */ + i = Se; + while (i >= Ss) { while (i > last_coeff_idx) { int z = jpeg_natural_order[i]; @@ -708,6 +746,80 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_bloc last_coeff_idx = run_start[i]; i--; } + + if (cinfo->trellis_eob_opt) { + 
accumulated_zero_block_cost[bi+1] = accumulated_zero_block_cost[bi]; + accumulated_zero_block_cost[bi+1] += cost_all_zeros; + requires_eob[bi+1] = has_eob; + + best_cost = 1e38; + + if (has_eob != 2) { + for (i = 0; i <= bi; i++) { + int zero_block_run; + int nbits; + float cost; + + if (requires_eob[i] == 2) + continue; + + cost = best_cost_skip; /* cost of coding a nonzero block */ + cost += accumulated_zero_block_cost[bi]; + cost -= accumulated_zero_block_cost[i]; + cost += accumulated_block_cost[i]; + zero_block_run = bi - i + requires_eob[i]; + nbits = jpeg_nbits_table[zero_block_run]; + cost += actbl->ehufsi[16*nbits] + nbits; + + if (cost < best_cost) { + block_run_start[bi] = i; + best_cost = cost; + accumulated_block_cost[bi+1] = cost; + } + } + } + } + } + + if (cinfo->trellis_eob_opt) { + int last_block = num_blocks; + best_cost = 1e38; + + for (i = 0; i <= num_blocks; i++) { + int zero_block_run; + int nbits; + float cost = 0.0; + + if (requires_eob[i] == 2) + continue; + + cost += accumulated_zero_block_cost[num_blocks]; + cost -= accumulated_zero_block_cost[i]; + zero_block_run = num_blocks - i + requires_eob[i]; + nbits = jpeg_nbits_table[zero_block_run]; + cost += actbl->ehufsi[16*nbits] + nbits; + if (cost < best_cost) { + best_cost = cost; + last_block = i; + } + } + last_block--; + bi = num_blocks - 1; + while (bi >= 0) { + while (bi > last_block) { + for (j = Ss; j <= Se; j++) { + int z = jpeg_natural_order[j]; + coef_blocks[bi][z] = 0; + } + bi--; + } + last_block = block_run_start[bi]-1; + bi--; + } + free(accumulated_zero_block_cost); + free(accumulated_block_cost); + free(block_run_start); + free(requires_eob); } } diff --git a/jcmaster.c b/jcmaster.c index 0a307b1c..733ed3b6 100644 --- a/jcmaster.c +++ b/jcmaster.c @@ -329,7 +329,19 @@ select_scan_parameters (j_compress_ptr cinfo) #ifdef C_MULTISCAN_FILES_SUPPORTED my_master_ptr master = (my_master_ptr) cinfo->master; - if (cinfo->scan_info != NULL && !(cinfo->optimize_scans && 
master->pass_number < master->pass_number_scan_opt_base)) { + if (master->pass_number < master->pass_number_scan_opt_base) { + cinfo->comps_in_scan = 1; + if (cinfo->use_scans_in_trellis) { + cinfo->cur_comp_info[0] = &cinfo->comp_info[master->pass_number/4]; + cinfo->Ss = (master->pass_number%4 < 2) ? 1 : cinfo->trellis_freq_split+1; + cinfo->Se = (master->pass_number%4 < 2) ? cinfo->trellis_freq_split : DCTSIZE2-1; + } else { + cinfo->cur_comp_info[0] = &cinfo->comp_info[master->pass_number/2]; + cinfo->Ss = 1; + cinfo->Se = DCTSIZE2-1; + } + } + else if (cinfo->scan_info != NULL) { /* Prepare for current scan --- the script is already validated */ const jpeg_scan_info * scanptr = cinfo->scan_info + master->scan_number; @@ -469,6 +481,7 @@ METHODDEF(void) prepare_for_pass (j_compress_ptr cinfo) { my_master_ptr master = (my_master_ptr) cinfo->master; + cinfo->trellis_passes = master->pass_number < master->pass_number_scan_opt_base; switch (master->pass_type) { case main_pass: @@ -809,7 +822,7 @@ finish_pass_master (j_compress_ptr cinfo) break; case huff_opt_pass: /* next pass is always output of current scan */ - master->pass_type = output_pass; + master->pass_type = (master->pass_number < master->pass_number_scan_opt_base-1) ? trellis_pass : output_pass; break; case output_pass: /* next pass is either optimization or output of next scan */ @@ -824,7 +837,7 @@ finish_pass_master (j_compress_ptr cinfo) master->scan_number++; break; case trellis_pass: - master->pass_type = (cinfo->optimize_scans) ? huff_opt_pass : output_pass; + master->pass_type = (cinfo->optimize_scans || master->pass_number < master->pass_number_scan_opt_base-1) ? huff_opt_pass : output_pass; break; } @@ -885,8 +898,12 @@ jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only) else master->total_passes = cinfo->num_scans; - if (cinfo->trellis_quant) - master->total_passes += (cinfo->optimize_scans) ? 
2 : 1; + if (cinfo->trellis_quant) { + if (cinfo->progressive_mode) + master->total_passes += ((cinfo->use_scans_in_trellis) ? 4 : 2) * cinfo->num_components; + else + master->total_passes += 1; + } if (cinfo->optimize_scans) { int i; @@ -895,6 +912,6 @@ jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only) for (i = 0; i < cinfo->num_scans; i++) master->scan_buffer[i] = NULL; - master->pass_number_scan_opt_base = (cinfo->trellis_quant) ? 2 : 0; + master->pass_number_scan_opt_base = ((cinfo->use_scans_in_trellis) ? 4 : 2) * cinfo->num_components; } } diff --git a/jcparam.c b/jcparam.c index 4f76c455..514ecd01 100644 --- a/jcparam.c +++ b/jcparam.c @@ -429,6 +429,9 @@ jpeg_set_defaults (j_compress_ptr cinfo) cinfo->trellis_quant = (cinfo->use_moz_defaults != 0) ? TRUE : FALSE; cinfo->lambda_log_scale1 = 17.0; cinfo->lambda_log_scale2 = 15.0; + + cinfo->use_scans_in_trellis = FALSE; + cinfo->trellis_freq_split = 8; } diff --git a/jcphuff.c b/jcphuff.c index 31028717..9acbd43d 100644 --- a/jcphuff.c +++ b/jcphuff.c @@ -167,6 +167,14 @@ start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 257 * SIZEOF(long)); MEMZERO(entropy->count_ptrs[tbl], 257 * SIZEOF(long)); + if (cinfo->trellis_passes) { + /* When generating tables for trellis passes, make sure that all */ + /* codewords have an assigned length */ + int i, j; + for (i = 0; i < 16; i++) + for (j = 0; j < 12; j++) + entropy->count_ptrs[tbl][16*i+j] = 1; + } } else { /* Compute derived values for Huffman table */ /* We may do this more than once for a table, but it's not expensive */ diff --git a/jpeglib.h b/jpeglib.h index 1f18ae86..7a4fc132 100644 --- a/jpeglib.h +++ b/jpeglib.h @@ -379,8 +379,13 @@ struct jpeg_compress_struct { int use_moz_defaults; /* nonzero if using Mozilla defaults, 1=crush, 2=trellis */ boolean optimize_scans; /* TRUE=optimize progressive coding scans */ boolean trellis_quant; /* TRUE=use trellis 
quantization */ + boolean trellis_eob_opt; /* TRUE=optimize for sequences of EOB */ boolean use_flat_quant_tbl; /* TRUE=use flat quantization table */ boolean use_lambda_weight_tbl; /* TRUE=use lambda weighting table */ + boolean use_scans_in_trellis; /* TRUE=use scans in trellis optimization */ + boolean trellis_passes; /* TRUE=currently doing trellis-related passes */ + + int trellis_freq_split; /* splitting point for frequency in trellis quantization */ int num_scans_luma; /* # of entries in scan_info array pertaining to luma (used when optimize_scans is TRUE */ int num_scans_luma_dc;