diff --git a/cjpeg.c b/cjpeg.c index 20382e8f..67e54f48 100644 --- a/cjpeg.c +++ b/cjpeg.c @@ -170,6 +170,12 @@ usage (void) #endif fprintf(stderr, " -revert Revert to standard defaults (instead of mozjpeg defaults)\n"); fprintf(stderr, " -fastcrush Disable progressive scan optimization\n"); + fprintf(stderr, " -multidcscan Use multiple DC scans (may be incompatible with some JPEG decoders)\n"); + fprintf(stderr, " -notrellis Disable trellis optimization\n"); + fprintf(stderr, " -tune-psnr Tune trellis optimization for PSNR\n"); + fprintf(stderr, " -tune-hvs-psnr Tune trellis optimization for PSNR-HVS (default)\n"); + fprintf(stderr, " -tune-ssim Tune trellis optimization for SSIM\n"); + fprintf(stderr, " -tune-ms-ssim Tune trellis optimization for MS-SSIM\n"); fprintf(stderr, "Switches for advanced users:\n"); #ifdef C_ARITH_CODING_SUPPORTED fprintf(stderr, " -arithmetic Use arithmetic coding\n"); @@ -302,6 +308,10 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, } else if (keymatch(arg, "fastcrush", 4)) { cinfo->optimize_scans = FALSE; + } else if (keymatch(arg, "flat", 4)) { + cinfo->use_flat_quant_tbl = TRUE; + jpeg_set_quality(cinfo, 75, TRUE); + } else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) { /* Force a monochrome JPEG file to be generated. */ jpeg_set_colorspace(cinfo, JCS_GRAYSCALE); @@ -310,6 +320,16 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, /* Force an RGB JPEG file to be generated. */ jpeg_set_colorspace(cinfo, JCS_RGB); + } else if (keymatch(arg, "lambda1", 7)) { + if (++argn >= argc) /* advance to next argument */ + usage(); + cinfo->lambda_log_scale1 = atof(argv[argn]); + + } else if (keymatch(arg, "lambda2", 7)) { + if (++argn >= argc) /* advance to next argument */ + usage(); + cinfo->lambda_log_scale2 = atof(argv[argn]); + } else if (keymatch(arg, "maxmemory", 3)) { /* Maximum memory in Kb (or Mb with 'm'). 
*/ long lval; @@ -323,6 +343,9 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, lval *= 1000L; cinfo->mem->max_memory_to_use = lval * 1000L; + } else if (keymatch(arg, "multidcscan", 3)) { + cinfo->one_dc_scan = FALSE; + } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) { /* Enable entropy parm optimization. */ #ifdef ENTROPY_OPT_SUPPORTED @@ -446,6 +469,38 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, /* Input file is Targa format. */ is_targa = TRUE; + } else if (keymatch(arg, "notrellis", 1)) { + /* disable trellis quantization */ + cinfo->trellis_quant = FALSE; + + } else if (keymatch(arg, "tune-psnr", 6)) { + cinfo->use_flat_quant_tbl = TRUE; + cinfo->lambda_log_scale1 = 9.0; + cinfo->lambda_log_scale2 = 0.0; + cinfo->use_lambda_weight_tbl = FALSE; + jpeg_set_quality(cinfo, 75, TRUE); + + } else if (keymatch(arg, "tune-ssim", 6)) { + cinfo->use_flat_quant_tbl = TRUE; + cinfo->lambda_log_scale1 = 12.0; + cinfo->lambda_log_scale2 = 13.5; + cinfo->use_lambda_weight_tbl = FALSE; + jpeg_set_quality(cinfo, 75, TRUE); + + } else if (keymatch(arg, "tune-ms-ssim", 6)) { + cinfo->use_flat_quant_tbl = TRUE; + cinfo->lambda_log_scale1 = 10.5; + cinfo->lambda_log_scale2 = 13.0; + cinfo->use_lambda_weight_tbl = TRUE; + jpeg_set_quality(cinfo, 75, TRUE); + + } else if (keymatch(arg, "tune-hvs-psnr", 6)) { + cinfo->use_flat_quant_tbl = FALSE; + cinfo->lambda_log_scale1 = 16.0; + cinfo->lambda_log_scale2 = 15.5; + cinfo->use_lambda_weight_tbl = TRUE; + jpeg_set_quality(cinfo, 75, TRUE); + } else { usage(); /* bogus switch */ } diff --git a/configure.ac b/configure.ac index 19ea2641..82b3d7d5 100644 --- a/configure.ac +++ b/configure.ac @@ -93,6 +93,7 @@ if test "x${SUNCC}" = "xyes"; then fi # Checks for libraries. +AC_CHECK_LIB([m],[pow]) # Checks for header files. 
AC_HEADER_STDC diff --git a/jccoefct.c b/jccoefct.c index 1963ddb6..0861d036 100644 --- a/jccoefct.c +++ b/jccoefct.c @@ -2,6 +2,8 @@ * jccoefct.c * * Copyright (C) 1994-1997, Thomas G. Lane. + * mozjpeg Modifications: + * Copyright (C) 2014, Mozilla Corporation. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -13,7 +15,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" - +#include "jchuff.h" /* We use a full-image coefficient buffer when doing Huffman optimization, * and also for writing multiple-scan JPEG files. In all cases, the DCT @@ -52,6 +54,10 @@ typedef struct { /* In multi-pass modes, we need a virtual block array for each component. */ jvirt_barray_ptr whole_image[MAX_COMPONENTS]; + + /* when using trellis quantization, need to keep a copy of all unquantized coefficients */ + jvirt_barray_ptr whole_image_uq[MAX_COMPONENTS]; + } my_coef_controller; typedef my_coef_controller * my_coef_ptr; @@ -66,6 +72,8 @@ METHODDEF(boolean) compress_first_pass METHODDEF(boolean) compress_output JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf)); #endif +METHODDEF(boolean) compress_trellis_pass + JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf)); LOCAL(void) @@ -122,6 +130,12 @@ start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode) coef->pub.compress_data = compress_output; break; #endif + case JBUF_REQUANT: + if (coef->whole_image[0] == NULL) + ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); + coef->pub.compress_data = compress_trellis_pass; + break; + default: ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); break; @@ -177,7 +191,7 @@ compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf) (*cinfo->fdct->forward_DCT) (cinfo, compptr, input_buf[compptr->component_index], coef->MCU_buffer[blkn], - ypos, xpos, (JDIMENSION) blockcnt); + ypos, xpos, (JDIMENSION) blockcnt, NULL); if (blockcnt < compptr->MCU_width) { /* Create some dummy blocks at the right edge of the 
image. */ jzero_far((void FAR *) coef->MCU_buffer[blkn + blockcnt], @@ -252,6 +266,7 @@ compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) jpeg_component_info *compptr; JBLOCKARRAY buffer; JBLOCKROW thisblockrow, lastblockrow; + JBLOCKARRAY buffer_dst; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { @@ -260,6 +275,12 @@ compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) ((j_common_ptr) cinfo, coef->whole_image[ci], coef->iMCU_row_num * compptr->v_samp_factor, (JDIMENSION) compptr->v_samp_factor, TRUE); + + buffer_dst = (*cinfo->mem->access_virt_barray) + ((j_common_ptr) cinfo, coef->whole_image_uq[ci], + coef->iMCU_row_num * compptr->v_samp_factor, + (JDIMENSION) compptr->v_samp_factor, TRUE); + /* Count non-dummy DCT block rows in this iMCU row. */ if (coef->iMCU_row_num < last_iMCU_row) block_rows = compptr->v_samp_factor; @@ -282,7 +303,7 @@ compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) (*cinfo->fdct->forward_DCT) (cinfo, compptr, input_buf[ci], thisblockrow, (JDIMENSION) (block_row * DCTSIZE), - (JDIMENSION) 0, blocks_across); + (JDIMENSION) 0, blocks_across, buffer_dst[block_row]); if (ndummy > 0) { /* Create dummy blocks at the right edge of the image. 
*/ thisblockrow += blocks_across; /* => first dummy block */ @@ -326,6 +347,101 @@ compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) return compress_output(cinfo, input_buf); } +METHODDEF(boolean) +compress_trellis_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) +{ + my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; + JDIMENSION blocks_across, MCUs_across, MCUindex; + int bi, ci, h_samp_factor, block_row, block_rows, ndummy; + JCOEF lastDC; + jpeg_component_info *compptr; + JBLOCKARRAY buffer; + JBLOCKROW thisblockrow, lastblockrow; + JBLOCKARRAY buffer_dst; + + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + c_derived_tbl actbl_data; + c_derived_tbl *actbl = &actbl_data; + compptr = cinfo->cur_comp_info[ci]; + + jpeg_make_c_derived_tbl(cinfo, FALSE, compptr->ac_tbl_no, &actbl); + + /* Align the virtual buffer for this component. */ + buffer = (*cinfo->mem->access_virt_barray) + ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index], + coef->iMCU_row_num * compptr->v_samp_factor, + (JDIMENSION) compptr->v_samp_factor, TRUE); + + buffer_dst = (*cinfo->mem->access_virt_barray) + ((j_common_ptr) cinfo, coef->whole_image_uq[compptr->component_index], + coef->iMCU_row_num * compptr->v_samp_factor, + (JDIMENSION) compptr->v_samp_factor, TRUE); + + /* Count non-dummy DCT block rows in this iMCU row. */ + if (coef->iMCU_row_num < last_iMCU_row) + block_rows = compptr->v_samp_factor; + else { + /* NB: can't use last_row_height here, since may not be set! */ + block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor); + if (block_rows == 0) block_rows = compptr->v_samp_factor; + } + blocks_across = compptr->width_in_blocks; + h_samp_factor = compptr->h_samp_factor; + /* Count number of dummy blocks to be added at the right margin. 
*/ + ndummy = (int) (blocks_across % h_samp_factor); + if (ndummy > 0) + ndummy = h_samp_factor - ndummy; + /* Perform DCT for all non-dummy blocks in this iMCU row. Each call + * on forward_DCT processes a complete horizontal row of DCT blocks. + */ + for (block_row = 0; block_row < block_rows; block_row++) { + thisblockrow = buffer[block_row]; + quantize_trellis(cinfo, actbl, thisblockrow, buffer_dst[block_row], blocks_across, cinfo->quant_tbl_ptrs[compptr->quant_tbl_no], cinfo->norm_src[compptr->quant_tbl_no], cinfo->norm_coef[compptr->quant_tbl_no]); + + if (ndummy > 0) { + /* Create dummy blocks at the right edge of the image. */ + thisblockrow += blocks_across; /* => first dummy block */ + jzero_far((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK)); + lastDC = thisblockrow[-1][0]; + for (bi = 0; bi < ndummy; bi++) { + thisblockrow[bi][0] = lastDC; + } + } + } + /* If at end of image, create dummy block rows as needed. + * The tricky part here is that within each MCU, we want the DC values + * of the dummy blocks to match the last real block's DC value. + * This squeezes a few more bytes out of the resulting file... + */ + if (coef->iMCU_row_num == last_iMCU_row) { + blocks_across += ndummy; /* include lower right corner */ + MCUs_across = blocks_across / h_samp_factor; + for (block_row = block_rows; block_row < compptr->v_samp_factor; + block_row++) { + thisblockrow = buffer[block_row]; + lastblockrow = buffer[block_row-1]; + jzero_far((void FAR *) thisblockrow, + (size_t) (blocks_across * SIZEOF(JBLOCK))); + for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) { + lastDC = lastblockrow[h_samp_factor-1][0]; + for (bi = 0; bi < h_samp_factor; bi++) { + thisblockrow[bi][0] = lastDC; + } + thisblockrow += h_samp_factor; /* advance to next MCU in row */ + lastblockrow += h_samp_factor; + } + } + } + } + + /* NB: compress_output will increment iMCU_row_num if successful. + * A suspension return will result in redoing all the work above next time. 
+ */ + + /* Emit data to the entropy encoder, sharing code with subsequent passes */ + return compress_output(cinfo, input_buf); +} /* * Process some data in subsequent passes of a multi-pass case. @@ -377,6 +493,7 @@ compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) } } } + /* Try to write the MCU. */ if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) { /* Suspension forced; update state counters and exit */ @@ -429,6 +546,14 @@ jinit_c_coef_controller (j_compress_ptr cinfo, boolean need_full_buffer) (JDIMENSION) jround_up((long) compptr->height_in_blocks, (long) compptr->v_samp_factor), (JDIMENSION) compptr->v_samp_factor); + + coef->whole_image_uq[ci] = (*cinfo->mem->request_virt_barray) + ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION) jround_up((long) compptr->width_in_blocks, + (long) compptr->h_samp_factor), + (JDIMENSION) jround_up((long) compptr->height_in_blocks, + (long) compptr->v_samp_factor), + (JDIMENSION) compptr->v_samp_factor); } #else ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); diff --git a/jcdctmgr.c b/jcdctmgr.c index 3234a01a..833b55fd 100644 --- a/jcdctmgr.c +++ b/jcdctmgr.c @@ -7,6 +7,8 @@ * Copyright (C) 1999-2006, MIYASAKA Masaru. * Copyright 2009 Pierre Ossman for Cendio AB * Copyright (C) 2011 D. R. Commander + * mozjpeg Modifications: + * Copyright (C) 2014, Mozilla Corporation. * For conditions of distribution and use, see the accompanying README file. * * This file contains the forward-DCT management logic. @@ -20,7 +22,8 @@ #include "jpeglib.h" #include "jdct.h" /* Private declarations for DCT subsystem */ #include "jsimddct.h" - +#include <stdlib.h> +#include <math.h> /* Private subobject for this module */ @@ -412,7 +415,7 @@ METHODDEF(void) forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, JSAMPARRAY sample_data, JBLOCKROW coef_blocks, JDIMENSION start_row, JDIMENSION start_col, - JDIMENSION num_blocks) + JDIMENSION num_blocks, JBLOCKROW dst) /* This version is used for integer DCT implementations. 
*/ { /* This routine is heavily used, so it's worth coding it tightly. */ @@ -436,6 +439,16 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, /* Perform the DCT */ (*do_dct) (workspace); + /* Save unquantized transform coefficients for later trellis quantization */ + if (dst) { + int i; + for (i = 0; i < DCTSIZE2; i++) { + dst[bi][i] = workspace[i]; + //printf("d%d ", workspace[i]); + } + //printf("\n"); + } + /* Quantize/descale the coefficients, and store into coef_blocks[] */ (*do_quantize) (coef_blocks[bi], divisors, workspace); } @@ -502,7 +515,7 @@ METHODDEF(void) forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, JSAMPARRAY sample_data, JBLOCKROW coef_blocks, JDIMENSION start_row, JDIMENSION start_col, - JDIMENSION num_blocks) + JDIMENSION num_blocks, JBLOCKROW dst) /* This version is used for floating-point DCT implementations. */ { /* This routine is heavily used, so it's worth coding it tightly. */ @@ -534,6 +547,290 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, #endif /* DCT_FLOAT_SUPPORTED */ +#include "jchuff.h" + +static unsigned char jpeg_nbits_table[65536]; +static int jpeg_nbits_table_init = 0; + +static const float jpeg_lambda_weights_flat[64] = { + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f +}; + +static const float jpeg_lambda_weights_csf_luma[64] = { + 3.35630f, 3.59892f, 3.20921f, 2.28102f, 1.42378f, 0.88079f, 0.58190f, 0.43454f, + 3.59893f, 3.21284f, 2.71282f, 1.98092f, 1.30506f, 0.83852f, 0.56346f, 0.42146f, + 3.20921f, 2.71282f, 2.12574f, 1.48616f, 0.99660f, 0.66132f, 0.45610f, 0.34609f, + 2.28102f, 1.98092f, 1.48616f, 
0.97492f, 0.64622f, 0.43812f, 0.31074f, 0.24072f, + 1.42378f, 1.30506f, 0.99660f, 0.64623f, 0.42051f, 0.28446f, 0.20380f, 0.15975f, + 0.88079f, 0.83852f, 0.66132f, 0.43812f, 0.28446f, 0.19092f, 0.13635f, 0.10701f, + 0.58190f, 0.56346f, 0.45610f, 0.31074f, 0.20380f, 0.13635f, 0.09674f, 0.07558f, + 0.43454f, 0.42146f, 0.34609f, 0.24072f, 0.15975f, 0.10701f, 0.07558f, 0.05875f, +}; + +GLOBAL(void) +quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_blocks, JBLOCKROW src, JDIMENSION num_blocks, + JQUANT_TBL * qtbl, double *norm_src, double *norm_coef) +{ + int i, j, k; + float accumulated_zero_dist[DCTSIZE2]; + float accumulated_cost[DCTSIZE2]; + int run_start[DCTSIZE2]; + int bi; + float best_cost; + int last_coeff_idx; /* position of last nonzero coefficient */ + float norm = 0.0; + float lambda_base; + float lambda; + const float *lambda_tbl = (cinfo->use_lambda_weight_tbl) ? jpeg_lambda_weights_csf_luma : jpeg_lambda_weights_flat; + int Ss, Se; + float *accumulated_zero_block_cost; + float *accumulated_block_cost; + int *block_run_start; + int *requires_eob; + int has_eob; + float cost_all_zeros; + float best_cost_skip; + + Ss = cinfo->Ss; + Se = cinfo->Se; + if (Ss == 0) + Ss = 1; + if (Se < Ss) + return; + if (cinfo->trellis_eob_opt) { + accumulated_zero_block_cost = (float *)malloc((num_blocks + 1) * SIZEOF(float)); + accumulated_block_cost = (float *)malloc((num_blocks + 1) * SIZEOF(float)); + block_run_start = (int *)malloc(num_blocks * SIZEOF(int)); + requires_eob = (int *)malloc((num_blocks + 1) * SIZEOF(int)); + accumulated_zero_block_cost[0] = 0; + accumulated_block_cost[0] = 0; + requires_eob[0] = 0; + } + + if(!jpeg_nbits_table_init) { + for(i = 0; i < 65536; i++) { + int nbits = 0, temp = i; + while (temp) {temp >>= 1; nbits++;} + jpeg_nbits_table[i] = nbits; + } + jpeg_nbits_table_init = 1; + } + + norm = 0.0; + for (i = 1; i < DCTSIZE2; i++) { + norm += qtbl->quantval[i] * qtbl->quantval[i]; + } + norm /= 63.0; + + lambda_base 
= 1.0 / norm; + + for (bi = 0; bi < num_blocks; bi++) { + + norm = 0.0; + for (i = 1; i < DCTSIZE2; i++) { + norm += src[bi][i] * src[bi][i]; + } + norm /= 63.0; + + if (cinfo->lambda_log_scale2 > 0.0) + lambda = pow(2.0, cinfo->lambda_log_scale1) * lambda_base / (pow(2.0, cinfo->lambda_log_scale2) + norm); + else + lambda = pow(2.0, cinfo->lambda_log_scale1-12.0) * lambda_base; + + accumulated_zero_dist[Ss-1] = 0.0; + accumulated_cost[Ss-1] = 0.0; + + for (i = Ss; i <= Se; i++) { + int z = jpeg_natural_order[i]; + + int sign = src[bi][z] >> 31; + int x = abs(src[bi][z]); + int q = 8 * qtbl->quantval[z]; + int candidate[16]; + int candidate_bits[16]; + float candidate_dist[16]; + int num_candidates; + int qval; + + accumulated_zero_dist[i] = x * x * lambda * lambda_tbl[z] + accumulated_zero_dist[i-1]; + + qval = (x + q/2) / q; /* quantized value (round nearest) */ + + if (qval == 0) { + coef_blocks[bi][z] = 0; + accumulated_cost[i] = 1e38; /* Shouldn't be needed */ + continue; + } + + num_candidates = jpeg_nbits_table[qval]; + for (k = 0; k < num_candidates; k++) { + int delta; + candidate[k] = (k < num_candidates - 1) ? 
(2 << k) - 1 : qval; + delta = candidate[k] * q - x; + candidate_bits[k] = k+1; + candidate_dist[k] = delta * delta * lambda * lambda_tbl[z]; + } + + accumulated_cost[i] = 1e38; + + for (j = Ss-1; j < i; j++) { + int zz = jpeg_natural_order[j]; + if (j != Ss-1 && coef_blocks[bi][zz] == 0) + continue; + + int zero_run = i - 1 - j; + if ((zero_run >> 4) && actbl->ehufsi[0xf0] == 0) + continue; + + int run_bits = (zero_run >> 4) * actbl->ehufsi[0xf0]; + zero_run &= 15; + + for (k = 0; k < num_candidates; k++) { + int coef_bits = actbl->ehufsi[16 * zero_run + candidate_bits[k]]; + if (coef_bits == 0) + continue; + + int rate = coef_bits + candidate_bits[k] + run_bits; + float cost = rate + candidate_dist[k]; + cost += accumulated_zero_dist[i-1] - accumulated_zero_dist[j] + accumulated_cost[j]; + + if (cost < accumulated_cost[i]) { + coef_blocks[bi][z] = (candidate[k] ^ sign) - sign; + accumulated_cost[i] = cost; + run_start[i] = j; + } + } + } + } + + last_coeff_idx = Ss-1; + best_cost = accumulated_zero_dist[Se] + actbl->ehufsi[0]; + cost_all_zeros = accumulated_zero_dist[Se]; + best_cost_skip = cost_all_zeros; + + for (i = Ss; i <= Se; i++) { + int z = jpeg_natural_order[i]; + if (coef_blocks[bi][z] != 0) { + float cost = accumulated_cost[i] + accumulated_zero_dist[Se] - accumulated_zero_dist[i]; + float cost_wo_eob = cost; + + if (i < Se) + cost += actbl->ehufsi[0]; + + if (cost < best_cost) { + best_cost = cost; + last_coeff_idx = i; + best_cost_skip = cost_wo_eob; + } + } + } + + has_eob = (last_coeff_idx < Se) + (last_coeff_idx == Ss-1); + + /* Zero out coefficients that are part of runs */ + i = Se; + while (i >= Ss) + { + while (i > last_coeff_idx) { + int z = jpeg_natural_order[i]; + coef_blocks[bi][z] = 0; + i--; + } + last_coeff_idx = run_start[i]; + i--; + } + + if (cinfo->trellis_eob_opt) { + accumulated_zero_block_cost[bi+1] = accumulated_zero_block_cost[bi]; + accumulated_zero_block_cost[bi+1] += cost_all_zeros; + requires_eob[bi+1] = has_eob; + + 
best_cost = 1e38; + + if (has_eob != 2) { + for (i = 0; i <= bi; i++) { + int zero_block_run; + int nbits; + float cost; + + if (requires_eob[i] == 2) + continue; + + cost = best_cost_skip; /* cost of coding a nonzero block */ + cost += accumulated_zero_block_cost[bi]; + cost -= accumulated_zero_block_cost[i]; + cost += accumulated_block_cost[i]; + zero_block_run = bi - i + requires_eob[i]; + nbits = jpeg_nbits_table[zero_block_run]; + cost += actbl->ehufsi[16*nbits] + nbits; + + if (cost < best_cost) { + block_run_start[bi] = i; + best_cost = cost; + accumulated_block_cost[bi+1] = cost; + } + } + } + } + } + + if (cinfo->trellis_eob_opt) { + int last_block = num_blocks; + best_cost = 1e38; + + for (i = 0; i <= num_blocks; i++) { + int zero_block_run; + int nbits; + float cost = 0.0; + + if (requires_eob[i] == 2) + continue; + + cost += accumulated_zero_block_cost[num_blocks]; + cost -= accumulated_zero_block_cost[i]; + zero_block_run = num_blocks - i + requires_eob[i]; + nbits = jpeg_nbits_table[zero_block_run]; + cost += actbl->ehufsi[16*nbits] + nbits; + if (cost < best_cost) { + best_cost = cost; + last_block = i; + } + } + last_block--; + bi = num_blocks - 1; + while (bi >= 0) { + while (bi > last_block) { + for (j = Ss; j <= Se; j++) { + int z = jpeg_natural_order[j]; + coef_blocks[bi][z] = 0; + } + bi--; + } + last_block = block_run_start[bi]-1; + bi--; + } + free(accumulated_zero_block_cost); + free(accumulated_block_cost); + free(block_run_start); + free(requires_eob); + } + + if (cinfo->trellis_q_opt) { + for (bi = 0; bi < num_blocks; bi++) { + for (i = 1; i < DCTSIZE2; i++) { + norm_src[i] += src[bi][i] * coef_blocks[bi][i]; + norm_coef[i] += 8 * coef_blocks[bi][i] * coef_blocks[bi][i]; + } + } + } +} /* * Initialize FDCT manager. diff --git a/jchuff.h b/jchuff.h index a9599fc1..ac469dab 100644 --- a/jchuff.h +++ b/jchuff.h @@ -2,6 +2,8 @@ * jchuff.h * * Copyright (C) 1991-1997, Thomas G. Lane. 
+ * mozjpeg Modifications: + * Copyright (C) 2014, Mozilla Corporation. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -45,3 +47,7 @@ EXTERN(void) jpeg_make_c_derived_tbl /* Generate an optimal table definition given the specified counts */ EXTERN(void) jpeg_gen_optimal_table JPP((j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])); + +EXTERN(void) quantize_trellis + JPP((j_compress_ptr cinfo, c_derived_tbl *actbl, JBLOCKROW coef_blocks, JBLOCKROW src, JDIMENSION num_blocks, + JQUANT_TBL * qtbl, double *norm_src, double *norm_coef)); diff --git a/jcmaster.c b/jcmaster.c index a4ed4839..ec663619 100644 --- a/jcmaster.c +++ b/jcmaster.c @@ -27,7 +27,8 @@ typedef enum { main_pass, /* input data, also do first output step */ huff_opt_pass, /* Huffman code optimization pass */ - output_pass /* data output pass */ + output_pass, /* data output pass */ + trellis_pass /* trellis quantization pass */ } c_pass_type; typedef struct { @@ -41,6 +42,7 @@ typedef struct { int scan_number; /* current index in scan_info[] */ /* fields for scan optimisation */ + int pass_number_scan_opt_base; /* pass number where scan optimization begins */ unsigned char * scan_buffer[64]; /* buffer for a given scan */ unsigned long scan_size[64]; /* size for a given scan */ unsigned long best_cost; /* bit count for best frequency split */ @@ -326,9 +328,21 @@ select_scan_parameters (j_compress_ptr cinfo) int ci; #ifdef C_MULTISCAN_FILES_SUPPORTED - if (cinfo->scan_info != NULL) { + my_master_ptr master = (my_master_ptr) cinfo->master; + if (master->pass_number < master->pass_number_scan_opt_base) { + cinfo->comps_in_scan = 1; + if (cinfo->use_scans_in_trellis) { + cinfo->cur_comp_info[0] = &cinfo->comp_info[master->pass_number/(4*cinfo->trellis_num_loops)]; + cinfo->Ss = (master->pass_number%4 < 2) ? 1 : cinfo->trellis_freq_split+1; + cinfo->Se = (master->pass_number%4 < 2) ? 
cinfo->trellis_freq_split : DCTSIZE2-1; + } else { + cinfo->cur_comp_info[0] = &cinfo->comp_info[master->pass_number/(2*cinfo->trellis_num_loops)]; + cinfo->Ss = 1; + cinfo->Se = DCTSIZE2-1; + } + } + else if (cinfo->scan_info != NULL) { /* Prepare for current scan --- the script is already validated */ - my_master_ptr master = (my_master_ptr) cinfo->master; const jpeg_scan_info * scanptr = cinfo->scan_info + master->scan_number; cinfo->comps_in_scan = scanptr->comps_in_scan; @@ -467,6 +481,7 @@ METHODDEF(void) prepare_for_pass (j_compress_ptr cinfo) { my_master_ptr master = (my_master_ptr) cinfo->master; + cinfo->trellis_passes = master->pass_number < master->pass_number_scan_opt_base; switch (master->pass_type) { case main_pass: @@ -534,6 +549,22 @@ prepare_for_pass (j_compress_ptr cinfo) (*cinfo->marker->write_scan_header) (cinfo); master->pub.call_pass_startup = FALSE; break; + case trellis_pass: + if (master->pass_number%(cinfo->num_components*(cinfo->use_scans_in_trellis?4:2)) == 1 && cinfo->trellis_q_opt) { + int i, j; + + for (i = 0; i < NUM_QUANT_TBLS; i++) { + for (j = 1; j < DCTSIZE2; j++) { + cinfo->norm_src[i][j] = 0.0; + cinfo->norm_coef[i][j] = 0.0; + } + } + } + (*cinfo->entropy->start_pass) (cinfo, TRUE); + (*cinfo->coef->start_pass) (cinfo, JBUF_REQUANT); + master->pub.call_pass_startup = FALSE; + break; + default: ERREXIT(cinfo, JERR_NOT_COMPILED); } @@ -575,6 +606,16 @@ copy_buffer (j_compress_ptr cinfo, int scan_idx) unsigned long size = master->scan_size[scan_idx]; unsigned char * src = master->scan_buffer[scan_idx]; + int i; + + if (cinfo->err->trace_level > 0) { + fprintf(stderr, "SCAN "); + for (i = 0; i < cinfo->scan_info[scan_idx].comps_in_scan; i++) + fprintf(stderr, "%s%d", (i==0)?"":",", cinfo->scan_info[scan_idx].component_index[i]); + fprintf(stderr, ": %d %d", cinfo->scan_info[scan_idx].Ss, cinfo->scan_info[scan_idx].Se); + fprintf(stderr, " %d %d", cinfo->scan_info[scan_idx].Ah, cinfo->scan_info[scan_idx].Al); + fprintf(stderr, 
"\n"); + } while (size >= cinfo->dest->free_in_buffer) { @@ -615,7 +656,7 @@ select_scans (j_compress_ptr cinfo, int next_scan_number) master->best_Al_luma = Al; } else { master->scan_number = luma_freq_split_scan_start - 1; - master->pass_number = 2 * master->scan_number + 1; + master->pass_number = 2 * master->scan_number + 1 + master->pass_number_scan_opt_base; } } @@ -640,7 +681,7 @@ select_scans (j_compress_ptr cinfo, int next_scan_number) (idx == 3 && master->best_freq_split_idx_luma != 2) || (idx == 4 && master->best_freq_split_idx_luma != 4)) { master->scan_number = cinfo->num_scans_luma - 1; - master->pass_number = 2 * master->scan_number + 1; + master->pass_number = 2 * master->scan_number + 1 + master->pass_number_scan_opt_base; master->pub.is_last_pass = (master->pass_number == master->total_passes - 1); } } @@ -672,7 +713,7 @@ select_scans (j_compress_ptr cinfo, int next_scan_number) master->best_Al_chroma = Al; } else { master->scan_number = chroma_freq_split_scan_start - 1; - master->pass_number = 2 * master->scan_number + 1; + master->pass_number = 2 * master->scan_number + 1 + master->pass_number_scan_opt_base; } } @@ -700,7 +741,7 @@ select_scans (j_compress_ptr cinfo, int next_scan_number) (idx == 3 && master->best_freq_split_idx_chroma != 2) || (idx == 4 && master->best_freq_split_idx_chroma != 4)) { master->scan_number = cinfo->num_scans - 1; - master->pass_number = 2 * master->scan_number + 1; + master->pass_number = 2 * master->scan_number + 1 + master->pass_number_scan_opt_base; master->pub.is_last_pass = (master->pass_number == master->total_passes - 1); } } @@ -713,7 +754,7 @@ select_scans (j_compress_ptr cinfo, int next_scan_number) copy_buffer(cinfo, 0); - if (cinfo->num_scans > cinfo->num_scans_luma) { + if (cinfo->num_scans > cinfo->num_scans_luma && !cinfo->one_dc_scan) { base_scan_idx = cinfo->num_scans_luma; if (master->interleave_chroma_dc) @@ -791,13 +832,17 @@ finish_pass_master (j_compress_ptr cinfo) /* next pass is either 
output of scan 0 (after optimization) * or output of scan 1 (if no optimization). */ - master->pass_type = output_pass; - if (! cinfo->optimize_coding) - master->scan_number++; + if (cinfo->trellis_quant) + master->pass_type = trellis_pass; + else { + master->pass_type = output_pass; + if (! cinfo->optimize_coding) + master->scan_number++; + } break; case huff_opt_pass: /* next pass is always output of current scan */ - master->pass_type = output_pass; + master->pass_type = (master->pass_number < master->pass_number_scan_opt_base-1) ? trellis_pass : output_pass; break; case output_pass: /* next pass is either optimization or output of next scan */ @@ -811,6 +856,24 @@ finish_pass_master (j_compress_ptr cinfo) master->scan_number++; break; + case trellis_pass: + master->pass_type = (cinfo->optimize_coding || master->pass_number < master->pass_number_scan_opt_base-1) ? huff_opt_pass : output_pass; + + if ((master->pass_number+1)%(cinfo->num_components*(cinfo->use_scans_in_trellis?4:2)) == 0 && cinfo->trellis_q_opt) { + int i, j; + + for (i = 0; i < NUM_QUANT_TBLS; i++) { + for (j = 1; j < DCTSIZE2; j++) { + if (cinfo->norm_coef[i][j] != 0.0) { + int q = (int)(cinfo->norm_src[i][j] / cinfo->norm_coef[i][j] + 0.5); + if (q > 254) q = 254; + if (q < 1) q = 1; + cinfo->quant_tbl_ptrs[i]->quantval[j] = q; + } + } + } + } + break; } master->pass_number++; @@ -870,6 +933,13 @@ jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only) else master->total_passes = cinfo->num_scans; + if (cinfo->trellis_quant) { + if (cinfo->progressive_mode) + master->total_passes += ((cinfo->use_scans_in_trellis) ? 
4 : 2) * cinfo->num_components * cinfo->trellis_num_loops; + else + master->total_passes += 1; + } + if (cinfo->optimize_scans) { int i; master->best_Al_chroma = 0; @@ -877,4 +947,9 @@ jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only) for (i = 0; i < cinfo->num_scans; i++) master->scan_buffer[i] = NULL; } + + if (cinfo->trellis_quant) + master->pass_number_scan_opt_base = ((cinfo->use_scans_in_trellis) ? 4 : 2) * cinfo->num_components * cinfo->trellis_num_loops; + else + master->pass_number_scan_opt_base = 0; } diff --git a/jcparam.c b/jcparam.c index 89f4efd9..0ccb8b98 100644 --- a/jcparam.c +++ b/jcparam.c @@ -90,6 +90,16 @@ static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = { 99, 99, 99, 99, 99, 99, 99, 99 }; +static const unsigned int flat_quant_tbl[DCTSIZE2] = { + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16 +}; #if JPEG_LIB_VERSION >= 70 GLOBAL(void) @@ -101,9 +111,9 @@ jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline) { /* Set up two quantization tables using the specified scaling */ jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, - cinfo->q_scale_factor[0], force_baseline); + cinfo->q_scale_factor[0], force_baseline); jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, - cinfo->q_scale_factor[1], force_baseline); + cinfo->q_scale_factor[1], force_baseline); } #endif @@ -118,10 +128,17 @@ jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, */ { /* Set up two quantization tables using the specified scaling */ - jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, - scale_factor, force_baseline); - jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, - scale_factor, force_baseline); + if (cinfo->use_flat_quant_tbl) { + jpeg_add_quant_table(cinfo, 0, 
flat_quant_tbl, + scale_factor, force_baseline); + jpeg_add_quant_table(cinfo, 1, flat_quant_tbl, + scale_factor, force_baseline); + } else { + jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, + scale_factor, force_baseline); + jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, + scale_factor, force_baseline); + } } @@ -325,6 +342,8 @@ jpeg_set_defaults (j_compress_ptr cinfo) } #ifdef C_PROGRESSIVE_SUPPORTED + cinfo->scan_info = NULL; + cinfo->num_scans = 0; if (!cinfo->use_moz_defaults) { /* Default is no multiple-scan output */ cinfo->scan_info = NULL; @@ -399,6 +418,8 @@ jpeg_set_defaults (j_compress_ptr cinfo) jpeg_default_colorspace(cinfo); + cinfo->one_dc_scan = TRUE; + #ifdef C_PROGRESSIVE_SUPPORTED if (cinfo->use_moz_defaults) { cinfo->optimize_scans = TRUE; @@ -406,6 +427,16 @@ jpeg_set_defaults (j_compress_ptr cinfo) } else cinfo->optimize_scans = FALSE; #endif + + cinfo->trellis_quant = cinfo->use_moz_defaults; + cinfo->lambda_log_scale1 = 16.0; + cinfo->lambda_log_scale2 = 15.5; + + cinfo->use_lambda_weight_tbl = TRUE; + cinfo->use_scans_in_trellis = FALSE; + cinfo->trellis_freq_split = 8; + cinfo->trellis_num_loops = 1; + cinfo->trellis_q_opt = FALSE; } @@ -673,7 +704,10 @@ jpeg_search_progression (j_compress_ptr cinfo) /* last 4 done conditionally */ /* luma DC by itself */ - scanptr = fill_dc_scans(scanptr, 1, 0, 0); + if (cinfo->one_dc_scan) + scanptr = fill_dc_scans(scanptr, ncomps, 0, 0); + else + scanptr = fill_dc_scans(scanptr, 1, 0, 0); scanptr = fill_a_scan(scanptr, 0, 1, 8, 0, 0); scanptr = fill_a_scan(scanptr, 0, 9, 63, 0, 0); @@ -761,7 +795,7 @@ jpeg_simple_progression (j_compress_ptr cinfo) nscans = 10; } else { /* All-purpose script for other color spaces. 
*/ - if (cinfo->use_moz_defaults) { + if (cinfo->use_moz_defaults == TRUE) { if (ncomps > MAX_COMPS_IN_SCAN) nscans = 5 * ncomps; /* 2 DC + 4 AC scans per component */ else @@ -793,11 +827,15 @@ jpeg_simple_progression (j_compress_ptr cinfo) if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) { /* Custom script for YCbCr color images. */ - if (cinfo->use_moz_defaults) { + if (cinfo->use_moz_defaults == TRUE) { /* scan defined in jpeg_scan_rgb.txt in jpgcrush */ /* Initial DC scan */ - scanptr = fill_dc_scans(scanptr, 1, 0, 0); - scanptr = fill_a_scan_pair(scanptr, 1, 0, 0, 0, 0); + if (cinfo->one_dc_scan) + scanptr = fill_dc_scans(scanptr, ncomps, 0, 0); + else { + scanptr = fill_dc_scans(scanptr, 1, 0, 0); + scanptr = fill_a_scan_pair(scanptr, 1, 0, 0, 0, 0); + } /* Low frequency AC scans */ scanptr = fill_a_scan(scanptr, 0, 1, 8, 0, 2); scanptr = fill_a_scan(scanptr, 1, 1, 8, 0, 0); @@ -832,7 +870,7 @@ jpeg_simple_progression (j_compress_ptr cinfo) } } else { /* All-purpose script for other color spaces. 
*/ - if (cinfo->use_moz_defaults) { + if (cinfo->use_moz_defaults == TRUE) { /* scan defined in jpeg_scan_bw.txt in jpgcrush */ /* DC component, no successive approximation */ scanptr = fill_dc_scans(scanptr, ncomps, 0, 0); diff --git a/jcphuff.c b/jcphuff.c index 340e4645..997c377b 100644 --- a/jcphuff.c +++ b/jcphuff.c @@ -169,6 +169,14 @@ start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 257 * SIZEOF(long)); MEMZERO(entropy->count_ptrs[tbl], 257 * SIZEOF(long)); + if (cinfo->trellis_passes) { + /* When generating tables for trellis passes, make sure that all */ + /* codewords have an assigned length */ + int i, j; + for (i = 0; i < 16; i++) + for (j = 0; j < 12; j++) + entropy->count_ptrs[tbl][16*i+j] = 1; + } } else { /* Compute derived values for Huffman table */ /* We may do this more than once for a table, but it's not expensive */ diff --git a/jpegint.h b/jpegint.h index 78717482..6ab4d5d7 100644 --- a/jpegint.h +++ b/jpegint.h @@ -3,6 +3,8 @@ * * Copyright (C) 1991-1997, Thomas G. Lane. * Modified 1997-2009 by Guido Vollbeding. + * mozjpeg Modifications: + * Copyright (C) 2014, Mozilla Corporation. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -19,7 +21,9 @@ typedef enum { /* Operating modes for buffer controllers */ /* Remaining modes require a full-image buffer to have been created */ JBUF_SAVE_SOURCE, /* Run source subobject only, save output */ JBUF_CRANK_DEST, /* Run dest subobject only, using saved data */ - JBUF_SAVE_AND_PASS /* Run both subobjects, save output */ + JBUF_SAVE_AND_PASS, /* Run both subobjects, save output */ + JBUF_REQUANT /* Requantize */ + } J_BUF_MODE; /* Values of global_state field (jdapi.c has some dependencies on ordering!) 
*/ @@ -107,7 +111,7 @@ struct jpeg_forward_dct { jpeg_component_info * compptr, JSAMPARRAY sample_data, JBLOCKROW coef_blocks, JDIMENSION start_row, JDIMENSION start_col, - JDIMENSION num_blocks)); + JDIMENSION num_blocks, JBLOCKROW dst)); }; /* Entropy encoding */ diff --git a/jpeglib.h b/jpeglib.h index 500af1a0..00a4658d 100644 --- a/jpeglib.h +++ b/jpeglib.h @@ -376,8 +376,22 @@ struct jpeg_compress_struct { int smoothing_factor; /* 1..100, or 0 for no input smoothing */ J_DCT_METHOD dct_method; /* DCT algorithm selector */ - boolean use_moz_defaults; /* TRUE if using Mozilla defaults */ + boolean use_moz_defaults; /* TRUE=use Mozilla defaults */ boolean optimize_scans; /* TRUE=optimize progressive coding scans */ + boolean one_dc_scan; /* TRUE=use a single DC scan interleaving all components */ + boolean trellis_quant; /* TRUE=use trellis quantization */ + boolean trellis_eob_opt; /* TRUE=optimize for sequences of EOB */ + boolean use_flat_quant_tbl; /* TRUE=use flat quantization table */ + boolean use_lambda_weight_tbl; /* TRUE=use lambda weighting table */ + boolean use_scans_in_trellis; /* TRUE=use scans in trellis optimization */ + boolean trellis_passes; /* TRUE=currently doing trellis-related passes */ + boolean trellis_q_opt; /* TRUE=optimize quant table in trellis loop */ + + double norm_src[NUM_QUANT_TBLS][DCTSIZE2]; + double norm_coef[NUM_QUANT_TBLS][DCTSIZE2]; + + int trellis_freq_split; /* splitting point for frequency in trellis quantization */ + int trellis_num_loops; /* number of trellis loops */ int num_scans_luma; /* # of entries in scan_info array pertaining to luma (used when optimize_scans is TRUE */ int num_scans_luma_dc; @@ -387,6 +401,9 @@ struct jpeg_compress_struct { int Al_max_luma; /* maximum value of Al tested when optimizing scans (luma) */ int Al_max_chroma; /* maximum value of Al tested when optimizing scans (chroma) */ + float lambda_log_scale1; + float lambda_log_scale2; + /* The restart interval can be specified in absolute 
MCUs by setting * restart_interval, or in MCU rows by setting restart_in_rows * (in which case the correct restart_interval will be figured diff --git a/jversion.h b/jversion.h index 68b3743c..437b1712 100644 --- a/jversion.h +++ b/jversion.h @@ -5,6 +5,8 @@ * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding. * Modifications: * Copyright (C) 2010, 2012-2013, D. R. Commander. + * mozjpeg Modifications: + * Copyright (C) 2014, Mozilla Corporation. * For conditions of distribution and use, see the accompanying README file. * * This file contains software version identification. @@ -29,4 +31,6 @@ "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \ "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \ "Copyright (C) 2009-2013 D. R. Commander\n" \ - "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)" + "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \ + "Copyright (C) 2014 Mozilla Corporation\n" +