diff --git a/.gitauthors b/.gitauthors new file mode 100644 index 00000000..7b724b5e --- /dev/null +++ b/.gitauthors @@ -0,0 +1,7 @@ + +dcommander = DRC +astrand = Peter Åstrand +ossman_ = Pierre Ossman +const_k = Constantin Kaplinsky +atkac = Adam Tkac + diff --git a/BUILDING.txt b/BUILDING.txt index 70d71240..b94e96f8 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -99,7 +99,7 @@ This will generate the following files under .libs/ cygjpeg-{version}.dll (Cygwin) Shared library for the libjpeg API - By default, {version} is 62.1.0, 7.1.0, or 8.0.2, depending on whether + By default, {version} is 62.2.0, 7.2.0, or 8.1.2, depending on whether libjpeg v6b (default), v7, or v8 emulation is enabled. If using Cygwin, {version} is 62, 7, or 8. diff --git a/CMakeLists.txt b/CMakeLists.txt index f0832421..709b90c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -180,12 +180,12 @@ endif() # set(JPEG_SOURCES jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c - jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c jcphuff.c - jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c jdatadst.c jdatasrc.c - jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdinput.c jdmainct.c jdmarker.c - jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c jdtrans.c jerror.c - jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c - jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c) + jccompat.c jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c + jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c jdatadst.c + jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdinput.c jdmainct.c + jdmarker.c jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c jdtrans.c + jerror.c jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c jidctint.c + jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c) if(WITH_ARITH_ENC OR WITH_ARITH_DEC) set(JPEG_SOURCES ${JPEG_SOURCES} jaricom.c) diff --git a/ChangeLog.txt b/ChangeLog.txt index 
b158dc4d..ae88b498 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -1,5 +1,21 @@ -1.4 pre-beta -============ +1.4.0 +===== + +[1] Fixed a build issue on OS X PowerPC platforms (md5cmp failed to build +because OS X does not provide the le32toh() and htole32() functions.) + +[2] The non-SIMD RGB565 color conversion code did not work correctly on big +endian machines. This has been fixed. + +[3] Fixed an issue in tjPlaneSizeYUV() whereby it would erroneously return 1 +instead of -1 if componentID was > 0 and subsamp was TJSAMP_GRAY. + +[4] Fixed an issue in tjBufSizeYUV2() whereby it would erroneously return 0 +instead of -1 if width was < 1. + + +1.3.90 (1.4 beta1) +================== [1] New features in the TurboJPEG API: -- YUV planar images can now be generated with an arbitrary line padding diff --git a/Makefile.am b/Makefile.am index cc5435e2..1b47317c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -16,13 +16,13 @@ HDRS = jchuff.h jdct.h jdhuff.h jerror.h jinclude.h jmemsys.h jmorecfg.h \ jpeg_nbits_table.h libjpeg_la_SOURCES = $(HDRS) jcapimin.c jcapistd.c jccoefct.c jccolor.c \ - jcdctmgr.c jchuff.c jcinit.c jcmainct.c jcmarker.c jcmaster.c \ - jcomapi.c jcparam.c jcphuff.c jcprepct.c jcsample.c jctrans.c \ - jdapimin.c jdapistd.c jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c \ - jddctmgr.c jdhuff.c jdinput.c jdmainct.c jdmarker.c jdmaster.c \ - jdmerge.c jdphuff.c jdpostct.c jdsample.c jdtrans.c jerror.c \ - jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c jidctint.c \ - jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c + jccompat.c jcdctmgr.c jchuff.c jcinit.c jcmainct.c jcmarker.c \ + jcmaster.c jcomapi.c jcparam.c jcphuff.c jcprepct.c jcsample.c \ + jctrans.c jdapimin.c jdapistd.c jdatadst.c jdatasrc.c jdcoefct.c \ + jdcolor.c jddctmgr.c jdhuff.c jdinput.c jdmainct.c jdmarker.c \ + jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c jdtrans.c \ + jerror.c jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c \ + jidctint.c jidctred.c 
jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c if WITH_ARITH libjpeg_la_SOURCES += jaricom.c diff --git a/cjpeg.c b/cjpeg.c index a56a1ecd..098dbc32 100644 --- a/cjpeg.c +++ b/cjpeg.c @@ -188,6 +188,7 @@ usage (void) fprintf(stderr, " -tune-ssim Tune trellis optimization for SSIM\n"); fprintf(stderr, " -tune-ms-ssim Tune trellis optimization for MS-SSIM\n"); fprintf(stderr, "Switches for advanced users:\n"); + fprintf(stderr, " -noovershoot Disable black-on-white deringing via overshoot\n"); #ifdef C_ARITH_CODING_SUPPORTED fprintf(stderr, " -arithmetic Use arithmetic coding\n"); #endif @@ -303,9 +304,10 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, cinfo->dct_method = JDCT_IFAST; } else if (keymatch(argv[argn], "float", 2)) { cinfo->dct_method = JDCT_FLOAT; - } else + } else { fprintf(stderr, "%s: invalid argument for dct\n", progname); usage(); + } } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) { /* Enable debug printouts. */ @@ -323,10 +325,10 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, cinfo->err->trace_level++; } else if (keymatch(arg, "fastcrush", 4)) { - cinfo->optimize_scans = FALSE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_OPTIMIZE_SCANS, FALSE); } else if (keymatch(arg, "flat", 4)) { - cinfo->use_flat_quant_tbl = TRUE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_USE_FLAT_QUANT_TBL, TRUE); jpeg_set_quality(cinfo, 75, TRUE); } else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) { @@ -340,12 +342,14 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, } else if (keymatch(arg, "lambda1", 7)) { if (++argn >= argc) /* advance to next argument */ usage(); - cinfo->lambda_log_scale1 = atof(argv[argn]); + jpeg_c_set_float_param(cinfo, JFLOAT_LAMBDA_LOG_SCALE1, + atof(argv[argn])); } else if (keymatch(arg, "lambda2", 7)) { if (++argn >= argc) /* advance to next argument */ usage(); - cinfo->lambda_log_scale2 = atof(argv[argn]); + jpeg_c_set_float_param(cinfo, 
JFLOAT_LAMBDA_LOG_SCALE2, + atof(argv[argn])); } else if (keymatch(arg, "maxmemory", 3)) { /* Maximum memory in Kb (or Mb with 'm'). */ @@ -361,7 +365,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, cinfo->mem->max_memory_to_use = lval * 1000L; } else if (keymatch(arg, "opt-dc-scan", 6)) { - cinfo->one_dc_scan = FALSE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_ONE_DC_SCAN, FALSE); } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) { /* Enable entropy parm optimization. */ @@ -448,7 +452,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, } else if (keymatch(arg, "revert", 3)) { /* revert to old JPEG default */ - cinfo->use_moz_defaults = FALSE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_USE_MOZ_DEFAULTS, FALSE); jpeg_set_defaults(cinfo); } else if (keymatch(arg, "sample", 2)) { @@ -487,8 +491,8 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, cinfo->smoothing_factor = val; } else if (keymatch(arg, "split-dc-scans", 3)) { - cinfo->one_dc_scan = FALSE; - cinfo->sep_dc_scan = TRUE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_ONE_DC_SCAN, FALSE); + jpeg_c_set_bool_param(cinfo, JBOOLEAN_SEP_DC_SCAN, TRUE); } else if (keymatch(arg, "targa", 1)) { /* Input file is Targa format. 
*/ @@ -496,44 +500,46 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, } else if (keymatch(arg, "notrellis-dc", 11)) { /* disable trellis quantization */ - cinfo->trellis_quant_dc = FALSE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_TRELLIS_QUANT_DC, FALSE); } else if (keymatch(arg, "notrellis", 1)) { /* disable trellis quantization */ - cinfo->trellis_quant = FALSE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_TRELLIS_QUANT, FALSE); } else if (keymatch(arg, "trellis-dc", 9)) { /* enable DC trellis quantization */ - cinfo->trellis_quant_dc = TRUE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_TRELLIS_QUANT_DC, TRUE); } else if (keymatch(arg, "tune-psnr", 6)) { - cinfo->use_flat_quant_tbl = TRUE; - cinfo->lambda_log_scale1 = 9.0; - cinfo->lambda_log_scale2 = 0.0; - cinfo->use_lambda_weight_tbl = FALSE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_USE_FLAT_QUANT_TBL, TRUE); + jpeg_c_set_float_param(cinfo, JFLOAT_LAMBDA_LOG_SCALE1, 9.0); + jpeg_c_set_float_param(cinfo, JFLOAT_LAMBDA_LOG_SCALE2, 0.0); + jpeg_c_set_bool_param(cinfo, JBOOLEAN_USE_LAMBDA_WEIGHT_TBL, FALSE); jpeg_set_quality(cinfo, 75, TRUE); } else if (keymatch(arg, "tune-ssim", 6)) { - cinfo->use_flat_quant_tbl = TRUE; - cinfo->lambda_log_scale1 = 12.0; - cinfo->lambda_log_scale2 = 13.5; - cinfo->use_lambda_weight_tbl = FALSE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_USE_FLAT_QUANT_TBL, TRUE); + jpeg_c_set_float_param(cinfo, JFLOAT_LAMBDA_LOG_SCALE1, 12.0); + jpeg_c_set_float_param(cinfo, JFLOAT_LAMBDA_LOG_SCALE2, 13.5); + jpeg_c_set_bool_param(cinfo, JBOOLEAN_USE_LAMBDA_WEIGHT_TBL, FALSE); jpeg_set_quality(cinfo, 75, TRUE); } else if (keymatch(arg, "tune-ms-ssim", 6)) { - cinfo->use_flat_quant_tbl = FALSE; - cinfo->lambda_log_scale1 = 14.25; - cinfo->lambda_log_scale2 = 12.75; - cinfo->use_lambda_weight_tbl = TRUE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_USE_FLAT_QUANT_TBL, FALSE); + jpeg_c_set_float_param(cinfo, JFLOAT_LAMBDA_LOG_SCALE1, 14.25); + jpeg_c_set_float_param(cinfo, JFLOAT_LAMBDA_LOG_SCALE2, 
12.75); + jpeg_c_set_bool_param(cinfo, JBOOLEAN_USE_LAMBDA_WEIGHT_TBL, TRUE); jpeg_set_quality(cinfo, 75, TRUE); } else if (keymatch(arg, "tune-hvs-psnr", 6)) { - cinfo->use_flat_quant_tbl = FALSE; - cinfo->lambda_log_scale1 = 16.0; - cinfo->lambda_log_scale2 = 15.5; - cinfo->use_lambda_weight_tbl = TRUE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_USE_FLAT_QUANT_TBL, FALSE); + jpeg_c_set_float_param(cinfo, JFLOAT_LAMBDA_LOG_SCALE1, 16.0); + jpeg_c_set_float_param(cinfo, JFLOAT_LAMBDA_LOG_SCALE2, 15.5); + jpeg_c_set_bool_param(cinfo, JBOOLEAN_USE_LAMBDA_WEIGHT_TBL, TRUE); jpeg_set_quality(cinfo, 75, TRUE); - + + } else if (keymatch(arg, "noovershoot", 11)) { + jpeg_c_set_bool_param(cinfo, JBOOLEAN_OVERSHOOT_DERINGING, FALSE); } else { fprintf(stderr, "%s: unknown option '%s'\n", progname, arg); usage(); /* bogus switch */ @@ -628,7 +634,8 @@ main (int argc, char **argv) */ cinfo.in_color_space = JCS_RGB; /* arbitrary guess */ - cinfo.use_moz_defaults = TRUE; + if (jpeg_c_bool_param_supported(&cinfo, JBOOLEAN_USE_MOZ_DEFAULTS)) + jpeg_c_set_bool_param(&cinfo, JBOOLEAN_USE_MOZ_DEFAULTS, TRUE); jpeg_set_defaults(&cinfo); /* Scan command line to find file names. 
diff --git a/configure.ac b/configure.ac index 12387c6a..efbbdb86 100644 --- a/configure.ac +++ b/configure.ac @@ -189,7 +189,7 @@ fi RPM_CONFIG_ARGS= # Memory source/destination managers -SO_AGE=0 +SO_AGE=1 MEM_SRCDST_FUNCTIONS= if test "x${with_jpeg8}" != "xyes"; then AC_MSG_CHECKING([whether to include in-memory source/destination managers]) @@ -200,7 +200,7 @@ if test "x${with_jpeg8}" != "xyes"; then AC_MSG_RESULT(yes) AC_DEFINE([MEM_SRCDST_SUPPORTED], [1], [Support in-memory source/destination managers]) - SO_AGE=1 + SO_AGE=2 MEM_SRCDST_FUNCTIONS="global: jpeg_mem_dest; jpeg_mem_src;"; else AC_MSG_RESULT(no) @@ -487,6 +487,10 @@ if test "x${with_simd}" != "xno"; then fi fi ;; + powerpc*) + AC_MSG_RESULT([yes (powerpc)]) + simd_arch=powerpc + ;; *) AC_MSG_RESULT([no ("$host_cpu")]) with_simd=no; @@ -512,6 +516,7 @@ AM_CONDITIONAL([SIMD_X86_64], [test "x$simd_arch" = "xx86_64"]) AM_CONDITIONAL([SIMD_ARM], [test "x$simd_arch" = "xarm"]) AM_CONDITIONAL([SIMD_ARM_64], [test "x$simd_arch" = "xaarch64"]) AM_CONDITIONAL([SIMD_MIPS], [test "x$simd_arch" = "xmips"]) +AM_CONDITIONAL([SIMD_POWERPC], [test "x$simd_arch" = "xpowerpc"]) AM_CONDITIONAL([X86_64], [test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xamd64"]) AM_CONDITIONAL([WITH_TURBOJPEG], [test "x$with_turbojpeg" != "xno"]) diff --git a/example.c b/example.c index 0b9574ef..72939acf 100644 --- a/example.c +++ b/example.c @@ -126,7 +126,7 @@ write_JPEG_file (char * filename, int quality) cinfo.image_height = image_height; cinfo.input_components = 3; /* # of color components per pixel */ cinfo.in_color_space = JCS_RGB; /* colorspace of input image */ - cinfo.use_moz_defaults = TRUE; /* use Mozilla defaults for improved compression */ + jpeg_c_set_bool_param(&cinfo, JBOOLEAN_USE_MOZ_DEFAULTS, TRUE); /* use Mozilla defaults for improved compression */ /* Now use the library's routine to set default compression parameters. 
* (You must set at least cinfo.in_color_space before calling this, diff --git a/git-init-svn.sh b/git-init-svn.sh new file mode 100755 index 00000000..786d775e --- /dev/null +++ b/git-init-svn.sh @@ -0,0 +1,26 @@ +#!/bin/sh +set -e + +# Make a local, clean libjpeg-turbo branch that tracks the remote libjpeg-turbo. +# This will allow pushing of imported libjpeg-turbo commits to the mozjpeg repository. +# The libjpeg-turbo branch must only contain imported SVN commits (with git-svn-id: in the message). +git branch -f -t libjpeg-turbo origin/libjpeg-turbo + +# Configure git-svn. "git svn fetch" will rebuild remaining git-svn metadata. +git config svn-remote.svn.url svn://svn.code.sf.net/p/libjpeg-turbo/code +git config svn-remote.svn.fetch trunk:refs/heads/libjpeg-turbo + +# Enable mapping of SVN usernames to git authors. +git config svn.authorsfile .gitauthors + +# Mark which libjpeg-turbo commit has been used to start mozjpeg. +# Required for accurate merging and blame. +echo > .git/info/grafts "72b66f9c77b3e4ae363b21e48145f635cec0b193 540789427ccae8e9e778151cbc16ab8ee88ac6a8" + +# To get changes from SVN: +# git svn fetch +# git push origin libjpeg-turbo +# +# To merge SVN changes with mozjpeg: +# git checkout master +# git merge libjpeg-turbo diff --git a/jcapimin.c b/jcapimin.c index 3b005d3f..b16f9e40 100644 --- a/jcapimin.c +++ b/jcapimin.c @@ -4,8 +4,8 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1998, Thomas G. Lane. * Modified 2003-2010 by Guido Vollbeding. - * It was modified by The libjpeg-turbo Project to include only code relevant - * to libjpeg-turbo. + * libjpeg-turbo Modifications: + * Copyright (C) 2014, D. R. Commander. * For conditions of distribution and use, see the accompanying README file. 
* * This file contains application interface code for the compression half @@ -22,6 +22,7 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" +#include "jmemsys.h" /* @@ -91,6 +92,13 @@ jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize) /* OK, I'm ready */ cinfo->global_state = CSTATE_START; + + /* The master struct is used to store extension parameters, so we allocate it + * here. It is later reallocated by jinit_c_master_control(). + */ + cinfo->master = (struct jpeg_comp_master *) + jpeg_get_small ((j_common_ptr) cinfo, sizeof(struct jpeg_comp_master)); + MEMZERO(cinfo->master, sizeof(struct jpeg_comp_master)); } diff --git a/jcapistd.c b/jcapistd.c index 7c7bc1c3..2cc6d325 100644 --- a/jcapistd.c +++ b/jcapistd.c @@ -46,8 +46,9 @@ jpeg_start_compress (j_compress_ptr cinfo, boolean write_all_tables) jpeg_suppress_tables(cinfo, FALSE); /* mark all tables to be written */ /* setting up scan optimisation pattern failed, disable scan optimisation */ - if (cinfo->num_scans_luma == 0 || cinfo->scan_info == NULL || cinfo->num_scans == 0) - cinfo->optimize_scans = FALSE; + if (cinfo->master->num_scans_luma == 0 || cinfo->scan_info == NULL || + cinfo->num_scans == 0) + cinfo->master->optimize_scans = FALSE; /* (Re)initialize error mgr and destination modules */ (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo); diff --git a/jccoefct.c b/jccoefct.c index 5d621508..ad8e4e36 100644 --- a/jccoefct.c +++ b/jccoefct.c @@ -402,7 +402,12 @@ compress_trellis_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) */ for (block_row = 0; block_row < block_rows; block_row++) { thisblockrow = buffer[block_row]; - quantize_trellis(cinfo, dctbl, actbl, thisblockrow, buffer_dst[block_row], blocks_across, cinfo->quant_tbl_ptrs[compptr->quant_tbl_no], cinfo->norm_src[compptr->quant_tbl_no], cinfo->norm_coef[compptr->quant_tbl_no], &lastDC); + quantize_trellis(cinfo, dctbl, actbl, thisblockrow, + buffer_dst[block_row], blocks_across, + 
cinfo->quant_tbl_ptrs[compptr->quant_tbl_no], + cinfo->master->norm_src[compptr->quant_tbl_no], + cinfo->master->norm_coef[compptr->quant_tbl_no], + &lastDC); if (ndummy > 0) { /* Create dummy blocks at the right edge of the image. */ diff --git a/jccompat.c b/jccompat.c new file mode 100644 index 00000000..cce130ef --- /dev/null +++ b/jccompat.c @@ -0,0 +1,217 @@ +/* + * jccompat.c + * + * Copyright (C) 2014, D. R. Commander. + * Copyright (C) 2014, Mozilla Corporation. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains accessor functions for extension parameters. These + * allow for extending the functionality of the libjpeg API without breaking + * backward ABI compatibility. + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" + + +GLOBAL(boolean) +jpeg_c_bool_param_supported (j_compress_ptr cinfo, J_BOOLEAN_PARAM param) +{ + switch (param) { + case JBOOLEAN_USE_MOZ_DEFAULTS: + case JBOOLEAN_OPTIMIZE_SCANS: + case JBOOLEAN_ONE_DC_SCAN: + case JBOOLEAN_SEP_DC_SCAN: + case JBOOLEAN_TRELLIS_QUANT: + case JBOOLEAN_TRELLIS_QUANT_DC: + case JBOOLEAN_TRELLIS_EOB_OPT: + case JBOOLEAN_USE_FLAT_QUANT_TBL: + case JBOOLEAN_USE_LAMBDA_WEIGHT_TBL: + case JBOOLEAN_USE_SCANS_IN_TRELLIS: + case JBOOLEAN_TRELLIS_PASSES: + case JBOOLEAN_TRELLIS_Q_OPT: + case JBOOLEAN_OVERSHOOT_DERINGING: + return TRUE; + } + + return FALSE; +} + + +GLOBAL(void) +jpeg_c_set_bool_param (j_compress_ptr cinfo, J_BOOLEAN_PARAM param, + boolean value) +{ + switch(param) { + case JBOOLEAN_USE_MOZ_DEFAULTS: + cinfo->master->use_moz_defaults = value; + break; + case JBOOLEAN_OPTIMIZE_SCANS: + cinfo->master->optimize_scans = value; + break; + case JBOOLEAN_ONE_DC_SCAN: + cinfo->master->one_dc_scan = value; + break; + case JBOOLEAN_SEP_DC_SCAN: + cinfo->master->sep_dc_scan = value; + break; + case JBOOLEAN_TRELLIS_QUANT: + cinfo->master->trellis_quant = value; + break; + case JBOOLEAN_TRELLIS_QUANT_DC: + 
cinfo->master->trellis_quant_dc = value; + break; + case JBOOLEAN_TRELLIS_EOB_OPT: + cinfo->master->trellis_eob_opt = value; + break; + case JBOOLEAN_USE_FLAT_QUANT_TBL: + cinfo->master->use_flat_quant_tbl = value; + break; + case JBOOLEAN_USE_LAMBDA_WEIGHT_TBL: + cinfo->master->use_lambda_weight_tbl = value; + break; + case JBOOLEAN_USE_SCANS_IN_TRELLIS: + cinfo->master->use_scans_in_trellis = value; + break; + case JBOOLEAN_TRELLIS_PASSES: + cinfo->master->trellis_passes = value; + break; + case JBOOLEAN_TRELLIS_Q_OPT: + cinfo->master->trellis_q_opt = value; + break; + case JBOOLEAN_OVERSHOOT_DERINGING: + cinfo->master->overshoot_deringing = value; + break; + default: + ERREXIT(cinfo, JERR_BAD_PARAM); + } +} + + +GLOBAL(boolean) +jpeg_c_get_bool_param (j_compress_ptr cinfo, J_BOOLEAN_PARAM param) +{ + switch(param) { + case JBOOLEAN_USE_MOZ_DEFAULTS: + return cinfo->master->use_moz_defaults; + case JBOOLEAN_OPTIMIZE_SCANS: + return cinfo->master->optimize_scans; + case JBOOLEAN_ONE_DC_SCAN: + return cinfo->master->one_dc_scan; + case JBOOLEAN_SEP_DC_SCAN: + return cinfo->master->sep_dc_scan; + case JBOOLEAN_TRELLIS_QUANT: + return cinfo->master->trellis_quant; + case JBOOLEAN_TRELLIS_QUANT_DC: + return cinfo->master->trellis_quant_dc; + case JBOOLEAN_TRELLIS_EOB_OPT: + return cinfo->master->trellis_eob_opt; + case JBOOLEAN_USE_FLAT_QUANT_TBL: + return cinfo->master->use_flat_quant_tbl; + case JBOOLEAN_USE_LAMBDA_WEIGHT_TBL: + return cinfo->master->use_lambda_weight_tbl; + case JBOOLEAN_USE_SCANS_IN_TRELLIS: + return cinfo->master->use_scans_in_trellis; + case JBOOLEAN_TRELLIS_PASSES: + return cinfo->master->trellis_passes; + case JBOOLEAN_TRELLIS_Q_OPT: + return cinfo->master->trellis_q_opt; + case JBOOLEAN_OVERSHOOT_DERINGING: + return cinfo->master->overshoot_deringing; + default: + ERREXIT(cinfo, JERR_BAD_PARAM); + } + + return FALSE; +} + + +GLOBAL(boolean) +jpeg_c_float_param_supported (j_compress_ptr cinfo, J_FLOAT_PARAM param) +{ + switch (param) { + case 
JFLOAT_LAMBDA_LOG_SCALE1: + case JFLOAT_LAMBDA_LOG_SCALE2: + return TRUE; + } + + return FALSE; +} + + +GLOBAL(void) +jpeg_c_set_float_param (j_compress_ptr cinfo, J_FLOAT_PARAM param, float value) +{ + switch (param) { + case JFLOAT_LAMBDA_LOG_SCALE1: + cinfo->master->lambda_log_scale1 = value; + break; + case JFLOAT_LAMBDA_LOG_SCALE2: + cinfo->master->lambda_log_scale2 = value; + break; + default: + ERREXIT(cinfo, JERR_BAD_PARAM); + } +} + + +GLOBAL(float) +jpeg_c_get_float_param (j_compress_ptr cinfo, J_FLOAT_PARAM param) +{ + switch (param) { + case JFLOAT_LAMBDA_LOG_SCALE1: + return cinfo->master->lambda_log_scale1; + case JFLOAT_LAMBDA_LOG_SCALE2: + return cinfo->master->lambda_log_scale2; + default: + ERREXIT(cinfo, JERR_BAD_PARAM); + } + + return -1; +} + + +GLOBAL(boolean) +jpeg_c_int_param_supported (j_compress_ptr cinfo, J_INT_PARAM param) +{ + switch (param) { + case JINT_TRELLIS_FREQ_SPLIT: + case JINT_TRELLIS_NUM_LOOPS: + return TRUE; + } + + return FALSE; +} + + +GLOBAL(void) +jpeg_c_set_int_param (j_compress_ptr cinfo, J_INT_PARAM param, int value) +{ + switch (param) { + case JINT_TRELLIS_FREQ_SPLIT: + cinfo->master->trellis_freq_split = value; + break; + case JINT_TRELLIS_NUM_LOOPS: + cinfo->master->trellis_num_loops = value; + break; + default: + ERREXIT(cinfo, JERR_BAD_PARAM); + } +} + + +GLOBAL(int) +jpeg_c_get_int_param (j_compress_ptr cinfo, J_INT_PARAM param) +{ + switch (param) { + case JINT_TRELLIS_FREQ_SPLIT: + return cinfo->master->trellis_freq_split; + case JINT_TRELLIS_NUM_LOOPS: + return cinfo->master->trellis_num_loops; + default: + ERREXIT(cinfo, JERR_BAD_PARAM); + } + + return -1; +} diff --git a/jcdctmgr.c b/jcdctmgr.c index 6cab5ab5..938325c6 100644 --- a/jcdctmgr.c +++ b/jcdctmgr.c @@ -30,6 +30,9 @@ typedef void (*forward_DCT_method_ptr) (DCTELEM * data); typedef void (*float_DCT_method_ptr) (FAST_FLOAT * data); +typedef void (*preprocess_method_ptr)(DCTELEM*, const JQUANT_TBL*); +typedef void 
(*float_preprocess_method_ptr)(FAST_FLOAT*, const JQUANT_TBL*); + typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace); @@ -51,6 +54,7 @@ typedef struct { /* Pointer to the DCT routine actually in use */ forward_DCT_method_ptr dct; convsamp_method_ptr convsamp; + preprocess_method_ptr preprocess; quantize_method_ptr quantize; /* The actual post-DCT divisors --- not identical to the quant table @@ -66,6 +70,7 @@ typedef struct { /* Same as above for the floating-point case. */ float_DCT_method_ptr float_dct; float_convsamp_method_ptr float_convsamp; + float_preprocess_method_ptr float_preprocess; float_quantize_method_ptr float_quantize; FAST_FLOAT * float_divisors[NUM_QUANT_TBLS]; FAST_FLOAT * float_workspace; @@ -349,6 +354,190 @@ start_pass_fdctmgr (j_compress_ptr cinfo) } } +METHODDEF(float) +catmull_rom(const DCTELEM value1, const DCTELEM value2, const DCTELEM value3, const DCTELEM value4, const float t, int size) +{ + const int tan1 = (value3 - value1) * size; + const int tan2 = (value4 - value2) * size; + + const float t2 = t * t; + const float t3 = t2 * t; + + const float f1 = 2.f * t3 - 3.f * t2 + 1.f; + const float f2 = -2.f * t3 + 3.f * t2; + const float f3 = t3 - 2.f * t2 + t; + const float f4 = t3 - t2; + + return value2 * f1 + tan1 * f3 + + value3 * f2 + tan2 * f4; +} + +/** Prevents visible ringing artifacts near hard edges on white backgrounds. + + 1. JPEG can encode samples with higher values than it's possible to display (higher than 255 in RGB), + and the decoder will always clamp values to 0-255. To encode 255 you can use any value >= 255, + and distortions of the out-of-range values won't be visible as long as they decode to anything >= 255. + + 2. From DCT perspective pixels in a block are a waveform. Hard edges form square waves (bad). + Edges with white are similar to waveform clipping, and anti-clipping algorithms can turn square waves + into softer ones that compress better. 
+ + */ +METHODDEF(void) +preprocess_deringing(DCTELEM *data, const JQUANT_TBL *quantization_table) +{ + const DCTELEM maxsample = 255 - CENTERJSAMPLE; + const int size = DCTSIZE * DCTSIZE; + + /* Decoders don't handle overflow of DC very well, so calculate + maximum overflow that is safe to do without increasing DC out of range */ + int sum = 0; + int maxsample_count = 0; + int i; + DCTELEM maxovershoot; + int n; + + for(i=0; i < size; i++) { + sum += data[i]; + if (data[i] >= maxsample) { + maxsample_count++; + } + } + + /* If nothing reaches max value there's nothing to overshoot + and if the block is completely flat, it's already the best case. */ + if (!maxsample_count || maxsample_count == size) { + return; + } + + /* Too much overshoot is not good: increased amplitude will cost bits, and the cost is proportional to quantization (here using DC quant as a rough guide). */ + maxovershoot = maxsample + MIN(MIN(31, 2*quantization_table->quantval[0]), (maxsample * size - sum) / maxsample_count); + + n = 0; + do { + int start, end, length; + DCTELEM f1, f2, l1, l2, fslope, lslope; + float step, position; + + /* Pixels are traversed in zig-zag order to process them as a line */ + if (data[jpeg_natural_order[n]] < maxsample) { + n++; + continue; + } + + /* Find a run of maxsample pixels. Start is the first pixel inside the range, end the first pixel outside. */ + start = n; + while(++n < size && data[jpeg_natural_order[n]] >= maxsample) {} + end = n; + + /* the run will be replaced with a catmull-rom interpolation of values from the edges */ + + /* Find suitable upward slope from pixels around edges of the run. + Just feeding nearby pixels as catmull rom points isn't good enough, + as slope with one sample before the edge may have been flattened by clipping, + and slope of two samples before the edge could be downward. */ + f1 = data[jpeg_natural_order[start >= 1 ? start-1 : 0]]; + f2 = data[jpeg_natural_order[start >= 2 ? 
start-2 : 0]]; + + l1 = data[jpeg_natural_order[end < size-1 ? end : size-1]]; + l2 = data[jpeg_natural_order[end < size-2 ? end+1 : size-1]]; + + fslope = MAX(f1-f2, maxsample-f1); + lslope = MAX(l1-l2, maxsample-l1); + + /* if slope at the start/end is unknown, just make the curve symmetric */ + if (start == 0) { + fslope = lslope; + } + if (end == size) { + lslope = fslope; + } + + /* The curve fits better if first and last pixel is omitted */ + length = end - start; + step = 1.f/(float)(length + 1); + position = step; + + for(i = start; i < end; i++, position += step) { + DCTELEM tmp = ceilf(catmull_rom(maxsample - fslope, maxsample, maxsample, maxsample - lslope, position, length)); + data[jpeg_natural_order[i]] = MIN(tmp, maxovershoot); + } + n++; + } + while(n < size); +} + +/* + Float version of preprocess_deringing() + */ +METHODDEF(void) +float_preprocess_deringing(FAST_FLOAT *data, const JQUANT_TBL *quantization_table) +{ + const FAST_FLOAT maxsample = 255 - CENTERJSAMPLE; + const int size = DCTSIZE * DCTSIZE; + + FAST_FLOAT sum = 0; + int maxsample_count = 0; + int i; + int n; + FAST_FLOAT maxovershoot; + + for(i=0; i < size; i++) { + sum += data[i]; + if (data[i] >= maxsample) { + maxsample_count++; + } + } + + if (!maxsample_count || maxsample_count == size) { + return; + } + + maxovershoot = maxsample + MIN(MIN(31, 2*quantization_table->quantval[0]), (maxsample * size - sum) / maxsample_count); + + n = 0; + do { + int start, end, length; + FAST_FLOAT f1, f2, l1, l2, fslope, lslope; + float step, position; + + if (data[jpeg_natural_order[n]] < maxsample) { + n++; + continue; + } + + start = n; + while(++n < size && data[jpeg_natural_order[n]] >= maxsample) {} + end = n; + + f1 = data[jpeg_natural_order[start >= 1 ? start-1 : 0]]; + f2 = data[jpeg_natural_order[start >= 2 ? start-2 : 0]]; + + l1 = data[jpeg_natural_order[end < size-1 ? end : size-1]]; + l2 = data[jpeg_natural_order[end < size-2 ? 
end+1 : size-1]]; + + fslope = MAX(f1-f2, maxsample-f1); + lslope = MAX(l1-l2, maxsample-l1); + + if (start == 0) { + fslope = lslope; + } + if (end == size) { + lslope = fslope; + } + + length = end - start; + step = 1.f/(float)(length + 1); + position = step; + + for(i = start; i < end; i++, position += step) { + FAST_FLOAT tmp = catmull_rom(maxsample - fslope, maxsample, maxsample, maxsample - lslope, position, length); + data[jpeg_natural_order[i]] = MIN(tmp, maxovershoot); + } + n++; + } + while(n < size); +} /* * Load data into workspace, applying unsigned->signed conversion. @@ -449,7 +638,7 @@ quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace) temp = -temp; temp += qval>>1; /* for rounding */ DIVIDE_BY(temp, qval); - temp = -temp; + temp = -temp; } else { temp += qval>>1; /* for rounding */ DIVIDE_BY(temp, qval); @@ -480,12 +669,14 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, /* This routine is heavily used, so it's worth coding it tightly. */ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no]; + JQUANT_TBL *qtbl = cinfo->quant_tbl_ptrs[compptr->quant_tbl_no]; DCTELEM * workspace; JDIMENSION bi; /* Make sure the compiler doesn't look up these every pass */ forward_DCT_method_ptr do_dct = fdct->dct; convsamp_method_ptr do_convsamp = fdct->convsamp; + preprocess_method_ptr do_preprocess = fdct->preprocess; quantize_method_ptr do_quantize = fdct->quantize; workspace = fdct->workspace; @@ -495,6 +686,10 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, /* Load data into workspace, applying unsigned->signed conversion */ (*do_convsamp) (sample_data, start_col, workspace); + if (do_preprocess) { + (*do_preprocess) (workspace, qtbl); + } + /* Perform the DCT */ (*do_dct) (workspace); @@ -600,6 +795,7 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, /* This routine is heavily used, so it's worth coding it tightly. 
*/ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no]; + JQUANT_TBL *qtbl = cinfo->quant_tbl_ptrs[compptr->quant_tbl_no]; FAST_FLOAT * workspace; JDIMENSION bi; float v; @@ -609,6 +805,7 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, /* Make sure the compiler doesn't look up these every pass */ float_DCT_method_ptr do_dct = fdct->float_dct; float_convsamp_method_ptr do_convsamp = fdct->float_convsamp; + float_preprocess_method_ptr do_preprocess = fdct->float_preprocess; float_quantize_method_ptr do_quantize = fdct->float_quantize; workspace = fdct->float_workspace; @@ -618,13 +815,17 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, /* Load data into workspace, applying unsigned->signed conversion */ (*do_convsamp) (sample_data, start_col, workspace); + if (do_preprocess) { + (*do_preprocess) (workspace, qtbl); + } + /* Perform the DCT */ (*do_dct) (workspace); /* Save unquantized transform coefficients for later trellis quantization */ /* Currently save as integer values. Could save float values but would require */ /* modifications to memory allocation and trellis quantization */ - + if (dst) { int i; static const double aanscalefactor[DCTSIZE] = { @@ -673,6 +874,8 @@ static const float jpeg_lambda_weights_csf_luma[64] = { 0.43454f, 0.42146f, 0.34609f, 0.24072f, 0.15975f, 0.10701f, 0.07558f, 0.05875f, }; +#define DC_TRELLIS_CANDIDATES 3 + GLOBAL(void) quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actbl, JBLOCKROW coef_blocks, JBLOCKROW src, JDIMENSION num_blocks, JQUANT_TBL * qtbl, double *norm_src, double *norm_coef, JCOEF *last_dc_val) @@ -688,7 +891,9 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb float lambda_base; float lambda; float lambda_dc; - const float *lambda_tbl = (cinfo->use_lambda_weight_tbl) ? 
jpeg_lambda_weights_csf_luma : jpeg_lambda_weights_flat; + const float *lambda_tbl = (cinfo->master->use_lambda_weight_tbl) ? + jpeg_lambda_weights_csf_luma : + jpeg_lambda_weights_flat; int Ss, Se; float *accumulated_zero_block_cost = NULL; float *accumulated_block_cost = NULL; @@ -701,9 +906,9 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb int zero_run; int run_bits; int rate; - float *accumulated_dc_cost[3]; - int *dc_cost_backtrack[3]; - JCOEF *dc_candidate[3]; + float *accumulated_dc_cost[DC_TRELLIS_CANDIDATES]; + int *dc_cost_backtrack[DC_TRELLIS_CANDIDATES]; + JCOEF *dc_candidate[DC_TRELLIS_CANDIDATES]; Ss = cinfo->Ss; Se = cinfo->Se; @@ -711,7 +916,7 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb Ss = 1; if (Se < Ss) return; - if (cinfo->trellis_eob_opt) { + if (cinfo->master->trellis_eob_opt) { accumulated_zero_block_cost = (float *)malloc((num_blocks + 1) * sizeof(float)); accumulated_block_cost = (float *)malloc((num_blocks + 1) * sizeof(float)); block_run_start = (int *)malloc(num_blocks * sizeof(int)); @@ -727,8 +932,8 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb accumulated_block_cost[0] = 0; requires_eob[0] = 0; } - if (cinfo->trellis_quant_dc) { - for (i = 0; i < 3; i++) { + if (cinfo->master->trellis_quant_dc) { + for (i = 0; i < DC_TRELLIS_CANDIDATES; i++) { accumulated_dc_cost[i] = (float *)malloc(num_blocks * sizeof(float)); dc_cost_backtrack[i] = (int *)malloc(num_blocks * sizeof(int)); dc_candidate[i] = (JCOEF *)malloc(num_blocks * sizeof(JCOEF)); @@ -755,18 +960,19 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb } norm /= 63.0; - if (cinfo->lambda_log_scale2 > 0.0) - lambda = pow(2.0, cinfo->lambda_log_scale1) * lambda_base / (pow(2.0, cinfo->lambda_log_scale2) + norm); + if (cinfo->master->lambda_log_scale2 > 0.0) + lambda = pow(2.0, cinfo->master->lambda_log_scale1) * lambda_base / + (pow(2.0, 
cinfo->master->lambda_log_scale2) + norm); else - lambda = pow(2.0, cinfo->lambda_log_scale1-12.0) * lambda_base; + lambda = pow(2.0, cinfo->master->lambda_log_scale1 - 12.0) * lambda_base; lambda_dc = lambda * lambda_tbl[0]; accumulated_zero_dist[Ss-1] = 0.0; accumulated_cost[Ss-1] = 0.0; - - // Do DC coefficient - if (cinfo->trellis_quant_dc) { + + /* Do DC coefficient */ + if (cinfo->master->trellis_quant_dc) { int sign = src[bi][0] >> 31; int x = abs(src[bi][0]); int q = 8 * qtbl->quantval[0]; @@ -774,20 +980,20 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb float dc_candidate_dist; qval = (x + q/2) / q; /* quantized value (round nearest) */ - for (k = 0; k < 3; k++) { + for (k = 0; k < DC_TRELLIS_CANDIDATES; k++) { int delta; int dc_delta; int bits; - - dc_candidate[k][bi] = qval - 1 + k; + + dc_candidate[k][bi] = qval - DC_TRELLIS_CANDIDATES/2 + k; delta = dc_candidate[k][bi] * q - x; dc_candidate_dist = delta * delta * lambda_dc; dc_candidate[k][bi] *= 1 + 2*sign; if (bi == 0) { dc_delta = dc_candidate[k][bi] - *last_dc_val; - - // Derive number of suffix bits + + /* Derive number of suffix bits */ bits = 0; dc_delta = abs(dc_delta); while (dc_delta) { @@ -798,10 +1004,10 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb accumulated_dc_cost[k][0] = cost; dc_cost_backtrack[k][0] = -1; } else { - for (l = 0; l < 3; l++) { + for (l = 0; l < DC_TRELLIS_CANDIDATES; l++) { dc_delta = dc_candidate[k][bi] - dc_candidate[l][bi-1]; - - // Derive number of suffix bits + + /* Derive number of suffix bits */ bits = 0; dc_delta = abs(dc_delta); while (dc_delta) { @@ -814,14 +1020,14 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb dc_cost_backtrack[k][bi] = l; } } - } + } } } - - // Do AC coefficients + + /* Do AC coefficients */ for (i = Ss; i <= Se; i++) { int z = jpeg_natural_order[i]; - + int sign = src[bi][z] >> 31; int x = abs(src[bi][z]); int q = 8 * 
qtbl->quantval[z]; @@ -919,7 +1125,7 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb i--; } - if (cinfo->trellis_eob_opt) { + if (cinfo->master->trellis_eob_opt) { accumulated_zero_block_cost[bi+1] = accumulated_zero_block_cost[bi]; accumulated_zero_block_cost[bi+1] += cost_all_zeros; requires_eob[bi+1] = has_eob; @@ -953,7 +1159,7 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb } } - if (cinfo->trellis_eob_opt) { + if (cinfo->master->trellis_eob_opt) { int last_block = num_blocks; best_cost = 1e38; @@ -994,7 +1200,7 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb free(requires_eob); } - if (cinfo->trellis_q_opt) { + if (cinfo->master->trellis_q_opt) { for (bi = 0; bi < num_blocks; bi++) { for (i = 1; i < DCTSIZE2; i++) { norm_src[i] += src[bi][i] * coef_blocks[bi][i]; @@ -1003,9 +1209,9 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb } } - if (cinfo->trellis_quant_dc) { + if (cinfo->master->trellis_quant_dc) { j = 0; - for (i = 1; i < 3; i++) { + for (i = 1; i < DC_TRELLIS_CANDIDATES; i++) { if (accumulated_dc_cost[i][num_blocks-1] < accumulated_dc_cost[j][num_blocks-1]) j = i; } @@ -1013,11 +1219,11 @@ quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl, c_derived_tbl *actb coef_blocks[bi][0] = dc_candidate[j][bi]; j = dc_cost_backtrack[j][bi]; } - - // Save DC predictor + + /* Save DC predictor */ *last_dc_val = coef_blocks[num_blocks-1][0]; - - for (i = 0; i < 3; i++) { + + for (i = 0; i < DC_TRELLIS_CANDIDATES; i++) { free(accumulated_dc_cost[i]); free(dc_cost_backtrack[i]); free(dc_candidate[i]); @@ -1089,6 +1295,13 @@ jinit_forward_dct (j_compress_ptr cinfo) fdct->convsamp = jsimd_convsamp; else fdct->convsamp = convsamp; + + if (cinfo->master->overshoot_deringing) { + fdct->preprocess = preprocess_deringing; + } else { + fdct->preprocess = NULL; + } + if (jsimd_can_quantize()) fdct->quantize = 
jsimd_quantize; else @@ -1101,6 +1314,13 @@ jinit_forward_dct (j_compress_ptr cinfo) fdct->float_convsamp = jsimd_convsamp_float; else fdct->float_convsamp = convsamp_float; + + if (cinfo->master->overshoot_deringing) { + fdct->float_preprocess = float_preprocess_deringing; + } else { + fdct->float_preprocess = NULL; + } + if (jsimd_can_quantize_float()) fdct->float_quantize = jsimd_quantize_float; else diff --git a/jcinit.c b/jcinit.c index 6dfb4f70..f5ecf5f5 100644 --- a/jcinit.c +++ b/jcinit.c @@ -62,7 +62,8 @@ jinit_compress_master (j_compress_ptr cinfo) /* Need a full-image coefficient buffer in any multi-pass mode. */ jinit_c_coef_controller(cinfo, - (boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding || cinfo->optimize_scans)); + (boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding || + cinfo->master->optimize_scans)); jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */); jinit_marker_writer(cinfo); diff --git a/jcmarker.c b/jcmarker.c index 3e78f299..a62da057 100644 --- a/jcmarker.c +++ b/jcmarker.c @@ -192,6 +192,7 @@ emit_multi_dqt (j_compress_ptr cinfo) int seen[MAX_COMPONENTS] = { 0 }; int fin_prec = 0; int ci; + int size = 0; for (ci = 0; ci < cinfo->num_components; ci++) { int tbl_num = cinfo->comp_info[ci].quant_tbl_no; @@ -210,7 +211,6 @@ emit_multi_dqt (j_compress_ptr cinfo) emit_marker(cinfo, M_DQT); - int size = 0; for (ci = 0; ci < cinfo->num_components; ci++) { int tbl_num = cinfo->comp_info[ci].quant_tbl_no; diff --git a/jcmaster.c b/jcmaster.c index 17058678..37e577cd 100644 --- a/jcmaster.c +++ b/jcmaster.c @@ -5,7 +5,7 @@ * Copyright (C) 1991-1997, Thomas G. Lane. * Modified 2003-2010 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2010, D. R. Commander. + * Copyright (C) 2010, 2014, D. R. Commander. * mozjpeg Modifications: * Copyright (C) 2014, Mozilla Corporation. * For conditions of distribution and use, see the accompanying README file. 
@@ -20,6 +20,7 @@ #include "jinclude.h" #include "jpeglib.h" #include "jpegcomp.h" +#include "jmemsys.h" /* Private state */ @@ -193,7 +194,7 @@ validate_script (j_compress_ptr cinfo) /* -1 until that coefficient has been seen; then last Al for it */ #endif - if (cinfo->optimize_scans) { + if (cinfo->master->optimize_scans) { cinfo->progressive_mode = TRUE; /* When we optimize scans, there is redundancy in the scan list * and this function will fail. Therefore skip all this checking @@ -332,12 +333,18 @@ select_scan_parameters (j_compress_ptr cinfo) my_master_ptr master = (my_master_ptr) cinfo->master; if (master->pass_number < master->pass_number_scan_opt_base) { cinfo->comps_in_scan = 1; - if (cinfo->use_scans_in_trellis) { - cinfo->cur_comp_info[0] = &cinfo->comp_info[master->pass_number/(4*cinfo->trellis_num_loops)]; - cinfo->Ss = (master->pass_number%4 < 2) ? 1 : cinfo->trellis_freq_split+1; - cinfo->Se = (master->pass_number%4 < 2) ? cinfo->trellis_freq_split : DCTSIZE2-1; + if (cinfo->master->use_scans_in_trellis) { + cinfo->cur_comp_info[0] = + &cinfo->comp_info[master->pass_number / + (4 * cinfo->master->trellis_num_loops)]; + cinfo->Ss = (master->pass_number % 4 < 2) ? + 1 : cinfo->master->trellis_freq_split + 1; + cinfo->Se = (master->pass_number % 4 < 2) ? 
+ cinfo->master->trellis_freq_split : DCTSIZE2 - 1; } else { - cinfo->cur_comp_info[0] = &cinfo->comp_info[master->pass_number/(2*cinfo->trellis_num_loops)]; + cinfo->cur_comp_info[0] = + &cinfo->comp_info[master->pass_number / + (2 * cinfo->master->trellis_num_loops)]; cinfo->Ss = 1; cinfo->Se = DCTSIZE2-1; } @@ -355,13 +362,16 @@ select_scan_parameters (j_compress_ptr cinfo) cinfo->Se = scanptr->Se; cinfo->Ah = scanptr->Ah; cinfo->Al = scanptr->Al; - if (cinfo->optimize_scans) { + if (cinfo->master->optimize_scans) { /* luma frequency split passes */ - if (master->scan_number >= cinfo->num_scans_luma_dc+3*cinfo->Al_max_luma+2 && - master->scan_number < cinfo->num_scans_luma) + if (master->scan_number >= cinfo->master->num_scans_luma_dc + + 3 * cinfo->master->Al_max_luma + 2 && + master->scan_number < cinfo->master->num_scans_luma) cinfo->Al = master->best_Al_luma; /* chroma frequency split passes */ - if (master->scan_number >= cinfo->num_scans_luma+cinfo->num_scans_chroma_dc+(6*cinfo->Al_max_chroma+4) && + if (master->scan_number >= cinfo->master->num_scans_luma + + cinfo->master->num_scans_chroma_dc + + (6 * cinfo->master->Al_max_chroma + 4) && master->scan_number < cinfo->num_scans) cinfo->Al = master->best_Al_chroma; } @@ -484,7 +494,8 @@ METHODDEF(void) prepare_for_pass (j_compress_ptr cinfo) { my_master_ptr master = (my_master_ptr) cinfo->master; - cinfo->trellis_passes = master->pass_number < master->pass_number_scan_opt_base; + cinfo->master->trellis_passes = + master->pass_number < master->pass_number_scan_opt_base; switch (master->pass_type) { case main_pass: @@ -537,7 +548,7 @@ prepare_for_pass (j_compress_ptr cinfo) select_scan_parameters(cinfo); per_scan_setup(cinfo); } - if (cinfo->optimize_scans) { + if (cinfo->master->optimize_scans) { master->saved_dest = cinfo->dest; cinfo->dest = NULL; master->scan_size[master->scan_number] = 0; @@ -553,13 +564,15 @@ prepare_for_pass (j_compress_ptr cinfo) master->pub.call_pass_startup = FALSE; break; case 
trellis_pass: - if (master->pass_number%(cinfo->num_components*(cinfo->use_scans_in_trellis?4:2)) == 1 && cinfo->trellis_q_opt) { + if (master->pass_number % + (cinfo->num_components * (cinfo->master->use_scans_in_trellis ? 4 : 2)) == 1 && + cinfo->master->trellis_q_opt) { int i, j; for (i = 0; i < NUM_QUANT_TBLS; i++) { for (j = 1; j < DCTSIZE2; j++) { - cinfo->norm_src[i][j] = 0.0; - cinfo->norm_coef[i][j] = 0.0; + cinfo->master->norm_src[i][j] = 0.0; + cinfo->master->norm_coef[i][j] = 0.0; } } } @@ -641,8 +654,11 @@ select_scans (j_compress_ptr cinfo, int next_scan_number) my_master_ptr master = (my_master_ptr) cinfo->master; int base_scan_idx = 0; - int luma_freq_split_scan_start = cinfo->num_scans_luma_dc + 3 * cinfo->Al_max_luma + 2; - int chroma_freq_split_scan_start = cinfo->num_scans_luma+cinfo->num_scans_chroma_dc+(6*cinfo->Al_max_chroma+4); + int luma_freq_split_scan_start = cinfo->master->num_scans_luma_dc + + 3 * cinfo->master->Al_max_luma + 2; + int chroma_freq_split_scan_start = cinfo->master->num_scans_luma + + cinfo->master->num_scans_chroma_dc + + (6 * cinfo->master->Al_max_chroma + 4); if (next_scan_number > 1 && next_scan_number <= luma_freq_split_scan_start) { if ((next_scan_number - 1) % 3 == 2) { @@ -663,7 +679,8 @@ select_scans (j_compress_ptr cinfo, int next_scan_number) } } - } else if (next_scan_number > luma_freq_split_scan_start && next_scan_number <= cinfo->num_scans_luma) { + } else if (next_scan_number > luma_freq_split_scan_start && + next_scan_number <= cinfo->master->num_scans_luma) { if (next_scan_number == luma_freq_split_scan_start + 1) { master->best_freq_split_idx_luma = 0; master->best_cost = master->scan_size[next_scan_number-1]; @@ -683,21 +700,25 @@ select_scans (j_compress_ptr cinfo, int next_scan_number) if ((idx == 2 && master->best_freq_split_idx_luma == 0) || (idx == 3 && master->best_freq_split_idx_luma != 2) || (idx == 4 && master->best_freq_split_idx_luma != 4)) { - master->scan_number = cinfo->num_scans_luma - 1; 
+ master->scan_number = cinfo->master->num_scans_luma - 1; master->pass_number = 2 * master->scan_number + 1 + master->pass_number_scan_opt_base; master->pub.is_last_pass = (master->pass_number == master->total_passes - 1); } } - } else if (cinfo->num_scans > cinfo->num_scans_luma) { + } else if (cinfo->num_scans > cinfo->master->num_scans_luma) { + + if (next_scan_number == cinfo->master->num_scans_luma + + cinfo->master->num_scans_chroma_dc) { + base_scan_idx = cinfo->master->num_scans_luma; - if (next_scan_number == cinfo->num_scans_luma+cinfo->num_scans_chroma_dc) { - base_scan_idx = cinfo->num_scans_luma; - master->interleave_chroma_dc = master->scan_size[base_scan_idx] <= master->scan_size[base_scan_idx+1] + master->scan_size[base_scan_idx+2]; - } else if (next_scan_number > cinfo->num_scans_luma+cinfo->num_scans_chroma_dc && next_scan_number <= chroma_freq_split_scan_start) { - base_scan_idx = cinfo->num_scans_luma + cinfo->num_scans_chroma_dc; + } else if (next_scan_number > cinfo->master->num_scans_luma + + cinfo->master->num_scans_chroma_dc && + next_scan_number <= chroma_freq_split_scan_start) { + base_scan_idx = cinfo->master->num_scans_luma + + cinfo->master->num_scans_chroma_dc; if ((next_scan_number - base_scan_idx) % 6 == 4) { int Al = (next_scan_number - base_scan_idx) / 6; int i; @@ -757,10 +778,11 @@ select_scans (j_compress_ptr cinfo, int next_scan_number) copy_buffer(cinfo, 0); - if (cinfo->num_scans > cinfo->num_scans_luma && !cinfo->one_dc_scan) { - base_scan_idx = cinfo->num_scans_luma; + if (cinfo->num_scans > cinfo->master->num_scans_luma && + !cinfo->master->one_dc_scan) { + base_scan_idx = cinfo->master->num_scans_luma; - if (master->interleave_chroma_dc && !cinfo->sep_dc_scan) + if (master->interleave_chroma_dc && !cinfo->master->sep_dc_scan) copy_buffer(cinfo, base_scan_idx); else { copy_buffer(cinfo, base_scan_idx+1); @@ -779,7 +801,7 @@ select_scans (j_compress_ptr cinfo, int next_scan_number) for (Al = master->best_Al_luma-1; Al >= 
min_Al; Al--) copy_buffer(cinfo, 3 + 3*Al); - if (cinfo->num_scans > cinfo->num_scans_luma) { + if (cinfo->num_scans > cinfo->master->num_scans_luma) { if (master->best_freq_split_idx_chroma == 0) { copy_buffer(cinfo, chroma_freq_split_scan_start); copy_buffer(cinfo, chroma_freq_split_scan_start+1); @@ -791,7 +813,8 @@ select_scans (j_compress_ptr cinfo, int next_scan_number) copy_buffer(cinfo, chroma_freq_split_scan_start+4*(master->best_freq_split_idx_chroma-1)+5); } - base_scan_idx = cinfo->num_scans_luma + cinfo->num_scans_chroma_dc; + base_scan_idx = cinfo->master->num_scans_luma + + cinfo->master->num_scans_chroma_dc; for (Al = master->best_Al_chroma-1; Al >= min_Al; Al--) { copy_buffer(cinfo, base_scan_idx + 6*Al + 4); @@ -802,7 +825,7 @@ select_scans (j_compress_ptr cinfo, int next_scan_number) for (Al = min_Al-1; Al >= 0; Al--) { copy_buffer(cinfo, 3 + 3*Al); - if (cinfo->num_scans > cinfo->num_scans_luma) { + if (cinfo->num_scans > cinfo->master->num_scans_luma) { copy_buffer(cinfo, base_scan_idx + 6*Al + 4); copy_buffer(cinfo, base_scan_idx + 6*Al + 5); } @@ -835,7 +858,7 @@ finish_pass_master (j_compress_ptr cinfo) /* next pass is either output of scan 0 (after optimization) * or output of scan 1 (if no optimization). */ - if (cinfo->trellis_quant) + if (cinfo->master->trellis_quant) master->pass_type = trellis_pass; else { master->pass_type = output_pass; @@ -851,7 +874,7 @@ finish_pass_master (j_compress_ptr cinfo) /* next pass is either optimization or output of next scan */ if (cinfo->optimize_coding) master->pass_type = huff_opt_pass; - if (cinfo->optimize_scans) { + if (cinfo->master->optimize_scans) { (*cinfo->dest->term_destination)(cinfo); cinfo->dest = master->saved_dest; select_scans(cinfo, master->scan_number + 1); @@ -862,13 +885,16 @@ finish_pass_master (j_compress_ptr cinfo) case trellis_pass: master->pass_type = (cinfo->optimize_coding || master->pass_number < master->pass_number_scan_opt_base-1) ? 
huff_opt_pass : output_pass; - if ((master->pass_number+1)%(cinfo->num_components*(cinfo->use_scans_in_trellis?4:2)) == 0 && cinfo->trellis_q_opt) { + if ((master->pass_number + 1) % + (cinfo->num_components * (cinfo->master->use_scans_in_trellis ? 4 : 2)) == 0 && + cinfo->master->trellis_q_opt) { int i, j; for (i = 0; i < NUM_QUANT_TBLS; i++) { for (j = 1; j < DCTSIZE2; j++) { - if (cinfo->norm_coef[i][j] != 0.0) { - int q = (int)(cinfo->norm_src[i][j] / cinfo->norm_coef[i][j] + 0.5); + if (cinfo->master->norm_coef[i][j] != 0.0) { + int q = (int)(cinfo->master->norm_src[i][j] / + cinfo->master->norm_coef[i][j] + 0.5); if (q > 254) q = 254; if (q < 1) q = 1; cinfo->quant_tbl_ptrs[i]->quantval[j] = q; @@ -895,6 +921,11 @@ jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only) master = (my_master_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, sizeof(my_comp_master)); + if (cinfo->master) { + MEMCOPY(&master->pub, cinfo->master, sizeof(struct jpeg_comp_master)); + jpeg_free_small((j_common_ptr) cinfo, cinfo->master, + sizeof(struct jpeg_comp_master)); + } cinfo->master = (struct jpeg_comp_master *) master; master->pub.prepare_for_pass = prepare_for_pass; master->pub.pass_startup = pass_startup; @@ -937,12 +968,14 @@ jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only) master->total_passes = cinfo->num_scans; master->pass_number_scan_opt_base = 0; - if (cinfo->trellis_quant) { - master->pass_number_scan_opt_base = ((cinfo->use_scans_in_trellis) ? 4 : 2) * cinfo->num_components * cinfo->trellis_num_loops; + if (cinfo->master->trellis_quant) { + master->pass_number_scan_opt_base = + ((cinfo->master->use_scans_in_trellis) ? 
4 : 2) * cinfo->num_components * + cinfo->master->trellis_num_loops; master->total_passes += master->pass_number_scan_opt_base; } - if (cinfo->optimize_scans) { + if (cinfo->master->optimize_scans) { int i; master->best_Al_chroma = 0; diff --git a/jcparam.c b/jcparam.c index d3b8ea15..8af8ae64 100644 --- a/jcparam.c +++ b/jcparam.c @@ -136,7 +136,7 @@ jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, */ { /* Set up two quantization tables using the specified scaling */ - if (cinfo->use_flat_quant_tbl) { + if (cinfo->master->use_flat_quant_tbl) { jpeg_add_quant_table(cinfo, 0, flat_quant_tbl, scale_factor, force_baseline); jpeg_add_quant_table(cinfo, 1, flat_quant_tbl, @@ -248,7 +248,7 @@ jpeg_set_defaults (j_compress_ptr cinfo) #ifdef C_PROGRESSIVE_SUPPORTED cinfo->scan_info = NULL; cinfo->num_scans = 0; - if (!cinfo->use_moz_defaults) { + if (!cinfo->master->use_moz_defaults) { /* Default is no multiple-scan output */ cinfo->scan_info = NULL; cinfo->num_scans = 0; @@ -266,7 +266,7 @@ jpeg_set_defaults (j_compress_ptr cinfo) cinfo->arith_code = FALSE; #ifdef ENTROPY_OPT_SUPPORTED - if (cinfo->use_moz_defaults) + if (cinfo->master->use_moz_defaults) /* By default, do extra passes to optimize entropy coding */ cinfo->optimize_coding = TRUE; else @@ -293,6 +293,8 @@ jpeg_set_defaults (j_compress_ptr cinfo) cinfo->do_fancy_downsampling = TRUE; #endif + cinfo->master->overshoot_deringing = cinfo->master->use_moz_defaults; + /* No input smoothing */ cinfo->smoothing_factor = 0; @@ -322,26 +324,26 @@ jpeg_set_defaults (j_compress_ptr cinfo) jpeg_default_colorspace(cinfo); - cinfo->one_dc_scan = TRUE; + cinfo->master->one_dc_scan = TRUE; #ifdef C_PROGRESSIVE_SUPPORTED - if (cinfo->use_moz_defaults) { - cinfo->optimize_scans = TRUE; + if (cinfo->master->use_moz_defaults) { + cinfo->master->optimize_scans = TRUE; jpeg_simple_progression(cinfo); } else - cinfo->optimize_scans = FALSE; + cinfo->master->optimize_scans = FALSE; #endif - cinfo->trellis_quant = 
cinfo->use_moz_defaults; - cinfo->lambda_log_scale1 = 16.0; - cinfo->lambda_log_scale2 = 15.5; + cinfo->master->trellis_quant = cinfo->master->use_moz_defaults; + cinfo->master->lambda_log_scale1 = 16.0; + cinfo->master->lambda_log_scale2 = 15.5; - cinfo->use_lambda_weight_tbl = TRUE; - cinfo->use_scans_in_trellis = FALSE; - cinfo->trellis_freq_split = 8; - cinfo->trellis_num_loops = 1; - cinfo->trellis_q_opt = FALSE; - cinfo->trellis_quant_dc = TRUE; + cinfo->master->use_lambda_weight_tbl = TRUE; + cinfo->master->use_scans_in_trellis = FALSE; + cinfo->master->trellis_freq_split = 8; + cinfo->master->trellis_num_loops = 1; + cinfo->master->trellis_q_opt = FALSE; + cinfo->master->trellis_quant_dc = TRUE; } @@ -574,7 +576,7 @@ jpeg_search_progression (j_compress_ptr cinfo) } else if (ncomps == 1) { nscans = 23; } else { - cinfo->num_scans_luma = 0; + cinfo->master->num_scans_luma = 0; return FALSE; } @@ -595,10 +597,12 @@ jpeg_search_progression (j_compress_ptr cinfo) cinfo->scan_info = scanptr; cinfo->num_scans = nscans; - cinfo->Al_max_luma = 3; - cinfo->num_scans_luma_dc = 1; - cinfo->num_frequency_splits = 5; - cinfo->num_scans_luma = cinfo->num_scans_luma_dc + (3 * cinfo->Al_max_luma + 2) + (2 * cinfo->num_frequency_splits + 1); + cinfo->master->Al_max_luma = 3; + cinfo->master->num_scans_luma_dc = 1; + cinfo->master->num_frequency_splits = 5; + cinfo->master->num_scans_luma = + cinfo->master->num_scans_luma_dc + (3 * cinfo->master->Al_max_luma + 2) + + (2 * cinfo->master->num_frequency_splits + 1); /* 23 scans for luma */ /* 1 scan for DC */ @@ -609,7 +613,7 @@ jpeg_search_progression (j_compress_ptr cinfo) /* last 4 done conditionally */ /* luma DC by itself */ - if (cinfo->one_dc_scan) + if (cinfo->master->one_dc_scan) scanptr = fill_dc_scans(scanptr, ncomps, 0, 0); else scanptr = fill_dc_scans(scanptr, 1, 0, 0); @@ -617,7 +621,7 @@ jpeg_search_progression (j_compress_ptr cinfo) scanptr = fill_a_scan(scanptr, 0, 1, 8, 0, 0); scanptr = fill_a_scan(scanptr, 0, 
9, 63, 0, 0); - for (Al = 0; Al < cinfo->Al_max_luma; Al++) { + for (Al = 0; Al < cinfo->master->Al_max_luma; Al++) { scanptr = fill_a_scan(scanptr, 0, 1, 63, Al+1, Al); scanptr = fill_a_scan(scanptr, 0, 1, 8, 0, Al+1); scanptr = fill_a_scan(scanptr, 0, 9, 63, 0, Al+1); @@ -625,17 +629,17 @@ jpeg_search_progression (j_compress_ptr cinfo) scanptr = fill_a_scan(scanptr, 0, 1, 63, 0, 0); - for (i = 0; i < cinfo->num_frequency_splits; i++) { + for (i = 0; i < cinfo->master->num_frequency_splits; i++) { scanptr = fill_a_scan(scanptr, 0, 1, frequency_split[i], 0, 0); scanptr = fill_a_scan(scanptr, 0, frequency_split[i]+1, 63, 0, 0); } if (ncomps == 1) { - cinfo->Al_max_chroma = 0; - cinfo->num_scans_chroma_dc = 0; + cinfo->master->Al_max_chroma = 0; + cinfo->master->num_scans_chroma_dc = 0; } else { - cinfo->Al_max_chroma = 2; - cinfo->num_scans_chroma_dc = 3; + cinfo->master->Al_max_chroma = 2; + cinfo->master->num_scans_chroma_dc = 3; /* 41 scans for chroma */ /* chroma DC combined */ @@ -649,7 +653,7 @@ jpeg_search_progression (j_compress_ptr cinfo) scanptr = fill_a_scan(scanptr, 2, 1, 8, 0, 0); scanptr = fill_a_scan(scanptr, 2, 9, 63, 0, 0); - for (Al = 0; Al < cinfo->Al_max_chroma; Al++) { + for (Al = 0; Al < cinfo->master->Al_max_chroma; Al++) { scanptr = fill_a_scan(scanptr, 1, 1, 63, Al+1, Al); scanptr = fill_a_scan(scanptr, 2, 1, 63, Al+1, Al); scanptr = fill_a_scan(scanptr, 1, 1, 8, 0, Al+1); @@ -661,7 +665,7 @@ jpeg_search_progression (j_compress_ptr cinfo) scanptr = fill_a_scan(scanptr, 1, 1, 63, 0, 0); scanptr = fill_a_scan(scanptr, 2, 1, 63, 0, 0); - for (i = 0; i < cinfo->num_frequency_splits; i++) { + for (i = 0; i < cinfo->master->num_frequency_splits; i++) { scanptr = fill_a_scan(scanptr, 1, 1, frequency_split[i], 0, 0); scanptr = fill_a_scan(scanptr, 1, frequency_split[i]+1, 63, 0, 0); scanptr = fill_a_scan(scanptr, 2, 1, frequency_split[i], 0, 0); @@ -684,7 +688,7 @@ jpeg_simple_progression (j_compress_ptr cinfo) int nscans; jpeg_scan_info * scanptr; 
- if (cinfo->optimize_scans) { + if (cinfo->master->optimize_scans) { if (jpeg_search_progression(cinfo) == TRUE) return; } @@ -700,7 +704,7 @@ jpeg_simple_progression (j_compress_ptr cinfo) nscans = 10; } else { /* All-purpose script for other color spaces. */ - if (cinfo->use_moz_defaults == TRUE) { + if (cinfo->master->use_moz_defaults == TRUE) { if (ncomps > MAX_COMPS_IN_SCAN) nscans = 5 * ncomps; /* 2 DC + 4 AC scans per component */ else @@ -732,12 +736,12 @@ jpeg_simple_progression (j_compress_ptr cinfo) if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) { /* Custom script for YCbCr color images. */ - if (cinfo->use_moz_defaults == TRUE) { + if (cinfo->master->use_moz_defaults == TRUE) { /* scan defined in jpeg_scan_rgb.txt in jpgcrush */ /* Initial DC scan */ - if (cinfo->one_dc_scan) + if (cinfo->master->one_dc_scan) scanptr = fill_dc_scans(scanptr, ncomps, 0, 0); - else if (cinfo->sep_dc_scan) { + else if (cinfo->master->sep_dc_scan) { scanptr = fill_a_scan(scanptr, 0, 0, 0, 0, 0); scanptr = fill_a_scan(scanptr, 1, 0, 0, 0, 0); scanptr = fill_a_scan(scanptr, 2, 0, 0, 0, 0); @@ -780,7 +784,7 @@ jpeg_simple_progression (j_compress_ptr cinfo) } } else { /* All-purpose script for other color spaces. 
*/ - if (cinfo->use_moz_defaults == TRUE) { + if (cinfo->master->use_moz_defaults == TRUE) { /* scan defined in jpeg_scan_bw.txt in jpgcrush */ /* DC component, no successive approximation */ scanptr = fill_dc_scans(scanptr, ncomps, 0, 0); diff --git a/jcphuff.c b/jcphuff.c index 4fbb36dd..da2464ad 100644 --- a/jcphuff.c +++ b/jcphuff.c @@ -171,7 +171,7 @@ start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 257 * sizeof(long)); MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long)); - if (cinfo->trellis_passes) { + if (cinfo->master->trellis_passes) { /* When generating tables for trellis passes, make sure that all */ /* codewords have an assigned length */ int i, j; diff --git a/jcstest.c b/jcstest.c index c0eec6e7..79829e6b 100644 --- a/jcstest.c +++ b/jcstest.c @@ -87,7 +87,8 @@ int main(void) jpeg_create_compress(&cinfo); cinfo.input_components = 3; - cinfo.use_moz_defaults = TRUE; + if (jpeg_c_bool_param_supported(&cinfo, JBOOLEAN_USE_MOZ_DEFAULTS)) + jpeg_c_set_bool_param(&cinfo, JBOOLEAN_USE_MOZ_DEFAULTS, TRUE); jpeg_set_defaults(&cinfo); cinfo.in_color_space = JCS_EXT_RGB; jpeg_default_colorspace(&cinfo); diff --git a/jctrans.c b/jctrans.c index 8fffde03..f4be9f3b 100644 --- a/jctrans.c +++ b/jctrans.c @@ -6,6 +6,8 @@ * Modified 2000-2009 by Guido Vollbeding. * It was modified by The libjpeg-turbo Project to include only code relevant * to libjpeg-turbo. + * mozjpeg Modifications: + * Copyright (C) 2014, Mozilla Corporation. * For conditions of distribution and use, see the accompanying README file. 
* * This file contains library routines for transcoding compression, @@ -41,8 +43,8 @@ GLOBAL(void) jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays) { /* setting up scan optimisation pattern failed, disable scan optimisation */ - if (cinfo->num_scans_luma == 0) - cinfo->optimize_scans = FALSE; + if (cinfo->master->num_scans_luma == 0) + cinfo->master->optimize_scans = FALSE; if (cinfo->global_state != CSTATE_START) ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); @@ -91,7 +93,7 @@ jpeg_copy_critical_parameters (j_decompress_ptr srcinfo, #endif /* Initialize all parameters to default values */ jpeg_set_defaults(dstinfo); - dstinfo->trellis_quant = FALSE; + dstinfo->master->trellis_quant = FALSE; /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB. * Fix it to get the right header markers for the image colorspace. diff --git a/jdcol565.c b/jdcol565.c index a2c98f30..695f2621 100644 --- a/jdcol565.c +++ b/jdcol565.c @@ -5,6 +5,7 @@ * Copyright (C) 1991-1997, Thomas G. Lane. * Modifications: * Copyright (C) 2013, Linaro Limited. + * Copyright (C) 2014, D. R. Commander. * For conditions of distribution and use, see the accompanying README file. * * This file contains output colorspace conversion routines. 
@@ -13,42 +14,11 @@ /* This file is included by jdcolor.c */ -#define PACK_SHORT_565(r, g, b) ((((r) << 8) & 0xf800) | \ - (((g) << 3) & 0x7E0) | ((b) >> 3)) -#define PACK_TWO_PIXELS(l, r) ((r << 16) | l) -#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3) - -#define WRITE_TWO_PIXELS(addr, pixels) { \ - ((INT16*)(addr))[0] = (pixels); \ - ((INT16*)(addr))[1] = (pixels) >> 16; \ -} -#define WRITE_TWO_ALIGNED_PIXELS(addr, pixels) ((*(INT32 *)(addr)) = pixels) - -#define DITHER_565_R(r, dither) ((r) + ((dither) & 0xFF)) -#define DITHER_565_G(g, dither) ((g) + (((dither) & 0xFF) >> 1)) -#define DITHER_565_B(b, dither) ((b) + ((dither) & 0xFF)) - - -/* Declarations for ordered dithering - * - * We use a 4x4 ordered dither array packed into 32 bits. This array is - * sufficent for dithering RGB888 to RGB565. - */ - -#define DITHER_MASK 0x3 -#define DITHER_ROTATE(x) (((x) << 24) | (((x) >> 8) & 0x00FFFFFF)) -static const INT32 dither_matrix[4] = { - 0x0008020A, - 0x0C040E06, - 0x030B0109, - 0x0F070D05 -}; - - -METHODDEF(void) -ycc_rgb565_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +INLINE +LOCAL(void) +ycc_rgb565_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; register int y, cb, cr; @@ -123,10 +93,11 @@ ycc_rgb565_convert (j_decompress_ptr cinfo, } -METHODDEF(void) -ycc_rgb565D_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +INLINE +LOCAL(void) +ycc_rgb565D_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; register int y, cb, cr; @@ -208,10 +179,11 @@ ycc_rgb565D_convert (j_decompress_ptr cinfo, } -METHODDEF(void) -rgb_rgb565_convert 
(j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +INLINE +LOCAL(void) +rgb_rgb565_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { register JSAMPROW outptr; register JSAMPROW inptr0, inptr1, inptr2; @@ -262,10 +234,11 @@ rgb_rgb565_convert (j_decompress_ptr cinfo, } -METHODDEF(void) -rgb_rgb565D_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +INLINE +LOCAL(void) +rgb_rgb565D_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { register JSAMPROW outptr; register JSAMPROW inptr0, inptr1, inptr2; @@ -320,10 +293,11 @@ rgb_rgb565D_convert (j_decompress_ptr cinfo, } -METHODDEF(void) -gray_rgb565_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +INLINE +LOCAL(void) +gray_rgb565_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { register JSAMPROW inptr, outptr; register JDIMENSION col; @@ -359,10 +333,11 @@ gray_rgb565_convert (j_decompress_ptr cinfo, } -METHODDEF(void) -gray_rgb565D_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +INLINE +LOCAL(void) +gray_rgb565D_convert_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) { register JSAMPROW inptr, outptr; register JDIMENSION col; diff --git a/jdcolor.c b/jdcolor.c index ffedabd5..779fa51f 100644 --- a/jdcolor.c +++ b/jdcolor.c @@ -544,7 +544,162 @@ ycck_cmyk_convert (j_decompress_ptr cinfo, } +/* + * RGB565 conversion + */ + +#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \ + (((g) << 3) & 0x7E0) | ((b) >> 3)) +#define PACK_SHORT_565_BE(r, g, b) 
(((r) & 0xF8) | ((g) >> 5) | \ + (((g) << 11) & 0xE000) | \ + (((b) << 5) & 0x1F00)) + +#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l) +#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r) + +#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3) + +#define WRITE_TWO_ALIGNED_PIXELS(addr, pixels) ((*(int *)(addr)) = pixels) + +#define DITHER_565_R(r, dither) ((r) + ((dither) & 0xFF)) +#define DITHER_565_G(g, dither) ((g) + (((dither) & 0xFF) >> 1)) +#define DITHER_565_B(b, dither) ((b) + ((dither) & 0xFF)) + + +/* Declarations for ordered dithering + * + * We use a 4x4 ordered dither array packed into 32 bits. This array is + * sufficient for dithering RGB888 to RGB565. + */ + +#define DITHER_MASK 0x3 +#define DITHER_ROTATE(x) (((x) << 24) | (((x) >> 8) & 0x00FFFFFF)) +static const INT32 dither_matrix[4] = { + 0x0008020A, + 0x0C040E06, + 0x030B0109, + 0x0F070D05 +}; + + +static INLINE boolean is_big_endian(void) +{ + int test_value = 1; + if(*(char *)&test_value != 1) + return TRUE; + return FALSE; +} + + +/* Include inline routines for RGB565 conversion */ + +#define PACK_SHORT_565 PACK_SHORT_565_LE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_LE +#define ycc_rgb565_convert_internal ycc_rgb565_convert_le +#define ycc_rgb565D_convert_internal ycc_rgb565D_convert_le +#define rgb_rgb565_convert_internal rgb_rgb565_convert_le +#define rgb_rgb565D_convert_internal rgb_rgb565D_convert_le +#define gray_rgb565_convert_internal gray_rgb565_convert_le +#define gray_rgb565D_convert_internal gray_rgb565D_convert_le #include "jdcol565.c" +#undef PACK_SHORT_565 +#undef PACK_TWO_PIXELS +#undef ycc_rgb565_convert_internal +#undef ycc_rgb565D_convert_internal +#undef rgb_rgb565_convert_internal +#undef rgb_rgb565D_convert_internal +#undef gray_rgb565_convert_internal +#undef gray_rgb565D_convert_internal + +#define PACK_SHORT_565 PACK_SHORT_565_BE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_BE +#define ycc_rgb565_convert_internal ycc_rgb565_convert_be +#define ycc_rgb565D_convert_internal
ycc_rgb565D_convert_be +#define rgb_rgb565_convert_internal rgb_rgb565_convert_be +#define rgb_rgb565D_convert_internal rgb_rgb565D_convert_be +#define gray_rgb565_convert_internal gray_rgb565_convert_be +#define gray_rgb565D_convert_internal gray_rgb565D_convert_be +#include "jdcol565.c" +#undef PACK_SHORT_565 +#undef PACK_TWO_PIXELS +#undef ycc_rgb565_convert_internal +#undef ycc_rgb565D_convert_internal +#undef rgb_rgb565_convert_internal +#undef rgb_rgb565D_convert_internal +#undef gray_rgb565_convert_internal +#undef gray_rgb565D_convert_internal + + +METHODDEF(void) +ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + ycc_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + ycc_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +ycc_rgb565D_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + ycc_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + ycc_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +rgb_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + rgb_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + rgb_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +rgb_rgb565D_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + rgb_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + rgb_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +gray_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE 
input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + gray_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + gray_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} + + +METHODDEF(void) +gray_rgb565D_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ + if (is_big_endian()) + gray_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); + else + gray_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows); +} /* diff --git a/jdmerge.c b/jdmerge.c index f89d69f5..e13adb9f 100644 --- a/jdmerge.c +++ b/jdmerge.c @@ -5,7 +5,7 @@ * Copyright (C) 1994-1996, Thomas G. Lane. * Copyright 2009 Pierre Ossman for Cendio AB * libjpeg-turbo Modifications: - * Copyright (C) 2009, 2011, D. R. Commander. + * Copyright (C) 2009, 2011, 2014 D. R. Commander. * Copyright (C) 2013, Linaro Limited. * For conditions of distribution and use, see the accompanying README file. * @@ -45,38 +45,6 @@ #ifdef UPSAMPLE_MERGING_SUPPORTED -#define PACK_SHORT_565(r, g, b) ((((r) << 8) & 0xf800) | \ - (((g) << 3) & 0x7E0) | ((b) >> 3)) -#define PACK_TWO_PIXELS(l, r) ((r << 16) | l) -#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3) - -#define WRITE_TWO_PIXELS(addr, pixels) { \ - ((INT16*)(addr))[0] = (pixels); \ - ((INT16*)(addr))[1] = (pixels) >> 16; \ -} -#define WRITE_TWO_ALIGNED_PIXELS(addr, pixels) ((*(INT32 *)(addr)) = pixels) - -#define DITHER_565_R(r, dither) ((r) + ((dither) & 0xFF)) -#define DITHER_565_G(g, dither) ((g) + (((dither) & 0xFF) >> 1)) -#define DITHER_565_B(b, dither) ((b) + ((dither) & 0xFF)) - - -/* Declarations for ordered dithering - * - * We use a 4x4 ordered dither array packed into 32 bits. This array is - * sufficent for dithering RGB888 to RGB565. 
- */ - -#define DITHER_MASK 0x3 -#define DITHER_ROTATE(x) (((x) << 24) | (((x) >> 8) & 0x00FFFFFF)) -static const INT32 dither_matrix[4] = { - 0x0008020A, - 0x0C040E06, - 0x030B0109, - 0x0F070D05 -}; - - /* Private subobject */ typedef struct { @@ -451,72 +419,106 @@ h2v2_merged_upsample (j_decompress_ptr cinfo, } +/* + * RGB565 conversion + */ + +#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \ + (((g) << 3) & 0x7E0) | ((b) >> 3)) +#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \ + (((g) << 11) & 0xE000) | \ + (((b) << 5) & 0x1F00)) + +#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l) +#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r) + +#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3) + +#define WRITE_TWO_PIXELS_LE(addr, pixels) { \ + ((INT16*)(addr))[0] = (pixels); \ + ((INT16*)(addr))[1] = (pixels) >> 16; \ +} +#define WRITE_TWO_PIXELS_BE(addr, pixels) { \ + ((INT16*)(addr))[1] = (pixels); \ + ((INT16*)(addr))[0] = (pixels) >> 16; \ +} + +#define DITHER_565_R(r, dither) ((r) + ((dither) & 0xFF)) +#define DITHER_565_G(g, dither) ((g) + (((dither) & 0xFF) >> 1)) +#define DITHER_565_B(b, dither) ((b) + ((dither) & 0xFF)) + + +/* Declarations for ordered dithering + * + * We use a 4x4 ordered dither array packed into 32 bits. This array is + * sufficient for dithering RGB888 to RGB565.
+ */ + +#define DITHER_MASK 0x3 +#define DITHER_ROTATE(x) (((x) << 24) | (((x) >> 8) & 0x00FFFFFF)) +static const INT32 dither_matrix[4] = { + 0x0008020A, + 0x0C040E06, + 0x030B0109, + 0x0F070D05 +}; + + +/* Include inline routines for RGB565 conversion */ + +#define PACK_SHORT_565 PACK_SHORT_565_LE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_LE +#define WRITE_TWO_PIXELS WRITE_TWO_PIXELS_LE +#define h2v1_merged_upsample_565_internal h2v1_merged_upsample_565_le +#define h2v1_merged_upsample_565D_internal h2v1_merged_upsample_565D_le +#define h2v2_merged_upsample_565_internal h2v2_merged_upsample_565_le +#define h2v2_merged_upsample_565D_internal h2v2_merged_upsample_565D_le +#include "jdmrg565.c" +#undef PACK_SHORT_565 +#undef PACK_TWO_PIXELS +#undef WRITE_TWO_PIXELS +#undef h2v1_merged_upsample_565_internal +#undef h2v1_merged_upsample_565D_internal +#undef h2v2_merged_upsample_565_internal +#undef h2v2_merged_upsample_565D_internal + +#define PACK_SHORT_565 PACK_SHORT_565_BE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_BE +#define WRITE_TWO_PIXELS WRITE_TWO_PIXELS_BE +#define h2v1_merged_upsample_565_internal h2v1_merged_upsample_565_be +#define h2v1_merged_upsample_565D_internal h2v1_merged_upsample_565D_be +#define h2v2_merged_upsample_565_internal h2v2_merged_upsample_565_be +#define h2v2_merged_upsample_565D_internal h2v2_merged_upsample_565D_be +#include "jdmrg565.c" +#undef PACK_SHORT_565 +#undef PACK_TWO_PIXELS +#undef WRITE_TWO_PIXELS +#undef h2v1_merged_upsample_565_internal +#undef h2v1_merged_upsample_565D_internal +#undef h2v2_merged_upsample_565_internal +#undef h2v2_merged_upsample_565D_internal + + +static INLINE boolean is_big_endian(void) +{ + int test_value = 1; + if(*(char *)&test_value != 1) + return TRUE; + return FALSE; +} + + METHODDEF(void) h2v1_merged_upsample_565 (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; - register int y, 
cred, cgreen, cblue; - int cb, cr; - register JSAMPROW outptr; - JSAMPROW inptr0, inptr1, inptr2; - JDIMENSION col; - /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - INT32 * Crgtab = upsample->Cr_g_tab; - INT32 * Cbgtab = upsample->Cb_g_tab; - unsigned int r, g, b; - INT32 rgb; - SHIFT_TEMPS - - inptr0 = input_buf[0][in_row_group_ctr]; - inptr1 = input_buf[1][in_row_group_ctr]; - inptr2 = input_buf[2][in_row_group_ctr]; - outptr = output_buf[0]; - - /* Loop for each pair of output pixels */ - for (col = cinfo->output_width >> 1; col > 0; col--) { - /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - - /* Fetch 2 Y values and emit 2 pixels */ - y = GETJSAMPLE(*inptr0++); - r = range_limit[y + cred]; - g = range_limit[y + cgreen]; - b = range_limit[y + cblue]; - rgb = PACK_SHORT_565(r, g, b); - - y = GETJSAMPLE(*inptr0++); - r = range_limit[y + cred]; - g = range_limit[y + cgreen]; - b = range_limit[y + cblue]; - rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); - - WRITE_TWO_PIXELS(outptr, rgb); - outptr += 4; - } - - /* If image width is odd, do the last output column separately */ - if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr0); - r = range_limit[y + cred]; - g = range_limit[y + cgreen]; - b = range_limit[y + cblue]; - rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr = rgb; - } + if (is_big_endian()) + h2v1_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr, + output_buf); + else + h2v1_merged_upsample_565_le(cinfo, input_buf, in_row_group_ctr, + output_buf); } @@ -525,70 +527,12 
@@ h2v1_merged_upsample_565D (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; - register int y, cred, cgreen, cblue; - int cb, cr; - register JSAMPROW outptr; - JSAMPROW inptr0, inptr1, inptr2; - JDIMENSION col; - /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - INT32 * Crgtab = upsample->Cr_g_tab; - INT32 * Cbgtab = upsample->Cb_g_tab; - INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; - unsigned int r, g, b; - INT32 rgb; - SHIFT_TEMPS - - inptr0 = input_buf[0][in_row_group_ctr]; - inptr1 = input_buf[1][in_row_group_ctr]; - inptr2 = input_buf[2][in_row_group_ctr]; - outptr = output_buf[0]; - - /* Loop for each pair of output pixels */ - for (col = cinfo->output_width >> 1; col > 0; col--) { - /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - - /* Fetch 2 Y values and emit 2 pixels */ - y = GETJSAMPLE(*inptr0++); - r = range_limit[DITHER_565_R(y + cred, d0)]; - g = range_limit[DITHER_565_G(y + cgreen, d0)]; - b = range_limit[DITHER_565_B(y + cblue, d0)]; - d0 = DITHER_ROTATE(d0); - rgb = PACK_SHORT_565(r, g, b); - - y = GETJSAMPLE(*inptr0++); - r = range_limit[DITHER_565_R(y + cred, d0)]; - g = range_limit[DITHER_565_G(y + cgreen, d0)]; - b = range_limit[DITHER_565_B(y + cblue, d0)]; - d0 = DITHER_ROTATE(d0); - rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); - - WRITE_TWO_PIXELS(outptr, rgb); - outptr += 4; - } - - /* If image width is odd, do the last output column separately */ - if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + 
Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr0); - r = range_limit[DITHER_565_R(y + cred, d0)]; - g = range_limit[DITHER_565_G(y + cgreen, d0)]; - b = range_limit[DITHER_565_B(y + cblue, d0)]; - rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr = rgb; - } + if (is_big_endian()) + h2v1_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr, + output_buf); + else + h2v1_merged_upsample_565D_le(cinfo, input_buf, in_row_group_ctr, + output_buf); } @@ -597,92 +541,12 @@ h2v2_merged_upsample_565 (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; - register int y, cred, cgreen, cblue; - int cb, cr; - register JSAMPROW outptr0, outptr1; - JSAMPROW inptr00, inptr01, inptr1, inptr2; - JDIMENSION col; - /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - INT32 * Crgtab = upsample->Cr_g_tab; - INT32 * Cbgtab = upsample->Cb_g_tab; - unsigned int r, g, b; - INT32 rgb; - SHIFT_TEMPS - - inptr00 = input_buf[0][in_row_group_ctr * 2]; - inptr01 = input_buf[0][in_row_group_ctr * 2 + 1]; - inptr1 = input_buf[1][in_row_group_ctr]; - inptr2 = input_buf[2][in_row_group_ctr]; - outptr0 = output_buf[0]; - outptr1 = output_buf[1]; - - /* Loop for each group of output pixels */ - for (col = cinfo->output_width >> 1; col > 0; col--) { - /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - - /* Fetch 4 Y values and emit 4 pixels */ - y = GETJSAMPLE(*inptr00++); - r = range_limit[y + cred]; - g = range_limit[y + cgreen]; - b = range_limit[y + cblue]; - rgb = PACK_SHORT_565(r, g, b); - - y = GETJSAMPLE(*inptr00++); - r = range_limit[y + cred]; - g = 
range_limit[y + cgreen]; - b = range_limit[y + cblue]; - rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); - - WRITE_TWO_PIXELS(outptr0, rgb); - outptr0 += 4; - - y = GETJSAMPLE(*inptr01++); - r = range_limit[y + cred]; - g = range_limit[y + cgreen]; - b = range_limit[y + cblue]; - rgb = PACK_SHORT_565(r, g, b); - - y = GETJSAMPLE(*inptr01++); - r = range_limit[y + cred]; - g = range_limit[y + cgreen]; - b = range_limit[y + cblue]; - rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); - - WRITE_TWO_PIXELS(outptr1, rgb); - outptr1 += 4; - } - - /* If image width is odd, do the last output column separately */ - if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - - y = GETJSAMPLE(*inptr00); - r = range_limit[y + cred]; - g = range_limit[y + cgreen]; - b = range_limit[y + cblue]; - rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr0 = rgb; - - y = GETJSAMPLE(*inptr01); - r = range_limit[y + cred]; - g = range_limit[y + cgreen]; - b = range_limit[y + cblue]; - rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr1 = rgb; - } + if (is_big_endian()) + h2v2_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr, + output_buf); + else + h2v2_merged_upsample_565_le(cinfo, input_buf, in_row_group_ctr, + output_buf); } @@ -691,98 +555,12 @@ h2v2_merged_upsample_565D (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; - register int y, cred, cgreen, cblue; - int cb, cr; - register JSAMPROW outptr0, outptr1; - JSAMPROW inptr00, inptr01, inptr1, inptr2; - JDIMENSION col; - /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - INT32 * Crgtab = upsample->Cr_g_tab; - INT32 * Cbgtab = 
upsample->Cb_g_tab; - INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; - INT32 d1 = dither_matrix[(cinfo->output_scanline+1) & DITHER_MASK]; - unsigned int r, g, b; - INT32 rgb; - SHIFT_TEMPS - - inptr00 = input_buf[0][in_row_group_ctr*2]; - inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; - inptr1 = input_buf[1][in_row_group_ctr]; - inptr2 = input_buf[2][in_row_group_ctr]; - outptr0 = output_buf[0]; - outptr1 = output_buf[1]; - - /* Loop for each group of output pixels */ - for (col = cinfo->output_width >> 1; col > 0; col--) { - /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - - /* Fetch 4 Y values and emit 4 pixels */ - y = GETJSAMPLE(*inptr00++); - r = range_limit[DITHER_565_R(y + cred, d0)]; - g = range_limit[DITHER_565_G(y + cgreen, d0)]; - b = range_limit[DITHER_565_B(y + cblue, d0)]; - d0 = DITHER_ROTATE(d0); - rgb = PACK_SHORT_565(r, g, b); - - y = GETJSAMPLE(*inptr00++); - r = range_limit[DITHER_565_R(y + cred, d1)]; - g = range_limit[DITHER_565_G(y + cgreen, d1)]; - b = range_limit[DITHER_565_B(y + cblue, d1)]; - d1 = DITHER_ROTATE(d1); - rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); - - WRITE_TWO_PIXELS(outptr0, rgb); - outptr0 += 4; - - y = GETJSAMPLE(*inptr01++); - r = range_limit[DITHER_565_R(y + cred, d0)]; - g = range_limit[DITHER_565_G(y + cgreen, d0)]; - b = range_limit[DITHER_565_B(y + cblue, d0)]; - d0 = DITHER_ROTATE(d0); - rgb = PACK_SHORT_565(r, g, b); - - y = GETJSAMPLE(*inptr01++); - r = range_limit[DITHER_565_R(y + cred, d1)]; - g = range_limit[DITHER_565_G(y + cgreen, d1)]; - b = range_limit[DITHER_565_B(y + cblue, d1)]; - d1 = DITHER_ROTATE(d1); - rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); - - WRITE_TWO_PIXELS(outptr1, rgb); - outptr1 += 4; - } - - /* If image width is odd, do the last output column separately */ - if (cinfo->output_width & 1) 
{ - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); - cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); - cblue = Cbbtab[cb]; - - y = GETJSAMPLE(*inptr00); - r = range_limit[DITHER_565_R(y + cred, d0)]; - g = range_limit[DITHER_565_G(y + cgreen, d0)]; - b = range_limit[DITHER_565_B(y + cblue, d0)]; - rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr0 = rgb; - - y = GETJSAMPLE(*inptr01); - r = range_limit[DITHER_565_R(y + cred, d1)]; - g = range_limit[DITHER_565_G(y + cgreen, d1)]; - b = range_limit[DITHER_565_B(y + cblue, d1)]; - rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr1 = rgb; - } + if (is_big_endian()) + h2v2_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr, + output_buf); + else + h2v2_merged_upsample_565D_le(cinfo, input_buf, in_row_group_ctr, + output_buf); } diff --git a/jdmrg565.c b/jdmrg565.c new file mode 100644 index 00000000..0a10bccb --- /dev/null +++ b/jdmrg565.c @@ -0,0 +1,355 @@ +/* + * jdmrg565.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1994-1996, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2013, Linaro Limited. + * Copyright (C) 2014, D. R. Commander. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains code for merged upsampling/color conversion. 
+ */ + + +INLINE +LOCAL(void) +h2v1_merged_upsample_565_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr; + JSAMPROW inptr0, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + INT32 * Crgtab = upsample->Cr_g_tab; + INT32 * Cbgtab = upsample->Cb_g_tab; + unsigned int r, g, b; + INT32 rgb; + SHIFT_TEMPS + + inptr0 = input_buf[0][in_row_group_ctr]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr = output_buf[0]; + + /* Loop for each pair of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + /* Fetch 2 Y values and emit 2 pixels */ + y = GETJSAMPLE(*inptr0++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr0++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr, rgb); + outptr += 4; + } + + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + y = GETJSAMPLE(*inptr0); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = 
PACK_SHORT_565(r, g, b); + *(INT16*)outptr = rgb; + } + } + + +INLINE +LOCAL(void) +h2v1_merged_upsample_565D_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr; + JSAMPROW inptr0, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + INT32 * Crgtab = upsample->Cr_g_tab; + INT32 * Cbgtab = upsample->Cb_g_tab; + INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; + unsigned int r, g, b; + INT32 rgb; + SHIFT_TEMPS + + inptr0 = input_buf[0][in_row_group_ctr]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr = output_buf[0]; + + /* Loop for each pair of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + /* Fetch 2 Y values and emit 2 pixels */ + y = GETJSAMPLE(*inptr0++); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr0++); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr, rgb); + outptr += 4; + } + + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = 
GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + y = GETJSAMPLE(*inptr0); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr = rgb; + } +} + + +INLINE +LOCAL(void) +h2v2_merged_upsample_565_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr0, outptr1; + JSAMPROW inptr00, inptr01, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + INT32 * Crgtab = upsample->Cr_g_tab; + INT32 * Cbgtab = upsample->Cb_g_tab; + unsigned int r, g, b; + INT32 rgb; + SHIFT_TEMPS + + inptr00 = input_buf[0][in_row_group_ctr * 2]; + inptr01 = input_buf[0][in_row_group_ctr * 2 + 1]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr0 = output_buf[0]; + outptr1 = output_buf[1]; + + /* Loop for each group of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + /* Fetch 4 Y values and emit 4 pixels */ + y = GETJSAMPLE(*inptr00++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr00++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = 
PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr0, rgb); + outptr0 += 4; + + y = GETJSAMPLE(*inptr01++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr01++); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr1, rgb); + outptr1 += 4; + } + + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + y = GETJSAMPLE(*inptr00); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr0 = rgb; + + y = GETJSAMPLE(*inptr01); + r = range_limit[y + cred]; + g = range_limit[y + cgreen]; + b = range_limit[y + cblue]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr1 = rgb; + } +} + + +INLINE +LOCAL(void) +h2v2_merged_upsample_565D_internal (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ + my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + register int y, cred, cgreen, cblue; + int cb, cr; + register JSAMPROW outptr0, outptr1; + JSAMPROW inptr00, inptr01, inptr1, inptr2; + JDIMENSION col; + /* copy these pointers into registers if possible */ + register JSAMPLE * range_limit = cinfo->sample_range_limit; + int * Crrtab = upsample->Cr_r_tab; + int * Cbbtab = upsample->Cb_b_tab; + INT32 * Crgtab = upsample->Cr_g_tab; + INT32 * Cbgtab = upsample->Cb_g_tab; + INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; + INT32 d1 = dither_matrix[(cinfo->output_scanline+1) & DITHER_MASK]; + unsigned int r, g, b; + INT32 rgb; + SHIFT_TEMPS + + inptr00 = 
input_buf[0][in_row_group_ctr*2]; + inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; + inptr1 = input_buf[1][in_row_group_ctr]; + inptr2 = input_buf[2][in_row_group_ctr]; + outptr0 = output_buf[0]; + outptr1 = output_buf[1]; + + /* Loop for each group of output pixels */ + for (col = cinfo->output_width >> 1; col > 0; col--) { + /* Do the chroma part of the calculation */ + cb = GETJSAMPLE(*inptr1++); + cr = GETJSAMPLE(*inptr2++); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + /* Fetch 4 Y values and emit 4 pixels */ + y = GETJSAMPLE(*inptr00++); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr00++); + r = range_limit[DITHER_565_R(y + cred, d1)]; + g = range_limit[DITHER_565_G(y + cgreen, d1)]; + b = range_limit[DITHER_565_B(y + cblue, d1)]; + d1 = DITHER_ROTATE(d1); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr0, rgb); + outptr0 += 4; + + y = GETJSAMPLE(*inptr01++); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + d0 = DITHER_ROTATE(d0); + rgb = PACK_SHORT_565(r, g, b); + + y = GETJSAMPLE(*inptr01++); + r = range_limit[DITHER_565_R(y + cred, d1)]; + g = range_limit[DITHER_565_G(y + cgreen, d1)]; + b = range_limit[DITHER_565_B(y + cblue, d1)]; + d1 = DITHER_ROTATE(d1); + rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); + + WRITE_TWO_PIXELS(outptr1, rgb); + outptr1 += 4; + } + + /* If image width is odd, do the last output column separately */ + if (cinfo->output_width & 1) { + cb = GETJSAMPLE(*inptr1); + cr = GETJSAMPLE(*inptr2); + cred = Crrtab[cr]; + cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cblue = Cbbtab[cb]; + + y = GETJSAMPLE(*inptr00); + r = 
range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr0 = rgb; + + y = GETJSAMPLE(*inptr01); + r = range_limit[DITHER_565_R(y + cred, d1)]; + g = range_limit[DITHER_565_G(y + cgreen, d1)]; + b = range_limit[DITHER_565_B(y + cblue, d1)]; + rgb = PACK_SHORT_565(r, g, b); + *(INT16*)outptr1 = rgb; + } +} diff --git a/jerror.h b/jerror.h index 402613e0..816fdf9a 100644 --- a/jerror.h +++ b/jerror.h @@ -207,6 +207,7 @@ JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined") JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code") #endif #endif +JMESSAGE(JERR_BAD_PARAM, "Bogus parameter") #ifdef JMAKE_ENUM_LIST diff --git a/jpegint.h b/jpegint.h index 37742dde..bc0d3dbe 100644 --- a/jpegint.h +++ b/jpegint.h @@ -57,6 +57,38 @@ struct jpeg_comp_master { /* State variables made visible to other modules */ boolean call_pass_startup; /* True if pass_startup must be called */ boolean is_last_pass; /* True during last pass */ + + /* Extension parameters */ + boolean use_moz_defaults; /* TRUE=use Mozilla defaults */ + boolean optimize_scans; /* TRUE=optimize progressive coding scans */ + boolean one_dc_scan; /* TRUE=use a single DC scan interleaving all components */ + boolean sep_dc_scan; /* TRUE=each DC scan is separate */ + boolean trellis_quant; /* TRUE=use trellis quantization */ + boolean trellis_quant_dc; /* TRUE=use trellis quant for DC coefficient */ + boolean trellis_eob_opt; /* TRUE=optimize for sequences of EOB */ + boolean use_flat_quant_tbl; /* TRUE=use flat quantization table */ + boolean use_lambda_weight_tbl; /* TRUE=use lambda weighting table */ + boolean use_scans_in_trellis; /* TRUE=use scans in trellis optimization */ + boolean trellis_passes; /* TRUE=currently doing trellis-related passes */ + boolean trellis_q_opt; /* TRUE=optimize quant table in trellis loop */ + boolean overshoot_deringing; 
/* TRUE=preprocess input to reduce ringing of edges on white background */ + + double norm_src[NUM_QUANT_TBLS][DCTSIZE2]; + double norm_coef[NUM_QUANT_TBLS][DCTSIZE2]; + + int trellis_freq_split; /* splitting point for frequency in trellis quantization */ + int trellis_num_loops; /* number of trellis loops */ + + int num_scans_luma; /* # of entries in scan_info array pertaining to luma (used when optimize_scans is TRUE) */ + int num_scans_luma_dc; + int num_scans_chroma_dc; + int num_frequency_splits; + + int Al_max_luma; /* maximum value of Al tested when optimizing scans (luma) */ + int Al_max_chroma; /* maximum value of Al tested when optimizing scans (chroma) */ + + float lambda_log_scale1; + float lambda_log_scale2; }; /* Main buffer control (downsampled-data buffer) */ diff --git a/jpeglib.h b/jpeglib.h index e5d78ebd..2f000723 100644 --- a/jpeglib.h +++ b/jpeglib.h @@ -265,6 +265,45 @@ typedef enum { } J_DITHER_MODE; +/* These 32-bit GUIDs and the corresponding jpeg_*_get_*_param()/ + * jpeg_*_set_*_param() functions allow for extending the libjpeg API without + * breaking backward ABI compatibility. The actual parameters are stored in + * the opaque jpeg_comp_master and jpeg_decomp_master structs.
+ */ + +/* Boolean extension parameters */ + +typedef enum { + JBOOLEAN_USE_MOZ_DEFAULTS = 0xAE2F5D7F, /* TRUE=use Mozilla defaults */ + JBOOLEAN_OPTIMIZE_SCANS = 0x680C061E, /* TRUE=optimize progressive coding scans */ + JBOOLEAN_ONE_DC_SCAN = 0x3DA6A269, /* TRUE=use a single DC scan interleaving all components */ + JBOOLEAN_SEP_DC_SCAN = 0xE20DFA9F, /* TRUE=each DC scan is separate */ + JBOOLEAN_TRELLIS_QUANT = 0xC5122033, /* TRUE=use trellis quantization */ + JBOOLEAN_TRELLIS_QUANT_DC = 0x339D4C0C, /* TRUE=use trellis quant for DC coefficient */ + JBOOLEAN_TRELLIS_EOB_OPT = 0xD7F73780, /* TRUE=optimize for sequences of EOB */ + JBOOLEAN_USE_FLAT_QUANT_TBL = 0xE807EC6C, /* TRUE=use flat quantization table */ + JBOOLEAN_USE_LAMBDA_WEIGHT_TBL = 0x339DB65F, /* TRUE=use lambda weighting table */ + JBOOLEAN_USE_SCANS_IN_TRELLIS = 0xFD841435, /* TRUE=use scans in trellis optimization */ + JBOOLEAN_TRELLIS_PASSES = 0x3FF8A439, /* TRUE=currently doing trellis-related passes */ + JBOOLEAN_TRELLIS_Q_OPT = 0xE12AE269, /* TRUE=optimize quant table in trellis loop */ + JBOOLEAN_OVERSHOOT_DERINGING = 0x3F4BBBF9 /* TRUE=preprocess input to reduce ringing of edges on white background */ +} J_BOOLEAN_PARAM; + +/* Floating point parameters */ + +typedef enum { + JFLOAT_LAMBDA_LOG_SCALE1 = 0x5B61A599, + JFLOAT_LAMBDA_LOG_SCALE2 = 0xB9BBAE03 +} J_FLOAT_PARAM; + +/* Integer parameters */ + +typedef enum { + JINT_TRELLIS_FREQ_SPLIT = 0x6FAFF127, /* splitting point for frequency in trellis quantization */ + JINT_TRELLIS_NUM_LOOPS = 0xB63EBF39 /* number of trellis loops */ +} J_INT_PARAM; + + /* Common fields between JPEG compression and decompression master structs. 
*/ #define jpeg_common_fields \ @@ -374,36 +413,6 @@ struct jpeg_compress_struct { int smoothing_factor; /* 1..100, or 0 for no input smoothing */ J_DCT_METHOD dct_method; /* DCT algorithm selector */ - boolean use_moz_defaults; /* TRUE=use Mozilla defaults */ - boolean optimize_scans; /* TRUE=optimize progressive coding scans */ - boolean one_dc_scan; /* TRUE=use a single DC scan interleaving all components */ - boolean sep_dc_scan; /* TRUE=each DC scan is separate */ - boolean trellis_quant; /* TRUE=use trellis quantization */ - boolean trellis_quant_dc; /* TRUE=use trellis quant for DC coefficient */ - boolean trellis_eob_opt; /* TRUE=optimize for sequences of EOB */ - boolean use_flat_quant_tbl; /* TRUE=use flat quantization table */ - boolean use_lambda_weight_tbl; /* TRUE=use lambda weighting table */ - boolean use_scans_in_trellis; /* TRUE=use scans in trellis optimization */ - boolean trellis_passes; /* TRUE=currently doing trellis-related passes */ - boolean trellis_q_opt; /* TRUE=optimize quant table in trellis loop */ - - double norm_src[NUM_QUANT_TBLS][DCTSIZE2]; - double norm_coef[NUM_QUANT_TBLS][DCTSIZE2]; - - int trellis_freq_split; /* splitting point for frequency in trellis quantization */ - int trellis_num_loops; /* number of trellis loops */ - - int num_scans_luma; /* # of entries in scan_info array pertaining to luma (used when optimize_scans is TRUE */ - int num_scans_luma_dc; - int num_scans_chroma_dc; - int num_frequency_splits; - - int Al_max_luma; /* maximum value of Al tested when optimizing scans (luma) */ - int Al_max_chroma; /* maximum value of Al tested when optimizing scans (chroma) */ - - float lambda_log_scale1; - float lambda_log_scale2; - /* The restart interval can be specified in absolute MCUs by setting * restart_interval, or in MCU rows by setting restart_in_rows * (in which case the correct restart_interval will be figured @@ -1073,6 +1082,27 @@ EXTERN(void) jpeg_destroy (j_common_ptr cinfo); /* Default restart-marker-resync 
procedure for use by data source modules */ EXTERN(boolean) jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired); +/* Accessor functions for extension parameters */ +EXTERN(boolean) jpeg_c_bool_param_supported (j_compress_ptr cinfo, + J_BOOLEAN_PARAM param); +EXTERN(void) jpeg_c_set_bool_param (j_compress_ptr cinfo, + J_BOOLEAN_PARAM param, boolean value); +EXTERN(boolean) jpeg_c_get_bool_param (j_compress_ptr cinfo, + J_BOOLEAN_PARAM param); + +EXTERN(boolean) jpeg_c_float_param_supported (j_compress_ptr cinfo, + J_FLOAT_PARAM param); +EXTERN(void) jpeg_c_set_float_param (j_compress_ptr cinfo, J_FLOAT_PARAM param, + float value); +EXTERN(float) jpeg_c_get_float_param (j_compress_ptr cinfo, + J_FLOAT_PARAM param); + +EXTERN(boolean) jpeg_c_int_param_supported (j_compress_ptr cinfo, + J_INT_PARAM param); +EXTERN(void) jpeg_c_set_int_param (j_compress_ptr cinfo, J_INT_PARAM param, + int value); +EXTERN(int) jpeg_c_get_int_param (j_compress_ptr cinfo, J_INT_PARAM param); + /* These marker codes are exported since applications and data source modules * are likely to want to use them. diff --git a/jpegtran.c b/jpegtran.c index 5ee342db..8275d1a9 100644 --- a/jpegtran.c +++ b/jpegtran.c @@ -234,7 +234,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, usage(); } else if (keymatch(arg, "fastcrush", 4)) { - cinfo->optimize_scans = FALSE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_OPTIMIZE_SCANS, FALSE); } else if (keymatch(arg, "grayscale", 1) || keymatch(arg, "greyscale",1)) { /* Force to grayscale. */ @@ -310,7 +310,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, } else if (keymatch(arg, "revert", 3)) { /* revert to old JPEG default */ - cinfo->use_moz_defaults = FALSE; + jpeg_c_set_bool_param(cinfo, JBOOLEAN_USE_MOZ_DEFAULTS, FALSE); } else if (keymatch(arg, "rotate", 2)) { /* Rotate 90, 180, or 270 degrees (measured clockwise). 
*/ @@ -415,7 +415,8 @@ main (int argc, char **argv) /* Initialize the JPEG compression object with default error handling. */ dstinfo.err = jpeg_std_error(&jdsterr); jpeg_create_compress(&dstinfo); - dstinfo.use_moz_defaults = TRUE; + if (jpeg_c_bool_param_supported(&dstinfo, JBOOLEAN_USE_MOZ_DEFAULTS)) + jpeg_c_set_bool_param(&dstinfo, JBOOLEAN_USE_MOZ_DEFAULTS, TRUE); /* Scan command line to find file names. * It is convenient to use just one switch-parsing routine, but the switch @@ -469,7 +470,8 @@ main (int argc, char **argv) #endif /* Specify data source for decompression */ - memsrc = dstinfo.use_moz_defaults; /* needed to revert to original */ + if (jpeg_c_bool_param_supported(&dstinfo, JBOOLEAN_USE_MOZ_DEFAULTS)) + memsrc = jpeg_c_get_bool_param(&dstinfo, JBOOLEAN_USE_MOZ_DEFAULTS); /* needed to revert to original */ #if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) if (memsrc) { size_t nbytes; @@ -555,7 +557,8 @@ main (int argc, char **argv) /* Specify data destination for compression */ #if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) - if (dstinfo.use_moz_defaults) + if (jpeg_c_bool_param_supported(&dstinfo, JBOOLEAN_USE_MOZ_DEFAULTS) && + jpeg_c_get_bool_param(&dstinfo, JBOOLEAN_USE_MOZ_DEFAULTS)) jpeg_mem_dest(&dstinfo, &outbuffer, &outsize); else #endif @@ -577,7 +580,8 @@ main (int argc, char **argv) /* Finish compression and release memory */ jpeg_finish_compress(&dstinfo); - if (dstinfo.use_moz_defaults) { + if (jpeg_c_bool_param_supported(&dstinfo, JBOOLEAN_USE_MOZ_DEFAULTS) && + jpeg_c_get_bool_param(&dstinfo, JBOOLEAN_USE_MOZ_DEFAULTS)) { size_t nbytes; unsigned char *buffer = outbuffer; diff --git a/md5/md5.c b/md5/md5.c index 7193e95b..b30df974 100644 --- a/md5/md5.c +++ b/md5/md5.c @@ -38,6 +38,15 @@ static void MD5Transform(unsigned int [4], const unsigned char [64]); #define Decode memcpy #else +/* + * OS X doesn't have le32toh() or htole32() + */ +#ifdef __APPLE__ +#include <libkern/OSByteOrder.h> +#define le32toh(x)
OSSwapLittleToHostInt32(x) +#define htole32(x) OSSwapHostToLittleInt32(x) +#endif + /* * Encodes input (unsigned int) into output (unsigned char). Assumes len is * a multiple of 4. diff --git a/rdswitch.c b/rdswitch.c index 75b2081c..66f8e59c 100644 --- a/rdswitch.c +++ b/rdswitch.c @@ -314,7 +314,8 @@ static const unsigned int flat_quant_tbl[DCTSIZE2] = { LOCAL(void) jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline) { - if (cinfo->use_flat_quant_tbl) { + if (jpeg_c_bool_param_supported(cinfo, JBOOLEAN_USE_FLAT_QUANT_TBL) && + jpeg_c_get_bool_param(cinfo, JBOOLEAN_USE_FLAT_QUANT_TBL)) { jpeg_add_quant_table(cinfo, 0, flat_quant_tbl, q_scale_factor[0], force_baseline); jpeg_add_quant_table(cinfo, 1, flat_quant_tbl, diff --git a/simd/Makefile.am b/simd/Makefile.am index bc631452..5e664494 100644 --- a/simd/Makefile.am +++ b/simd/Makefile.am @@ -70,6 +70,13 @@ libsimd_la_SOURCES = jsimd_mips.c jsimd_mips_dspr2_asm.h jsimd_mips_dspr2.S endif +if SIMD_POWERPC + +libsimd_la_SOURCES = jsimd_powerpc.c jsimd_powerpc_altivec.c +libsimd_la_CFLAGS = -maltivec + +endif + AM_CPPFLAGS = -I$(top_srcdir) .asm.lo: diff --git a/simd/jsimd.h b/simd/jsimd.h index c5abd458..b0329728 100644 --- a/simd/jsimd.h +++ b/simd/jsimd.h @@ -2,7 +2,7 @@ * simd/jsimd.h * * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright 2011 D. R. Commander + * Copyright (C) 2011, 2014 D. R. 
Commander * Copyright (C) 2013-2014, MIPS Technologies, Inc., California * Copyright (C) 2014 Linaro Limited * @@ -21,6 +21,7 @@ #define JSIMD_SSE2 0x08 #define JSIMD_ARM_NEON 0x10 #define JSIMD_MIPS_DSPR2 0x20 +#define JSIMD_ALTIVEC 0x40 /* SIMD Ext: retrieve SIMD/CPU information */ EXTERN(unsigned int) jpeg_simd_cpu_support (void); @@ -554,6 +555,8 @@ EXTERN(void) jsimd_fdct_ifast_neon (DCTELEM * data); EXTERN(void) jsimd_fdct_ifast_mips_dspr2 (DCTELEM * data); +EXTERN(void) jsimd_fdct_ifast_altivec (DCTELEM * data); + /* Floating Point Forward DCT */ EXTERN(void) jsimd_fdct_float_3dnow (FAST_FLOAT * data); diff --git a/simd/jsimd_powerpc.c b/simd/jsimd_powerpc.c new file mode 100644 index 00000000..a9a5965d --- /dev/null +++ b/simd/jsimd_powerpc.c @@ -0,0 +1,358 @@ +/* + * jsimd_powerpc.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright 2009-2011, 2014 D. R. Commander + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * PowerPC architecture.
+ */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" + +static unsigned int simd_support = ~0; + +LOCAL(void) +init_simd (void) +{ + char *env = NULL; + + if (simd_support != ~0U) + return; + + simd_support = JSIMD_ALTIVEC; + + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; +} + +GLOBAL(int) +jsimd_can_rgb_ycc (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565 (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ +} + +GLOBAL(void) +jsimd_rgb_gray_convert (j_compress_ptr cinfo, + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) +{ +} + +GLOBAL(void) +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_downsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ +} + +GLOBAL(void) +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_upsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample (j_decompress_ptr cinfo, 
+ jpeg_component_info * compptr, + JSAMPARRAY input_data, + JSAMPARRAY * output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_h2v1_upsample (j_decompress_ptr cinfo, + jpeg_component_info * compptr, + JSAMPARRAY input_data, + JSAMPARRAY * output_data_ptr) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info * compptr, + JSAMPARRAY input_data, + JSAMPARRAY * output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, + jpeg_component_info * compptr, + JSAMPARRAY input_data, + JSAMPARRAY * output_data_ptr) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) +{ +} + +GLOBAL(int) +jsimd_can_convsamp (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM * workspace) +{ +} + +GLOBAL(void) +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT * workspace) +{ +} + +GLOBAL(int) +jsimd_can_fdct_islow (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast (void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_ALTIVEC) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow (DCTELEM * data) +{ +} + +GLOBAL(void) +jsimd_fdct_ifast 
(DCTELEM * data) +{ + jsimd_fdct_ifast_altivec(data); +} + +GLOBAL(void) +jsimd_fdct_float (FAST_FLOAT * data) +{ +} + +GLOBAL(int) +jsimd_can_quantize (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, + DCTELEM * workspace) +{ +} + +GLOBAL(void) +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, + FAST_FLOAT * workspace) +{ +} + +GLOBAL(int) +jsimd_can_idct_2x2 (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4 (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_idct_islow (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast (void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float (void) +{ + return 0; +} + +GLOBAL(void) +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} diff --git a/simd/jsimd_powerpc_altivec.c b/simd/jsimd_powerpc_altivec.c new file mode 100644 index 00000000..84132d03 --- /dev/null +++ b/simd/jsimd_powerpc_altivec.c @@ -0,0 +1,190 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014, D. R. Commander. + * All rights reserved. + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#define JPEG_INTERNALS +#include "../jinclude.h" +#include "../jpeglib.h" +#include "../jsimd.h" +#include "../jdct.h" +#include "../jsimddct.h" +#include "jsimd.h" +#include <altivec.h> + +#define TRANSPOSE(row, col) \ +{ \ + __vector short row04l, row04h, row15l, row15h, \ + row26l, row26h, row37l, row37h; \ + __vector short col01e, col01o, col23e, col23o, \ + col45e, col45o, col67e, col67o; \ + \ + /* transpose coefficients (phase 1) */ \ + row04l = vec_mergeh(row##0, row##4); /* row04l=(00 40 01 41 02 42 03 43) */ \ + row04h = vec_mergel(row##0, row##4); /* row04h=(04 44 05 45 06 46 07 47) */ \ + row15l = vec_mergeh(row##1, row##5); /* row15l=(10 50 11 51 12 52 13 53) */ \ + row15h = vec_mergel(row##1, row##5); /* row15h=(14 54 15 55 16 56 17 57) */ \ + row26l = vec_mergeh(row##2, row##6); /* row26l=(20 60 21 61 22 62 23 63) */ \ + row26h = vec_mergel(row##2, row##6); /* row26h=(24 64 25 65 26 66 27 67) */ \ + row37l = vec_mergeh(row##3, row##7); /* row37l=(30 70 31 71 32 72 33 73) */ \ + row37h = vec_mergel(row##3, row##7); /* row37h=(34 74 35 75 36 76 37 77) */ \ + \ + /* transpose coefficients (phase 2) */ \ + col01e = vec_mergeh(row04l, row26l); /* col01e=(00 20 40 60 01 21 41 61) */ \ + col23e =
vec_mergel(row04l, row26l); /* col23e=(02 22 42 62 03 23 43 63) */ \ + col45e = vec_mergeh(row04h, row26h); /* col45e=(04 24 44 64 05 25 45 65) */ \ + col67e = vec_mergel(row04h, row26h); /* col67e=(06 26 46 66 07 27 47 67) */ \ + col01o = vec_mergeh(row15l, row37l); /* col01o=(10 30 50 70 11 31 51 71) */ \ + col23o = vec_mergel(row15l, row37l); /* col23o=(12 32 52 72 13 33 53 73) */ \ + col45o = vec_mergeh(row15h, row37h); /* col45o=(14 34 54 74 15 35 55 75) */ \ + col67o = vec_mergel(row15h, row37h); /* col67o=(16 36 56 76 17 37 57 77) */ \ + \ + /* transpose coefficients (phase 3) */ \ + col##0 = vec_mergeh(col01e, col01o); /* col0=(00 10 20 30 40 50 60 70) */ \ + col##1 = vec_mergel(col01e, col01o); /* col1=(01 11 21 31 41 51 61 71) */ \ + col##2 = vec_mergeh(col23e, col23o); /* col2=(02 12 22 32 42 52 62 72) */ \ + col##3 = vec_mergel(col23e, col23o); /* col3=(03 13 23 33 43 53 63 73) */ \ + col##4 = vec_mergeh(col45e, col45o); /* col4=(04 14 24 34 44 54 64 74) */ \ + col##5 = vec_mergel(col45e, col45o); /* col5=(05 15 25 35 45 55 65 75) */ \ + col##6 = vec_mergeh(col67e, col67o); /* col6=(06 16 26 36 46 56 66 76) */ \ + col##7 = vec_mergel(col67e, col67o); /* col7=(07 17 27 37 47 57 67 77) */ \ +} + +static const __vector short constants __attribute__((aligned(16))) = +{ + 98 << 5, /* FIX(0.382683433) */ + 139 << 5, /* FIX(0.541196100) */ + 181 << 5, /* FIX(0.707106781) */ + 334 << 5 /* FIX(1.306562965) */ +}; + +#define DO_DCT() \ +{ \ + /* Even part */ \ + \ + tmp10 = vec_add(tmp0, tmp3); \ + tmp13 = vec_sub(tmp0, tmp3); \ + tmp11 = vec_add(tmp1, tmp2); \ + tmp12 = vec_sub(tmp1, tmp2); \ + \ + out0 = vec_add(tmp10, tmp11); \ + out4 = vec_sub(tmp10, tmp11); \ + \ + z1 = vec_add(tmp12, tmp13); \ + z1 = vec_sl(z1, PRE_MULTIPLY_SCALE_BITS); \ + z1 = vec_madds(z1, PW_0707, zero); \ + \ + out2 = vec_add(tmp13, z1); \ + out6 = vec_sub(tmp13, z1); \ + \ + /* Odd part */ \ + \ + tmp10 = vec_add(tmp4, tmp5); \ + tmp11 = vec_add(tmp5, tmp6); \ + tmp12 = vec_add(tmp6,
tmp7); \ + \ + tmp10 = vec_sl(tmp10, PRE_MULTIPLY_SCALE_BITS); \ + tmp12 = vec_sl(tmp12, PRE_MULTIPLY_SCALE_BITS); \ + z5 = vec_sub(tmp10, tmp12); \ + z5 = vec_madds(z5, PW_0382, zero); \ + \ + z2 = vec_madds(tmp10, PW_0541, zero); \ + z2 = vec_add(z2, z5); \ + \ + z4 = vec_madds(tmp12, PW_1306, zero); \ + z4 = vec_add(z4, z5); \ + \ + tmp11 = vec_sl(tmp11, PRE_MULTIPLY_SCALE_BITS); \ + z3 = vec_madds(tmp11, PW_0707, zero); \ + \ + z11 = vec_add(tmp7, z3); \ + z13 = vec_sub(tmp7, z3); \ + \ + out5 = vec_add(z13, z2); \ + out3 = vec_sub(z13, z2); \ + out1 = vec_add(z11, z4); \ + out7 = vec_sub(z11, z4); \ +} + +void +jsimd_fdct_ifast_altivec (DCTELEM *data) +{ + __vector short row0, row1, row2, row3, row4, row5, row6, row7, + col0, col1, col2, col3, col4, col5, col6, col7, + tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13, + z1, z2, z3, z4, z5, z11, z13, + out0, out1, out2, out3, out4, out5, out6, out7; + + /* Constants */ + __vector short zero = vec_splat_s16(0), + PW_0382 = vec_splat(constants, 0), + PW_0541 = vec_splat(constants, 1), + PW_0707 = vec_splat(constants, 2), + PW_1306 = vec_splat(constants, 3); + __vector unsigned short PRE_MULTIPLY_SCALE_BITS = vec_splat_u16(2); + + /* Pass 1: process rows. */ + + row0 = *(__vector short *)&data[0]; + row1 = *(__vector short *)&data[8]; + row2 = *(__vector short *)&data[16]; + row3 = *(__vector short *)&data[24]; + row4 = *(__vector short *)&data[32]; + row5 = *(__vector short *)&data[40]; + row6 = *(__vector short *)&data[48]; + row7 = *(__vector short *)&data[56]; + + TRANSPOSE(row, col); + + tmp0 = vec_add(col0, col7); + tmp7 = vec_sub(col0, col7); + tmp1 = vec_add(col1, col6); + tmp6 = vec_sub(col1, col6); + tmp2 = vec_add(col2, col5); + tmp5 = vec_sub(col2, col5); + tmp3 = vec_add(col3, col4); + tmp4 = vec_sub(col3, col4); + + DO_DCT(); + + /* Pass 2: process columns. 
*/ + + TRANSPOSE(out, row); + + tmp0 = vec_add(row0, row7); + tmp7 = vec_sub(row0, row7); + tmp1 = vec_add(row1, row6); + tmp6 = vec_sub(row1, row6); + tmp2 = vec_add(row2, row5); + tmp5 = vec_sub(row2, row5); + tmp3 = vec_add(row3, row4); + tmp4 = vec_sub(row3, row4); + + DO_DCT(); + + *(__vector short *)&data[0] = out0; + *(__vector short *)&data[8] = out1; + *(__vector short *)&data[16] = out2; + *(__vector short *)&data[24] = out3; + *(__vector short *)&data[32] = out4; + *(__vector short *)&data[40] = out5; + *(__vector short *)&data[48] = out6; + *(__vector short *)&data[56] = out7; +} diff --git a/turbojpeg.c b/turbojpeg.c index 657be0b9..dec18e3e 100644 --- a/turbojpeg.c +++ b/turbojpeg.c @@ -206,7 +206,7 @@ static int setCompDefaults(struct jpeg_compress_struct *cinfo, } cinfo->input_components=tjPixelSize[pixelFormat]; - cinfo->use_moz_defaults = TRUE; + cinfo->master->use_moz_defaults = TRUE; jpeg_set_defaults(cinfo); if((env=getenv("TJ_OPTIMIZE"))!=NULL && strlen(env)>0 && !strcmp(env, "1")) @@ -241,7 +241,7 @@ static int setCompDefaults(struct jpeg_compress_struct *cinfo, else jpeg_set_colorspace(cinfo, JCS_YCbCr); /* Set scan pattern again as colorspace might have changed */ - if (cinfo->use_moz_defaults) + if (cinfo->master->use_moz_defaults) jpeg_simple_progression(cinfo); cinfo->comp_info[0].h_samp_factor=tjMCUWidth[subsamp]/8; @@ -616,9 +616,10 @@ DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2(int width, int pad, int height, nc=(subsamp==TJSAMP_GRAY? 
1:3); for(i=0; iout_color_space == JCS_RGB565) { - #define red_mask 0xF800 - #define green_mask 0x7E0 - #define blue_mask 0x1F - unsigned char r, g, b; + boolean big_endian = is_big_endian(); unsigned short *inptr2 = (unsigned short *)inptr; for (col = cinfo->output_width; col > 0; col--) { - r = (*inptr2 & red_mask) >> 11; - g = (*inptr2 & green_mask) >> 5; - b = (*inptr2 & blue_mask); - outptr[0] = b << 3; - outptr[1] = g << 2; - outptr[2] = r << 3; + if (big_endian) { + outptr[0] = (*inptr2 >> 5) & 0xF8; + outptr[1] = ((*inptr2 << 5) & 0xE0) | ((*inptr2 >> 11) & 0x1C); + outptr[2] = *inptr2 & 0xF8; + } else { + outptr[0] = (*inptr2 << 3) & 0xF8; + outptr[1] = (*inptr2 >> 3) & 0xFC; + outptr[2] = (*inptr2 >> 8) & 0xF8; + } outptr += 3; inptr2++; } diff --git a/yuvjpeg.c b/yuvjpeg.c index 542ca826..0a94079c 100644 --- a/yuvjpeg.c +++ b/yuvjpeg.c @@ -217,7 +217,8 @@ int main(int argc, char *argv[]) { jpeg_stdio_dest(&cinfo, jpg_fd); - cinfo.use_moz_defaults = TRUE; + if (jpeg_c_bool_param_supported(&cinfo, JBOOLEAN_USE_MOZ_DEFAULTS)) + jpeg_c_set_bool_param(&cinfo, JBOOLEAN_USE_MOZ_DEFAULTS, TRUE); cinfo.image_width = luma_width; cinfo.image_height = luma_height;