Split up the forward DCT routine into three stages
Divide it into sample conversion, DCT and quantization in order to easily provide alternative implementations of each stage.

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@13 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
219
jcdctmgr.c
219
jcdctmgr.c
@@ -19,11 +19,30 @@
|
|||||||
|
|
||||||
/* Private subobject for this module */
|
/* Private subobject for this module */
|
||||||
|
|
||||||
|
typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
|
||||||
|
typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
|
||||||
|
|
||||||
|
typedef JMETHOD(void, convsamp_method_ptr,
|
||||||
|
(JSAMPARRAY sample_data, JDIMENSION start_col,
|
||||||
|
DCTELEM * workspace));
|
||||||
|
typedef JMETHOD(void, float_convsamp_method_ptr,
|
||||||
|
(JSAMPARRAY sample_data, JDIMENSION start_col,
|
||||||
|
FAST_FLOAT *workspace));
|
||||||
|
|
||||||
|
typedef JMETHOD(void, quantize_method_ptr,
|
||||||
|
(JCOEFPTR coef_block, DCTELEM * divisors,
|
||||||
|
DCTELEM * workspace));
|
||||||
|
typedef JMETHOD(void, float_quantize_method_ptr,
|
||||||
|
(JCOEFPTR coef_block, FAST_FLOAT * divisors,
|
||||||
|
FAST_FLOAT * workspace));
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
struct jpeg_forward_dct pub; /* public fields */
|
struct jpeg_forward_dct pub; /* public fields */
|
||||||
|
|
||||||
/* Pointer to the DCT routine actually in use */
|
/* Pointer to the DCT routine actually in use */
|
||||||
forward_DCT_method_ptr do_dct;
|
forward_DCT_method_ptr dct;
|
||||||
|
convsamp_method_ptr convsamp;
|
||||||
|
quantize_method_ptr quantize;
|
||||||
|
|
||||||
/* The actual post-DCT divisors --- not identical to the quant table
|
/* The actual post-DCT divisors --- not identical to the quant table
|
||||||
* entries, because of scaling (especially for an unnormalized DCT).
|
* entries, because of scaling (especially for an unnormalized DCT).
|
||||||
@@ -33,7 +52,9 @@ typedef struct {
|
|||||||
|
|
||||||
#ifdef DCT_FLOAT_SUPPORTED
|
#ifdef DCT_FLOAT_SUPPORTED
|
||||||
/* Same as above for the floating-point case. */
|
/* Same as above for the floating-point case. */
|
||||||
float_DCT_method_ptr do_float_dct;
|
float_DCT_method_ptr float_dct;
|
||||||
|
float_convsamp_method_ptr float_convsamp;
|
||||||
|
float_quantize_method_ptr float_quantize;
|
||||||
FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
|
FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
|
||||||
#endif
|
#endif
|
||||||
} my_fdct_controller;
|
} my_fdct_controller;
|
||||||
@@ -169,38 +190,20 @@ start_pass_fdctmgr (j_compress_ptr cinfo)
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Perform forward DCT on one or more blocks of a component.
|
* Load data into workspace, applying unsigned->signed conversion.
|
||||||
*
|
|
||||||
* The input samples are taken from the sample_data[] array starting at
|
|
||||||
* position start_row/start_col, and moving to the right for any additional
|
|
||||||
* blocks. The quantized coefficients are returned in coef_blocks[].
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
METHODDEF(void)
|
METHODDEF(void)
|
||||||
forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
|
||||||
JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
|
||||||
JDIMENSION start_row, JDIMENSION start_col,
|
|
||||||
JDIMENSION num_blocks)
|
|
||||||
/* This version is used for integer DCT implementations. */
|
|
||||||
{
|
{
|
||||||
/* This routine is heavily used, so it's worth coding it tightly. */
|
register DCTELEM *workspaceptr;
|
||||||
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
|
|
||||||
forward_DCT_method_ptr do_dct = fdct->do_dct;
|
|
||||||
DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
|
|
||||||
DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
|
|
||||||
JDIMENSION bi;
|
|
||||||
|
|
||||||
sample_data += start_row; /* fold in the vertical offset once */
|
|
||||||
|
|
||||||
for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
|
|
||||||
/* Load data into workspace, applying unsigned->signed conversion */
|
|
||||||
{ register DCTELEM *workspaceptr;
|
|
||||||
register JSAMPROW elemptr;
|
register JSAMPROW elemptr;
|
||||||
register int elemr;
|
register int elemr;
|
||||||
|
|
||||||
workspaceptr = workspace;
|
workspaceptr = workspace;
|
||||||
for (elemr = 0; elemr < DCTSIZE; elemr++) {
|
for (elemr = 0; elemr < DCTSIZE; elemr++) {
|
||||||
elemptr = sample_data[elemr] + start_col;
|
elemptr = sample_data[elemr] + start_col;
|
||||||
|
|
||||||
#if DCTSIZE == 8 /* unroll the inner loop */
|
#if DCTSIZE == 8 /* unroll the inner loop */
|
||||||
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
||||||
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
||||||
@@ -211,26 +214,31 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
||||||
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
||||||
#else
|
#else
|
||||||
{ register int elemc;
|
{
|
||||||
for (elemc = DCTSIZE; elemc > 0; elemc--) {
|
register int elemc;
|
||||||
|
for (elemc = DCTSIZE; elemc > 0; elemc--)
|
||||||
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Perform the DCT */
|
|
||||||
(*do_dct) (workspace);
|
|
||||||
|
|
||||||
/* Quantize/descale the coefficients, and store into coef_blocks[] */
|
/*
|
||||||
{ register DCTELEM temp, qval;
|
* Quantize/descale the coefficients, and store into coef_blocks[].
|
||||||
|
*/
|
||||||
|
|
||||||
|
METHODDEF(void)
|
||||||
|
quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
|
||||||
|
{
|
||||||
|
register DCTELEM temp, qval;
|
||||||
register int i;
|
register int i;
|
||||||
register JCOEFPTR output_ptr = coef_blocks[bi];
|
register JCOEFPTR output_ptr = coef_block;
|
||||||
|
|
||||||
for (i = 0; i < DCTSIZE2; i++) {
|
for (i = 0; i < DCTSIZE2; i++) {
|
||||||
qval = divisors[i];
|
qval = divisors[i];
|
||||||
temp = workspace[i];
|
temp = workspace[i];
|
||||||
|
|
||||||
/* Divide the coefficient value by qval, ensuring proper rounding.
|
/* Divide the coefficient value by qval, ensuring proper rounding.
|
||||||
* Since C does not specify the direction of rounding for negative
|
* Since C does not specify the direction of rounding for negative
|
||||||
* quotients, we have to force the dividend positive for portability.
|
* quotients, we have to force the dividend positive for portability.
|
||||||
@@ -248,6 +256,7 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
#else
|
#else
|
||||||
#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
|
#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (temp < 0) {
|
if (temp < 0) {
|
||||||
temp = -temp;
|
temp = -temp;
|
||||||
temp += qval>>1; /* for rounding */
|
temp += qval>>1; /* for rounding */
|
||||||
@@ -259,32 +268,57 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
}
|
}
|
||||||
output_ptr[i] = (JCOEF) temp;
|
output_ptr[i] = (JCOEF) temp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform forward DCT on one or more blocks of a component.
|
||||||
|
*
|
||||||
|
* The input samples are taken from the sample_data[] array starting at
|
||||||
|
* position start_row/start_col, and moving to the right for any additional
|
||||||
|
* blocks. The quantized coefficients are returned in coef_blocks[].
|
||||||
|
*/
|
||||||
|
|
||||||
|
METHODDEF(void)
|
||||||
|
forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||||
|
JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
||||||
|
JDIMENSION start_row, JDIMENSION start_col,
|
||||||
|
JDIMENSION num_blocks)
|
||||||
|
/* This version is used for integer DCT implementations. */
|
||||||
|
{
|
||||||
|
/* This routine is heavily used, so it's worth coding it tightly. */
|
||||||
|
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
|
||||||
|
DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
|
||||||
|
DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
|
||||||
|
JDIMENSION bi;
|
||||||
|
|
||||||
|
/* Make sure the compiler doesn't look up these every pass */
|
||||||
|
forward_DCT_method_ptr do_dct = fdct->dct;
|
||||||
|
convsamp_method_ptr do_convsamp = fdct->convsamp;
|
||||||
|
quantize_method_ptr do_quantize = fdct->quantize;
|
||||||
|
|
||||||
|
sample_data += start_row; /* fold in the vertical offset once */
|
||||||
|
|
||||||
|
for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
|
||||||
|
/* Load data into workspace, applying unsigned->signed conversion */
|
||||||
|
(*do_convsamp) (sample_data, start_col, workspace);
|
||||||
|
|
||||||
|
/* Perform the DCT */
|
||||||
|
(*do_dct) (workspace);
|
||||||
|
|
||||||
|
/* Quantize/descale the coefficients, and store into coef_blocks[] */
|
||||||
|
(*do_quantize) (coef_blocks[bi], divisors, workspace);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef DCT_FLOAT_SUPPORTED
|
#ifdef DCT_FLOAT_SUPPORTED
|
||||||
|
|
||||||
|
|
||||||
METHODDEF(void)
|
METHODDEF(void)
|
||||||
forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
|
||||||
JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
|
||||||
JDIMENSION start_row, JDIMENSION start_col,
|
|
||||||
JDIMENSION num_blocks)
|
|
||||||
/* This version is used for floating-point DCT implementations. */
|
|
||||||
{
|
{
|
||||||
/* This routine is heavily used, so it's worth coding it tightly. */
|
register FAST_FLOAT *workspaceptr;
|
||||||
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
|
|
||||||
float_DCT_method_ptr do_dct = fdct->do_float_dct;
|
|
||||||
FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
|
|
||||||
FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
|
|
||||||
JDIMENSION bi;
|
|
||||||
|
|
||||||
sample_data += start_row; /* fold in the vertical offset once */
|
|
||||||
|
|
||||||
for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
|
|
||||||
/* Load data into workspace, applying unsigned->signed conversion */
|
|
||||||
{ register FAST_FLOAT *workspaceptr;
|
|
||||||
register JSAMPROW elemptr;
|
register JSAMPROW elemptr;
|
||||||
register int elemr;
|
register int elemr;
|
||||||
|
|
||||||
@@ -301,27 +335,28 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
|
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
|
||||||
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
|
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
|
||||||
#else
|
#else
|
||||||
{ register int elemc;
|
{
|
||||||
for (elemc = DCTSIZE; elemc > 0; elemc--) {
|
register int elemc;
|
||||||
|
for (elemc = DCTSIZE; elemc > 0; elemc--)
|
||||||
*workspaceptr++ = (FAST_FLOAT)
|
*workspaceptr++ = (FAST_FLOAT)
|
||||||
(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
|
(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Perform the DCT */
|
|
||||||
(*do_dct) (workspace);
|
|
||||||
|
|
||||||
/* Quantize/descale the coefficients, and store into coef_blocks[] */
|
METHODDEF(void)
|
||||||
{ register FAST_FLOAT temp;
|
quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
|
||||||
|
{
|
||||||
|
register FAST_FLOAT temp;
|
||||||
register int i;
|
register int i;
|
||||||
register JCOEFPTR output_ptr = coef_blocks[bi];
|
register JCOEFPTR output_ptr = coef_block;
|
||||||
|
|
||||||
for (i = 0; i < DCTSIZE2; i++) {
|
for (i = 0; i < DCTSIZE2; i++) {
|
||||||
/* Apply the quantization and scaling factor */
|
/* Apply the quantization and scaling factor */
|
||||||
temp = workspace[i] * divisors[i];
|
temp = workspace[i] * divisors[i];
|
||||||
|
|
||||||
/* Round to nearest integer.
|
/* Round to nearest integer.
|
||||||
* Since C does not specify the direction of rounding for negative
|
* Since C does not specify the direction of rounding for negative
|
||||||
* quotients, we have to force the dividend positive for portability.
|
* quotients, we have to force the dividend positive for portability.
|
||||||
@@ -330,7 +365,38 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|||||||
*/
|
*/
|
||||||
output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
|
output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
METHODDEF(void)
|
||||||
|
forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||||
|
JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
||||||
|
JDIMENSION start_row, JDIMENSION start_col,
|
||||||
|
JDIMENSION num_blocks)
|
||||||
|
/* This version is used for floating-point DCT implementations. */
|
||||||
|
{
|
||||||
|
/* This routine is heavily used, so it's worth coding it tightly. */
|
||||||
|
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
|
||||||
|
FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
|
||||||
|
FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
|
||||||
|
JDIMENSION bi;
|
||||||
|
|
||||||
|
/* Make sure the compiler doesn't look up these every pass */
|
||||||
|
float_DCT_method_ptr do_dct = fdct->float_dct;
|
||||||
|
float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
|
||||||
|
float_quantize_method_ptr do_quantize = fdct->float_quantize;
|
||||||
|
|
||||||
|
sample_data += start_row; /* fold in the vertical offset once */
|
||||||
|
|
||||||
|
for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
|
||||||
|
/* Load data into workspace, applying unsigned->signed conversion */
|
||||||
|
(*do_convsamp) (sample_data, start_col, workspace);
|
||||||
|
|
||||||
|
/* Perform the DCT */
|
||||||
|
(*do_dct) (workspace);
|
||||||
|
|
||||||
|
/* Quantize/descale the coefficients, and store into coef_blocks[] */
|
||||||
|
(*do_quantize) (coef_blocks[bi], divisors, workspace);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -353,23 +419,48 @@ jinit_forward_dct (j_compress_ptr cinfo)
|
|||||||
cinfo->fdct = (struct jpeg_forward_dct *) fdct;
|
cinfo->fdct = (struct jpeg_forward_dct *) fdct;
|
||||||
fdct->pub.start_pass = start_pass_fdctmgr;
|
fdct->pub.start_pass = start_pass_fdctmgr;
|
||||||
|
|
||||||
|
/* First determine the DCT... */
|
||||||
switch (cinfo->dct_method) {
|
switch (cinfo->dct_method) {
|
||||||
#ifdef DCT_ISLOW_SUPPORTED
|
#ifdef DCT_ISLOW_SUPPORTED
|
||||||
case JDCT_ISLOW:
|
case JDCT_ISLOW:
|
||||||
fdct->pub.forward_DCT = forward_DCT;
|
fdct->pub.forward_DCT = forward_DCT;
|
||||||
fdct->do_dct = jpeg_fdct_islow;
|
fdct->dct = jpeg_fdct_islow;
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
#ifdef DCT_IFAST_SUPPORTED
|
#ifdef DCT_IFAST_SUPPORTED
|
||||||
case JDCT_IFAST:
|
case JDCT_IFAST:
|
||||||
fdct->pub.forward_DCT = forward_DCT;
|
fdct->pub.forward_DCT = forward_DCT;
|
||||||
fdct->do_dct = jpeg_fdct_ifast;
|
fdct->dct = jpeg_fdct_ifast;
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
#ifdef DCT_FLOAT_SUPPORTED
|
#ifdef DCT_FLOAT_SUPPORTED
|
||||||
case JDCT_FLOAT:
|
case JDCT_FLOAT:
|
||||||
fdct->pub.forward_DCT = forward_DCT_float;
|
fdct->pub.forward_DCT = forward_DCT_float;
|
||||||
fdct->do_float_dct = jpeg_fdct_float;
|
fdct->float_dct = jpeg_fdct_float;
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
ERREXIT(cinfo, JERR_NOT_COMPILED);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ...then the supporting stages. */
|
||||||
|
switch (cinfo->dct_method) {
|
||||||
|
#ifdef DCT_ISLOW_SUPPORTED
|
||||||
|
case JDCT_ISLOW:
|
||||||
|
#endif
|
||||||
|
#ifdef DCT_IFAST_SUPPORTED
|
||||||
|
case JDCT_IFAST:
|
||||||
|
#endif
|
||||||
|
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
|
||||||
|
fdct->convsamp = convsamp;
|
||||||
|
fdct->quantize = quantize;
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
#ifdef DCT_FLOAT_SUPPORTED
|
||||||
|
case JDCT_FLOAT:
|
||||||
|
fdct->float_convsamp = convsamp_float;
|
||||||
|
fdct->float_quantize = quantize_float;
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
default:
|
default:
|
||||||
|
|||||||
3
jdct.h
3
jdct.h
@@ -32,9 +32,6 @@ typedef int DCTELEM; /* 16 or 32 bits is fine */
|
|||||||
typedef INT32 DCTELEM; /* must have 32 bits */
|
typedef INT32 DCTELEM; /* must have 32 bits */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
|
|
||||||
typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* An inverse DCT routine is given a pointer to the input JBLOCK and a pointer
|
* An inverse DCT routine is given a pointer to the input JBLOCK and a pointer
|
||||||
|
|||||||
Reference in New Issue
Block a user