Split up the forward DCT routine into three stages
Divide it into sample conversion, DCT and quantization in order to easily provide alternative implementations of each stage.
This commit is contained in:
219
jcdctmgr.c
219
jcdctmgr.c
@@ -19,11 +19,30 @@
|
||||
|
||||
/* Private subobject for this module */
|
||||
|
||||
typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
|
||||
typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
|
||||
|
||||
typedef JMETHOD(void, convsamp_method_ptr,
|
||||
(JSAMPARRAY sample_data, JDIMENSION start_col,
|
||||
DCTELEM * workspace));
|
||||
typedef JMETHOD(void, float_convsamp_method_ptr,
|
||||
(JSAMPARRAY sample_data, JDIMENSION start_col,
|
||||
FAST_FLOAT *workspace));
|
||||
|
||||
typedef JMETHOD(void, quantize_method_ptr,
|
||||
(JCOEFPTR coef_block, DCTELEM * divisors,
|
||||
DCTELEM * workspace));
|
||||
typedef JMETHOD(void, float_quantize_method_ptr,
|
||||
(JCOEFPTR coef_block, FAST_FLOAT * divisors,
|
||||
FAST_FLOAT * workspace));
|
||||
|
||||
typedef struct {
|
||||
struct jpeg_forward_dct pub; /* public fields */
|
||||
|
||||
/* Pointer to the DCT routine actually in use */
|
||||
forward_DCT_method_ptr do_dct;
|
||||
forward_DCT_method_ptr dct;
|
||||
convsamp_method_ptr convsamp;
|
||||
quantize_method_ptr quantize;
|
||||
|
||||
/* The actual post-DCT divisors --- not identical to the quant table
|
||||
* entries, because of scaling (especially for an unnormalized DCT).
|
||||
@@ -33,7 +52,9 @@ typedef struct {
|
||||
|
||||
#ifdef DCT_FLOAT_SUPPORTED
|
||||
/* Same as above for the floating-point case. */
|
||||
float_DCT_method_ptr do_float_dct;
|
||||
float_DCT_method_ptr float_dct;
|
||||
float_convsamp_method_ptr float_convsamp;
|
||||
float_quantize_method_ptr float_quantize;
|
||||
FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
|
||||
#endif
|
||||
} my_fdct_controller;
|
||||
@@ -169,38 +190,20 @@ start_pass_fdctmgr (j_compress_ptr cinfo)
|
||||
|
||||
|
||||
/*
|
||||
* Perform forward DCT on one or more blocks of a component.
|
||||
*
|
||||
* The input samples are taken from the sample_data[] array starting at
|
||||
* position start_row/start_col, and moving to the right for any additional
|
||||
* blocks. The quantized coefficients are returned in coef_blocks[].
|
||||
* Load data into workspace, applying unsigned->signed conversion.
|
||||
*/
|
||||
|
||||
METHODDEF(void)
|
||||
forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
||||
JDIMENSION start_row, JDIMENSION start_col,
|
||||
JDIMENSION num_blocks)
|
||||
/* This version is used for integer DCT implementations. */
|
||||
convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
|
||||
{
|
||||
/* This routine is heavily used, so it's worth coding it tightly. */
|
||||
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
|
||||
forward_DCT_method_ptr do_dct = fdct->do_dct;
|
||||
DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
|
||||
DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
|
||||
JDIMENSION bi;
|
||||
|
||||
sample_data += start_row; /* fold in the vertical offset once */
|
||||
|
||||
for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
|
||||
/* Load data into workspace, applying unsigned->signed conversion */
|
||||
{ register DCTELEM *workspaceptr;
|
||||
register DCTELEM *workspaceptr;
|
||||
register JSAMPROW elemptr;
|
||||
register int elemr;
|
||||
|
||||
workspaceptr = workspace;
|
||||
for (elemr = 0; elemr < DCTSIZE; elemr++) {
|
||||
elemptr = sample_data[elemr] + start_col;
|
||||
|
||||
#if DCTSIZE == 8 /* unroll the inner loop */
|
||||
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
||||
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
||||
@@ -211,26 +214,31 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
||||
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
||||
#else
|
||||
{ register int elemc;
|
||||
for (elemc = DCTSIZE; elemc > 0; elemc--) {
|
||||
{
|
||||
register int elemc;
|
||||
for (elemc = DCTSIZE; elemc > 0; elemc--)
|
||||
*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Perform the DCT */
|
||||
(*do_dct) (workspace);
|
||||
|
||||
/* Quantize/descale the coefficients, and store into coef_blocks[] */
|
||||
{ register DCTELEM temp, qval;
|
||||
/*
|
||||
* Quantize/descale the coefficients, and store into coef_blocks[].
|
||||
*/
|
||||
|
||||
METHODDEF(void)
|
||||
quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
|
||||
{
|
||||
register DCTELEM temp, qval;
|
||||
register int i;
|
||||
register JCOEFPTR output_ptr = coef_blocks[bi];
|
||||
register JCOEFPTR output_ptr = coef_block;
|
||||
|
||||
for (i = 0; i < DCTSIZE2; i++) {
|
||||
qval = divisors[i];
|
||||
temp = workspace[i];
|
||||
|
||||
/* Divide the coefficient value by qval, ensuring proper rounding.
|
||||
* Since C does not specify the direction of rounding for negative
|
||||
* quotients, we have to force the dividend positive for portability.
|
||||
@@ -248,6 +256,7 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
#else
|
||||
#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
|
||||
#endif
|
||||
|
||||
if (temp < 0) {
|
||||
temp = -temp;
|
||||
temp += qval>>1; /* for rounding */
|
||||
@@ -259,32 +268,57 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
}
|
||||
output_ptr[i] = (JCOEF) temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Perform forward DCT on one or more blocks of a component.
|
||||
*
|
||||
* The input samples are taken from the sample_data[] array starting at
|
||||
* position start_row/start_col, and moving to the right for any additional
|
||||
* blocks. The quantized coefficients are returned in coef_blocks[].
|
||||
*/
|
||||
|
||||
METHODDEF(void)
|
||||
forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
||||
JDIMENSION start_row, JDIMENSION start_col,
|
||||
JDIMENSION num_blocks)
|
||||
/* This version is used for integer DCT implementations. */
|
||||
{
|
||||
/* This routine is heavily used, so it's worth coding it tightly. */
|
||||
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
|
||||
DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
|
||||
DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
|
||||
JDIMENSION bi;
|
||||
|
||||
/* Make sure the compiler doesn't look up these every pass */
|
||||
forward_DCT_method_ptr do_dct = fdct->dct;
|
||||
convsamp_method_ptr do_convsamp = fdct->convsamp;
|
||||
quantize_method_ptr do_quantize = fdct->quantize;
|
||||
|
||||
sample_data += start_row; /* fold in the vertical offset once */
|
||||
|
||||
for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
|
||||
/* Load data into workspace, applying unsigned->signed conversion */
|
||||
(*do_convsamp) (sample_data, start_col, workspace);
|
||||
|
||||
/* Perform the DCT */
|
||||
(*do_dct) (workspace);
|
||||
|
||||
/* Quantize/descale the coefficients, and store into coef_blocks[] */
|
||||
(*do_quantize) (coef_blocks[bi], divisors, workspace);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef DCT_FLOAT_SUPPORTED
|
||||
|
||||
|
||||
METHODDEF(void)
|
||||
forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
||||
JDIMENSION start_row, JDIMENSION start_col,
|
||||
JDIMENSION num_blocks)
|
||||
/* This version is used for floating-point DCT implementations. */
|
||||
convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
|
||||
{
|
||||
/* This routine is heavily used, so it's worth coding it tightly. */
|
||||
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
|
||||
float_DCT_method_ptr do_dct = fdct->do_float_dct;
|
||||
FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
|
||||
FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
|
||||
JDIMENSION bi;
|
||||
|
||||
sample_data += start_row; /* fold in the vertical offset once */
|
||||
|
||||
for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
|
||||
/* Load data into workspace, applying unsigned->signed conversion */
|
||||
{ register FAST_FLOAT *workspaceptr;
|
||||
register FAST_FLOAT *workspaceptr;
|
||||
register JSAMPROW elemptr;
|
||||
register int elemr;
|
||||
|
||||
@@ -301,27 +335,28 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
|
||||
*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
|
||||
#else
|
||||
{ register int elemc;
|
||||
for (elemc = DCTSIZE; elemc > 0; elemc--) {
|
||||
{
|
||||
register int elemc;
|
||||
for (elemc = DCTSIZE; elemc > 0; elemc--)
|
||||
*workspaceptr++ = (FAST_FLOAT)
|
||||
(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Perform the DCT */
|
||||
(*do_dct) (workspace);
|
||||
|
||||
/* Quantize/descale the coefficients, and store into coef_blocks[] */
|
||||
{ register FAST_FLOAT temp;
|
||||
METHODDEF(void)
|
||||
quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
|
||||
{
|
||||
register FAST_FLOAT temp;
|
||||
register int i;
|
||||
register JCOEFPTR output_ptr = coef_blocks[bi];
|
||||
register JCOEFPTR output_ptr = coef_block;
|
||||
|
||||
for (i = 0; i < DCTSIZE2; i++) {
|
||||
/* Apply the quantization and scaling factor */
|
||||
temp = workspace[i] * divisors[i];
|
||||
|
||||
/* Round to nearest integer.
|
||||
* Since C does not specify the direction of rounding for negative
|
||||
* quotients, we have to force the dividend positive for portability.
|
||||
@@ -330,7 +365,38 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
*/
|
||||
output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
METHODDEF(void)
|
||||
forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
||||
JDIMENSION start_row, JDIMENSION start_col,
|
||||
JDIMENSION num_blocks)
|
||||
/* This version is used for floating-point DCT implementations. */
|
||||
{
|
||||
/* This routine is heavily used, so it's worth coding it tightly. */
|
||||
my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
|
||||
FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
|
||||
FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
|
||||
JDIMENSION bi;
|
||||
|
||||
/* Make sure the compiler doesn't look up these every pass */
|
||||
float_DCT_method_ptr do_dct = fdct->float_dct;
|
||||
float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
|
||||
float_quantize_method_ptr do_quantize = fdct->float_quantize;
|
||||
|
||||
sample_data += start_row; /* fold in the vertical offset once */
|
||||
|
||||
for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
|
||||
/* Load data into workspace, applying unsigned->signed conversion */
|
||||
(*do_convsamp) (sample_data, start_col, workspace);
|
||||
|
||||
/* Perform the DCT */
|
||||
(*do_dct) (workspace);
|
||||
|
||||
/* Quantize/descale the coefficients, and store into coef_blocks[] */
|
||||
(*do_quantize) (coef_blocks[bi], divisors, workspace);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -353,23 +419,48 @@ jinit_forward_dct (j_compress_ptr cinfo)
|
||||
cinfo->fdct = (struct jpeg_forward_dct *) fdct;
|
||||
fdct->pub.start_pass = start_pass_fdctmgr;
|
||||
|
||||
/* First determine the DCT... */
|
||||
switch (cinfo->dct_method) {
|
||||
#ifdef DCT_ISLOW_SUPPORTED
|
||||
case JDCT_ISLOW:
|
||||
fdct->pub.forward_DCT = forward_DCT;
|
||||
fdct->do_dct = jpeg_fdct_islow;
|
||||
fdct->dct = jpeg_fdct_islow;
|
||||
break;
|
||||
#endif
|
||||
#ifdef DCT_IFAST_SUPPORTED
|
||||
case JDCT_IFAST:
|
||||
fdct->pub.forward_DCT = forward_DCT;
|
||||
fdct->do_dct = jpeg_fdct_ifast;
|
||||
fdct->dct = jpeg_fdct_ifast;
|
||||
break;
|
||||
#endif
|
||||
#ifdef DCT_FLOAT_SUPPORTED
|
||||
case JDCT_FLOAT:
|
||||
fdct->pub.forward_DCT = forward_DCT_float;
|
||||
fdct->do_float_dct = jpeg_fdct_float;
|
||||
fdct->float_dct = jpeg_fdct_float;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
ERREXIT(cinfo, JERR_NOT_COMPILED);
|
||||
break;
|
||||
}
|
||||
|
||||
/* ...then the supporting stages. */
|
||||
switch (cinfo->dct_method) {
|
||||
#ifdef DCT_ISLOW_SUPPORTED
|
||||
case JDCT_ISLOW:
|
||||
#endif
|
||||
#ifdef DCT_IFAST_SUPPORTED
|
||||
case JDCT_IFAST:
|
||||
#endif
|
||||
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
|
||||
fdct->convsamp = convsamp;
|
||||
fdct->quantize = quantize;
|
||||
break;
|
||||
#endif
|
||||
#ifdef DCT_FLOAT_SUPPORTED
|
||||
case JDCT_FLOAT:
|
||||
fdct->float_convsamp = convsamp_float;
|
||||
fdct->float_quantize = quantize_float;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
|
||||
3
jdct.h
3
jdct.h
@@ -32,9 +32,6 @@ typedef int DCTELEM; /* 16 or 32 bits is fine */
|
||||
typedef INT32 DCTELEM; /* must have 32 bits */
|
||||
#endif
|
||||
|
||||
typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
|
||||
typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
|
||||
|
||||
|
||||
/*
|
||||
* An inverse DCT routine is given a pointer to the input JBLOCK and a pointer
|
||||
|
||||
Reference in New Issue
Block a user