Split up the forward DCT routine into three stages

Divide the routine into sample conversion, DCT, and quantization stages so that alternative implementations of each stage can be provided easily.

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@13 632fc199-4ca6-4c93-a231-07263d6284db
2009-03-09 10:37:20 +00:00
parent 5557fd2217
commit 49dcbfbf13
2 changed files with 201 additions and 113 deletions
--- a/jcdctmgr.c
+++ b/jcdctmgr.c
@@ -19,11 +19,30 @@
 /* Private subobject for this module */
 /* Integer DCT stage: receives one pointer to DCTELEM data. */
 typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
 /* Floating-point DCT stage: receives one pointer to FAST_FLOAT data. */
 typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
 /* Integer sample-conversion stage: given the sample rows and a starting
  * column, fills a DCTELEM workspace.
  */
 typedef JMETHOD(void, convsamp_method_ptr,
                 (JSAMPARRAY sample_data, JDIMENSION start_col,
                  DCTELEM * workspace));
 /* Floating-point sample-conversion stage: same arguments as the integer
  * variant, but the workspace holds FAST_FLOAT values.
  */
 typedef JMETHOD(void, float_convsamp_method_ptr,
                 (JSAMPARRAY sample_data, JDIMENSION start_col,
                  FAST_FLOAT *workspace));
 /* Integer quantization stage: takes a coefficient block pointer, a divisor
  * table and the DCTELEM workspace.
  */
 typedef JMETHOD(void, quantize_method_ptr,
                 (JCOEFPTR coef_block, DCTELEM * divisors,
                  DCTELEM * workspace));
 /* Floating-point quantization stage: as above, with FAST_FLOAT divisors
  * and workspace.
  */
 typedef JMETHOD(void, float_quantize_method_ptr,
                 (JCOEFPTR coef_block, FAST_FLOAT * divisors,
                  FAST_FLOAT * workspace));
 typedef struct {
  struct jpeg_forward_dct pub;	/* public fields */
  /* Pointer to the DCT routine actually in use */
-  forward_DCT_method_ptr do_dct;
+  forward_DCT_method_ptr dct;
  convsamp_method_ptr convsamp;
  quantize_method_ptr quantize;
  /* The actual post-DCT divisors --- not identical to the quant table
   * entries, because of scaling (especially for an unnormalized DCT).
@@ -33,7 +52,9 @@ typedef struct {
 #ifdef DCT_FLOAT_SUPPORTED
  /* Same as above for the floating-point case. */
-  float_DCT_method_ptr do_float_dct;
+  float_DCT_method_ptr float_dct;
  float_convsamp_method_ptr float_convsamp;
  float_quantize_method_ptr float_quantize;
  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
 #endif
 } my_fdct_controller;
@@ -169,38 +190,20 @@ start_pass_fdctmgr (j_compress_ptr cinfo)
 /*
- * Perform forward DCT on one or more blocks of a component.
+ * Load data into workspace, applying unsigned->signed conversion.
 *
 * The input samples are taken from the sample_data[] array starting at
 * position start_row/start_col, and moving to the right for any additional
 * blocks. The quantized coefficients are returned in coef_blocks[].
 */
 METHODDEF(void)
-forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
+convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
 	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
 	     JDIMENSION start_row, JDIMENSION start_col,
 	     JDIMENSION num_blocks)
 /* This version is used for integer DCT implementations. */
 {
-  /* This routine is heavily used, so it's worth coding it tightly. */
+  register DCTELEM *workspaceptr;
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  forward_DCT_method_ptr do_dct = fdct->do_dct;
  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
  JDIMENSION bi;
  sample_data += start_row;	/* fold in the vertical offset once */
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
    /* Load data into workspace, applying unsigned->signed conversion */
    { register DCTELEM *workspaceptr;
  register JSAMPROW elemptr;
  register int elemr;
  workspaceptr = workspace;
  for (elemr = 0; elemr < DCTSIZE; elemr++) {
    elemptr = sample_data[elemr] + start_col;
 #if DCTSIZE == 8		/* unroll the inner loop */
    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
@@ -211,26 +214,31 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 #else
-	{ register int elemc;
+    {
-	  for (elemc = DCTSIZE; elemc > 0; elemc--) {
+      register int elemc;
      for (elemc = DCTSIZE; elemc > 0; elemc--)
        *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
    }
 	}
 #endif
  }
 }
    /* Perform the DCT */
    (*do_dct) (workspace);
-    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+/*
-    { register DCTELEM temp, qval;
+ * Quantize/descale the coefficients, and store into coef_blocks[].
 */
 METHODDEF(void)
 quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
 {
  register DCTELEM temp, qval;
  register int i;
-      register JCOEFPTR output_ptr = coef_blocks[bi];
+  register JCOEFPTR output_ptr = coef_block;
  for (i = 0; i < DCTSIZE2; i++) {
    qval = divisors[i];
    temp = workspace[i];
    /* Divide the coefficient value by qval, ensuring proper rounding.
     * Since C does not specify the direction of rounding for negative
     * quotients, we have to force the dividend positive for portability.
@@ -248,6 +256,7 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
 #else
 #define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
 #endif
    if (temp < 0) {
      temp = -temp;
      temp += qval>>1;	/* for rounding */
@@ -260,31 +269,56 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
    output_ptr[i] = (JCOEF) temp;
  }
 }
 /*
  * Run the integer forward-DCT pipeline over a horizontal run of blocks.
  *
  * For each of the num_blocks blocks, samples are read from sample_data[]
  * beginning at row start_row and column start_col (the column advances by
  * DCTSIZE per block), handed in turn to the convsamp, dct and quantize
  * stages stored in the fdct controller, and the result is written to the
  * matching entry of coef_blocks[].
  */
 METHODDEF(void)
 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
 	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
 	     JDIMENSION start_row, JDIMENSION start_col,
 	     JDIMENSION num_blocks)
 /* Integer-DCT flavour of the per-block pipeline. */
 {
   my_fdct_ptr controller = (my_fdct_ptr) cinfo->fdct;
   /* Snapshot the three stage pointers once, outside the hot loop. */
   convsamp_method_ptr load_stage = controller->convsamp;
   forward_DCT_method_ptr dct_stage = controller->dct;
   quantize_method_ptr quant_stage = controller->quantize;
   /* Divisor table selected by this component's quant_tbl_no. */
   DCTELEM * qdivisors = controller->divisors[compptr->quant_tbl_no];
   DCTELEM block_buf[DCTSIZE2];	/* scratch area passed to every stage */
   JSAMPARRAY rows = sample_data + start_row;	/* vertical offset, applied once */
   JDIMENSION col = start_col;
   JDIMENSION blk = 0;
   while (blk < num_blocks) {
     /* Stage 1: fill the scratch area from the sample rows */
     (*load_stage) (rows, col, block_buf);
     /* Stage 2: transform the scratch area in place */
     (*dct_stage) (block_buf);
     /* Stage 3: quantize into the output block for this position */
     (*quant_stage) (coef_blocks[blk], qdivisors, block_buf);
     col += DCTSIZE;
     blk++;
   }
 }
 #ifdef DCT_FLOAT_SUPPORTED
 METHODDEF(void)
-forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
+convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
 		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
 		   JDIMENSION start_row, JDIMENSION start_col,
 		   JDIMENSION num_blocks)
 /* This version is used for floating-point DCT implementations. */
 {
-  /* This routine is heavily used, so it's worth coding it tightly. */
+  register FAST_FLOAT *workspaceptr;
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  float_DCT_method_ptr do_dct = fdct->do_float_dct;
  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
  JDIMENSION bi;
  sample_data += start_row;	/* fold in the vertical offset once */
  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
    /* Load data into workspace, applying unsigned->signed conversion */
    { register FAST_FLOAT *workspaceptr;
  register JSAMPROW elemptr;
  register int elemr;
@@ -301,27 +335,28 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 #else
-	{ register int elemc;
+    {
-	  for (elemc = DCTSIZE; elemc > 0; elemc--) {
+      register int elemc;
      for (elemc = DCTSIZE; elemc > 0; elemc--)
        *workspaceptr++ = (FAST_FLOAT)
                          (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
    }
 	}
 #endif
  }
 }
    /* Perform the DCT */
    (*do_dct) (workspace);
-    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+METHODDEF(void)
-    { register FAST_FLOAT temp;
+quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
 {
  register FAST_FLOAT temp;
  register int i;
-      register JCOEFPTR output_ptr = coef_blocks[bi];
+  register JCOEFPTR output_ptr = coef_block;
  for (i = 0; i < DCTSIZE2; i++) {
    /* Apply the quantization and scaling factor */
    temp = workspace[i] * divisors[i];
    /* Round to nearest integer.
     * Since C does not specify the direction of rounding for negative
     * quotients, we have to force the dividend positive for portability.
@@ -331,6 +366,37 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
    output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
  }
 }
 /*
  * Run the floating-point forward-DCT pipeline over a horizontal run of
  * blocks.  Samples come from sample_data[] at row start_row and column
  * start_col (advancing by DCTSIZE per block); each block goes through the
  * float_convsamp, float_dct and float_quantize stages stored in the fdct
  * controller and is written to the matching entry of coef_blocks[].
  */
 METHODDEF(void)
 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
 		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
 		   JDIMENSION start_row, JDIMENSION start_col,
 		   JDIMENSION num_blocks)
 /* Floating-point flavour of the per-block pipeline. */
 {
   my_fdct_ptr controller = (my_fdct_ptr) cinfo->fdct;
   /* Snapshot the three stage pointers once, outside the hot loop. */
   float_convsamp_method_ptr load_stage = controller->float_convsamp;
   float_DCT_method_ptr dct_stage = controller->float_dct;
   float_quantize_method_ptr quant_stage = controller->float_quantize;
   /* Divisor table selected by this component's quant_tbl_no. */
   FAST_FLOAT * qdivisors = controller->float_divisors[compptr->quant_tbl_no];
   FAST_FLOAT block_buf[DCTSIZE2]; /* scratch area passed to every stage */
   JSAMPARRAY rows = sample_data + start_row;	/* vertical offset, applied once */
   JDIMENSION col = start_col;
   JDIMENSION blk = 0;
   while (blk < num_blocks) {
     /* Stage 1: fill the scratch area from the sample rows */
     (*load_stage) (rows, col, block_buf);
     /* Stage 2: transform the scratch area in place */
     (*dct_stage) (block_buf);
     /* Stage 3: quantize into the output block for this position */
     (*quant_stage) (coef_blocks[blk], qdivisors, block_buf);
     col += DCTSIZE;
     blk++;
   }
 }
@@ -353,23 +419,48 @@ jinit_forward_dct (j_compress_ptr cinfo)
  cinfo->fdct = (struct jpeg_forward_dct *) fdct;
  fdct->pub.start_pass = start_pass_fdctmgr;
  /* First determine the DCT... */
  switch (cinfo->dct_method) {
 #ifdef DCT_ISLOW_SUPPORTED
  case JDCT_ISLOW:
    fdct->pub.forward_DCT = forward_DCT;
-    fdct->do_dct = jpeg_fdct_islow;
+    fdct->dct = jpeg_fdct_islow;
    break;
 #endif
 #ifdef DCT_IFAST_SUPPORTED
  case JDCT_IFAST:
    fdct->pub.forward_DCT = forward_DCT;
-    fdct->do_dct = jpeg_fdct_ifast;
+    fdct->dct = jpeg_fdct_ifast;
    break;
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
  case JDCT_FLOAT:
    fdct->pub.forward_DCT = forward_DCT_float;
-    fdct->do_float_dct = jpeg_fdct_float;
+    fdct->float_dct = jpeg_fdct_float;
    break;
 #endif
  default:
    ERREXIT(cinfo, JERR_NOT_COMPILED);
    break;
  }
  /* ...then the supporting stages. */
  switch (cinfo->dct_method) {
 #ifdef DCT_ISLOW_SUPPORTED
  case JDCT_ISLOW:
 #endif
 #ifdef DCT_IFAST_SUPPORTED
  case JDCT_IFAST:
 #endif
 #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
    fdct->convsamp = convsamp;
    fdct->quantize = quantize;
    break;
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
  case JDCT_FLOAT:
    fdct->float_convsamp = convsamp_float;
    fdct->float_quantize = quantize_float;
    break;
 #endif
  default:
--- a/jdct.h
+++ b/jdct.h
@@ -32,9 +32,6 @@ typedef int DCTELEM;		/* 16 or 32 bits is fine */
 typedef INT32 DCTELEM;		/* must have 32 bits */
 #endif
 typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
 typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
 /*
 * An inverse DCT routine is given a pointer to the input JBLOCK and a pointer