Include Huffman codec optimizations borrowed from TurboJPEG

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@24 632fc199-4ca6-4c93-a231-07263d6284db
2009-03-12 17:24:27 +00:00
parent 0747ad2b12
commit 99313388cc
1 changed files with 146 additions and 108 deletions
--- a/jchuff.c
+++ b/jchuff.c
@@ -14,12 +14,34 @@
 * permanent JPEG objects only upon successful completion of an MCU.
 */
 /* Modifications:
 * Copyright (C)2007 Sun Microsystems, Inc.
 * Copyright (C)2009 D. R. Commander
 *
 * This library is free software and may be redistributed and/or modified under
 * the terms of the wxWindows Library License, Version 3.1 or (at your option)
 * any later version.  The full license is in the LICENSE.txt file included
 * with this distribution.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * wxWindows Library License for more details.
 */
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jchuff.h"		/* Declarations shared with jcphuff.c */
 /* Lookup table: jpeg_first_bit_table[v] holds the number of significant
  * bits in v (0 for v == 0).  It must be populated before CALC_FIRST_BIT or
  * a direct lookup is used. */
 static unsigned char jpeg_first_bit_table[65536];
 /* Nonzero once jpeg_first_bit_table has been populated. */
 int jpeg_first_bit_table_init=0;
 /*
  * CALC_FIRST_BIT(nbits, t): store in nbits the number of significant bits
  * of t, using only the first 256 table entries (the low byte alone when
  * t <= 255, otherwise the bits above the low byte plus 8).
  *
  * t must be non-negative and free of side effects, because it is evaluated
  * more than once.  The expansion is a single brace-enclosed block, so it
  * stays intact under an unbraced if/else and may be written without a
  * trailing semicolon; both arguments are parenthesized so that compound
  * expressions such as (a | b) are handled correctly.
  */
 #define CALC_FIRST_BIT(nbits, t) {                               \
   (nbits) = jpeg_first_bit_table[(t) & 255];                    \
   if ((t) > 255) (nbits) = jpeg_first_bit_table[(t) >> 8] + 8;  \
 }
 /* Expanded entropy encoder object for Huffman encoding.
 *
 * The savable_state subrecord contains fields that change within an MCU,
@@ -261,6 +283,15 @@ jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
    dtbl->ehufco[i] = huffcode[p];
    dtbl->ehufsi[i] = huffsize[p];
  }
  if(!jpeg_first_bit_table_init) {
    for(i = 0; i < 65536; i++) {
      int bit = 0, val = i;
      while (val) {val >>= 1;  bit++;}
      jpeg_first_bit_table[i] = bit;
    }
    jpeg_first_bit_table_init = 1;
  }
 }
@@ -297,147 +328,154 @@ dump_buffer (working_state * state)
 * between calls, so 24 bits are sufficient.
 */
-inline
+/***************************************************************/
 LOCAL(boolean)
 emit_bits (working_state * state, unsigned int code, int size)
 /* Emit some bits; return TRUE if successful, FALSE if must suspend */
 {
  /* This routine is heavily used, so it's worth coding tightly. */
  register INT32 put_buffer = (INT32) code;
  register int put_bits = state->cur.put_bits;
-  /* if size is 0, caller used an invalid Huffman table entry */
 /*
  * DUMP_BITS_(code, size): append `size` bits of `code` to the accumulator
  * and write out every complete byte.
  *
  * The macro operates on variables named buffer, put_buffer and put_bits
  * that must be in scope where it is expanded.  Each byte stored through
  * buffer that equals 0xFF is followed by a stuffed 0 byte.  No check of the
  * remaining output space is made here.
  */
+#define DUMP_BITS_(code, size) {                                \
-  if (size == 0)
+  put_bits += size;                                             \
-    ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
+  put_buffer = (put_buffer << size) | code;                     \
  if (put_bits > 7)                                             \
    while(put_bits > 7)                                         \
      if (0xFF == (*buffer++ =  put_buffer >> (put_bits -= 8))) \
        *buffer++ = 0;                                          \
 }
-  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
+/***************************************************************/
  put_bits += size;		/* new number of bits in buffer */
  put_buffer <<= 24 - put_bits; /* align incoming bits */
-  put_buffer |= state->cur.put_buffer; /* and merge with old buffer contents */
 /*
  * DUMP_BITS(code, size): append `size` bits of `code` to the accumulator;
  * once more than 15 bits are pending, write exactly two bytes.
  *
  * Uses the same in-scope variables as DUMP_BITS_ (buffer, put_buffer,
  * put_bits) and applies the same 0xFF -> 0xFF 0x00 stuffing to each byte.
  * Unlike DUMP_BITS_, up to 15 bits can remain pending after the expansion.
  * `code` is OR-ed in without masking, so it must not have bits set above
  * `size`.  No check of the remaining output space is made here.
  */
+#define DUMP_BITS(code, size) {                                 \
-  
+  put_bits += size;                                             \
-  while (put_bits >= 8) {
+  put_buffer = (put_buffer << size) | code;                     \
-    int c = (int) ((put_buffer >> 16) & 0xFF);
+  if (put_bits > 15) {                                          \
-    
+    if (0xFF == (*buffer++ =  put_buffer >> (put_bits -= 8)))   \
-    emit_byte(state, c, return FALSE);
+      *buffer++ = 0;                                            \
-    if (c == 0xFF) {		/* need to stuff a zero byte? */
+    if (0xFF == (*buffer++ =  put_buffer >> (put_bits -= 8)))   \
-      emit_byte(state, 0, return FALSE);
+      *buffer++ = 0;                                            \
-    }
+  }                                                             \
-    put_buffer <<= 8;
+ }
    put_bits -= 8;
  }
-  state->cur.put_buffer = put_buffer; /* update state variables */
+/***************************************************************/
  state->cur.put_bits = put_bits;
-  return TRUE;
 /*
  * DUMP_SINGLE_VALUE(ht, codevalue): look up the code and its length for
  * `codevalue` in table `ht` (ehufco[] / ehufsi[]) and emit it with
  * DUMP_BITS.  Variables named size and code must be in scope; both are
  * overwritten.
  */
+#define DUMP_SINGLE_VALUE(ht, codevalue) { \
-}
+  size = ht->ehufsi[codevalue];            \
  code = ht->ehufco[codevalue];            \
                                           \
  DUMP_BITS(code, size)                    \
 }
 /***************************************************************/
 /*
  * DUMP_VALUE(ht, codevalue, t, nbits): emit the code that table `ht` holds
  * for `codevalue`, followed by the low `nbits` bits of `t`.
  *
  * Variables named size and code must be in scope (both are overwritten),
  * together with whatever DUMP_BITS itself operates on.  `t` must be a
  * modifiable lvalue: it is masked in place to its low `nbits` bits.
  * `nbits` must lie in 0..30.
  *
  * The mask is built as (1 << nbits) - 1 instead of ~(-1 << nbits), because
  * left-shifting a negative value is undefined behaviour in ISO C.
  */
 #define DUMP_VALUE(ht, codevalue, t, nbits) {  \
   size = (ht)->ehufsi[codevalue];              \
   code = (ht)->ehufco[codevalue];              \
   (t) &= (1 << (nbits)) - 1;                   \
   DUMP_BITS(code, size)                        \
   DUMP_BITS(t, nbits)                          \
 }
 /***************************************************************/
 /*
  * Flush the pending bits of the bit accumulator to the output buffer.
  *
  * Seven 1-bits (0x7F) are appended with DUMP_BITS_, which writes out every
  * complete byte; the accumulator in state->cur is then reset to empty, so
  * any bits left over after that write are dropped.  The output pointer and
  * free byte count in `state` are advanced by the number of bytes written.
  *
  * Returns FALSE if dump_buffer() fails, TRUE otherwise.
  */
 LOCAL(boolean)
 flush_bits (working_state * state)
 {
-  if (! emit_bits(state, 0x7F, 7)) /* fill any partial byte with ones */
+  unsigned char *buffer;
-    return FALSE;
+  int put_buffer, put_bits;
  /* DUMP_BITS_ writes through the raw pointer without checking for space,
   * so make sure at least DCTSIZE2 * 2 bytes are free beforehand. */
  if ((state)->free_in_buffer < DCTSIZE2 * 2)
    if (! dump_buffer(state)) return FALSE;
  /* Work on local copies of the output position and bit accumulator; these
   * are the variable names the DUMP_BITS_ macro operates on. */
  buffer = state->next_output_byte;
  put_buffer = state->cur.put_buffer;
  put_bits = state->cur.put_bits;
  DUMP_BITS_(0x7F, 7)
  state->cur.put_buffer = 0;	/* and reset bit-buffer to empty */
  state->cur.put_bits = 0;
  /* Account for the bytes just written and publish the new position. */
  state->free_in_buffer -= (buffer - state->next_output_byte);
  state->next_output_byte = buffer;
  /* Restore the DCTSIZE2 * 2 bytes of headroom before returning. */
  if ((state)->free_in_buffer < DCTSIZE2 * 2) 
    if (! dump_buffer(state)) return FALSE;
  return TRUE;
 }
 /*
  * Encode a single block's worth of coefficients.
  *
  * state        working encoder state; its output pointer, free byte count
  *              and bit accumulator (cur.put_buffer / cur.put_bits) are
  *              copied into locals on entry and written back at the end
  * block        coefficient block to encode
  * last_dc_val  value subtracted from block[0] to form the DC difference
  * dctbl/actbl  derived tables supplying ehufco[] (codes) and ehufsi[]
  *              (code lengths) for the DC and AC symbols
  *
  * Returns FALSE when dump_buffer() or emit_bits() reports failure, TRUE
  * otherwise.
  *
  * NOTE(review): the DUMP_* macros write through `buffer` with no space
  * check; the only guard is the free_in_buffer < DCTSIZE2 * 2 test made
  * before and after the block.  Confirm that a worst-case block (long codes
  * plus 0xFF stuffing) cannot exceed that headroom.
  * NOTE(review): the added DC/AC paths contain no counterpart of the removed
  * JERR_BAD_DCT_COEF range checks; confirm coefficients are bounded upstream.
  */
 LOCAL(boolean)
 encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val,
 		  c_derived_tbl *dctbl, c_derived_tbl *actbl)
 {
-  register int temp, temp2;
+  int temp, temp2;
-  register int nbits;
+  int nbits;
-  register int k, r, i;
+  int r, sflag, size, code;
-  
+  unsigned char *buffer;
  int put_buffer, put_bits;
  int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0];
  if ((state)->free_in_buffer < DCTSIZE2 * 2)
    if (! dump_buffer(state)) return FALSE;
  buffer = state->next_output_byte;
  put_buffer = state->cur.put_buffer;
  put_bits = state->cur.put_bits;
  /* Encode the DC coefficient difference per section F.1.2.1 */
  temp = temp2 = block[0] - last_dc_val;
  /* sflag = temp >> 31 serves as a sign mask: all ones for a negative
   * difference, zero otherwise.  NOTE(review): this presumes a 32-bit int
   * and an arithmetic right shift of negative values; confirm for every
   * supported target.  temp - ((temp + temp) & sflag) then leaves the
   * magnitude in temp, and temp2 + sflag subtracts one from a negative
   * value, like the removed temp2-- statement. */
-  if (temp < 0) {
+  sflag = temp >> 31;
-    temp = -temp;		/* temp is abs value of input */
+  temp -= ((temp + temp) & sflag);
-    /* For a negative input, want temp2 = bitwise complement of abs(input) */
+  temp2 += sflag;
-    /* This code assumes we are on a two's complement machine */
+  CALC_FIRST_BIT(nbits, temp)
-    temp2--;
+  DUMP_VALUE(dctbl, nbits, temp2, nbits)
  }
  /* Find the number of bits needed for the magnitude of the coefficient */
  nbits = 0;
  while (temp) {
    nbits++;
    temp >>= 1;
  }
  /* Check for out-of-range coefficient values.
   * Since we're encoding a difference, the range limit is twice as much.
   */
  if (nbits > MAX_COEF_BITS+1)
    ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
  /* Emit the Huffman-coded symbol for the number of bits */
  if (! emit_bits(state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits]))
    return FALSE;
  /* Emit that number of bits of the value, if positive, */
  /* or the complement of its magnitude, if negative. */
  if (nbits)			/* emit_bits rejects calls with size 0 */
    if (! emit_bits(state, (unsigned int) temp2, nbits))
      return FALSE;
  /* Encode the AC coefficients per section F.1.2.2 */
  r = 0;			/* r = run length of zeros */
  for (k = 1; k < DCTSIZE2; k++) {
    if ((temp = block[jpeg_natural_order[k]]) == 0) {
      r++;
    } else {
      /* if run length > 15, must emit special run-length-16 codes (0xF0) */
      while (r > 15) {
 	if (! emit_bits(state, actbl->ehufco[0xF0], actbl->ehufsi[0xF0]))
 	  return FALSE;
 	r -= 16;
      }
  /* innerloop(order): process the coefficient stored `order` bytes past the
   * start of block.  A zero value only lengthens the current run r.  For a
   * nonzero value the magnitude is formed with the sign mask, nbits is read
   * directly from jpeg_first_bit_table, one code_0xf0 symbol is emitted per
   * 16 zeros of the run, and then the symbol (r << 4) + nbits (held
   * temporarily in sflag) is emitted followed by the low nbits bits of temp2;
   * r is reset to 0.
   * NOTE(review): the invocations below pass 2*index, which presumes that
   * sizeof(JCOEF) is 2, and presumably list the indices in the same order as
   * jpeg_natural_order[1..63] -- confirm both. */
-      temp2 = temp;
+#define innerloop(order) {  \
-      if (temp < 0) {
+  temp2  = *(JCOEF*)((unsigned char*)block + order);  \
-	temp = -temp;		/* temp is abs value of input */
+  if(temp2 == 0) r++;  \
-	/* This code assumes we are on a two's complement machine */
+  else {  \
-	temp2--;
+    temp = (JCOEF)temp2;  \
-      }
+    sflag = temp >> 31;  \
-      
+    temp = (temp ^ sflag) - sflag;  \
-      /* Find the number of bits needed for the magnitude of the coefficient */
+    temp2 += sflag;  \
-      nbits = 1;		/* there must be at least one 1 bit */
+    nbits = jpeg_first_bit_table[temp];  \
-      while ((temp >>= 1))
+    for(; r > 15; r -= 16) DUMP_BITS(code_0xf0, size_0xf0)  \
-	nbits++;
+    sflag = (r << 4) + nbits;  \
-      /* Check for out-of-range coefficient values */
+    DUMP_VALUE(actbl, sflag, temp2, nbits)  \
-      if (nbits > MAX_COEF_BITS)
+    r = 0;  \
-	ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
+  }}
      /* Emit Huffman symbol for run length / number of bits */
      i = (r << 4) + nbits;
      if (! emit_bits(state, actbl->ehufco[i], actbl->ehufsi[i]))
 	return FALSE;
-      /* Emit that number of bits of the value, if positive, */
+  innerloop(2*1);   innerloop(2*8);   innerloop(2*16);  innerloop(2*9);
-      /* or the complement of its magnitude, if negative. */
+  innerloop(2*2);   innerloop(2*3);   innerloop(2*10);  innerloop(2*17);
-      if (! emit_bits(state, (unsigned int) temp2, nbits))
+  innerloop(2*24);  innerloop(2*32);  innerloop(2*25);  innerloop(2*18);
-	return FALSE;
+  innerloop(2*11);  innerloop(2*4);   innerloop(2*5);   innerloop(2*12);
-      
+  innerloop(2*19);  innerloop(2*26);  innerloop(2*33);  innerloop(2*40);
-      r = 0;
+  innerloop(2*48);  innerloop(2*41);  innerloop(2*34);  innerloop(2*27);
-    }
+  innerloop(2*20);  innerloop(2*13);  innerloop(2*6);   innerloop(2*7);
-  }
+  innerloop(2*14);  innerloop(2*21);  innerloop(2*28);  innerloop(2*35);
  innerloop(2*42);  innerloop(2*49);  innerloop(2*56);  innerloop(2*57);
  innerloop(2*50);  innerloop(2*43);  innerloop(2*36);  innerloop(2*29);
  innerloop(2*22);  innerloop(2*15);  innerloop(2*23);  innerloop(2*30);
  innerloop(2*37);  innerloop(2*44);  innerloop(2*51);  innerloop(2*58);
  innerloop(2*59);  innerloop(2*52);  innerloop(2*45);  innerloop(2*38);
  innerloop(2*31);  innerloop(2*39);  innerloop(2*46);  innerloop(2*53);
  innerloop(2*60);  innerloop(2*61);  innerloop(2*54);  innerloop(2*47);
  innerloop(2*55);  innerloop(2*62);  innerloop(2*63);
  /* If the last coef(s) were zero, emit an end-of-block code */
-  if (r > 0)
+  if (r > 0) DUMP_SINGLE_VALUE(actbl, 0x0)
-    if (! emit_bits(state, actbl->ehufco[0], actbl->ehufsi[0]))
+
-      return FALSE;
+  state->cur.put_buffer = put_buffer;
  state->cur.put_bits = put_bits;
  state->free_in_buffer -= (buffer - state->next_output_byte);
  state->next_output_byte = buffer;
  if ((state)->free_in_buffer < DCTSIZE2 * 2)
    if (! dump_buffer(state)) return FALSE;
  return TRUE;
 }