Patch obtained from: https://sourceforge.net/projects/jpeg/files/ftp.oceana.com Author date taken from original announcement and timestamp of patch tarball: https://groups.google.com/g/comp.protocols.dicom/c/rrkP8BxoMRk/m/Ij4dfprggp8J
274 lines
9.0 KiB
C
274 lines
9.0 KiB
C
/*
|
|
* jchuff.c
|
|
*
|
|
* Copyright (C) 1991-1998, Thomas G. Lane.
|
|
* This file is part of the Independent JPEG Group's software.
|
|
* For conditions of distribution and use, see the accompanying README file.
|
|
*
|
|
* This file contains Huffman entropy decoding routines which are shared
|
|
* by the sequential, progressive and lossless decoders.
|
|
*/
|
|
|
|
#define JPEG_INTERNALS
|
|
#include "jinclude.h"
|
|
#include "jpeglib.h"
|
|
#include "jchuff.h" /* Declarations shared with jc*huff.c */
|
|
|
|
|
|
/*
|
|
* Compute the derived values for a Huffman table.
|
|
* This routine also performs some validation checks on the table.
|
|
*/
|
|
|
|
GLOBAL(void)
|
|
jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
|
|
c_derived_tbl ** pdtbl)
|
|
{
|
|
JHUFF_TBL *htbl;
|
|
c_derived_tbl *dtbl;
|
|
int p, i, l, lastp, si, maxsymbol;
|
|
char huffsize[257];
|
|
unsigned int huffcode[257];
|
|
unsigned int code;
|
|
|
|
/* Note that huffsize[] and huffcode[] are filled in code-length order,
|
|
* paralleling the order of the symbols themselves in htbl->huffval[].
|
|
*/
|
|
|
|
/* Find the input Huffman table */
|
|
if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
|
|
ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
|
|
htbl =
|
|
isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
|
|
if (htbl == NULL)
|
|
ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
|
|
|
|
/* Allocate a workspace if we haven't already done so. */
|
|
if (*pdtbl == NULL)
|
|
*pdtbl = (c_derived_tbl *)
|
|
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
|
|
SIZEOF(c_derived_tbl));
|
|
dtbl = *pdtbl;
|
|
|
|
/* Figure C.1: make table of Huffman code length for each symbol */
|
|
|
|
p = 0;
|
|
for (l = 1; l <= 16; l++) {
|
|
i = (int) htbl->bits[l];
|
|
if (i < 0 || p + i > 256) /* protect against table overrun */
|
|
ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
|
|
while (i--)
|
|
huffsize[p++] = (char) l;
|
|
}
|
|
huffsize[p] = 0;
|
|
lastp = p;
|
|
|
|
/* Figure C.2: generate the codes themselves */
|
|
/* We also validate that the counts represent a legal Huffman code tree. */
|
|
|
|
code = 0;
|
|
si = huffsize[0];
|
|
p = 0;
|
|
while (huffsize[p]) {
|
|
while (((int) huffsize[p]) == si) {
|
|
huffcode[p++] = code;
|
|
code++;
|
|
}
|
|
/* code is now 1 more than the last code used for codelength si; but
|
|
* it must still fit in si bits, since no code is allowed to be all ones.
|
|
*/
|
|
if (((INT32) code) >= (((INT32) 1) << si))
|
|
ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
|
|
code <<= 1;
|
|
si++;
|
|
}
|
|
|
|
/* Figure C.3: generate encoding tables */
|
|
/* These are code and size indexed by symbol value */
|
|
|
|
/* Set all codeless symbols to have code length 0;
|
|
* this lets us detect duplicate VAL entries here, and later
|
|
* allows emit_bits to detect any attempt to emit such symbols.
|
|
*/
|
|
MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));
|
|
|
|
/* This is also a convenient place to check for out-of-range
|
|
* and duplicated VAL entries. We allow 0..255 for AC symbols
|
|
* but only 0..16 for DC. (We could constrain them further
|
|
* based on data depth and mode, but this seems enough.)
|
|
*/
|
|
maxsymbol = isDC ? 16 : 255;
|
|
|
|
for (p = 0; p < lastp; p++) {
|
|
i = htbl->huffval[p];
|
|
if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
|
|
ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
|
|
dtbl->ehufco[i] = huffcode[p];
|
|
dtbl->ehufsi[i] = huffsize[p];
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* Generate the best Huffman code table for the given counts, fill htbl.
|
|
*
|
|
* The JPEG standard requires that no symbol be assigned a codeword of all
|
|
* one bits (so that padding bits added at the end of a compressed segment
|
|
* can't look like a valid code). Because of the canonical ordering of
|
|
* codewords, this just means that there must be an unused slot in the
|
|
* longest codeword length category. Section K.2 of the JPEG spec suggests
|
|
* reserving such a slot by pretending that symbol 256 is a valid symbol
|
|
* with count 1. In theory that's not optimal; giving it count zero but
|
|
* including it in the symbol set anyway should give a better Huffman code.
|
|
* But the theoretically better code actually seems to come out worse in
|
|
* practice, because it produces more all-ones bytes (which incur stuffed
|
|
* zero bytes in the final file). In any case the difference is tiny.
|
|
*
|
|
* The JPEG standard requires Huffman codes to be no more than 16 bits long.
|
|
* If some symbols have a very small but nonzero probability, the Huffman tree
|
|
* must be adjusted to meet the code length restriction. We currently use
|
|
* the adjustment method suggested in JPEG section K.2. This method is *not*
|
|
* optimal; it may not choose the best possible limited-length code. But
|
|
* typically only very-low-frequency symbols will be given less-than-optimal
|
|
* lengths, so the code is almost optimal. Experimental comparisons against
|
|
* an optimal limited-length-code algorithm indicate that the difference is
|
|
* microscopic --- usually less than a hundredth of a percent of total size.
|
|
* So the extra complexity of an optimal algorithm doesn't seem worthwhile.
|
|
*/
|
|
|
|
GLOBAL(void)
|
|
jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
|
|
{
|
|
#define MAX_CLEN 32 /* assumed maximum initial code length */
|
|
UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */
|
|
int codesize[257]; /* codesize[k] = code length of symbol k */
|
|
int others[257]; /* next symbol in current branch of tree */
|
|
int c1, c2;
|
|
int p, i, j;
|
|
long v;
|
|
|
|
/* This algorithm is explained in section K.2 of the JPEG standard */
|
|
|
|
MEMZERO(bits, SIZEOF(bits));
|
|
MEMZERO(codesize, SIZEOF(codesize));
|
|
for (i = 0; i < 257; i++)
|
|
others[i] = -1; /* init links to empty */
|
|
|
|
freq[256] = 1; /* make sure 256 has a nonzero count */
|
|
/* Including the pseudo-symbol 256 in the Huffman procedure guarantees
|
|
* that no real symbol is given code-value of all ones, because 256
|
|
* will be placed last in the largest codeword category.
|
|
*/
|
|
|
|
/* Huffman's basic algorithm to assign optimal code lengths to symbols */
|
|
|
|
for (;;) {
|
|
/* Find the smallest nonzero frequency, set c1 = its symbol */
|
|
/* In case of ties, take the larger symbol number */
|
|
c1 = -1;
|
|
v = 1000000000L;
|
|
for (i = 0; i <= 256; i++) {
|
|
if (freq[i] && freq[i] <= v) {
|
|
v = freq[i];
|
|
c1 = i;
|
|
}
|
|
}
|
|
|
|
/* Find the next smallest nonzero frequency, set c2 = its symbol */
|
|
/* In case of ties, take the larger symbol number */
|
|
c2 = -1;
|
|
v = 1000000000L;
|
|
for (i = 0; i <= 256; i++) {
|
|
if (freq[i] && freq[i] <= v && i != c1) {
|
|
v = freq[i];
|
|
c2 = i;
|
|
}
|
|
}
|
|
|
|
/* Done if we've merged everything into one frequency */
|
|
if (c2 < 0)
|
|
break;
|
|
|
|
/* Else merge the two counts/trees */
|
|
freq[c1] += freq[c2];
|
|
freq[c2] = 0;
|
|
|
|
/* Increment the codesize of everything in c1's tree branch */
|
|
codesize[c1]++;
|
|
while (others[c1] >= 0) {
|
|
c1 = others[c1];
|
|
codesize[c1]++;
|
|
}
|
|
|
|
others[c1] = c2; /* chain c2 onto c1's tree branch */
|
|
|
|
/* Increment the codesize of everything in c2's tree branch */
|
|
codesize[c2]++;
|
|
while (others[c2] >= 0) {
|
|
c2 = others[c2];
|
|
codesize[c2]++;
|
|
}
|
|
}
|
|
|
|
/* Now count the number of symbols of each code length */
|
|
for (i = 0; i <= 256; i++) {
|
|
if (codesize[i]) {
|
|
/* The JPEG standard seems to think that this can't happen, */
|
|
/* but I'm paranoid... */
|
|
if (codesize[i] > MAX_CLEN)
|
|
ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
|
|
|
|
bits[codesize[i]]++;
|
|
}
|
|
}
|
|
|
|
/* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
|
|
* Huffman procedure assigned any such lengths, we must adjust the coding.
|
|
* Here is what the JPEG spec says about how this next bit works:
|
|
* Since symbols are paired for the longest Huffman code, the symbols are
|
|
* removed from this length category two at a time. The prefix for the pair
|
|
* (which is one bit shorter) is allocated to one of the pair; then,
|
|
* skipping the BITS entry for that prefix length, a code word from the next
|
|
* shortest nonzero BITS entry is converted into a prefix for two code words
|
|
* one bit longer.
|
|
*/
|
|
|
|
for (i = MAX_CLEN; i > 16; i--) {
|
|
while (bits[i] > 0) {
|
|
j = i - 2; /* find length of new prefix to be used */
|
|
while (bits[j] == 0)
|
|
j--;
|
|
|
|
bits[i] -= 2; /* remove two symbols */
|
|
bits[i-1]++; /* one goes in this length */
|
|
bits[j+1] += 2; /* two new symbols in this length */
|
|
bits[j]--; /* symbol of this length is now a prefix */
|
|
}
|
|
}
|
|
|
|
/* Remove the count for the pseudo-symbol 256 from the largest codelength */
|
|
while (bits[i] == 0) /* find largest codelength still in use */
|
|
i--;
|
|
bits[i]--;
|
|
|
|
/* Return final symbol counts (only for lengths 0..16) */
|
|
MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
|
|
|
|
/* Return a list of the symbols sorted by code length */
|
|
/* It's not real clear to me why we don't need to consider the codelength
|
|
* changes made above, but the JPEG spec seems to think this works.
|
|
*/
|
|
p = 0;
|
|
for (i = 1; i <= MAX_CLEN; i++) {
|
|
for (j = 0; j <= 255; j++) {
|
|
if (codesize[j] == i) {
|
|
htbl->huffval[p] = (UINT8) j;
|
|
p++;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Set sent_table FALSE so updated table will be written to JPEG file. */
|
|
htbl->sent_table = FALSE;
|
|
}
|