Files
mozjpeg/jchuff.c
DRC ec6e451d05 Lossless JPEG support: Add copyright attributions
Referring to
https://github.com/libjpeg-turbo/libjpeg-turbo/issues/402#issuecomment-768348440
and
https://github.com/libjpeg-turbo/libjpeg-turbo/issues/402#issuecomment-770221584

Ken Murchison clarified that it was his intent to release the lossless
JPEG patch under the IJG License and that adding his name to the
copyright headers would be sufficient to acknowledge that any
derivatives are based on his work.
2022-10-21 16:53:53 -05:00

276 lines
9.0 KiB
C

/*
* jchuff.c
*
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1998, Thomas G. Lane.
* Lossless JPEG Modifications:
* Copyright (C) 1999, Ken Murchison.
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains Huffman entropy decoding routines which are shared
* by the sequential, progressive and lossless decoders.
*/
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jchuff.h" /* Declarations shared with jc*huff.c */
/*
* Compute the derived values for a Huffman table.
* This routine also performs some validation checks on the table.
*/
GLOBAL(void)
jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
c_derived_tbl ** pdtbl)
{
JHUFF_TBL *htbl;
c_derived_tbl *dtbl;
int p, i, l, lastp, si, maxsymbol;
char huffsize[257];
unsigned int huffcode[257];
unsigned int code;
/* Note that huffsize[] and huffcode[] are filled in code-length order,
* paralleling the order of the symbols themselves in htbl->huffval[].
*/
/* Find the input Huffman table */
if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
htbl =
isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
if (htbl == NULL)
ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
/* Allocate a workspace if we haven't already done so. */
if (*pdtbl == NULL)
*pdtbl = (c_derived_tbl *)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
SIZEOF(c_derived_tbl));
dtbl = *pdtbl;
/* Figure C.1: make table of Huffman code length for each symbol */
p = 0;
for (l = 1; l <= 16; l++) {
i = (int) htbl->bits[l];
if (i < 0 || p + i > 256) /* protect against table overrun */
ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
while (i--)
huffsize[p++] = (char) l;
}
huffsize[p] = 0;
lastp = p;
/* Figure C.2: generate the codes themselves */
/* We also validate that the counts represent a legal Huffman code tree. */
code = 0;
si = huffsize[0];
p = 0;
while (huffsize[p]) {
while (((int) huffsize[p]) == si) {
huffcode[p++] = code;
code++;
}
/* code is now 1 more than the last code used for codelength si; but
* it must still fit in si bits, since no code is allowed to be all ones.
*/
if (((INT32) code) >= (((INT32) 1) << si))
ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
code <<= 1;
si++;
}
/* Figure C.3: generate encoding tables */
/* These are code and size indexed by symbol value */
/* Set all codeless symbols to have code length 0;
* this lets us detect duplicate VAL entries here, and later
* allows emit_bits to detect any attempt to emit such symbols.
*/
MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));
/* This is also a convenient place to check for out-of-range
* and duplicated VAL entries. We allow 0..255 for AC symbols
* but only 0..16 for DC. (We could constrain them further
* based on data depth and mode, but this seems enough.)
*/
maxsymbol = isDC ? 16 : 255;
for (p = 0; p < lastp; p++) {
i = htbl->huffval[p];
if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
dtbl->ehufco[i] = huffcode[p];
dtbl->ehufsi[i] = huffsize[p];
}
}
/*
* Generate the best Huffman code table for the given counts, fill htbl.
*
* The JPEG standard requires that no symbol be assigned a codeword of all
* one bits (so that padding bits added at the end of a compressed segment
* can't look like a valid code). Because of the canonical ordering of
* codewords, this just means that there must be an unused slot in the
* longest codeword length category. Section K.2 of the JPEG spec suggests
* reserving such a slot by pretending that symbol 256 is a valid symbol
* with count 1. In theory that's not optimal; giving it count zero but
* including it in the symbol set anyway should give a better Huffman code.
* But the theoretically better code actually seems to come out worse in
* practice, because it produces more all-ones bytes (which incur stuffed
* zero bytes in the final file). In any case the difference is tiny.
*
* The JPEG standard requires Huffman codes to be no more than 16 bits long.
* If some symbols have a very small but nonzero probability, the Huffman tree
* must be adjusted to meet the code length restriction. We currently use
* the adjustment method suggested in JPEG section K.2. This method is *not*
* optimal; it may not choose the best possible limited-length code. But
* typically only very-low-frequency symbols will be given less-than-optimal
* lengths, so the code is almost optimal. Experimental comparisons against
* an optimal limited-length-code algorithm indicate that the difference is
* microscopic --- usually less than a hundredth of a percent of total size.
* So the extra complexity of an optimal algorithm doesn't seem worthwhile.
*/
GLOBAL(void)
jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
{
#define MAX_CLEN 32 /* assumed maximum initial code length */
UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */
int codesize[257]; /* codesize[k] = code length of symbol k */
int others[257]; /* next symbol in current branch of tree */
int c1, c2;
int p, i, j;
long v;
/* This algorithm is explained in section K.2 of the JPEG standard */
MEMZERO(bits, SIZEOF(bits));
MEMZERO(codesize, SIZEOF(codesize));
for (i = 0; i < 257; i++)
others[i] = -1; /* init links to empty */
freq[256] = 1; /* make sure 256 has a nonzero count */
/* Including the pseudo-symbol 256 in the Huffman procedure guarantees
* that no real symbol is given code-value of all ones, because 256
* will be placed last in the largest codeword category.
*/
/* Huffman's basic algorithm to assign optimal code lengths to symbols */
for (;;) {
/* Find the smallest nonzero frequency, set c1 = its symbol */
/* In case of ties, take the larger symbol number */
c1 = -1;
v = 1000000000L;
for (i = 0; i <= 256; i++) {
if (freq[i] && freq[i] <= v) {
v = freq[i];
c1 = i;
}
}
/* Find the next smallest nonzero frequency, set c2 = its symbol */
/* In case of ties, take the larger symbol number */
c2 = -1;
v = 1000000000L;
for (i = 0; i <= 256; i++) {
if (freq[i] && freq[i] <= v && i != c1) {
v = freq[i];
c2 = i;
}
}
/* Done if we've merged everything into one frequency */
if (c2 < 0)
break;
/* Else merge the two counts/trees */
freq[c1] += freq[c2];
freq[c2] = 0;
/* Increment the codesize of everything in c1's tree branch */
codesize[c1]++;
while (others[c1] >= 0) {
c1 = others[c1];
codesize[c1]++;
}
others[c1] = c2; /* chain c2 onto c1's tree branch */
/* Increment the codesize of everything in c2's tree branch */
codesize[c2]++;
while (others[c2] >= 0) {
c2 = others[c2];
codesize[c2]++;
}
}
/* Now count the number of symbols of each code length */
for (i = 0; i <= 256; i++) {
if (codesize[i]) {
/* The JPEG standard seems to think that this can't happen, */
/* but I'm paranoid... */
if (codesize[i] > MAX_CLEN)
ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
bits[codesize[i]]++;
}
}
/* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
* Huffman procedure assigned any such lengths, we must adjust the coding.
* Here is what the JPEG spec says about how this next bit works:
* Since symbols are paired for the longest Huffman code, the symbols are
* removed from this length category two at a time. The prefix for the pair
* (which is one bit shorter) is allocated to one of the pair; then,
* skipping the BITS entry for that prefix length, a code word from the next
* shortest nonzero BITS entry is converted into a prefix for two code words
* one bit longer.
*/
for (i = MAX_CLEN; i > 16; i--) {
while (bits[i] > 0) {
j = i - 2; /* find length of new prefix to be used */
while (bits[j] == 0)
j--;
bits[i] -= 2; /* remove two symbols */
bits[i-1]++; /* one goes in this length */
bits[j+1] += 2; /* two new symbols in this length */
bits[j]--; /* symbol of this length is now a prefix */
}
}
/* Remove the count for the pseudo-symbol 256 from the largest codelength */
while (bits[i] == 0) /* find largest codelength still in use */
i--;
bits[i]--;
/* Return final symbol counts (only for lengths 0..16) */
MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
/* Return a list of the symbols sorted by code length */
/* It's not real clear to me why we don't need to consider the codelength
* changes made above, but the JPEG spec seems to think this works.
*/
p = 0;
for (i = 1; i <= MAX_CLEN; i++) {
for (j = 0; j <= 255; j++) {
if (codesize[j] == i) {
htbl->huffval[p] = (UINT8) j;
p++;
}
}
}
/* Set sent_table FALSE so updated table will be written to JPEG file. */
htbl->sent_table = FALSE;
}