diff --git a/jpegut.c b/jpegut.c index be2f126b..49ea18da 100644 --- a/jpegut.c +++ b/jpegut.c @@ -226,13 +226,13 @@ int checkbuf(unsigned char *buf, int w, int h, int ps, int subsamp, int flags) #define PAD(v, p) ((v+(p)-1)&(~((p)-1))) int checkbufyuv(unsigned char *buf, unsigned long size, int w, int h, - int subsamp, int decode) + int subsamp) { int i, j; int hsf=_hsf[subsamp], vsf=_vsf[subsamp]; int pw=PAD(w, hsf), ph=PAD(h, vsf); - int cw=PAD(pw/hsf, decode? 8:1), ch=PAD(ph/vsf, decode? 8:1); - int ypitch=PAD(pw, decode? 8:4), uvpitch=PAD(cw, decode? 8:4); + int cw=pw/hsf, ch=ph/vsf; + int ypitch=PAD(pw, 4), uvpitch=PAD(cw, 4); int retval=1; unsigned long correctsize=ypitch*ph + (subsamp==TJ_GRAYSCALE? 0:uvpitch*ch*2); @@ -389,7 +389,7 @@ void gentestjpeg(tjhandle hnd, unsigned char *jpegbuf, unsigned long *size, writejpeg(jpegbuf, *size, tempstr); if(yuv==YUVENCODE) { - if(checkbufyuv(jpegbuf, *size, w, h, subsamp, 0)) printf("Passed."); + if(checkbufyuv(jpegbuf, *size, w, h, subsamp)) printf("Passed."); else printf("FAILED!"); } else printf("Done."); @@ -406,7 +406,9 @@ void gentestbmp(tjhandle hnd, unsigned char *jpegbuf, unsigned long jpegsize, const char *pixformat; int _w=0, _h=0; double t; unsigned long size=0; int hsf=_hsf[subsamp], vsf=_vsf[subsamp]; - int pw=PAD(w, 8), ph=PAD(h, 8), cw=PAD(pw/hsf, 8), ch=PAD(ph/vsf, 8); + int pw=PAD(w, hsf), ph=PAD(h, vsf); + int cw=pw/hsf, ch=ph/vsf; + int ypitch=PAD(pw, 4), uvpitch=PAD(cw, 4); if(yuv==YUVDECODE) flags|=TJ_YUV; else if(yuv==YUVENCODE) return; @@ -435,7 +437,7 @@ void gentestbmp(tjhandle hnd, unsigned char *jpegbuf, unsigned long jpegsize, } if(yuv==YUVDECODE) - size=pw*ph + (subsamp==TJ_GRAYSCALE? 0:cw*ch*(ps-1)); + size=ypitch*ph + (subsamp==TJ_GRAYSCALE? 
0:uvpitch*ch*2); else size=w*h*ps; if((bmpbuf=(unsigned char *)malloc(size+1))==NULL) @@ -450,7 +452,7 @@ void gentestbmp(tjhandle hnd, unsigned char *jpegbuf, unsigned long jpegsize, if(yuv==YUVDECODE) { - if(checkbufyuv(bmpbuf, size, pw, ph, subsamp, 1)) + if(checkbufyuv(bmpbuf, size, pw, ph, subsamp)) printf("Passed."); else printf("FAILED!"); } @@ -589,7 +591,9 @@ int main(int argc, char *argv[]) if(doyuv) { yuv=YUVDECODE; + dotest(48, 48, 3, TJ_444, "test"); dotest(35, 39, 3, TJ_444, "test"); + dotest(48, 48, 1, TJ_GRAYSCALE, "test"); dotest(39, 41, 1, TJ_GRAYSCALE, "test"); } diff --git a/jpgtest.cxx b/jpgtest.cxx index d0e34b41..b6dddb63 100644 --- a/jpgtest.cxx +++ b/jpgtest.cxx @@ -79,13 +79,15 @@ void dotest(unsigned char *srcbuf, int w, int h, BMPPIXELFORMAT pf, int bu, int ps=_ps[pf]; int pitch=w*ps, yuvsize; int hsf=_hsf[jpegsub], vsf=_vsf[jpegsub]; - int pw=PAD(w, 8), ph=PAD(h, 8), cw=PAD(pw/hsf, 8), ch=PAD(ph/vsf, 8); + int pw=PAD(w, hsf), ph=PAD(h, vsf); + int cw=pw/hsf, ch=ph/vsf; + int ypitch=PAD(pw, 4), uvpitch=PAD(cw, 4); flags |= _flags[pf]; if(bu) flags |= TJ_BOTTOMUP; if(yuv==YUVENCODE) flags |= TJ_YUV; - yuvsize=pw*ph + (jpegsub==TJ_GRAYSCALE? 0:cw*ch*2); + yuvsize=ypitch*ph + (jpegsub==TJ_GRAYSCALE? 0:uvpitch*ch*2); if((rgbbuf=(unsigned char *)malloc(max(yuvsize, pitch*h))) == NULL) _throwunix("allocating image buffer"); diff --git a/turbojpeg.h b/turbojpeg.h index d2fd6d06..e80512b6 100644 --- a/turbojpeg.h +++ b/turbojpeg.h @@ -56,19 +56,23 @@ enum {TJ_444=0, TJ_422, TJ_420, TJ_GRAYSCALE}; #define TJ_YUV 512 /* If passed to tjCompress(), this causes TurboJPEG/OSS to use the accelerated color conversion routines in libjpeg-turbo to produce a planar - YUV image that is suitable for X Video. Specifically, if a component is - subsampled along the horizontal dimension, then the width of the plane for - that component is padded to 2 in the output image (same goes for the - height, if the component is subsampled along the vertical dimension.) 
- Also, each line of each plane in the output image is padded to 4 bytes. - Although this will work with any subsampling option, it is really only - useful in combination with TJ_420, which produces an image compatible with - the I420 (AKA "YUV420P") format. + YUV image that is suitable for X Video. Specifically, if the chrominance + components are subsampled along the horizontal dimension, then the width + of the luminance plane is padded to a multiple of 2 in the output image + (same goes for the height of the luminance plane, if the chrominance + components are subsampled along the vertical dimension.) Also, each line + of each plane in the output image is padded to a multiple of 4 bytes. + Although this will work with any subsampling option, it is really only + useful in combination with TJ_420, which produces an image compatible with + the I420 (AKA "YUV420P") format. If passed to tjDecompress(), this tells TurboJPEG/OSS to perform JPEG decompression but to leave out the color conversion step, so a planar YUV - image is generated instead of an RGB image. In this case, the width and - height of all planes are padded to 8 in the output image. + image is generated instead of an RGB image. The padding of the planes in + this image is the same as in the above case. Note that, if the width or + height of the output image is not a multiple of 8 (or a multiple of 16 + along any dimension in which chrominance subsampling is used), then an + intermediate buffer copy will be performed within TurboJPEG/OSS. 
*/ typedef void* tjhandle; diff --git a/turbojpegl.c b/turbojpegl.c index 5ca4b4a3..5e39f525 100644 --- a/turbojpegl.c +++ b/turbojpegl.c @@ -406,11 +406,16 @@ DLLEXPORT int DLLCALL tjDecompress(tjhandle h, int flags) { int i, row; JSAMPROW *row_pointer=NULL, *outbuf[MAX_COMPONENTS]; - int cw[MAX_COMPONENTS], ch[MAX_COMPONENTS]; + int cw[MAX_COMPONENTS], ch[MAX_COMPONENTS], iw[MAX_COMPONENTS], + tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS]; + JSAMPLE *_tmpbuf=NULL; JSAMPROW *tmpbuf[MAX_COMPONENTS]; checkhandle(h); - for(i=0; ijerr.jb)) { // this will execute if LIBJPEG has an error for(i=0; inum_components; i++) { jpeg_component_info *compptr=&dinfo->comp_info[i]; - cw[i]=compptr->width_in_blocks*DCTSIZE; - ch[i]=compptr->height_in_blocks*DCTSIZE; + int ih; + iw[i]=compptr->width_in_blocks*DCTSIZE; + ih=compptr->height_in_blocks*DCTSIZE; + cw[i]=PAD(width, dinfo->max_h_samp_factor)*compptr->h_samp_factor + /dinfo->max_h_samp_factor; + ch[i]=PAD(height, dinfo->max_v_samp_factor)*compptr->v_samp_factor + /dinfo->max_v_samp_factor; + if(iw[i]!=cw[i] || ih!=ch[i]) + { + usetmpbuf=1; + th[i]=compptr->v_samp_factor*DCTSIZE/dinfo->max_v_samp_factor; + tmpbufsize+=iw[i]*th[i]; + } if((outbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ch[i]))==NULL) _throw("Memory allocation failed in tjInitDecompress()"); for(row=0; rownum_components; i++) + { + jpeg_component_info *compptr=&dinfo->comp_info[i]; + if((tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*th[i]))==NULL) + _throw("Memory allocation failed in tjInitDecompress()"); + for(row=0; rowdinfo); if(flags&TJ_YUV) { - for(row=0; rowdinfo.output_height; - row+=j->dinfo.max_v_samp_factor*DCTSIZE) + j_decompress_ptr dinfo=&j->dinfo; + for(row=0; rowoutput_height; + row+=dinfo->max_v_samp_factor*DCTSIZE) { JSAMPARRAY yuvptr[MAX_COMPONENTS]; - for(i=0; idinfo.num_components; i++) + for(i=0; inum_components; i++) { - jpeg_component_info *compptr=&j->dinfo.comp_info[i]; - 
yuvptr[i]=&outbuf[i][row*compptr->v_samp_factor/j->dinfo.max_v_samp_factor]; + jpeg_component_info *compptr=&dinfo->comp_info[i]; + if(usetmpbuf) yuvptr[i]=tmpbuf[i]; + else yuvptr[i]=&outbuf[i][row*compptr->v_samp_factor + /dinfo->max_v_samp_factor]; + } + jpeg_read_raw_data(dinfo, yuvptr, dinfo->max_v_samp_factor*DCTSIZE); + if(usetmpbuf) + { + int j; + for(i=0; inum_components; i++) + { + jpeg_component_info *compptr=&dinfo->comp_info[i]; + for(j=0; joutput_height-row); j++) + { + memcpy(outbuf[i][row*compptr->v_samp_factor + /dinfo->max_v_samp_factor+j], tmpbuf[i][j], cw[i]); + } + } } - jpeg_read_raw_data(&j->dinfo, yuvptr, j->dinfo.max_v_samp_factor*DCTSIZE); } } else @@ -514,7 +567,11 @@ DLLEXPORT int DLLCALL tjDecompress(tjhandle h, jpeg_finish_decompress(&j->dinfo); for(i=0; i