#include "stdafx.h" #pragma hdrstop /* * jccolor.c * * Copyright (C) 1991-1996, Thomas G. Lane. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * * This file contains input colorspace conversion routines. */ #pragma warning( disable : 4799 ) #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" #ifdef NIFTY #include #define SCALE_PREC 5 #define SCALE_RND (1 << (SCALE_PREC - 1)) #define SCALE (1 << SCALE_PREC) #define unscale(x) (((long)(x) + SCALE_RND) >> SCALE_PREC) #define clip(x) (((long)(x) & ~0xff) ? (((long)(x) < 0) ? 0 : 255) : (long)(x)) #endif /* Private subobject */ typedef struct { struct jpeg_color_converter pub; /* public fields */ /* Private state for RGB->YCC conversion */ INT32 * rgb_ycc_tab; /* => table for RGB to YCbCr conversion */ } my_color_converter; typedef my_color_converter * my_cconvert_ptr; extern void MRGB2YCbCr( int rows, int cols, unsigned char *inRGB, unsigned char *outY, unsigned char *outU, unsigned char *outV); extern void MRGBA2YCbCrA( int rows, int cols, unsigned char *inRGB, unsigned char *outY, unsigned char *outU, unsigned char *outV, unsigned char *outA); extern void MRGBA2YCbCrALegacy( int rows, int cols, unsigned char *inRGB, unsigned char *outY, unsigned char *outU, unsigned char *outV, unsigned char *outA); // ****************************************************************** // Macros and Constants #define FCONVERSION_BITS 15 #define ICONVERSION_BITS 8 const __int64 const_0 = 0x0000000000000000; const __int64 const_1 = 0x0001000100010001; const __int64 const_128 = 0x0080008000800080; // These constants correspond to CCIR 601-1 // Y = [ (9798*R + 19235*G + 3736*B) / 32768] // Cb = [(-5529*R - 10855*G + 16384*B) / 32768] + 128 // Cr = [(16384*R - 13720*G - 2664*B) / 32768] + 128 //Conventional floating point equations: // Y = 0.29900 * R + 0.58700 * G + 0.11400 * B // Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + 0.5 // 
Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + 0.5 //Yr = 2646 Yg = 4b23 Yb = 0e98 //Ur = ea67 Ug = d599 Ub = 4000 //Vr = 4000 Vg = ca68 Vb = f598 // constants for RGB->YCrCb const __int64 const_YR0GR = 0x264600004B232646; const __int64 const_YBG0B = 0x0E984B2300000E98; const __int64 const_UR0GR = 0xEA670000D599EA67; const __int64 const_UBG0B = 0x4000D59900004000; const __int64 const_VR0GR = 0x40000000CA684000; const __int64 const_VBG0B = 0xF598CA680000F598; // constants for RGBA->YCrCbA const __int64 const2_YGRGR = 0x4B2326464B232646; const __int64 const2_Y0B0B = 0x00000E9800000E98; const __int64 const2_UGRGR = 0xD599EA67D599EA67; const __int64 const2_U0B0B = 0x0000400000004000; const __int64 const2_VGRGR = 0xCA684000CA684000; const __int64 const2_V0B0B = 0x0000F5980000F598; const __int64 const2_A = 0x0001000000010000; const __int64 const2_Legacy = 0x00FFFFFF00FFFFFF; // These constants correspond to the original FPX SDK // ... using 2^15 //Y = [ (9869*R + 19738*G + 3290*B) / 32768] //Cb = [(-4935*R - 9869*G + 14739*B) / 32768] + 128 //Cr = [(14312*R - 12336*G - 2056*B) / 32768] + 128 //Conventional floating point equations: // Y = 0.30118*R + 0.60235*G + 0.10039*B // Cb = -0.15059*R - 0.30118*G + 0.44981*B + 0.5 // Cr = 0.43676*R - 0.37647*G - 0.06274*G + 0.5 //Yr = 268d Yg = 4d1a Yb = 0cda //Ur = ecb9 Ug = d973 Ub = 3993 //Vr = 37e8 Vg = cfd0 Vb = f7f8 // constants for RGB->YCrCb //const __int64 const_YR0GR = 0x268D00004D1A268D; //const __int64 const_YBG0B = 0x0CDA4D1A00000CDA; //const __int64 const_UR0GR = 0xECB90000D973ECB9; //const __int64 const_UBG0B = 0x3993D97300003993; //const __int64 const_VR0GR = 0x37E80000CFD037E8; //const __int64 const_VBG0B = 0xF7F8CFD00000F7F8; // constants for RGBA->YCrCbA //const __int64 const2_YGRGR = 0x4D1A268D4D1A268D; //const __int64 const2_Y0B0B = 0x00000CDA00000CDA; //const __int64 const2_UGRGR = 0xD973ECB9D973ECB9; //const __int64 const2_U0B0B = 0x0000399300003993; //const __int64 const2_VGRGR = 0xCFD037E8CFD037E8; //const 
__int64 const2_V0B0B = 0x0000F7F80000F7F8; //const __int64 const2_A = 0x0001000000010000; //const __int64 const2_Legacy = 0x00FFFFFF00FFFFFF; // ... using 2^8 //const __int64 const_X0YY0 = 0x0000010001000000; //const __int64 const_RVUVU = 0x019A0000019A0000; //const __int64 const_GVUVU = 0xFF33FFABFF33FFAB; //const __int64 const_BVUVU = 0x0000020000000200; __int64 temp0, tempY, tempU, tempV, tempA; /**************** RGB -> YCbCr conversion: most common case **************/ /* * YCbCr is defined per CCIR 601-1, except that Cb and Cr are * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5. * The conversion equations to be implemented are therefore * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.) * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2, * rather than CENTERJSAMPLE, for Cb and Cr. This gave equal positive and * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0) * were not represented exactly. Now we sacrifice exact representation of * maximum red and maximum blue in order to get exact grayscales. * * To avoid floating-point arithmetic, we represent the fractional constants * as integers scaled up by 2^16 (about 4 digits precision); we have to divide * the products by 2^16, with appropriate rounding, to get the correct answer. * * For even more speed, we avoid doing any multiplications in the inner loop * by precalculating the constants times R,G,B for all possible values. * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table); * for 12-bit samples it is still acceptable. It's not very reasonable for * 16-bit samples, but if you want lossless storage you shouldn't be changing * colorspace anyway. 
* The CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included * in the tables to save adding them separately in the inner loop. */ #define SCALEBITS 16 /* speediest right-shift on some machines */ #define CBCR_OFFSET ((INT32) CENTERJSAMPLE << SCALEBITS) #define ONE_HALF ((INT32) 1 << (SCALEBITS-1)) #define FIX(x) ((INT32) ((x) * (1L< Y section */ #define G_Y_OFF (1*(MAXJSAMPLE+1)) /* offset to G => Y section */ #define B_Y_OFF (2*(MAXJSAMPLE+1)) /* etc. */ #define R_CB_OFF (3*(MAXJSAMPLE+1)) #define G_CB_OFF (4*(MAXJSAMPLE+1)) #define B_CB_OFF (5*(MAXJSAMPLE+1)) #define R_CR_OFF B_CB_OFF /* B=>Cb, R=>Cr are the same */ #define G_CR_OFF (6*(MAXJSAMPLE+1)) #define B_CR_OFF (7*(MAXJSAMPLE+1)) #define TABLE_SIZE (8*(MAXJSAMPLE+1)) #ifdef NIFTY /* * Initialize for RGB->PhotoYCC colorspace conversion. */ METHODDEF (void) rgb_pycc_start (j_compress_ptr cinfo) { } /* * RGB->PhotoYCC colorspace convertion. */ METHODDEF (void) rgb_pycc_convert (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows) { my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert; register JSAMPROW inptr; register JSAMPROW outptr0, outptr1, outptr2; register JDIMENSION col; JDIMENSION num_cols = cinfo->image_width; unsigned char r, g, b; while (--num_rows >= 0) { inptr = *input_buf++; outptr0 = output_buf[0][output_row]; outptr1 = output_buf[1][output_row]; outptr2 = output_buf[2][output_row]; output_row++; for (col = 0; col < num_cols; col++) { r = GETJSAMPLE(inptr[RGB_RED]); g = GETJSAMPLE(inptr[RGB_GREEN]); b = GETJSAMPLE(inptr[RGB_BLUE]); inptr+=RGB_PIXELSIZE; /* Y */ outptr0[col] = (JSAMPLE)((float)((float)r * 0.2200179046) + (float)((float)g * 0.4322754970) + (float)((float)b * 0.0838667868)); /* C1 */ outptr1[col] = (JSAMPLE)((float)((float)r * -0.1347546425) - (float)((float)g * 0.2647563169) + (float)((float)b * 0.3995109594) + 156); /* C2 */ outptr2[col] = (JSAMPLE)((float)((float)r * 0.3849177482) - (float)((float)g * 
0.3223733380) + (float)((float)b * 0.0625444102) + 137); } } } #endif /* * Initialize for RGB->YCC colorspace conversion. */ METHODDEF(void) rgb_ycc_start (j_compress_ptr cinfo) { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; INT32 * rgb_ycc_tab; INT32 i; /* Allocate and fill in the conversion tables. */ cconvert->rgb_ycc_tab = rgb_ycc_tab = (INT32 *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, (TABLE_SIZE * SIZEOF(INT32))); for (i = 0; i <= MAXJSAMPLE; i++) { rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i; rgb_ycc_tab[i+G_Y_OFF] = FIX(0.58700) * i; rgb_ycc_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF; rgb_ycc_tab[i+R_CB_OFF] = (-FIX(0.16874)) * i; rgb_ycc_tab[i+G_CB_OFF] = (-FIX(0.33126)) * i; /* We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr. * This ensures that the maximum output will round to MAXJSAMPLE * not MAXJSAMPLE+1, and thus that we don't have to range-limit. */ rgb_ycc_tab[i+B_CB_OFF] = FIX(0.50000) * i + CBCR_OFFSET + ONE_HALF-1; /* B=>Cb and R=>Cr tables are the same rgb_ycc_tab[i+R_CR_OFF] = FIX(0.50000) * i + CBCR_OFFSET + ONE_HALF-1; */ rgb_ycc_tab[i+G_CR_OFF] = (-FIX(0.41869)) * i; rgb_ycc_tab[i+B_CR_OFF] = (-FIX(0.08131)) * i; } } /* * Convert some rows of samples to the JPEG colorspace. * * Note that we change from the application's interleaved-pixel format * to our internal noninterleaved, one-plane-per-component format. * The input buffer is therefore three times as wide as the output buffer. * * A starting row offset is provided only for the output buffer. The caller * can easily adjust the passed input_buf value to accommodate any row * offset required on that side. 
*/ METHODDEF(void) rgb_ycc_convert (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows) { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; register int r, g, b; register INT32 * ctab = cconvert->rgb_ycc_tab; register JSAMPROW inptr; register JSAMPROW outptr0, outptr1, outptr2; register JDIMENSION col; JDIMENSION num_cols = cinfo->image_width; JDIMENSION tail_cols = num_cols&7; JDIMENSION mmx_cols=num_cols&~7; while (--num_rows >= 0) { inptr = *input_buf++; outptr0 = output_buf[0][output_row]; outptr1 = output_buf[1][output_row]; outptr2 = output_buf[2][output_row]; output_row++; // // Need to add #ifdef for Alpha port // #if defined (_X86_) if (vfMMXMachine) { MRGB2YCbCr( (int)(1), mmx_cols, inptr, outptr0, outptr1, outptr2); inptr += 3*mmx_cols; for (col = mmx_cols; col < num_cols; col++) { r = GETJSAMPLE(inptr[RGB_RED]); g = GETJSAMPLE(inptr[RGB_GREEN]); b = GETJSAMPLE(inptr[RGB_BLUE]); inptr += RGB_PIXELSIZE; /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations * must be too; we do not need an explicit range-limiting operation. * Hence the value being shifted is never negative, and we don't * need the general RIGHT_SHIFT macro. */ /* Y */ outptr0[col] = (JSAMPLE) ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) >> SCALEBITS); /* Cb */ outptr1[col] = (JSAMPLE) ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) >> SCALEBITS); /* Cr */ outptr2[col] = (JSAMPLE) ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) >> SCALEBITS); } } else #endif { for (col = 0; col < num_cols; col++) { r = GETJSAMPLE(inptr[RGB_RED]); g = GETJSAMPLE(inptr[RGB_GREEN]); b = GETJSAMPLE(inptr[RGB_BLUE]); inptr += RGB_PIXELSIZE; /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations * must be too; we do not need an explicit range-limiting operation. * Hence the value being shifted is never negative, and we don't * need the general RIGHT_SHIFT macro. 
*/ /* Y */ outptr0[col] = (JSAMPLE) ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) >> SCALEBITS); /* Cb */ outptr1[col] = (JSAMPLE) ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) >> SCALEBITS); /* Cr */ outptr2[col] = (JSAMPLE) ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) >> SCALEBITS); } } } } /**************** Cases other than RGB -> YCbCr **************/ /* * Convert some rows of samples to the JPEG colorspace. * This version handles RGB->grayscale conversion, which is the same * as the RGB->Y portion of RGB->YCbCr. * We assume rgb_ycc_start has been called (we only use the Y tables). */ METHODDEF(void) rgb_gray_convert (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows) { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; register int r, g, b; register INT32 * ctab = cconvert->rgb_ycc_tab; register JSAMPROW inptr; register JSAMPROW outptr; register JDIMENSION col; JDIMENSION num_cols = cinfo->image_width; while (--num_rows >= 0) { inptr = *input_buf++; outptr = output_buf[0][output_row]; output_row++; for (col = 0; col < num_cols; col++) { r = GETJSAMPLE(inptr[RGB_RED]); g = GETJSAMPLE(inptr[RGB_GREEN]); b = GETJSAMPLE(inptr[RGB_BLUE]); inptr += RGB_PIXELSIZE; /* Y */ outptr[col] = (JSAMPLE) ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) >> SCALEBITS); } } } #ifdef NIFTY METHODDEF (void) rgba_ycbcra_convert (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows) { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; register int r, g, b; register INT32 * ctab = cconvert->rgb_ycc_tab; register JSAMPROW inptr; register JSAMPROW outptr0, outptr1, outptr2, outptr3; register JDIMENSION col; JDIMENSION num_cols = cinfo->image_width; JDIMENSION tail_cols = num_cols&7; JDIMENSION mmx_cols=num_cols&~7; while (--num_rows >= 0) { inptr = *input_buf++; outptr0 = output_buf[0][output_row]; outptr1 = 
output_buf[1][output_row]; outptr2 = output_buf[2][output_row]; outptr3 = output_buf[3][output_row]; output_row++; // // Need to add #ifdef for Alpha port // #if defined (_X86_) if (vfMMXMachine) { MRGBA2YCbCrA( (int)(1), mmx_cols, inptr, outptr0, outptr1, outptr2, outptr3); inptr += 4*mmx_cols; for (col = mmx_cols; col < num_cols; col++) { r = GETJSAMPLE(inptr[0]); g = GETJSAMPLE(inptr[1]); b = GETJSAMPLE(inptr[2]); /* Alpha passes through as-is */ outptr3[col] = inptr[3]; /* don't need GETJSAMPLE here */ inptr += 4; /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations * must be too; we do not need an explicit range-limiting operation. * Hence the value being shifted is never negative, and we don't * need the general RIGHT_SHIFT macro. */ /* Y */ outptr0[col] = (JSAMPLE) ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) >> SCALEBITS); /* Cb */ outptr1[col] = (JSAMPLE) ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) >> SCALEBITS); /* Cr */ outptr2[col] = (JSAMPLE) ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) >> SCALEBITS); } } else #endif // defined (_X86_) { for (col = 0; col < num_cols; col++) { r = GETJSAMPLE(inptr[0]); g = GETJSAMPLE(inptr[1]); b = GETJSAMPLE(inptr[2]); /* Alpha passes through as-is */ outptr3[col] = inptr[3]; /* don't need GETJSAMPLE here */ inptr += 4; /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations * must be too; we do not need an explicit range-limiting operation. * Hence the value being shifted is never negative, and we don't * need the general RIGHT_SHIFT macro. 
*/ /* Y */ outptr0[col] = (JSAMPLE) ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) >> SCALEBITS); /* Cb */ outptr1[col] = (JSAMPLE) ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) >> SCALEBITS); /* Cr */ outptr2[col] = (JSAMPLE) ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) >> SCALEBITS); } } } } METHODDEF (void) rgba_ycbcralegacy_convert (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows) { my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; register int r, g, b; register INT32 * ctab = cconvert->rgb_ycc_tab; register JSAMPROW inptr; register JSAMPROW outptr0, outptr1, outptr2, outptr3; register JDIMENSION col; JDIMENSION num_cols = cinfo->image_width; JDIMENSION tail_cols = num_cols&7; JDIMENSION mmx_cols=num_cols&~7; while (--num_rows >= 0) { inptr = *input_buf++; outptr0 = output_buf[0][output_row]; outptr1 = output_buf[1][output_row]; outptr2 = output_buf[2][output_row]; outptr3 = output_buf[3][output_row]; output_row++; // // Need to add #ifdef for Alpha port // #if defined (_X86_) if (vfMMXMachine) { MRGBA2YCbCrALegacy( (int)(1), mmx_cols, inptr, outptr0, outptr1, outptr2, outptr3); inptr += 4*mmx_cols; for (col = mmx_cols; col < num_cols; col++) { r = MAXJSAMPLE - GETJSAMPLE(inptr[0]); g = MAXJSAMPLE - GETJSAMPLE(inptr[1]); b = MAXJSAMPLE - GETJSAMPLE(inptr[2]); /* Alpha passes through as-is */ outptr3[col] = inptr[3]; /* don't need GETJSAMPLE here */ inptr += 4; /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations * must be too; we do not need an explicit range-limiting operation. * Hence the value being shifted is never negative, and we don't * need the general RIGHT_SHIFT macro. 
*/ /* Y */ outptr0[col] = (JSAMPLE) ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) >> SCALEBITS); /* Cb */ outptr1[col] = (JSAMPLE) ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) >> SCALEBITS); /* Cr */ outptr2[col] = (JSAMPLE) ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) >> SCALEBITS); } } else #endif // defined (_X86_) { for (col = 0; col < num_cols; col++) { r = MAXJSAMPLE - GETJSAMPLE(inptr[0]); g = MAXJSAMPLE - GETJSAMPLE(inptr[1]); b = MAXJSAMPLE - GETJSAMPLE(inptr[2]); /* Alpha passes through as-is */ outptr3[col] = inptr[3]; /* don't need GETJSAMPLE here */ inptr += 4; /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations * must be too; we do not need an explicit range-limiting operation. * Hence the value being shifted is never negative, and we don't * need the general RIGHT_SHIFT macro. */ /* Y */ outptr0[col] = (JSAMPLE) ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) >> SCALEBITS); /* Cb */ outptr1[col] = (JSAMPLE) ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) >> SCALEBITS); /* Cr */ outptr2[col] = (JSAMPLE) ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) >> SCALEBITS); } } } } #endif /* * Convert some rows of samples to the JPEG colorspace. * This version handles Adobe-style CMYK->YCCK conversion, * where we convert R=1-C, G=1-M, and B=1-Y to YCbCr using the same * conversion as above, while passing K (black) unchanged. * We assume rgb_ycc_start has been called. 
*/

METHODDEF(void)
cmyk_ycck_convert (j_compress_ptr cinfo,
                   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
                   JDIMENSION output_row, int num_rows)
{
  /* Lookup table built by rgb_ycc_start, and the row width in pixels. */
  INT32 * tab = ((my_cconvert_ptr) cinfo->cconvert)->rgb_ycc_tab;
  JDIMENSION width = cinfo->image_width;

  for (; num_rows > 0; num_rows--, output_row++) {
    JSAMPROW pixel  = *input_buf++;             /* interleaved C,M,Y,K */
    JSAMPROW y_row  = output_buf[0][output_row];
    JSAMPROW cb_row = output_buf[1][output_row];
    JSAMPROW cr_row = output_buf[2][output_row];
    JSAMPROW k_row  = output_buf[3][output_row];
    JDIMENSION x;

    for (x = 0; x < width; x++, pixel += 4) {
      /* Invert C, M, Y to get the R, G, B fed to the YCbCr tables. */
      int red   = MAXJSAMPLE - GETJSAMPLE(pixel[0]);
      int green = MAXJSAMPLE - GETJSAMPLE(pixel[1]);
      int blue  = MAXJSAMPLE - GETJSAMPLE(pixel[2]);

      /* K passes through as-is; no GETJSAMPLE needed for a plain copy */
      k_row[x] = pixel[3];

      /* Table sums are never negative for in-range inputs, so a plain
       * right shift is sufficient and no range limiting is required.
       */
      y_row[x] = (JSAMPLE)
        ((tab[red + R_Y_OFF] + tab[green + G_Y_OFF] + tab[blue + B_Y_OFF])
         >> SCALEBITS);
      cb_row[x] = (JSAMPLE)
        ((tab[red + R_CB_OFF] + tab[green + G_CB_OFF] + tab[blue + B_CB_OFF])
         >> SCALEBITS);
      cr_row[x] = (JSAMPLE)
        ((tab[red + R_CR_OFF] + tab[green + G_CR_OFF] + tab[blue + B_CR_OFF])
         >> SCALEBITS);
    }
  }
}


/*
 * Convert some rows of samples to the JPEG colorspace.
 * This version handles grayscale output with no conversion.
 * The source can be either plain grayscale or YCbCr (since Y == gray).
*/ METHODDEF(void) grayscale_convert (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows) { register JSAMPROW inptr; register JSAMPROW outptr; register JDIMENSION col; JDIMENSION num_cols = cinfo->image_width; int instride = cinfo->input_components; while (--num_rows >= 0) { inptr = *input_buf++; outptr = output_buf[0][output_row]; output_row++; for (col = 0; col < num_cols; col++) { outptr[col] = inptr[0]; /* don't need GETJSAMPLE() here */ inptr += instride; } } } /* * Convert some rows of samples to the JPEG colorspace. * This version handles multi-component colorspaces without conversion. * We assume input_components == num_components. */ METHODDEF(void) null_convert (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows) { register JSAMPROW inptr; register JSAMPROW outptr; register JDIMENSION col; register int ci; int nc = cinfo->num_components; JDIMENSION num_cols = cinfo->image_width; while (--num_rows >= 0) { /* It seems fastest to make a separate pass for each component. */ for (ci = 0; ci < nc; ci++) { inptr = *input_buf; outptr = output_buf[ci][output_row]; for (col = 0; col < num_cols; col++) { outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */ inptr += nc; } } input_buf++; output_row++; } } /* * Empty method for start_pass. */ METHODDEF(void) null_method (j_compress_ptr cinfo) { /* no work needed */ } /* * Module initialization routine for input colorspace conversion. 
*/

GLOBAL(void)
jinit_color_converter (j_compress_ptr cinfo)
{
  my_cconvert_ptr conv;
  int bad_input;        /* nonzero: input_components is wrong for in_color_space */

  /* Create the converter object in the per-image pool and publish it. */
  conv = (my_cconvert_ptr)
    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
                                SIZEOF(my_color_converter));
  cinfo->cconvert = (struct jpeg_color_converter *) conv;
  /* set start_pass to null method until we find out differently */
  conv->pub.start_pass = null_method;

  /* Make sure input_components agrees with in_color_space */
  switch (cinfo->in_color_space) {
  case JCS_GRAYSCALE:
    bad_input = (cinfo->input_components != 1);
    break;

#ifdef NIFTY
  case JCS_YCC:
    bad_input = (cinfo->input_components != 3);
    break;

  case JCS_RGBA:
  case JCS_YCbCrA:
  case JCS_YCbCrALegacy:
  case JCS_YCCA:
    bad_input = (cinfo->input_components != 4);
    break;
#endif

  case JCS_RGB:
    /* RGB_PIXELSIZE samples per pixel (same test as YCbCr when it is 3) */
    bad_input = (cinfo->input_components != RGB_PIXELSIZE);
    break;

  case JCS_YCbCr:
    bad_input = (cinfo->input_components != 3);
    break;

  case JCS_CMYK:
  case JCS_YCCK:
    bad_input = (cinfo->input_components != 4);
    break;

  default:                      /* JCS_UNKNOWN can be anything */
    bad_input = (cinfo->input_components < 1);
    break;
  }
  if (bad_input)
    ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);

  /* Check num_components, set conversion method based on requested space */
  switch (cinfo->jpeg_color_space) {
  case JCS_GRAYSCALE:
    if (cinfo->num_components != 1)
      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
    switch (cinfo->in_color_space) {
    case JCS_GRAYSCALE:
    case JCS_YCbCr:             /* Y plane is already the gray value */
      conv->pub.color_convert = grayscale_convert;
      break;
    case JCS_RGB:
      conv->pub.start_pass = rgb_ycc_start;
      conv->pub.color_convert = rgb_gray_convert;
      break;
    default:
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
      break;
    }
    break;

#ifdef NIFTY
  /* Store and compress data as PhotoYCC */
  /* Only current option is to start with PhotoYCC
   * although I do include the function RGB->PhotoYCC
   * in the compressor, I don't think it's a good idea
   * to rotate to PhotoYCC from RGB in this context.
   * If subsampling is required, then just use YCrCb.
   */
  case JCS_YCC:
    if (cinfo->num_components != 3)
      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
    if (cinfo->in_color_space != JCS_YCC) {
      /* JCS_RGB input is where the RGB->PhotoYCC could be called;
       * like every other non-YCC input it is rejected.
       */
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
    } else {
      conv->pub.color_convert = null_convert;
    }
    break;

  case JCS_YCCA:
    if (cinfo->num_components != 4)
      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
    if (cinfo->in_color_space != JCS_YCCA)
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
    else
      conv->pub.color_convert = null_convert;
    break;

  case JCS_RGBA:
    if (cinfo->num_components != 4)
      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
    if (cinfo->in_color_space != JCS_RGBA)
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
    else
      conv->pub.color_convert = null_convert;
    break;

  case JCS_YCbCrA:
    if (cinfo->num_components != 4)
      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
    switch (cinfo->in_color_space) {
    case JCS_YCbCrA:
      conv->pub.color_convert = null_convert;
      break;
    case JCS_RGBA:
      conv->pub.start_pass = rgb_ycc_start;
      conv->pub.color_convert = rgba_ycbcra_convert;
      break;
    default:
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
      break;
    }
    break;

  case JCS_YCbCrALegacy:
    if (cinfo->num_components != 4)
      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
    switch (cinfo->in_color_space) {
    case JCS_YCbCrALegacy:
      conv->pub.color_convert = null_convert;
      break;
    case JCS_RGBA:
      conv->pub.start_pass = rgb_ycc_start;
      conv->pub.color_convert = rgba_ycbcralegacy_convert;
      break;
    default:
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
      break;
    }
    break;
#endif

  case JCS_RGB:
    if (cinfo->num_components != 3)
      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
    if (cinfo->in_color_space != JCS_RGB || RGB_PIXELSIZE != 3)
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
    else
      conv->pub.color_convert = null_convert;
    break;

  case JCS_YCbCr:
    if (cinfo->num_components != 3)
      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
    switch (cinfo->in_color_space) {
    case JCS_RGB:
      conv->pub.start_pass = rgb_ycc_start;
      conv->pub.color_convert = rgb_ycc_convert;
      break;
    case JCS_YCbCr:
      conv->pub.color_convert = null_convert;
      break;
    default:
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
      break;
    }
    break;

  case JCS_CMYK:
    if (cinfo->num_components != 4)
      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
    if (cinfo->in_color_space != JCS_CMYK)
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
    else
      conv->pub.color_convert = null_convert;
    break;

  case JCS_YCCK:
    if (cinfo->num_components != 4)
      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
    switch (cinfo->in_color_space) {
    case JCS_CMYK:
      conv->pub.start_pass = rgb_ycc_start;
      conv->pub.color_convert = cmyk_ycck_convert;
      break;
    case JCS_YCCK:
      conv->pub.color_convert = null_convert;
      break;
    default:
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
      break;
    }
    break;

  default:                      /* allow null conversion of JCS_UNKNOWN */
    if (cinfo->jpeg_color_space != cinfo->in_color_space ||
        cinfo->num_components != cinfo->input_components)
      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
    conv->pub.color_convert = null_convert;
    break;
  }
}

// // Need to add #ifdef for Alpha port // #if defined (_X86_) void MRGB2YCbCr( int rows, int cols, unsigned char *inRGB, unsigned char *outY, unsigned char *outU, unsigned char *outV) { // make global to ensure proper stack alignment // __int64 temp0, tempY, tempU, tempV; __asm { // initializations //DS - IJG will always call with rows=1, so don't multiply // mov eax, rows // mov ebx, cols // mul ebx ;number pixels // reorder to take advantage of v-pipe mov esi, cols mov eax, inRGB shr esi, 3 
;number of loops = (rows*cols)/8 mov edx, outV mov edi, esi ;loop counter in edi mov ecx, outU mov ebx, outY // top of loop RGBtoYUV: movq mm1, [eax] ;load #1 G2R2B1G1R1B0G0R0 -> mm1 pxor mm6, mm6 ;0 -> mm6 movq mm0, mm1 ;G2R2B1G1R1B0G0R0 -> mm0 psrlq mm1, 16 ;00G2R2B1G1R1B0 -> mm1 punpcklbw mm0, const_0 ;R1B0G0R0 -> mm0 movq mm7, mm1 ;00G2R2B1G1R1B0 -> mm7 punpcklbw mm1, const_0 ;B1G1R1B0 -> mm1 movq mm2, mm0 ;R1B0G0R0 -> mm2 pmaddwd mm0, const_YR0GR ;yrR1,ygG0+yrR0 -> mm0 movq mm3, mm1 ;B1G1R1B0 -> mm3 pmaddwd mm1, const_YBG0B ;ybB1+ygG1,ybB0 -> mm1 movq mm4, mm2 ;R1B0G0R0 -> mm4 pmaddwd mm2, const_UR0GR ;urR1,ugG0+urR0 -> mm2 movq mm5, mm3 ;B1G1R1B0 -> mm5 pmaddwd mm3, const_UBG0B ;ubB1+ugG1,ubB0 -> mm3 punpckhbw mm7, mm6 ;00G2R2 -> mm7 pmaddwd mm4, const_VR0GR ;vrR1,vgG0+vrR0 -> mm4 paddd mm0, mm1 ;Y1Y0 -> mm0 pmaddwd mm5, const_VBG0B ;vbB1+vgG1,vbB0 -> mm5 // nop movq mm1, [eax][8] ;load #2 R5B4G4R4B3G3R3B2 -> mm1 paddd mm2, mm3 ;U1U0 -> mm2 movq mm6, mm1 ;R5B4G4R4B3G3R3B2 -> mm6 // nop punpcklbw mm1, const_0 ;B3G3R3B2 -> mm1 paddd mm4, mm5 ;V1V0 -> mm4 movq mm5, mm1 ;B3G3R3B2 -> mm5 psllq mm1, 32 ;R3B200 -> mm1 paddd mm1, mm7 ;R3B200 + 00G2R2 = R3B2G2R2 -> mm1 // nop punpckhbw mm6, const_0 ;R5B4G4R4 -> mm6 movq mm3, mm1 ;R3B2G2R2 -> mm3 pmaddwd mm1, const_YR0GR ;yrR3,ygG2+yrR2 -> mm1 movq mm7, mm5 ;B3G3R3B2 -> mm7 pmaddwd mm5, const_YBG0B ;ybB3+ygG3,ybB2 -> mm5 psrad mm0, FCONVERSION_BITS ;32-bit scaled Y1Y0 -> mm0 movq temp0, mm6 ;R5B4G4R4 -> temp0 movq mm6, mm3 ;R3B2G2R2 -> mm6 pmaddwd mm6, const_UR0GR ;urR3,ugG2+urR2 -> mm6 psrad mm2, FCONVERSION_BITS ;32-bit scaled U1U0 -> mm2 paddd mm1, mm5 ;Y3Y2 -> mm1 movq mm5, mm7 ;B3G3R3B2 -> mm5 pmaddwd mm7, const_UBG0B ;ubB3+ugG3,ubB2 psrad mm1, FCONVERSION_BITS ;32-bit scaled Y3Y2 -> mm1 pmaddwd mm3, const_VR0GR ;vrR3,vgG2+vgR2 packssdw mm0, mm1 ;Y3Y2Y1Y0 -> mm0 pmaddwd mm5, const_VBG0B ;vbB3+vgG3,vbB2 -> mm5 psrad mm4, FCONVERSION_BITS ;32-bit scaled V1V0 -> mm4 movq mm1, [eax][16] ;load #3 B7G7R7B6G6R6B5G5 -> 
mm7 paddd mm6, mm7 ;U3U2 -> mm6 movq mm7, mm1 ;B7G7R7B6G6R6B5G5 -> mm1 psrad mm6, FCONVERSION_BITS ;32-bit scaled U3U2 -> mm6 paddd mm3, mm5 ;V3V2 -> mm3 psllq mm7, 16 ;R7B6G6R6B5G500 -> mm7 movq mm5, mm7 ;R7B6G6R6B5G500 -> mm5 psrad mm3, FCONVERSION_BITS ;32-bit scaled V3V2 -> mm3 movq tempY, mm0 ;32-bit scaled Y3Y2Y1Y0 -> tempY packssdw mm2, mm6 ;32-bit scaled U3U2U1U0 -> mm2 movq mm0, temp0 ;R5B4G4R4 -> mm0 // nop punpcklbw mm7, const_0 ;B5G500 -> mm7 movq mm6, mm0 ;R5B4G4R4 -> mm6 movq tempU, mm2 ;32-bit scaled U3U2U1U0 -> tempU psrlq mm0, 32 ;00R5B4 -> mm0 paddw mm7, mm0 ;B5G5R5B4 -> mm7 movq mm2, mm6 ;B5B4G4R4 -> mm2 pmaddwd mm2, const_YR0GR ;yrR5,ygG4+yrR4 -> mm2 movq mm0, mm7 ;B5G5R5B4 -> mm0 pmaddwd mm7, const_YBG0B ;ybB5+ygG5,ybB4 -> mm7 packssdw mm4, mm3 ;32-bit scaled V3V2V1V0 -> mm4 add eax, 24 ;increment RGB count // nop ;//JS movq tempV, mm4 ;32-bit scaled V3V2V1V0 -> tempV movq mm4, mm6 ;B5B4G4R4 -> mm4 pmaddwd mm6, const_UR0GR ;urR5,ugG4+urR4 movq mm3, mm0 ;B5G5R5B4 -> mm0 pmaddwd mm0, const_UBG0B ;ubB5+ugG5,ubB4 paddd mm2, mm7 ;Y5Y4 -> mm2 pmaddwd mm4, const_VR0GR ;vrR5,vgG4+vrR4 -> mm4 pxor mm7, mm7 ;0 -> mm7 pmaddwd mm3, const_VBG0B ;vbB5+vgG5,vbB4 -> mm3 punpckhbw mm1, mm7 ;B7G7R7B6 -> mm1 paddd mm0, mm6 ;U5U4 -> mm0 movq mm6, mm1 ;B7G7R7B6 -> mm6 pmaddwd mm6, const_YBG0B ;ybB7+ygG7,ybB6 -> mm6 punpckhbw mm5, mm7 ;R7B6G6R6 -> mm5 movq mm7, mm5 ;R7B6G6R6 -> mm7 paddd mm3, mm4 ;V5V4 -> mm3 pmaddwd mm5, const_YR0GR ;yrR7,ygG6+yrR6 -> mm5 movq mm4, mm1 ;B7G7R7B6 -> mm4 pmaddwd mm4,const_UBG0B ;ubB7+ugG7,ubB6 -> mm4 psrad mm0, FCONVERSION_BITS ;32-bit scaled U5U4 -> mm0 psrad mm2, FCONVERSION_BITS ;32-bit scaled Y5Y4 -> mm2 nop ;//JS paddd mm6, mm5 ;Y7Y6 -> mm6 movq mm5, mm7 ;R7B6G6R6 -> mm5 pmaddwd mm7, const_UR0GR ;urR7,ugG6+ugR6 -> mm7 psrad mm3, FCONVERSION_BITS ;32-bit scaled V5V4 -> mm3 pmaddwd mm1, const_VBG0B ;vbB7+vgG7,vbB6 -> mm1 psrad mm6, FCONVERSION_BITS ;32-bit scaled Y7Y6 -> mm6 packssdw mm2, mm6 ;Y7Y6Y5Y4 -> mm2 // nop ;//JS pmaddwd 
mm5, const_VR0GR ;vrR7,vgG6+vrR6 -> mm5 paddd mm7, mm4 ;U7U6 -> mm7 psrad mm7, FCONVERSION_BITS ;32-bit scaled U7U6 -> mm7 // nop movq mm6, tempY ;32-bit scaled Y3Y2Y1Y0 -> mm6 packssdw mm0, mm7 ;32-bit scaled U7U6U5U4 -> mm0 movq mm4, tempU ;32-bit scaled U3U2U1U0 -> mm4 packuswb mm6, mm2 ;all 8 Y values -> mm6 movq mm7, const_128 ;128,128,128,128 -> mm7 paddd mm1, mm5 ;V7V6 -> mm1 paddw mm0, mm7 ;add offset to U7U6U5U4 // nop paddw mm4, mm7 ;add offset to U3U2U1U0 psrad mm1, FCONVERSION_BITS ;32-bit scaled V7V6 -> mm1 movq [ebx], mm6 ;store Y packuswb mm4, mm0 ;all 8 U values -> mm4 movq mm5, tempV ;32-bit scaled V3V2V1V0 -> mm5 packssdw mm3, mm1 ;V7V6V5V4 -> mm3 paddw mm5, mm7 ;add offset to V3V2V1V0 paddw mm3, mm7 ;add offset to V7V6V5V4 movq [ecx], mm4 ;store U packuswb mm5, mm3 ;all 8 V values -> mm5 add ebx, 8 ;increment Y count add ecx, 8 ;increment U count movq [edx], mm5 ;store V // nop add edx, 8 ;increment V count // nop dec edi ;decrement loop counter jnz RGBtoYUV ;do 24 more bytes if not 0 //JS The following emms instruction is purposely commented out. //emms // commented out since it is done after the DCT } // end of __asm } // end of MRGB2YCbCr void MRGBA2YCbCrA( int rows, int cols, unsigned char *inRGBA, unsigned char *outY, unsigned char *outU, unsigned char *outV, unsigned char *outA) { // make global to align on stack properly // __int64 tempY, tempU, tempV, tempA; // written by Dave Shade - Intel Corp. 
// Feb '97 // // This color space conversion routine converts // true color pixels from RGBA to YCbCrA // one pass through the loop processes 4 pixels // there is no provision for cols not an even multiple of 4 __asm { // initializations //DS - IJG will always call with rows=1, so don't multiply // mov eax, rows // mov ebx, cols // mul ebx ;number pixels // reorder to take advantage of Pentium v-pipe mov edi, cols mov eax, inRGBA shr edi, 2 ;number of loops = (rows*cols)/4 mov edx, outV mov ecx, outU mov esi, outA mov ebx, outY // top of loop RGBAtoYUVA: movq mm3, [eax+8] ;load #1 A1B1G1R1A0B0G0R0 -> mm3 pxor mm6, mm6 ;0 -> mm6 movq mm4, mm3 ;A1B1G1R1A0B0G0R0 -> mm4 psrlq mm3, 32 ;00000000A1B1G1R1 -> mm3 punpcklwd mm4, mm3 ;A1B1A0B0G1R1G0R0 -> mm4 add esi, 4 movq mm0, mm4 ;A1B1A0B0G1R1G0R0 -> mm0 punpckhbw mm4, mm6 ;A1B1A0B0 -> mm4 movq mm3, mm4 ;A1B1A0B0 -> mm3 punpcklbw mm0, mm6 ;G1R1G0R0 -> mm0 pmaddwd mm3, const2_Y0B0B ;ybB1,ybB0 -> mm3 movq mm1, mm0 ;G1R1G0R0 -> mm1 pmaddwd mm0, const2_YGRGR ;yrG1+ygR1,ygG0+yrR0 -> mm0 movq mm5, mm4 ;A1B1A0B0 -> mm5 pmaddwd mm4, const2_U0B0B ;ubB1,ubB0 -> mm4 movq mm2, mm1 ;G1R1G0R0 -> mm2 pmaddwd mm1, const2_UGRGR ;urG1+ugR1,ugG0+urR0 -> mm1 movq mm7, mm5 ;A1B1A0B0 -> mm7 pmaddwd mm5, const2_V0B0B ;vbB1,vbB0 -> mm5 paddd mm0, mm3 ;Y1Y0 -> mm0 pmaddwd mm2, const2_VGRGR ;vgG1+vrR1,vgG0+vrR0 -> mm2 psrad mm0, FCONVERSION_BITS ;32 bit scaled Y1Y0 movq mm3, [eax] ;*load #2 A3B3G3R3A2B2G2R2 -> mm3 paddd mm1, mm4 ;U1U0 -> mm2 pmaddwd mm7, const2_A ;1*A1,1*A0 psrad mm1, FCONVERSION_BITS ;32 bit scaled U1U0 movq tempY, mm0 ;write out Y1Y0 in 32 bit format paddd mm2, mm5 ;V1V0 -> mm2 movq mm4, mm3 ;*A3B3G3R3A2B2G2R2 -> mm4 psrad mm2, FCONVERSION_BITS ;32bit scaled V1V0 movq tempU, mm1 ;write out U1U0 in 32 bit format psrlq mm3, 32 ;*00000000A3B3G3R3 -> mm3 movq tempV, mm2 ;write out V1V0 in 32 bit format punpcklwd mm4, mm3 ;*A3B3A2B2G3R3G2R2 -> mm4 movq tempA, mm7 movq mm0, mm4 ;*A3B3A2B2G3R3G2R2 -> mm0 punpckhbw mm4, mm6 ;*A3B3A2B2 -> 
mm4 add eax, 16 movq mm3, mm4 ;*A3B3A2B2 -> mm3 punpcklbw mm0, mm6 ;*G3R3G2R2 -> mm0 pmaddwd mm3, const2_Y0B0B ;*ybB3,ybB2 -> mm3 movq mm1, mm0 ;*G3R3G2R2 -> mm1 pmaddwd mm0, const2_YGRGR ;*yrG3+ygR3,ygG2+yrR2 -> mm0 movq mm5, mm4 ;*A3B3A2B2 -> mm5 pmaddwd mm4, const2_U0B0B ;*ubB3,ubB2 -> mm4 movq mm2, mm1 ;*G3R3G2R2 -> mm2 pmaddwd mm1, const2_UGRGR ;*urG3+ugR3,ugG2+urR2 -> mm1 movq mm7, mm5 ;*A3B3A2B2 -> mm7 pmaddwd mm5, const2_V0B0B ;*vbB3,vbB2 -> mm5 paddd mm0, mm3 ;*Y3Y2 -> mm0 pmaddwd mm2, const2_VGRGR ;*vgG3+vrR3,vgG2+vrR2 -> mm2 psrad mm0, FCONVERSION_BITS pmaddwd mm7, const2_A ;* 1*A3,1*A2 paddd mm1, mm4 ;*U3U2 -> mm2 movq mm6, const_128 psrad mm1, FCONVERSION_BITS packssdw mm0, tempY ;*pack Y3Y2,Y1Y0 -> mm0 paddd mm2, mm5 ;*V3V2 -> mm2 psrad mm2, FCONVERSION_BITS add ebx, 4 packssdw mm1, tempU ;*pack U3U2,U1U0 -> mm1 packssdw mm2, tempV ;*pack V3V2,V1V0 -> mm2 paddw mm1, mm6 ;add 128 packssdw mm7, tempA ;*pack A3A2,A1A0 -> mm7 paddw mm2, mm6 ;add 128 packuswb mm0, mm0 add ecx, 4 packuswb mm1, mm1 add edx, 4 movd [ebx-4], mm0 packuswb mm2, mm2 movd [ecx-4], mm1 packuswb mm7, mm7 movd [edx-4], mm2 movd [esi-4], mm7 dec edi jnz RGBAtoYUVA //JS The following emms instruction is purposely commented out. //emms // commented out since it is done after the DCT } // end of __asm } // end of MRGBA2YCbCrA void MRGBA2YCbCrALegacy( int rows, int cols, unsigned char *inRGBA, unsigned char *outY, unsigned char *outU, unsigned char *outV, unsigned char *outA) { // ensure proper stack alignment by making global // __int64 tempY, tempU, tempV, tempA; // written by Dave Shade - Intel Corp. 
// Feb '97 // // This color space conversion routine converts // true color pixels from RGBA to YCbCrA // This routine subtracts the RGB components from 255 before converting them // one pass through the loop processes 4 pixels // there is no provision for cols not an even multiple of 4 __asm { // initializations //DS - IJG will always call with rows=1, so don't multiply // mov eax, rows // mov ebx, cols // mul ebx ;number pixels // reorder to take advantage of Pentium v-pipe mov edi, cols mov eax, inRGBA shr edi, 2 ;number of loops = (rows*cols)/4 mov edx, outV mov ecx, outU mov esi, outA mov ebx, outY // top of loop RGBAtoYUVALegacy: movq mm3, [eax+8] ;load #1 A1B1G1R1A0B0G0R0 -> mm3 pxor mm6, mm6 ;0 -> mm6 pxor mm3, const2_Legacy ; subtract MaxJSample FlashPix rev. 1 "thing" movq mm4, mm3 ;A1B1G1R1A0B0G0R0 -> mm4 psrlq mm3, 32 ;00000000A1B1G1R1 -> mm3 punpcklwd mm4, mm3 ;A1B1A0B0G1R1G0R0 -> mm4 add esi, 4 ;opportunistically increment pointer movq mm0, mm4 ;A1B1A0B0G1R1G0R0 -> mm0 punpckhbw mm4, mm6 ;A1B1A0B0 -> mm4 movq mm3, mm4 ;A1B1A0B0 -> mm3 punpcklbw mm0, mm6 ;G1R1G0R0 -> mm0 pmaddwd mm3, const2_Y0B0B ;ybB1,ybB0 -> mm3 movq mm1, mm0 ;G1R1G0R0 -> mm1 pmaddwd mm0, const2_YGRGR ;yrG1+ygR1,ygG0+yrR0 -> mm0 movq mm5, mm4 ;A1B1A0B0 -> mm5 pmaddwd mm4, const2_U0B0B ;ubB1,ubB0 -> mm4 movq mm2, mm1 ;G1R1G0R0 -> mm2 pmaddwd mm1, const2_UGRGR ;urG1+ugR1,ugG0+urR0 -> mm1 movq mm7, mm5 ;A1B1A0B0 -> mm7 pmaddwd mm5, const2_V0B0B ;vbB1,vbB0 -> mm5 paddd mm0, mm3 ;Y1Y0 -> mm0 pmaddwd mm2, const2_VGRGR ;vgG1+vrR1,vgG0+vrR0 -> mm2 psrad mm0, FCONVERSION_BITS ;32 bit scaled Y1Y0 psrld mm7, 16 ;shift A1A0 down movq mm3, [eax] ;*load #2 A3B3G3R3A2B2G2R2 -> mm3 paddd mm1, mm4 ;U1U0 -> mm2 pxor mm3, const2_Legacy psrad mm1, FCONVERSION_BITS ;32 bit scaled U1U0 movq tempY, mm0 ;write out Y1Y0 in 32 bit format paddd mm2, mm5 ;V1V0 -> mm2 movq mm4, mm3 ;*A3B3G3R3A2B2G2R2 -> mm4 psrad mm2, FCONVERSION_BITS ;32bit scaled V1V0 movq tempU, mm1 ;write out U1U0 in 32 bit format psrlq mm3, 
32 ;*00000000A3B3G3R3 -> mm3 movq tempV, mm2 ;write out V1V0 in 32 bit format punpcklwd mm4, mm3 ;*A3B3A2B2G3R3G2R2 -> mm4 movq tempA, mm7 movq mm0, mm4 ;*A3B3A2B2G3R3G2R2 -> mm0 punpckhbw mm4, mm6 ;*A3B3A2B2 -> mm4 add eax, 16 ;opportunistically increment pointer movq mm3, mm4 ;*A3B3A2B2 -> mm3 punpcklbw mm0, mm6 ;*G3R3G2R2 -> mm0 pmaddwd mm3, const2_Y0B0B ;*ybB3,ybB2 -> mm3 movq mm1, mm0 ;*G3R3G2R2 -> mm1 pmaddwd mm0, const2_YGRGR ;*yrG3+ygR3,ygG2+yrR2 -> mm0 movq mm5, mm4 ;*A3B3A2B2 -> mm5 pmaddwd mm4, const2_U0B0B ;*ubB3,ubB2 -> mm4 movq mm2, mm1 ;*G3R3G2R2 -> mm2 pmaddwd mm1, const2_UGRGR ;*urG3+ugR3,ugG2+urR2 -> mm1 movq mm7, mm5 ;*A3B3A2B2 -> mm7 pmaddwd mm5, const2_V0B0B ;*vbB3,vbB2 -> mm5 paddd mm0, mm3 ;*Y3Y2 -> mm0 pmaddwd mm2, const2_VGRGR ;*vgG3+vrR3,vgG2+vrR2 -> mm2 psrad mm0, FCONVERSION_BITS ;shift Y3Y2 by 15 bits psrld mm7, 16 ;shift the alpha values down paddd mm1, mm4 ;*U3U2 -> mm2 movq mm6, const_128 ; load mm6 with 128 psrad mm1, FCONVERSION_BITS ;shift U3U2 by 15 bits packssdw mm0, tempY ;*pack Y3Y2,Y1Y0 -> mm0 paddd mm2, mm5 ;*V3V2 -> mm2 packssdw mm1, tempU ;*pack U3U2,U1U0 -> mm1 psrad mm2, FCONVERSION_BITS ;shift V3V2 by 15 bits add ebx, 4 ;opportunistically increment pointer packssdw mm2, tempV ;pack V3V2,V1V0 -> mm2 paddw mm1, mm6 ;add 128 packssdw mm7, tempA ;pack A3A2,A1A0 -> mm7 paddw mm2, mm6 ;add 128 packuswb mm0, mm0 ;pack Y3Y2Y1Y0 from 16 bit to 8 bit add ecx, 4 ;opportunistically increment pointer packuswb mm1, mm1 ;pack U3U2U1U0 from 16 bit to 8 bit add edx, 4 ;opportunistically increment pointer movd [ebx-4], mm0 ;write out Y3Y2Y1Y0 packuswb mm2, mm2 ;pack V3V2V1V0 from 16 bit to 8 bit movd [ecx-4], mm1 ;write out U3U2U1U0 packuswb mm7, mm7 ;pack A3A2A1A0 from 16 bit to 8 bits movd [edx-4], mm2 ;write out V3V2V1V0 movd [esi-4], mm7 ;write out A3A2A1A0 dec edi ;subtract 4 from number of pixels jnz RGBAtoYUVALegacy //JS The following emms instruction is purposely commented out. 
//emms // commented out since it is done after the DCT } // end of __asm } // end of MRGBA2YCbCrALegacy #endif // defined (_X86_)