From 97083487adc637b829ea7fe38c685c17b8f08523 Mon Sep 17 00:00:00 2001 From: Oliver Schinagl Date: Mon, 16 May 2005 19:59:55 +0000 Subject: Working idct --- src/idct.c | 316 ++++++++++++++++++++++--------------------------------------- 1 file changed, 115 insertions(+), 201 deletions(-) diff --git a/src/idct.c b/src/idct.c index 082a41a..db83be8 100644 --- a/src/idct.c +++ b/src/idct.c @@ -1,201 +1,115 @@ -#define CONST_BITS 8 -#define PASS1_BITS 2 - -#define FIX_1_082392200 ((INT32) 277) /* FIX(1.082392200) */ -#define FIX_1_414213562 ((INT32) 362) /* FIX(1.414213562) */ -#define FIX_1_847759065 ((INT32) 473) /* FIX(1.847759065) */ -#define FIX_2_613125930 ((INT32) 669) /* FIX(2.613125930) */ - -#define DEQUANTIZE(coef,quantval) (((coef)) * (quantval)) - -/* - * Perform dequantization and inverse DCT on one block of coefficients. - */ - -void idct (j_decompress_ptr cinfo, jpeg_component_info * compptr, - int FAR *coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) { - int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - int tmp10, tmp11, tmp12, tmp13; - int z5, z10, z11, z12, z13; - int inptr; - int * quantptr; - int * wsptr; - JSAMPROW outptr; - JSAMPLE *range_limit = IDCT_range_limit(cinfo); - int ctr; - int workspace[DCTSIZE2]; /* buffers data between passes */ - SHIFT_TEMPS /* for DESCALE */ - ISHIFT_TEMPS /* for IDESCALE */ - - /* Pass 1: process columns from input, store into work array. */ - - inptr = coef_block; - quantptr = (IFAST_MULT_TYPE *) compptr->dct_table; - wsptr = workspace; - for (ctr = DCTSIZE; ctr > 0; ctr--) { - /* Due to quantization, we will usually find that many of the input - * coefficients are zero, especially the AC terms. We can exploit this - * by short-circuiting the IDCT calculation for any column in which all - * the AC terms are zero. In that case each output is equal to the - * DC coefficient (with scale factor as needed). - */ - - if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && - inptr[DCTSIZE*7] == 0) { - /* AC terms all zero */ - int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - - wsptr[DCTSIZE*0] = dcval; - wsptr[DCTSIZE*1] = dcval; - wsptr[DCTSIZE*2] = dcval; - wsptr[DCTSIZE*3] = dcval; - wsptr[DCTSIZE*4] = dcval; - wsptr[DCTSIZE*5] = dcval; - wsptr[DCTSIZE*6] = dcval; - wsptr[DCTSIZE*7] = dcval; - - inptr++; /* advance pointers to next column */ - quantptr++; - wsptr++; - continue; - } - - /* Even part */ - - tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); - tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); - - tmp10 = tmp0 + tmp2; /* phase 3 */ - tmp11 = tmp0 - tmp2; - - tmp13 = tmp1 + tmp3; /* phases 5-3 */ - tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ - - tmp0 = tmp10 + tmp13; /* phase 2 */ - tmp3 = tmp10 - tmp13; - tmp1 = tmp11 + tmp12; - tmp2 = tmp11 - tmp12; - - /* Odd part */ - - tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); - - z13 = tmp6 + tmp5; /* phase 6 */ - z10 = tmp6 - tmp5; - z11 = tmp4 + tmp7; - z12 = tmp4 - tmp7; - - tmp7 = z11 + z13; /* phase 5 */ - tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ - - z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ - tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ - tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ - - tmp6 = tmp12 - tmp7; /* phase 2 */ - tmp5 = tmp11 - tmp6; - tmp4 = tmp10 + tmp5; - - wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); - wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); - wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); - wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); - wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5); - wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); - wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); - wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); - - inptr++; /* advance pointers to next column */ - quantptr++; - wsptr++; - } - - /* Pass 2: process rows from work array, store into output array. - * Note that we must descale the results by a factor of 8 == 2**3, - * and also undo the PASS1_BITS scaling. - */ - - wsptr = workspace; - for (ctr = 0; ctr < DCTSIZE; ctr++) { - outptr = output_buf[ctr] + output_col; - /* Rows of zeroes can be exploited in the same way as we did with columns. - * However, the column calculation has created many nonzero AC terms, so - * the simplification applies less often (typically 5% to 10% of the time). - * on machines with very fast multiplication, it's possible that the - * test takes more time than it's worth. In that case this section - * may be commented out. - */ - -#ifndef NO_ZERO_ROW_TEST - if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && - wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { - /* AC terms all zero */ - JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3) & RANGE_MASK]; - - outptr[0] = dcval; - outptr[1] = dcval; - outptr[2] = dcval; - outptr[3] = dcval; - outptr[4] = dcval; - outptr[5] = dcval; - outptr[6] = dcval; - outptr[7] = dcval; - - wsptr += DCTSIZE; /* advance pointer to next row */ - continue; - } -#endif - - /* Even part */ - - tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); - tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); - - tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); - tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562) - tmp13; - - tmp0 = tmp10 + tmp13; - tmp3 = tmp10 - tmp13; - tmp1 = tmp11 + tmp12; - tmp2 = tmp11 - tmp12; - - /* Odd part */ - - z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; - z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; - z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; - z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; - - tmp7 = z11 + z13; /* phase 5 */ - tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ - - z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ - tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ - tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ - - tmp6 = tmp12 - tmp7; /* phase 2 */ - tmp5 = tmp11 - tmp6; - tmp4 = tmp10 + tmp5; - - /* Final output stage: scale down by a factor of 8 and range-limit */ - - outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) & RANGE_MASK]; - outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) & RANGE_MASK]; - outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) & RANGE_MASK]; - outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) & RANGE_MASK]; - outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) & RANGE_MASK]; - outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) & RANGE_MASK]; - outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) & RANGE_MASK]; - outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) & RANGE_MASK]; - - wsptr += DCTSIZE; /* advance pointer to next row */ - } -} +/*---------------------------------------------*/ +/* File : fast_idct.c, utilities for jfif view */ +/* Author : Pierre Guerrier, march 1998 */ +/* IDCT code by Geert Janssen */ +/*---------------------------------------------*/ +#include "idct.h" + +/*-------------------------------------------------------*/ +/* JPEG data types here */ +/*-------------------------------------------------------*/ + + +/* This version is IEEE compliant using 16-bit arithmetic. */ + +/* The number of bits coefficients are scaled up before 2-D IDCT: */ +#define S_BITS 3 +/* The number of bits in the fractional part of a fixed point constant: */ +#define C_BITS 14 + +#define SCALE(x,n) ((x) << (n)) + +/* This version is vital in passing overall mean error test. */ +#define DESCALE(x, n) (((x) + (1 << ((n)-1)) - ((x) < 0)) >> (n)) + +#define ADD(x, y) ((x) + (y)) +#define SUB(x, y) ((x) - (y)) +#define CMUL(C, x) (((C) * (x) + (1 << (C_BITS-1))) >> C_BITS) + +/* Butterfly: but(a,b,x,y) = rot(sqrt(2),4,a,b,x,y) */ +#define but(a,b,x,y) { x = SUB(a,b); y = ADD(a,b); } + +/* Inverse 1-D Discrete Cosine Transform. + Result Y is scaled up by factor sqrt(8). + Original Loeffler algorithm. +*/ +static void +idct_1d(int *Y) +{ + int z1[8], z2[8], z3[8]; + + /* Stage 1: */ + but(Y[0], Y[4], z1[1], z1[0]); + /* rot(sqrt(2), 6, Y[2], Y[6], &z1[2], &z1[3]); */ + z1[2] = SUB(CMUL( 8867, Y[2]), CMUL(21407, Y[6])); + z1[3] = ADD(CMUL(21407, Y[2]), CMUL( 8867, Y[6])); + but(Y[1], Y[7], z1[4], z1[7]); + /* z1[5] = CMUL(sqrt(2), Y[3]); + z1[6] = CMUL(sqrt(2), Y[5]); + */ + z1[5] = CMUL(23170, Y[3]); + z1[6] = CMUL(23170, Y[5]); + + /* Stage 2: */ + but(z1[0], z1[3], z2[3], z2[0]); + but(z1[1], z1[2], z2[2], z2[1]); + but(z1[4], z1[6], z2[6], z2[4]); + but(z1[7], z1[5], z2[5], z2[7]); + + /* Stage 3: */ + z3[0] = z2[0]; + z3[1] = z2[1]; + z3[2] = z2[2]; + z3[3] = z2[3]; + /* rot(1, 3, z2[4], z2[7], &z3[4], &z3[7]); */ + z3[4] = SUB(CMUL(13623, z2[4]), CMUL( 9102, z2[7])); + z3[7] = ADD(CMUL( 9102, z2[4]), CMUL(13623, z2[7])); + /* rot(1, 1, z2[5], z2[6], &z3[5], &z3[6]); */ + z3[5] = SUB(CMUL(16069, z2[5]), CMUL( 3196, z2[6])); + z3[6] = ADD(CMUL( 3196, z2[5]), CMUL(16069, z2[6])); + + /* Final stage 4: */ + but(z3[0], z3[7], Y[7], Y[0]); + but(z3[1], z3[6], Y[6], Y[1]); + but(z3[2], z3[5], Y[5], Y[2]); + but(z3[3], z3[4], Y[4], Y[3]); +} + +/* Inverse 2-D Discrete Cosine Transform. */ +void +IDCT(const int input[64], int *output) +{ + int Y[64]; + int k,l; + + printf("\n"); + + /* Pass 1: process rows. */ + for (k = 0; k < 8; k++) { + + /* Prescale k-th row: */ + for (l = 0; l < 8; l++) + Y[8*k+l] = SCALE(input[8*k+l], S_BITS); + + /* 1-D IDCT on k-th row: */ + idct_1d(&Y[8*k+0]); + /* Result Y is scaled up by factor sqrt(8)*2^S_BITS. */ + } + + /* Pass 2: process columns. */ + for (l = 0; l < 8; l++) { + int Yc[8]; + + for (k = 0; k < 8; k++) Yc[k] = Y[8*k+l]; + /* 1-D IDCT on l-th column: */ + idct_1d(Yc); + /* Result is once more scaled up by a factor sqrt(8). */ + for (k = 0; k < 8; k++) { + int r = 128 + DESCALE(Yc[k], S_BITS+3); /* includes level shift */ + + /* Clip to 8 bits unsigned: */ + r = r > 0 ? (r < 255 ? r : 255) : 0; + output[8*k+l] = r; + } + } +} -- cgit v0.12