From 97083487adc637b829ea7fe38c685c17b8f08523 Mon Sep 17 00:00:00 2001
From: Oliver Schinagl <oliver@schinagl.nl>
Date: Mon, 16 May 2005 19:59:55 +0000
Subject: Working idct

---
 src/idct.c | 316 ++++++++++++++++++++++---------------------------------------
 1 file changed, 115 insertions(+), 201 deletions(-)

diff --git a/src/idct.c b/src/idct.c
index 082a41a..db83be8 100644
--- a/src/idct.c
+++ b/src/idct.c
@@ -1,201 +1,115 @@
-#define CONST_BITS  8
-#define PASS1_BITS  2
-
-#define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */
-#define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */
-#define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */
-#define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */
-
-#define DEQUANTIZE(coef,quantval)  (((coef)) * (quantval))
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients.
- */
-
-void idct (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 int FAR *coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col) {
-	int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-	int tmp10, tmp11, tmp12, tmp13;
-	int z5, z10, z11, z12, z13;
-	int inptr;
-	int * quantptr;
-	int * wsptr;
-	JSAMPROW outptr;
-	JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-	int ctr;
-	int workspace[DCTSIZE2];	/* buffers data between passes */
-	SHIFT_TEMPS			/* for DESCALE */
-	ISHIFT_TEMPS			/* for IDESCALE */
-
-	/* Pass 1: process columns from input, store into work array. */
-
-	inptr = coef_block;
-	quantptr = (IFAST_MULT_TYPE *) compptr->dct_table;
-	wsptr = workspace;
-	for (ctr = DCTSIZE; ctr > 0; ctr--) {
-	/* Due to quantization, we will usually find that many of the input
-	 * coefficients are zero, especially the AC terms.  We can exploit this
-	 * by short-circuiting the IDCT calculation for any column in which all
-	 * the AC terms are zero.  In that case each output is equal to the
-	 * DC coefficient (with scale factor as needed).
-	 */
-		
-		if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-			inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-			inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-			inptr[DCTSIZE*7] == 0) {
-			/* AC terms all zero */
-			int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-
-			wsptr[DCTSIZE*0] = dcval;
-			wsptr[DCTSIZE*1] = dcval;
-			wsptr[DCTSIZE*2] = dcval;
-			wsptr[DCTSIZE*3] = dcval;
-			wsptr[DCTSIZE*4] = dcval;
-			wsptr[DCTSIZE*5] = dcval;
-			wsptr[DCTSIZE*6] = dcval;
-			wsptr[DCTSIZE*7] = dcval;
-
-			inptr++;			/* advance pointers to next column */
-			quantptr++;
-			wsptr++;
-			continue;
-		}
-    
-		/* Even part */
-
-		tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-		tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-		tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-		tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-		tmp10 = tmp0 + tmp2;	/* phase 3 */
-		tmp11 = tmp0 - tmp2;
-
-		tmp13 = tmp1 + tmp3;	/* phases 5-3 */
-		tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
-
-		tmp0 = tmp10 + tmp13;	/* phase 2 */
-		tmp3 = tmp10 - tmp13;
-		tmp1 = tmp11 + tmp12;
-		tmp2 = tmp11 - tmp12;
-    
-		/* Odd part */
-
-		tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-		tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-		tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-		tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-		z13 = tmp6 + tmp5;		/* phase 6 */
-		z10 = tmp6 - tmp5;
-		z11 = tmp4 + tmp7;
-		z12 = tmp4 - tmp7;
-
-		tmp7 = z11 + z13;		/* phase 5 */
-		tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
-
-		z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
-		tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
-		tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
-
-		tmp6 = tmp12 - tmp7;	/* phase 2 */
-		tmp5 = tmp11 - tmp6;
-		tmp4 = tmp10 + tmp5;
-
-		wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
-		wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
-		wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
-		wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
-		wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);
-		wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
-		wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
-		wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
-
-		inptr++;			/* advance pointers to next column */
-		quantptr++;
-		wsptr++;
-	}
-  
-	/* Pass 2: process rows from work array, store into output array.
-	 * Note that we must descale the results by a factor of 8 == 2**3,
-	 * and also undo the PASS1_BITS scaling.
-	 */
-
-	wsptr = workspace;
-	for (ctr = 0; ctr < DCTSIZE; ctr++) {
-		outptr = output_buf[ctr] + output_col;
-		/* Rows of zeroes can be exploited in the same way as we did with columns.
-		 * However, the column calculation has created many nonzero AC terms, so
-		 * the simplification applies less often (typically 5% to 10% of the time).
-		 * on machines with very fast multiplication, it's possible that the
-		 * test takes more time than it's worth.  In that case this section
-		 * may be commented out.
-		 */
-    
-#ifndef NO_ZERO_ROW_TEST
-		if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
-			wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
-			/* AC terms all zero */
-			JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3) & RANGE_MASK];
-
-			outptr[0] = dcval;
-			outptr[1] = dcval;
-			outptr[2] = dcval;
-			outptr[3] = dcval;
-			outptr[4] = dcval;
-			outptr[5] = dcval;
-			outptr[6] = dcval;
-			outptr[7] = dcval;
-
-			wsptr += DCTSIZE;		/* advance pointer to next row */
-			continue;
-		}
-#endif
-    
-		/* Even part */
-
-		tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
-		tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
-
-		tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
-		tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562) - tmp13;
-
-		tmp0 = tmp10 + tmp13;
-		tmp3 = tmp10 - tmp13;
-		tmp1 = tmp11 + tmp12;
-		tmp2 = tmp11 - tmp12;
-
-		/* Odd part */
-
-		z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
-		z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
-		z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
-		z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
-
-		tmp7 = z11 + z13;		/* phase 5 */
-		tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
-
-		z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
-		tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
-		tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
-
-		tmp6 = tmp12 - tmp7;	/* phase 2 */
-		tmp5 = tmp11 - tmp6;
-		tmp4 = tmp10 + tmp5;
-
-		/* Final output stage: scale down by a factor of 8 and range-limit */
-
-		outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) & RANGE_MASK];
-		outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) & RANGE_MASK];
-		outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) & RANGE_MASK];
-		outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) & RANGE_MASK];
-		outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) & RANGE_MASK];
-		outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) & RANGE_MASK];
-		outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) & RANGE_MASK];
-		outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) & RANGE_MASK];
-
-		wsptr += DCTSIZE;		/* advance pointer to next row */
-	}
-}
+/*---------------------------------------------*/
+/* File : fast_idct.c, utilities for jfif view */
+/* Author : Pierre Guerrier, march 1998	       */
+/* IDCT code by Geert Janssen	     	       */
+/*---------------------------------------------*/
+#include "idct.h"
+
+/*-------------------------------------------------------*/
+/* JPEG data types here					*/
+/*-------------------------------------------------------*/
+
+
+/* This version is IEEE compliant using 16-bit arithmetic. */
+
+/* The number of bits coefficients are scaled up before 2-D IDCT: */
+#define S_BITS	         3
+/* The number of bits in the fractional part of a fixed point constant: */
+#define C_BITS		14
+
+#define SCALE(x,n)	((x) << (n))
+
+/* This version is vital in passing overall mean error test. */
+#define DESCALE(x, n)	(((x) + (1 << ((n)-1)) - ((x) < 0)) >> (n))
+
+#define ADD(x, y)	((x) + (y))
+#define SUB(x, y)	((x) - (y))
+#define CMUL(C, x)	(((C) * (x) + (1 << (C_BITS-1))) >> C_BITS)
+
+/* Butterfly: but(a,b,x,y) = rot(sqrt(2),4,a,b,x,y) */
+#define but(a,b,x,y)	{ x = SUB(a,b); y = ADD(a,b); }
+
+/* Inverse 1-D Discrete Cosine Transform.
+   Result Y is scaled up by factor sqrt(8).
+   Original Loeffler algorithm.
+*/
+static void
+idct_1d(int *Y)
+{
+  int z1[8], z2[8], z3[8];
+
+  /* Stage 1: */
+  but(Y[0], Y[4], z1[1], z1[0]);
+  /* rot(sqrt(2), 6, Y[2], Y[6], &z1[2], &z1[3]); */
+  z1[2] = SUB(CMUL( 8867, Y[2]), CMUL(21407, Y[6]));
+  z1[3] = ADD(CMUL(21407, Y[2]), CMUL( 8867, Y[6]));
+  but(Y[1], Y[7], z1[4], z1[7]);
+  /* z1[5] = CMUL(sqrt(2), Y[3]);
+     z1[6] = CMUL(sqrt(2), Y[5]);
+  */
+  z1[5] = CMUL(23170, Y[3]);
+  z1[6] = CMUL(23170, Y[5]);
+
+  /* Stage 2: */
+  but(z1[0], z1[3], z2[3], z2[0]);
+  but(z1[1], z1[2], z2[2], z2[1]);
+  but(z1[4], z1[6], z2[6], z2[4]);
+  but(z1[7], z1[5], z2[5], z2[7]);
+
+  /* Stage 3: */
+  z3[0] = z2[0];
+  z3[1] = z2[1];
+  z3[2] = z2[2];
+  z3[3] = z2[3];
+  /* rot(1, 3, z2[4], z2[7], &z3[4], &z3[7]); */
+  z3[4] = SUB(CMUL(13623, z2[4]), CMUL( 9102, z2[7]));
+  z3[7] = ADD(CMUL( 9102, z2[4]), CMUL(13623, z2[7]));
+  /* rot(1, 1, z2[5], z2[6], &z3[5], &z3[6]); */
+  z3[5] = SUB(CMUL(16069, z2[5]), CMUL( 3196, z2[6]));
+  z3[6] = ADD(CMUL( 3196, z2[5]), CMUL(16069, z2[6]));
+
+  /* Final stage 4: */
+  but(z3[0], z3[7], Y[7], Y[0]);
+  but(z3[1], z3[6], Y[6], Y[1]);
+  but(z3[2], z3[5], Y[5], Y[2]);
+  but(z3[3], z3[4], Y[4], Y[3]);
+}
+
+/* Inverse 2-D Discrete Cosine Transform. */
+void
+IDCT(const int input[64], int *output)
+{
+  int Y[64];
+  int k,l;
+
+  	printf("\n");
+
+  /* Pass 1: process rows. */
+  for (k = 0; k < 8; k++) {
+
+    /* Prescale k-th row: */
+    for (l = 0; l < 8; l++)
+      Y[8*k+l] = SCALE(input[8*k+l], S_BITS);
+
+    /* 1-D IDCT on k-th row: */
+    idct_1d(&Y[8*k+0]);
+    /* Result Y is scaled up by factor sqrt(8)*2^S_BITS. */
+  }
+
+  /* Pass 2: process columns. */
+  for (l = 0; l < 8; l++) {
+    int Yc[8];
+
+    for (k = 0; k < 8; k++) Yc[k] = Y[8*k+l];
+    /* 1-D IDCT on l-th column: */
+    idct_1d(Yc);
+    /* Result is once more scaled up by a factor sqrt(8). */
+    for (k = 0; k < 8; k++) {
+      int r = 128 + DESCALE(Yc[k], S_BITS+3); /* includes level shift */
+
+      /* Clip to 8 bits unsigned: */
+      r = r > 0 ? (r < 255 ? r : 255) : 0;
+      output[8*k+l] = r;
+    }
+  }
+}
-- 
cgit v0.12