From 0692fd3ae8c1219b81098de4fa7a8d7a90422afe Mon Sep 17 00:00:00 2001 From: Oliver Schinagl Date: Thu, 31 Mar 2005 19:32:13 +0000 Subject: Initial dct modules with dummy main. forward dct compiles cleanly. Needs to be verified if working. inverse dct needs some type fixing (from original libjpeg). --- Makefile | 2 + include/dct.h | 11 ++++ src/fdct.c | 118 ++++++++++++++++++++++++++++++++++ src/idct.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.c | 14 ++++ 5 files changed, 346 insertions(+) create mode 100644 Makefile create mode 100644 include/dct.h create mode 100644 src/fdct.c create mode 100644 src/idct.c create mode 100644 src/main.c diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e7eff81 --- /dev/null +++ b/Makefile @@ -0,0 +1,2 @@ +all: + gcc -o test src/main.c src/fdct.c -I include/ diff --git a/include/dct.h b/include/dct.h new file mode 100644 index 0000000..d202de5 --- /dev/null +++ b/include/dct.h @@ -0,0 +1,11 @@ +#define CONST_BITS 8 +#define DCTSIZE 8 +#define DCTSIZE_BLOCK (DCTSIZE * DCTSIZE) + +#define RIGHT_SHIFT(x,shft) ((x) >> (shft)) + +#define DESCALE(x,n) RIGHT_SHIFT((x) + (1 << ((n)-1)), n) + +#define MULTIPLY(var,constant) (DESCALE((var) * (constant), CONST_BITS)) + +void fdct(int *data); diff --git a/src/fdct.c b/src/fdct.c new file mode 100644 index 0000000..3631de6 --- /dev/null +++ b/src/fdct.c @@ -0,0 +1,118 @@ +#include "dct.h" + +#define FIX_0_382683433 (98) /* FIX(0.382683433) */ +#define FIX_0_541196100 (139) /* FIX(0.541196100) */ +#define FIX_0_707106781 (181) /* FIX(0.707106781) */ +#define FIX_1_306562965 (334) /* FIX(1.306562965) */ + +/* + * Perform the forward DCT on one block of samples. + */ +void fdct(int *data) { + int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int tmp10, tmp11, tmp12, tmp13; + int z1, z2, z3, z4, z5, z11, z13; + int *dataptr; + int ctr; +// SHIFT_TEMPS + + /* Pass 1: process rows. */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[0] + dataptr[7]; + tmp7 = dataptr[0] - dataptr[7]; + tmp1 = dataptr[1] + dataptr[6]; + tmp6 = dataptr[1] - dataptr[6]; + tmp2 = dataptr[2] + dataptr[5]; + tmp5 = dataptr[2] - dataptr[5]; + tmp3 = dataptr[3] + dataptr[4]; + tmp4 = dataptr[3] - dataptr[4]; + + /* Even part */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + dataptr[0] = tmp10 + tmp11; /* phase 3 */ + dataptr[4] = tmp10 - tmp11; + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ + dataptr[2] = tmp13 + z1; /* phase 5 */ + dataptr[6] = tmp13 - z1; + + /* Odd part */ + + tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + /* The rotator is modified from fig 4-8 to avoid extra negations. */ + z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ + z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ + z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ + z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ + + z11 = tmp7 + z3; /* phase 5 */ + z13 = tmp7 - z3; + + dataptr[5] = z13 + z2; /* phase 6 */ + dataptr[3] = z13 - z2; + dataptr[1] = z11 + z4; + dataptr[7] = z11 - z4; + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; + tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; + tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; + tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; + tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; + + /* Even part */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ + dataptr[DCTSIZE*4] = tmp10 - tmp11; + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ + dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ + dataptr[DCTSIZE*6] = tmp13 - z1; + + /* Odd part */ + + tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + /* The rotator is modified from fig 4-8 to avoid extra negations. */ + z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ + z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ + z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ + z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ + + z11 = tmp7 + z3; /* phase 5 */ + z13 = tmp7 - z3; + + dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ + dataptr[DCTSIZE*3] = z13 - z2; + dataptr[DCTSIZE*1] = z11 + z4; + dataptr[DCTSIZE*7] = z11 - z4; + + dataptr++; /* advance pointer to next column */ + } +} diff --git a/src/idct.c b/src/idct.c new file mode 100644 index 0000000..082a41a --- /dev/null +++ b/src/idct.c @@ -0,0 +1,201 @@ +#define CONST_BITS 8 +#define PASS1_BITS 2 + +#define FIX_1_082392200 ((INT32) 277) /* FIX(1.082392200) */ +#define FIX_1_414213562 ((INT32) 362) /* FIX(1.414213562) */ +#define FIX_1_847759065 ((INT32) 473) /* FIX(1.847759065) */ +#define FIX_2_613125930 ((INT32) 669) /* FIX(2.613125930) */ + +#define DEQUANTIZE(coef,quantval) (((coef)) * (quantval)) + +/* + * Perform dequantization and inverse DCT on one block of coefficients. + */ + +void idct (j_decompress_ptr cinfo, jpeg_component_info * compptr, + int FAR *coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { + int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int tmp10, tmp11, tmp12, tmp13; + int z5, z10, z11, z12, z13; + int inptr; + int * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[DCTSIZE2]; /* buffers data between passes */ + SHIFT_TEMPS /* for DESCALE */ + ISHIFT_TEMPS /* for IDESCALE */ + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (IFAST_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = DCTSIZE; ctr > 0; ctr--) { + /* Due to quantization, we will usually find that many of the input + * coefficients are zero, especially the AC terms. We can exploit this + * by short-circuiting the IDCT calculation for any column in which all + * the AC terms are zero. In that case each output is equal to the + * DC coefficient (with scale factor as needed). + */ + + if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && + inptr[DCTSIZE*7] == 0) { + /* AC terms all zero */ + int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + + wsptr[DCTSIZE*0] = dcval; + wsptr[DCTSIZE*1] = dcval; + wsptr[DCTSIZE*2] = dcval; + wsptr[DCTSIZE*3] = dcval; + wsptr[DCTSIZE*4] = dcval; + wsptr[DCTSIZE*5] = dcval; + wsptr[DCTSIZE*6] = dcval; + wsptr[DCTSIZE*7] = dcval; + + inptr++; /* advance pointers to next column */ + quantptr++; + wsptr++; + continue; + } + + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp10 = tmp0 + tmp2; /* phase 3 */ + tmp11 = tmp0 - tmp2; + + tmp13 = tmp1 + tmp3; /* phases 5-3 */ + tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ + + tmp0 = tmp10 + tmp13; /* phase 2 */ + tmp3 = tmp10 - tmp13; + tmp1 = tmp11 + tmp12; + tmp2 = tmp11 - tmp12; + + /* Odd part */ + + tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + z13 = tmp6 + tmp5; /* phase 6 */ + z10 = tmp6 - tmp5; + z11 = tmp4 + tmp7; + z12 = tmp4 - tmp7; + + tmp7 = z11 + z13; /* phase 5 */ + tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ + + z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ + tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ + tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ + + tmp6 = tmp12 - tmp7; /* phase 2 */ + tmp5 = tmp11 - tmp6; + tmp4 = tmp10 + tmp5; + + wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); + wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); + wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); + wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); + wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5); + wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); + wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); + wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); + + inptr++; /* advance pointers to next column */ + quantptr++; + wsptr++; + } + + /* Pass 2: process rows from work array, store into output array. + * Note that we must descale the results by a factor of 8 == 2**3, + * and also undo the PASS1_BITS scaling. + */ + + wsptr = workspace; + for (ctr = 0; ctr < DCTSIZE; ctr++) { + outptr = output_buf[ctr] + output_col; + /* Rows of zeroes can be exploited in the same way as we did with columns. + * However, the column calculation has created many nonzero AC terms, so + * the simplification applies less often (typically 5% to 10% of the time). + * on machines with very fast multiplication, it's possible that the + * test takes more time than it's worth. In that case this section + * may be commented out. + */ + +#ifndef NO_ZERO_ROW_TEST + if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && + wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { + /* AC terms all zero */ + JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3) & RANGE_MASK]; + + outptr[0] = dcval; + outptr[1] = dcval; + outptr[2] = dcval; + outptr[3] = dcval; + outptr[4] = dcval; + outptr[5] = dcval; + outptr[6] = dcval; + outptr[7] = dcval; + + wsptr += DCTSIZE; /* advance pointer to next row */ + continue; + } +#endif + + /* Even part */ + + tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); + tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); + + tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); + tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562) - tmp13; + + tmp0 = tmp10 + tmp13; + tmp3 = tmp10 - tmp13; + tmp1 = tmp11 + tmp12; + tmp2 = tmp11 - tmp12; + + /* Odd part */ + + z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; + z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; + z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; + z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; + + tmp7 = z11 + z13; /* phase 5 */ + tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ + + z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ + tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ + tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ + + tmp6 = tmp12 - tmp7; /* phase 2 */ + tmp5 = tmp11 - tmp6; + tmp4 = tmp10 + tmp5; + + /* Final output stage: scale down by a factor of 8 and range-limit */ + + outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) & RANGE_MASK]; + outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) & RANGE_MASK]; + outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) & RANGE_MASK]; + outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) & RANGE_MASK]; + outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) & RANGE_MASK]; + outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) & RANGE_MASK]; + outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) & RANGE_MASK]; + outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) & RANGE_MASK]; + + wsptr += DCTSIZE; /* advance pointer to next row */ + } +} diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..7c9b4ed --- /dev/null +++ b/src/main.c @@ -0,0 +1,14 @@ +#include + +#include "dct.h" + +int main(void) { + int retval; + int workspace[DCTSIZE_BLOCK]; + + retval = 0; + + fdct(workspace); + + return retval; +} -- cgit v0.12