diff -bBru src-ori/bitstream/bitstream.c src/bitstream/bitstream.c
--- src-ori/bitstream/bitstream.c	Wed Jan 12 10:57:45 2005
+++ src/bitstream/bitstream.c	Wed Jan 12 12:39:29 2005
@@ -24,13 +24,234 @@
  *
  ****************************************************************************/
 
-#include <string.h>
-#include <stdio.h>
+#include <e32std.h>
+#include "../global_all_dll.h"
+ BitstreamInit(Bitstream * const bs,
+ 			  void *const bitstream,
+ 			  uint32_t length)
+ {	/* NOTE(review): no return type is spelled on this definition; the pre-patch inline in bitstream.h was `static void __inline` - confirm the target compiler accepts it */
+	/* Attaches bs to the caller's buffer: aligns the start pointer down to a uint32_t boundary, preloads bufa/bufb and clears the write accumulator. */
+	uint32_t tmp;
+	size_t bitpos;
+	ptr_t adjbitstream = (ptr_t)bitstream;
+
+	/*
+	 * Start the stream on a uint32_t boundary, by rounding down to the
+	 * previous uint32_t and skipping the intervening bytes.
+	 */
+	bitpos = ((sizeof(uint32_t)-1) & (size_t)bitstream);
+	adjbitstream = adjbitstream - bitpos;
+	bs->start = bs->tail = (uint32_t *) adjbitstream;
+
+	tmp = *bs->start;	/* first aligned word; may begin before `bitstream` when it was unaligned */
+#ifndef ARCH_IS_BIG_ENDIAN
+	BSWAP(tmp);
+#endif
+	bs->bufa = tmp;
+
+	tmp = *(bs->start + 1);	/* second word is read unconditionally, whatever `length` is */
+#ifndef ARCH_IS_BIG_ENDIAN
+	BSWAP(tmp);
+#endif
+	bs->bufb = tmp;
+
+	bs->buf = 0;
+	bs->pos = bs->initpos = bitpos*8;	/* byte offset inside the first word, expressed in bits */
+	bs->length = length;
+}
+ BitstreamReset(Bitstream * const bs)
+ {	/* NOTE(review): return type omitted; the pre-patch inline was `static void __inline` - confirm the compiler accepts it */
+	/* Rewinds bs to the beginning of its buffer: tail goes back to start, bufa/bufb are reloaded from the first two words, buf is cleared and pos returns to initpos. */
+	uint32_t tmp;
+
+	bs->tail = bs->start;
+
+	tmp = *bs->start;
+#ifndef ARCH_IS_BIG_ENDIAN
+	BSWAP(tmp);
+#endif
+	bs->bufa = tmp;
+
+	tmp = *(bs->start + 1);
+#ifndef ARCH_IS_BIG_ENDIAN
+	BSWAP(tmp);
+#endif
+	bs->bufb = tmp;
+
+	bs->buf = 0;
+	bs->pos = bs->initpos;
+}
+ BitstreamShowBits(Bitstream * const bs,
+ 				  const uint32_t bits)
+ {	/* NOTE(review): return type omitted; the pre-patch inline returned `uint32_t` - confirm the compiler accepts it */
+	/* Returns the next `bits` bits at the current position without advancing; nbit > 0 means the request runs past bufa and is completed from bufb. */
+	int nbit = (bits + bs->pos) - 32;
+
+	if (nbit > 0) {
+		return ((bs->bufa & (0xffffffff >> bs->pos)) << nbit) | (bs->
+																 bufb >> (32 -
+																		  nbit));
+	} else {	/* NOTE(review): bits == 0 with pos == 0 would shift by 32 below; presumably callers pass bits >= 1 - verify */
+		return (bs->bufa & (0xffffffff >> bs->pos)) >> (32 - bs->pos - bits);
+	}
+}
+ void BitstreamSkip(Bitstream * const bs,
+ 			  const uint32_t bits)
+ {
+	/* Advances the read position by `bits`; once pos reaches 32, bufb becomes bufa, the word two past tail is loaded into bufb and tail moves on by one word. */
+	bs->pos += bits;
+
+	if (bs->pos >= 32) {	/* a single wrap is handled per call; presumably bits <= 32 - verify against callers */
+		uint32_t tmp;
+
+		bs->bufa = bs->bufb;
+		tmp = *((uint32_t *) bs->tail + 2);
+#ifndef ARCH_IS_BIG_ENDIAN
+		BSWAP(tmp);
+#endif
+		bs->bufb = tmp;
+		bs->tail++;
+		bs->pos -= 32;
+	}
+}
+ uint32_t BitstreamNumBitsToByteAlign(Bitstream *bs)
+ {
+	/* Number of bits from pos up to the next byte boundary; yields 8 rather than 0 when pos is already byte aligned. */
+	uint32_t n = (32 - bs->pos) % 8;
+	return n == 0 ? 8 : n;
+}
+ uint32_t BitstreamShowBitsFromByteAlign(Bitstream *bs, int bits)
+ {
+	/* Peeks `bits` bits starting at the byte boundary reported by BitstreamNumBitsToByteAlign(); the stream state is not modified. */
+	int bspos = bs->pos + BitstreamNumBitsToByteAlign(bs);
+	int nbit = (bits + bspos) - 32;
+
+	if (bspos >= 32) {	/* the boundary is the end of bufa, so everything comes from bufb */
+		return bs->bufb >> (32 - nbit);
+	} else	if (nbit > 0) {	/* request straddles bufa and bufb */
+		return ((bs->bufa & (0xffffffff >> bspos)) << nbit) | (bs->
+																 bufb >> (32 -
+																		  nbit));
+	} else {	/* request lies entirely inside bufa */
+		return (bs->bufa & (0xffffffff >> bspos)) >> (32 - bspos - bits);
+	}
+
+}
+ void BitstreamByteAlign(Bitstream * const bs)
+ {
+	/* Skips forward to the next byte boundary; does nothing when pos is already a multiple of 8. */
+	uint32_t remainder = bs->pos % 8;
+
+	if (remainder) {
+		BitstreamSkip(bs, 8 - remainder);
+	}
+}
+ BitstreamPos(const Bitstream * const bs)
+ {	/* NOTE(review): return type omitted; the pre-patch inline returned `uint32_t` - confirm the compiler accepts it */
+	/* Position in bits relative to the initial position: 8 * (byte distance from start to tail) + pos - initpos. */
+	return((uint32_t)(8*((ptr_t)bs->tail - (ptr_t)bs->start) + bs->pos - bs->initpos));
+}
+ BitstreamLength(Bitstream * const bs)
+ {	/* NOTE(review): return type omitted; the pre-patch inline returned `uint32_t` - confirm the compiler accepts it */
+	/* Returns the byte count from the initial position to the current one; when pos != 0 the partly filled accumulator is first stored at *tail. */
+	uint32_t len = (uint32_t)((ptr_t)bs->tail - (ptr_t)bs->start);
+
+	if (bs->pos) {
+		uint32_t b = bs->buf;
+
+#ifndef ARCH_IS_BIG_ENDIAN
+		BSWAP(b);
+#endif
+		*bs->tail = b;	/* tail itself is not advanced here */
+
+		len += (bs->pos + 7) / 8;	/* a partially used byte counts as a whole byte */
+	}
+
+	/* initpos is always on a byte boundary */
+	if (bs->initpos)
+		len -= bs->initpos/8;
+
+	return len;
+}
+ BitstreamForward(Bitstream * const bs,
+ 				 const uint32_t bits)
+ {	/* NOTE(review): return type omitted; the pre-patch inline was `static void __inline` - confirm the compiler accepts it */
+	/* Advances the write position by `bits`; when pos reaches 32 the accumulator is stored at *tail (byte-swapped unless ARCH_IS_BIG_ENDIAN), tail advances and buf restarts at 0. */
+	bs->pos += bits;
+
+	if (bs->pos >= 32) {
+		uint32_t b = bs->buf;
+
+#ifndef ARCH_IS_BIG_ENDIAN
+		BSWAP(b);
+#endif
+		*bs->tail++ = b;
+		bs->buf = 0;
+		bs->pos -= 32;
+	}
+}
+ BitstreamGetBits(Bitstream * const bs,
+ 				 const uint32_t n)
+ {	/* NOTE(review): return type omitted; the pre-patch inline returned `uint32_t` - confirm the compiler accepts it */
+	/* Reads n bits: peeks them with BitstreamShowBits(), then consumes them with BitstreamSkip(). */
+	uint32_t ret = BitstreamShowBits(bs, n);
+
+	BitstreamSkip(bs, n);
+	return ret;
+}
+ BitstreamGetBit(Bitstream * const bs)
+ {	/* NOTE(review): return type omitted; the pre-patch inline returned `uint32_t` - confirm the compiler accepts it */
+	/* Reads a single bit; shorthand for BitstreamGetBits(bs, 1). */
+	return BitstreamGetBits(bs, 1);
+}
+ BitstreamPutBit(Bitstream * const bs,
+ 				const uint32_t bit)
+ {	/* NOTE(review): return type omitted; the pre-patch inline was `static void __inline` - confirm the compiler accepts it */
+	/* Writes one bit: sets the bit `pos` places below the MSB of buf when `bit` is non-zero, then advances the write position by 1. */
+	if (bit)
+		bs->buf |= (0x80000000 >> bs->pos);
+
+	BitstreamForward(bs, 1);
+}
+ BitstreamPutBits(Bitstream * const bs,
+ 				 const uint32_t value,
+ 				 const uint32_t size)
+ {	/* NOTE(review): return type omitted; the pre-patch inline was `static void __inline` - confirm the compiler accepts it */
+	/* ORs `value` into the accumulator as a `size`-bit field at pos and advances; presumably value has no bits set above `size` - verify against callers. */
+	uint32_t shift = 32 - bs->pos - size;
+
+	if (shift <= 32) {	/* field fits in the current word; when pos + size > 32 the unsigned subtraction wraps to a huge value and the else branch runs */
+		bs->buf |= value << shift;
+		BitstreamForward(bs, size);
+	} else {
+		uint32_t remainder;
+
+		shift = size - (32 - bs->pos);	/* number of bits that spill into the next word */
+		bs->buf |= value >> shift;
+		BitstreamForward(bs, size - shift);	/* fills the current word, so it is written out */
+		remainder = shift;
+
+		shift = 32 - shift;
+
+		bs->buf |= value << shift;	/* spilled low bits go to the top of the fresh accumulator */
+		BitstreamForward(bs, remainder);
+	}
+}
+ BitstreamPad(Bitstream * const bs)
+ {	/* NOTE(review): return type omitted; the pre-patch inline was `static void __inline` - confirm the compiler accepts it */
+	/* Pads up to the next byte boundary with stuffing_codes[bits - 1]; writes nothing when pos is already byte aligned (bits == 8). */
+	int bits = 8 - (bs->pos % 8);
+	if (bits < 8)
+		BitstreamPutBits(bs, stuffing_codes[bits - 1], bits);
+}
+ BitstreamPadAlways(Bitstream * const bs)
+ {	/* NOTE(review): return type omitted; the pre-patch inline was `static void __inline` - confirm the compiler accepts it */
+	/* Always writes 1..8 stuffing bits up to the next byte boundary; a full 8 bits are written when pos is already byte aligned. */
+	int bits = 8 - (bs->pos % 8);
+	BitstreamPutBits(bs, stuffing_codes[bits - 1], bits);
+}
+
 
-#include "bitstream.h"
-#include "zigzag.h"
-#include "../quant/quant_matrix.h"
-#include "mbcoding.h"
 
 
 /* multiple definitions when merging all .h files into global_all_dll.h */
@@ -48,7 +269,7 @@
 
 
 static const uint32_t intra_dc_threshold_table[] = {
-	32,							/* never use */
+	32,							
 	13,
 	15,
 	17,
@@ -378,6 +599,15 @@
 #define VIDOBJ_START_CODE_MASK		0x0000001f
 #define VIDOBJLAY_START_CODE_MASK	0x0000000f
 
+#if 0
+			dec->time_inc_resolution--;
+#endif
+#if 0
+				dec->time_inc_bits = 0;
+#endif
+#if 0
+	DPRINTF("*** WARNING: no vop_start_code found");
+#endif
 int
 BitstreamReadHeaders(Bitstream * bs,
 					 DECODER * dec,
@@ -545,16 +775,10 @@
 			dec->time_inc_resolution = BitstreamGetBits(bs, 16);	/* vop_time_increment_resolution */
 			DPRINTF(XVID_DEBUG_HEADER,"vop_time_increment_resolution %i\n", dec->time_inc_resolution);
 
-#if 0
-			dec->time_inc_resolution--;
-#endif
 
 			if (dec->time_inc_resolution > 0) {
 				dec->time_inc_bits = MAX(log2bin(dec->time_inc_resolution-1), 1);
 			} else {
-#if 0
-				dec->time_inc_bits = 0;
-#endif
 				/* for "old" xvid compatibility, set time_inc_bits = 1 */
 				dec->time_inc_bits = 1;
 			}
@@ -1039,9 +1263,6 @@
 		}
 	}
 
-#if 0
-	DPRINTF("*** WARNING: no vop_start_code found");
-#endif
 	return -1;					/* ignore it */
 }
 
@@ -1070,6 +1291,10 @@
 /*
 	write vol header
 */
+#if 0
+	BitstreamPad(bs);
+#endif
+#define DIVX5_ID "DivX999b000p"
 void
 BitstreamWriteVolHeader(Bitstream * const bs,
 						const MBParam * pParam,
@@ -1096,9 +1321,6 @@
     }
 
 	/* visual_object_sequence_start_code */
-#if 0
-	BitstreamPad(bs);
-#endif
 
 	/*
 	 * no padding here, anymore. You have to make sure that you are
@@ -1257,7 +1479,6 @@
 	BitstreamPadAlways(bs); /* next_start_code(); */
 
 	/* fake divx5 id, to ensure compatibility with divx5 decoder */
-#define DIVX5_ID "DivX999b000p"
 	if (pParam->max_bframes > 0 && (pParam->global_flags & XVID_GLOBAL_PACKED)) {
 		BitstreamWriteUserData(bs, DIVX5_ID, strlen(DIVX5_ID));
 	}
@@ -1278,6 +1499,25 @@
 /*
   write vop header
 */
+#if 0
+	BitstreamPad(bs);
+#endif
+#if 0
+	DPRINTF(XVID_DEBUG_HEADER, "coding_type = %i\n", frame->coding_type);
+#endif
+#if 0
+	DPRINTF("[%i:%i] %c",
+			frame->seconds, frame->ticks,
+			frame->coding_type == I_VOP ? 'I' :
+			frame->coding_type == P_VOP ? 'P' :
+			frame->coding_type == S_VOP ? 'S' :	'B');
+#endif
+#if 0
+		BitstreamPadAlways(bs); /*  next_start_code() */
+#endif
+#if 0
+	DPRINTF(XVID_DEBUG_HEADER, "quant = %i\n", quant);
+#endif
 void
 BitstreamWriteVopHeader(
 						Bitstream * const bs,
@@ -1288,9 +1528,6 @@
 {
 	uint32_t i;
 
-#if 0
-	BitstreamPad(bs);
-#endif
 
 	/*
 	 * no padding here, anymore. You have to make sure that you are
@@ -1300,9 +1537,6 @@
 	BitstreamPutBits(bs, VOP_START_CODE, 32);
 
 	BitstreamPutBits(bs, frame->coding_type, 2);
-#if 0
-	DPRINTF(XVID_DEBUG_HEADER, "coding_type = %i\n", frame->coding_type);
-#endif
 
 	for (i = 0; i < frame->seconds; i++) {
 		BitstreamPutBit(bs, 1);
@@ -1313,21 +1547,11 @@
 
 	/* time_increment: value=nth_of_sec, nbits = log2(resolution) */
 	BitstreamPutBits(bs, frame->ticks, MAX(log2bin(pParam->fbase-1), 1));
-#if 0
-	DPRINTF("[%i:%i] %c",
-			frame->seconds, frame->ticks,
-			frame->coding_type == I_VOP ? 'I' :
-			frame->coding_type == P_VOP ? 'P' :
-			frame->coding_type == S_VOP ? 'S' :	'B');
-#endif
 
 	WRITE_MARKER();
 
 	if (!vop_coded) {
 		BitstreamPutBits(bs, 0, 1);
-#if 0
-		BitstreamPadAlways(bs); /*  next_start_code() */
-#endif
 		/* NB: It's up to the function caller to write the next_start_code().
 		 * At the moment encoder.c respects that requisite because a VOP
 		 * always ends with a next_start_code either if it's coded or not
@@ -1375,9 +1599,6 @@
 	}
 
 
-#if 0
-	DPRINTF(XVID_DEBUG_HEADER, "quant = %i\n", quant);
-#endif
 
 	BitstreamPutBits(bs, quant, 5);	/* quantizer */
 
diff -bBru src-ori/bitstream/bitstream.h src/bitstream/bitstream.h
--- src-ori/bitstream/bitstream.h	Wed Jan 12 10:32:43 2005
+++ src/bitstream/bitstream.h	Wed Jan 12 12:37:47 2005
@@ -152,158 +152,53 @@
 
 /* initialise bitstream structure */
 
-static void __inline
-BitstreamInit(Bitstream * const bs,
+extern  BitstreamInit(Bitstream * const bs,
 			  void *const bitstream,
 			  uint32_t length)
-{
-	uint32_t tmp;
-	size_t bitpos;
-	ptr_t adjbitstream = (ptr_t)bitstream;
-
-	/*
-	 * Start the stream on a uint32_t boundary, by rounding down to the
-	 * previous uint32_t and skipping the intervening bytes.
-	 */
-	bitpos = ((sizeof(uint32_t)-1) & (size_t)bitstream);
-	adjbitstream = adjbitstream - bitpos;
-	bs->start = bs->tail = (uint32_t *) adjbitstream;
-
-	tmp = *bs->start;
-#ifndef ARCH_IS_BIG_ENDIAN
-	BSWAP(tmp);
-#endif
-	bs->bufa = tmp;
-
-	tmp = *(bs->start + 1);
-#ifndef ARCH_IS_BIG_ENDIAN
-	BSWAP(tmp);
-#endif
-	bs->bufb = tmp;
-
-	bs->buf = 0;
-	bs->pos = bs->initpos = bitpos*8;
-	bs->length = length;
-}
+;
 
 
 /* reset bitstream state */
 
-static void __inline
-BitstreamReset(Bitstream * const bs)
-{
-	uint32_t tmp;
-
-	bs->tail = bs->start;
-
-	tmp = *bs->start;
-#ifndef ARCH_IS_BIG_ENDIAN
-	BSWAP(tmp);
-#endif
-	bs->bufa = tmp;
-
-	tmp = *(bs->start + 1);
-#ifndef ARCH_IS_BIG_ENDIAN
-	BSWAP(tmp);
-#endif
-	bs->bufb = tmp;
-
-	bs->buf = 0;
-	bs->pos = bs->initpos;
-}
+extern  BitstreamReset(Bitstream * const bs)
+;
 
 
 /* reads n bits from bitstream without changing the stream pos */
 
-static uint32_t __inline
-BitstreamShowBits(Bitstream * const bs,
+extern  BitstreamShowBits(Bitstream * const bs,
 				  const uint32_t bits)
-{
-	int nbit = (bits + bs->pos) - 32;
-
-	if (nbit > 0) {
-		return ((bs->bufa & (0xffffffff >> bs->pos)) << nbit) | (bs->
-																 bufb >> (32 -
-																		  nbit));
-	} else {
-		return (bs->bufa & (0xffffffff >> bs->pos)) >> (32 - bs->pos - bits);
-	}
-}
+;
 
 
 /* skip n bits forward in bitstream */
 
-static __inline void
-BitstreamSkip(Bitstream * const bs,
+extern  void BitstreamSkip(Bitstream * const bs,
 			  const uint32_t bits)
-{
-	bs->pos += bits;
-
-	if (bs->pos >= 32) {
-		uint32_t tmp;
-
-		bs->bufa = bs->bufb;
-		tmp = *((uint32_t *) bs->tail + 2);
-#ifndef ARCH_IS_BIG_ENDIAN
-		BSWAP(tmp);
-#endif
-		bs->bufb = tmp;
-		bs->tail++;
-		bs->pos -= 32;
-	}
-}
+;
 
 
 /* number of bits to next byte alignment */
-static __inline uint32_t
-BitstreamNumBitsToByteAlign(Bitstream *bs)
-{
-	uint32_t n = (32 - bs->pos) % 8;
-	return n == 0 ? 8 : n;
-}
+extern  uint32_t BitstreamNumBitsToByteAlign(Bitstream *bs)
+;
 
 
 /* show nbits from next byte alignment */
-static __inline uint32_t
-BitstreamShowBitsFromByteAlign(Bitstream *bs, int bits)
-{
-	int bspos = bs->pos + BitstreamNumBitsToByteAlign(bs);
-	int nbit = (bits + bspos) - 32;
-
-	if (bspos >= 32) {
-		return bs->bufb >> (32 - nbit);
-	} else	if (nbit > 0) {
-		return ((bs->bufa & (0xffffffff >> bspos)) << nbit) | (bs->
-																 bufb >> (32 -
-																		  nbit));
-	} else {
-		return (bs->bufa & (0xffffffff >> bspos)) >> (32 - bspos - bits);
-	}
-
-}
+extern  uint32_t BitstreamShowBitsFromByteAlign(Bitstream *bs, int bits)
+;
 
 
 
 /* move forward to the next byte boundary */
 
-static __inline void
-BitstreamByteAlign(Bitstream * const bs)
-{
-	uint32_t remainder = bs->pos % 8;
-
-	if (remainder) {
-		BitstreamSkip(bs, 8 - remainder);
-	}
-}
+extern  void BitstreamByteAlign(Bitstream * const bs)
+;
 
 
 /* bitstream length (unit bits) */
 
-static uint32_t __inline
-BitstreamPos(const Bitstream * const bs)
-{
-	return((uint32_t)(8*((ptr_t)bs->tail - (ptr_t)bs->start) + bs->pos - bs->initpos));
-}
+extern  BitstreamPos(const Bitstream * const bs)
+;
 
 
 /*
@@ -311,111 +206,42 @@
  * NOTE: assumes no futher bitstream functions will be called.
  */
 
-static uint32_t __inline
-BitstreamLength(Bitstream * const bs)
-{
-	uint32_t len = (uint32_t)((ptr_t)bs->tail - (ptr_t)bs->start);
-
-	if (bs->pos) {
-		uint32_t b = bs->buf;
-
-#ifndef ARCH_IS_BIG_ENDIAN
-		BSWAP(b);
-#endif
-		*bs->tail = b;
-
-		len += (bs->pos + 7) / 8;
-	}
-
-	/* initpos is always on a byte boundary */
-	if (bs->initpos)
-		len -= bs->initpos/8;
-
-	return len;
-}
+extern  BitstreamLength(Bitstream * const bs)
+;
 
 
 /* move bitstream position forward by n bits and write out buffer if needed */
 
-static void __inline
-BitstreamForward(Bitstream * const bs,
+extern  BitstreamForward(Bitstream * const bs,
 				 const uint32_t bits)
-{
-	bs->pos += bits;
-
-	if (bs->pos >= 32) {
-		uint32_t b = bs->buf;
-
-#ifndef ARCH_IS_BIG_ENDIAN
-		BSWAP(b);
-#endif
-		*bs->tail++ = b;
-		bs->buf = 0;
-		bs->pos -= 32;
-	}
-}
+;
 
 /* read n bits from bitstream */
 
-static uint32_t __inline
-BitstreamGetBits(Bitstream * const bs,
+extern  BitstreamGetBits(Bitstream * const bs,
 				 const uint32_t n)
-{
-	uint32_t ret = BitstreamShowBits(bs, n);
-
-	BitstreamSkip(bs, n);
-	return ret;
-}
+;
 
 
 /* read single bit from bitstream */
 
-static uint32_t __inline
-BitstreamGetBit(Bitstream * const bs)
-{
-	return BitstreamGetBits(bs, 1);
-}
+extern  BitstreamGetBit(Bitstream * const bs)
+;
 
 
 /* write single bit to bitstream */
 
-static void __inline
-BitstreamPutBit(Bitstream * const bs,
+extern  BitstreamPutBit(Bitstream * const bs,
 				const uint32_t bit)
-{
-	if (bit)
-		bs->buf |= (0x80000000 >> bs->pos);
-
-	BitstreamForward(bs, 1);
-}
+;
 
 
 /* write n bits to bitstream */
 
-static void __inline
-BitstreamPutBits(Bitstream * const bs,
+extern  BitstreamPutBits(Bitstream * const bs,
 				 const uint32_t value,
 				 const uint32_t size)
-{
-	uint32_t shift = 32 - bs->pos - size;
-
-	if (shift <= 32) {
-		bs->buf |= value << shift;
-		BitstreamForward(bs, size);
-	} else {
-		uint32_t remainder;
-
-		shift = size - (32 - bs->pos);
-		bs->buf |= value >> shift;
-		BitstreamForward(bs, size - shift);
-		remainder = shift;
-
-		shift = 32 - shift;
-
-		bs->buf |= value << shift;
-		BitstreamForward(bs, remainder);
-	}
-}
+;
 
 static const int stuffing_codes[8] =
 {
@@ -432,13 +258,8 @@
 
 /* pad bitstream to the next byte boundary */
 
-static void __inline
-BitstreamPad(Bitstream * const bs)
-{
-	int bits = 8 - (bs->pos % 8);
-	if (bits < 8)
-		BitstreamPutBits(bs, stuffing_codes[bits - 1], bits);
-}
+extern  BitstreamPad(Bitstream * const bs)
+;
 
 
 /*
@@ -446,11 +267,7 @@
  * alway pad: even if currently at the byte boundary
  */
 
-static void __inline
-BitstreamPadAlways(Bitstream * const bs)
-{
-	int bits = 8 - (bs->pos % 8);
-	BitstreamPutBits(bs, stuffing_codes[bits - 1], bits);
-}
+extern  BitstreamPadAlways(Bitstream * const bs)
+;
 
 #endif /* _BITSTREAM_H_ */
diff -bBru src-ori/bitstream/cbp.c src/bitstream/cbp.c
--- src-ori/bitstream/cbp.c	Wed Jan 12 10:32:43 2005
+++ src/bitstream/cbp.c	Wed Jan 12 12:38:38 2005
@@ -24,10 +24,9 @@
  *
  ****************************************************************************/
 
-#include "../portab.h"
-#include "cbp.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
 
-cbpFuncPtr calc_cbp;
 
 /*
  * Returns a field of bits that indicates non zero ac blocks
diff -bBru src-ori/bitstream/cbp.h src/bitstream/cbp.h
--- src-ori/bitstream/cbp.h	Wed Jan 12 10:32:43 2005
+++ src/bitstream/cbp.h	Wed Jan 12 12:37:42 2005
@@ -33,7 +33,7 @@
 
 typedef cbpFunc *cbpFuncPtr;
 
-extern cbpFuncPtr calc_cbp;
+
 
 extern cbpFunc calc_cbp_c;
 extern cbpFunc calc_cbp_plain;
diff -bBru src-ori/bitstream/mbcoding.c src/bitstream/mbcoding.c
--- src-ori/bitstream/mbcoding.c	Wed Jan 12 10:32:43 2005
+++ src/bitstream/mbcoding.c	Wed Jan 12 12:38:58 2005
@@ -23,38 +23,32 @@
  *
  ****************************************************************************/
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "../portab.h"
-#include "../global.h"
-#include "bitstream.h"
-#include "zigzag.h"
-#include "vlc_codes.h"
-#include "mbcoding.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
+ void MBSkip(Bitstream * bs)
+ {
+	/* Emits a single 1 bit into bs via BitstreamPutBit(). */
+	BitstreamPutBit(bs, 1);	/* not coded */
+}
+
+
 
-#include "../utils/mbfunctions.h"
 
 #define LEVELOFFSET 32
 
-/* Initialized once during xvid_global call
- * RO access is thread safe */
-static REVERSE_EVENT DCT3D[2][4096];
-static VLC coeff_VLC[2][2][64][64];
 
 /* not really MB related, but VLCs are only available here */
-void bs_put_spritetrajectory(Bitstream * bs, const int val)
-{
-	const int code = sprite_trajectory_code[val+16384].code;
-	const int len = sprite_trajectory_code[val+16384].len;
-	const int code2 = sprite_trajectory_len[len].code;
-	const int len2 = sprite_trajectory_len[len].len;
-
 #if 0
 	printf("GMC=%d Code/Len  = %d / %d ",val, code,len);
 	printf("Code2 / Len2 = %d / %d \n",code2,len2);
 #endif
+void bs_put_spritetrajectory(Bitstream * bs, const int val)
+{
+	const int code = (((struct global_all_dll *)Dll::Tls())->sprite_trajectory_code)[val+16384].code;
+	const int len = (((struct global_all_dll *)Dll::Tls())->sprite_trajectory_code)[val+16384].len;
+	const int code2 = (((struct global_all_dll *)Dll::Tls())->sprite_trajectory_len)[len].code;
+	const int len2 = (((struct global_all_dll *)Dll::Tls())->sprite_trajectory_len)[len].len;
+
 
 	BitstreamPutBits(bs, code2, len2);
 	if (len) BitstreamPutBits(bs, code, len);
@@ -65,9 +59,9 @@
 	int i;
 	for (i = 0; i < 12; i++)
 	{
-		if (BitstreamShowBits(bs, sprite_trajectory_len[i].len) == sprite_trajectory_len[i].code)
+		if (BitstreamShowBits(bs, (((struct global_all_dll *)Dll::Tls())->sprite_trajectory_len)[i].len) == (((struct global_all_dll *)Dll::Tls())->sprite_trajectory_len)[i].code)
 		{
-			BitstreamSkip(bs, sprite_trajectory_len[i].len);
+			BitstreamSkip(bs, (((struct global_all_dll *)Dll::Tls())->sprite_trajectory_len)[i].len);
 			return i;
 		}
 	}
@@ -82,14 +76,14 @@
 
 	for (intra = 0; intra < 2; intra++)
 		for (i = 0; i < 4096; i++)
-			DCT3D[intra][i].event.level = 0;
+			(((struct global_all_dll *)Dll::Tls())->DCT3D)[intra][i].event.level = 0;
 
 	for (intra = 0; intra < 2; intra++) {
 		for (last = 0; last < 2; last++) {
 			for (run = 0; run < 63 + last; run++) {
 				for (level = 0; level < (uint32_t)(32 << intra); level++) {
 					offset = !intra * LEVELOFFSET;
-					coeff_VLC[intra][last][level + offset][run].len = 128;
+					(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][level + offset][run].len = 128;
 				}
 			}
 		}
@@ -100,19 +94,19 @@
 			offset = !intra * LEVELOFFSET;
 
 			for (j = 0; j < (uint32_t)(1 << (12 - coeff_tab[intra][i].vlc.len)); j++) {
-				DCT3D[intra][(coeff_tab[intra][i].vlc.code << (12 - coeff_tab[intra][i].vlc.len)) | j].len	 = coeff_tab[intra][i].vlc.len;
-				DCT3D[intra][(coeff_tab[intra][i].vlc.code << (12 - coeff_tab[intra][i].vlc.len)) | j].event = coeff_tab[intra][i].event;
+				(((struct global_all_dll *)Dll::Tls())->DCT3D)[intra][(coeff_tab[intra][i].vlc.code << (12 - coeff_tab[intra][i].vlc.len)) | j].len	 = coeff_tab[intra][i].vlc.len;
+				(((struct global_all_dll *)Dll::Tls())->DCT3D)[intra][(coeff_tab[intra][i].vlc.code << (12 - coeff_tab[intra][i].vlc.len)) | j].event = coeff_tab[intra][i].event;
 			}
 
-			coeff_VLC[intra][coeff_tab[intra][i].event.last][coeff_tab[intra][i].event.level + offset][coeff_tab[intra][i].event.run].code
+			(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][coeff_tab[intra][i].event.last][coeff_tab[intra][i].event.level + offset][coeff_tab[intra][i].event.run].code
 				= coeff_tab[intra][i].vlc.code << 1;
-			coeff_VLC[intra][coeff_tab[intra][i].event.last][coeff_tab[intra][i].event.level + offset][coeff_tab[intra][i].event.run].len
+			(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][coeff_tab[intra][i].event.last][coeff_tab[intra][i].event.level + offset][coeff_tab[intra][i].event.run].len
 				= coeff_tab[intra][i].vlc.len + 1;
 
 			if (!intra) {
-				coeff_VLC[intra][coeff_tab[intra][i].event.last][offset - coeff_tab[intra][i].event.level][coeff_tab[intra][i].event.run].code
+				(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][coeff_tab[intra][i].event.last][offset - coeff_tab[intra][i].event.level][coeff_tab[intra][i].event.run].code
 					= (coeff_tab[intra][i].vlc.code << 1) | 1;
-				coeff_VLC[intra][coeff_tab[intra][i].event.last][offset - coeff_tab[intra][i].event.level][coeff_tab[intra][i].event.run].len
+				(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][coeff_tab[intra][i].event.last][offset - coeff_tab[intra][i].event.level][coeff_tab[intra][i].event.run].len
 					= coeff_tab[intra][i].vlc.len + 1;
 			}
 		}
@@ -141,36 +135,36 @@
 							level_esc  = level;
 						} else {
 							if (!intra) {
-								coeff_VLC[intra][last][level + offset][run].code
+								(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][level + offset][run].code
 									= (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((level & 0xfff) << 1) | 1;
-								coeff_VLC[intra][last][level + offset][run].len = 30;
-									coeff_VLC[intra][last][offset - level][run].code
+								(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][level + offset][run].len = 30;
+									(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][offset - level][run].code
 									= (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((-(int32_t)level & 0xfff) << 1) | 1;
-								coeff_VLC[intra][last][offset - level][run].len = 30;
+								(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][offset - level][run].len = 30;
 							}
 							continue;
 						}
 					}
 
-					coeff_VLC[intra][last][level + offset][run].code
-						= (escape << coeff_VLC[intra][last][level_esc + offset][run_esc].len)
-						|  coeff_VLC[intra][last][level_esc + offset][run_esc].code;
-					coeff_VLC[intra][last][level + offset][run].len
-						= coeff_VLC[intra][last][level_esc + offset][run_esc].len + escape_len;
+					(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][level + offset][run].code
+						= (escape << (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][level_esc + offset][run_esc].len)
+						|  (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][level_esc + offset][run_esc].code;
+					(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][level + offset][run].len
+						= (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][level_esc + offset][run_esc].len + escape_len;
 
 					if (!intra) {
-						coeff_VLC[intra][last][offset - level][run].code
-							= (escape << coeff_VLC[intra][last][level_esc + offset][run_esc].len)
-							|  coeff_VLC[intra][last][level_esc + offset][run_esc].code | 1;
-						coeff_VLC[intra][last][offset - level][run].len
-							= coeff_VLC[intra][last][level_esc + offset][run_esc].len + escape_len;
+						(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][offset - level][run].code
+							= (escape << (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][level_esc + offset][run_esc].len)
+							|  (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][level_esc + offset][run_esc].code | 1;
+						(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][offset - level][run].len
+							= (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][level_esc + offset][run_esc].len + escape_len;
 					}
 				}
 
 				if (!intra) {
-					coeff_VLC[intra][last][0][run].code
+					(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][0][run].code
 						= (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((-32 & 0xfff) << 1) | 1;
-					coeff_VLC[intra][last][0][run].len = 30;
+					(((struct global_all_dll *)Dll::Tls())->coeff_VLC)[intra][last][0][run].len = 30;
 				}
 			}
 		}
@@ -179,19 +173,19 @@
 	/* init sprite_trajectory tables
 	 * even if GMC is not specified (it might be used later...) */
 
-	sprite_trajectory_code[0+16384].code = 0;
-	sprite_trajectory_code[0+16384].len = 0;
+	(((struct global_all_dll *)Dll::Tls())->sprite_trajectory_code)[0+16384].code = 0;
+	(((struct global_all_dll *)Dll::Tls())->sprite_trajectory_code)[0+16384].len = 0;
 	for (k=0;k<14;k++) {
 		int limit = (1<<k);
 
 		for (l=-(2*limit-1); l <= -limit; l++) {
-			sprite_trajectory_code[l+16384].code = (2*limit-1)+l;
-			sprite_trajectory_code[l+16384].len = k+1;
+			(((struct global_all_dll *)Dll::Tls())->sprite_trajectory_code)[l+16384].code = (2*limit-1)+l;
+			(((struct global_all_dll *)Dll::Tls())->sprite_trajectory_code)[l+16384].len = k+1;
 		}
 
 		for (l=limit; l<= 2*limit-1; l++) {
-			sprite_trajectory_code[l+16384].code = l;
-			sprite_trajectory_code[l+16384].len = k+1;
+			(((struct global_all_dll *)Dll::Tls())->sprite_trajectory_code)[l+16384].code = l;
+			(((struct global_all_dll *)Dll::Tls())->sprite_trajectory_code)[l+16384].len = k+1;
 		}
 	}
 }
@@ -276,8 +270,8 @@
 			level_shifted = prev_level + 32;
 			if (!(level_shifted & -64))
 			{
-				code = coeff_VLC[0][0][level_shifted][prev_run].code;
-				len	 = coeff_VLC[0][0][level_shifted][prev_run].len;
+				code = (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[0][0][level_shifted][prev_run].code;
+				len	 = (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[0][0][level_shifted][prev_run].len;
 			}
 			else
 			{
@@ -296,8 +290,8 @@
 	level_shifted = prev_level + 32;
 	if (!(level_shifted & -64))
 	{
-		code = coeff_VLC[0][1][level_shifted][prev_run].code;
-		len	 = coeff_VLC[0][1][level_shifted][prev_run].len;
+		code = (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[0][1][level_shifted][prev_run].code;
+		len	 = (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[0][1][level_shifted][prev_run].len;
 	}
 	else
 	{
@@ -331,8 +325,8 @@
 		{
 			abs_level = abs(prev_level);
 			abs_level = abs_level < 64 ? abs_level : 0;
-			code	  = coeff_VLC[1][0][abs_level][prev_run].code;
-			len		  = coeff_VLC[1][0][abs_level][prev_run].len;
+			code	  = (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[1][0][abs_level][prev_run].code;
+			len		  = (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[1][0][abs_level][prev_run].len;
 			if (len != 128)
 				code |= (prev_level < 0);
 			else
@@ -351,8 +345,8 @@
 
 	abs_level = abs(prev_level);
 	abs_level = abs_level < 64 ? abs_level : 0;
-	code	  = coeff_VLC[1][1][abs_level][prev_run].code;
-	len		  = coeff_VLC[1][1][abs_level][prev_run].len;
+	code	  = (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[1][1][abs_level][prev_run].code;
+	len		  = (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[1][1][abs_level][prev_run].len;
 	if (len != 128)
 		code |= (prev_level < 0);
 	else
@@ -392,7 +386,7 @@
 		{
 			abs_level = abs(prev_level);
 			abs_level = abs_level < 64 ? abs_level : 0;
-			len		  = coeff_VLC[1][0][abs_level][prev_run].len;
+			len		  = (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[1][0][abs_level][prev_run].len;
 			bits      += len!=128 ? len : 30;
 
 			prev_level = level;
@@ -405,7 +399,7 @@
 
 	abs_level = abs(prev_level);
 	abs_level = abs_level < 64 ? abs_level : 0;
-	len		  = coeff_VLC[1][1][abs_level][prev_run].len;
+	len		  = (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[1][1][abs_level][prev_run].len;
 	bits      += len!=128 ? len : 30;
 
 	return bits;
@@ -432,7 +426,7 @@
 		if ((level = qcoeff[zigzag[i++]]) != 0) {
 			level_shifted = prev_level + 32;
 			if (!(level_shifted & -64))
-				len	 = coeff_VLC[0][0][level_shifted][prev_run].len;
+				len	 = (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[0][0][level_shifted][prev_run].len;
 			else
 				len  = 30;
 
@@ -447,7 +441,7 @@
 
 	level_shifted = prev_level + 32;
 	if (!(level_shifted & -64))
-		len	 = coeff_VLC[0][1][level_shifted][prev_run].len;
+		len	 = (((struct global_all_dll *)Dll::Tls())->coeff_VLC)[0][1][level_shifted][prev_run].len;
 	else
 		len  = 30;
 	bits += len;
@@ -456,7 +450,7 @@
 }
 
 static const int iDQtab[5] = {
-	1, 0, -1 /* no change */, 2, 3
+	1, 0, -1, 2, 3
 };
 #define DQ_VALUE2INDEX(value)  iDQtab[(value)+2]
 
@@ -476,12 +470,12 @@
 	/* write mcbpc */
 	if (frame->coding_type == I_VOP) {
 		mcbpc = ((pMB->mode >> 1) & 3) | ((pMB->cbp & 3) << 2);
-		BitstreamPutBits(bs, mcbpc_intra_tab[mcbpc].code,
-						 mcbpc_intra_tab[mcbpc].len);
+		BitstreamPutBits(bs, (((struct global_all_dll *)Dll::Tls())->mcbpc_intra_tab)[mcbpc].code,
+						 (((struct global_all_dll *)Dll::Tls())->mcbpc_intra_tab)[mcbpc].len);
 	} else {
 		mcbpc = (pMB->mode & 7) | ((pMB->cbp & 3) << 3);
-		BitstreamPutBits(bs, mcbpc_inter_tab[mcbpc].code,
-						 mcbpc_inter_tab[mcbpc].len);
+		BitstreamPutBits(bs, (((struct global_all_dll *)Dll::Tls())->mcbpc_inter_tab)[mcbpc].code,
+						 (((struct global_all_dll *)Dll::Tls())->mcbpc_inter_tab)[mcbpc].len);
 	}
 
 	/* ac prediction flag */
@@ -542,8 +536,8 @@
 	cbpy = 15 - (pMB->cbp >> 2);
 
 	/* write mcbpc */
-	BitstreamPutBits(bs, mcbpc_inter_tab[mcbpc].code,
-					 mcbpc_inter_tab[mcbpc].len);
+	BitstreamPutBits(bs, (((struct global_all_dll *)Dll::Tls())->mcbpc_inter_tab)[mcbpc].code,
+					 (((struct global_all_dll *)Dll::Tls())->mcbpc_inter_tab)[mcbpc].len);
 
 	if ( (frame->coding_type == S_VOP) && (pMB->mode == MODE_INTER || pMB->mode == MODE_INTER_Q) )
 		BitstreamPutBit(bs, pMB->mcsel);		/* mcsel: '0'=local motion, '1'=GMC */
@@ -976,7 +970,7 @@
 		intra = 0;
 
 	if (BitstreamShowBits(bs, 7) != ESCAPE) {
-		reverse_event = &DCT3D[intra][BitstreamShowBits(bs, 12)];
+		reverse_event = &(((struct global_all_dll *)Dll::Tls())->DCT3D)[intra][BitstreamShowBits(bs, 12)];
 
 		if ((level = reverse_event->event.level) == 0)
 			goto error;
@@ -1008,7 +1002,7 @@
 	if (mode < 3) {
 		BitstreamSkip(bs, (mode == 2) ? 2 : 1);
 
-		reverse_event = &DCT3D[intra][BitstreamShowBits(bs, 12)];
+		reverse_event = &(((struct global_all_dll *)Dll::Tls())->DCT3D)[intra][BitstreamShowBits(bs, 12)];
 
 		if ((level = reverse_event->event.level) == 0)
 			goto error;
@@ -1041,6 +1035,9 @@
 	return 0;
 }
 
+#if 0
+		DPRINTF(XVID_DEBUG_COEFF,"block[%i] %i %08x\n", scan[coeff], level, BitstreamShowBits(bs, 32));
+#endif
 void
 get_intra_block(Bitstream * bs,
 				int16_t * block,
@@ -1061,9 +1058,6 @@
 		block[scan[coeff]] = level;
 
 		DPRINTF(XVID_DEBUG_COEFF,"block[%i] %i\n", scan[coeff], level);
-#if 0
-		DPRINTF(XVID_DEBUG_COEFF,"block[%i] %i %08x\n", scan[coeff], level, BitstreamShowBits(bs, 32));
-#endif
 
 		if (level < -2047 || level > 2047) {
 			DPRINTF(XVID_DEBUG_ERROR,"warning: intra_overflow %i\n", level);
@@ -1114,7 +1108,7 @@
 
 VLC_TABLE const coeff_tab[2][102] =
 {
-	/* intra = 0 */
+	
 	{
 		{{ 2,  2}, {0, 0, 1}},
 		{{15,  4}, {0, 0, 2}},
@@ -1219,7 +1213,7 @@
 		{{94, 12}, {1, 39, 1}},
 		{{95, 12}, {1, 40, 1}}
 	},
-	/* intra = 1 */
+	
 	{
 		{{ 2,  2}, {0, 0, 1}},
 		{{15,  4}, {0, 0, 3}},
@@ -1329,7 +1323,7 @@
 /* constants taken from momusys/vm_common/inlcude/max_level.h */
 uint8_t const max_level[2][2][64] = {
 	{
-		/* intra = 0, last = 0 */
+		
 		{
 			12, 6, 4, 3, 3, 3, 3, 2,
 			2, 2, 2, 1, 1, 1, 1, 1,
@@ -1340,7 +1334,7 @@
 			0, 0, 0, 0, 0, 0, 0, 0,
 			0, 0, 0, 0, 0, 0, 0, 0
 		},
-		/* intra = 0, last = 1 */
+		
 		{
 			3, 2, 1, 1, 1, 1, 1, 1,
 			1, 1, 1, 1, 1, 1, 1, 1,
@@ -1353,7 +1347,7 @@
 		}
 	},
 	{
-		/* intra = 1, last = 0 */
+		
 		{
 			27, 10, 5, 4, 3, 3, 3, 3,
 			2, 2, 1, 1, 1, 1, 1, 0,
@@ -1364,7 +1358,7 @@
 			0, 0, 0, 0, 0, 0, 0, 0,
 			0, 0, 0, 0, 0, 0, 0, 0
 		},
-		/* intra = 1, last = 1 */
+		
 		{
 			8, 3, 2, 2, 2, 2, 2, 1,
 			1, 1, 1, 1, 1, 1, 1, 1,
@@ -1380,7 +1374,7 @@
 
 uint8_t const max_run[2][2][64] = {
 	{
-		/* intra = 0, last = 0 */
+		/* intra = 0, last = 0 */
 		{
 			0, 26, 10, 6, 2, 1, 1, 0,
 			0, 0, 0, 0, 0, 0, 0, 0,
@@ -1391,7 +1385,7 @@
 			0, 0, 0, 0, 0, 0, 0, 0,
 			0, 0, 0, 0, 0, 0, 0, 0,
 		},
-		/* intra = 0, last = 1 */
+		/* intra = 0, last = 1 */
 		{
 			0, 40, 1, 0, 0, 0, 0, 0,
 			0, 0, 0, 0, 0, 0, 0, 0,
@@ -1404,7 +1398,7 @@
 		}
 	},
 	{
-		/* intra = 1, last = 0 */
+		/* intra = 1, last = 0 */
 		{
 			0, 14, 9, 7, 3, 2, 1, 1,
 			1, 1, 1, 0, 0, 0, 0, 0,
@@ -1415,7 +1409,7 @@
 			0, 0, 0, 0, 0, 0, 0, 0,
 			0, 0, 0, 0, 0, 0, 0, 0,
 		},
-		/* intra = 1, last = 1 */
+		/* intra = 1, last = 1 */
 		{
 			0, 20, 6, 1, 0, 0, 0, 0,
 			0, 0, 0, 0, 0, 0, 0, 0,
@@ -1433,13 +1427,7 @@
  * encoder tables                                                 *
  ******************************************************************/
 
-VLC sprite_trajectory_code[32768];
 
-VLC sprite_trajectory_len[15] = {
-	{ 0x00 , 2},
-	{ 0x02 , 3}, { 0x03, 3}, { 0x04, 3}, { 0x05, 3}, { 0x06, 3},
-	{ 0x0E , 4}, { 0x1E, 5}, { 0x3E, 6}, { 0x7E, 7}, { 0xFE, 8},
-	{ 0x1FE, 9}, {0x3FE,10}, {0x7FE,11}, {0xFFE,12} };
 
 
 /* DCT coefficients. Four tables, two for last = 0, two for last = 1.
@@ -1449,22 +1437,10 @@
  CBPC as in table 4/H.263, MB type (mode): 3 = 01, 4 = 10.
  Example: cbpc = 01 and mode = 4 gives index = 0110 = 6. */
 
-VLC mcbpc_intra_tab[15] = {
-	{0x01, 9}, {0x01, 1}, {0x01, 4}, {0x00, 0},
-	{0x00, 0}, {0x01, 3}, {0x01, 6}, {0x00, 0},
-	{0x00, 0}, {0x02, 3}, {0x02, 6}, {0x00, 0},
-	{0x00, 0}, {0x03, 3}, {0x03, 6}
-};
 
 /* MCBPC inter.
    Addressing: 5 bit ccmmm (cc = CBPC, mmm = mode (1-4 binary)) */
 
-VLC mcbpc_inter_tab[29] = {
-	{1, 1}, {3, 3}, {2, 3}, {3, 5}, {4, 6}, {1, 9}, {0, 0}, {0, 0},
-	{3, 4}, {7, 7}, {5, 7}, {4, 8}, {4, 9}, {0, 0}, {0, 0}, {0, 0},
-	{2, 4}, {6, 7}, {4, 7}, {3, 8}, {3, 9}, {0, 0}, {0, 0}, {0, 0},
-	{5, 6}, {5, 9}, {5, 8}, {3, 7}, {2, 9}
-};
 
 const VLC xvid_cbpy_tab[16] = {
 	{3, 4}, {5, 5}, {4, 5}, {9, 4}, {3, 5}, {7, 4}, {2, 6}, {11, 4},
diff -bBru src-ori/bitstream/mbcoding.h src/bitstream/mbcoding.h
--- src-ori/bitstream/mbcoding.h	Wed Jan 12 10:32:43 2005
+++ src/bitstream/mbcoding.h	Wed Jan 12 12:37:43 2005
@@ -68,11 +68,8 @@
 				  Statistics * pStat);
 
 
-static __inline void
-MBSkip(Bitstream * bs)
-{
-	BitstreamPutBit(bs, 1);	/* not coded */
-}
+/* MBSkip() used to be a static __inline that wrote the single "not coded" bit; this header now only declares it. */
+extern void MBSkip(Bitstream * bs);
 
 int CodeCoeffIntra_CalcBits(const int16_t qcoeff[64], const uint16_t * zigzag);
 int CodeCoeffInter_CalcBits(const int16_t qcoeff[64], const uint16_t * zigzag);
Only in src-ori/bitstream: ppc_asm
diff -bBru src-ori/bitstream/vlc_codes.h src/bitstream/vlc_codes.h
--- src-ori/bitstream/vlc_codes.h	Wed Jan 12 10:32:43 2005
+++ src/bitstream/vlc_codes.h	Wed Jan 12 12:37:44 2005
@@ -73,10 +94,10 @@
 extern VLC_TABLE const coeff_tab[2][102];
 extern uint8_t const max_level[2][2][64];
 extern uint8_t const max_run[2][2][64];
-extern VLC sprite_trajectory_code[32768];
-extern VLC sprite_trajectory_len[15];
-extern VLC mcbpc_intra_tab[15];
-extern VLC mcbpc_inter_tab[29];
+
+
+
+
 extern const VLC xvid_cbpy_tab[16];
 extern const VLC dcy_tab[511];
 extern const VLC dcc_tab[511];
Only in src-ori/bitstream: x86_asm
Only in src-ori/dct: README.IJG
diff -bBru src-ori/dct/fdct.c src/dct/fdct.c
--- src-ori/dct/fdct.c	Wed Jan 12 10:32:43 2005
+++ src/dct/fdct.c	Wed Jan 12 12:38:32 2005
@@ -105,7 +105,8 @@
  * rounded result half the time...
  */
 
-#include "fdct.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
 
 #define USE_ACCURATE_ROUNDING
 
@@ -134,8 +135,6 @@
 #define FIX_2_562915447  ((int) 20995)	/* FIX(2.562915447) */
 #define FIX_3_072711026  ((int) 25172)	/* FIX(3.072711026) */
 
-/* function pointer */
-fdctFuncPtr fdct;
 
 /*
  * Perform an integer forward DCT on one block of samples.
diff -bBru src-ori/dct/fdct.h src/dct/fdct.h
--- src-ori/dct/fdct.h	Wed Jan 12 10:32:43 2005
+++ src/dct/fdct.h	Wed Jan 12 12:37:41 2005
@@ -29,7 +29,7 @@
 typedef void (fdctFunc) (short *const block);
 typedef fdctFunc *fdctFuncPtr;
 
-extern fdctFuncPtr fdct;
+
 
 fdctFunc fdct_int32;
 
Only in src-ori/dct: ia64_asm
diff -bBru src-ori/dct/idct.c src/dct/idct.c
--- src-ori/dct/idct.c	Wed Jan 12 10:32:43 2005
+++ src/dct/idct.c	Wed Jan 12 12:38:35 2005
@@ -76,7 +76,8 @@
 /* this code assumes >> to be a two's-complement arithmetic */
 /* right shift: (-2)>>1 == -1 , (-3)>>1 == -2               */
 
-#include "idct.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
 
 #define W1 2841					/* 2048*sqrt(2)*cos(1*pi/16) */
 #define W2 2676					/* 2048*sqrt(2)*cos(2*pi/16) */
@@ -85,11 +86,6 @@
 #define W6 1108					/* 2048*sqrt(2)*cos(6*pi/16) */
 #define W7 565					/* 2048*sqrt(2)*cos(7*pi/16) */
 
-/* private data
- * Initialized by idct_int32_init so it's mostly RO data,
- * doesn't hurt thread safety */
-static short iclip[1024];		/* clipping table */
-static short *iclp;
 
 /* private prototypes */
 
@@ -224,20 +220,8 @@
 }
 #endif
 
-/* function pointer */
-idctFuncPtr idct;
 
 /* two dimensional inverse discrete cosine transform */
-void
-idct_int32(short *const block)
-{
-
-	/*
-	 * idct_int32_init() must be called before the first call to this
-	 * function!
-	 */
-
-
 #if 0
 	int i;
 	long i;
@@ -248,13 +232,23 @@
 	for (i=0; i<8; i++)
 		idctcol(block+i);
 #endif
+void
+idct_int32(short *const block)
+{
+
+	/*
+	 * idct_int32_init() must be called before the first call to this
+	 * function!
+	 */
+
+
 
 	short *blk;
 	long i;
 	long X0, X1, X2, X3, X4, X5, X6, X7, X8;
 
 
-	for (i = 0; i < 8; i++)		/* idct rows */
+	for (i = 0; i < 8; i++)		/* idct rows */
 	{
 		blk = block + (i << 3);
 		if (!
@@ -310,7 +304,7 @@
 
 
 
-	for (i = 0; i < 8; i++)		/* idct columns */
+	for (i = 0; i < 8; i++)		/* idct columns */
 	{
 		blk = block + i;
 		/* shortcut  */
@@ -323,7 +317,7 @@
 			 | (X5 = blk[8 * 7]) | (X6 = blk[8 * 5]) | (X7 = blk[8 * 3]))) {
 			blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3] = blk[8 * 4] =
 				blk[8 * 5] = blk[8 * 6] = blk[8 * 7] =
-				iclp[(blk[8 * 0] + 32) >> 6];
+				(((struct global_all_dll *)Dll::Tls())->iclp)[(blk[8 * 0] + 32) >> 6];
 			continue;
 		}
 
@@ -357,14 +351,14 @@
 		X4 = (181 * (X4 - X5) + 128) >> 8;
 
 		/* fourth stage  */
-		blk[8 * 0] = iclp[(X7 + X1) >> 14];
-		blk[8 * 1] = iclp[(X3 + X2) >> 14];
-		blk[8 * 2] = iclp[(X0 + X4) >> 14];
-		blk[8 * 3] = iclp[(X8 + X6) >> 14];
-		blk[8 * 4] = iclp[(X8 - X6) >> 14];
-		blk[8 * 5] = iclp[(X0 - X4) >> 14];
-		blk[8 * 6] = iclp[(X3 - X2) >> 14];
-		blk[8 * 7] = iclp[(X7 - X1) >> 14];
+		blk[8 * 0] = (((struct global_all_dll *)Dll::Tls())->iclp)[(X7 + X1) >> 14];
+		blk[8 * 1] = (((struct global_all_dll *)Dll::Tls())->iclp)[(X3 + X2) >> 14];
+		blk[8 * 2] = (((struct global_all_dll *)Dll::Tls())->iclp)[(X0 + X4) >> 14];
+		blk[8 * 3] = (((struct global_all_dll *)Dll::Tls())->iclp)[(X8 + X6) >> 14];
+		blk[8 * 4] = (((struct global_all_dll *)Dll::Tls())->iclp)[(X8 - X6) >> 14];
+		blk[8 * 5] = (((struct global_all_dll *)Dll::Tls())->iclp)[(X0 - X4) >> 14];
+		blk[8 * 6] = (((struct global_all_dll *)Dll::Tls())->iclp)[(X3 - X2) >> 14];
+		blk[8 * 7] = (((struct global_all_dll *)Dll::Tls())->iclp)[(X7 - X1) >> 14];
 	}
 
 }								/* end function idct_int32(block) */
@@ -375,7 +369,8 @@
 {
 	int i;
 
-	iclp = iclip + 512;
+	(((struct global_all_dll *)Dll::Tls())->iclp) = (((struct global_all_dll *)Dll::Tls())->iclip) + 512;
 	for (i = -512; i < 512; i++)
-		iclp[i] = (i < -256) ? -256 : ((i > 255) ? 255 : i);
+		(((struct global_all_dll *)Dll::Tls())->iclp)[i] = (i < -256) ? -256 : ((i > 255) ? 255 : i);
 }
+
diff -bBru src-ori/dct/idct.h src/dct/idct.h
--- src-ori/dct/idct.h	Wed Jan 12 10:32:43 2005
+++ src/dct/idct.h	Wed Jan 12 12:37:42 2005
@@ -32,7 +32,7 @@
 typedef void (idctFunc) (short *const block);
 typedef idctFunc *idctFuncPtr;
 
-extern idctFuncPtr idct;
+
 
 idctFunc idct_int32;
 idctFunc simple_idct_c;		/* Michael Niedermayer */
Only in src-ori/dct: ppc_asm
diff -bBru src-ori/dct/simple_idct.c src/dct/simple_idct.c
--- src-ori/dct/simple_idct.c	Wed Jan 12 10:32:43 2005
+++ src/dct/simple_idct.c	Wed Jan 12 12:38:37 2005
@@ -31,8 +31,8 @@
   written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
  */
 
-#include "../portab.h"
-#include "idct.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
 
 #if 0
 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
@@ -76,6 +78,12 @@
 
 #endif
 
+/* Mask for the 16 bits of the first 64-bit word that hold row[0]; the position depends on byte order. */
+#ifdef ARCH_IS_BIG_ENDIAN
+#define ROW0_MASK 0xffff000000000000LL
+#else
+#define ROW0_MASK 0xffffLL
+#endif
 static __inline void idctRowCondDC (int16_t * const row)
 {
 	int a0, a1, a2, a3, b0, b1, b2, b3;
@@ -87,9 +95,7 @@
 
 #ifdef FAST_64BIT
 #ifdef ARCH_IS_BIG_ENDIAN
-#define ROW0_MASK 0xffff000000000000LL
 #else
-#define ROW0_MASK 0xffffLL
 #endif
 	if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
               ((uint64_t *)row)[1]) == 0) {
Only in src-ori/dct: x86_asm
diff -bBru src-ori/decoder.c src/decoder.c
--- src-ori/decoder.c	Wed Jan 12 10:32:43 2005
+++ src/decoder.c	Wed Jan 12 12:43:16 2005
@@ -24,43 +24,18 @@
  *
  ****************************************************************************/
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <e32std.h>
+#include "global_all_dll.h"
 
 #ifdef BFRAMES_DEC_DEBUG
 	#define BFRAMES_DEC
 #endif
 
-#include "xvid.h"
-#include "portab.h"
-#include "global.h"
-
-#include "decoder.h"
-#include "bitstream/bitstream.h"
-#include "bitstream/mbcoding.h"
-
-#include "quant/quant.h"
-#include "quant/quant_matrix.h"
-#include "dct/idct.h"
-#include "dct/fdct.h"
-#include "utils/mem_transfer.h"
-#include "image/interpolate8x8.h"
-#include "image/reduced.h"
-#include "image/font.h"
-
-#include "bitstream/mbcoding.h"
-#include "prediction/mbprediction.h"
-#include "utils/timer.h"
-#include "utils/emms.h"
-#include "motion/motion.h"
-#include "motion/gmc.h"
-
-#include "image/image.h"
-#include "image/colorspace.h"
-#include "image/postprocessing.h"
-#include "utils/mem_align.h"
 
+/* BFRAMES_DEC stays tied to BFRAMES_DEC_DEBUG (see the #ifdef above); it must not be defined unconditionally. */
+
+
+
 static int
 decoder_resize(DECODER * dec)
 {
@@ -344,14 +319,14 @@
 
 		start_timer();
 		if (dec->quant_type == 0) {
-			dequant_h263_intra(&data[i * 64], &block[i * 64], iQuant, iDcScaler, dec->mpeg_quant_matrices);
+			(((struct global_all_dll *)Dll::Tls())->dequant_h263_intra)(&data[i * 64], &block[i * 64], iQuant, iDcScaler, dec->mpeg_quant_matrices);
 		} else {
-			dequant_mpeg_intra(&data[i * 64], &block[i * 64], iQuant, iDcScaler, dec->mpeg_quant_matrices);
+			(((struct global_all_dll *)Dll::Tls())->dequant_mpeg_intra)(&data[i * 64], &block[i * 64], iQuant, iDcScaler, dec->mpeg_quant_matrices);
 		}
 		stop_iquant_timer();
 
 		start_timer();
-		idct(&data[i * 64]);
+		(((struct global_all_dll *)Dll::Tls())->idct)(&data[i * 64]);
 		stop_idct_timer();
 
 	}
@@ -366,19 +341,19 @@
 	if (reduced_resolution)
 	{
 		next_block*=2;
-		copy_upsampled_8x8_16to8(pY_Cur, &data[0 * 64], stride);
-		copy_upsampled_8x8_16to8(pY_Cur + 16, &data[1 * 64], stride);
-		copy_upsampled_8x8_16to8(pY_Cur + next_block, &data[2 * 64], stride);
-		copy_upsampled_8x8_16to8(pY_Cur + 16 + next_block, &data[3 * 64], stride);
-		copy_upsampled_8x8_16to8(pU_Cur, &data[4 * 64], stride2);
-		copy_upsampled_8x8_16to8(pV_Cur, &data[5 * 64], stride2);
+		(((struct global_all_dll *)Dll::Tls())->copy_upsampled_8x8_16to8)(pY_Cur, &data[0 * 64], stride);
+		(((struct global_all_dll *)Dll::Tls())->copy_upsampled_8x8_16to8)(pY_Cur + 16, &data[1 * 64], stride);
+		(((struct global_all_dll *)Dll::Tls())->copy_upsampled_8x8_16to8)(pY_Cur + next_block, &data[2 * 64], stride);
+		(((struct global_all_dll *)Dll::Tls())->copy_upsampled_8x8_16to8)(pY_Cur + 16 + next_block, &data[3 * 64], stride);
+		(((struct global_all_dll *)Dll::Tls())->copy_upsampled_8x8_16to8)(pU_Cur, &data[4 * 64], stride2);
+		(((struct global_all_dll *)Dll::Tls())->copy_upsampled_8x8_16to8)(pV_Cur, &data[5 * 64], stride2);
 	}else{
-		transfer_16to8copy(pY_Cur, &data[0 * 64], stride);
-		transfer_16to8copy(pY_Cur + 8, &data[1 * 64], stride);
-		transfer_16to8copy(pY_Cur + next_block, &data[2 * 64], stride);
-		transfer_16to8copy(pY_Cur + 8 + next_block, &data[3 * 64], stride);
-		transfer_16to8copy(pU_Cur, &data[4 * 64], stride2);
-		transfer_16to8copy(pV_Cur, &data[5 * 64], stride2);
+		(((struct global_all_dll *)Dll::Tls())->transfer_16to8copy)(pY_Cur, &data[0 * 64], stride);
+		(((struct global_all_dll *)Dll::Tls())->transfer_16to8copy)(pY_Cur + 8, &data[1 * 64], stride);
+		(((struct global_all_dll *)Dll::Tls())->transfer_16to8copy)(pY_Cur + next_block, &data[2 * 64], stride);
+		(((struct global_all_dll *)Dll::Tls())->transfer_16to8copy)(pY_Cur + 8 + next_block, &data[3 * 64], stride);
+		(((struct global_all_dll *)Dll::Tls())->transfer_16to8copy)(pU_Cur, &data[4 * 64], stride2);
+		(((struct global_all_dll *)Dll::Tls())->transfer_16to8copy)(pV_Cur, &data[5 * 64], stride2);
 	}
 	stop_transfer_timer();
 }
@@ -402,7 +377,7 @@
 	int i;
 	const uint32_t iQuant = pMB->quant;
 	const int direction = dec->alternate_vertical_scan ? 2 : 0;
-	const quant_interFuncPtr dequant = dec->quant_type == 0 ? dequant_h263_inter : dequant_mpeg_inter;
+	const quant_interFuncPtr dequant = dec->quant_type == 0 ? (((struct global_all_dll *)Dll::Tls())->dequant_h263_inter) : (((struct global_all_dll *)Dll::Tls())->dequant_mpeg_inter);
 
 	for (i = 0; i < 6; i++) {
 
@@ -419,7 +394,7 @@
 			stop_iquant_timer();
 
 			start_timer();
-			idct(&data[i * 64]);
+			(((struct global_all_dll *)Dll::Tls())->idct)(&data[i * 64]);
 			stop_idct_timer();
 		}
 	}
@@ -432,46 +407,34 @@
 	start_timer();
 	if (reduced_resolution) {
 		if (cbp & 32)
-			add_upsampled_8x8_16to8(pY_Cur, &data[0 * 64], stride);
+			(((struct global_all_dll *)Dll::Tls())->add_upsampled_8x8_16to8)(pY_Cur, &data[0 * 64], stride);
 		if (cbp & 16)
-			add_upsampled_8x8_16to8(pY_Cur + 16, &data[1 * 64], stride);
+			(((struct global_all_dll *)Dll::Tls())->add_upsampled_8x8_16to8)(pY_Cur + 16, &data[1 * 64], stride);
 		if (cbp & 8)
-			add_upsampled_8x8_16to8(pY_Cur + next_block, &data[2 * 64], stride);
+			(((struct global_all_dll *)Dll::Tls())->add_upsampled_8x8_16to8)(pY_Cur + next_block, &data[2 * 64], stride);
 		if (cbp & 4)
-			add_upsampled_8x8_16to8(pY_Cur + 16 + next_block, &data[3 * 64], stride);
+			(((struct global_all_dll *)Dll::Tls())->add_upsampled_8x8_16to8)(pY_Cur + 16 + next_block, &data[3 * 64], stride);
 		if (cbp & 2)
-			add_upsampled_8x8_16to8(pU_Cur, &data[4 * 64], stride2);
+			(((struct global_all_dll *)Dll::Tls())->add_upsampled_8x8_16to8)(pU_Cur, &data[4 * 64], stride2);
 		if (cbp & 1)
-			add_upsampled_8x8_16to8(pV_Cur, &data[5 * 64], stride2);
+			(((struct global_all_dll *)Dll::Tls())->add_upsampled_8x8_16to8)(pV_Cur, &data[5 * 64], stride2);
 	} else {
 		if (cbp & 32)
-			transfer_16to8add(pY_Cur, &data[0 * 64], stride);
+			(((struct global_all_dll *)Dll::Tls())->transfer_16to8add)(pY_Cur, &data[0 * 64], stride);
 		if (cbp & 16)
-			transfer_16to8add(pY_Cur + 8, &data[1 * 64], stride);
+			(((struct global_all_dll *)Dll::Tls())->transfer_16to8add)(pY_Cur + 8, &data[1 * 64], stride);
 		if (cbp & 8)
-			transfer_16to8add(pY_Cur + next_block, &data[2 * 64], stride);
+			(((struct global_all_dll *)Dll::Tls())->transfer_16to8add)(pY_Cur + next_block, &data[2 * 64], stride);
 		if (cbp & 4)
-			transfer_16to8add(pY_Cur + 8 + next_block, &data[3 * 64], stride);
+			(((struct global_all_dll *)Dll::Tls())->transfer_16to8add)(pY_Cur + 8 + next_block, &data[3 * 64], stride);
 		if (cbp & 2)
-			transfer_16to8add(pU_Cur, &data[4 * 64], stride2);
+			(((struct global_all_dll *)Dll::Tls())->transfer_16to8add)(pU_Cur, &data[4 * 64], stride2);
 		if (cbp & 1)
-			transfer_16to8add(pV_Cur, &data[5 * 64], stride2);
+			(((struct global_all_dll *)Dll::Tls())->transfer_16to8add)(pV_Cur, &data[5 * 64], stride2);
 	}
 	stop_transfer_timer();
 }
 
-static void __inline
-validate_vector(VECTOR * mv, unsigned int x_pos, unsigned int y_pos, const DECODER * dec)
-{
-	/* clip a vector to valid range 
-	   prevents crashes if bitstream is broken 
-	*/
-	int shift = 5 + dec->quarterpel;
-	int xborder_high = (int)(dec->mb_width - x_pos) << shift;
-	int xborder_low = (-(int)x_pos-1) << shift;
-	int yborder_high = (int)(dec->mb_height - y_pos) << shift;
-	int yborder_low = (-(int)y_pos-1) << shift;
-
 #define CHECK_MV(mv) \
 	do { \
 	if ((mv).x > xborder_high) { \
@@ -490,6 +453,19 @@
 	} \
 	} while (0)
 
+static void __inline
+validate_vector(VECTOR * mv, unsigned int x_pos, unsigned int y_pos, const DECODER * dec)
+{
+	/* clip a vector to valid range 
+	   prevents crashes if bitstream is broken 
+	*/
+	int shift = 5 + dec->quarterpel;
+	int xborder_high = (int)(dec->mb_width - x_pos) << shift;
+	int xborder_low = (-(int)x_pos-1) << shift;
+	int yborder_high = (int)(dec->mb_height - y_pos) << shift;
+	int yborder_low = (-(int)y_pos-1) << shift;
+
+
 	CHECK_MV(mv[0]);
 	CHECK_MV(mv[1]);
 	CHECK_MV(mv[2]);
@@ -1151,32 +1127,32 @@
 	interpolate8x8_switch(dec->tmp.v, backward.v, 8 * x_pos, 8 * y_pos,
 						b_uv_dx, b_uv_dy, stride2, 0);
 
-	interpolate8x8_avg2(dec->cur.y + (16 * y_pos * stride) + 16 * x_pos,
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dec->cur.y + (16 * y_pos * stride) + 16 * x_pos,
 						dec->cur.y + (16 * y_pos * stride) + 16 * x_pos,
 						dec->tmp.y + (16 * y_pos * stride) + 16 * x_pos,
 						stride, 0, 8);
 
-	interpolate8x8_avg2(dec->cur.y + (16 * y_pos * stride) + 16 * x_pos + 8,
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dec->cur.y + (16 * y_pos * stride) + 16 * x_pos + 8,
 						dec->cur.y + (16 * y_pos * stride) + 16 * x_pos + 8,
 						dec->tmp.y + (16 * y_pos * stride) + 16 * x_pos + 8,
 						stride, 0, 8);
 
-	interpolate8x8_avg2(dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos,
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos,
 						dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos,
 						dec->tmp.y + ((16 * y_pos + 8) * stride) + 16 * x_pos,
 						stride, 0, 8);
 
-	interpolate8x8_avg2(dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8,
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8,
 						dec->cur.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8,
 						dec->tmp.y + ((16 * y_pos + 8) * stride) + 16 * x_pos + 8,
 						stride, 0, 8);
 
-	interpolate8x8_avg2(dec->cur.u + (8 * y_pos * stride2) + 8 * x_pos,
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dec->cur.u + (8 * y_pos * stride2) + 8 * x_pos,
 						dec->cur.u + (8 * y_pos * stride2) + 8 * x_pos,
 						dec->tmp.u + (8 * y_pos * stride2) + 8 * x_pos,
 						stride2, 0, 8);
 
-	interpolate8x8_avg2(dec->cur.v + (8 * y_pos * stride2) + 8 * x_pos,
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dec->cur.v + (8 * y_pos * stride2) + 8 * x_pos,
 						dec->cur.v + (8 * y_pos * stride2) + 8 * x_pos,
 						dec->tmp.v + (8 * y_pos * stride2) + 8 * x_pos,
 						stride2, 0, 8);
@@ -1446,7 +1422,7 @@
 			ret = XVID_ERR_END;
 		}
 
-		emms();
+		(((struct global_all_dll *)Dll::Tls())->emms)();
 		stop_global_timer();
 		return ret;
 	}
@@ -1459,7 +1435,7 @@
 		image_output(&dec->refn[0], dec->width, dec->height, dec->edged_width,
 					 (uint8_t**)frame->output.plane, frame->output.stride, frame->output.csp, dec->interlacing);
 		if (stats) stats->type = XVID_TYPE_NOTHING;
-		emms();
+		(((struct global_all_dll *)Dll::Tls())->emms)();
 		return 1;	/* one byte consumed */
 	}
 
@@ -1478,7 +1454,7 @@
 	if (coding_type == -1) { /* nothing */
 		if (success) goto done;
 		if (stats) stats->type = XVID_TYPE_NOTHING;
-		emms();
+		(((struct global_all_dll *)Dll::Tls())->emms)();
 		return BitstreamPos(&bs)/8;
 	}
 
@@ -1497,7 +1473,7 @@
 			stats->data.vol.par = dec->aspect_ratio;
 			stats->data.vol.par_width = dec->par_width;
 			stats->data.vol.par_height = dec->par_height;
-			emms();
+			(((struct global_all_dll *)Dll::Tls())->emms)();
 			return BitstreamPos(&bs)/8;	/* number of bytes consumed */
 		}
 		goto repeat;
@@ -1622,7 +1598,7 @@
 		}
 	}
 
-	emms();
+	(((struct global_all_dll *)Dll::Tls())->emms)();
 	stop_global_timer();
 
 	return (BitstreamPos(&bs) + 7) / 8;	/* number of bytes consumed */
diff -bBru src-ori/encoder.c src/encoder.c
--- src-ori/encoder.c	Wed Jan 12 10:32:43 2005
+++ src/encoder.c	Wed Jan 12 12:42:42 2005
@@ -25,29 +25,38 @@
  *
  ****************************************************************************/
 
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include <string.h>
-
-#include "encoder.h"
-#include "prediction/mbprediction.h"
-#include "global.h"
-#include "utils/timer.h"
-#include "image/image.h"
-#include "image/font.h"
-#include "motion/sad.h"
-#include "motion/motion.h"
-#include "motion/gmc.h"
-
-#include "bitstream/cbp.h"
-#include "utils/mbfunctions.h"
-#include "bitstream/bitstream.h"
-#include "bitstream/mbcoding.h"
-#include "utils/emms.h"
-#include "bitstream/mbcoding.h"
-#include "quant/quant_matrix.h"
-#include "utils/mem_align.h"
+#include <e32std.h>
+#include "global_all_dll.h"
+ uint8_t get_fcode(uint16_t sr)
+ {
+	/* Map sr to a code 1..7: 1 for sr <= 16, one more per doubling of the limit, 7 for sr <= 1024; returns 0 when sr > 1024. */
+	if (sr <= 16)
+		return 1;
+
+	else if (sr <= 32)
+		return 2;
+
+	else if (sr <= 64)
+		return 3;
+
+	else if (sr <= 128)
+		return 4;
+
+	else if (sr <= 256)
+		return 5;
+
+	else if (sr <= 512)
+		return 6;
+
+	else if (sr <= 1024)
+		return 7;
+
+	else
+		return 0;
+}
+
+
+
 
 /*****************************************************************************
  * Local function prototypes
@@ -772,14 +781,14 @@
 
 	/* call plugins */
 	for (i=0; i<(unsigned int)pEnc->num_plugins;i++) {
-		emms();
+		(((struct global_all_dll *)Dll::Tls())->emms)();
 		if (pEnc->plugins[i].func) {
 			if (pEnc->plugins[i].func(pEnc->plugins[i].param, opt, &data, 0) < 0) {
 				continue;
 			}
 		}
 	}
-	emms();
+	(((struct global_all_dll *)Dll::Tls())->emms)();
 
 	/* copy modified values back into frame*/
 	if (opt == XVID_PLG_BEFORE) {
@@ -904,7 +913,7 @@
 	/* 2^8 precision maximum */
 	if (_par_width>255 || _par_height>255) {
 		float div;
-		emms();
+		(((struct global_all_dll *)Dll::Tls())->emms)();
 		if (_par_width>_par_height)
 			div = (float)_par_width/255;
 		else
@@ -966,7 +975,7 @@
 			pEnc->mbParam.edged_width, (uint8_t**)xFrame->input.plane, xFrame->input.stride,
 			xFrame->input.csp, xFrame->vol_flags & XVID_VOL_INTERLACING))
 		{
-			emms();
+			(((struct global_all_dll *)Dll::Tls())->emms)();
 			return XVID_ERR_FORMAT;
 		}
 		stop_conv_timer();
@@ -1112,7 +1121,7 @@
 			}
 			DPRINTF(XVID_DEBUG_DEBUG, "*** END\n");
 
-			emms();
+			(((struct global_all_dll *)Dll::Tls())->emms)();
 			return XVID_ERR_END;	/* end of stream reached */
 		}
 		goto done;	/* nothing to encode yet; encoder lag */
@@ -1387,7 +1396,7 @@
 	stop_global_timer();
 	write_timer();
 
-	emms();
+	(((struct global_all_dll *)Dll::Tls())->emms)();
 	return BitstreamLength(&bs);
 }
 
@@ -1511,7 +1520,7 @@
 			pEnc->current->mbs, mb_width, mb_height, pEnc->mbParam.mb_width,
 			16, 0);
 	}
-	emms();
+	(((struct global_all_dll *)Dll::Tls())->emms)();
 
 	BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */
 
@@ -1532,6 +1541,9 @@
 
 
 /* FrameCodeP also handles S(GMC)-VOPs */
+#if 0
+	DPRINTF(XVID_DEBUG_DEBUG, "kmu %i %i %i\n", current->sStat.kblks, current->sStat.mblks, current->sStat.ublks);
+#endif
 static int
 FrameCodeP(Encoder * pEnc,
 		   Bitstream * bs)
@@ -1749,7 +1761,7 @@
 					for (k=pEnc->bframenum_head; k< pEnc->bframenum_tail; k++)
 					{
 						int iSAD;
-						iSAD = sad16(reference->image.y + 16*y*pParam->edged_width + 16*x,
+						iSAD = (((struct global_all_dll *)Dll::Tls())->sad16)(reference->image.y + 16*y*pParam->edged_width + 16*x,
 									pEnc->bframes[k]->image.y + 16*y*pParam->edged_width + 16*x,
 								pParam->edged_width,BFRAME_SKIP_THRESHHOLD);
 						if (iSAD >= BFRAME_SKIP_THRESHHOLD * pMB->quant)
@@ -1837,7 +1849,7 @@
 			16, 0);
 	}
 
-	emms();
+	(((struct global_all_dll *)Dll::Tls())->emms)();
 
 	if (current->sStat.iMvCount == 0)
 		current->sStat.iMvCount = 1;
@@ -1863,9 +1875,6 @@
 	pEnc->fMvPrevSigma = fSigma;
 
 	/* frame drop code */
-#if 0
-	DPRINTF(XVID_DEBUG_DEBUG, "kmu %i %i %i\n", current->sStat.kblks, current->sStat.mblks, current->sStat.ublks);
-#endif
 	if (current->sStat.kblks + current->sStat.mblks <=
 		(pParam->frame_drop_ratio * mb_width * mb_height) / 100 &&
 		pEnc->mbParam.max_bframes == 0)
@@ -1925,6 +1934,10 @@
 }
 
 
+#define BFRAME_DEBUG  	if (!first && fp){ \
+		fprintf(fp,"Y=%3d   X=%3d   MB=%2d   CBP=%02X\n",y,x,mb->mode,mb->cbp); \
+	}
+
 static void
 FrameCodeB(Encoder * pEnc,
 		   FRAMEINFO * frame,
@@ -1941,9 +1954,6 @@
 	#ifdef BFRAMES_DEC_DEBUG
 	FILE *fp;
 	static char first=0;
-#define BFRAME_DEBUG  	if (!first && fp){ \
-		fprintf(fp,"Y=%3d   X=%3d   MB=%2d   CBP=%02X\n",y,x,mb->mode,mb->cbp); \
-	}
 
 	/* XXX: pEnc->current->global_flags &= ~XVID_VOP_REDUCED;  reduced resoltion not yet supported */
 
@@ -2055,7 +2065,7 @@
 		}
 	}
 
-	emms();
+	(((struct global_all_dll *)Dll::Tls())->emms)();
 
 	/* TODO: dynamic fcode/bcode ??? */
 
diff -bBru src-ori/encoder.h src/encoder.h
--- src-ori/encoder.h	Wed Jan 12 10:32:43 2005
+++ src/encoder.h	Wed Jan 12 12:38:04 2005
@@ -156,96 +253,129 @@
 } FRAMEINFO;
 
 
+
 typedef struct
 {
+
 	MBParam mbParam;
 
+
+
 	int iFrameNum;
+
 	int bitrate;
 
+
+
     /* zones */
+
     unsigned int num_zones;
+
     xvid_enc_zone_t * zones;
 
+
+
     /* plugins */
+
     int num_plugins;    /* note: we store plugin flags in MBPARAM */
+
     xvid_enc_plugin_t * plugins;
 
+
+
     /* dquant */
 
+
+
     int * temp_dquants;
 
+
+
 	/* images */
 
+
+
 	FRAMEINFO *current;
+
 	FRAMEINFO *reference;
 
+
+
 	IMAGE sOriginal;    /* original image copy for i/p frames */
+
     IMAGE sOriginal2;   /* original image copy for b-frames */
+
 	IMAGE vInterH;
+
 	IMAGE vInterV;
+
 	IMAGE vInterVf;
+
 	IMAGE vInterHV;
+
 	IMAGE vInterHVf;
 
+
+
 	IMAGE vGMC;
 
+
+
 	/* image queue */
+
 	int queue_head;
+
 	int queue_tail;
+
 	int queue_size;
+
 	QUEUEINFO *queue;
 
+
+
 	/* bframe buffer */
+
 	int bframenum_head;
+
 	int bframenum_tail;
+
 	int flush_bframes;
 
+
+
 	FRAMEINFO **bframes;
+
 	IMAGE f_refh;
+
 	IMAGE f_refv;
+
 	IMAGE f_refhv;
 
+
+
     /* closed_gop fixup temporary storage */
+
 	int closed_bframenum; /* == -1 if there is no fixup intended */
-    QUEUEINFO closed_qframe;	/* qFrame, only valid when >= 0 */
 
-	int m_framenum; /* debug frame num counter; unlike iFrameNum, does not reset at ivop */
+    QUEUEINFO closed_qframe;	/* qFrame, only valid when >= 0 */
 
-	float fMvPrevSigma;
-} Encoder;
 
-/*****************************************************************************
- * Inline functions
- ****************************************************************************/
 
-static __inline uint8_t
-get_fcode(uint16_t sr)
-{
-	if (sr <= 16)
-		return 1;
+	int m_framenum; /* debug frame num counter; unlike iFrameNum, does not reset at ivop */
 
-	else if (sr <= 32)
-		return 2;
 
-	else if (sr <= 64)
-		return 3;
 
-	else if (sr <= 128)
-		return 4;
+	float fMvPrevSigma;
 
-	else if (sr <= 256)
-		return 5;
+} Encoder;
 
-	else if (sr <= 512)
-		return 6;
 
-	else if (sr <= 1024)
-		return 7;
+/*****************************************************************************
+ * Inline functions
+ ****************************************************************************/
 
-	else
-		return 0;
-}
+/* get_fcode() is no longer a static __inline in this header; only the prototype remains here. */
+extern uint8_t get_fcode(uint16_t sr);
 
 
 /*****************************************************************************
diff -bBru src-ori/global.h src/global.h
--- src-ori/global.h	Wed Jan 12 10:32:43 2005
+++ src/global.h	Wed Jan 12 12:38:11 2005
@@ -60,16 +60,16 @@
 #define N_VOP	4
 
 /* convert mpeg-4 coding type i/p/b/s_VOP to XVID_TYPE_xxx */
-static __inline int
-coding2type(int coding_type)
-{
+static __inline int coding2type(int coding_type)
+ {
+
 	return coding_type + 1;
 }
 
 /* convert XVID_TYPE_xxx to bitstream coding type i/p/b/s_VOP */
-static __inline int
-type2coding(int xvid_type)
-{
+static __inline int type2coding(int xvid_type)
+ {
+
 	return xvid_type - 1;
 }
 
@@ -188,48 +274,86 @@
 
 typedef struct
 {
+
 	/* decoder/encoder */
+
 	VECTOR mvs[4];
 
+
+
 	short int pred_values[6][MBPRED_SIZE];
+
 	int acpred_directions[6];
 
+
+
 	int mode;
+
 	int quant;					/* absolute quant */
 
+
+
 	int field_dct;
+
 	int field_pred;
+
 	int field_for_top;
+
 	int field_for_bot;
 
+
+
 	/* encoder specific */
 
+
+
 	VECTOR pmvs[4];
+
 	VECTOR qmvs[4];				/* mvs in quarter pixel resolution */
 
+
+
 	int32_t sad8[4];			/* SAD values for inter4v-VECTORs */
+
 	int32_t sad16;				/* SAD value for inter-VECTOR */
 
+
+
 	int dquant;
+
 	int cbp;
 
+
+
 	/* bframe stuff */
 
+
+
 	VECTOR b_mvs[4];
+
 	VECTOR b_qmvs[4];
 
+
+
 	VECTOR amv; /* average motion vectors from GMC  */
+
 	int32_t mcsel;
 
+
+
 /* This structure has become way to big! What to do? Split it up?   */
 
+
+
 }
+
 MACROBLOCK;
 
-static __inline uint32_t
-get_dc_scaler(uint32_t quant,
+
+static __inline uint32_t get_dc_scaler(uint32_t quant,
 			  uint32_t lum)
-{
+ {
+
 	if (quant < 5)
 		return 8;
 
Only in src: global_all_dll.c
Only in src: global_all_dll.h
diff -bBru src-ori/image/colorspace.c src/image/colorspace.c
--- src-ori/image/colorspace.c	Wed Jan 12 10:32:43 2005
+++ src/image/colorspace.c	Wed Jan 12 12:39:41 2005
@@ -23,67 +23,18 @@
  *
  ****************************************************************************/
 
-#include <string.h>				/* memcpy */
+#include <e32std.h>
+#include "../global_all_dll.h"
 
-#include "../global.h"
-#include "colorspace.h"
 
 /* function pointers */
 
-/* input */
-packedFuncPtr rgb555_to_yv12;
-packedFuncPtr rgb565_to_yv12;
-packedFuncPtr rgb444_to_yv12;
-packedFuncPtr bgr_to_yv12;
-packedFuncPtr bgra_to_yv12;
-packedFuncPtr abgr_to_yv12;
-packedFuncPtr rgba_to_yv12;
-packedFuncPtr argb_to_yv12;
-packedFuncPtr yuv_to_yv12;
-packedFuncPtr yuyv_to_yv12;
-packedFuncPtr uyvy_to_yv12;
-
-packedFuncPtr rgb555i_to_yv12;
-packedFuncPtr rgb565i_to_yv12;
-packedFuncPtr bgri_to_yv12;
-packedFuncPtr bgrai_to_yv12;
-packedFuncPtr abgri_to_yv12;
-packedFuncPtr rgbai_to_yv12;
-packedFuncPtr argbi_to_yv12;
-packedFuncPtr yuyvi_to_yv12;
-packedFuncPtr uyvyi_to_yv12;
-
-/* output */
-packedFuncPtr yv12_to_rgb555;
-packedFuncPtr yv12_to_rgb565;
-packedFuncPtr yv12_to_rgb444;
-packedFuncPtr yv12_to_bgr;
-packedFuncPtr yv12_to_bgra;
-packedFuncPtr yv12_to_abgr;
-packedFuncPtr yv12_to_rgba;
-packedFuncPtr yv12_to_argb;
-packedFuncPtr yv12_to_yuv;
-packedFuncPtr yv12_to_yuyv;
-packedFuncPtr yv12_to_uyvy;
-
-packedFuncPtr yv12_to_rgb555i;
-packedFuncPtr yv12_to_rgb565i;
-packedFuncPtr yv12_to_bgri;
-packedFuncPtr yv12_to_bgrai;
-packedFuncPtr yv12_to_abgri;
-packedFuncPtr yv12_to_rgbai;
-packedFuncPtr yv12_to_argbi;
-packedFuncPtr yv12_to_yuyvi;
-packedFuncPtr yv12_to_uyvyi;
-
-planarFuncPtr yv12_to_yv12;
-
-
-int32_t RGB_Y_tab[256];
-int32_t B_U_tab[256];
-int32_t G_U_tab[256];
-int32_t G_V_tab[256];
-int32_t R_V_tab[256];
+
+
+
+
+
+
 
 
 
@@ -125,15 +76,16 @@
 
 
 
+
 /********** colorspace input (xxx_to_yv12) functions **********/
 
-/*	rgb -> yuv def's
 
+
+
+/*	rgb -> yuv def's
 	this following constants are "official spec"
 	Video Demystified" (ISBN 1-878707-09-4)
-
 	rgb<->yuv _is_ lossy, since most programs do the conversion differently
-
 	SCALEBITS/FIX taken from  ffmpeg
 */
 
@@ -349,13 +321,14 @@
 	((MAX(0,MIN(255, G))     ) & 0x00f0) | \
 	((MAX(0,MIN(255, B)) >> 4) & 0x000f)
 
+
 #define WRITE_RGB16(ROW,UV_ROW,C1)	\
-	rgb_y = RGB_Y_tab[ y_ptr[y_stride*(ROW) + 0] ];						\
+	rgb_y = (((struct global_all_dll *)Dll::Tls())->RGB_Y_tab)[ y_ptr[y_stride*(ROW) + 0] ];						\
 	b[ROW] = (b[ROW] & 0x7) + ((rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT);	\
 	g[ROW] = (g[ROW] & 0x7) + ((rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT);	\
 	r[ROW] = (r[ROW] & 0x7) + ((rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT);		\
 	*(uint16_t *) (x_ptr+((ROW)*x_stride)+0) = C1(r[ROW], g[ROW], b[ROW]);	\
-	rgb_y = RGB_Y_tab[ y_ptr[y_stride*(ROW) + 1] ];				\
+	rgb_y = (((struct global_all_dll *)Dll::Tls())->RGB_Y_tab)[ y_ptr[y_stride*(ROW) + 1] ];				\
 	b[ROW] = (b[ROW] & 0x7) + ((rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT);		\
 	g[ROW] = (g[ROW] & 0x7) + ((rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT);	\
 	r[ROW] = (r[ROW] & 0x7) + ((rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT);		\
@@ -364,11 +338,12 @@
 #define YV12_TO_RGB16_ROW(SIZE,C1,C2,C3,C4) \
 	int r[2], g[2], b[2];					\
 	r[0] = r[1] = g[0] = g[1] = b[0] = b[1] = 0;
+
 #define YV12_TO_RGB16(SIZE,C1,C2,C3,C4)		\
 	int rgb_y; 												\
-	int b_u0 = B_U_tab[ u_ptr[0] ];								\
-	int g_uv0 = G_U_tab[ u_ptr[0] ] + G_V_tab[ v_ptr[0] ];		\
-	int r_v0 = R_V_tab[ v_ptr[0] ];								\
+	int b_u0 = (((struct global_all_dll *)Dll::Tls())->B_U_tab)[ u_ptr[0] ];								\
+	int g_uv0 = (((struct global_all_dll *)Dll::Tls())->G_U_tab)[ u_ptr[0] ] + (((struct global_all_dll *)Dll::Tls())->G_V_tab)[ v_ptr[0] ];		\
+	int r_v0 = (((struct global_all_dll *)Dll::Tls())->R_V_tab)[ v_ptr[0] ];								\
 	WRITE_RGB16(0, 0, C1)										\
 	WRITE_RGB16(1, 0, C1)
 
@@ -377,14 +353,15 @@
 	r[0] = r[1] = r[2] = r[3] = 0;			\
 	g[0] = g[1] = g[2] = g[3] = 0;			\
 	b[0] = b[1] = b[2] = b[3] = 0;
+
 #define YV12_TO_RGB16I(SIZE,C1,C2,C3,C4)		\
 	int rgb_y; 													\
-	int b_u0 = B_U_tab[ u_ptr[0] ];								\
-	int g_uv0 = G_U_tab[ u_ptr[0] ] + G_V_tab[ v_ptr[0] ];		\
-	int r_v0 = R_V_tab[ v_ptr[0] ];								\
-    int b_u1 = B_U_tab[ u_ptr[uv_stride] ];						\
-	int g_uv1 = G_U_tab[ u_ptr[uv_stride] ] + G_V_tab[ v_ptr[uv_stride] ];	\
-	int r_v1 = R_V_tab[ v_ptr[uv_stride] ];						\
+	int b_u0 = (((struct global_all_dll *)Dll::Tls())->B_U_tab)[ u_ptr[0] ];								\
+	int g_uv0 = (((struct global_all_dll *)Dll::Tls())->G_U_tab)[ u_ptr[0] ] + (((struct global_all_dll *)Dll::Tls())->G_V_tab)[ v_ptr[0] ];		\
+	int r_v0 = (((struct global_all_dll *)Dll::Tls())->R_V_tab)[ v_ptr[0] ];								\
+    int b_u1 = (((struct global_all_dll *)Dll::Tls())->B_U_tab)[ u_ptr[uv_stride] ];						\
+	int g_uv1 = (((struct global_all_dll *)Dll::Tls())->G_U_tab)[ u_ptr[uv_stride] ] + (((struct global_all_dll *)Dll::Tls())->G_V_tab)[ v_ptr[uv_stride] ];	\
+	int r_v1 = (((struct global_all_dll *)Dll::Tls())->R_V_tab)[ v_ptr[uv_stride] ];						\
     WRITE_RGB16(0, 0, C1)										\
 	WRITE_RGB16(1, 1, C1)										\
     WRITE_RGB16(2, 0, C1)										\
@@ -394,12 +372,12 @@
 /* rgb/rgbi output */
 
 #define WRITE_RGB(SIZE,ROW,UV_ROW,C1,C2,C3,C4)	\
-	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];						\
+	rgb_y = (((struct global_all_dll *)Dll::Tls())->RGB_Y_tab)[ y_ptr[(ROW)*y_stride + 0] ];						\
 	x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
 	x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
 	x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
 	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;									\
-	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];									\
+	rgb_y = (((struct global_all_dll *)Dll::Tls())->RGB_Y_tab)[ y_ptr[(ROW)*y_stride + 1] ];									\
 	x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
 	x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
 	x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
@@ -409,9 +388,9 @@
 #define YV12_TO_RGB_ROW(SIZE,C1,C2,C3,C4) 	/* nothing */
 #define YV12_TO_RGB(SIZE,C1,C2,C3,C4)				\
 	int rgb_y;												\
-	int b_u0 = B_U_tab[ u_ptr[0] ];							\
-	int g_uv0 = G_U_tab[ u_ptr[0] ] + G_V_tab[ v_ptr[0] ];	\
-	int r_v0 = R_V_tab[ v_ptr[0] ];							\
+	int b_u0 = (((struct global_all_dll *)Dll::Tls())->B_U_tab)[ u_ptr[0] ];							\
+	int g_uv0 = (((struct global_all_dll *)Dll::Tls())->G_U_tab)[ u_ptr[0] ] + (((struct global_all_dll *)Dll::Tls())->G_V_tab)[ v_ptr[0] ];	\
+	int r_v0 = (((struct global_all_dll *)Dll::Tls())->R_V_tab)[ v_ptr[0] ];							\
 	WRITE_RGB(SIZE, 0, 0, C1,C2,C3,C4)						\
 	WRITE_RGB(SIZE, 1, 0, C1,C2,C3,C4)
 
@@ -418,12 +398,12 @@
 #define YV12_TO_RGBI_ROW(SIZE,C1,C2,C3,C4) 	/* nothing */
 #define YV12_TO_RGBI(SIZE,C1,C2,C3,C4)				\
 	int rgb_y;												\
-	int b_u0 = B_U_tab[ u_ptr[0] ];							\
-	int g_uv0 = G_U_tab[ u_ptr[0] ] + G_V_tab[ v_ptr[0] ];	\
-	int r_v0 = R_V_tab[ v_ptr[0] ];							\
-    int b_u1 = B_U_tab[ u_ptr[uv_stride] ];					\
-	int g_uv1 = G_U_tab[ u_ptr[uv_stride] ] + G_V_tab[ v_ptr[uv_stride] ];	\
-	int r_v1 = R_V_tab[ v_ptr[uv_stride] ];					\
+	int b_u0 = (((struct global_all_dll *)Dll::Tls())->B_U_tab)[ u_ptr[0] ];							\
+	int g_uv0 = (((struct global_all_dll *)Dll::Tls())->G_U_tab)[ u_ptr[0] ] + (((struct global_all_dll *)Dll::Tls())->G_V_tab)[ v_ptr[0] ];	\
+	int r_v0 = (((struct global_all_dll *)Dll::Tls())->R_V_tab)[ v_ptr[0] ];							\
+    int b_u1 = (((struct global_all_dll *)Dll::Tls())->B_U_tab)[ u_ptr[uv_stride] ];					\
+	int g_uv1 = (((struct global_all_dll *)Dll::Tls())->G_U_tab)[ u_ptr[uv_stride] ] + (((struct global_all_dll *)Dll::Tls())->G_V_tab)[ v_ptr[uv_stride] ];	\
+	int r_v1 = (((struct global_all_dll *)Dll::Tls())->R_V_tab)[ v_ptr[uv_stride] ];					\
 	WRITE_RGB(SIZE, 0, 0, C1,C2,C3,C4)		\
 	WRITE_RGB(SIZE, 1, 1, C1,C2,C3,C4)		\
 	WRITE_RGB(SIZE, 2, 0, C1,C2,C3,C4)		\
@@ -524,10 +508,11 @@
 	int32_t i;
 
 	for (i = 0; i < 256; i++) {
-		RGB_Y_tab[i] = FIX_OUT(RGB_Y_OUT) * (i - Y_ADD_OUT);
-		B_U_tab[i] = FIX_OUT(B_U_OUT) * (i - U_ADD_OUT);
-		G_U_tab[i] = FIX_OUT(G_U_OUT) * (i - U_ADD_OUT);
-		G_V_tab[i] = FIX_OUT(G_V_OUT) * (i - V_ADD_OUT);
-		R_V_tab[i] = FIX_OUT(R_V_OUT) * (i - V_ADD_OUT);
+		(((struct global_all_dll *)Dll::Tls())->RGB_Y_tab)[i] = FIX_OUT(RGB_Y_OUT) * (i - Y_ADD_OUT);
+		(((struct global_all_dll *)Dll::Tls())->B_U_tab)[i] = FIX_OUT(B_U_OUT) * (i - U_ADD_OUT);
+		(((struct global_all_dll *)Dll::Tls())->G_U_tab)[i] = FIX_OUT(G_U_OUT) * (i - U_ADD_OUT);
+		(((struct global_all_dll *)Dll::Tls())->G_V_tab)[i] = FIX_OUT(G_V_OUT) * (i - V_ADD_OUT);
+		(((struct global_all_dll *)Dll::Tls())->R_V_tab)[i] = FIX_OUT(R_V_OUT) * (i - V_ADD_OUT);
 	}
 }
+
diff -bBru src-ori/image/colorspace.h src/image/colorspace.h
--- src-ori/image/colorspace.h	Wed Jan 12 10:32:43 2005
+++ src/image/colorspace.h	Wed Jan 12 12:37:50 2005
@@ -52,26 +61,26 @@
 
 /* xxx_to_yv12 colorspace conversion functions (encoder) */
 
-extern packedFuncPtr rgb555_to_yv12;
-extern packedFuncPtr rgb565_to_yv12;
-extern packedFuncPtr rgb444_to_yv12;
-extern packedFuncPtr bgr_to_yv12;
-extern packedFuncPtr bgra_to_yv12;
-extern packedFuncPtr abgr_to_yv12;
-extern packedFuncPtr rgba_to_yv12;
-extern packedFuncPtr argb_to_yv12;
-extern packedFuncPtr yuyv_to_yv12;
-extern packedFuncPtr uyvy_to_yv12;
-
-extern packedFuncPtr rgb555i_to_yv12;
-extern packedFuncPtr rgb565i_to_yv12;
-extern packedFuncPtr bgri_to_yv12;
-extern packedFuncPtr bgrai_to_yv12;
-extern packedFuncPtr abgri_to_yv12;
-extern packedFuncPtr rgbai_to_yv12;
-extern packedFuncPtr argbi_to_yv12;
-extern packedFuncPtr yuyvi_to_yv12;
-extern packedFuncPtr uyvyi_to_yv12;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
 /* plain c */
 packedFunc rgb555_to_yv12_c;
@@ -114,26 +123,26 @@
 
 /* yv12_to_xxx colorspace conversion functions (decoder) */
 
-extern packedFuncPtr yv12_to_rgb555;
-extern packedFuncPtr yv12_to_rgb565;
-extern packedFuncPtr yv12_to_rgb444;
-extern packedFuncPtr yv12_to_bgr;
-extern packedFuncPtr yv12_to_bgra;
-extern packedFuncPtr yv12_to_abgr;
-extern packedFuncPtr yv12_to_rgba;
-extern packedFuncPtr yv12_to_argb;
-extern packedFuncPtr yv12_to_yuyv;
-extern packedFuncPtr yv12_to_uyvy;
-
-extern packedFuncPtr yv12_to_rgb555i;
-extern packedFuncPtr yv12_to_rgb565i;
-extern packedFuncPtr yv12_to_bgri;
-extern packedFuncPtr yv12_to_bgrai;
-extern packedFuncPtr yv12_to_abgri;
-extern packedFuncPtr yv12_to_rgbai;
-extern packedFuncPtr yv12_to_argbi;
-extern packedFuncPtr yv12_to_yuyvi;
-extern packedFuncPtr yv12_to_uyvyi;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
 /* plain c */
 packedFunc yv12_to_rgb555_c;
@@ -171,13 +180,18 @@
 
 typedef void (planarFunc) (
 				uint8_t * y_dst, uint8_t * u_dst, uint8_t * v_dst,
+
 				int y_dst_stride, int uv_dst_stride,
+
 				uint8_t * y_src, uint8_t * u_src, uint8_t * v_src,
+
 				int y_src_stride, int uv_src_stride,
+
 				int width, int height, int vflip);
+
 typedef planarFunc *planarFuncPtr;
 
-extern planarFuncPtr yv12_to_yv12;
+
 
 planarFunc yv12_to_yv12_c;
 
diff -bBru src-ori/image/font.c src/image/font.c
--- src-ori/image/font.c	Wed Jan 12 10:32:43 2005
+++ src/image/font.c	Wed Jan 12 12:40:20 2005
@@ -23,11 +23,9 @@
  *
  ****************************************************************************/
 
-#include <stdio.h>
-#include <stdarg.h>
+#include <e32std.h>
+#include "../global_all_dll.h"
 
-#include "image.h"
-#include "font.h"
 
 #define FONT_WIDTH	4
 #define FONT_HEIGHT	6
@@ -295,7 +293,7 @@
 
 
 static const char ascii65[26][FONT_WIDTH*FONT_HEIGHT] = {
-	/* A */
+	
 	{0,1,1,0,
 	 1,0,0,1,
 	 1,0,0,1,
@@ -303,7 +301,7 @@
 	 1,0,0,1,
 	 1,0,0,1},
 
-	/* B */
+	
 	{1,1,1,0,
 	 1,0,0,1,
 	 1,1,1,0,
@@ -311,7 +309,7 @@
 	 1,0,0,1,
 	 1,1,1,0},
 
-	/* C */
+	
 	{0,1,1,0,
 	 1,0,0,1,
 	 1,0,0,0,
@@ -319,7 +317,7 @@
 	 1,0,0,1,
 	 0,1,1,0},
 
-	/* D */
+	
 	{1,1,0,0,
 	 1,0,1,0,
 	 1,0,0,1,
@@ -327,7 +325,7 @@
 	 1,0,1,0,
 	 1,1,0,0},
 
-	/* E */
+	
 	{1,1,1,1,
 	 1,0,0,0,
 	 1,1,1,0,
@@ -335,7 +333,7 @@
 	 1,0,0,0,
 	 1,1,1,1},
 
-	/* F */
+	
 	{1,1,1,1,
 	 1,0,0,0,
 	 1,1,1,0,
@@ -343,7 +341,7 @@
 	 1,0,0,0,
 	 1,0,0,0},
 
-	/* G */
+	
 	{0,1,1,1,
 	 1,0,0,0,
 	 1,0,1,1,
@@ -351,7 +349,7 @@
 	 1,0,0,1,
 	 0,1,1,0},
 
-	/* H */
+	
 	{1,0,0,1,
 	 1,0,0,1,
 	 1,1,1,1,
@@ -359,7 +357,7 @@
 	 1,0,0,1,
 	 1,0,0,1},
 
-	/* I */
+	
 	{0,1,1,1,
 	 0,0,1,0,
 	 0,0,1,0,
@@ -367,7 +365,7 @@
 	 0,0,1,0,
 	 0,1,1,1},
 
-	/* J */
+	
 	{0,1,1,1,
 	 0,0,1,0,
 	 0,0,1,0,
@@ -375,7 +373,7 @@
 	 1,0,1,0,
 	 0,1,0,0},
 
-	/* K */
+	
 	{1,0,0,1,
 	 1,0,0,1,
 	 1,1,1,0,
@@ -383,7 +381,7 @@
 	 1,0,0,1,
 	 1,0,0,1},
 
-	/* L */
+	
 	{1,0,0,0,
 	 1,0,0,0,
 	 1,0,0,0,
@@ -391,7 +389,7 @@
 	 1,0,0,0,
 	 1,1,1,1},
 
-	/* M */
+	
 	{1,0,0,1,
 	 1,1,1,1,
 	 1,1,1,1,
@@ -399,7 +397,7 @@
 	 1,0,0,1,
 	 1,0,0,1},
 
-	/* N */
+	
 	{1,0,0,1,
 	 1,1,0,1,
 	 1,1,0,1,
@@ -407,7 +405,7 @@
 	 1,0,1,1,
 	 1,0,0,1},
 
-	/* 0 */
+	
 	{0,1,1,0,
 	 1,0,0,1,
 	 1,0,0,1,
@@ -415,7 +413,7 @@
 	 1,0,0,1,
 	 0,1,1,0},
 
-	/* P */
+	
 	{1,1,1,0,
 	 1,0,0,1,
 	 1,1,1,0,
@@ -423,7 +421,7 @@
 	 1,0,0,0,
 	 1,0,0,0},
 
-	/* Q */
+	
 	{0,1,1,0,
 	 1,0,0,1,
 	 1,0,0,1,
@@ -432,7 +430,7 @@
 	 0,1,0,1},
 
 
-	/* R */
+	
 	{1,1,1,0,
 	 1,0,0,1,
 	 1,1,1,0,
@@ -440,7 +438,7 @@
 	 1,0,0,1,
 	 1,0,0,1},
 
-	/* S */
+	
 	{0,1,1,0,
 	 1,0,0,1,
 	 0,1,0,0,
@@ -448,7 +446,7 @@
 	 1,0,0,1,
 	 0,1,1,0},
 
-	/* T */
+	
 	{0,1,1,1,
 	 0,0,1,0,
 	 0,0,1,0,
@@ -456,7 +454,7 @@
 	 0,0,1,0,
 	 0,0,1,0},
 
-	/* U */
+	
 	{1,0,0,1,
 	 1,0,0,1,
 	 1,0,0,1,
@@ -464,7 +462,7 @@
 	 1,0,0,1,
 	 1,1,1,1},
 
-	/* V */
+	
 	{1,0,0,1,
 	 1,0,0,1,
 	 1,0,0,1,
@@ -472,7 +470,7 @@
 	 0,1,1,0,
 	 0,1,1,0},
 
-	/* W */
+	
 	{1,0,0,1,
 	 1,0,0,1,
 	 1,0,0,1,
@@ -480,7 +478,7 @@
 	 1,1,1,1,
 	 1,0,0,1},
 
-	/* X */
+	
 	{1,0,0,1,
 	 1,0,0,1,
 	 0,1,1,0,
@@ -488,7 +486,7 @@
 	 1,0,0,1,
 	 1,0,0,1},
 
-	/* Y */
+	
 	{1,0,0,1,
 	 1,0,0,1,
 	 0,1,0,0,
@@ -496,7 +494,7 @@
 	 0,1,0,0,
 	 1,0,0,0},
 
-	/* Z */
+	
 	{1,1,1,1,
 	 0,0,0,1,
 	 0,0,1,0,
@@ -509,7 +507,7 @@
 
 
 static const char ascii91[6][FONT_WIDTH*FONT_HEIGHT] = {
-	/* [ */
+	
 	{0,1,1,0,
 	 0,1,0,0,
 	 0,1,0,0,
@@ -517,7 +515,7 @@
 	 0,1,0,0,
 	 0,1,1,0},
 
-	/* '\' */
+	
 	{1,0,0,0,
 	 1,0,0,0,
 	 0,1,0,0,
@@ -525,7 +523,7 @@
 	 0,0,0,1,
 	 0,0,0,1},
 
-	/* ] */
+	
 	{0,1,1,0,
 	 0,0,1,0,
 	 0,0,1,0,
@@ -533,7 +531,7 @@
 	 0,0,1,0,
 	 0,1,1,0},
 
-	/* ^ */
+	
 	{0,1,0,1,
 	 0,0,0,0,
 	 0,0,0,0,
@@ -541,7 +539,7 @@
 	 0,0,0,0,
 	 0,0,0,0},
 
-	/* _ */
+	
 	{0,0,0,0,
 	 0,0,0,0,
 	 0,0,0,0,
@@ -549,7 +547,7 @@
 	 0,0,0,0,
 	 1,1,1,1},
 
-	/* ` */
+	
 	{0,1,0,0,
 	 0,0,1,0,
 	 0,0,0,0,
Only in src-ori/image: ia64_asm
diff -bBru src-ori/image/image.c src/image/image.c
--- src-ori/image/image.c	Wed Jan 12 10:32:43 2005
+++ src/image/image.c	Wed Jan 12 12:41:16 2005
@@ -23,20 +23,16 @@
  *
  ****************************************************************************/
 
-#include <stdlib.h>
-#include <string.h>				/* memcpy, memset */
-#include <math.h>
-
-#include "../portab.h"
-#include "../global.h"			/* XVID_CSP_XXX's */
-#include "../xvid.h"			/* XVID_CSP_XXX's */
-#include "image.h"
-#include "colorspace.h"
-#include "interpolate8x8.h"
-#include "reduced.h"
-#include "../utils/mem_align.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
+ image_null(IMAGE * image)
+ {
+
+	image->y = image->u = image->v = NULL;
+}
+
+
 
-#include "font.h"		/* XXX: remove later */
 
 #define SAFETY	64
 #define EDGE_SIZE2  (EDGE_SIZE/2)
@@ -238,6 +234,12 @@
 }
 
 /* bframe encoding requires image-based u,v interpolation */
+#if 0
+	const uint32_t edged_width2 = edged_width / 2;
+	const uint32_t edged_height2 = edged_height / 2;
+	const uint32_t offset2 = EDGE_SIZE2 * (edged_width2 + 1);
+	const uint32_t stride_add2 = 7 * edged_width2;
+#endif
 void
 image_interpolate(const IMAGE * refn,
 				  IMAGE * refh,
@@ -250,12 +252,6 @@
 {
 	const uint32_t offset = EDGE_SIZE2 * (edged_width + 1); /* we only interpolate half of the edge area */
 	const uint32_t stride_add = 7 * edged_width;
-#if 0
-	const uint32_t edged_width2 = edged_width / 2;
-	const uint32_t edged_height2 = edged_height / 2;
-	const uint32_t offset2 = EDGE_SIZE2 * (edged_width2 + 1);
-	const uint32_t stride_add2 = 7 * edged_width2;
-#endif
 	uint8_t *n_ptr, *h_ptr, *v_ptr, *hv_ptr;
 	uint32_t x, y;
 
@@ -274,8 +270,8 @@
 
 		for (y = 0; y < (edged_height - EDGE_SIZE); y += 8) {
 			for (x = 0; x < (edged_width - EDGE_SIZE); x += 8) {
-				interpolate8x8_6tap_lowpass_h(h_ptr, n_ptr, edged_width, rounding);
-				interpolate8x8_6tap_lowpass_v(v_ptr, n_ptr, edged_width, rounding);
+				(((struct global_all_dll *)Dll::Tls())->interpolate8x8_6tap_lowpass_h)(h_ptr, n_ptr, edged_width, rounding);
+				(((struct global_all_dll *)Dll::Tls())->interpolate8x8_6tap_lowpass_v)(v_ptr, n_ptr, edged_width, rounding);
 
 				n_ptr += 8;
 				h_ptr += 8;
@@ -303,7 +299,7 @@
 			for (x = 0; x < (edged_width - EDGE_SIZE); x = x + 8) {
 				hv_ptr -= 8;
 				h_ptr -= 8;
-				interpolate8x8_6tap_lowpass_v(hv_ptr, h_ptr, edged_width, rounding);
+				(((struct global_all_dll *)Dll::Tls())->interpolate8x8_6tap_lowpass_v)(hv_ptr, h_ptr, edged_width, rounding);
 			}
 		}
 	} else {
@@ -313,9 +309,9 @@
 
 		for (y = 0; y < (edged_height - EDGE_SIZE); y += 8) {
 			for (x = 0; x < (edged_width - EDGE_SIZE); x += 8) {
-				interpolate8x8_halfpel_h(h_ptr, n_ptr, edged_width, rounding);
-				interpolate8x8_halfpel_v(v_ptr, n_ptr, edged_width, rounding);
-				interpolate8x8_halfpel_hv(hv_ptr, n_ptr, edged_width, rounding);
+				(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_h)(h_ptr, n_ptr, edged_width, rounding);
+				(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_v)(v_ptr, n_ptr, edged_width, rounding);
+				(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv)(hv_ptr, n_ptr, edged_width, rounding);
 
 				n_ptr += 8;
 				h_ptr += 8;
@@ -348,9 +344,9 @@
 
 	for (y = 0; y < edged_height2; y += 8) {
 		for (x = 0; x < edged_width2; x += 8) {
-			interpolate8x8_halfpel_h(h_ptr, n_ptr, edged_width2, rounding);
-			interpolate8x8_halfpel_v(v_ptr, n_ptr, edged_width2, rounding);
-			interpolate8x8_halfpel_hv(hv_ptr, n_ptr, edged_width2, rounding);
+			(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_h)(h_ptr, n_ptr, edged_width2, rounding);
+			(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_v)(v_ptr, n_ptr, edged_width2, rounding);
+			(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv)(hv_ptr, n_ptr, edged_width2, rounding);
 
 			n_ptr += 8;
 			h_ptr += 8;
@@ -375,9 +371,9 @@
 
 	for (y = 0; y < edged_height2; y = y + 8) {
 		for (x = 0; x < edged_width2; x = x + 8) {
-			interpolate8x8_halfpel_h(h_ptr, n_ptr, edged_width2, rounding);
-			interpolate8x8_halfpel_v(v_ptr, n_ptr, edged_width2, rounding);
-			interpolate8x8_halfpel_hv(hv_ptr, n_ptr, edged_width2, rounding);
+			(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_h)(h_ptr, n_ptr, edged_width2, rounding);
+			(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_v)(v_ptr, n_ptr, edged_width2, rounding);
+			(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv)(hv_ptr, n_ptr, edged_width2, rounding);
 
 			n_ptr += 8;
 			h_ptr += 8;
@@ -460,6 +456,10 @@
 correpsonding luma pixels are pure black or white.
 */
 
+#define IS_PURE(a)  ((a)<=16||(a)>=235)
+#define IMG_Y(Y,X)	img->y[(Y)*edged_width + (X)]
+#define IMG_U(Y,X)	img->u[(Y)*edged_width/2 + (X)]
+#define IMG_V(Y,X)	img->v[(Y)*edged_width/2 + (X)]
 void
 image_chroma_optimize(IMAGE * img, int width, int height, int edged_width)
 {
@@ -469,10 +469,6 @@
 	for (y = 1; y < height/2 - 1; y++)
 	for (x = 1; x < width/2 - 1; x++)
 	{
-#define IS_PURE(a)  ((a)<=16||(a)>=235)
-#define IMG_Y(Y,X)	img->y[(Y)*edged_width + (X)]
-#define IMG_U(Y,X)	img->u[(Y)*edged_width/2 + (X)]
-#define IMG_V(Y,X)	img->v[(Y)*edged_width/2 + (X)]
 
 		if (IS_PURE(IMG_Y(y*2  ,x*2  )) &&
 			IS_PURE(IMG_Y(y*2  ,x*2+1)) &&
@@ -537,6 +533,9 @@
 
 
 
+#if 0
+	const int height_signed = (csp & XVID_CSP_VFLIP) ? -height : height;
+#endif
 int
 image_input(IMAGE * image,
 			uint32_t width,
@@ -550,16 +549,13 @@
 	const int edged_width2 = edged_width/2;
 	const int width2 = width/2;
 	const int height2 = height/2;
-#if 0
-	const int height_signed = (csp & XVID_CSP_VFLIP) ? -height : height;
-#endif
 
 	switch (csp & ~XVID_CSP_VFLIP) {
 	case XVID_CSP_RGB555:
 		safe_packed_conv(
 			src[0], src_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?rgb555i_to_yv12  :rgb555_to_yv12,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->rgb555i_to_yv12)  :(((struct global_all_dll *)Dll::Tls())->rgb555_to_yv12),
 			interlacing?rgb555i_to_yv12_c:rgb555_to_yv12_c, 2);
 		break;
 
@@ -567,7 +563,7 @@
 		safe_packed_conv(
 			src[0], src_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?rgb565i_to_yv12  :rgb565_to_yv12,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->rgb565i_to_yv12)  :(((struct global_all_dll *)Dll::Tls())->rgb565_to_yv12),
 			interlacing?rgb565i_to_yv12_c:rgb565_to_yv12_c, 2);
 		break;
 
@@ -575,7 +571,7 @@
 		safe_packed_conv(
 			src[0], src_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			rgb444_to_yv12,
+			(((struct global_all_dll *)Dll::Tls())->rgb444_to_yv12),
 			rgb444_to_yv12_c, 2);
 		break;
 
@@ -584,7 +580,7 @@
 		safe_packed_conv(
 			src[0], src_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?bgri_to_yv12  :bgr_to_yv12,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->bgri_to_yv12)  :(((struct global_all_dll *)Dll::Tls())->bgr_to_yv12),
 			interlacing?bgri_to_yv12_c:bgr_to_yv12_c, 3);
 		break;
 
@@ -592,7 +588,7 @@
 		safe_packed_conv(
 			src[0], src_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?bgrai_to_yv12  :bgra_to_yv12,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->bgrai_to_yv12)  :(((struct global_all_dll *)Dll::Tls())->bgra_to_yv12),
 			interlacing?bgrai_to_yv12_c:bgra_to_yv12_c, 4);
 		break;
 
@@ -600,7 +596,7 @@
 		safe_packed_conv(
 			src[0], src_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?abgri_to_yv12  :abgr_to_yv12,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->abgri_to_yv12)  :(((struct global_all_dll *)Dll::Tls())->abgr_to_yv12),
 			interlacing?abgri_to_yv12_c:abgr_to_yv12_c, 4);
 		break;
 
@@ -608,7 +604,7 @@
 		safe_packed_conv(
 			src[0], src_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?rgbai_to_yv12  :rgba_to_yv12,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->rgbai_to_yv12)  :(((struct global_all_dll *)Dll::Tls())->rgba_to_yv12),
 			interlacing?rgbai_to_yv12_c:rgba_to_yv12_c, 4);
 		break;
             
@@ -616,7 +612,7 @@
 		safe_packed_conv(
 			src[0], src_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?argbi_to_yv12  : argb_to_yv12,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->argbi_to_yv12)  : (((struct global_all_dll *)Dll::Tls())->argb_to_yv12),
 			interlacing?argbi_to_yv12_c: argb_to_yv12_c, 4);
 		break;
 
@@ -624,7 +620,7 @@
 		safe_packed_conv(
 			src[0], src_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?yuyvi_to_yv12  :yuyv_to_yv12,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->yuyvi_to_yv12)  :(((struct global_all_dll *)Dll::Tls())->yuyv_to_yv12),
 			interlacing?yuyvi_to_yv12_c:yuyv_to_yv12_c, 2);
 		break;
 
@@ -632,7 +628,7 @@
 		safe_packed_conv(
 			src[0], src_stride[0], image->y, image->v, image->u,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?yuyvi_to_yv12  :yuyv_to_yv12,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->yuyvi_to_yv12)  :(((struct global_all_dll *)Dll::Tls())->yuyv_to_yv12),
 			interlacing?yuyvi_to_yv12_c:yuyv_to_yv12_c, 2);
 		break;
 
@@ -640,24 +636,24 @@
 		safe_packed_conv(
 			src[0], src_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?uyvyi_to_yv12  :uyvy_to_yv12,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->uyvyi_to_yv12)  :(((struct global_all_dll *)Dll::Tls())->uyvy_to_yv12),
 			interlacing?uyvyi_to_yv12_c:uyvy_to_yv12_c, 2);
 		break;
 
 	case XVID_CSP_I420:	/* YCbCr == YUV == internal colorspace for MPEG */
-		yv12_to_yv12(image->y, image->u, image->v, edged_width, edged_width2,
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_yv12)(image->y, image->u, image->v, edged_width, edged_width2,
 			src[0], src[0] + src_stride[0]*height, src[0] + src_stride[0]*height + (src_stride[0]/2)*height2,
 			src_stride[0], src_stride[0]/2, width, height, (csp & XVID_CSP_VFLIP));
 		break;
 
 	case XVID_CSP_YV12: /* YCrCb == YVA == U and V plane swapped */
-		yv12_to_yv12(image->y, image->v, image->u, edged_width, edged_width2,
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_yv12)(image->y, image->v, image->u, edged_width, edged_width2,
 			src[0], src[0] + src_stride[0]*height, src[0] + src_stride[0]*height + (src_stride[0]/2)*height2,
 			src_stride[0], src_stride[0]/2, width, height, (csp & XVID_CSP_VFLIP));
 		break;
 
 	case XVID_CSP_PLANAR:  /* YCbCr with arbitrary pointers and different strides for Y and UV */
-		yv12_to_yv12(image->y, image->u, image->v, edged_width, edged_width2,
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_yv12)(image->y, image->u, image->v, edged_width, edged_width2,
 			src[0], src[1], src[2], src_stride[0], src_stride[1],  /* v: dst_stride[2] not yet supported */
 			width, height, (csp & XVID_CSP_VFLIP));
 		break;
@@ -744,7 +740,7 @@
 		safe_packed_conv(
 			dst[0], dst_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?yv12_to_rgb555i  :yv12_to_rgb555,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->yv12_to_rgb555i)  :(((struct global_all_dll *)Dll::Tls())->yv12_to_rgb555),
 			interlacing?yv12_to_rgb555i_c:yv12_to_rgb555_c, 2);
 		return 0;
 
@@ -752,7 +748,7 @@
 		safe_packed_conv(
 			dst[0], dst_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?yv12_to_rgb565i  :yv12_to_rgb565,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->yv12_to_rgb565i)  :(((struct global_all_dll *)Dll::Tls())->yv12_to_rgb565),
 			interlacing?yv12_to_rgb565i_c:yv12_to_rgb565_c, 2);
 		return 0;
 
@@ -760,7 +756,7 @@
 		safe_packed_conv(
 			dst[0], dst_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			yv12_to_rgb444,
+			(((struct global_all_dll *)Dll::Tls())->yv12_to_rgb444),
 			yv12_to_rgb444_c, 2);
 		return 0;
 
@@ -769,7 +765,7 @@
 		safe_packed_conv(
 			dst[0], dst_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?yv12_to_bgri  :yv12_to_bgr,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->yv12_to_bgri)  :(((struct global_all_dll *)Dll::Tls())->yv12_to_bgr),
 			interlacing?yv12_to_bgri_c:yv12_to_bgr_c, 3);
 		return 0;
 
@@ -777,7 +773,7 @@
 		safe_packed_conv(
 			dst[0], dst_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?yv12_to_bgrai  :yv12_to_bgra,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->yv12_to_bgrai)  :(((struct global_all_dll *)Dll::Tls())->yv12_to_bgra),
 			interlacing?yv12_to_bgrai_c:yv12_to_bgra_c, 4);
 		return 0;
 
@@ -785,7 +781,7 @@
 		safe_packed_conv(
 			dst[0], dst_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?yv12_to_abgri  :yv12_to_abgr,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->yv12_to_abgri)  :(((struct global_all_dll *)Dll::Tls())->yv12_to_abgr),
 			interlacing?yv12_to_abgri_c:yv12_to_abgr_c, 4);
 		return 0;
 
@@ -793,7 +789,7 @@
 		safe_packed_conv(
 			dst[0], dst_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?yv12_to_rgbai  :yv12_to_rgba,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->yv12_to_rgbai)  :(((struct global_all_dll *)Dll::Tls())->yv12_to_rgba),
 			interlacing?yv12_to_rgbai_c:yv12_to_rgba_c, 4);
 		return 0;
 
@@ -801,7 +797,7 @@
 		safe_packed_conv(
 			dst[0], dst_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?yv12_to_argbi  :yv12_to_argb,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->yv12_to_argbi)  :(((struct global_all_dll *)Dll::Tls())->yv12_to_argb),
 			interlacing?yv12_to_argbi_c:yv12_to_argb_c, 4);
 		return 0;
 
@@ -809,7 +805,7 @@
 		safe_packed_conv(
 			dst[0], dst_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?yv12_to_yuyvi  :yv12_to_yuyv,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->yv12_to_yuyvi)  :(((struct global_all_dll *)Dll::Tls())->yv12_to_yuyv),
 			interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2);
 		return 0;
 
@@ -817,7 +813,7 @@
 		safe_packed_conv(
 			dst[0], dst_stride[0], image->y, image->v, image->u,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?yv12_to_yuyvi  :yv12_to_yuyv,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->yv12_to_yuyvi)  :(((struct global_all_dll *)Dll::Tls())->yv12_to_yuyv),
 			interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2);
 		return 0;
 
@@ -825,26 +821,26 @@
 		safe_packed_conv(
 			dst[0], dst_stride[0], image->y, image->u, image->v,
 			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
-			interlacing?yv12_to_uyvyi  :yv12_to_uyvy,
+			interlacing?(((struct global_all_dll *)Dll::Tls())->yv12_to_uyvyi)  :(((struct global_all_dll *)Dll::Tls())->yv12_to_uyvy),
 			interlacing?yv12_to_uyvyi_c:yv12_to_uyvy_c, 2);
 		return 0;
 
 	case XVID_CSP_I420: /* YCbCr == YUV == internal colorspace for MPEG */
-		yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_yv12)(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
 			dst_stride[0], dst_stride[0]/2,
 			image->y, image->u, image->v, edged_width, edged_width2,
 			width, height, (csp & XVID_CSP_VFLIP));
 		return 0;
 
 	case XVID_CSP_YV12:	/* YCrCb == YVU == U and V plane swapped */
-		yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_yv12)(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
 			dst_stride[0], dst_stride[0]/2,
 			image->y, image->v, image->u, edged_width, edged_width2,
 			width, height, (csp & XVID_CSP_VFLIP));
 		return 0;
 
 	case XVID_CSP_PLANAR:  /* YCbCr with arbitrary pointers and different strides for Y and UV */
-		yv12_to_yv12(dst[0], dst[1], dst[2],
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_yv12)(dst[0], dst[1], dst[2],
 			dst_stride[0], dst_stride[1],	/* v: dst_stride[2] not yet supported */
 			image->y, image->u, image->v, edged_width, edged_width2,
 			width, height, (csp & XVID_CSP_VFLIP));
@@ -983,7 +979,6 @@
 
 /* dump image to yuvpgm file */
 
-#include <stdio.h>
 
 int
 image_dump_yuvpgm(const IMAGE * image,
@@ -1138,7 +1133,7 @@
 			if (mbs[(j-1)/2*mb_stride + (i/2)].mode != MODE_NOT_CODED ||
 				mbs[(j+0)/2*mb_stride + (i/2)].mode != MODE_NOT_CODED)
 			{
-				hfilter_31(img->y + (j*block - 1)*edged_width + i*block,
+				(((struct global_all_dll *)Dll::Tls())->hfilter_31)(img->y + (j*block - 1)*edged_width + i*block,
 								  img->y + (j*block + 0)*edged_width + i*block, nblocks);
 			}
 		}
@@ -1149,7 +1144,7 @@
 			if (mbs[(j/2)*mb_stride + (i-1)/2].mode != MODE_NOT_CODED ||
 				mbs[(j/2)*mb_stride + (i+0)/2].mode != MODE_NOT_CODED)
 			{
-				vfilter_31(img->y + (j*block)*edged_width + i*block - 1,
+				(((struct global_all_dll *)Dll::Tls())->vfilter_31)(img->y + (j*block)*edged_width + i*block - 1,
 						   img->y + (j*block)*edged_width + i*block + 0,
 						   edged_width, nblocks);
 			}
@@ -1165,9 +1160,9 @@
 			if (mbs[(j-1)*mb_stride + i].mode != MODE_NOT_CODED ||
 				mbs[(j+0)*mb_stride + i].mode != MODE_NOT_CODED)
 			{
-				hfilter_31(img->u + (j*block - 1)*edged_width2 + i*block,
+				(((struct global_all_dll *)Dll::Tls())->hfilter_31)(img->u + (j*block - 1)*edged_width2 + i*block,
 						   img->u + (j*block + 0)*edged_width2 + i*block, nblocks);
-				hfilter_31(img->v + (j*block - 1)*edged_width2 + i*block,
+				(((struct global_all_dll *)Dll::Tls())->hfilter_31)(img->v + (j*block - 1)*edged_width2 + i*block,
 						   img->v + (j*block + 0)*edged_width2 + i*block, nblocks);
 			}
 		}
@@ -1178,10 +1173,10 @@
 			if (mbs[j*mb_stride + i - 1].mode != MODE_NOT_CODED ||
 				mbs[j*mb_stride + i + 0].mode != MODE_NOT_CODED)
 			{
-				vfilter_31(img->u + (j*block)*edged_width2 + i*block - 1,
+				(((struct global_all_dll *)Dll::Tls())->vfilter_31)(img->u + (j*block)*edged_width2 + i*block - 1,
 						   img->u + (j*block)*edged_width2 + i*block + 0,
 						   edged_width2, nblocks);
-				vfilter_31(img->v + (j*block)*edged_width2 + i*block - 1,
+				(((struct global_all_dll *)Dll::Tls())->vfilter_31)(img->v + (j*block)*edged_width2 + i*block - 1,
 						   img->v + (j*block)*edged_width2 + i*block + 0,
 						   edged_width2, nblocks);
 			}
diff -bBru src-ori/image/image.h src/image/image.h
--- src-ori/image/image.h	Wed Jan 12 10:32:43 2005
+++ src/image/image.h	Wed Jan 12 12:37:56 2005
@@ -38,11 +38,8 @@
 void init_image(uint32_t cpu_flags);
 
 
-static void __inline
-image_null(IMAGE * image)
-{
-	image->y = image->u = image->v = NULL;
-}
+extern  image_null(IMAGE * image)
+;
 
 int32_t image_create(IMAGE * image,
 					 uint32_t edged_width,
diff -bBru src-ori/image/interpolate8x8.c src/image/interpolate8x8.c
--- src-ori/image/interpolate8x8.c	Wed Jan 12 10:32:43 2005
+++ src/image/interpolate8x8.c	Wed Jan 12 12:40:18 2005
@@ -23,29 +23,404 @@
  *
  ****************************************************************************/
 
-#include "../portab.h"
-#include "../global.h"
-#include "interpolate8x8.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
+ void interpolate8x8_switch(uint8_t * const cur,
+ 					  const uint8_t * const refn,
+ 					  const uint32_t x,
+ 					  const uint32_t y,
+ 					  const int32_t dx,
+ 					  const int dy,
+ 					  const uint32_t stride,
+ 					  const uint32_t rounding)
+ {
+	/* Writes one block at (x, y) of cur: a plain copy or a half-sample interpolation of refn, chosen by the low bits of dx and dy. */
+	/* dx>>1 and dy>>1 give the whole-sample displacement of src; kernels are fetched from the struct global_all_dll returned by Dll::Tls(). */
+	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));
+	uint8_t * const dst = cur + (int)(y * stride + x);
+
+	switch (((dx & 1) << 1) + (dy & 1))	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
+	case 0:	/* dx even, dy even: straight copy */
+		(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy)(dst, src, stride);
+		break;
+	case 1:	/* only dy odd: vertical kernel */
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_v)(dst, src, stride, rounding);
+		break;
+	case 2:	/* only dx odd: horizontal kernel */
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_h)(dst, src, stride, rounding);
+		break;
+	default:	/* dx and dy both odd: combined kernel */
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv)(dst, src, stride, rounding);
+		break;
+	}
+}
+ void interpolate16x16_switch(uint8_t * const cur,
+ 					  const uint8_t * const refn,
+ 					  const uint32_t x,
+ 					  const uint32_t y,
+ 					  const int32_t dx,
+ 					  const int dy,
+ 					  const uint32_t stride,
+ 					  const uint32_t rounding)
+ {
+	/* Runs interpolate8x8_switch on the four quadrants at (x, y), (x+8, y), (x, y+8) and (x+8, y+8) with the same dx, dy, stride and rounding. */
+	interpolate8x8_switch(cur, refn, x,   y,   dx, dy, stride, rounding);
+	interpolate8x8_switch(cur, refn, x+8, y,   dx, dy, stride, rounding);
+	interpolate8x8_switch(cur, refn, x,   y+8, dx, dy, stride, rounding);
+	interpolate8x8_switch(cur, refn, x+8, y+8, dx, dy, stride, rounding);
+}
+ void interpolate32x32_switch(uint8_t * const cur,
+ 					  const uint8_t * const refn,
+ 					  const uint32_t x,
+ 					  const uint32_t y,
+ 					  const int32_t dx,
+ 					  const int dy,
+ 					  const uint32_t stride,
+ 					  const uint32_t rounding)
+ {
+	/* Runs interpolate16x16_switch on the four quadrants at (x, y), (x+16, y), (x, y+16) and (x+16, y+16) with the same dx, dy, stride and rounding. */
+	interpolate16x16_switch(cur, refn, x,    y,    dx, dy, stride, rounding);
+	interpolate16x16_switch(cur, refn, x+16, y,    dx, dy, stride, rounding);
+	interpolate16x16_switch(cur, refn, x,    y+16, dx, dy, stride, rounding);
+	interpolate16x16_switch(cur, refn, x+16, y+16, dx, dy, stride, rounding);
+}
+ uint8_t * interpolate8x8_switch2(uint8_t * const buffer,
+ 					  const uint8_t * const refn,
+ 					  const int x,
+ 					  const int y,
+ 					  const int dx,
+ 					  const int dy,
+ 					  const uint32_t stride,
+ 					  const uint32_t rounding)
+ {
+	/* Returns a pointer to the predicted block: the reference samples themselves when dx and dy are both even, otherwise buffer after it has been filled. */
+	/* Kernels are fetched from the struct global_all_dll returned by Dll::Tls(). */
+	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));
+	/* Same selector as interpolate8x8_switch: bit 1 is the parity of dx, bit 0 the parity of dy. */
+	switch (((dx & 1) << 1) + (dy & 1))	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
+	case 0:
+		return (uint8_t *)src;	/* nothing is copied; the const qualifier of refn is cast away here */
+	case 1:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_v)(buffer, src, stride, rounding);
+		break;
+	case 2:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_h)(buffer, src, stride, rounding);
+		break;
+	default:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv)(buffer, src, stride, rounding);
+		break;
+	}
+	return buffer;
+}
+ void interpolate8x8_quarterpel(uint8_t * const cur, 				     uint8_t * const refn,
+ 					 uint8_t * const refh,
+ 					 uint8_t * const refv,
+ 					 uint8_t * const refhv,
+ 				     const uint32_t x, const uint32_t y,
+ 					 const int32_t dx,  const int dy,
+ 					 const uint32_t stride,
+ 					 const uint32_t rounding)
+ {
+	/* Predicts the block at (x, y) of cur from refn: (dx, dy) is added to 4*(x, y) and the sum is split into a whole-sample part and a fraction in quarters. */
+	const int32_t xRef = (int)x*4 + dx;
+	const int32_t yRef = (int)y*4 + dy;
+
+	uint8_t *src, *dst;
+	uint8_t *halfpel_h, *halfpel_v, *halfpel_hv;
+	int32_t x_int, y_int, x_frac, y_frac;
+	/* C division truncates toward zero; the decrement below turns it into a floor for negative non-multiples of 4, so each fraction lands in 0..3. */
+	x_int = xRef/4;
+	if (xRef < 0 && xRef % 4)
+		x_int--;
+
+	x_frac = xRef - (4*x_int);
+
+	y_int  = yRef/4;
+	if (yRef < 0 && yRef % 4)
+		y_int--;
+
+	y_frac = yRef - (4*y_int);
+	/* src is the whole-sample position in refn; refh/refv/refhv are working buffers that the cases below pass as destination arguments for intermediate results. */
+	src = refn + y_int * (int)stride + x_int;
+	halfpel_h = refh;
+	halfpel_v = refv;
+	halfpel_hv = refhv;
+
+	dst = cur + y * stride + x;
+	/* Selector is y_frac*4 + x_frac; all 16 values are handled, so there is no default case. The last avg2 argument is its row count (8 or 9). */
+	switch((y_frac << 2) | (x_frac)) {
+
+	case 0:
+		(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy)(dst, src, stride);
+		break;
+
+	case 1:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, src, halfpel_h, stride, rounding, 8);
+  		break;
+
+	case 2:
+	    (((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_h)(dst, src, stride, rounding);
+  		break;
+
+	case 3:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, src + 1, halfpel_h, stride, rounding, 8);
+  		break;
+
+	case 4:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_v)(halfpel_v, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, src, halfpel_v, stride, rounding, 8);
+ 		break;
+
+	case 5:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v, src, halfpel_h, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_v)(halfpel_hv, halfpel_v, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, halfpel_v, halfpel_hv, stride, rounding, 8);
+ 		break;
+
+	case 6:	/* halfpel_h is read right after the hv call without being filled here first - presumably lowpass_hv writes it as a second output; verify */
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_hv)(halfpel_hv, halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, halfpel_h, halfpel_hv, stride, rounding, 8);
+ 		break;
+
+	case 7:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v, src + 1, halfpel_h, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_v)(halfpel_hv, halfpel_v, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, halfpel_v, halfpel_hv, stride, rounding, 8);
+ 		break;
+
+	case 8:
+	    (((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_v)(dst, src, stride, rounding);
+		break;
+
+	case 9:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v, src, halfpel_h, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_v)(dst, halfpel_v, stride, rounding);
+  		break;
+
+	case 10:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_hv)(dst, halfpel_h, src, stride, rounding);
+		break;
+
+	case 11:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v, src + 1, halfpel_h, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_v)(dst, halfpel_v, stride, rounding);
+   		break;
+
+	case 12:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_v)(halfpel_v, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, src+stride, halfpel_v, stride, rounding, 8);
+   		break;
+
+	case 13:	/* the final average starts one row down (halfpel_v+stride), so the 9-row intermediate above is needed */
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v, src, halfpel_h, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_v)(halfpel_hv, halfpel_v, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, halfpel_v+stride, halfpel_hv, stride, rounding, 8);
+ 		break;
+
+	case 14:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_hv)(halfpel_hv, halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, halfpel_h+stride, halfpel_hv, stride, rounding, 8);
+ 		break;
+
+	case 15:
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v, src + 1, halfpel_h, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_v)(halfpel_hv, halfpel_v, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, halfpel_hv, halfpel_v + stride, stride, rounding, 8);
+		break;
+	}
+}
+ void interpolate16x16_quarterpel(uint8_t * const cur, 				     uint8_t * const refn,
+ 					 uint8_t * const refh,
+ 					 uint8_t * const refv,
+ 					 uint8_t * const refhv,
+ 				     const uint32_t x, const uint32_t y,
+ 					 const int32_t dx,  const int dy,
+ 					 const uint32_t stride,
+ 					 const uint32_t rounding)
+ {
+	/* 16x16 counterpart of interpolate8x8_quarterpel: same position split and 16-way selector, using the 16x16 low-pass kernels. */
+	const int32_t xRef = (int)x*4 + dx;
+	const int32_t yRef = (int)y*4 + dy;
+
+	uint8_t *src, *dst;
+	uint8_t *halfpel_h, *halfpel_v, *halfpel_hv;
+	int32_t x_int, y_int, x_frac, y_frac;
+	/* C division truncates toward zero; the decrement below turns it into a floor for negative non-multiples of 4, so each fraction lands in 0..3. */
+	x_int = xRef/4;
+	if (xRef < 0 && xRef % 4)
+		x_int--;
+
+	x_frac = xRef - (4*x_int);
+
+	y_int  = yRef/4;
+	if (yRef < 0 && yRef % 4)
+		y_int--;
+
+	y_frac = yRef - (4*y_int);
+	/* src is the whole-sample position in refn; refh/refv/refhv are working buffers that the cases below pass as destination arguments for intermediate results. */
+	src = refn + y_int * (int)stride + x_int;
+	halfpel_h = refh;
+	halfpel_v = refv;
+	halfpel_hv = refhv;
+
+	dst = cur + y * stride + x;
+	/* Selector is y_frac*4 + x_frac (no default needed). Averaging uses interpolate8x8_avg2 four times, at offsets 0, 8, 8*stride and 8*stride+8. */
+	switch((y_frac << 2) | (x_frac)) {
+
+	case 0:
+		transfer16x16_copy(dst, src, stride);	/* called directly, unlike the other kernels which go through the Dll::Tls() table */
+		break;
+
+	case 1:
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, src, halfpel_h, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8, src+8, halfpel_h+8, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride, src+8*stride, halfpel_h+8*stride, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride+8, src+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 8);
+		break;
+
+	case 2:
+	    (((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_h)(dst, src, stride, rounding);
+  		break;
+
+	case 3:
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, src + 1, halfpel_h, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8, src + 8 + 1, halfpel_h+8, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride, src + 8*stride + 1, halfpel_h+8*stride, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride+8, src+8*stride+8 + 1, halfpel_h+8*stride+8, stride, rounding, 8);
+		break;
+
+	case 4:
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_v)(halfpel_v, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, src, halfpel_v, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8, src+8, halfpel_v+8, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride, src+8*stride, halfpel_v+8*stride, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride+8, src+8*stride+8, halfpel_v+8*stride+8, stride, rounding, 8);
+		break;
+
+	case 5:
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v, src, halfpel_h, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8, src + 8, halfpel_h+8, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8*stride, src + 8*stride, halfpel_h+8*stride, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8*stride+8, src+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 9);
+
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_v)(halfpel_hv, halfpel_v, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, halfpel_hv, halfpel_v, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8, halfpel_hv+8, halfpel_v+8, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride, halfpel_hv+8*stride, halfpel_v+8*stride, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride+8, halfpel_hv+8*stride+8, halfpel_v+8*stride+8, stride, rounding, 8);
+		break;
+
+	case 6:	/* halfpel_h is read right after the hv call without being filled here first - presumably lowpass_hv writes it as a second output; verify */
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_hv)(halfpel_hv, halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, halfpel_h, halfpel_hv, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8, halfpel_h+8, halfpel_hv+8, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride, halfpel_h+8*stride, halfpel_hv+8*stride, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride+8, halfpel_h+8*stride+8, halfpel_hv+8*stride+8, stride, rounding, 8);
+		break;
+
+	case 7:
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v, src+1, halfpel_h, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8, src+1 + 8, halfpel_h+8, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8*stride, src+1 + 8*stride, halfpel_h+8*stride, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8*stride+8, src+1+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 9);
+
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_v)(halfpel_hv, halfpel_v, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, halfpel_hv, halfpel_v, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8, halfpel_hv+8, halfpel_v+8, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride, halfpel_hv+8*stride, halfpel_v+8*stride, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride+8, halfpel_hv+8*stride+8, halfpel_v+8*stride+8, stride, rounding, 8);
+		break;
+
+	case 8:
+	    (((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_v)(dst, src, stride, rounding);
+		break;
+
+	case 9:
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v, src, halfpel_h, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8, src + 8, halfpel_h+8, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8*stride, src + 8*stride, halfpel_h+8*stride, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8*stride+8, src+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_v)(dst, halfpel_v, stride, rounding);
+		break;
+
+	case 10:
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_hv)(dst, halfpel_h, src, stride, rounding);
+		break;
+
+	case 11:
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v, src+1, halfpel_h, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8, src+1 + 8, halfpel_h+8, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8*stride, src+1 + 8*stride, halfpel_h+8*stride, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8*stride+8, src+1+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_v)(dst, halfpel_v, stride, rounding);
+		break;
+
+	case 12:
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_v)(halfpel_v, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, src+stride, halfpel_v, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8, src+stride+8, halfpel_v+8, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride, src+stride+8*stride, halfpel_v+8*stride, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride+8, src+stride+8*stride+8, halfpel_v+8*stride+8, stride, rounding, 8);
+		break;
+
+	case 13:	/* the final averages start one row down (halfpel_v+stride), so the 9-row intermediates above are needed */
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v, src, halfpel_h, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8, src + 8, halfpel_h+8, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8*stride, src + 8*stride, halfpel_h+8*stride, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8*stride+8, src+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 9);
+
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_v)(halfpel_hv, halfpel_v, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, halfpel_hv, halfpel_v+stride, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8, halfpel_hv+8, halfpel_v+stride+8, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride, halfpel_hv+8*stride, halfpel_v+stride+8*stride, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride+8, halfpel_hv+8*stride+8, halfpel_v+stride+8*stride+8, stride, rounding, 8);
+		break;
+
+	case 14:
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_hv)(halfpel_hv, halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, halfpel_h+stride, halfpel_hv, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8, halfpel_h+stride+8, halfpel_hv+8, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride, halfpel_h+stride+8*stride, halfpel_hv+8*stride, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride+8, halfpel_h+stride+8*stride+8, halfpel_hv+8*stride+8, stride, rounding, 8);
+		break;
+
+	case 15:
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_h)(halfpel_h, src, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v, src+1, halfpel_h, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8, src+1 + 8, halfpel_h+8, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8*stride, src+1 + 8*stride, halfpel_h+8*stride, stride, rounding, 9);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(halfpel_v+8*stride+8, src+1+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 9);
+
+		(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_v)(halfpel_hv, halfpel_v, stride, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst, halfpel_hv, halfpel_v+stride, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8, halfpel_hv+8, halfpel_v+stride+8, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride, halfpel_hv+8*stride, halfpel_v+stride+8*stride, stride, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(dst+8*stride+8, halfpel_hv+8*stride+8, halfpel_v+stride+8*stride+8, stride, rounding, 8);
+		break;
+	}
+}
+
 
-/* function pointers */
-INTERPOLATE8X8_PTR interpolate8x8_halfpel_h;
-INTERPOLATE8X8_PTR interpolate8x8_halfpel_v;
-INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv;
 
-INTERPOLATE8X8_AVG2_PTR interpolate8x8_avg2;
-INTERPOLATE8X8_AVG4_PTR interpolate8x8_avg4;
 
-INTERPOLATE_LOWPASS_PTR interpolate8x8_lowpass_h;
-INTERPOLATE_LOWPASS_PTR interpolate8x8_lowpass_v;
 
-INTERPOLATE_LOWPASS_PTR interpolate16x16_lowpass_h;
-INTERPOLATE_LOWPASS_PTR interpolate16x16_lowpass_v;
 
-INTERPOLATE_LOWPASS_HV_PTR interpolate8x8_lowpass_hv;
-INTERPOLATE_LOWPASS_HV_PTR interpolate16x16_lowpass_hv;
 
-INTERPOLATE8X8_6TAP_LOWPASS_PTR interpolate8x8_6tap_lowpass_h;
-INTERPOLATE8X8_6TAP_LOWPASS_PTR interpolate8x8_6tap_lowpass_v;
 
 void interpolate8x8_avg2_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint32_t stride, const uint32_t rounding, const uint32_t height)
 {
diff -bBru src-ori/image/interpolate8x8.h src/image/interpolate8x8.h
--- src-ori/image/interpolate8x8.h	Wed Jan 12 10:32:43 2005
+++ src/image/interpolate8x8.h	Wed Jan 12 12:37:52 2005
@@ -68,29 +89,32 @@
 
 typedef void (INTERPOLATE8X8_6TAP_LOWPASS) (uint8_t *dst,
 									        uint8_t *src,
+
 									        int32_t stride,
+
 									        int32_t rounding);
 
+
 typedef INTERPOLATE8X8_6TAP_LOWPASS *INTERPOLATE8X8_6TAP_LOWPASS_PTR;
 
-extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_h;
-extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_v;
-extern INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv;
 
-extern INTERPOLATE8X8_AVG2_PTR interpolate8x8_avg2;
-extern INTERPOLATE8X8_AVG4_PTR interpolate8x8_avg4;
 
-extern INTERPOLATE_LOWPASS_PTR interpolate8x8_lowpass_h;
-extern INTERPOLATE_LOWPASS_PTR interpolate8x8_lowpass_v;
 
-extern INTERPOLATE_LOWPASS_PTR interpolate16x16_lowpass_h;
-extern INTERPOLATE_LOWPASS_PTR interpolate16x16_lowpass_v;
 
-extern INTERPOLATE_LOWPASS_HV_PTR interpolate8x8_lowpass_hv;
-extern INTERPOLATE_LOWPASS_HV_PTR interpolate16x16_lowpass_hv;
 
-extern INTERPOLATE8X8_6TAP_LOWPASS_PTR interpolate8x8_6tap_lowpass_h;
-extern INTERPOLATE8X8_6TAP_LOWPASS_PTR interpolate8x8_6tap_lowpass_v;
+
+
+
+
+
+
+
+
+
+
+
+
+
 
 INTERPOLATE8X8 interpolate8x8_halfpel_h_c;
 INTERPOLATE8X8 interpolate8x8_halfpel_v_c;
@@ -145,8 +169,7 @@
 INTERPOLATE8X8_6TAP_LOWPASS interpolate8x8_6tap_lowpass_v_mmx;
 #endif
 
-static __inline void
-interpolate8x8_switch(uint8_t * const cur,
+extern  void interpolate8x8_switch(uint8_t * const cur,
 					  const uint8_t * const refn,
 					  const uint32_t x,
 					  const uint32_t y,
@@ -154,30 +177,10 @@
 					  const int dy,
 					  const uint32_t stride,
 					  const uint32_t rounding)
-{
-
-	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));
-	uint8_t * const dst = cur + (int)(y * stride + x);
-
-	switch (((dx & 1) << 1) + (dy & 1))	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
-	case 0:
-		transfer8x8_copy(dst, src, stride);
-		break;
-	case 1:
-		interpolate8x8_halfpel_v(dst, src, stride, rounding);
-		break;
-	case 2:
-		interpolate8x8_halfpel_h(dst, src, stride, rounding);
-		break;
-	default:
-		interpolate8x8_halfpel_hv(dst, src, stride, rounding);
-		break;
-	}
-}
+;
 
 
-static __inline void
-interpolate16x16_switch(uint8_t * const cur,
+extern  void interpolate16x16_switch(uint8_t * const cur,
 					  const uint8_t * const refn,
 					  const uint32_t x,
 					  const uint32_t y,
@@ -185,16 +188,10 @@
 					  const int dy,
 					  const uint32_t stride,
 					  const uint32_t rounding)
-{
-	interpolate8x8_switch(cur, refn, x,   y,   dx, dy, stride, rounding);
-	interpolate8x8_switch(cur, refn, x+8, y,   dx, dy, stride, rounding);
-	interpolate8x8_switch(cur, refn, x,   y+8, dx, dy, stride, rounding);
-	interpolate8x8_switch(cur, refn, x+8, y+8, dx, dy, stride, rounding);
-}
+;
 
 
-static __inline void
-interpolate32x32_switch(uint8_t * const cur,
+extern  void interpolate32x32_switch(uint8_t * const cur,
 					  const uint8_t * const refn,
 					  const uint32_t x,
 					  const uint32_t y,
@@ -202,16 +199,10 @@
 					  const int dy,
 					  const uint32_t stride,
 					  const uint32_t rounding)
-{
-	interpolate16x16_switch(cur, refn, x,    y,    dx, dy, stride, rounding);
-	interpolate16x16_switch(cur, refn, x+16, y,    dx, dy, stride, rounding);
-	interpolate16x16_switch(cur, refn, x,    y+16, dx, dy, stride, rounding);
-	interpolate16x16_switch(cur, refn, x+16, y+16, dx, dy, stride, rounding);
-}
+;
 
 
-static __inline uint8_t *
-interpolate8x8_switch2(uint8_t * const buffer,
+extern  uint8_t * interpolate8x8_switch2(uint8_t * const buffer,
 					  const uint8_t * const refn,
 					  const int x,
 					  const int y,
@@ -219,28 +210,9 @@
 					  const int dy,
 					  const uint32_t stride,
 					  const uint32_t rounding)
-{
-
-	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));
-
-	switch (((dx & 1) << 1) + (dy & 1))	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
-	case 0:
-		return (uint8_t *)src;
-	case 1:
-		interpolate8x8_halfpel_v(buffer, src, stride, rounding);
-		break;
-	case 2:
-		interpolate8x8_halfpel_h(buffer, src, stride, rounding);
-		break;
-	default:
-		interpolate8x8_halfpel_hv(buffer, src, stride, rounding);
-		break;
-	}
-	return buffer;
-}
+;
 
-static __inline void interpolate8x8_quarterpel(uint8_t * const cur,
-				     uint8_t * const refn,
+extern  void interpolate8x8_quarterpel(uint8_t * const cur, 				     uint8_t * const refn,
 					 uint8_t * const refh,
 					 uint8_t * const refv,
 					 uint8_t * const refhv,
@@ -248,125 +220,9 @@
 					 const int32_t dx,  const int dy,
 					 const uint32_t stride,
 					 const uint32_t rounding)
-{
-	const int32_t xRef = (int)x*4 + dx;
-	const int32_t yRef = (int)y*4 + dy;
-
-	uint8_t *src, *dst;
-	uint8_t *halfpel_h, *halfpel_v, *halfpel_hv;
-	int32_t x_int, y_int, x_frac, y_frac;
-
-	x_int = xRef/4;
-	if (xRef < 0 && xRef % 4)
-		x_int--;
-
-	x_frac = xRef - (4*x_int);
-
-	y_int  = yRef/4;
-	if (yRef < 0 && yRef % 4)
-		y_int--;
-
-	y_frac = yRef - (4*y_int);
-
-	src = refn + y_int * (int)stride + x_int;
-	halfpel_h = refh;
-	halfpel_v = refv;
-	halfpel_hv = refhv;
-
-	dst = cur + y * stride + x;
-
-	switch((y_frac << 2) | (x_frac)) {
-
-	case 0:
-		transfer8x8_copy(dst, src, stride);
-		break;
-
-	case 1:
-		interpolate8x8_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(dst, src, halfpel_h, stride, rounding, 8);
-  		break;
-
-	case 2:
-	    interpolate8x8_lowpass_h(dst, src, stride, rounding);
-  		break;
-
-	case 3:
-		interpolate8x8_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(dst, src + 1, halfpel_h, stride, rounding, 8);
-  		break;
-
-	case 4:
-		interpolate8x8_lowpass_v(halfpel_v, src, stride, rounding);
-		interpolate8x8_avg2(dst, src, halfpel_v, stride, rounding, 8);
- 		break;
-
-	case 5:
-		interpolate8x8_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(halfpel_v, src, halfpel_h, stride, rounding, 9);
-		interpolate8x8_lowpass_v(halfpel_hv, halfpel_v, stride, rounding);
-		interpolate8x8_avg2(dst, halfpel_v, halfpel_hv, stride, rounding, 8);
- 		break;
-
-	case 6:
-		interpolate8x8_lowpass_hv(halfpel_hv, halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(dst, halfpel_h, halfpel_hv, stride, rounding, 8);
- 		break;
-
-	case 7:
-		interpolate8x8_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(halfpel_v, src + 1, halfpel_h, stride, rounding, 9);
-		interpolate8x8_lowpass_v(halfpel_hv, halfpel_v, stride, rounding);
-		interpolate8x8_avg2(dst, halfpel_v, halfpel_hv, stride, rounding, 8);
- 		break;
-
-	case 8:
-	    interpolate8x8_lowpass_v(dst, src, stride, rounding);
-		break;
-
-	case 9:
-		interpolate8x8_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(halfpel_v, src, halfpel_h, stride, rounding, 9);
-		interpolate8x8_lowpass_v(dst, halfpel_v, stride, rounding);
-  		break;
-
-	case 10:
-		interpolate8x8_lowpass_hv(dst, halfpel_h, src, stride, rounding);
-		break;
-
-	case 11:
-		interpolate8x8_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(halfpel_v, src + 1, halfpel_h, stride, rounding, 9);
-		interpolate8x8_lowpass_v(dst, halfpel_v, stride, rounding);
-   		break;
-
-	case 12:
-		interpolate8x8_lowpass_v(halfpel_v, src, stride, rounding);
-		interpolate8x8_avg2(dst, src+stride, halfpel_v, stride, rounding, 8);
-   		break;
-
-	case 13:
-		interpolate8x8_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(halfpel_v, src, halfpel_h, stride, rounding, 9);
-		interpolate8x8_lowpass_v(halfpel_hv, halfpel_v, stride, rounding);
-		interpolate8x8_avg2(dst, halfpel_v+stride, halfpel_hv, stride, rounding, 8);
- 		break;
-
-	case 14:
-		interpolate8x8_lowpass_hv(halfpel_hv, halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(dst, halfpel_h+stride, halfpel_hv, stride, rounding, 8);
- 		break;
-
-	case 15:
-		interpolate8x8_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(halfpel_v, src + 1, halfpel_h, stride, rounding, 9);
-		interpolate8x8_lowpass_v(halfpel_hv, halfpel_v, stride, rounding);
-		interpolate8x8_avg2(dst, halfpel_hv, halfpel_v + stride, stride, rounding, 8);
-		break;
-	}
-}
+;
 
-static __inline void interpolate16x16_quarterpel(uint8_t * const cur,
-				     uint8_t * const refn,
+extern  void interpolate16x16_quarterpel(uint8_t * const cur, 				     uint8_t * const refn,
 					 uint8_t * const refh,
 					 uint8_t * const refv,
 					 uint8_t * const refhv,
@@ -374,173 +230,6 @@
 					 const int32_t dx,  const int dy,
 					 const uint32_t stride,
 					 const uint32_t rounding)
-{
-	const int32_t xRef = (int)x*4 + dx;
-	const int32_t yRef = (int)y*4 + dy;
-
-	uint8_t *src, *dst;
-	uint8_t *halfpel_h, *halfpel_v, *halfpel_hv;
-	int32_t x_int, y_int, x_frac, y_frac;
-
-	x_int = xRef/4;
-	if (xRef < 0 && xRef % 4)
-		x_int--;
-
-	x_frac = xRef - (4*x_int);
-
-	y_int  = yRef/4;
-	if (yRef < 0 && yRef % 4)
-		y_int--;
-
-	y_frac = yRef - (4*y_int);
-
-	src = refn + y_int * (int)stride + x_int;
-	halfpel_h = refh;
-	halfpel_v = refv;
-	halfpel_hv = refhv;
-
-	dst = cur + y * stride + x;
-
-	switch((y_frac << 2) | (x_frac)) {
-
-	case 0:
-		transfer16x16_copy(dst, src, stride);
-		break;
-
-	case 1:
-		interpolate16x16_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(dst, src, halfpel_h, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8, src+8, halfpel_h+8, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride, src+8*stride, halfpel_h+8*stride, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride+8, src+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 8);
-		break;
-
-	case 2:
-	    interpolate16x16_lowpass_h(dst, src, stride, rounding);
-  		break;
-
-	case 3:
-		interpolate16x16_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(dst, src + 1, halfpel_h, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8, src + 8 + 1, halfpel_h+8, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride, src + 8*stride + 1, halfpel_h+8*stride, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride+8, src+8*stride+8 + 1, halfpel_h+8*stride+8, stride, rounding, 8);
-		break;
-
-	case 4:
-		interpolate16x16_lowpass_v(halfpel_v, src, stride, rounding);
-		interpolate8x8_avg2(dst, src, halfpel_v, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8, src+8, halfpel_v+8, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride, src+8*stride, halfpel_v+8*stride, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride+8, src+8*stride+8, halfpel_v+8*stride+8, stride, rounding, 8);
-		break;
-
-	case 5:
-		interpolate16x16_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(halfpel_v, src, halfpel_h, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8, src + 8, halfpel_h+8, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8*stride, src + 8*stride, halfpel_h+8*stride, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8*stride+8, src+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 9);
-
-		interpolate16x16_lowpass_v(halfpel_hv, halfpel_v, stride, rounding);
-		interpolate8x8_avg2(dst, halfpel_hv, halfpel_v, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8, halfpel_hv+8, halfpel_v+8, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride, halfpel_hv+8*stride, halfpel_v+8*stride, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride+8, halfpel_hv+8*stride+8, halfpel_v+8*stride+8, stride, rounding, 8);
-		break;
-
-	case 6:
-		interpolate16x16_lowpass_hv(halfpel_hv, halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(dst, halfpel_h, halfpel_hv, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8, halfpel_h+8, halfpel_hv+8, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride, halfpel_h+8*stride, halfpel_hv+8*stride, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride+8, halfpel_h+8*stride+8, halfpel_hv+8*stride+8, stride, rounding, 8);
-		break;
-
-	case 7:
-		interpolate16x16_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(halfpel_v, src+1, halfpel_h, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8, src+1 + 8, halfpel_h+8, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8*stride, src+1 + 8*stride, halfpel_h+8*stride, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8*stride+8, src+1+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 9);
-
-		interpolate16x16_lowpass_v(halfpel_hv, halfpel_v, stride, rounding);
-		interpolate8x8_avg2(dst, halfpel_hv, halfpel_v, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8, halfpel_hv+8, halfpel_v+8, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride, halfpel_hv+8*stride, halfpel_v+8*stride, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride+8, halfpel_hv+8*stride+8, halfpel_v+8*stride+8, stride, rounding, 8);
-		break;
-
-	case 8:
-	    interpolate16x16_lowpass_v(dst, src, stride, rounding);
-		break;
-
-	case 9:
-		interpolate16x16_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(halfpel_v, src, halfpel_h, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8, src + 8, halfpel_h+8, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8*stride, src + 8*stride, halfpel_h+8*stride, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8*stride+8, src+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 9);
-		interpolate16x16_lowpass_v(dst, halfpel_v, stride, rounding);
-		break;
-
-	case 10:
-		interpolate16x16_lowpass_hv(dst, halfpel_h, src, stride, rounding);
-		break;
-
-	case 11:
-		interpolate16x16_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(halfpel_v, src+1, halfpel_h, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8, src+1 + 8, halfpel_h+8, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8*stride, src+1 + 8*stride, halfpel_h+8*stride, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8*stride+8, src+1+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 9);
-		interpolate16x16_lowpass_v(dst, halfpel_v, stride, rounding);
-		break;
-
-	case 12:
-		interpolate16x16_lowpass_v(halfpel_v, src, stride, rounding);
-		interpolate8x8_avg2(dst, src+stride, halfpel_v, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8, src+stride+8, halfpel_v+8, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride, src+stride+8*stride, halfpel_v+8*stride, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride+8, src+stride+8*stride+8, halfpel_v+8*stride+8, stride, rounding, 8);
-		break;
-
-	case 13:
-		interpolate16x16_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(halfpel_v, src, halfpel_h, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8, src + 8, halfpel_h+8, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8*stride, src + 8*stride, halfpel_h+8*stride, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8*stride+8, src+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 9);
-
-		interpolate16x16_lowpass_v(halfpel_hv, halfpel_v, stride, rounding);
-		interpolate8x8_avg2(dst, halfpel_hv, halfpel_v+stride, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8, halfpel_hv+8, halfpel_v+stride+8, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride, halfpel_hv+8*stride, halfpel_v+stride+8*stride, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride+8, halfpel_hv+8*stride+8, halfpel_v+stride+8*stride+8, stride, rounding, 8);
-		break;
-
-	case 14:
-		interpolate16x16_lowpass_hv(halfpel_hv, halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(dst, halfpel_h+stride, halfpel_hv, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8, halfpel_h+stride+8, halfpel_hv+8, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride, halfpel_h+stride+8*stride, halfpel_hv+8*stride, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride+8, halfpel_h+stride+8*stride+8, halfpel_hv+8*stride+8, stride, rounding, 8);
-		break;
-
-	case 15:
-		interpolate16x16_lowpass_h(halfpel_h, src, stride, rounding);
-		interpolate8x8_avg2(halfpel_v, src+1, halfpel_h, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8, src+1 + 8, halfpel_h+8, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8*stride, src+1 + 8*stride, halfpel_h+8*stride, stride, rounding, 9);
-		interpolate8x8_avg2(halfpel_v+8*stride+8, src+1+8*stride+8, halfpel_h+8*stride+8, stride, rounding, 9);
-
-		interpolate16x16_lowpass_v(halfpel_hv, halfpel_v, stride, rounding);
-		interpolate8x8_avg2(dst, halfpel_hv, halfpel_v+stride, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8, halfpel_hv+8, halfpel_v+stride+8, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride, halfpel_hv+8*stride, halfpel_v+stride+8*stride, stride, rounding, 8);
-		interpolate8x8_avg2(dst+8*stride+8, halfpel_hv+8*stride+8, halfpel_v+stride+8*stride+8, stride, rounding, 8);
-		break;
-	}
-}
+;
 
 #endif
diff -bBru src-ori/image/postprocessing.c src/image/postprocessing.c
--- src-ori/image/postprocessing.c	Wed Jan 12 10:32:43 2005
+++ src/image/postprocessing.c	Wed Jan 12 12:40:28 2005
@@ -23,15 +23,9 @@
  *
  ****************************************************************************/
 
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-
-#include "../portab.h"
-#include "../global.h"
-#include "image.h"
-#include "../utils/emms.h"
-#include "postprocessing.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
+
 
 /* Some useful (and fast) macros
    Note that the MIN/MAX macros assume signed shift - if your compiler
@@ -285,7 +282,7 @@
 	int i, j;
 	int patt[4] = { -1,0,1,0 };
 
-	emms();
+	(((struct global_all_dll *)Dll::Tls())->emms)();
 
 	srand(123457);
 
diff -bBru src-ori/image/qpel.c src/image/qpel.c
--- src-ori/image/qpel.c	Wed Jan 12 11:25:40 2005
+++ src/image/qpel.c	Wed Jan 12 12:40:42 2005
@@ -25,8 +25,307 @@
 
 #ifndef XVID_AUTO_INCLUDE
 
-#include "../portab.h"
-#include "qpel.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
+ new_interpolate16x16_quarterpel(uint8_t * const cur,
+ 								uint8_t * const refn,
+ 								uint8_t * const refh,
+ 								uint8_t * const refv,
+ 								uint8_t * const refhv,
+ 								const uint32_t x, const uint32_t y,
+ 								const int32_t dx,  const int dy,
+ 								const uint32_t stride,
+ 								const uint32_t rounding)
+ {
+	/* Quarter-pel interpolation of the 16x16 block at (x,y): reads refn displaced by (dx,dy) quarter-pels, writes into cur. */
+	const uint8_t *src;
+	uint8_t *dst;
+	uint8_t *tmp;
+	int32_t quads;
+	const XVID_QP_FUNCS *Ops;
+	/* NOTE(review): this definition has no return type specifier and returns no value; the inline version removed from qpel.h was `static void __inline` - confirm `void` was intended (the qpel.h declaration must match). */
+	int32_t x_int, y_int;
+	/* Reference position in quarter-pel units: block origin scaled by 4 plus the displacement. */
+	const int32_t xRef = (int)x*4 + dx;
+	const int32_t yRef = (int)y*4 + dy;
+	/* Filter table comes from the struct global_all_dll pointer returned by Dll::Tls(); quads packs the fractional parts (dx&3 in bits 0-1, dy&3 in bits 2-3). */
+	Ops = (((struct global_all_dll *)Dll::Tls())->xvid_QP_Funcs); /* TODO: pass as argument */
+	quads = (dx&3) | ((dy&3)<<2);
+	/* Integer part of the position: C division truncates toward zero, so negative values that are not multiples of 4 are decremented to round down. */
+	x_int = xRef/4;
+	if (xRef < 0 && xRef % 4)
+		x_int--;
+
+	y_int	 = yRef/4;
+	if (yRef < 0 && yRef % 4)
+		y_int--;
+	/* dst: block at (x,y) in cur; src: block at the integer reference position in refn. */
+	dst = cur + y * stride + x;
+	src = refn + y_int * (int)stride + x_int;
+	/* Only refh is used (as scratch for the two-pass cases); refv and refhv are never read in this function. */
+	tmp = refh; /* we need at least a 16 x stride scratch block */
+	/* Bits 0-1 of quads pick the horizontal pass, bits 2-3 the vertical one. NOTE(review): H passes feeding tmp get 17 instead of 16 - presumably one extra row for the vertical filter; confirm against the pass implementations. */
+	switch(quads) {
+	case 0:
+		(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy)( dst, src, stride);
+		(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy)( dst+8, src+8, stride);
+		(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy)( dst+8*stride, src+8*stride, stride);
+		(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy)( dst+8*stride+8, src+8*stride+8, stride);
+		break;
+	case 1:
+		Ops->H_Pass_Avrg(dst, src, 16, stride, rounding);
+		break;
+	case 2:
+		Ops->H_Pass(dst, src, 16, stride, rounding);
+		break;
+	case 3:
+		Ops->H_Pass_Avrg_Up(dst, src, 16, stride, rounding);
+		break;
+	case 4:
+		Ops->V_Pass_Avrg(dst, src, 16, stride, rounding);
+		break;
+	case 5:
+		Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+		break;
+	case 6:
+		Ops->H_Pass(tmp, src,	  17, stride, rounding);
+		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+		break;
+	case 7:
+		Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
+		break;
+	case 8:
+		Ops->V_Pass(dst, src, 16, stride, rounding);
+		break;
+	case 9:
+		Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+		Ops->V_Pass(dst, tmp, 16, stride, rounding);
+		break;
+	case 10:
+		Ops->H_Pass(tmp, src, 17, stride, rounding);
+		Ops->V_Pass(dst, tmp, 16, stride, rounding);
+		break;
+	case 11:
+		Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+		Ops->V_Pass(dst, tmp, 16, stride, rounding);
+		break;
+	case 12:
+		Ops->V_Pass_Avrg_Up(dst, src, 16, stride, rounding);
+		break;
+	case 13:
+		Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
+		break;
+	case 14:
+		Ops->H_Pass(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg_Up( dst, tmp, 16, stride, rounding);
+		break;
+	case 15:
+		Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
+		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
+		break;
+	}
+}
+ new_interpolate16x8_quarterpel(uint8_t * const cur,
+ 							   uint8_t * const refn,
+ 							   uint8_t * const refh,
+ 							   uint8_t * const refv,
+ 							   uint8_t * const refhv,
+ 							   const uint32_t x, const uint32_t y,
+ 							   const int32_t dx,  const int dy,
+ 							   const uint32_t stride,
+ 							   const uint32_t rounding)
+ {
+	/* Quarter-pel interpolation of the 16x8 block at (x,y): reads refn displaced by (dx,dy) quarter-pels, writes into cur. */
+	const uint8_t *src;
+	uint8_t *dst;
+	uint8_t *tmp;
+	int32_t quads;
+	const XVID_QP_FUNCS *Ops;
+	/* NOTE(review): this definition has no return type specifier and returns no value; the inline version removed from qpel.h was `static void __inline` - confirm `void` was intended (the qpel.h declaration must match). */
+	int32_t x_int, y_int;
+	/* Reference position in quarter-pel units: block origin scaled by 4 plus the displacement. */
+	const int32_t xRef = (int)x*4 + dx;
+	const int32_t yRef = (int)y*4 + dy;
+	/* Filter table comes from the struct global_all_dll pointer returned by Dll::Tls(); quads packs the fractional parts (dx&3 in bits 0-1, dy&3 in bits 2-3). */
+	Ops = (((struct global_all_dll *)Dll::Tls())->xvid_QP_Funcs); /* TODO: pass as argument */
+	quads = (dx&3) | ((dy&3)<<2);
+	/* Integer part of the position: C division truncates toward zero, so negative values that are not multiples of 4 are decremented to round down. */
+	x_int = xRef/4;
+	if (xRef < 0 && xRef % 4)
+		x_int--;
+
+	y_int	 = yRef/4;
+	if (yRef < 0 && yRef % 4)
+		y_int--;
+	/* dst: block at (x,y) in cur; src: block at the integer reference position in refn. */
+	dst = cur + y * stride + x;
+	src = refn + y_int * (int)stride + x_int;
+	/* Only refh is used (as scratch for the two-pass cases); refv and refhv are never read in this function. */
+	tmp = refh; /* we need at least a 16 x stride scratch block */
+	/* Bits 0-1 of quads pick the horizontal pass, bits 2-3 the vertical one. NOTE(review): H passes get 8 (9 when feeding tmp) while the *_8 vertical passes get 16 - presumably row count vs. width; confirm against the pass implementations. */
+	switch(quads) {
+	case 0:
+		(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy)( dst, src, stride);
+		(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy)( dst+8, src+8, stride);
+		break;
+	case 1:
+		Ops->H_Pass_Avrg(dst, src, 8, stride, rounding);
+		break;
+	case 2:
+		Ops->H_Pass(dst, src, 8, stride, rounding);
+		break;
+	case 3:
+		Ops->H_Pass_Avrg_Up(dst, src, 8, stride, rounding);
+		break;
+	case 4:
+		Ops->V_Pass_Avrg_8(dst, src, 16, stride, rounding);
+		break;
+	case 5:
+		Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding);
+		break;
+	case 6:
+		Ops->H_Pass(tmp, src,	  9, stride, rounding);
+		Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding);
+		break;
+	case 7:
+		Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding);
+		break;
+	case 8:
+		Ops->V_Pass_8(dst, src, 16, stride, rounding);
+		break;
+	case 9:
+		Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_8(dst, tmp, 16, stride, rounding);
+		break;
+	case 10:
+		Ops->H_Pass(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_8(dst, tmp, 16, stride, rounding);
+		break;
+	case 11:
+		Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_8(dst, tmp, 16, stride, rounding);
+		break;
+	case 12:
+		Ops->V_Pass_Avrg_Up_8(dst, src, 16, stride, rounding);
+		break;
+	case 13:
+		Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_Up_8(dst, tmp, 16, stride, rounding);
+		break;
+	case 14:
+		Ops->H_Pass(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_Up_8( dst, tmp, 16, stride, rounding);
+		break;
+	case 15:
+		Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_Up_8(dst, tmp, 16, stride, rounding);
+		break;
+	}
+}
+ new_interpolate8x8_quarterpel(uint8_t * const cur,
+ 							  uint8_t * const refn,
+ 							  uint8_t * const refh,
+ 							  uint8_t * const refv,
+ 							  uint8_t * const refhv,
+ 							  const uint32_t x, const uint32_t y,
+ 							  const int32_t dx,  const int dy,
+ 							  const uint32_t stride,
+ 							  const uint32_t rounding)
+ {
+	/* Quarter-pel interpolation of the 8x8 block at (x,y): reads refn displaced by (dx,dy) quarter-pels, writes into cur. */
+	const uint8_t *src;
+	uint8_t *dst;
+	uint8_t *tmp;
+	int32_t quads;
+	const XVID_QP_FUNCS *Ops;
+	/* NOTE(review): this definition has no return type specifier and returns no value; the inline version removed from qpel.h was `static void __inline` - confirm `void` was intended (the qpel.h declaration must match). */
+	int32_t x_int, y_int;
+	/* Reference position in quarter-pel units: block origin scaled by 4 plus the displacement. */
+	const int32_t xRef = (int)x*4 + dx;
+	const int32_t yRef = (int)y*4 + dy;
+	/* Filter table comes from the struct global_all_dll pointer returned by Dll::Tls(); quads packs the fractional parts (dx&3 in bits 0-1, dy&3 in bits 2-3). */
+	Ops = (((struct global_all_dll *)Dll::Tls())->xvid_QP_Funcs); /* TODO: pass as argument */
+	quads = (dx&3) | ((dy&3)<<2);
+	/* Integer part of the position: C division truncates toward zero, so negative values that are not multiples of 4 are decremented to round down. */
+	x_int = xRef/4;
+	if (xRef < 0 && xRef % 4)
+		x_int--;
+
+	y_int	 = yRef/4;
+	if (yRef < 0 && yRef % 4)
+		y_int--;
+	/* dst: block at (x,y) in cur; src: block at the integer reference position in refn. */
+	dst = cur + y * stride + x;
+	src = refn + y_int * (int)stride + x_int;
+	/* Only refh is used (as scratch for the two-pass cases); refv and refhv are never read in this function. */
+	tmp = refh; /* we need at least a 16 x stride scratch block */
+	/* Bits 0-1 of quads pick the horizontal pass, bits 2-3 the vertical one; every pass uses the *_8 table entries. NOTE(review): H passes feeding tmp get 9 instead of 8 - presumably one extra row for the vertical filter; confirm. */
+	switch(quads) {
+	case 0:
+		(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy)( dst, src, stride);
+		break;
+	case 1:
+		Ops->H_Pass_Avrg_8(dst, src, 8, stride, rounding);
+		break;
+	case 2:
+		Ops->H_Pass_8(dst, src, 8, stride, rounding);
+		break;
+	case 3:
+		Ops->H_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
+		break;
+	case 4:
+		Ops->V_Pass_Avrg_8(dst, src, 8, stride, rounding);
+		break;
+	case 5:
+		Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 6:
+		Ops->H_Pass_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 7:
+		Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 8:
+		Ops->V_Pass_8(dst, src, 8, stride, rounding);
+		break;
+	case 9:
+		Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 10:
+		Ops->H_Pass_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 11:
+		Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 12:
+		Ops->V_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
+		break;
+	case 13:
+		Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
+		break;
+	case 14:
+		Ops->H_Pass_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_Up_8( dst, tmp, 8, stride, rounding);
+		break;
+	case 15:
+		Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
+		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
+		break;
+	}
+}
+
 
 /* Quarterpel FIR definition
  ****************************************************************************/
@@ -81,7 +380,7 @@
 #define FUNC_HA_UP  H_Pass_Avrg_Up_16_C
 #define FUNC_VA_UP  V_Pass_Avrg_Up_16_C
 
-#include __FILE__   /* self-include ourself */
+#include __FILE__
 
 /* note: B-frame always uses Rnd=0... */
 #define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
@@ -92,7 +391,7 @@
 #define FUNC_HA_UP  H_Pass_Avrg_Up_16_Add_C
 #define FUNC_VA_UP  V_Pass_Avrg_Up_16_Add_C
 
-#include __FILE__   /* self-include ourself */
+#include __FILE__
 
 #undef SIZE
 #undef TABLE
@@ -110,7 +409,7 @@
 #define FUNC_HA_UP  H_Pass_Avrg_Up_8_C
 #define FUNC_VA_UP  V_Pass_Avrg_Up_8_C
 
-#include __FILE__   /* self-include ourself */
+#include __FILE__
 
 /* note: B-frame always uses Rnd=0... */
 #define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
@@ -121,7 +420,7 @@
 #define FUNC_HA_UP  H_Pass_Avrg_Up_8_Add_C
 #define FUNC_VA_UP  V_Pass_Avrg_Up_8_Add_C
 
-#include __FILE__   /* self-include ourself */
+#include __FILE__
 
 #undef SIZE
 #undef TABLE
@@ -132,8 +431,6 @@
  * TODO: embed in enc/dec structure?
  ****************************************************************************/
 
-XVID_QP_FUNCS *xvid_QP_Funcs = 0;
-XVID_QP_FUNCS *xvid_QP_Add_Funcs = 0;
 
 /* plain C impl. declaration
  * TODO: should be declared elsewhere?
@@ -209,7 +506,6 @@
  ****************************************************************************/
 
 #ifdef ARCH_IS_IA32
-uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */
 #endif
 
 /* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm)
@@ -220,23 +516,6 @@
  * Using Symmetries (and bswap) could reduce further
  * the memory to 7 tables (->14K). */
 
-int16_t xvid_FIR_1_0_0_0[256][4];
-int16_t xvid_FIR_3_1_0_0[256][4];
-int16_t xvid_FIR_6_3_1_0[256][4];
-int16_t xvid_FIR_14_3_2_1[256][4];
-int16_t xvid_FIR_20_6_3_1[256][4];
-int16_t xvid_FIR_20_20_6_3[256][4];
-int16_t xvid_FIR_23_19_6_3[256][4];
-int16_t xvid_FIR_7_20_20_6[256][4];
-int16_t xvid_FIR_6_20_20_6[256][4];
-int16_t xvid_FIR_6_20_20_7[256][4];
-int16_t xvid_FIR_3_6_20_20[256][4];
-int16_t xvid_FIR_3_6_19_23[256][4];
-int16_t xvid_FIR_1_3_6_20[256][4];
-int16_t xvid_FIR_1_2_3_14[256][4];
-int16_t xvid_FIR_0_1_3_6[256][4];
-int16_t xvid_FIR_0_0_1_3[256][4];
-int16_t xvid_FIR_0_0_0_1[256][4];
 
 static void Init_FIR_Table(int16_t Tab[][4],
                            int A, int B, int C, int D)
@@ -257,32 +536,32 @@
 	int i;
 
 	for(i=0; i<256; ++i) {
-		xvid_Expand_mmx[i][0] = i;
-		xvid_Expand_mmx[i][1] = i;
-		xvid_Expand_mmx[i][2] = i;
-		xvid_Expand_mmx[i][3] = i;
+		(((struct global_all_dll *)Dll::Tls())->xvid_Expand_mmx)[i][0] = i;
+		(((struct global_all_dll *)Dll::Tls())->xvid_Expand_mmx)[i][1] = i;
+		(((struct global_all_dll *)Dll::Tls())->xvid_Expand_mmx)[i][2] = i;
+		(((struct global_all_dll *)Dll::Tls())->xvid_Expand_mmx)[i][3] = i;
 	}
 #endif
 
 	/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) */
 
-	Init_FIR_Table(xvid_FIR_1_0_0_0,   -1,  0,  0,  0);
-	Init_FIR_Table(xvid_FIR_3_1_0_0,    3, -1,  0,  0);
-	Init_FIR_Table(xvid_FIR_6_3_1_0,   -6,  3, -1,  0);
-	Init_FIR_Table(xvid_FIR_14_3_2_1,  14, -3,  2, -1);
-	Init_FIR_Table(xvid_FIR_20_6_3_1,  20, -6,  3, -1);
-	Init_FIR_Table(xvid_FIR_20_20_6_3, 20, 20, -6,  3);
-	Init_FIR_Table(xvid_FIR_23_19_6_3, 23, 19, -6,  3);
-	Init_FIR_Table(xvid_FIR_7_20_20_6, -7, 20, 20, -6);
-	Init_FIR_Table(xvid_FIR_6_20_20_6, -6, 20, 20, -6);
-	Init_FIR_Table(xvid_FIR_6_20_20_7, -6, 20, 20, -7);
-	Init_FIR_Table(xvid_FIR_3_6_20_20,  3, -6, 20, 20);
-	Init_FIR_Table(xvid_FIR_3_6_19_23,  3, -6, 19, 23);
-	Init_FIR_Table(xvid_FIR_1_3_6_20,  -1,  3, -6, 20);
-	Init_FIR_Table(xvid_FIR_1_2_3_14,  -1,  2, -3, 14);
-	Init_FIR_Table(xvid_FIR_0_1_3_6,    0, -1,  3, -6);
-	Init_FIR_Table(xvid_FIR_0_0_1_3,    0,  0, -1,  3);
-	Init_FIR_Table(xvid_FIR_0_0_0_1,    0,  0,  0, -1);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_1_0_0_0),   -1,  0,  0,  0);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_3_1_0_0),    3, -1,  0,  0);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_6_3_1_0),   -6,  3, -1,  0);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_14_3_2_1),  14, -3,  2, -1);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_20_6_3_1),  20, -6,  3, -1);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_20_20_6_3), 20, 20, -6,  3);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_23_19_6_3), 23, 19, -6,  3);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_7_20_20_6), -7, 20, 20, -6);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_6_20_20_6), -6, 20, 20, -6);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_6_20_20_7), -6, 20, 20, -7);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_3_6_20_20),  3, -6, 20, 20);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_3_6_19_23),  3, -6, 19, 23);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_1_3_6_20),  -1,  3, -6, 20);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_1_2_3_14),  -1,  2, -3, 14);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_0_1_3_6),    0, -1,  3, -6);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_0_0_1_3),    0,  0, -1,  3);
+	Init_FIR_Table((((struct global_all_dll *)Dll::Tls())->xvid_FIR_0_0_0_1),    0,  0,  0, -1);
 
 }
 
diff -bBru src-ori/image/qpel.h src/image/qpel.h
--- src-ori/image/qpel.h	Wed Jan 12 11:26:28 2005
+++ src/image/qpel.h	Wed Jan 12 12:37:55 2005
@@ -74,8 +93,8 @@
 extern const XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx;
 #endif
 
-extern const XVID_QP_FUNCS *xvid_QP_Funcs;      /* <- main pointer for enc/dec structure */
-extern const XVID_QP_FUNCS *xvid_QP_Add_Funcs;  /* <- main pointer for enc/dec structure */
+
+
 
 /*****************************************************************************
  * macros
@@ -97,8 +116,7 @@
 
  ****************************************************************************/
 
-static void __inline
-new_interpolate16x16_quarterpel(uint8_t * const cur,
+extern  new_interpolate16x16_quarterpel(uint8_t * const cur,
 								uint8_t * const refn,
 								uint8_t * const refh,
 								uint8_t * const refv,
@@ -107,100 +125,9 @@
 								const int32_t dx,  const int dy,
 								const uint32_t stride,
 								const uint32_t rounding)
-{
-	const uint8_t *src;
-	uint8_t *dst;
-	uint8_t *tmp;
-	int32_t quads;
-	const XVID_QP_FUNCS *Ops;
-
-	int32_t x_int, y_int;
-
-	const int32_t xRef = (int)x*4 + dx;
-	const int32_t yRef = (int)y*4 + dy;
-
-	Ops = xvid_QP_Funcs; /* TODO: pass as argument */
-	quads = (dx&3) | ((dy&3)<<2);
-
-	x_int = xRef/4;
-	if (xRef < 0 && xRef % 4)
-		x_int--;
-
-	y_int	 = yRef/4;
-	if (yRef < 0 && yRef % 4)
-		y_int--;
-
-	dst = cur + y * stride + x;
-	src = refn + y_int * (int)stride + x_int;
-
-	tmp = refh; /* we need at least a 16 x stride scratch block */
-
-	switch(quads) {
-	case 0:
-		transfer8x8_copy( dst, src, stride);
-		transfer8x8_copy( dst+8, src+8, stride);
-		transfer8x8_copy( dst+8*stride, src+8*stride, stride);
-		transfer8x8_copy( dst+8*stride+8, src+8*stride+8, stride);
-		break;
-	case 1:
-		Ops->H_Pass_Avrg(dst, src, 16, stride, rounding);
-		break;
-	case 2:
-		Ops->H_Pass(dst, src, 16, stride, rounding);
-		break;
-	case 3:
-		Ops->H_Pass_Avrg_Up(dst, src, 16, stride, rounding);
-		break;
-	case 4:
-		Ops->V_Pass_Avrg(dst, src, 16, stride, rounding);
-		break;
-	case 5:
-		Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
-		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
-		break;
-	case 6:
-		Ops->H_Pass(tmp, src,	  17, stride, rounding);
-		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
-		break;
-	case 7:
-		Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
-		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
-		break;
-	case 8:
-		Ops->V_Pass(dst, src, 16, stride, rounding);
-		break;
-	case 9:
-		Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
-		Ops->V_Pass(dst, tmp, 16, stride, rounding);
-		break;
-	case 10:
-		Ops->H_Pass(tmp, src, 17, stride, rounding);
-		Ops->V_Pass(dst, tmp, 16, stride, rounding);
-		break;
-	case 11:
-		Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
-		Ops->V_Pass(dst, tmp, 16, stride, rounding);
-		break;
-	case 12:
-		Ops->V_Pass_Avrg_Up(dst, src, 16, stride, rounding);
-		break;
-	case 13:
-		Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
-		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
-		break;
-	case 14:
-		Ops->H_Pass(tmp, src, 17, stride, rounding);
-		Ops->V_Pass_Avrg_Up( dst, tmp, 16, stride, rounding);
-		break;
-	case 15:
-		Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
-		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
-		break;
-	}
-}
+;
 
-static void __inline
-new_interpolate16x8_quarterpel(uint8_t * const cur,
+extern  new_interpolate16x8_quarterpel(uint8_t * const cur,
 							   uint8_t * const refn,
 							   uint8_t * const refh,
 							   uint8_t * const refv,
@@ -209,98 +136,9 @@
 							   const int32_t dx,  const int dy,
 							   const uint32_t stride,
 							   const uint32_t rounding)
-{
-	const uint8_t *src;
-	uint8_t *dst;
-	uint8_t *tmp;
-	int32_t quads;
-	const XVID_QP_FUNCS *Ops;
-
-	int32_t x_int, y_int;
-
-	const int32_t xRef = (int)x*4 + dx;
-	const int32_t yRef = (int)y*4 + dy;
-
-	Ops = xvid_QP_Funcs; /* TODO: pass as argument */
-	quads = (dx&3) | ((dy&3)<<2);
-
-	x_int = xRef/4;
-	if (xRef < 0 && xRef % 4)
-		x_int--;
-
-	y_int	 = yRef/4;
-	if (yRef < 0 && yRef % 4)
-		y_int--;
-
-	dst = cur + y * stride + x;
-	src = refn + y_int * (int)stride + x_int;
-
-	tmp = refh; /* we need at least a 16 x stride scratch block */
-
-	switch(quads) {
-	case 0:
-		transfer8x8_copy( dst, src, stride);
-		transfer8x8_copy( dst+8, src+8, stride);
-		break;
-	case 1:
-		Ops->H_Pass_Avrg(dst, src, 8, stride, rounding);
-		break;
-	case 2:
-		Ops->H_Pass(dst, src, 8, stride, rounding);
-		break;
-	case 3:
-		Ops->H_Pass_Avrg_Up(dst, src, 8, stride, rounding);
-		break;
-	case 4:
-		Ops->V_Pass_Avrg_8(dst, src, 16, stride, rounding);
-		break;
-	case 5:
-		Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding);
-		break;
-	case 6:
-		Ops->H_Pass(tmp, src,	  9, stride, rounding);
-		Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding);
-		break;
-	case 7:
-		Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_Avrg_8(dst, tmp, 16, stride, rounding);
-		break;
-	case 8:
-		Ops->V_Pass_8(dst, src, 16, stride, rounding);
-		break;
-	case 9:
-		Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_8(dst, tmp, 16, stride, rounding);
-		break;
-	case 10:
-		Ops->H_Pass(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_8(dst, tmp, 16, stride, rounding);
-		break;
-	case 11:
-		Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_8(dst, tmp, 16, stride, rounding);
-		break;
-	case 12:
-		Ops->V_Pass_Avrg_Up_8(dst, src, 16, stride, rounding);
-		break;
-	case 13:
-		Ops->H_Pass_Avrg(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_Avrg_Up_8(dst, tmp, 16, stride, rounding);
-		break;
-	case 14:
-		Ops->H_Pass(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_Avrg_Up_8( dst, tmp, 16, stride, rounding);
-		break;
-	case 15:
-		Ops->H_Pass_Avrg_Up(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_Avrg_Up_8(dst, tmp, 16, stride, rounding);
-		break;
-	}
-}
+;
 
-static void __inline
-new_interpolate8x8_quarterpel(uint8_t * const cur,
+extern  new_interpolate8x8_quarterpel(uint8_t * const cur,
 							  uint8_t * const refn,
 							  uint8_t * const refh,
 							  uint8_t * const refv,
@@ -309,93 +147,6 @@
 							  const int32_t dx,  const int dy,
 							  const uint32_t stride,
 							  const uint32_t rounding)
-{
-	const uint8_t *src;
-	uint8_t *dst;
-	uint8_t *tmp;
-	int32_t quads;
-	const XVID_QP_FUNCS *Ops;
-
-	int32_t x_int, y_int;
-
-	const int32_t xRef = (int)x*4 + dx;
-	const int32_t yRef = (int)y*4 + dy;
-
-	Ops = xvid_QP_Funcs; /* TODO: pass as argument */
-	quads = (dx&3) | ((dy&3)<<2);
-
-	x_int = xRef/4;
-	if (xRef < 0 && xRef % 4)
-		x_int--;
-
-	y_int	 = yRef/4;
-	if (yRef < 0 && yRef % 4)
-		y_int--;
-
-	dst = cur + y * stride + x;
-	src = refn + y_int * (int)stride + x_int;
-
-	tmp = refh; /* we need at least a 16 x stride scratch block */
-
-	switch(quads) {
-	case 0:
-		transfer8x8_copy( dst, src, stride);
-		break;
-	case 1:
-		Ops->H_Pass_Avrg_8(dst, src, 8, stride, rounding);
-		break;
-	case 2:
-		Ops->H_Pass_8(dst, src, 8, stride, rounding);
-		break;
-	case 3:
-		Ops->H_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
-		break;
-	case 4:
-		Ops->V_Pass_Avrg_8(dst, src, 8, stride, rounding);
-		break;
-	case 5:
-		Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
-		break;
-	case 6:
-		Ops->H_Pass_8(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
-		break;
-	case 7:
-		Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
-		break;
-	case 8:
-		Ops->V_Pass_8(dst, src, 8, stride, rounding);
-		break;
-	case 9:
-		Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
-		break;
-	case 10:
-		Ops->H_Pass_8(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
-		break;
-	case 11:
-		Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
-		break;
-	case 12:
-		Ops->V_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
-		break;
-	case 13:
-		Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
-		break;
-	case 14:
-		Ops->H_Pass_8(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_Avrg_Up_8( dst, tmp, 8, stride, rounding);
-		break;
-	case 15:
-		Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
-		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
-		break;
-	}
-}
+;
 
 #endif  /* _XVID_QPEL_H_ */
diff -bBru src-ori/image/reduced.c src/image/reduced.c
--- src-ori/image/reduced.c	Wed Jan 12 10:32:43 2005
+++ src/image/reduced.c	Wed Jan 12 12:40:33 2005
@@ -23,17 +23,9 @@
  *
  ****************************************************************************/
 
-#include "../portab.h"
-#include "../global.h"
-#include "reduced.h"
-
-/* function pointers */
-COPY_UPSAMPLED_8X8_16TO8 * copy_upsampled_8x8_16to8;
-ADD_UPSAMPLED_8X8_16TO8 * add_upsampled_8x8_16to8;
-VFILTER_31 * vfilter_31;
-HFILTER_31 * hfilter_31;
-FILTER_18X18_TO_8X8 * filter_18x18_to_8x8;
-FILTER_DIFF_18X18_TO_8X8 * filter_diff_18x18_to_8x8;
+#include <e32std.h>
+#include "../global_all_dll.h"
+
 
 /*----------------------------------------------------------------------------
  * Upsampling (1/3/3/1) filter
diff -bBru src-ori/image/reduced.h src/image/reduced.h
--- src-ori/image/reduced.h	Wed Jan 12 10:32:43 2005
+++ src/image/reduced.h	Wed Jan 12 12:37:54 2005
@@ -41,40 +41,40 @@
 typedef void FILTER_DIFF_18X18_TO_8X8(int16_t *Dst, const uint8_t *Src, const int BpS);
 
 
-extern COPY_UPSAMPLED_8X8_16TO8 * copy_upsampled_8x8_16to8;
+
 extern COPY_UPSAMPLED_8X8_16TO8 xvid_Copy_Upsampled_8x8_16To8_C;
 #ifdef ARCH_IS_IA32
 extern COPY_UPSAMPLED_8X8_16TO8 xvid_Copy_Upsampled_8x8_16To8_mmx;
 extern COPY_UPSAMPLED_8X8_16TO8 xvid_Copy_Upsampled_8x8_16To8_xmm;
 #endif
 
-extern ADD_UPSAMPLED_8X8_16TO8 * add_upsampled_8x8_16to8;
+
 extern ADD_UPSAMPLED_8X8_16TO8 xvid_Add_Upsampled_8x8_16To8_C;
 #ifdef ARCH_IS_IA32
 extern ADD_UPSAMPLED_8X8_16TO8 xvid_Add_Upsampled_8x8_16To8_mmx;
 extern ADD_UPSAMPLED_8X8_16TO8 xvid_Add_Upsampled_8x8_16To8_xmm;
 #endif
 
-extern VFILTER_31 * vfilter_31;
+
 extern VFILTER_31 xvid_VFilter_31_C;
 #ifdef ARCH_IS_IA32
 extern VFILTER_31 xvid_VFilter_31_x86;
 #endif
 
-extern HFILTER_31 * hfilter_31;
+
 extern HFILTER_31 xvid_HFilter_31_C;
 #ifdef ARCH_IS_IA32
 extern HFILTER_31 xvid_HFilter_31_x86;
 extern HFILTER_31 xvid_HFilter_31_mmx;
 #endif
 
-extern FILTER_18X18_TO_8X8 * filter_18x18_to_8x8;
+
 extern FILTER_18X18_TO_8X8 xvid_Filter_18x18_To_8x8_C;
 #ifdef ARCH_IS_IA32
 extern FILTER_18X18_TO_8X8 xvid_Filter_18x18_To_8x8_mmx;
 #endif
 
-extern FILTER_DIFF_18X18_TO_8X8 * filter_diff_18x18_to_8x8;
+
 extern FILTER_DIFF_18X18_TO_8X8 xvid_Filter_Diff_18x18_To_8x8_C;
 #ifdef ARCH_IS_IA32
 extern FILTER_DIFF_18X18_TO_8X8 xvid_Filter_Diff_18x18_To_8x8_mmx;
Only in src-ori/image: x86_asm
diff -bBru src-ori/motion/estimation.h src/motion/estimation.h
--- src-ori/motion/estimation.h	Wed Jan 12 10:32:43 2005
+++ src/motion/estimation.h	Wed Jan 12 12:38:13 2005
@@ -71,59 +71,110 @@
 
 typedef struct
 {
+
 	/* data modified by CheckCandidates */
+
 	int32_t iMinSAD[5];			/* smallest SADs found so far */
+
 	VECTOR currentMV[5];		/* best vectors found so far */
+
 	VECTOR currentQMV[5];		/* as above, but used during qpel search */
+
 	int temp[4];				/* temporary space */
+
 	unsigned int dir;			/* 'direction', set when better vector is found */
+
 	int chromaX, chromaY, chromaSAD; /* info to make ChromaSAD faster */
+
 	VECTOR currentQMV2;			/* extra vector for SubpelRefine_fast */
+
 	int32_t iMinSAD2;			/* extra SAD value for SubpelRefine_fast */
 
+
+
 	/* general fields */
+
 	int max_dx, min_dx, max_dy, min_dy; /* maximum range */
+
 	uint32_t rounding;			/* rounding type in use */
+
 	VECTOR predMV;				/* vector which predicts current vector */
+
 	const uint8_t * RefP[6];	/* reference pictures - N, V, H, HV, cU, cV */
+
 	const uint8_t * Cur;		/* current picture */
+
 	const uint8_t *CurU, *CurV;	/* current picture - chroma planes */
 	
+	
+
 	uint8_t * RefQ;				/* temporary space for interpolations */
+
 	uint32_t lambda16;			/* how much vector bits weight */
+
 	uint32_t lambda8;			/* as above - for inter4v mode */
+
 	uint32_t iEdgedWidth;		/* picture's stride */
+
 	uint32_t iFcode;			/* current fcode */
 	
+	
+
 	int qpel;					/* if we're coding in qpel mode */
+
 	int qpel_precision;			/* if X and Y are in qpel precision (refinement probably) */
+
 	int chroma;					/* should we include chroma SAD? */
+
 	int rrv;					/* are we using reduced resolution? */
 
+
+
 	/* fields for interpolate and direct modes */
+
 	const uint8_t * b_RefP[6];	/* backward reference pictures - N, V, H, HV, cU, cV */
+
 	VECTOR bpredMV;				/* backward prediction - used interpolate mode only */
+
 	uint32_t bFcode;			/* backward fcode - used as above */
 
+
+
 	/* fields for direct mode */
+
 	VECTOR directmvF[4];		/* scaled reference vectors */
+
 	VECTOR directmvB[4];		/* as above */
+
 	const VECTOR * referencemv; /* pointer to not-scaled reference vectors */
 
+
+
 	/* BITS/R-D stuff */
+
 	int16_t * dctSpace;			/* temporary space for dct */
+
 	uint32_t iQuant;			/* current quant */
+
 	uint32_t quant_type;		/* current quant type */
+
 	unsigned int cbp[2];					/* CBP of the best vector found so far + cbp for inter4v search */
+
 	const uint16_t * scan_table; /* current scan table */
+
 	const uint16_t * mpeg_quant_matrices;			/* current MPEG quantization matrices */
 
+
+
 } SearchData;
 
+
 typedef void(CheckFunc)(const int x, const int y,
 						SearchData * const Data,
+
 						const unsigned int Direction);
 
+
 CheckFunc CheckCandidate16no4v; /* shared between p-vop and b-vop search */
 
 uint8_t *
diff -bBru src-ori/motion/estimation_bvop.c src/motion/estimation_bvop.c
--- src-ori/motion/estimation_bvop.c	Wed Jan 12 10:32:43 2005
+++ src/motion/estimation_bvop.c	Wed Jan 12 12:44:30 2005
@@ -26,18 +26,9 @@
  ****************************************************************************/
 
 
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>	/* memcpy */
-
-#include "../encoder.h"
-#include "../global.h"
-#include "../image/interpolate8x8.h"
-#include "estimation.h"
-#include "motion.h"
-#include "sad.h"
-#include "motion_inlines.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
+
 
 static int32_t
 ChromaSAD2(const int fx, const int fy, const int bx, const int by,
@@ -49,9 +40,9 @@
 
 	const INTERPOLATE8X8_PTR interpolate8x8_halfpel[] = {
 		NULL,
-		interpolate8x8_halfpel_v,
-		interpolate8x8_halfpel_h,
-		interpolate8x8_halfpel_hv
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_v),
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_h),
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv)
 	};
 
 	int offset = (fx>>1) + (fy>>1)*stride;
@@ -80,8 +71,8 @@
 		b_refv = (uint8_t*)data->b_RefP[5] + offset;
 	}
 
-	sad = sad8bi(data->CurU, b_refu, f_refu, stride);
-	sad += sad8bi(data->CurV, b_refv, f_refv, stride);
+	sad = (((struct global_all_dll *)Dll::Tls())->sad8bi)(data->CurU, b_refu, f_refu, stride);
+	sad += (((struct global_all_dll *)Dll::Tls())->sad8bi)(data->CurV, b_refv, f_refv, stride);
 
 	return sad;
 }
@@ -128,7 +119,7 @@
 	t = d_mv_bits(xf, yf, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0)
 		 + d_mv_bits(xb, yb, data->bpredMV, data->iFcode, data->qpel^data->qpel_precision, 0);
 
-	sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth);
+	sad = (((struct global_all_dll *)Dll::Tls())->sad16bi)(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth);
 	sad += (data->lambda16 * t * sad)>>10;
 
 	if (data->chroma && sad < *data->iMinSAD)
@@ -178,7 +169,7 @@
 	t = d_mv_bits(xf, yf, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0)
 		 + d_mv_bits(xb, yb, data->bpredMV, data->iFcode, data->qpel^data->qpel_precision, 0);
 
-	sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth);
+	sad = (((struct global_all_dll *)Dll::Tls())->sad16bi)(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth);
 	sad += (data->lambda16 * t * sad)>>10;
 
 	if (data->chroma && sad < *data->iMinSAD)
@@ -247,7 +238,7 @@
 		ReferenceF = xvid_me_interpolate8x8qpel(mvs.x, mvs.y, k, 0, data);
 		ReferenceB = xvid_me_interpolate8x8qpel(b_mvs.x, b_mvs.y, k, 1, data);
 
-		sad += sad8bi(data->Cur + 8*(k&1) + 8*(k>>1)*(data->iEdgedWidth),
+		sad += (((struct global_all_dll *)Dll::Tls())->sad8bi)(data->Cur + 8*(k&1) + 8*(k>>1)*(data->iEdgedWidth),
 						ReferenceF, ReferenceB, data->iEdgedWidth);
 		if (sad > *(data->iMinSAD)) return;
 	}
@@ -304,7 +295,7 @@
 		ReferenceB = GetReferenceB(b_mvs.x, b_mvs.y, 1, data);
 	}
 
-	sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth);
+	sad = (((struct global_all_dll *)Dll::Tls())->sad16bi)(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth);
 	sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0, 0) * sad)>>10;
 
 	if (data->chroma && sad < *data->iMinSAD)
@@ -345,7 +336,7 @@
 	t = d_mv_bits(x, y, data->predMV, data->iFcode,
 					data->qpel^data->qpel_precision, data->rrv);
 
-	sad = sad16(data->Cur, Reference, data->iEdgedWidth, 256*4096);
+	sad = (((struct global_all_dll *)Dll::Tls())->sad16)(data->Cur, Reference, data->iEdgedWidth, 256*4096);
 	sad += (data->lambda16 * t * sad)>>10;
 
 	if (data->chroma && sad < *data->iMinSAD)
@@ -377,7 +368,7 @@
 	t = d_mv_bits(x, y, data->predMV, data->iFcode,
 					data->qpel^data->qpel_precision, data->rrv);
 
-	sad = sad16(data->Cur, Reference, data->iEdgedWidth, 256*4096);
+	sad = (((struct global_all_dll *)Dll::Tls())->sad16)(data->Cur, Reference, data->iEdgedWidth, 256*4096);
 	sad += (data->lambda16 * t * sad)>>10;
 
 	if (data->chroma && sad < *data->iMinSAD)
@@ -573,14 +564,14 @@
 		b_dy = (b_dy >> 3) + roundtab_76[b_dy & 0xf];
 		b_dx = (b_dx >> 3) + roundtab_76[b_dx & 0xf];
 
-		sum = sad8bi(pCur->u + 8 * x + 8 * y * stride,
+		sum = (((struct global_all_dll *)Dll::Tls())->sad8bi)(pCur->u + 8 * x + 8 * y * stride,
 						f_Ref->u + ((int)y*8 + dy/2) * (int)stride + (int)x*8 + dx/2,
 						b_Ref->u + ((int)y*8 + b_dy/2) * (int)stride + (int)x*8 + b_dx/2,
 						stride);
 
 		if (sum >= MAX_CHROMA_SAD_FOR_SKIP * (int)Data->iQuant) return; /* no skip */
 
-		sum += sad8bi(pCur->v + 8*x + 8 * y * stride,
+		sum += (((struct global_all_dll *)Dll::Tls())->sad8bi)(pCur->v + 8*x + 8 * y * stride,
 						f_Ref->v + ((int)y*8 + dy/2) * (int)stride + (int)x*8 + dx/2,
 						b_Ref->v + ((int)y*8 + b_dy/2) * (int)stride + (int)x*8 + b_dx/2,
 						stride);
diff -bBru src-ori/motion/estimation_common.c src/motion/estimation_common.c
--- src-ori/motion/estimation_common.c	Wed Jan 12 10:32:43 2005
+++ src/motion/estimation_common.c	Wed Jan 12 12:44:46 2005
@@ -25,13 +25,8 @@
  *
  ****************************************************************************/
 
-#include "../encoder.h"
-#include "../global.h"
-#include "../image/interpolate8x8.h"
-#include "estimation.h"
-#include "motion.h"
-#include "sad.h"
-#include "motion_inlines.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
 
 
 /*****************************************************************************
@@ -91,21 +86,21 @@
 
 	switch (((dx & 1) << 1) | (dy & 1))	{
 		case 0:
-			sad = sad8(data->CurU, data->RefP[4] + offset, stride);
-			sad += sad8(data->CurV, data->RefP[5] + offset, stride);
+			sad = (((struct global_all_dll *)Dll::Tls())->sad8)(data->CurU, data->RefP[4] + offset, stride);
+			sad += (((struct global_all_dll *)Dll::Tls())->sad8)(data->CurV, data->RefP[5] + offset, stride);
 			break;
 		case 1:
 			next = stride;
 		case 2:
-			sad = sad8bi(data->CurU, data->RefP[4] + offset, data->RefP[4] + offset + next, stride);
-			sad += sad8bi(data->CurV, data->RefP[5] + offset, data->RefP[5] + offset + next, stride);
+			sad = (((struct global_all_dll *)Dll::Tls())->sad8bi)(data->CurU, data->RefP[4] + offset, data->RefP[4] + offset + next, stride);
+			sad += (((struct global_all_dll *)Dll::Tls())->sad8bi)(data->CurV, data->RefP[5] + offset, data->RefP[5] + offset + next, stride);
 			break;
 		default:
-			interpolate8x8_halfpel_hv(data->RefQ, data->RefP[4] + offset, stride, data->rounding);
-			sad = sad8(data->CurU, data->RefQ, stride);
+			(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv)(data->RefQ, data->RefP[4] + offset, stride, data->rounding);
+			sad = (((struct global_all_dll *)Dll::Tls())->sad8)(data->CurU, data->RefQ, stride);
 
-			interpolate8x8_halfpel_hv(data->RefQ, data->RefP[5] + offset, stride, data->rounding);
-			sad += sad8(data->CurV, data->RefQ, stride);
+			(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv)(data->RefQ, data->RefP[5] + offset, stride, data->rounding);
+			sad += (((struct global_all_dll *)Dll::Tls())->sad8)(data->CurV, data->RefQ, stride);
 			break;
 	}
 	data->chromaSAD = sad; /* backup, part 2 */
@@ -134,19 +129,19 @@
 		ref2 += 8 * (block&1) + 8 * (block>>1) * iEdgedWidth;
 		ref3 += 8 * (block&1) + 8 * (block>>1) * iEdgedWidth;
 		ref4 += 8 * (block&1) + 8 * (block>>1) * iEdgedWidth;
-		interpolate8x8_avg4(Reference, ref1, ref2, ref3, ref4, iEdgedWidth, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg4)(Reference, ref1, ref2, ref3, ref4, iEdgedWidth, rounding);
 		break;
 
 	case 1: /* x halfpel, y qpel - top or bottom during qpel refinement */
 		ref2 = GetReferenceB(halfpel_x, y - halfpel_y, dir, data);
 		ref2 += 8 * (block&1) + 8 * (block>>1) * iEdgedWidth;
-		interpolate8x8_avg2(Reference, ref1, ref2, iEdgedWidth, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(Reference, ref1, ref2, iEdgedWidth, rounding, 8);
 		break;
 
 	case 2: /* x qpel, y halfpel - left or right during qpel refinement */
 		ref2 = GetReferenceB(x - halfpel_x, halfpel_y, dir, data);
 		ref2 += 8 * (block&1) + 8 * (block>>1) * iEdgedWidth;
-		interpolate8x8_avg2(Reference, ref1, ref2, iEdgedWidth, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(Reference, ref1, ref2, iEdgedWidth, rounding, 8);
 		break;
 
 	default: /* pure halfpel position */
@@ -177,26 +172,26 @@
 		ref2 = GetReferenceB(halfpel_x, y - halfpel_y, dir, data);
 		ref3 = GetReferenceB(x - halfpel_x, halfpel_y, dir, data);
 		ref4 = GetReferenceB(x - halfpel_x, y - halfpel_y, dir, data);
-		interpolate8x8_avg4(Reference, ref1, ref2, ref3, ref4, iEdgedWidth, rounding);
-		interpolate8x8_avg4(Reference+8, ref1+8, ref2+8, ref3+8, ref4+8, iEdgedWidth, rounding);
-		interpolate8x8_avg4(Reference+8*iEdgedWidth, ref1+8*iEdgedWidth, ref2+8*iEdgedWidth, ref3+8*iEdgedWidth, ref4+8*iEdgedWidth, iEdgedWidth, rounding);
-		interpolate8x8_avg4(Reference+8*iEdgedWidth+8, ref1+8*iEdgedWidth+8, ref2+8*iEdgedWidth+8, ref3+8*iEdgedWidth+8, ref4+8*iEdgedWidth+8, iEdgedWidth, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg4)(Reference, ref1, ref2, ref3, ref4, iEdgedWidth, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg4)(Reference+8, ref1+8, ref2+8, ref3+8, ref4+8, iEdgedWidth, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg4)(Reference+8*iEdgedWidth, ref1+8*iEdgedWidth, ref2+8*iEdgedWidth, ref3+8*iEdgedWidth, ref4+8*iEdgedWidth, iEdgedWidth, rounding);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg4)(Reference+8*iEdgedWidth+8, ref1+8*iEdgedWidth+8, ref2+8*iEdgedWidth+8, ref3+8*iEdgedWidth+8, ref4+8*iEdgedWidth+8, iEdgedWidth, rounding);
 		break;
 
 	case 1: /* x halfpel, y qpel - top or bottom during qpel refinement */
 		ref2 = GetReferenceB(halfpel_x, y - halfpel_y, dir, data);
-		interpolate8x8_avg2(Reference, ref1, ref2, iEdgedWidth, rounding, 8);
-		interpolate8x8_avg2(Reference+8, ref1+8, ref2+8, iEdgedWidth, rounding, 8);
-		interpolate8x8_avg2(Reference+8*iEdgedWidth, ref1+8*iEdgedWidth, ref2+8*iEdgedWidth, iEdgedWidth, rounding, 8);
-		interpolate8x8_avg2(Reference+8*iEdgedWidth+8, ref1+8*iEdgedWidth+8, ref2+8*iEdgedWidth+8, iEdgedWidth, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(Reference, ref1, ref2, iEdgedWidth, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(Reference+8, ref1+8, ref2+8, iEdgedWidth, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(Reference+8*iEdgedWidth, ref1+8*iEdgedWidth, ref2+8*iEdgedWidth, iEdgedWidth, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(Reference+8*iEdgedWidth+8, ref1+8*iEdgedWidth+8, ref2+8*iEdgedWidth+8, iEdgedWidth, rounding, 8);
 		break;
 
 	case 2: /* x qpel, y halfpel - left or right during qpel refinement */
 		ref2 = GetReferenceB(x - halfpel_x, halfpel_y, dir, data);
-		interpolate8x8_avg2(Reference, ref1, ref2, iEdgedWidth, rounding, 8);
-		interpolate8x8_avg2(Reference+8, ref1+8, ref2+8, iEdgedWidth, rounding, 8);
-		interpolate8x8_avg2(Reference+8*iEdgedWidth, ref1+8*iEdgedWidth, ref2+8*iEdgedWidth, iEdgedWidth, rounding, 8);
-		interpolate8x8_avg2(Reference+8*iEdgedWidth+8, ref1+8*iEdgedWidth+8, ref2+8*iEdgedWidth+8, iEdgedWidth, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(Reference, ref1, ref2, iEdgedWidth, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(Reference+8, ref1+8, ref2+8, iEdgedWidth, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(Reference+8*iEdgedWidth, ref1+8*iEdgedWidth, ref2+8*iEdgedWidth, iEdgedWidth, rounding, 8);
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2)(Reference+8*iEdgedWidth+8, ref1+8*iEdgedWidth+8, ref2+8*iEdgedWidth+8, iEdgedWidth, rounding, 8);
 		break;
 
 
diff -bBru src-ori/motion/estimation_gmc.c src/motion/estimation_gmc.c
--- src-ori/motion/estimation_gmc.c	Wed Jan 12 10:32:43 2005
+++ src/motion/estimation_gmc.c	Wed Jan 12 12:45:23 2005
@@ -23,20 +23,9 @@
  *
  ****************************************************************************/
 
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-
-#include "../encoder.h"
-#include "../prediction/mbprediction.h"
-#include "estimation.h"
-#include "motion.h"
-#include "sad.h"
-#include "gmc.h"
-#include "../utils/emms.h"
-#include "motion_inlines.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
+
 
 static void
 CheckCandidate16I(const int x, const int y, SearchData * const data, const unsigned int Direction)
@@ -49,7 +38,7 @@
 
 	Reference = GetReference(x, y, data);
 
-	sad = sad16(data->Cur, Reference, data->iEdgedWidth, 256*4096);
+	sad = (((struct global_all_dll *)Dll::Tls())->sad16)(data->Cur, Reference, data->iEdgedWidth, 256*4096);
 
 	if (sad < data->iMinSAD[0]) {
 		data->iMinSAD[0] = sad;
@@ -137,7 +126,7 @@
 
 	Data.iFcode = current->fcode;
 
-	if (sadInit) (*sadInit) ();
+	if ((((struct global_all_dll *)Dll::Tls())->sadInit)) (*(((struct global_all_dll *)Dll::Tls())->sadInit)) ();
 
 	for (y = 0; y < pParam->mb_height; y ++) {
 		for (x = 0; x < pParam->mb_width; x ++) {
@@ -210,8 +199,8 @@
 			&& (abs(mv.y - (pMB+MBw)->mvs[0].y) < deltay) )
 		{	const int iEdgedWidth = pParam->edged_width;
 			const uint8_t *const pCur = current->image.y + 16*(my*iEdgedWidth + mx);
-			if ( (sad16 ( pCur, pCur+1 , iEdgedWidth, 65536) >= gradx )
-			 &&  (sad16 ( pCur, pCur+iEdgedWidth, iEdgedWidth, 65536) >= grady ) )
+			if ( ((((struct global_all_dll *)Dll::Tls())->sad16) ( pCur, pCur+1 , iEdgedWidth, 65536) >= gradx )
+			 &&  ((((struct global_all_dll *)Dll::Tls())->sad16) ( pCur, pCur+iEdgedWidth, iEdgedWidth, 65536) >= grady ) )
 			 {	pMB->mcsel = 1;
 				num++;
 			 }
@@ -219,7 +208,7 @@
 		/* only use "structured" blocks */
 		}
 	}
-	emms();
+	(((struct global_all_dll *)Dll::Tls())->emms)();
 
 	/* 	further filtering would be possible, but during iteration, remaining
 		outliers usually are removed, too */
@@ -330,8 +319,8 @@
 			const int mbnum = mx + my * MBw;
 			MACROBLOCK *const pMB = &pMBs[mbnum];
 			const uint8_t *const pCur = current->image.y + 16*(my*iEdgedWidth + mx);
-			if ( (sad16 ( pCur, pCur+1 , iEdgedWidth, 65536) >= gradx )
-			 &&  (sad16 ( pCur, pCur+iEdgedWidth, iEdgedWidth, 65536) >= grady ) )
+			if ( ((((struct global_all_dll *)Dll::Tls())->sad16) ( pCur, pCur+1 , iEdgedWidth, 65536) >= gradx )
+			 &&  ((((struct global_all_dll *)Dll::Tls())->sad16) ( pCur, pCur+iEdgedWidth, iEdgedWidth, 65536) >= grady ) )
 			 {	pMB->mcsel = 1;
 				gmc.duv[0].x += pMB->mvs[0].x;
 				gmc.duv[0].y += pMB->mvs[0].y;
@@ -369,25 +358,6 @@
 	return gmc;
 }
 
-int
-GlobalMotionEstRefine(
-				WARPPOINTS *const startwp,
-				MACROBLOCK * const pMBs,
-				const MBParam * const pParam,
-				const FRAMEINFO * const current,
-				const FRAMEINFO * const reference,
-				const IMAGE * const pCurr,
-				const IMAGE * const pRef,
-				const IMAGE * const pRefH,
-				const IMAGE * const pRefV,
-				const IMAGE * const pRefHV)
-{
-	uint8_t* GMCblock = (uint8_t*)malloc(16*pParam->edged_width);
-	WARPPOINTS bestwp=*startwp;
-	WARPPOINTS centerwp,currwp;
-	int gmcminSAD=0;
-	int gmcSAD=0;
-	int direction;
 #if 0
 	int mx,my;
 #endif
@@ -416,6 +384,27 @@
 		}
 	}
 #endif
+int
+GlobalMotionEstRefine(
+				WARPPOINTS *const startwp,
+				MACROBLOCK * const pMBs,
+				const MBParam * const pParam,
+				const FRAMEINFO * const current,
+				const FRAMEINFO * const reference,
+				const IMAGE * const pCurr,
+				const IMAGE * const pRef,
+				const IMAGE * const pRefH,
+				const IMAGE * const pRefV,
+				const IMAGE * const pRefHV)
+{
+	uint8_t* GMCblock = (uint8_t*)malloc(16*pParam->edged_width);
+	WARPPOINTS bestwp=*startwp;
+	WARPPOINTS centerwp,currwp;
+	int gmcminSAD=0;
+	int gmcSAD=0;
+	int direction;
+
+
 
 	gmcminSAD = globalSAD(&bestwp, pParam, pMBs, current, pRef, pCurr, GMCblock);
 
@@ -610,7 +599,7 @@
 						mx, my,
 						pParam->m_rounding_type);
 
-		iSAD = sad16 ( pCurr->y + 16*(my*iEdgedWidth + mx),
+		iSAD = (((struct global_all_dll *)Dll::Tls())->sad16) ( pCurr->y + 16*(my*iEdgedWidth + mx),
 						GMCblock , iEdgedWidth, 65536);
 		iSAD -= pMBs[mbnum].sad16;
 
diff -bBru src-ori/motion/estimation_pvop.c src/motion/estimation_pvop.c
--- src-ori/motion/estimation_pvop.c	Wed Jan 12 10:32:43 2005
+++ src/motion/estimation_pvop.c	Wed Jan 12 12:45:10 2005
@@ -25,20 +25,9 @@
  *
  ****************************************************************************/
 
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>	/* memcpy */
-
-#include "../encoder.h"
-#include "../prediction/mbprediction.h"
-#include "../global.h"
-#include "../utils/timer.h"
-#include "../image/interpolate8x8.h"
-#include "estimation.h"
-#include "motion.h"
-#include "sad.h"
-#include "motion_inlines.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
+
 
 static const int xvid_me_lambda_vec8[32] =
 	{     0    ,(int)(1.00235 * NEIGH_TEND_8X8 + 0.5),
@@ -70,7 +59,7 @@
 
 	Reference = GetReference(x, y, data);
 
-	sad = sad16v(data->Cur, Reference, data->iEdgedWidth, data->temp);
+	sad = (((struct global_all_dll *)Dll::Tls())->sad16v)(data->Cur, Reference, data->iEdgedWidth, data->temp);
 	t = d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel, 0);
 
 	sad += (data->lambda16 * t * sad)>>10;
@@ -110,7 +99,7 @@
 
 	Reference = xvid_me_interpolate16x16qpel(x, y, 0, data);
 
-	sad = sad16v(data->Cur, Reference, data->iEdgedWidth, data->temp);
+	sad = (((struct global_all_dll *)Dll::Tls())->sad16v)(data->Cur, Reference, data->iEdgedWidth, data->temp);
 	t = d_mv_bits(x, y, data->predMV, data->iFcode, 0, 0);
 
 	sad += (data->lambda16 * t * sad)>>10;
@@ -160,7 +149,7 @@
 		current = data->currentQMV;
 	}
 
-	sad = sad8(data->Cur, Reference, data->iEdgedWidth);
+	sad = (((struct global_all_dll *)Dll::Tls())->sad8)(data->Cur, Reference, data->iEdgedWidth);
 	t = d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0);
 
 	sad += (data->lambda8 * t * (sad+NEIGH_8X8_BIAS))>>10;
@@ -186,7 +175,7 @@
 	Reference = xvid_me_interpolate8x8qpel(x, y, 0, 0, data);
 	current = data->currentQMV;
 
-	sad = sad8(data->Cur, Reference, data->iEdgedWidth);
+	sad = (((struct global_all_dll *)Dll::Tls())->sad8)(data->Cur, Reference, data->iEdgedWidth);
 	t = d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0);
 
 	sad += (data->lambda8 * t * (sad+NEIGH_8X8_BIAS))>>10;
@@ -247,19 +236,19 @@
 {
 	int offset = (x + y*stride)*8;
 	if(!rrv) {
-		uint32_t sadC = sad8(current->u + offset,
+		uint32_t sadC = (((struct global_all_dll *)Dll::Tls())->sad8)(current->u + offset,
 						reference->u + offset, stride);
 		if (sadC > iQuant * MAX_CHROMA_SAD_FOR_SKIP) return 0;
-		sadC += sad8(current->v + offset,
+		sadC += (((struct global_all_dll *)Dll::Tls())->sad8)(current->v + offset,
 						reference->v + offset, stride);
 		if (sadC > iQuant * MAX_CHROMA_SAD_FOR_SKIP) return 0;
 		return 1;
 
 	} else {
-		uint32_t sadC = sad16(current->u + 2*offset,
+		uint32_t sadC = (((struct global_all_dll *)Dll::Tls())->sad16)(current->u + 2*offset,
 						reference->u + 2*offset, stride, 256*4096);
 		if (sadC > iQuant * MAX_CHROMA_SAD_FOR_SKIP*4) return 0;
-		sadC += sad16(current->v + 2*offset,
+		sadC += (((struct global_all_dll *)Dll::Tls())->sad16)(current->v + 2*offset,
 						reference->v + 2*offset, stride, 256*4096);
 		if (sadC > iQuant * MAX_CHROMA_SAD_FOR_SKIP*4) return 0;
 		return 1;
@@ -415,12 +404,12 @@
 	/* mcsel */
 	if (coding_type == S_VOP) {
 
-		int32_t iSAD = sad16(Data->Cur,
+		int32_t iSAD = (((struct global_all_dll *)Dll::Tls())->sad16)(Data->Cur,
 			vGMC->y + 16*y*Data->iEdgedWidth + 16*x, Data->iEdgedWidth, 65536);
 
 		if (Data->chroma) {
-			iSAD += sad8(Data->CurU, vGMC->u + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2);
-			iSAD += sad8(Data->CurV, vGMC->v + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2);
+			iSAD += (((struct global_all_dll *)Dll::Tls())->sad8)(Data->CurU, vGMC->u + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2);
+			iSAD += (((struct global_all_dll *)Dll::Tls())->sad8)(Data->CurV, vGMC->v + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2);
 		}
 
 		if (iSAD <= sad) {		/* mode decision GMC */
@@ -444,12 +433,12 @@
 	if (InterBias < sad) {
 		int32_t deviation;
 		if (!Data->rrv)
-			deviation = dev16(Data->Cur, Data->iEdgedWidth);
+			deviation = (((struct global_all_dll *)Dll::Tls())->dev16)(Data->Cur, Data->iEdgedWidth);
 		else
-			deviation = dev16(Data->Cur, Data->iEdgedWidth) + /* dev32() */
-						dev16(Data->Cur+16, Data->iEdgedWidth) +
-						dev16(Data->Cur + 16*Data->iEdgedWidth, Data->iEdgedWidth) +
-						dev16(Data->Cur+16+16*Data->iEdgedWidth, Data->iEdgedWidth);
+			deviation = (((struct global_all_dll *)Dll::Tls())->dev16)(Data->Cur, Data->iEdgedWidth) + /* dev32() */
+						(((struct global_all_dll *)Dll::Tls())->dev16)(Data->Cur+16, Data->iEdgedWidth) +
+						(((struct global_all_dll *)Dll::Tls())->dev16)(Data->Cur + 16*Data->iEdgedWidth, Data->iEdgedWidth) +
+						(((struct global_all_dll *)Dll::Tls())->dev16)(Data->Cur+16+16*Data->iEdgedWidth, Data->iEdgedWidth);
 
 		if (deviation < (sad - InterBias)) mode = MODE_INTRA;
 	}
@@ -918,7 +908,7 @@
 	}
 
 	Data.RefQ = pRefV->u; /* a good place, also used in MC (for similar purpose) */
-	if (sadInit) (*sadInit) ();
+	if ((((struct global_all_dll *)Dll::Tls())->sadInit)) (*(((struct global_all_dll *)Dll::Tls())->sadInit)) ();
 
 	for (y = 0; y < mb_height; y++)	{
 		for (x = 0; x < mb_width; x++)	{
@@ -926,7 +916,7 @@
 			MACROBLOCK *prevMB = &reference->mbs[x + y * pParam->mb_width];
 
 			if (!Data.rrv) pMB->sad16 =
-				sad16v(pCurrent->y + (x + y * iEdgedWidth) * 16,
+				(((struct global_all_dll *)Dll::Tls())->sad16v)(pCurrent->y + (x + y * iEdgedWidth) * 16,
 							pRef->y + (x + y * iEdgedWidth) * 16,
 							pParam->edged_width, pMB->sad8 );
 
@@ -936,9 +926,9 @@
 							pParam->edged_width, pMB->sad8 );
 
 			if (Data.chroma) {
-				Data.chromaSAD = sad8(pCurrent->u + x*8 + y*(iEdgedWidth/2)*8,
+				Data.chromaSAD = (((struct global_all_dll *)Dll::Tls())->sad8)(pCurrent->u + x*8 + y*(iEdgedWidth/2)*8,
 									pRef->u + x*8 + y*(iEdgedWidth/2)*8, iEdgedWidth/2)
-								+ sad8(pCurrent->v + (x + y*(iEdgedWidth/2))*8,
+								+ (((struct global_all_dll *)Dll::Tls())->sad8)(pCurrent->v + (x + y*(iEdgedWidth/2))*8,
 									pRef->v + (x + y*(iEdgedWidth/2))*8, iEdgedWidth/2);
 				pMB->sad16 += Data.chromaSAD;
 			}
diff -bBru src-ori/motion/estimation_rd_based.c src/motion/estimation_rd_based.c
--- src-ori/motion/estimation_rd_based.c	Wed Jan 12 10:32:43 2005
+++ src/motion/estimation_rd_based.c	Wed Jan 12 12:43:52 2005
@@ -26,24 +26,9 @@
 
 /* RD mode decision and search */
 
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>	/* memcpy */
-
-#include "../encoder.h"
-#include "../bitstream/mbcoding.h"
-#include "../prediction/mbprediction.h"
-#include "../global.h"
-#include "../image/interpolate8x8.h"
-#include "estimation.h"
-#include "motion.h"
-#include "sad.h"
-#include "../bitstream/zigzag.h"
-#include "../quant/quant.h"
-#include "../bitstream/vlc_codes.h"
-#include "../dct/fdct.h"
-#include "motion_inlines.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
+
 
 /* rd = BITS_MULT*bits + LAMBDA*distortion */
 #define LAMBDA		( (int)(BITS_MULT*1.0) )
@@ -62,19 +47,19 @@
 	int bits;
 	int distortion = 0;
 
-	fdct(data);
+	(((struct global_all_dll *)Dll::Tls())->fdct)(data);
 
-	if (quant_type) sum = quant_h263_inter(coeff, data, quant, mpeg_quant_matrices);
-	else sum = quant_mpeg_inter(coeff, data, quant, mpeg_quant_matrices);
+	if (quant_type) sum = (((struct global_all_dll *)Dll::Tls())->quant_h263_inter)(coeff, data, quant, mpeg_quant_matrices);
+	else sum = (((struct global_all_dll *)Dll::Tls())->quant_mpeg_inter)(coeff, data, quant, mpeg_quant_matrices);
 
 	if (sum > 0) {
 		*cbp |= 1 << (5 - block);
 		bits = BITS_MULT * CodeCoeffInter_CalcBits(coeff, scan_table);
 
-		if (quant_type) dequant_h263_inter(dqcoeff, coeff, quant, mpeg_quant_matrices);
-		else dequant_mpeg_inter(dqcoeff, coeff, quant, mpeg_quant_matrices);
+		if (quant_type) (((struct global_all_dll *)Dll::Tls())->dequant_h263_inter)(dqcoeff, coeff, quant, mpeg_quant_matrices);
+		else (((struct global_all_dll *)Dll::Tls())->dequant_mpeg_inter)(dqcoeff, coeff, quant, mpeg_quant_matrices);
 
-		distortion = sse8_16bit(data, dqcoeff, 8*sizeof(int16_t));
+		distortion = (((struct global_all_dll *)Dll::Tls())->sse8_16bit)(data, dqcoeff, 8*sizeof(int16_t));
 	} else {
 		const static int16_t zero_block[64] =
 			{
@@ -88,7 +73,7 @@
 				0, 0, 0, 0, 0, 0, 0, 0,
 			};
 		bits = 0;
-		distortion = sse8_16bit(data, zero_block, 8*sizeof(int16_t));
+		distortion = (((struct global_all_dll *)Dll::Tls())->sse8_16bit)(data, zero_block, 8*sizeof(int16_t));
 	}
 
 
@@ -117,14 +102,14 @@
 	unsigned int distortion = 0;
 	const uint32_t iDcScaler = get_dc_scaler(quant, block < 4);
 
-	fdct(coeff);
+	(((struct global_all_dll *)Dll::Tls())->fdct)(coeff);
 
 	if (quant_type) {
-		quant_h263_intra(qcoeff, coeff, quant, iDcScaler, mpeg_quant_matrices);
-		dequant_h263_intra(dqcoeff, qcoeff, quant, iDcScaler, mpeg_quant_matrices);
+		(((struct global_all_dll *)Dll::Tls())->quant_h263_intra)(qcoeff, coeff, quant, iDcScaler, mpeg_quant_matrices);
+		(((struct global_all_dll *)Dll::Tls())->dequant_h263_intra)(dqcoeff, qcoeff, quant, iDcScaler, mpeg_quant_matrices);
 	} else {
-		quant_mpeg_intra(qcoeff, coeff, quant, iDcScaler, mpeg_quant_matrices);
-		dequant_mpeg_intra(dqcoeff, qcoeff, quant, iDcScaler, mpeg_quant_matrices);
+		(((struct global_all_dll *)Dll::Tls())->quant_mpeg_intra)(qcoeff, coeff, quant, iDcScaler, mpeg_quant_matrices);
+		(((struct global_all_dll *)Dll::Tls())->dequant_mpeg_intra)(dqcoeff, qcoeff, quant, iDcScaler, mpeg_quant_matrices);
 	}
 
 	predict_acdc(pMB-(x+mb_width*y), x, y, mb_width, block, qcoeff,
@@ -167,7 +152,7 @@
 	bits[1] += coded = CodeCoeffIntra_CalcBits(qcoeff, scan_tables[direction]);
 	if (coded > 0) cbp[1] |= 1 << (5 - block);
 
-	distortion = sse8_16bit(coeff, dqcoeff, 8*sizeof(int16_t));
+	distortion = (((struct global_all_dll *)Dll::Tls())->sse8_16bit)(coeff, dqcoeff, 8*sizeof(int16_t));
 
 	return (LAMBDA*distortion)/(quant*quant);
 }
@@ -200,7 +185,7 @@
 
 	for(i = 0; i < 4; i++) {
 		int s = 8*((i&1) + (i>>1)*data->iEdgedWidth);
-		transfer_8to16subro(in, data->Cur + s, ptr + s, data->iEdgedWidth);
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16subro)(in, data->Cur + s, ptr + s, data->iEdgedWidth);
 		rd += data->temp[i] = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, i, data->scan_table, data->mpeg_quant_matrices);
 	}
 
@@ -225,16 +210,16 @@
 
 	/* chroma U */
 	ptr = interpolate8x8_switch2(data->RefQ, data->RefP[4], 0, 0, xc, yc, data->iEdgedWidth/2, data->rounding);
-	transfer_8to16subro(in, data->CurU, ptr, data->iEdgedWidth/2);
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16subro)(in, data->CurU, ptr, data->iEdgedWidth/2);
 	rd += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 4, data->scan_table, data->mpeg_quant_matrices);
 	if (rd >= data->iMinSAD[0]) return;
 
 	/* chroma V */
 	ptr = interpolate8x8_switch2(data->RefQ, data->RefP[5], 0, 0, xc, yc, data->iEdgedWidth/2, data->rounding);
-	transfer_8to16subro(in, data->CurV, ptr, data->iEdgedWidth/2);
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16subro)(in, data->CurV, ptr, data->iEdgedWidth/2);
 	rd += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 5, data->scan_table, data->mpeg_quant_matrices);
 
-	rd += BITS_MULT*mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len;
+	rd += BITS_MULT*(((struct global_all_dll *)Dll::Tls())->mcbpc_inter_tab)[(MODE_INTER & 7) | ((cbp & 3) << 3)].len;
 
 	if (rd < data->iMinSAD[0]) {
 		data->iMinSAD[0] = rd;
@@ -265,7 +250,7 @@
 		current = data->currentQMV;
 	}
 
-	transfer_8to16subro(in, data->Cur, ptr, data->iEdgedWidth);
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16subro)(in, data->Cur, ptr, data->iEdgedWidth);
 	rd = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 5, data->scan_table, data->mpeg_quant_matrices);
 	rd += BITS_MULT*d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0);
 
@@ -467,17 +452,17 @@
 
 	/* chroma U */
 	ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[4], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding);
-	transfer_8to16subro(in, Data->CurU, ptr, Data->iEdgedWidth/2);
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16subro)(in, Data->CurU, ptr, Data->iEdgedWidth/2);
 	bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, Data->scan_table, Data->mpeg_quant_matrices);
 
 	if (bits >= *Data->iMinSAD) return bits;
 
 	/* chroma V */
 	ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[5], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding);
-	transfer_8to16subro(in, Data->CurV, ptr, Data->iEdgedWidth/2);
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16subro)(in, Data->CurV, ptr, Data->iEdgedWidth/2);
 	bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, Data->scan_table, Data->mpeg_quant_matrices);
 
-	bits += BITS_MULT*mcbpc_inter_tab[(MODE_INTER4V & 7) | ((cbp & 3) << 3)].len;
+	bits += BITS_MULT*(((struct global_all_dll *)Dll::Tls())->mcbpc_inter_tab)[(MODE_INTER4V & 7) | ((cbp & 3) << 3)].len;
 
 	*Data->cbp = cbp;
 	return bits;
@@ -497,7 +482,7 @@
 
 	for(i = 0; i < 4; i++) {
 		int s = 8*((i&1) + (i>>1)*Data->iEdgedWidth);
-		transfer_8to16copy(in, Data->Cur + s, Data->iEdgedWidth);
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16copy)(in, Data->Cur + s, Data->iEdgedWidth);
 		
 
 		distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, i, in, coeff, dqcoeff,
@@ -513,7 +498,7 @@
 	bits2 += BITS_MULT*xvid_cbpy_tab[cbp[1]>>2].len;
 
 	/*chroma U */
-	transfer_8to16copy(in, Data->CurU, Data->iEdgedWidth/2);
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16copy)(in, Data->CurU, Data->iEdgedWidth/2);
 	distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, 4, in, coeff, dqcoeff,
 									predictors[4], iQuant, Data->quant_type, bits, cbp, Data->mpeg_quant_matrices);
 	bits1 += distortion + BITS_MULT * bits[0];
@@ -523,15 +508,15 @@
 		return bits1;
 
 	/* chroma V */
-	transfer_8to16copy(in, Data->CurV, Data->iEdgedWidth/2);
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16copy)(in, Data->CurV, Data->iEdgedWidth/2);
 	distortion = Block_CalcBitsIntra(pMB, x, y, mb_width, 5, in, coeff, dqcoeff,
 									predictors[5], iQuant, Data->quant_type, bits, cbp, Data->mpeg_quant_matrices);
 
 	bits1 += distortion + BITS_MULT * bits[0];
 	bits2 += distortion + BITS_MULT * bits[1];
 
-	bits1 += BITS_MULT*mcbpc_inter_tab[(MODE_INTRA & 7) | ((cbp[0] & 3) << 3)].len;
-	bits2 += BITS_MULT*mcbpc_inter_tab[(MODE_INTRA & 7) | ((cbp[1] & 3) << 3)].len;
+	bits1 += BITS_MULT*(((struct global_all_dll *)Dll::Tls())->mcbpc_inter_tab)[(MODE_INTRA & 7) | ((cbp[0] & 3) << 3)].len;
+	bits2 += BITS_MULT*(((struct global_all_dll *)Dll::Tls())->mcbpc_inter_tab)[(MODE_INTRA & 7) | ((cbp[1] & 3) << 3)].len;
 
 	*Data->cbp = bits1 <= bits2 ? cbp[0] : cbp[1];
 
@@ -548,7 +533,7 @@
 
 	for(i = 0; i < 4; i++) {
 		int s = 8*((i&1) + (i>>1)*Data->iEdgedWidth);
-		transfer_8to16subro(in, Data->Cur + s, vGMC->y + s + 16*(x+y*Data->iEdgedWidth), Data->iEdgedWidth);
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16subro)(in, Data->Cur + s, vGMC->y + s + 16*(x+y*Data->iEdgedWidth), Data->iEdgedWidth);
 		bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, i, Data->scan_table, Data->mpeg_quant_matrices);
 		if (bits >= Data->iMinSAD[0]) return bits;
 	}
@@ -556,16 +541,16 @@
 	bits += BITS_MULT*xvid_cbpy_tab[15-(cbp>>2)].len;
 
 	/*chroma U */
-	transfer_8to16subro(in, Data->CurU, vGMC->u + 8*(x+y*(Data->iEdgedWidth/2)), Data->iEdgedWidth/2);
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16subro)(in, Data->CurU, vGMC->u + 8*(x+y*(Data->iEdgedWidth/2)), Data->iEdgedWidth/2);
 	bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, Data->scan_table, Data->mpeg_quant_matrices);
 
 	if (bits >= Data->iMinSAD[0]) return bits;
 
 	/* chroma V */
-	transfer_8to16subro(in, Data->CurV , vGMC->v + 8*(x+y*(Data->iEdgedWidth/2)), Data->iEdgedWidth/2);
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16subro)(in, Data->CurV , vGMC->v + 8*(x+y*(Data->iEdgedWidth/2)), Data->iEdgedWidth/2);
 	bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, Data->scan_table, Data->mpeg_quant_matrices);
 
-	bits += BITS_MULT*mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len;
+	bits += BITS_MULT*(((struct global_all_dll *)Dll::Tls())->mcbpc_inter_tab)[(MODE_INTER & 7) | ((cbp & 3) << 3)].len;
 
 	*Data->cbp = cbp;
 
@@ -734,12 +719,12 @@
 		/* mcsel */
 		if (coding_type == S_VOP) {
 
-			int32_t iSAD = sad16(Data->Cur,
+			int32_t iSAD = (((struct global_all_dll *)Dll::Tls())->sad16)(Data->Cur,
 				vGMC->y + 16*y*Data->iEdgedWidth + 16*x, Data->iEdgedWidth, 65536);
 
 			if (Data->chroma) {
-				iSAD += sad8(Data->CurU, vGMC->u + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2);
-				iSAD += sad8(Data->CurV, vGMC->v + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2);
+				iSAD += (((struct global_all_dll *)Dll::Tls())->sad8)(Data->CurU, vGMC->u + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2);
+				iSAD += (((struct global_all_dll *)Dll::Tls())->sad8)(Data->CurV, vGMC->v + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2);
 			}
 
 			if (iSAD <= sad) {		/* mode decision GMC */
@@ -782,11 +767,11 @@
 				*Data->iMinSAD = min_rd = gmc_rd;
 				mode = MODE_INTER;
 				cbp = *Data->cbp;
-				sad = sad16(Data->Cur,
+				sad = (((struct global_all_dll *)Dll::Tls())->sad16)(Data->Cur,
 					vGMC->y + 16*y*Data->iEdgedWidth + 16*x, Data->iEdgedWidth, 65536);
 				if (Data->chroma) {
-					sad += sad8(Data->CurU, vGMC->u + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2);
-					sad += sad8(Data->CurV, vGMC->v + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2);
+					sad += (((struct global_all_dll *)Dll::Tls())->sad8)(Data->CurU, vGMC->u + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2);
+					sad += (((struct global_all_dll *)Dll::Tls())->sad8)(Data->CurV, vGMC->v + 8*y*(Data->iEdgedWidth/2) + 8*x, Data->iEdgedWidth/2);
 				}
 			}
 		}
@@ -837,12 +822,12 @@
 		if (InterBias < sad) {
 			int32_t deviation;
 			if (!Data->rrv)
-				deviation = dev16(Data->Cur, Data->iEdgedWidth);
+				deviation = (((struct global_all_dll *)Dll::Tls())->dev16)(Data->Cur, Data->iEdgedWidth);
 			else
-				deviation = dev16(Data->Cur, Data->iEdgedWidth) + /* dev32() */
-							dev16(Data->Cur+16, Data->iEdgedWidth) +
-							dev16(Data->Cur + 16*Data->iEdgedWidth, Data->iEdgedWidth) +
-							dev16(Data->Cur+16+16*Data->iEdgedWidth, Data->iEdgedWidth);
+				deviation = (((struct global_all_dll *)Dll::Tls())->dev16)(Data->Cur, Data->iEdgedWidth) + /* dev32() */
+							(((struct global_all_dll *)Dll::Tls())->dev16)(Data->Cur+16, Data->iEdgedWidth) +
+							(((struct global_all_dll *)Dll::Tls())->dev16)(Data->Cur + 16*Data->iEdgedWidth, Data->iEdgedWidth) +
+							(((struct global_all_dll *)Dll::Tls())->dev16)(Data->Cur+16+16*Data->iEdgedWidth, Data->iEdgedWidth);
 
 			if (deviation < (sad - InterBias)) mode = MODE_INTRA;
 		}
diff -bBru src-ori/motion/gmc.c src/motion/gmc.c
--- src-ori/motion/gmc.c	Wed Jan 12 10:32:43 2005
+++ src/motion/gmc.c	Wed Jan 12 12:43:24 2005
@@ -23,12 +23,9 @@
  *
  ****************************************************************************/
 
-#include "../portab.h"
-#include "../global.h"
-#include "../encoder.h"
-#include "gmc.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
 
-#include <stdio.h>
 
 /* ************************************************************
  * Pts = 2 or 3
Only in src-ori/motion: ia64_asm
diff -bBru src-ori/motion/motion_comp.c src/motion/motion_comp.c
--- src-ori/motion/motion_comp.c	Wed Jan 12 11:40:18 2005
+++ src/motion/motion_comp.c	Wed Jan 12 12:44:05 2005
@@ -24,15 +24,9 @@
  *
  ****************************************************************************/
 
-#include <stdio.h>
+#include <e32std.h>
+#include "../global_all_dll.h"
 
-#include "../encoder.h"
-#include "../utils/mbfunctions.h"
-#include "../image/interpolate8x8.h"
-#include "../image/qpel.h"
-#include "../image/reduced.h"
-#include "../utils/timer.h"
-#include "motion.h"
 
 #ifndef RSHIFT
 #define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
@@ -133,13 +127,13 @@
 
 		} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride);
 
-		transfer_8to16sub(dct_codes, cur + y * stride + x,
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)(dct_codes, cur + y * stride + x,
 							ptr, stride);
-		transfer_8to16sub(dct_codes+64, cur + y * stride + x + 8,
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)(dct_codes+64, cur + y * stride + x + 8,
 							ptr + 8, stride);
-		transfer_8to16sub(dct_codes+128, cur + y * stride + x + 8*stride,
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)(dct_codes+128, cur + y * stride + x + 8*stride,
 							ptr + 8*stride, stride);
-		transfer_8to16sub(dct_codes+192, cur + y * stride + x + 8*stride+8,
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)(dct_codes+192, cur + y * stride + x + 8*stride+8,
 							ptr + 8*stride + 8, stride);
 
 	} else { /* reduced_resolution */
@@ -148,17 +142,17 @@
 
 		ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride);
 
-		filter_18x18_to_8x8(dct_codes, cur+y*stride + x, stride);
-		filter_diff_18x18_to_8x8(dct_codes, ptr, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_18x18_to_8x8)(dct_codes, cur+y*stride + x, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_diff_18x18_to_8x8)(dct_codes, ptr, stride);
 
-		filter_18x18_to_8x8(dct_codes+64, cur+y*stride + x + 16, stride);
-		filter_diff_18x18_to_8x8(dct_codes+64, ptr + 16, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_18x18_to_8x8)(dct_codes+64, cur+y*stride + x + 16, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_diff_18x18_to_8x8)(dct_codes+64, ptr + 16, stride);
 
-		filter_18x18_to_8x8(dct_codes+128, cur+(y+16)*stride + x, stride);
-		filter_diff_18x18_to_8x8(dct_codes+128, ptr + 16*stride, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_18x18_to_8x8)(dct_codes+128, cur+(y+16)*stride + x, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_diff_18x18_to_8x8)(dct_codes+128, ptr + 16*stride, stride);
 
-		filter_18x18_to_8x8(dct_codes+192, cur+(y+16)*stride + x + 16, stride);
-		filter_diff_18x18_to_8x8(dct_codes+192, ptr + 16*stride + 16, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_18x18_to_8x8)(dct_codes+192, cur+(y+16)*stride + x + 16, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_diff_18x18_to_8x8)(dct_codes+192, ptr + 16*stride + 16, stride);
 
 		transfer32x32_copy(cur + y*stride + x, ptr, stride);
 	}
@@ -200,7 +194,7 @@
 			} else ptr = ref + ((int)y + dy/4)*(int)stride + (int)x + dx/4; /* fullpixel position */
 		} else ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride);
 
-			transfer_8to16sub(dct_codes, cur + y * stride + x, ptr, stride);
+			(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)(dct_codes, cur + y * stride + x, ptr, stride);
 
 	} else { /* reduced_resolution */
 
@@ -208,8 +202,8 @@
 
 		ptr = get_ref(ref, refh, refv, refhv, x, y, 1, dx, dy, stride);
 
-		filter_18x18_to_8x8(dct_codes, cur+y*stride + x, stride);
-		filter_diff_18x18_to_8x8(dct_codes, ptr, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_18x18_to_8x8)(dct_codes, cur+y*stride + x, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_diff_18x18_to_8x8)(dct_codes, ptr, stride);
 
 		transfer16x16_copy(cur + y*stride + x, ptr, stride);
 	}
@@ -252,11 +246,11 @@
 { /* uv-block-based compensation */
 
 	if (!rrv) {
-		transfer_8to16sub(coeff, Cur->u + 8 * j * stride + 8 * i,
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)(coeff, Cur->u + 8 * j * stride + 8 * i,
 							interpolate8x8_switch2(temp, Ref->u, 8 * i, 8 * j,
 													dx, dy, stride, rounding),
 							stride);
-		transfer_8to16sub(coeff + 64, Cur->v + 8 * j * stride + 8 * i,
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)(coeff + 64, Cur->v + 8 * j * stride + 8 * i,
  							interpolate8x8_switch2(temp, Ref->v, 8 * i, 8 * j,
 													dx, dy, stride, rounding),
 							stride);
@@ -266,14 +260,14 @@
 		current = Cur->u + 16*j*stride + 16*i;
 		reference = temp - 16*j*stride - 16*i;
 		interpolate18x18_switch(reference, Ref->u, 16*i, 16*j, dx, dy, stride, rounding);
-		filter_18x18_to_8x8(coeff, current, stride);
-		filter_diff_18x18_to_8x8(coeff, temp, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_18x18_to_8x8)(coeff, current, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_diff_18x18_to_8x8)(coeff, temp, stride);
 		transfer16x16_copy(current, temp, stride);
 
 		current = Cur->v + 16*j*stride + 16*i;
 		interpolate18x18_switch(reference, Ref->v, 16*i, 16*j, dx, dy, stride, rounding);
-		filter_18x18_to_8x8(coeff + 64, current, stride);
-		filter_diff_18x18_to_8x8(coeff + 64, temp, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_18x18_to_8x8)(coeff + 64, current, stride);
+		(((struct global_all_dll *)Dll::Tls())->filter_diff_18x18_to_8x8)(coeff + 64, temp, stride);
 		transfer16x16_copy(current, temp, stride);
 	}
 }
@@ -308,10 +302,10 @@
 						  ref->y + 16 * (i + j * edged_width),
 						  edged_width);
 
-		transfer8x8_copy(cur->u + 8 * (i + j * edged_width/2),
+		(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy)(cur->u + 8 * (i + j * edged_width/2),
 							ref->u + 8 * (i + j * edged_width/2),
 							edged_width / 2);
-		transfer8x8_copy(cur->v + 8 * (i + j * edged_width/2),
+		(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy)(cur->v + 8 * (i + j * edged_width/2),
 							ref->v + 8 * (i + j * edged_width/2),
 							edged_width / 2);
 		return;
@@ -326,21 +320,21 @@
 
 			/* call normal routine once, easier than "if (mcsel)"ing all the time */
 
-			transfer_8to16sub(&dct_codes[0*64], cur->y + 16*j*edged_width + 16*i,
+			(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)(&dct_codes[0*64], cur->y + 16*j*edged_width + 16*i,
 								 			refGMC->y + 16*j*edged_width + 16*i, edged_width);
-			transfer_8to16sub(&dct_codes[1*64], cur->y + 16*j*edged_width + 16*i+8,
+			(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)(&dct_codes[1*64], cur->y + 16*j*edged_width + 16*i+8,
 											refGMC->y + 16*j*edged_width + 16*i+8, edged_width);
-			transfer_8to16sub(&dct_codes[2*64], cur->y + (16*j+8)*edged_width + 16*i,
+			(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)(&dct_codes[2*64], cur->y + (16*j+8)*edged_width + 16*i,
 											refGMC->y + (16*j+8)*edged_width + 16*i, edged_width);
-			transfer_8to16sub(&dct_codes[3*64], cur->y + (16*j+8)*edged_width + 16*i+8,
+			(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)(&dct_codes[3*64], cur->y + (16*j+8)*edged_width + 16*i+8,
 											refGMC->y + (16*j+8)*edged_width + 16*i+8, edged_width);
 
 /* lumi is needed earlier for mode decision, but chroma should be done block-based, but it isn't, yet. */
 
-			transfer_8to16sub(&dct_codes[4 * 64], cur->u + 8 *j*edged_width/2 + 8*i,
+			(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)(&dct_codes[4 * 64], cur->u + 8 *j*edged_width/2 + 8*i,
 								refGMC->u + 8 *j*edged_width/2 + 8*i, edged_width/2);
 
-			transfer_8to16sub(&dct_codes[5 * 64], cur->v + 8*j* edged_width/2 + 8*i,
+			(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)(&dct_codes[5 * 64], cur->v + 8*j* edged_width/2 + 8*i,
 								refGMC->v + 8*j* edged_width/2 + 8*i, edged_width/2);
 
 			return;
@@ -485,7 +479,7 @@
 							i, j, 16, b_dx, b_dy, edged_width);
 		}
 		for (k = 0; k < 4; k++)
-				transfer_8to16sub2(&dct_codes[k * 64],
+				(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub2)(&dct_codes[k * 64],
 									cur->y + (i * 16+(k&1)*8) + (j * 16+((k>>1)*8)) * edged_width,
 									ptr1 + (k&1)*8 + (k>>1)*8*edged_width,
 									ptr2 + (k&1)*8 + (k>>1)*8*edged_width, edged_width);
@@ -535,7 +529,7 @@
 				ptr2 = get_ref(b_ref->y, b_refh->y, b_refv->y, b_refhv->y,
 								2*i + (k&1), 2*j + (k>>1), 8, b_dx, b_dy,  edged_width);
 			}
-			transfer_8to16sub2(&dct_codes[k * 64],
+			(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub2)(&dct_codes[k * 64],
 								cur->y + (i * 16+(k&1)*8) + (j * 16+((k>>1)*8)) * edged_width,
 								ptr1, ptr2,	edged_width);
 
@@ -550,7 +544,7 @@
 	}
 
 	/* v block-based chroma interpolation for direct and interpolate modes */
-	transfer_8to16sub2(&dct_codes[4 * 64],
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub2)(&dct_codes[4 * 64],
 						cur->u + (j * 8) * edged_width / 2 + (i * 8),
 						interpolate8x8_switch2(tmp, b_ref->u, 8 * i, 8 * j,
 												b_dx, b_dy, edged_width / 2, 0),
@@ -558,7 +552,7 @@
 												dx, dy, edged_width / 2, 0),
 						edged_width / 2);
 
-	transfer_8to16sub2(&dct_codes[5 * 64],
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub2)(&dct_codes[5 * 64],
 						cur->v + (j * 8) * edged_width / 2 + (i * 8),
 						interpolate8x8_switch2(tmp, b_ref->v, 8 * i, 8 * j,
 												b_dx, b_dy, edged_width / 2, 0),
diff -bBru src-ori/motion/motion_inlines.h src/motion/motion_inlines.h
--- src-ori/motion/motion_inlines.h	Wed Jan 12 10:32:43 2005
+++ src/motion/motion_inlines.h	Wed Jan 12 12:38:15 2005
@@ -34,8 +34,7 @@
  * Calculate the min/max range
  * relative to the _MACROBLOCK_ position
  */
-static void __inline
-get_range(int32_t * const min_dx,
+static void __inline get_range(int32_t * const min_dx,
 		  int32_t * const max_dx,
 		  int32_t * const min_dy,
 		  int32_t * const max_dy,
@@ -47,7 +46,8 @@
 		  const uint32_t fcode,
 		  const int precision, /* 2 for qpel, 1 for halfpel */
 		  const int rrv)
-{
+ {
+
 	int k;
 	const int search_range = 16 << fcode;
 	int high = search_range - 1;
@@ -82,9 +82,9 @@
 	7, 7, 7, 6, 4, 3, 2, 1
 };
 
-static __inline uint32_t
-d_mv_bits(int x, int y, const VECTOR pred, const uint32_t iFcode, const int qpel, const int rrv)
-{
+static __inline uint32_t d_mv_bits(int x, int y, const VECTOR pred, const uint32_t iFcode, const int qpel, const int rrv)
+ {
+
 	unsigned int bits;
 
 	x <<= qpel;
@@ -106,17 +106,17 @@
 	return bits;
 }
 
-static __inline const uint8_t *
-GetReference(const int x, const int y, const SearchData * const data)
-{
+static __inline const uint8_t * GetReference(const int x, const int y, const SearchData * const data)
+ {
+	/* Bit 0 of x and of y form an index 0..3 into data->RefP; the remaining bits (x>>1, y>>1) give the offset into that buffer, the y part scaled by data->iEdgedWidth. Presumably x,y are half-pel coordinates - confirm. */
 	const int picture = ((x&1)<<1) | (y&1);
 	const int offset = (x>>1) + (y>>1)*data->iEdgedWidth;
 	return data->RefP[picture] + offset;
 }
 
-static __inline const uint8_t *
-GetReferenceB(const int x, const int y, const uint32_t dir, const SearchData * const data)
-{
+static __inline const uint8_t * GetReferenceB(const int x, const int y, const uint32_t dir, const SearchData * const data)
+ {
+
 	/* dir : 0 = forward, 1 = backward */
 	const uint8_t *const *const direction = ( dir == 0 ? data->RefP : data->b_RefP );
 	const int picture = ((x&1)<<1) | (y&1);
@@ -124,9 +124,9 @@
 	return direction[picture] + offset;
 }
 
-static __inline void
-ZeroMacroblockP(MACROBLOCK *pMB, const int32_t sad)
-{
+static __inline void ZeroMacroblockP(MACROBLOCK *pMB, const int32_t sad)
+ {
+
 	pMB->mode = MODE_INTER;
 	pMB->mvs[0] = pMB->mvs[1] = pMB->mvs[2] = pMB->mvs[3] = zeroMV;
 	pMB->qmvs[0] = pMB->qmvs[1] = pMB->qmvs[2] = pMB->qmvs[3] = zeroMV;
@@ -136,9 +136,9 @@
 }
 
 /* check if given vector is equal to any vector checked before */
-static __inline int
-vector_repeats(const VECTOR * const pmv, const unsigned int i)
-{
+static __inline int vector_repeats(const VECTOR * const pmv, const unsigned int i)
+ {
+
 	unsigned int j;
 	for (j = 0; j < i; j++)
 		if (MVequal(pmv[i], pmv[j])) return 1; /* same vector has been checked already */
@@ -147,9 +147,9 @@
 
 /*	make a binary mask that prevents diamonds/squares
 	from checking a vector which has been checked as a prediction */
-static __inline int
-make_mask(const VECTOR * const pmv, const unsigned int i, const unsigned int current)
-{
+static __inline int make_mask(const VECTOR * const pmv, const unsigned int i, const unsigned int current)
+ {
+
 	unsigned int mask = 255, j;
 	for (j = 0; j < i; j++) {
 		if (pmv[current].x == pmv[j].x) {
Only in src-ori/motion/ppc_asm: README
diff -bBru src-ori/motion/ppc_asm/sad_altivec.c src/motion/ppc_asm/sad_altivec.c
--- src-ori/motion/ppc_asm/sad_altivec.c	Wed Jan 12 10:32:43 2005
+++ src/motion/ppc_asm/sad_altivec.c	Wed Jan 12 12:45:14 2005
@@ -1,7 +1,12 @@
-/*
 
-    Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>
 
+
+
+
+
+
+/*
+    Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     the Free Software Foundation; either version 2 of the License, or
@@ -32,7 +32,8 @@
 register vector unsigned int zerovec asm("%v31");
 #endif
 
-#include <stdio.h>
+#include <e32std.h>
+#include "../../global_all_dll.h"
 
 #undef DEBUG
 
Only in src-ori/motion/ppc_asm: sad_altivec.s
diff -bBru src-ori/motion/sad.c src/motion/sad.c
--- src-ori/motion/sad.c	Wed Jan 12 10:32:43 2005
+++ src/motion/sad.c	Wed Jan 12 12:43:30 2005
@@ -23,21 +23,11 @@
  *
  ****************************************************************************/
 
-#include "../portab.h"
-#include "../global.h"
-#include "sad.h"
-
-#include <stdlib.h>
-
-sad16FuncPtr sad16;
-sad8FuncPtr sad8;
-sad16biFuncPtr sad16bi;
-sad8biFuncPtr sad8bi;		/* not really sad16, but no difference in prototype */
-dev16FuncPtr dev16;
-sad16vFuncPtr sad16v;
-sse8Func_16bitPtr sse8_16bit;
+#include <e32std.h>
+#include "../global_all_dll.h"
+
+
 
-sadInitFuncPtr sadInit;
 
 
 uint32_t
@@ -215,10 +205,10 @@
 			   const uint32_t stride,
 			   int32_t *sad)
 {
-	sad[0] = sad8(cur, ref, stride);
-	sad[1] = sad8(cur + 8, ref + 8, stride);
-	sad[2] = sad8(cur + 8*stride, ref + 8*stride, stride);
-	sad[3] = sad8(cur + 8*stride + 8, ref + 8*stride + 8, stride);
+	sad[0] = (((struct global_all_dll *)Dll::Tls())->sad8)(cur, ref, stride);
+	sad[1] = (((struct global_all_dll *)Dll::Tls())->sad8)(cur + 8, ref + 8, stride);
+	sad[2] = (((struct global_all_dll *)Dll::Tls())->sad8)(cur + 8*stride, ref + 8*stride, stride);
+	sad[3] = (((struct global_all_dll *)Dll::Tls())->sad8)(cur + 8*stride + 8, ref + 8*stride + 8, stride);
 
 	return sad[0]+sad[1]+sad[2]+sad[3];
 }
@@ -228,10 +218,10 @@
 			   const uint32_t stride,
 			   int32_t *sad)
 {
-	sad[0] = sad16(cur, ref, stride, 256*4096);
-	sad[1] = sad16(cur + 16, ref + 16, stride, 256*4096);
-	sad[2] = sad16(cur + 16*stride, ref + 16*stride, stride, 256*4096);
-	sad[3] = sad16(cur + 16*stride + 16, ref + 16*stride + 16, stride, 256*4096);
+	sad[0] = (((struct global_all_dll *)Dll::Tls())->sad16)(cur, ref, stride, 256*4096);
+	sad[1] = (((struct global_all_dll *)Dll::Tls())->sad16)(cur + 16, ref + 16, stride, 256*4096);
+	sad[2] = (((struct global_all_dll *)Dll::Tls())->sad16)(cur + 16*stride, ref + 16*stride, stride, 256*4096);
+	sad[3] = (((struct global_all_dll *)Dll::Tls())->sad16)(cur + 16*stride + 16, ref + 16*stride + 16, stride, 256*4096);
 
 	return sad[0]+sad[1]+sad[2]+sad[3];
 }
diff -bBru src-ori/motion/sad.h src/motion/sad.h
--- src-ori/motion/sad.h	Wed Jan 12 10:32:43 2005
+++ src/motion/sad.h	Wed Jan 12 12:38:14 2005
@@ -31,7 +31,7 @@
 typedef void (sadInitFunc) (void);
 typedef sadInitFunc *sadInitFuncPtr;
 
-extern sadInitFuncPtr sadInit;
+
 sadInitFunc sadInit_altivec;
 
 typedef uint32_t(sad16Func) (const uint8_t * const cur,
@@ -36,10 +36,13 @@
 
 typedef uint32_t(sad16Func) (const uint8_t * const cur,
 							 const uint8_t * const ref,
+
 							 const uint32_t stride,
+
 							 const uint32_t best_sad);
+
 typedef sad16Func *sad16FuncPtr;
-extern sad16FuncPtr sad16;
+
 sad16Func sad16_c;
 
 #ifdef ARCH_IS_IA32
@@ -61,9 +64,11 @@
 
 typedef uint32_t(sad8Func) (const uint8_t * const cur,
 							const uint8_t * const ref,
+
 							const uint32_t stride);
+
 typedef sad8Func *sad8FuncPtr;
-extern sad8FuncPtr sad8;
+
 sad8Func sad8_c;
 
 #ifdef ARCH_IS_IA32
@@ -82,10 +87,13 @@
 
 typedef uint32_t(sad16biFunc) (const uint8_t * const cur,
 							   const uint8_t * const ref1,
+
 							   const uint8_t * const ref2,
+
 							   const uint32_t stride);
+
 typedef sad16biFunc *sad16biFuncPtr;
-extern sad16biFuncPtr sad16bi;
+
 sad16biFunc sad16bi_c;
 
 #ifdef ARCH_IS_IA32
@@ -101,10 +109,13 @@
 
 typedef uint32_t(sad8biFunc) (const uint8_t * const cur,
 							   const uint8_t * const ref1,
+
 							   const uint8_t * const ref2,
+
 							   const uint32_t stride);
+
 typedef sad8biFunc *sad8biFuncPtr;
-extern sad8biFuncPtr sad8bi;
+
 sad8biFunc sad8bi_c;
 
 #ifdef ARCH_IS_IA32
@@ -117,8 +128,9 @@
 
 typedef uint32_t(dev16Func) (const uint8_t * const cur,
 							 const uint32_t stride);
+
 typedef dev16Func *dev16FuncPtr;
-extern dev16FuncPtr dev16;
+
 dev16Func dev16_c;
 
 typedef uint32_t (sad16vFunc)(	const uint8_t * const cur,
@@ -123,9 +135,11 @@
 
 typedef uint32_t (sad16vFunc)(	const uint8_t * const cur,
 								const uint8_t * const ref,
+
 								const uint32_t stride, int32_t *sad8);
+
 typedef sad16vFunc *sad16vFuncPtr;
-extern sad16vFuncPtr sad16v;
+
 
 sad16vFunc sad16v_c;
 sad16vFunc sad32v_c;
@@ -149,9 +163,11 @@
 
 typedef uint32_t (sse8Func_16bit)(const int16_t * cur,
 								  const int16_t * ref,
+
 								  const uint32_t stride);
+
 typedef sse8Func_16bit *sse8Func_16bitPtr;
-extern sse8Func_16bitPtr sse8_16bit;
+
 
 sse8Func_16bit sse8_16bit_c;
 #ifdef ARCH_IS_IA32
diff -bBru src-ori/motion/vop_type_decision.c src/motion/vop_type_decision.c
--- src-ori/motion/vop_type_decision.c	Wed Jan 12 10:32:43 2005
+++ src/motion/vop_type_decision.c	Wed Jan 12 12:44:34 2005
@@ -23,14 +23,8 @@
  *
  ****************************************************************************/
 
-#include "../encoder.h"
-#include "../prediction/mbprediction.h"
-#include "estimation.h"
-#include "motion.h"
-#include "sad.h"
-#include "gmc.h"
-#include "../utils/emms.h"
-#include "motion_inlines.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
 
 
 #define INTRA_THRESH	2000
@@ -186,7 +180,7 @@
 	InterThresh -= P_SENS_BIAS * bCount;
 	if (InterThresh < INTER_THRESH_MIN) InterThresh = INTER_THRESH_MIN;
 
-	if (sadInit) (*sadInit) ();
+	if ((((struct global_all_dll *)Dll::Tls())->sadInit)) (*(((struct global_all_dll *)Dll::Tls())->sadInit)) ();
 
 	for (y = 1; y < pParam->mb_height-1; y += 2) {
 		for (x = 1; x < pParam->mb_width-1; x += 2) {
@@ -206,7 +200,7 @@
 			for (i = 0; i < 4; i++) {
 				int dev;
 				MACROBLOCK *pMB = &pMBs[x+(i&1) + (y+(i>>1)) * pParam->mb_width];
-				dev = dev16(pCurrent->y + (x + (i&1) + (y + (i>>1)) * pParam->edged_width) * 16,
+				dev = (((struct global_all_dll *)Dll::Tls())->dev16)(pCurrent->y + (x + (i&1) + (y + (i>>1)) * pParam->edged_width) * 16,
 								pParam->edged_width);
 
 				complexity += MAX(dev, 300);
@@ -229,6 +223,7 @@
 
 	if (sSAD > IntraThresh2) return I_VOP;
 	if (sSAD > InterThresh) return P_VOP;
-	emms();
+	(((struct global_all_dll *)Dll::Tls())->emms)();
 	return B_VOP;
 }
+
Only in src-ori/motion: x86_asm
diff -bBru src-ori/plugins/plugin_2pass1.c src/plugins/plugin_2pass1.c
--- src-ori/plugins/plugin_2pass1.c	Wed Jan 12 10:32:43 2005
+++ src/plugins/plugin_2pass1.c	Wed Jan 12 12:45:54 2005
@@ -26,12 +26,9 @@
  *
  *****************************************************************************/
 
-#include <stdio.h>
-#include <errno.h> /* errno var (or function with recent libc) */
-#include <string.h> /* strerror() */
+#include <e32std.h>
+#include "../global_all_dll.h"
 
-#include "../xvid.h"
-#include "../image/image.h"
 
 
 /* This preprocessor constant controls wheteher or not, first pass is done
@@ -42,8 +39,7 @@
 
 
 /* context struct */
-typedef struct
-{
+typedef struct {
 	FILE * stat_file;
 
     double fq_error;
diff -bBru src-ori/plugins/plugin_2pass2.c src/plugins/plugin_2pass2.c
--- src-ori/plugins/plugin_2pass2.c	Wed Jan 12 10:32:43 2005
+++ src/plugins/plugin_2pass2.c	Wed Jan 12 12:46:25 2005
@@ -39,12 +39,9 @@
 	to fight most common problems without user's knowladge */
 #define SMART_OVERFLOW_SETTING
 
-#include <stdio.h>
-#include <math.h>
-#include <limits.h>
+#include <e32std.h>
+#include "../global_all_dll.h"
 
-#include "../xvid.h"
-#include "../image/image.h"
 
 /*****************************************************************************
  * Some default settings
@@ -76,8 +73,7 @@
  ****************************************************************************/
 
 /* Statistics */
-typedef struct {
-	int type;               /* first pass type */
+typedef struct { 	int type;               /* first pass type */
 	int quant;              /* first pass quant */
 	int blks[3];            /* k,m,y blks */
 	int length;             /* first pass length */
@@ -90,9 +86,9 @@
 	double weight;
 } twopass_stat_t;
 
+
 /* Context struct */
-typedef struct
-{
+typedef struct {
 	xvid_plugin_2pass2_t param;
 
 	/*----------------------------------
@@ -277,6 +274,7 @@
 /*----------------------------------------------------------------------------
  *--------------------------------------------------------------------------*/
 
+#define _INIT(a, b) if((a) <= 0) (a) = (b)
 static int
 rc_2pass2_create(xvid_plg_create_t * create, rc_2pass2_t **handle)
 {
@@ -291,7 +289,6 @@
 	rc->param = *param;
 
 	/* Initialize all defaults */
-#define _INIT(a, b) if((a) <= 0) (a) = (b)
 	/* Let's set our defaults if needed */
 	_INIT(rc->param.keyframe_boost, DEFAULT_KEYFRAME_BOOST);
 	_INIT(rc->param.overflow_control_strength, DEFAULT_OVERFLOW_CONTROL_STRENGTH);
@@ -452,6 +450,10 @@
 /*----------------------------------------------------------------------------
  *--------------------------------------------------------------------------*/
 
+#if 0
+		/* Leave this one alone, as it impacts badly on quality */
+		overflow *= framesize_factor;
+#endif
 static int
 rc_2pass2_before(rc_2pass2_t * rc, xvid_plg_data_t * data)
 {
@@ -581,10 +583,6 @@
 
 		/* Treat only the overflow part concerned by this frame type and size */
 		overflow *= frametype_factor;
-#if 0
-		/* Leave this one alone, as it impacts badly on quality */
-		overflow *= framesize_factor;
-#endif
 
 		/* Apply the overflow strength imposed by the user */
 		overflow *= (rc->param.overflow_control_strength/100.0f);
diff -bBru src-ori/plugins/plugin_dump.c src/plugins/plugin_dump.c
--- src-ori/plugins/plugin_dump.c	Wed Jan 12 10:32:43 2005
+++ src/plugins/plugin_dump.c	Wed Jan 12 12:45:50 2005
@@ -23,10 +23,9 @@
  *
  ****************************************************************************/
 
-#include <stdio.h>
+#include <e32std.h>
+#include "../global_all_dll.h"
 
-#include "../xvid.h"
-#include "../image/image.h"
 
 
 int xvid_plugin_dump(void * handle, int opt, void * param1, void * param2)
diff -bBru src-ori/plugins/plugin_fixed.c src/plugins/plugin_fixed.c
--- src-ori/plugins/plugin_fixed.c	Wed Jan 12 10:32:43 2005
+++ src/plugins/plugin_fixed.c	Wed Jan 12 12:46:33 2005
@@ -24,12 +24,11 @@
  ****************************************************************************/
 
 
-#include "../xvid.h"
-#include "../image/image.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
 
 
-typedef struct
-{
+typedef struct {
 	int32_t quant_increment;
 	int32_t quant_base;
 
diff -bBru src-ori/plugins/plugin_lumimasking.c src/plugins/plugin_lumimasking.c
--- src-ori/plugins/plugin_lumimasking.c	Wed Jan 12 10:32:43 2005
+++ src/plugins/plugin_lumimasking.c	Wed Jan 12 12:45:50 2005
@@ -24,18 +24,15 @@
  *
  ****************************************************************************/
 
-#include <stdlib.h>
+#include <e32std.h>
+#include "../global_all_dll.h"
 
-#include "../xvid.h"
-#include "../portab.h"
-#include "../utils/emms.h"
 
 /*****************************************************************************
  * Private data type
  ****************************************************************************/
 
-typedef struct
-{
+typedef struct {
 	float *quant;
 	float *val;
 } lumi_data_t;
diff -bBru src-ori/plugins/plugin_psnr.c src/plugins/plugin_psnr.c
--- src-ori/plugins/plugin_psnr.c	Wed Jan 12 10:32:43 2005
+++ src/plugins/plugin_psnr.c	Wed Jan 12 12:45:51 2005
@@ -23,10 +23,9 @@
  *
  ****************************************************************************/
 
-#include <stdio.h>
+#include <e32std.h>
+#include "../global_all_dll.h"
 
-#include "../xvid.h"
-#include "../image/image.h"
 
 
 int xvid_plugin_psnr(void * handle, int opt, void * param1, void * param2)
diff -bBru src-ori/plugins/plugin_single.c src/plugins/plugin_single.c
--- src-ori/plugins/plugin_single.c	Wed Jan 12 10:32:43 2005
+++ src/plugins/plugin_single.c	Wed Jan 12 12:45:46 2005
@@ -25,10 +25,9 @@
  ****************************************************************************/
 
 
-#include <limits.h>
+#include <e32std.h>
+#include "../global_all_dll.h"
 
-#include "../xvid.h"
-#include "../image/image.h"
 
 #define DEFAULT_INITIAL_QUANTIZER 4
 
@@ -37,8 +36,7 @@
 #define DEFAULT_AVERAGING_PERIOD 100
 #define DEFAULT_BUFFER 100
 
-typedef struct
-{
+typedef struct {
 	int reaction_delay_factor;
 	int averaging_period;
 	int buffer;
diff -bBru src-ori/portab.h src/portab.h
--- src-ori/portab.h	Wed Jan 12 10:32:43 2005
+++ src/portab.h	Wed Jan 12 12:38:11 2005
@@ -34,7 +34,7 @@
 
 /* Buffer size for msvc implementation because it outputs to DebugOutput */
 #if defined(_DEBUG)
-extern unsigned int xvid_debug;
+
 #define DPRINTF_BUF_SZ  1024
 #endif
 
@@ -131,8 +131,8 @@
  * DPRINTF function when not compiling in _DEBUG mode
  */
 #   ifdef _DEBUG
-static __inline void DPRINTF(int level, char *fmt, ...)
-{
+static __inline void DPRINTF(int level, char *fmt, ...) {
+
 	if (xvid_debug & level) {
 		va_list args;
 		char buf[DPRINTF_BUF_SZ];
@@ -163,8 +163,8 @@
 #    if defined(ARCH_IS_IA32)
 #        define BSWAP(a) __asm mov eax,a __asm bswap eax __asm mov a, eax
 
-static __inline int64_t read_counter(void)
-{
+static __inline int64_t read_counter(void) {
+
 	int64_t ts;
 	uint32_t ts1, ts2;
 	__asm {
@@ -185,8 +185,8 @@
 	 (((a) >> 8) & 0xff00) | (((a) >> 24) & 0xff))
 
 #        include <time.h>
-static __inline int64_t read_counter(void)
-{
+static __inline int64_t read_counter(void) {
+
 	return (int64_t)clock();
 }
 
@@ -221,8 +221,8 @@
 #       include <stdio.h>
 #       include <stdarg.h>
 
-static __inline void DPRINTF(int level, char *format, ...)
-{
+static __inline void DPRINTF(int level, char *format, ...) {
+
 	va_list args;
 	va_start(args, format);
 	if(xvid_debug & level) {
@@ -246,8 +246,8 @@
 #    if defined(ARCH_IS_IA32)
 #        define BSWAP(a) __asm__ ( "bswapl %0\n" : "=r" (a) : "0" (a) );
 
-static __inline int64_t read_counter(void)
-{
+static __inline int64_t read_counter(void) {
+
 	int64_t ts;
 	uint32_t ts1, ts2;
 	__asm__ __volatile__("rdtsc\n\t":"=a"(ts1), "=d"(ts2));
@@ -262,22 +262,22 @@
 #        define BSWAP(a) __asm__ __volatile__ \
 	( "lwbrx %0,0,%1; eieio" : "=r" (a) : "r" (&(a)), "m" (a));
 
-static __inline unsigned long get_tbl(void)
-{
+static __inline unsigned long get_tbl(void) {
+
 	unsigned long tbl;
 	asm volatile ("mftb %0":"=r" (tbl));
 	return tbl;
 }
 
-static __inline unsigned long get_tbu(void)
-{
+static __inline unsigned long get_tbu(void) {
+
 	unsigned long tbl;
 	asm volatile ("mftbu %0":"=r" (tbl));
 	return tbl;
 }
 
-static __inline int64_t read_counter(void)
-{
+static __inline int64_t read_counter(void) {
+
 	unsigned long tb, tu;
 	do {
 		tu = get_tbu();
@@ -294,8 +294,8 @@
 	("mux1 %1 = %0, @rev" ";;" \
 	 "shr.u %1 = %1, 32" : "=r" (a) : "r" (a));
 
-static __inline int64_t read_counter(void)
-{
+static __inline int64_t read_counter(void) {
+
 	unsigned long result;
 	__asm__ __volatile__("mov %0=ar.itc" : "=r"(result) :: "memory");
 	return result;
@@ -310,8 +310,8 @@
 	 (((a) >> 8) & 0xff00) | (((a) >> 24) & 0xff))
 
 #        include <time.h>
-static __inline int64_t read_counter(void)
-{
+static __inline int64_t read_counter(void) {
+
 	return (int64_t)clock();
 }
 
@@ -336,8 +336,8 @@
 #    include <stdarg.h>
 
 #    ifdef _DEBUG
-static __inline void DPRINTF(int level, char *fmt, ...)
-{
+static __inline void DPRINTF(int level, char *fmt, ...) {
+
 	if (xvid_debug & level) {
 		va_list args;
 		char buf[DPRINTF_BUF_SZ];
@@ -362,8 +362,8 @@
 
 #        define BSWAP(a)  __asm mov eax,a __asm bswap eax __asm mov a, eax
 
-static __inline int64_t read_counter(void)
-{
+static __inline int64_t read_counter(void) {
+
 	uint64_t ts;
 	uint32_t ts1, ts2;
 	__asm {
@@ -415,8 +415,8 @@
 #       include <stdio.h>
 #       include <stdarg.h>
 
-static __inline void DPRINTF(int level, char *format, ...)
-{
+static __inline void DPRINTF(int level, char *format, ...) {
+
 	va_list args;
 	va_start(args, format);
 	if(xvid_debug & level) {
@@ -434,8 +434,8 @@
 	 (((a) >> 8) & 0xff00) | (((a) >> 24) & 0xff))
 
 #    include <time.h>
-static __inline int64_t read_counter(void)
-{
+static __inline int64_t read_counter(void) {
+
 	return (int64_t)clock();
 }
 
diff -bBru src-ori/prediction/mbprediction.c src/prediction/mbprediction.c
--- src-ori/prediction/mbprediction.c	Wed Jan 12 10:58:23 2005
+++ src/prediction/mbprediction.c	Wed Jan 12 12:45:43 2005
@@ -24,15 +24,9 @@
  *
  ****************************************************************************/
 
-#include <stdlib.h>
+#include <e32std.h>
+#include "../global_all_dll.h"
 
-#include "../global.h"
-#include "../encoder.h"
-#include "mbprediction.h"
-#include "../utils/mbfunctions.h"
-#include "../bitstream/cbp.h"
-#include "../bitstream/mbcoding.h"
-#include "../bitstream/zigzag.h"
 
 
 static int __inline
@@ -262,10 +256,10 @@
 /*****************************************************************************
  ****************************************************************************/
 
-/* encoder: subtract predictors from qcoeff[] and calculate S1/S2
 
-returns sum of coeefficients *saved* if prediction is enabled
 
+/* encoder: subtract predictors from qcoeff[] and calculate S1/S2
+returns sum of coefficients *saved* if prediction is enabled
 S1 = sum of all (qcoeff - prediction)
 S2 = sum of all qcoeff
 */
@@ -443,7 +437,7 @@
 				apply_acdc(pMB, j, &qcoeff[j * 64], predictors[j]);
 		}
 
-		pMB->cbp = calc_cbp(qcoeff);
+		pMB->cbp = (((struct global_all_dll *)Dll::Tls())->calc_cbp)(qcoeff);
 	}
 }
 
Only in src-ori/quant: ia64_asm
diff -bBru src-ori/quant/quant.h src/quant/quant.h
--- src-ori/quant/quant.h	Wed Jan 12 10:32:43 2005
+++ src/quant/quant.h	Wed Jan 12 12:37:58 2005
@@ -41,10 +45,10 @@
 typedef quant_intraFunc *quant_intraFuncPtr;
 
 /* Global function pointers */
-extern quant_intraFuncPtr quant_h263_intra;
-extern quant_intraFuncPtr quant_mpeg_intra;
-extern quant_intraFuncPtr dequant_h263_intra;
-extern quant_intraFuncPtr dequant_mpeg_intra;
+
+
+
+
 
 /*****************************************************************************
  * Known implementation of Intra (de)Quant functions
@@ -97,10 +104,10 @@
 typedef quant_interFunc *quant_interFuncPtr;
 
 /* Global function pointers */
-extern quant_interFuncPtr quant_h263_inter;
-extern quant_interFuncPtr quant_mpeg_inter;
-extern quant_interFuncPtr dequant_h263_inter;
-extern quant_interFuncPtr dequant_mpeg_inter;
+
+
+
+
 
 /*****************************************************************************
  * Known implementation of Inter (de)Quant functions
diff -bBru src-ori/quant/quant_h263.c src/quant/quant_h263.c
--- src-ori/quant/quant_h263.c	Wed Jan 12 10:32:43 2005
+++ src/quant/quant_h263.c	Wed Jan 12 12:41:19 2005
@@ -23,20 +23,14 @@
  *
  ****************************************************************************/
 
-#include "../global.h"
-#include "quant.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
 
 /*****************************************************************************
  * Global function pointers
  ****************************************************************************/
 
-/* Quant */
-quant_intraFuncPtr quant_h263_intra;
-quant_interFuncPtr quant_h263_inter;
-
-/* DeQuant */
-quant_intraFuncPtr dequant_h263_intra;
-quant_interFuncPtr dequant_h263_inter;
+
 
 /*****************************************************************************
  * Local data
diff -bBru src-ori/quant/quant_matrix.c src/quant/quant_matrix.c
--- src-ori/quant/quant_matrix.c	Wed Jan 12 10:32:43 2005
+++ src/quant/quant_matrix.c	Wed Jan 12 12:41:17 2005
@@ -24,7 +24,8 @@
  *
  ****************************************************************************/
 
-#include "quant_matrix.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
 
 #define FIX(X)   (((X)==1) ? 0xFFFF : ((1UL << 16) / (X) + 1))
 #define FIXL(X)    ((1UL << 16) / (X) - 1)
diff -bBru src-ori/quant/quant_mpeg.c src/quant/quant_mpeg.c
--- src-ori/quant/quant_mpeg.c	Wed Jan 12 10:32:43 2005
+++ src/quant/quant_mpeg.c	Wed Jan 12 12:41:23 2005
@@ -23,21 +23,14 @@
  *
  ****************************************************************************/
 
-#include "../global.h"
-#include "quant.h"
-#include "quant_matrix.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
 
 /*****************************************************************************
  * Global function pointers
  ****************************************************************************/
 
-/* Quant */
-quant_intraFuncPtr quant_mpeg_intra;
-quant_interFuncPtr quant_mpeg_inter;
-
-/* DeQuant */
-quant_intraFuncPtr dequant_mpeg_intra;
-quant_interFuncPtr dequant_mpeg_inter;
+
 
 /*****************************************************************************
  * Local data
Only in src-ori/quant: x86_asm
diff -bBru src-ori/utils/emms.c src/utils/emms.c
--- src-ori/utils/emms.c	Wed Jan 12 10:32:43 2005
+++ src/utils/emms.c	Wed Jan 12 12:41:27 2005
@@ -23,14 +23,13 @@
  *
  ****************************************************************************/
 
-#include "emms.h"
-#include "../portab.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
 
 /*****************************************************************************
  * Library data, declared here
  ****************************************************************************/
 
-emmsFuncPtr emms;
 
 
 /*****************************************************************************
diff -bBru src-ori/utils/emms.h src/utils/emms.h
--- src-ori/utils/emms.h	Wed Jan 12 10:32:43 2005
+++ src/utils/emms.h	Wed Jan 12 12:37:59 2005
@@ -27,7 +27,7 @@
 #define _EMMS_H_
 
 /*****************************************************************************
- * emms API
+ * emms API (the emms function pointer is a member of struct global_all_dll, reached via Dll::Tls())
  ****************************************************************************/
 
 typedef void (emmsFunc) ();
@@ -35,7 +35,7 @@
 typedef emmsFunc *emmsFuncPtr;
 
 /* Our global function pointer - defined in emms.c */
-extern emmsFuncPtr emms;
+
 
 /* Implemented functions */
 
Only in src-ori/utils: ia64_asm
diff -bBru src-ori/utils/mbfunctions.h src/utils/mbfunctions.h
--- src-ori/utils/mbfunctions.h	Wed Jan 12 10:32:43 2005
+++ src/utils/mbfunctions.h	Wed Jan 12 12:38:00 2005
@@ -61,8 +61,9 @@
 typedef uint32_t (MBFIELDTEST) (int16_t data[6 * 64]);	/* function pointer for field test */
 typedef MBFIELDTEST *MBFIELDTEST_PTR;
 
+
 /* global field test pointer for xvid.c */
-extern MBFIELDTEST_PTR MBFieldTest;
+
 
 /* field test implementations */
 MBFIELDTEST MBFieldTest_c;
diff -bBru src-ori/utils/mbtransquant.c src/utils/mbtransquant.c
--- src-ori/utils/mbtransquant.c	Wed Jan 12 10:32:43 2005
+++ src/utils/mbtransquant.c	Wed Jan 12 12:41:55 2005
@@ -25,27 +25,12 @@
  *
  ****************************************************************************/
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "../portab.h"
-#include "mbfunctions.h"
-
-#include "../global.h"
-#include "mem_transfer.h"
-#include "timer.h"
-#include "../bitstream/mbcoding.h"
-#include "../bitstream/zigzag.h"
-#include "../dct/fdct.h"
-#include "../dct/idct.h"
-#include "../quant/quant.h"
-#include "../encoder.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
+
+
 
-#include "../image/reduced.h"
-#include  "../quant/quant_matrix.h"
 
-MBFIELDTEST_PTR MBFieldTest;
 
 /*
  * Skip blocks having a coefficient sum below this value. This value will be
@@ -62,7 +47,7 @@
 static __inline uint32_t
 MBDecideFieldDCT(int16_t data[6 * 64])
 {
-	uint32_t field = MBFieldTest(data);
+	uint32_t field = (((struct global_all_dll *)Dll::Tls())->MBFieldTest)(data);
 
 	if (field)
 		MBFrameToField(data);
@@ -91,12 +76,12 @@
 
 	/* Perform DCT */
 	start_timer();
-	fdct(&data[0 * 64]);
-	fdct(&data[1 * 64]);
-	fdct(&data[2 * 64]);
-	fdct(&data[3 * 64]);
-	fdct(&data[4 * 64]);
-	fdct(&data[5 * 64]);
+	(((struct global_all_dll *)Dll::Tls())->fdct)(&data[0 * 64]);
+	(((struct global_all_dll *)Dll::Tls())->fdct)(&data[1 * 64]);
+	(((struct global_all_dll *)Dll::Tls())->fdct)(&data[2 * 64]);
+	(((struct global_all_dll *)Dll::Tls())->fdct)(&data[3 * 64]);
+	(((struct global_all_dll *)Dll::Tls())->fdct)(&data[4 * 64]);
+	(((struct global_all_dll *)Dll::Tls())->fdct)(&data[5 * 64]);
 	stop_dct_timer();
 }
 
@@ -106,12 +91,12 @@
 	   const uint8_t cbp)
 {
 	start_timer();
-	if(cbp & (1 << (5 - 0))) idct(&data[0 * 64]);
-	if(cbp & (1 << (5 - 1))) idct(&data[1 * 64]);
-	if(cbp & (1 << (5 - 2))) idct(&data[2 * 64]);
-	if(cbp & (1 << (5 - 3))) idct(&data[3 * 64]);
-	if(cbp & (1 << (5 - 4))) idct(&data[4 * 64]);
-	if(cbp & (1 << (5 - 5))) idct(&data[5 * 64]);
+	if(cbp & (1 << (5 - 0))) (((struct global_all_dll *)Dll::Tls())->idct)(&data[0 * 64]);
+	if(cbp & (1 << (5 - 1))) (((struct global_all_dll *)Dll::Tls())->idct)(&data[1 * 64]);
+	if(cbp & (1 << (5 - 2))) (((struct global_all_dll *)Dll::Tls())->idct)(&data[2 * 64]);
+	if(cbp & (1 << (5 - 3))) (((struct global_all_dll *)Dll::Tls())->idct)(&data[3 * 64]);
+	if(cbp & (1 << (5 - 4))) (((struct global_all_dll *)Dll::Tls())->idct)(&data[4 * 64]);
+	if(cbp & (1 << (5 - 5))) (((struct global_all_dll *)Dll::Tls())->idct)(&data[5 * 64]);
 	stop_idct_timer();
 }
 
@@ -128,8 +113,8 @@
 
 	quant_intraFuncPtr const quant[2] =
 		{
-			quant_h263_intra,
-			quant_mpeg_intra
+			(((struct global_all_dll *)Dll::Tls())->quant_h263_intra),
+			(((struct global_all_dll *)Dll::Tls())->quant_mpeg_intra)
 		};
 
 	mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
@@ -159,8 +144,8 @@
 
 	quant_intraFuncPtr const dequant[2] =
 		{
-			dequant_h263_intra,
-			dequant_mpeg_intra
+			(((struct global_all_dll *)Dll::Tls())->dequant_h263_intra),
+			(((struct global_all_dll *)Dll::Tls())->dequant_mpeg_intra)
 		};
 
 	mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
@@ -204,8 +189,8 @@
 
 	quant_interFuncPtr const quant[2] =
 		{
-			quant_h263_inter,
-			quant_mpeg_inter
+			(((struct global_all_dll *)Dll::Tls())->quant_h263_inter),
+			(((struct global_all_dll *)Dll::Tls())->quant_mpeg_inter)
 		};
 
 	mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
@@ -279,8 +264,8 @@
 
 	quant_interFuncPtr const dequant[2] =
 		{
-			dequant_h263_inter,
-			dequant_mpeg_inter
+			(((struct global_all_dll *)Dll::Tls())->dequant_h263_inter),
+			(((struct global_all_dll *)Dll::Tls())->dequant_mpeg_inter)
 		};
 
 	mpeg = !!(pParam->vol_flags & XVID_VOL_MPEGQUANT);
@@ -316,8 +301,8 @@
 	const IMAGE * const pCurrent = &frame->image;
 	transfer_operation_8to16_t * const functions[2] =
 		{
-			(transfer_operation_8to16_t *)transfer_8to16copy,
-			(transfer_operation_8to16_t *)filter_18x18_to_8x8
+			(transfer_operation_8to16_t *)(((struct global_all_dll *)Dll::Tls())->transfer_8to16copy),
+			(transfer_operation_8to16_t *)(((struct global_all_dll *)Dll::Tls())->filter_18x18_to_8x8)
 		};
 	transfer_operation_8to16_t *transfer_op = NULL;
 
@@ -366,10 +351,10 @@
 	/* Array of function pointers, indexed by [vop_reduced<<1+add] */
 	transfer_operation_16to8_t  * const functions[4] =
 		{
-			(transfer_operation_16to8_t*)transfer_16to8copy,
-			(transfer_operation_16to8_t*)transfer_16to8add,
-			(transfer_operation_16to8_t*)copy_upsampled_8x8_16to8,
-			(transfer_operation_16to8_t*)add_upsampled_8x8_16to8
+			(transfer_operation_16to8_t*)(((struct global_all_dll *)Dll::Tls())->transfer_16to8copy),
+			(transfer_operation_16to8_t*)(((struct global_all_dll *)Dll::Tls())->transfer_16to8add),
+			(transfer_operation_16to8_t*)(((struct global_all_dll *)Dll::Tls())->copy_upsampled_8x8_16to8),
+			(transfer_operation_16to8_t*)(((struct global_all_dll *)Dll::Tls())->add_upsampled_8x8_16to8)
 		};
 
 	transfer_operation_16to8_t *transfer_op = NULL;
@@ -743,7 +728,7 @@
 	12,13,13,13,13,13,13,13,13,14,16,16,16,16,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19};
 
 
-static const uint8_t * const B16_17_Code_Len[24] = { /* levels [1..24] */
+static const uint8_t * const B16_17_Code_Len[24] = {
 	Code_Len20,Code_Len19,Code_Len18,Code_Len17,
 	Code_Len16,Code_Len15,Code_Len14,Code_Len13,
 	Code_Len12,Code_Len11,Code_Len10,Code_Len9,
@@ -752,7 +737,7 @@
 	Code_Len2, Code_Len1, Code_Len1, Code_Len1,
 };
 
-static const uint8_t * const B16_17_Code_Len_Last[6] = { /* levels [1..6] */
+static const uint8_t * const B16_17_Code_Len_Last[6] = {
 	Code_Len24,Code_Len23,Code_Len22,Code_Len21, Code_Len3, Code_Len1,
 };
 
@@ -784,6 +769,7 @@
 }
 
 /* this routine has been strippen of all debug code */
+	typedef struct { int16_t Run, Level; } NODE;
 static int
 dct_quantize_trellis_c(int16_t *const Out,
 					   const int16_t *const In,
@@ -799,7 +785,6 @@
 	 * *very* slightly (~0.01dB), whereas speed drops to crawling level :)
 	 * Well, actually, taking 1 more coeff past Non_Zero into account sometimes
 	 * helps. */
-	typedef struct { int16_t Run, Level; } NODE;
 
 	NODE Nodes[65], Last;
 	uint32_t Run_Costs0[64+1];
@@ -1080,6 +1065,7 @@
 }
 
 
+	typedef struct { int16_t Run, Level; } NODE;
 static int
 dct_quantize_trellis_h263_c(int16_t *const Out, const int16_t *const In, int Q, const uint16_t * const Zigzag, int Non_Zero)
 {
@@ -1090,7 +1076,6 @@
 	 * slightly (~0.01dB), whereas speed drops to crawling level :)
 	 * Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps.
 	 */
-	typedef struct { int16_t Run, Level; } NODE;
 
 	NODE Nodes[65], Last;
 	uint32_t Run_Costs0[64+1];
diff -bBru src-ori/utils/mem_align.c src/utils/mem_align.c
--- src-ori/utils/mem_align.c	Wed Jan 12 10:32:43 2005
+++ src/utils/mem_align.c	Wed Jan 12 12:41:28 2005
@@ -23,9 +23,8 @@
  *
  ****************************************************************************/
 
-#include <stdlib.h>
-#include <stdio.h>
-#include "mem_align.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
 
 /*****************************************************************************
  * xvid_malloc
diff -bBru src-ori/utils/mem_transfer.c src/utils/mem_transfer.c
--- src-ori/utils/mem_transfer.c	Wed Jan 12 10:32:43 2005
+++ src/utils/mem_transfer.c	Wed Jan 12 12:41:32 2005
@@ -23,20 +23,34 @@
  *
  ****************************************************************************/
 
-#include "../global.h"
-#include "mem_transfer.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
+/* Copy a 16x16 region by invoking the transfer8x8_copy hook on each 8x8 quadrant; dst and src use the same stride. */
+void transfer16x16_copy(uint8_t * const dst, const uint8_t * const src, const uint32_t stride)
+{
+	/* Fetch the DLL state block once: Dll::Tls() returns the same pointer for all four calls. */
+	struct global_all_dll * const g = (struct global_all_dll *)Dll::Tls();
+	g->transfer8x8_copy(dst, src, stride);
+	g->transfer8x8_copy(dst + 8, src + 8, stride);
+	g->transfer8x8_copy(dst + 8*stride, src + 8*stride, stride);
+	g->transfer8x8_copy(dst + 8*stride + 8, src + 8*stride + 8, stride);
+}
+/* Copy a 32x32 region as four 16x16 quadrant copies; dst and src use the same stride. */
+void transfer32x32_copy(uint8_t * const dst, const uint8_t * const src, const uint32_t stride)
+{
+	const uint32_t lower = 16*stride;	/* offset from the top quadrants to the bottom ones */
+
+	transfer16x16_copy(dst, src, stride);
+	transfer16x16_copy(dst + 16, src + 16, stride);
+	transfer16x16_copy(dst + lower, src + lower, stride);
+	transfer16x16_copy(dst + lower + 16, src + lower + 16, stride);
+}
+
 
 /* Function pointers - Initialized in the xvid.c module */
 
-TRANSFER_8TO16COPY_PTR transfer_8to16copy;
-TRANSFER_16TO8COPY_PTR transfer_16to8copy;
 
-TRANSFER_8TO16SUB_PTR  transfer_8to16sub;
-TRANSFER_8TO16SUBRO_PTR  transfer_8to16subro;
-TRANSFER_8TO16SUB2_PTR transfer_8to16sub2;
-TRANSFER_16TO8ADD_PTR  transfer_16to8add;
 
-TRANSFER8X8_COPY_PTR transfer8x8_copy;
 
 
 /*****************************************************************************
diff -bBru src-ori/utils/mem_transfer.h src/utils/mem_transfer.h
--- src-ori/utils/mem_transfer.h	Wed Jan 12 10:32:43 2005
+++ src/utils/mem_transfer.h	Wed Jan 12 12:38:03 2005
@@ -37,7 +39,7 @@
 typedef TRANSFER_8TO16COPY *TRANSFER_8TO16COPY_PTR;
 
 /* Our global function pointer - Initialized in xvid.c */
-extern TRANSFER_8TO16COPY_PTR transfer_8to16copy;
+
 
 /* Implemented functions */
 extern TRANSFER_8TO16COPY transfer_8to16copy_c;
@@ -61,7 +65,7 @@
 typedef TRANSFER_16TO8COPY *TRANSFER_16TO8COPY_PTR;
 
 /* Our global function pointer - Initialized in xvid.c */
-extern TRANSFER_16TO8COPY_PTR transfer_16to8copy;
+
 
 /* Implemented functions */
 extern TRANSFER_16TO8COPY transfer_16to8copy_c;
@@ -87,7 +94,7 @@
 typedef TRANSFER_8TO16SUB *TRANSFER_8TO16SUB_PTR;
 
 /* Our global function pointer - Initialized in xvid.c */
-extern TRANSFER_8TO16SUB_PTR transfer_8to16sub;
+
 
 /* Implemented functions */
 extern TRANSFER_8TO16SUB transfer_8to16sub_c;
@@ -113,7 +123,7 @@
 typedef TRANSFER_8TO16SUBRO *TRANSFER_8TO16SUBRO_PTR;
 
 /* Our global function pointer - Initialized in xvid.c */
-extern TRANSFER_8TO16SUBRO_PTR transfer_8to16subro;
+
 
 /* Implemented functions */
 extern TRANSFER_8TO16SUBRO transfer_8to16subro_c;
@@ -136,7 +150,7 @@
 typedef TRANSFER_8TO16SUB2 *TRANSFER_8TO16SUB2_PTR;
 
 /* Our global function pointer - Initialized in xvid.c */
-extern TRANSFER_8TO16SUB2_PTR transfer_8to16sub2;
+
 
 /* Implemented functions */
 extern TRANSFER_8TO16SUB2 transfer_8to16sub2_c;
@@ -162,7 +178,7 @@
 typedef TRANSFER_16TO8ADD *TRANSFER_16TO8ADD_PTR;
 
 /* Our global function pointer - Initialized in xvid.c */
-extern TRANSFER_16TO8ADD_PTR transfer_16to8add;
+
 
 /* Implemented functions */
 extern TRANSFER_16TO8ADD transfer_16to8add_c;
@@ -187,7 +205,7 @@
 typedef TRANSFER8X8_COPY *TRANSFER8X8_COPY_PTR;
 
 /* Our global function pointer - Initialized in xvid.c */
-extern TRANSFER8X8_COPY_PTR transfer8x8_copy;
+
 
 /* Implemented functions */
 extern TRANSFER8X8_COPY transfer8x8_copy_c;
@@ -201,27 +219,15 @@
 extern TRANSFER8X8_COPY transfer8x8_copy_ia64;
 #endif
 
-static __inline void
-transfer16x16_copy(uint8_t * const dst,
+extern  void transfer16x16_copy(uint8_t * const dst,
 					const uint8_t * const src,
 					const uint32_t stride)
-{
-	transfer8x8_copy(dst, src, stride);
-	transfer8x8_copy(dst + 8, src + 8, stride);
-	transfer8x8_copy(dst + 8*stride, src + 8*stride, stride);
-	transfer8x8_copy(dst + 8*stride + 8, src + 8*stride + 8, stride);
-}
+;
 
-static __inline void
-transfer32x32_copy(uint8_t * const dst,
+extern  void transfer32x32_copy(uint8_t * const dst,
 					const uint8_t * const src,
 					const uint32_t stride)
-{
-	transfer16x16_copy(dst, src, stride);
-	transfer16x16_copy(dst + 16, src + 16, stride);
-	transfer16x16_copy(dst + 16*stride, src + 16*stride, stride);
-	transfer16x16_copy(dst + 16*stride + 16, src + 16*stride + 16, stride);
-}
+;
 
 
 #endif
diff -bBru src-ori/utils/timer.c src/utils/timer.c
--- src-ori/utils/timer.c	Wed Jan 12 10:32:43 2005
+++ src/utils/timer.c	Wed Jan 12 12:41:27 2005
@@ -23,9 +23,81 @@
  *
  ****************************************************************************/
 
-#include <stdio.h>
-#include <time.h>
-#include "timer.h"
+#include <e32std.h>
+#include "../global_all_dll.h"
+#if !defined(_PROFILING_)	/* no-op timer stubs; the _PROFILING_ section below defines the real ones */
+void start_timer()
+{
+}
+
+void start_global_timer()
+{
+}
+
+void stop_dct_timer()
+{
+}
+
+void stop_idct_timer()
+{
+}
+
+void stop_motion_timer()
+{
+}
+
+void stop_comp_timer()
+{
+}
+
+void stop_edges_timer()
+{
+}
+
+void stop_inter_timer()
+{
+}
+
+void stop_quant_timer()
+{
+}
+
+void stop_iquant_timer()
+{
+}
+
+void stop_conv_timer()
+{
+}
+
+void stop_transfer_timer()
+{
+}
+
+void init_timer()
+{
+}
+
+void write_timer()
+{
+}
+
+void stop_coding_timer()
+{
+}
+
+void stop_interlacing_timer()
+{
+}
+
+void stop_prediction_timer()
+{
+}
+
+void stop_global_timer()
+{
+}
+#endif /* !defined(_PROFILING_) */
 
 #if defined(_PROFILING_)
 
@@ -51,7 +123,6 @@
 
 struct ts tim;
 
-double frequency = 0.0;
 
 /*
     determine cpu frequency
@@ -81,7 +152,7 @@
 void
 init_timer()
 {
-	frequency = get_freq();
+	(((struct global_all_dll *)Dll::Tls())->frequency) = get_freq();
 
 	count_frames = 0;
 
@@ -270,21 +341,21 @@
 				"Coding:\nTotal time: %f ms (%3f percent of total encoding time)\n\n\n"
 				"Interlacing:\nTotal time: %f ms (%3f percent of total encoding time)\n\n\n"
 				"Overall encoding time: %f ms, we measured %f ms (%3f percent)\n",
-				(float) (tim.dct / frequency), dct_per,
-				(float) (tim.quant / frequency), quant_per,
-				(float) (tim.idct / frequency), idct_per,
-				(float) (tim.iquant / frequency), iquant_per,
-				(float) (tim.motion / frequency), mot_per,
-				(float) (tim.comp / frequency), comp_per,
-				(float) (tim.edges / frequency), edges_per,
-				(float) (tim.inter / frequency), inter_per,
-				(float) (tim.conv / frequency), conv_per,
-				(float) (tim.trans / frequency), trans_per,
-				(float) (tim.prediction / frequency), pred_per,
-				(float) (tim.coding / frequency), cod_per,
-				(float) (tim.interlacing / frequency), interlacing_per,
-				(float) (tim.overall / frequency),
-				(float) (sum_ticks / frequency), measured);
+				(float) (tim.dct / (((struct global_all_dll *)Dll::Tls())->frequency)), dct_per,
+				(float) (tim.quant / (((struct global_all_dll *)Dll::Tls())->frequency)), quant_per,
+				(float) (tim.idct / (((struct global_all_dll *)Dll::Tls())->frequency)), idct_per,
+				(float) (tim.iquant / (((struct global_all_dll *)Dll::Tls())->frequency)), iquant_per,
+				(float) (tim.motion / (((struct global_all_dll *)Dll::Tls())->frequency)), mot_per,
+				(float) (tim.comp / (((struct global_all_dll *)Dll::Tls())->frequency)), comp_per,
+				(float) (tim.edges / (((struct global_all_dll *)Dll::Tls())->frequency)), edges_per,
+				(float) (tim.inter / (((struct global_all_dll *)Dll::Tls())->frequency)), inter_per,
+				(float) (tim.conv / (((struct global_all_dll *)Dll::Tls())->frequency)), conv_per,
+				(float) (tim.trans / (((struct global_all_dll *)Dll::Tls())->frequency)), trans_per,
+				(float) (tim.prediction / (((struct global_all_dll *)Dll::Tls())->frequency)), pred_per,
+				(float) (tim.coding / (((struct global_all_dll *)Dll::Tls())->frequency)), cod_per,
+				(float) (tim.interlacing / (((struct global_all_dll *)Dll::Tls())->frequency)), interlacing_per,
+				(float) (tim.overall / (((struct global_all_dll *)Dll::Tls())->frequency)),
+				(float) (sum_ticks / (((struct global_all_dll *)Dll::Tls())->frequency)), measured);
 
 		fclose(fp);
 	}
diff -bBru src-ori/utils/timer.h src/utils/timer.h
--- src-ori/utils/timer.h	Wed Jan 12 10:32:43 2005
+++ src/utils/timer.h	Wed Jan 12 12:37:59 2005
@@ -53,78 +53,42 @@
 
 #else
 
-static __inline void
-start_timer()
-{
-}
-static __inline void
-start_global_timer()
-{
-}
-static __inline void
-stop_dct_timer()
-{
-}
-static __inline void
-stop_idct_timer()
-{
-}
-static __inline void
-stop_motion_timer()
-{
-}
-static __inline void
-stop_comp_timer()
-{
-}
-static __inline void
-stop_edges_timer()
-{
-}
-static __inline void
-stop_inter_timer()
-{
-}
-static __inline void
-stop_quant_timer()
-{
-}
-static __inline void
-stop_iquant_timer()
-{
-}
-static __inline void
-stop_conv_timer()
-{
-}
-static __inline void
-stop_transfer_timer()
-{
-}
-static __inline void
-init_timer()
-{
-}
-static __inline void
-write_timer()
-{
-}
-static __inline void
-stop_coding_timer()
-{
-}
-static __inline void
-stop_interlacing_timer()
-{
-}
-static __inline void
-stop_prediction_timer()
-{
-}
-static __inline void
-stop_global_timer()
-{
-}
+extern  void start_timer()
+;
+extern  void start_global_timer()
+;
+extern  void stop_dct_timer()
+;
+extern  void stop_idct_timer()
+;
+extern  void stop_motion_timer()
+;
+extern  void stop_comp_timer()
+;
+extern  void stop_edges_timer()
+;
+extern  void stop_inter_timer()
+;
+extern  void stop_quant_timer()
+;
+extern  void stop_iquant_timer()
+;
+extern  void stop_conv_timer()
+;
+extern  void stop_transfer_timer()
+;
+extern  void init_timer()
+;
+extern  void write_timer()
+;
+extern  void stop_coding_timer()
+;
+extern  void stop_interlacing_timer()
+;
+extern  void stop_prediction_timer()
+;
+extern  void stop_global_timer()
+;
 
 #endif
 
Only in src-ori/utils: x86_asm
diff -bBru src-ori/xvid.c src/xvid.c
--- src-ori/xvid.c	Wed Jan 12 10:32:43 2005
+++ src/xvid.c	Wed Jan 12 12:45:36 2005
@@ -23,48 +23,22 @@
  *
  ****************************************************************************/
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include "xvid.h"
-#include "decoder.h"
-#include "encoder.h"
-#include "bitstream/cbp.h"
-#include "dct/idct.h"
-#include "dct/fdct.h"
-#include "image/colorspace.h"
-#include "image/interpolate8x8.h"
-#include "image/reduced.h"
-#include "utils/mem_transfer.h"
-#include "utils/mbfunctions.h"
-#include "quant/quant.h"
-#include "motion/motion.h"
-#include "motion/sad.h"
-#include "utils/emms.h"
-#include "utils/timer.h"
-#include "bitstream/mbcoding.h"
-#include "image/qpel.h"
-#include "image/postprocessing.h"
+#include <e32std.h>
+#include "global_all_dll.h"
+
 
 #if defined(_DEBUG)
-unsigned int xvid_debug = 0; /* xvid debug mask */
 #endif
 
 #if defined(ARCH_IS_IA32)
 #if defined(_MSC_VER)
-#	include <windows.h>
 #else
-#	include <signal.h>
-#	include <setjmp.h>
 
-	static jmp_buf mark;
 
 	static void
 	sigill_handler(int signal)
 	{
-	   longjmp(mark, 1);
+	   longjmp((((struct global_all_dll *)Dll::Tls())->mark), 1);
 	}
 #endif
 
@@ -103,7 +77,7 @@
         return -1;
     }
 
-    jmpret = setjmp(mark);
+    jmpret = setjmp((((struct global_all_dll *)Dll::Tls())->mark));
     if (jmpret == 0)
     {
         func();
@@ -173,133 +147,133 @@
 	init_vlc_tables();
 
 	/* Fixed Point Forward/Inverse DCT transformations */
-	fdct = fdct_int32;
-	idct = idct_int32;
+	(((struct global_all_dll *)Dll::Tls())->fdct) = fdct_int32;
+	(((struct global_all_dll *)Dll::Tls())->idct) = idct_int32;
 
 	/* Only needed on PPC Altivec archs */
-	sadInit = 0;
+	(((struct global_all_dll *)Dll::Tls())->sadInit) = 0;
 
 	/* Restore FPU context : emms_c is a nop functions */
-	emms = emms_c;
+	(((struct global_all_dll *)Dll::Tls())->emms) = emms_c;
 
 	/* Qpel stuff */
-	xvid_QP_Funcs = &xvid_QP_Funcs_C;
-	xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_C;
+	(((struct global_all_dll *)Dll::Tls())->xvid_QP_Funcs) = &xvid_QP_Funcs_C;
+	(((struct global_all_dll *)Dll::Tls())->xvid_QP_Add_Funcs) = &xvid_QP_Add_Funcs_C;
 	xvid_Init_QP();
 
 	/* Quantization functions */
-	quant_h263_intra   = quant_h263_intra_c;
-	quant_h263_inter   = quant_h263_inter_c;
-	dequant_h263_intra = dequant_h263_intra_c;
-	dequant_h263_inter = dequant_h263_inter_c;
-
-	quant_mpeg_intra   = quant_mpeg_intra_c;
-	quant_mpeg_inter   = quant_mpeg_inter_c;
-	dequant_mpeg_intra = dequant_mpeg_intra_c;
-	dequant_mpeg_inter = dequant_mpeg_inter_c;
+	(((struct global_all_dll *)Dll::Tls())->quant_h263_intra)   = quant_h263_intra_c;
+	(((struct global_all_dll *)Dll::Tls())->quant_h263_inter)   = quant_h263_inter_c;
+	(((struct global_all_dll *)Dll::Tls())->dequant_h263_intra) = dequant_h263_intra_c;
+	(((struct global_all_dll *)Dll::Tls())->dequant_h263_inter) = dequant_h263_inter_c;
+
+	(((struct global_all_dll *)Dll::Tls())->quant_mpeg_intra)   = quant_mpeg_intra_c;
+	(((struct global_all_dll *)Dll::Tls())->quant_mpeg_inter)   = quant_mpeg_inter_c;
+	(((struct global_all_dll *)Dll::Tls())->dequant_mpeg_intra) = dequant_mpeg_intra_c;
+	(((struct global_all_dll *)Dll::Tls())->dequant_mpeg_inter) = dequant_mpeg_inter_c;
 
 	/* Block transfer related functions */
-	transfer_8to16copy = transfer_8to16copy_c;
-	transfer_16to8copy = transfer_16to8copy_c;
-	transfer_8to16sub  = transfer_8to16sub_c;
-	transfer_8to16subro  = transfer_8to16subro_c;
-	transfer_8to16sub2 = transfer_8to16sub2_c;
-	transfer_16to8add  = transfer_16to8add_c;
-	transfer8x8_copy   = transfer8x8_copy_c;
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16copy) = transfer_8to16copy_c;
+	(((struct global_all_dll *)Dll::Tls())->transfer_16to8copy) = transfer_16to8copy_c;
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)  = transfer_8to16sub_c;
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16subro)  = transfer_8to16subro_c;
+	(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub2) = transfer_8to16sub2_c;
+	(((struct global_all_dll *)Dll::Tls())->transfer_16to8add)  = transfer_16to8add_c;
+	(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy)   = transfer8x8_copy_c;
 
 	/* Interlacing functions */
-	MBFieldTest = MBFieldTest_c;
+	(((struct global_all_dll *)Dll::Tls())->MBFieldTest) = MBFieldTest_c;
 
 	/* Image interpolation related functions */
-	interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_c;
-	interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_c;
-	interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_c;
-
-	interpolate16x16_lowpass_h = interpolate16x16_lowpass_h_c;
-	interpolate16x16_lowpass_v = interpolate16x16_lowpass_v_c;
-	interpolate16x16_lowpass_hv = interpolate16x16_lowpass_hv_c;
-
-	interpolate8x8_lowpass_h = interpolate8x8_lowpass_h_c;
-	interpolate8x8_lowpass_v = interpolate8x8_lowpass_v_c;
-	interpolate8x8_lowpass_hv = interpolate8x8_lowpass_hv_c;
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_h)  = interpolate8x8_halfpel_h_c;
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_v)  = interpolate8x8_halfpel_v_c;
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv) = interpolate8x8_halfpel_hv_c;
+
+	(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_h) = interpolate16x16_lowpass_h_c;
+	(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_v) = interpolate16x16_lowpass_v_c;
+	(((struct global_all_dll *)Dll::Tls())->interpolate16x16_lowpass_hv) = interpolate16x16_lowpass_hv_c;
+
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_h) = interpolate8x8_lowpass_h_c;
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_v) = interpolate8x8_lowpass_v_c;
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_lowpass_hv) = interpolate8x8_lowpass_hv_c;
 
-	interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_c;
-	interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_c;
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_6tap_lowpass_h) = interpolate8x8_6tap_lowpass_h_c;
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_6tap_lowpass_v) = interpolate8x8_6tap_lowpass_v_c;
 
-	interpolate8x8_avg2 = interpolate8x8_avg2_c;
-	interpolate8x8_avg4 = interpolate8x8_avg4_c;
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2) = interpolate8x8_avg2_c;
+	(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg4) = interpolate8x8_avg4_c;
 
 	/* reduced resolution */
-	copy_upsampled_8x8_16to8 = xvid_Copy_Upsampled_8x8_16To8_C;
-	add_upsampled_8x8_16to8 = xvid_Add_Upsampled_8x8_16To8_C;
-	vfilter_31 = xvid_VFilter_31_C;
-	hfilter_31 = xvid_HFilter_31_C;
-	filter_18x18_to_8x8 = xvid_Filter_18x18_To_8x8_C;
-	filter_diff_18x18_to_8x8 = xvid_Filter_Diff_18x18_To_8x8_C;
+	(((struct global_all_dll *)Dll::Tls())->copy_upsampled_8x8_16to8) = xvid_Copy_Upsampled_8x8_16To8_C;
+	(((struct global_all_dll *)Dll::Tls())->add_upsampled_8x8_16to8) = xvid_Add_Upsampled_8x8_16To8_C;
+	(((struct global_all_dll *)Dll::Tls())->vfilter_31) = xvid_VFilter_31_C;
+	(((struct global_all_dll *)Dll::Tls())->hfilter_31) = xvid_HFilter_31_C;
+	(((struct global_all_dll *)Dll::Tls())->filter_18x18_to_8x8) = xvid_Filter_18x18_To_8x8_C;
+	(((struct global_all_dll *)Dll::Tls())->filter_diff_18x18_to_8x8) = xvid_Filter_Diff_18x18_To_8x8_C;
 
 	/* Initialize internal colorspace transformation tables */
 	colorspace_init();
 
 	/* All colorspace transformation functions User Format->YV12 */
-	yv12_to_yv12    = yv12_to_yv12_c;
-	rgb555_to_yv12  = rgb555_to_yv12_c;
-	rgb565_to_yv12  = rgb565_to_yv12_c;
-	rgb444_to_yv12  = rgb444_to_yv12_c;
-	bgr_to_yv12     = bgr_to_yv12_c;
-	bgra_to_yv12    = bgra_to_yv12_c;
-	abgr_to_yv12    = abgr_to_yv12_c;
-	rgba_to_yv12    = rgba_to_yv12_c;
-	argb_to_yv12    = argb_to_yv12_c;
-	yuyv_to_yv12    = yuyv_to_yv12_c;
-	uyvy_to_yv12    = uyvy_to_yv12_c;
-
-	rgb555i_to_yv12 = rgb555i_to_yv12_c;
-	rgb565i_to_yv12 = rgb565i_to_yv12_c;
-	bgri_to_yv12    = bgri_to_yv12_c;
-	bgrai_to_yv12   = bgrai_to_yv12_c;
-	abgri_to_yv12   = abgri_to_yv12_c;
-	rgbai_to_yv12   = rgbai_to_yv12_c;
-	argbi_to_yv12   = argbi_to_yv12_c;
-	yuyvi_to_yv12   = yuyvi_to_yv12_c;
-	uyvyi_to_yv12   = uyvyi_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_yv12)    = yv12_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->rgb555_to_yv12)  = rgb555_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->rgb565_to_yv12)  = rgb565_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->rgb444_to_yv12)  = rgb444_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->bgr_to_yv12)     = bgr_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->bgra_to_yv12)    = bgra_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->abgr_to_yv12)    = abgr_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->rgba_to_yv12)    = rgba_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->argb_to_yv12)    = argb_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->yuyv_to_yv12)    = yuyv_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->uyvy_to_yv12)    = uyvy_to_yv12_c;
+
+	(((struct global_all_dll *)Dll::Tls())->rgb555i_to_yv12) = rgb555i_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->rgb565i_to_yv12) = rgb565i_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->bgri_to_yv12)    = bgri_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->bgrai_to_yv12)   = bgrai_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->abgri_to_yv12)   = abgri_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->rgbai_to_yv12)   = rgbai_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->argbi_to_yv12)   = argbi_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->yuyvi_to_yv12)   = yuyvi_to_yv12_c;
+	(((struct global_all_dll *)Dll::Tls())->uyvyi_to_yv12)   = uyvyi_to_yv12_c;
 
 	/* All colorspace transformation functions YV12->User format */
-	yv12_to_rgb555  = yv12_to_rgb555_c;
-	yv12_to_rgb565  = yv12_to_rgb565_c;
-	yv12_to_rgb444  = yv12_to_rgb444_c;
-	yv12_to_bgr     = yv12_to_bgr_c;
-	yv12_to_bgra    = yv12_to_bgra_c;
-	yv12_to_abgr    = yv12_to_abgr_c;
-	yv12_to_rgba    = yv12_to_rgba_c;
-	yv12_to_argb    = yv12_to_argb_c;
-	yv12_to_yuyv    = yv12_to_yuyv_c;
-	yv12_to_uyvy    = yv12_to_uyvy_c;
-
-	yv12_to_rgb555i = yv12_to_rgb555i_c;
-	yv12_to_rgb565i = yv12_to_rgb565i_c;
-	yv12_to_bgri    = yv12_to_bgri_c;
-	yv12_to_bgrai   = yv12_to_bgrai_c;
-	yv12_to_abgri   = yv12_to_abgri_c;
-	yv12_to_rgbai   = yv12_to_rgbai_c;
-	yv12_to_argbi   = yv12_to_argbi_c;
-	yv12_to_yuyvi   = yv12_to_yuyvi_c;
-	yv12_to_uyvyi   = yv12_to_uyvyi_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_rgb555)  = yv12_to_rgb555_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_rgb565)  = yv12_to_rgb565_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_rgb444)  = yv12_to_rgb444_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_bgr)     = yv12_to_bgr_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_bgra)    = yv12_to_bgra_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_abgr)    = yv12_to_abgr_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_rgba)    = yv12_to_rgba_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_argb)    = yv12_to_argb_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_yuyv)    = yv12_to_yuyv_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_uyvy)    = yv12_to_uyvy_c;
+
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_rgb555i) = yv12_to_rgb555i_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_rgb565i) = yv12_to_rgb565i_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_bgri)    = yv12_to_bgri_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_bgrai)   = yv12_to_bgrai_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_abgri)   = yv12_to_abgri_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_rgbai)   = yv12_to_rgbai_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_argbi)   = yv12_to_argbi_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_yuyvi)   = yv12_to_yuyvi_c;
+	(((struct global_all_dll *)Dll::Tls())->yv12_to_uyvyi)   = yv12_to_uyvyi_c;
 
 	/* Functions used in motion estimation algorithms */
-	calc_cbp   = calc_cbp_c;
-	sad16      = sad16_c;
-	sad8       = sad8_c;
-	sad16bi    = sad16bi_c;
-	sad8bi     = sad8bi_c;
-	dev16      = dev16_c;
-	sad16v	   = sad16v_c;
-	sse8_16bit = sse8_16bit_c;
+	(((struct global_all_dll *)Dll::Tls())->calc_cbp)   = calc_cbp_c;
+	(((struct global_all_dll *)Dll::Tls())->sad16)      = sad16_c;
+	(((struct global_all_dll *)Dll::Tls())->sad8)       = sad8_c;
+	(((struct global_all_dll *)Dll::Tls())->sad16bi)    = sad16bi_c;
+	(((struct global_all_dll *)Dll::Tls())->sad8bi)     = sad8bi_c;
+	(((struct global_all_dll *)Dll::Tls())->dev16)      = dev16_c;
+	(((struct global_all_dll *)Dll::Tls())->sad16v)	   = sad16v_c;
+	(((struct global_all_dll *)Dll::Tls())->sse8_16bit) = sse8_16bit_c;
 
 #if defined(ARCH_IS_IA32)
 
 	if ((cpu_flags & XVID_CPU_ASM))	{
-		vfilter_31 = xvid_VFilter_31_x86;
-		hfilter_31 = xvid_HFilter_31_x86;
+		(((struct global_all_dll *)Dll::Tls())->vfilter_31) = xvid_VFilter_31_x86;
+		(((struct global_all_dll *)Dll::Tls())->hfilter_31) = xvid_HFilter_31_x86;
 	}
 
 	if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) ||
@@ -307,255 +281,255 @@
 		(cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2))
 	{
 		/* Restore FPU context : emms_c is a nop functions */
-		emms = emms_mmx;
+		(((struct global_all_dll *)Dll::Tls())->emms) = emms_mmx;
 	}
 
 	if ((cpu_flags & XVID_CPU_MMX)) {
 
 		/* Forward and Inverse Discrete Cosine Transformation functions */
-		fdct = fdct_mmx_skal;
-		idct = idct_mmx;
+		(((struct global_all_dll *)Dll::Tls())->fdct) = fdct_mmx_skal;
+		(((struct global_all_dll *)Dll::Tls())->idct) = idct_mmx;
 
 		/* Qpel stuff */
-		xvid_QP_Funcs = &xvid_QP_Funcs_mmx;
-		xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_mmx;
+		(((struct global_all_dll *)Dll::Tls())->xvid_QP_Funcs) = &xvid_QP_Funcs_mmx;
+		(((struct global_all_dll *)Dll::Tls())->xvid_QP_Add_Funcs) = &xvid_QP_Add_Funcs_mmx;
 
 		/* Quantization related functions */
-		quant_h263_intra   = quant_h263_intra_mmx;
-		quant_h263_inter   = quant_h263_inter_mmx;
-		dequant_h263_intra = dequant_h263_intra_mmx;
-		dequant_h263_inter = dequant_h263_inter_mmx;
-
-		quant_mpeg_intra   = quant_mpeg_intra_mmx;
-		quant_mpeg_inter   = quant_mpeg_inter_mmx;
-		dequant_mpeg_intra = dequant_mpeg_intra_mmx;
-		dequant_mpeg_inter = dequant_mpeg_inter_mmx;
+		(((struct global_all_dll *)Dll::Tls())->quant_h263_intra)   = quant_h263_intra_mmx;
+		(((struct global_all_dll *)Dll::Tls())->quant_h263_inter)   = quant_h263_inter_mmx;
+		(((struct global_all_dll *)Dll::Tls())->dequant_h263_intra) = dequant_h263_intra_mmx;
+		(((struct global_all_dll *)Dll::Tls())->dequant_h263_inter) = dequant_h263_inter_mmx;
+
+		(((struct global_all_dll *)Dll::Tls())->quant_mpeg_intra)   = quant_mpeg_intra_mmx;
+		(((struct global_all_dll *)Dll::Tls())->quant_mpeg_inter)   = quant_mpeg_inter_mmx;
+		(((struct global_all_dll *)Dll::Tls())->dequant_mpeg_intra) = dequant_mpeg_intra_mmx;
+		(((struct global_all_dll *)Dll::Tls())->dequant_mpeg_inter) = dequant_mpeg_inter_mmx;
 
 		/* Block related functions */
-		transfer_8to16copy = transfer_8to16copy_mmx;
-		transfer_16to8copy = transfer_16to8copy_mmx;
-		transfer_8to16sub  = transfer_8to16sub_mmx;
-		transfer_8to16subro  = transfer_8to16subro_mmx;
-		transfer_8to16sub2 = transfer_8to16sub2_mmx;
-		transfer_16to8add  = transfer_16to8add_mmx;
-		transfer8x8_copy   = transfer8x8_copy_mmx;
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16copy) = transfer_8to16copy_mmx;
+		(((struct global_all_dll *)Dll::Tls())->transfer_16to8copy) = transfer_16to8copy_mmx;
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub)  = transfer_8to16sub_mmx;
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16subro)  = transfer_8to16subro_mmx;
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub2) = transfer_8to16sub2_mmx;
+		(((struct global_all_dll *)Dll::Tls())->transfer_16to8add)  = transfer_16to8add_mmx;
+		(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy)   = transfer8x8_copy_mmx;
 
 		/* Interlacing Functions */
-		MBFieldTest = MBFieldTest_mmx;
+		(((struct global_all_dll *)Dll::Tls())->MBFieldTest) = MBFieldTest_mmx;
 
 		/* Image Interpolation related functions */
-		interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_mmx;
-		interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_mmx;
-		interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_mmx;
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_h)  = interpolate8x8_halfpel_h_mmx;
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_v)  = interpolate8x8_halfpel_v_mmx;
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv) = interpolate8x8_halfpel_hv_mmx;
 
-		interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_mmx;
-		interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_mmx;
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_6tap_lowpass_h) = interpolate8x8_6tap_lowpass_h_mmx;
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_6tap_lowpass_v) = interpolate8x8_6tap_lowpass_v_mmx;
 
-		interpolate8x8_avg2 = interpolate8x8_avg2_mmx;
-		interpolate8x8_avg4 = interpolate8x8_avg4_mmx;
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg2) = interpolate8x8_avg2_mmx;
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_avg4) = interpolate8x8_avg4_mmx;
 
 		/* reduced resolution */
-		copy_upsampled_8x8_16to8 = xvid_Copy_Upsampled_8x8_16To8_mmx;
-		add_upsampled_8x8_16to8 = xvid_Add_Upsampled_8x8_16To8_mmx;
-		hfilter_31 = xvid_HFilter_31_mmx;
-		filter_18x18_to_8x8 = xvid_Filter_18x18_To_8x8_mmx;
-		filter_diff_18x18_to_8x8 = xvid_Filter_Diff_18x18_To_8x8_mmx;
+		(((struct global_all_dll *)Dll::Tls())->copy_upsampled_8x8_16to8) = xvid_Copy_Upsampled_8x8_16To8_mmx;
+		(((struct global_all_dll *)Dll::Tls())->add_upsampled_8x8_16to8) = xvid_Add_Upsampled_8x8_16To8_mmx;
+		(((struct global_all_dll *)Dll::Tls())->hfilter_31) = xvid_HFilter_31_mmx;
+		(((struct global_all_dll *)Dll::Tls())->filter_18x18_to_8x8) = xvid_Filter_18x18_To_8x8_mmx;
+		(((struct global_all_dll *)Dll::Tls())->filter_diff_18x18_to_8x8) = xvid_Filter_Diff_18x18_To_8x8_mmx;
 
 		/* image input xxx_to_yv12 related functions */
-		yv12_to_yv12  = yv12_to_yv12_mmx;
-		bgr_to_yv12   = bgr_to_yv12_mmx;
-		bgra_to_yv12  = bgra_to_yv12_mmx;
-		yuyv_to_yv12  = yuyv_to_yv12_mmx;
-		uyvy_to_yv12  = uyvy_to_yv12_mmx;
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_yv12)  = yv12_to_yv12_mmx;
+		(((struct global_all_dll *)Dll::Tls())->bgr_to_yv12)   = bgr_to_yv12_mmx;
+		(((struct global_all_dll *)Dll::Tls())->bgra_to_yv12)  = bgra_to_yv12_mmx;
+		(((struct global_all_dll *)Dll::Tls())->yuyv_to_yv12)  = yuyv_to_yv12_mmx;
+		(((struct global_all_dll *)Dll::Tls())->uyvy_to_yv12)  = uyvy_to_yv12_mmx;
 
 		/* image output yv12_to_xxx related functions */
-		yv12_to_bgr   = yv12_to_bgr_mmx;
-		yv12_to_bgra  = yv12_to_bgra_mmx;
-		yv12_to_yuyv  = yv12_to_yuyv_mmx;
-		yv12_to_uyvy  = yv12_to_uyvy_mmx;
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_bgr)   = yv12_to_bgr_mmx;
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_bgra)  = yv12_to_bgra_mmx;
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_yuyv)  = yv12_to_yuyv_mmx;
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_uyvy)  = yv12_to_uyvy_mmx;
 
-		yv12_to_yuyvi = yv12_to_yuyvi_mmx;
-		yv12_to_uyvyi = yv12_to_uyvyi_mmx;
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_yuyvi) = yv12_to_yuyvi_mmx;
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_uyvyi) = yv12_to_uyvyi_mmx;
 
 		/* Motion estimation related functions */
-		calc_cbp = calc_cbp_mmx;
-		sad16    = sad16_mmx;
-		sad8     = sad8_mmx;
-		sad16bi = sad16bi_mmx;
-		sad8bi  = sad8bi_mmx;
-		dev16    = dev16_mmx;
-		sad16v	 = sad16v_mmx;
-		sse8_16bit = sse8_16bit_mmx;
+		(((struct global_all_dll *)Dll::Tls())->calc_cbp) = calc_cbp_mmx;
+		(((struct global_all_dll *)Dll::Tls())->sad16)    = sad16_mmx;
+		(((struct global_all_dll *)Dll::Tls())->sad8)     = sad8_mmx;
+		(((struct global_all_dll *)Dll::Tls())->sad16bi) = sad16bi_mmx;
+		(((struct global_all_dll *)Dll::Tls())->sad8bi)  = sad8bi_mmx;
+		(((struct global_all_dll *)Dll::Tls())->dev16)    = dev16_mmx;
+		(((struct global_all_dll *)Dll::Tls())->sad16v)	 = sad16v_mmx;
+		(((struct global_all_dll *)Dll::Tls())->sse8_16bit) = sse8_16bit_mmx;
 	}
 
 	/* these 3dnow functions are faster than mmx, but slower than xmm. */
 	if ((cpu_flags & XVID_CPU_3DNOW)) {
 
-		emms = emms_3dn;
+		(((struct global_all_dll *)Dll::Tls())->emms) = emms_3dn;
 
 		/* ME functions */
-		sad16bi = sad16bi_3dn;
-		sad8bi  = sad8bi_3dn;
+		(((struct global_all_dll *)Dll::Tls())->sad16bi) = sad16bi_3dn;
+		(((struct global_all_dll *)Dll::Tls())->sad8bi)  = sad8bi_3dn;
 
-		yuyv_to_yv12  = yuyv_to_yv12_3dn;
-		uyvy_to_yv12  = uyvy_to_yv12_3dn;
+		(((struct global_all_dll *)Dll::Tls())->yuyv_to_yv12)  = yuyv_to_yv12_3dn;
+		(((struct global_all_dll *)Dll::Tls())->uyvy_to_yv12)  = uyvy_to_yv12_3dn;
 	}
 
 
 	if ((cpu_flags & XVID_CPU_MMXEXT)) {
 
 		/* DCT */
-		fdct = fdct_xmm_skal;
-		idct = idct_xmm;
+		(((struct global_all_dll *)Dll::Tls())->fdct) = fdct_xmm_skal;
+		(((struct global_all_dll *)Dll::Tls())->idct) = idct_xmm;
 
 		/* Interpolation */
-		interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_xmm;
-		interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_xmm;
-		interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_xmm;
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_h)  = interpolate8x8_halfpel_h_xmm;
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_v)  = interpolate8x8_halfpel_v_xmm;
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv) = interpolate8x8_halfpel_hv_xmm;
 
 		/* reduced resolution */
-		copy_upsampled_8x8_16to8 = xvid_Copy_Upsampled_8x8_16To8_xmm;
-		add_upsampled_8x8_16to8 = xvid_Add_Upsampled_8x8_16To8_xmm;
+		(((struct global_all_dll *)Dll::Tls())->copy_upsampled_8x8_16to8) = xvid_Copy_Upsampled_8x8_16To8_xmm;
+		(((struct global_all_dll *)Dll::Tls())->add_upsampled_8x8_16to8) = xvid_Add_Upsampled_8x8_16To8_xmm;
 
 		/* Quantization */
-		quant_mpeg_intra = quant_mpeg_intra_xmm;
-		quant_mpeg_inter = quant_mpeg_inter_xmm;
+		(((struct global_all_dll *)Dll::Tls())->quant_mpeg_intra) = quant_mpeg_intra_xmm;
+		(((struct global_all_dll *)Dll::Tls())->quant_mpeg_inter) = quant_mpeg_inter_xmm;
 
-		dequant_h263_intra = dequant_h263_intra_xmm;
-		dequant_h263_inter = dequant_h263_inter_xmm;
+		(((struct global_all_dll *)Dll::Tls())->dequant_h263_intra) = dequant_h263_intra_xmm;
+		(((struct global_all_dll *)Dll::Tls())->dequant_h263_inter) = dequant_h263_inter_xmm;
 
 		/* Buffer transfer */
-		transfer_8to16sub2 = transfer_8to16sub2_xmm;
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub2) = transfer_8to16sub2_xmm;
 
 		/* Colorspace transformation */
-		yv12_to_yv12  = yv12_to_yv12_xmm;
-		yuyv_to_yv12  = yuyv_to_yv12_xmm;
-		uyvy_to_yv12  = uyvy_to_yv12_xmm;
+		(((struct global_all_dll *)Dll::Tls())->yv12_to_yv12)  = yv12_to_yv12_xmm;
+		(((struct global_all_dll *)Dll::Tls())->yuyv_to_yv12)  = yuyv_to_yv12_xmm;
+		(((struct global_all_dll *)Dll::Tls())->uyvy_to_yv12)  = uyvy_to_yv12_xmm;
 
 		/* ME functions */
-		sad16 = sad16_xmm;
-		sad8  = sad8_xmm;
-		sad16bi = sad16bi_xmm;
-		sad8bi  = sad8bi_xmm;
-		dev16 = dev16_xmm;
-		sad16v	 = sad16v_xmm;
+		(((struct global_all_dll *)Dll::Tls())->sad16) = sad16_xmm;
+		(((struct global_all_dll *)Dll::Tls())->sad8)  = sad8_xmm;
+		(((struct global_all_dll *)Dll::Tls())->sad16bi) = sad16bi_xmm;
+		(((struct global_all_dll *)Dll::Tls())->sad8bi)  = sad8bi_xmm;
+		(((struct global_all_dll *)Dll::Tls())->dev16) = dev16_xmm;
+		(((struct global_all_dll *)Dll::Tls())->sad16v)	 = sad16v_xmm;
 	}
 
 	if ((cpu_flags & XVID_CPU_3DNOW)) {
 
 		/* Interpolation */
-		interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dn;
-		interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_3dn;
-		interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dn;
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_h) = interpolate8x8_halfpel_h_3dn;
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_v) = interpolate8x8_halfpel_v_3dn;
+		(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv) = interpolate8x8_halfpel_hv_3dn;
 	}
 
 	if ((cpu_flags & XVID_CPU_3DNOWEXT)) {
 
 		/* Buffer transfer */
-		transfer_8to16copy =  transfer_8to16copy_3dne;
-		transfer_16to8copy = transfer_16to8copy_3dne;
-		transfer_8to16sub =  transfer_8to16sub_3dne;
-		transfer_8to16subro =  transfer_8to16subro_3dne;
-		transfer_16to8add = transfer_16to8add_3dne;
-		transfer8x8_copy = transfer8x8_copy_3dne;
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16copy) =  transfer_8to16copy_3dne;
+		(((struct global_all_dll *)Dll::Tls())->transfer_16to8copy) = transfer_16to8copy_3dne;
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub) =  transfer_8to16sub_3dne;
+		(((struct global_all_dll *)Dll::Tls())->transfer_8to16subro) =  transfer_8to16subro_3dne;
+		(((struct global_all_dll *)Dll::Tls())->transfer_16to8add) = transfer_16to8add_3dne;
+		(((struct global_all_dll *)Dll::Tls())->transfer8x8_copy) = transfer8x8_copy_3dne;
 
 		if ((cpu_flags & XVID_CPU_MMXEXT)) {
 			/* Inverse DCT */
-			idct =  idct_3dne;
+			(((struct global_all_dll *)Dll::Tls())->idct) =  idct_3dne;
 
 			/* Buffer transfer */
-			transfer_8to16sub2 =  transfer_8to16sub2_3dne;
+			(((struct global_all_dll *)Dll::Tls())->transfer_8to16sub2) =  transfer_8to16sub2_3dne;
 
 			/* Interpolation */
-			interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dne;
-			interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_3dne;
-			interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dne;
+			(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_h) = interpolate8x8_halfpel_h_3dne;
+			(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_v) = interpolate8x8_halfpel_v_3dne;
+			(((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv) = interpolate8x8_halfpel_hv_3dne;
 
 			/* Quantization */
-			quant_h263_intra = quant_h263_intra_3dne;		/* cmov only */
-			quant_h263_inter = quant_h263_inter_3dne;
-			dequant_mpeg_intra = dequant_mpeg_intra_3dne;	/* cmov only */
-			dequant_mpeg_inter = dequant_mpeg_inter_3dne;
-			dequant_h263_intra = dequant_h263_intra_3dne;
-			dequant_h263_inter = dequant_h263_inter_3dne;
+			(((struct global_all_dll *)Dll::Tls())->quant_h263_intra) = quant_h263_intra_3dne;		/* cmov only */
+			(((struct global_all_dll *)Dll::Tls())->quant_h263_inter) = quant_h263_inter_3dne;
+			(((struct global_all_dll *)Dll::Tls())->dequant_mpeg_intra) = dequant_mpeg_intra_3dne;	/* cmov only */
+			(((struct global_all_dll *)Dll::Tls())->dequant_mpeg_inter) = dequant_mpeg_inter_3dne;
+			(((struct global_all_dll *)Dll::Tls())->dequant_h263_intra) = dequant_h263_intra_3dne;
+			(((struct global_all_dll *)Dll::Tls())->dequant_h263_inter) = dequant_h263_inter_3dne;
 
 			/* ME functions */
-			calc_cbp = calc_cbp_3dne;
+			(((struct global_all_dll *)Dll::Tls())->calc_cbp) = calc_cbp_3dne;
 
-			sad16 = sad16_3dne;
-			sad8 = sad8_3dne;
-			sad16bi = sad16bi_3dne;
-			sad8bi = sad8bi_3dne;
-			dev16 = dev16_3dne;
+			(((struct global_all_dll *)Dll::Tls())->sad16) = sad16_3dne;
+			(((struct global_all_dll *)Dll::Tls())->sad8) = sad8_3dne;
+			(((struct global_all_dll *)Dll::Tls())->sad16bi) = sad16bi_3dne;
+			(((struct global_all_dll *)Dll::Tls())->sad8bi) = sad8bi_3dne;
+			(((struct global_all_dll *)Dll::Tls())->dev16) = dev16_3dne;
 		}
 	}
 
 	if ((cpu_flags & XVID_CPU_SSE2)) {
 
-		calc_cbp = calc_cbp_sse2;
+		(((struct global_all_dll *)Dll::Tls())->calc_cbp) = calc_cbp_sse2;
 
 		/* Quantization */
-		quant_h263_intra   = quant_h263_intra_sse2;
-		quant_h263_inter   = quant_h263_inter_sse2;
-		dequant_h263_intra = dequant_h263_intra_sse2;
-		dequant_h263_inter = dequant_h263_inter_sse2;
+		(((struct global_all_dll *)Dll::Tls())->quant_h263_intra)   = quant_h263_intra_sse2;
+		(((struct global_all_dll *)Dll::Tls())->quant_h263_inter)   = quant_h263_inter_sse2;
+		(((struct global_all_dll *)Dll::Tls())->dequant_h263_intra) = dequant_h263_intra_sse2;
+		(((struct global_all_dll *)Dll::Tls())->dequant_h263_inter) = dequant_h263_inter_sse2;
 
 		/* SAD operators */
-		sad16    = sad16_sse2;
-		dev16    = dev16_sse2;
+		(((struct global_all_dll *)Dll::Tls())->sad16)    = sad16_sse2;
+		(((struct global_all_dll *)Dll::Tls())->dev16)    = dev16_sse2;
 
 		/* DCT operators
 		 * no iDCT because it's not "Walken matching" */
-		fdct = fdct_sse2_skal;
+		(((struct global_all_dll *)Dll::Tls())->fdct) = fdct_sse2_skal;
 	}
 #endif /* ARCH_IS_IA32 */
 
 #if defined(ARCH_IS_IA64)
 	if ((cpu_flags & XVID_CPU_ASM)) { /* use assembler routines? */
 	  idct_ia64_init();
-	  fdct = fdct_ia64;
-	  idct = idct_ia64;   /*not yet working, crashes */
-	  interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_ia64;
-	  interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_ia64;
-	  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_ia64;
-	  sad16 = sad16_ia64;
-	  sad16bi = sad16bi_ia64;
-	  sad8 = sad8_ia64;
-	  dev16 = dev16_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->fdct) = fdct_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->idct) = idct_ia64;   /*not yet working, crashes */
+	  (((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_h) = interpolate8x8_halfpel_h_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_v) = interpolate8x8_halfpel_v_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->interpolate8x8_halfpel_hv) = interpolate8x8_halfpel_hv_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->sad16) = sad16_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->sad16bi) = sad16bi_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->sad8) = sad8_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->dev16) = dev16_ia64;
 /*	  Halfpel8_Refine = Halfpel8_Refine_ia64; */
-	  quant_h263_intra = quant_h263_intra_ia64;
-	  quant_h263_inter = quant_h263_inter_ia64;
-	  dequant_h263_intra = dequant_h263_intra_ia64;
-	  dequant_h263_inter = dequant_h263_inter_ia64;
-	  transfer_8to16copy = transfer_8to16copy_ia64;
-	  transfer_16to8copy = transfer_16to8copy_ia64;
-	  transfer_8to16sub = transfer_8to16sub_ia64;
-	  transfer_8to16sub2 = transfer_8to16sub2_ia64;
-	  transfer_16to8add = transfer_16to8add_ia64;
-	  transfer8x8_copy = transfer8x8_copy_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->quant_h263_intra) = quant_h263_intra_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->quant_h263_inter) = quant_h263_inter_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->dequant_h263_intra) = dequant_h263_intra_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->dequant_h263_inter) = dequant_h263_inter_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->transfer_8to16copy) = transfer_8to16copy_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->transfer_16to8copy) = transfer_16to8copy_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->transfer_8to16sub) = transfer_8to16sub_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->transfer_8to16sub2) = transfer_8to16sub2_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->transfer_16to8add) = transfer_16to8add_ia64;
+	  (((struct global_all_dll *)Dll::Tls())->transfer8x8_copy) = transfer8x8_copy_ia64;
 	}
 #endif
 
 #if defined(ARCH_IS_PPC)
 	if ((cpu_flags & XVID_CPU_ASM))
 	{
-		calc_cbp = calc_cbp_ppc;
+		(((struct global_all_dll *)Dll::Tls())->calc_cbp) = calc_cbp_ppc;
 	}
 
 	if ((cpu_flags & XVID_CPU_ALTIVEC))
 	{
-		calc_cbp = calc_cbp_altivec;
-		fdct = fdct_altivec;
-		idct = idct_altivec;
-		sadInit = sadInit_altivec;
-		sad16 = sad16_altivec;
-		sad8 = sad8_altivec;
-		dev16 = dev16_altivec;
+		(((struct global_all_dll *)Dll::Tls())->calc_cbp) = calc_cbp_altivec;
+		(((struct global_all_dll *)Dll::Tls())->fdct) = fdct_altivec;
+		(((struct global_all_dll *)Dll::Tls())->idct) = idct_altivec;
+		(((struct global_all_dll *)Dll::Tls())->sadInit) = sadInit_altivec;
+		(((struct global_all_dll *)Dll::Tls())->sad16) = sad16_altivec;
+		(((struct global_all_dll *)Dll::Tls())->sad8) = sad8_altivec;
+		(((struct global_all_dll *)Dll::Tls())->dev16) = dev16_altivec;
 	}
 #endif
 
 #if defined(_DEBUG)
-    xvid_debug = init->debug;
+    (((struct global_all_dll *)Dll::Tls())->xvid_debug) = init->debug;
 #endif
 
     return 0;
@@ -582,6 +556,9 @@
 }
 
 
+#if 0
+	const int flip1 = (convert->input.colorspace & XVID_CSP_VFLIP) ^ (convert->output.colorspace & XVID_CSP_VFLIP);
+#endif
 static int
 xvid_gbl_convert(xvid_gbl_convert_t* convert)
 {
@@ -594,9 +571,6 @@
 	if (XVID_VERSION_MAJOR(convert->version) != 1)   /* v1.x.x */
 	      return XVID_ERR_VERSION;
 
-#if 0
-	const int flip1 = (convert->input.colorspace & XVID_CSP_VFLIP) ^ (convert->output.colorspace & XVID_CSP_VFLIP);
-#endif
 	width = convert->width;
 	height = convert->height;
 	width2 = convert->width/2;
@@ -618,7 +592,7 @@
 	}
 
 
-	emms();
+	(((struct global_all_dll *)Dll::Tls())->emms)();
 	return 0;
 }
 

