// tdemultimedia/mpeglib/lib/mpegplay/copyFunctions.cpp
/*
stores heavily used copy functions (makes mmx support easier)
Copyright (C) 2000 Martin Vogt
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation.
For more information look at the file COPYRIGHT in this package
*/
#include "copyFunctions.h"
/*
 * We use a lookup table to make sure values stay in the 0..255 range.
 * Since this is cropping (i.e., x = (x < 0) ? 0 : (x > 255) ? 255 : x;), we
 * call this table the "crop table".
 * MAX_NEG_CROP is the maximum neg/pos value we can handle.
 */
// Compiler cannot allocate too big arrays.
/*
 * Builds the crop table and creates the assembler helper object.
 *
 * The table holds NUM_CROP_ENTRIES bytes. The entry for a value v
 * (-MAX_NEG_CROP <= v < NUM_CROP_ENTRIES - MAX_NEG_CROP) is stored at
 * offset v + MAX_NEG_CROP and contains v clamped to 0..255.
 * cm points at the entry for v == 0, so cm[v] is valid for negative v too.
 */
CopyFunctions::CopyFunctions() {
  cropTbl = new unsigned char[NUM_CROP_ENTRIES];

  const int valueEnd = NUM_CROP_ENTRIES - MAX_NEG_CROP;
  for (int value = -MAX_NEG_CROP; value < valueEnd; ++value) {
    // Clamp to the range of an unsigned byte.
    cropTbl[value + MAX_NEG_CROP] =
        (value <= 0) ? 0 : ((value >= 255) ? 255 : value);
  }

  // Bias the base pointer so that the table can be indexed by signed values.
  cm = cropTbl + MAX_NEG_CROP;

  copyFunctions_asm = new CopyFunctions_MMX();
  // Remember whether the assembler implementation reports support.
  lmmx = copyFunctions_asm->support();
}
/*
 * Releases everything the constructor allocated.
 */
CopyFunctions::~CopyFunctions() {
  // cropTbl comes from new[] in the constructor; releasing it with plain
  // delete is undefined behaviour, so the array form is required.
  delete[] cropTbl;
  // The assembler helper is created with new in the constructor and is not
  // released anywhere else in this file.
  // NOTE(review): assumes the declared type of copyFunctions_asm has an
  // accessible (virtual, if it is a base class) destructor -- confirm in the
  // header.
  delete copyFunctions_asm;
}
/*
 * Forwards to startNOFloatSection() of the assembler helper object.
 * NOTE(review): presumably marks the start of a region in which no
 * floating point code may run (MMX state) -- confirm in the helper class.
 */
void CopyFunctions::startNOFloatSection() {
// no work is done here; the call is delegated to the helper
copyFunctions_asm->startNOFloatSection();
}
/*
 * Forwards to endNOFloatSection() of the assembler helper object.
 * Counterpart of startNOFloatSection().
 */
void CopyFunctions::endNOFloatSection() {
copyFunctions_asm->endNOFloatSection();
}
/*
 * Copies 8 rows of 8 bytes from source1 to dest.
 * After each row both pointers advance by inc bytes.
 * If lmmx is set the assembler helper does the copy instead.
 */
void CopyFunctions::copy8_byte(unsigned char* source1,
                               unsigned char* dest,int inc) {
  if (lmmx) {
    copyFunctions_asm->copy8_byte(source1,dest,inc);
    return;
  }

  const unsigned char* from = source1;
  unsigned char* to = dest;
  for (int row = 0; row < 8; ++row, from += inc, to += inc) {
    memcpy(to, from, 8 * sizeof(char));
  }
}
/*
 * Copies 8 rows of 8 shorts from source1 to dest.
 * After each row both pointers advance by inc elements (shorts).
 * There is deliberately no assembler path: the optimised version was
 * slower, so this stays in C.
 */
void CopyFunctions::copy8_word(unsigned short* source1,
                               unsigned short* dest,int inc) {
  const unsigned short* from = source1;
  unsigned short* to = dest;
  for (int row = 0; row < 8; ++row, from += inc, to += inc) {
    memcpy(to, from, 8 * sizeof(short));
  }
}
/*
 * Writes an 8x8 block to dest, where every output byte is the crop table
 * entry (cm) for the corresponding short in source1.
 * source1 is read linearly (8 shorts per row); dest advances by inc bytes
 * per row. If lmmx is set the assembler helper is used instead.
 */
void CopyFunctions::copy8_src1linear_crop(short* source1,
                                          unsigned char* dest,int inc) {
  if (lmmx) {
    copyFunctions_asm->copy8_src1linear_crop(source1,dest,inc);
    return;
  }

  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) {
      dest[col] = cm[source1[col]];
    }
    dest += inc;
    source1 += 8;   // source is packed, no stride
  }
}
/*
 * Writes an 8x8 block to dest where every byte is the average of the
 * corresponding bytes of source1 and source2, computed as (a + b + 1) >> 1.
 * All three pointers advance by inc bytes per row. No crop table is
 * involved. If lmmx is set the assembler helper is used instead.
 */
void CopyFunctions::copy8_div2_nocrop(unsigned char* source1,
                                      unsigned char* source2,
                                      unsigned char* dest,int inc) {
  if (lmmx) {
    copyFunctions_asm->copy8_div2_nocrop(source1,source2, dest, inc);
    return;
  }

  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) {
      const int sum = source1[col] + source2[col] + 1;
      dest[col] = sum >> 1;
    }
    dest += inc;
    source1 += inc;
    source2 += inc;
  }
}
/*
 * Writes an 8x8 block to dest where every byte is (a + b) >> 1 of the
 * corresponding bytes of source1 and source2.
 * dest is written linearly (8 bytes per row); both sources advance by inc
 * bytes per row. If lmmx is set the assembler helper is used instead.
 * NOTE(review): unlike copy8_div2_nocrop there is no +1 before the shift,
 * so the average truncates -- confirm this is intended.
 */
void CopyFunctions::copy8_div2_destlinear_nocrop(unsigned char* source1,
                                                 unsigned char* source2,
                                                 unsigned char* dest,int inc) {
  if (lmmx) {
    copyFunctions_asm->copy8_div2_destlinear_nocrop(source1,source2,dest,inc);
    return;
  }

  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) {
      const int sum = source1[col] + source2[col];
      dest[col] = sum >> 1;
    }
    dest += 8;   // destination is packed, no stride
    source1 += inc;
    source2 += inc;
  }
}
/*
 * Writes a 16x16 block to dest where every byte is (a + b) >> 1 of the
 * corresponding bytes of source1 and source2.
 * dest is written linearly (16 bytes per row); both sources advance by inc
 * bytes per row. If lmmx is set the assembler helper is used instead.
 * NOTE(review): no +1 before the shift, so the average truncates --
 * confirm this is intended.
 */
void CopyFunctions::copy16_div2_destlinear_nocrop(unsigned char* source1,
                                                  unsigned char* source2,
                                                  unsigned char* dest,int inc){
  if (lmmx) {
    copyFunctions_asm->copy16_div2_destlinear_nocrop(source1,source2,dest,inc);
    return;
  }

  for (int row = 0; row < 16; ++row) {
    for (int col = 0; col < 16; ++col) {
      const int sum = source1[col] + source2[col];
      dest[col] = sum >> 1;
    }
    dest += 16;   // destination is packed, no stride
    source1 += inc;
    source2 += inc;
  }
}
/*
 * Writes an 8x8 block to dest where every byte is the average of the
 * corresponding bytes of the four sources, computed as
 * (a + b + c + d + 2) >> 2.
 * All five pointers advance by inc bytes per row. No crop table and no
 * assembler path are used.
 */
void CopyFunctions::copy8_div4_nocrop(unsigned char* source1,
                                      unsigned char* source2,
                                      unsigned char* source3,
                                      unsigned char* source4,
                                      unsigned char* dest,int inc) {
  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) {
      const int sum =
          source1[col] + source2[col] + source3[col] + source4[col] + 2;
      dest[col] = sum >> 2;
    }
    dest += inc;
    source1 += inc;
    source2 += inc;
    source3 += inc;
    source4 += inc;
  }
}
// Optimize me!
// Should be MMX: performance analysis shows 8 % of overall time.
/*
 * Writes an 8x8 block to dest where every byte is the crop table entry
 * (cm) for source1[k] + source2[k].
 * source2 is read linearly (8 shorts per row); source1 and dest advance by
 * inc bytes per row. If lmmx is set the assembler helper is used instead.
 */
void CopyFunctions::copy8_src2linear_crop(unsigned char* source1,
                                          short int* source2,
                                          unsigned char* dest,int inc) {
  if (lmmx) {
    copyFunctions_asm->copy8_src2linear_crop(source1,source2,dest,inc);
    return;
  }

  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) {
      const int pixel = source1[col];
      const int delta = source2[col];
      dest[col] = cm[pixel + delta];
    }
    dest += inc;
    source1 += inc;
    source2 += 8;   // source2 is packed, no stride
  }
}
// Optimize me!
// Should be MMX: performance analysis shows 13 % of overall time.
/*
 * Writes an 8x8 block to dest where every byte is the crop table entry
 * (cm) for ((source1[k] + source2[k] + 1) >> 1) + source3[k].
 * source3 is read linearly (8 shorts per row); source1, source2 and dest
 * advance by inc bytes per row. If lmmx is set the assembler helper is
 * used instead.
 */
void CopyFunctions::copy8_div2_src3linear_crop(unsigned char* source1,
                                               unsigned char* source2,
                                               short int* source3,
                                               unsigned char* dest,int inc) {
  if (lmmx) {
    copyFunctions_asm->copy8_div2_src3linear_crop(source1,source2,source3,
                                                  dest,inc);
    return;
  }

  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) {
      const int average = (source1[col] + source2[col] + 1) >> 1;
      dest[col] = cm[average + source3[col]];
    }
    dest += inc;
    source1 += inc;
    source2 += inc;
    source3 += 8;   // source3 is packed, no stride
  }
}
/*
 * Writes an 8x8 block to dest where every byte is the crop table entry
 * (cm) for ((s1 + s2 + s3 + s4 + 2) >> 2) + source5[k], with s1..s4 the
 * corresponding bytes of source1..source4.
 * source5 is read linearly (8 shorts per row); the four byte sources and
 * dest advance by inc bytes per row. There is no assembler path.
 */
void CopyFunctions::copy8_div4_src5linear_crop(unsigned char* source1,
                                               unsigned char* source2,
                                               unsigned char* source3,
                                               unsigned char* source4,
                                               short int* source5,
                                               unsigned char* dest,int inc) {
  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) {
      const int average =
          (source1[col] + source2[col] + source3[col] + source4[col] + 2) >> 2;
      dest[col] = cm[average + source5[col]];
    }
    dest += inc;
    source1 += inc;
    source2 += inc;
    source3 += inc;
    source4 += inc;
    source5 += 8;   // source5 is packed, no stride
  }
}