// Return a temporary string that merges a name and a number,
// e.g. label_str("interval",3) yields "interval3".
//
//   s : NUL-terminated prefix text
//   n : number appended to the prefix, printed in decimal
//
// The result is stored in a function-local static buffer: every call
// overwrites the previous result and returns the same pointer, so copy the
// string if it has to survive the next call.
//
// The text is formatted with a bounded snprintf: output that does not fit
// in the buffer is truncated (and still NUL-terminated) instead of being
// written past the end of the buffer, which is what an unbounded sprintf
// would do for a long prefix.
char *
label_str(const char *s,int n)
{
    static char buff[1000];
    snprintf(buff,sizeof(buff),"%s%d",s,n);
    return buff;
}
// Integer logarithm in base 2 of v, rounded down: the position of the
// highest bit set in v. A value of 0 has no bit set and gives -1.
//   log2(0) -> -1
//   log2(1) ->  0
//   log2(2) ->  1
//   log2(4) ->  2
//   log2(7) ->  2
//   log2(8) ->  3
int
log2(unsigned int v)
{
    int highest=-1;
    // each shift that still leaves work to do accounts for one more bit position
    while(v!=0)
    {
        v>>=1;
        ++highest;
    }
    return highest;
}
pow2(intervals[i]-1) : 0)); // if(intervalsizes[i]!=res){errr("intervalsizes");} // return res; // } inline unsigned int intervalsize0(int i){return((intervals[i] > 0 ? pow2(intervals[i]-1) : 0));} public: int verbose; // find interval where value v resides // fast version, this one recursively splits initial interval inline int find_interval2(const unsigned int v,unsigned int &lboundary) { int i0=0; int i1=nintervals; int i; for(;;) { if(i1==i0+1){break;} i=(i0+i1)>>1; lboundary=lboundaries[i]; // if(verbose)printf("considering i0:%3d i1:%3d : i:%3d v:%12u lboundary:%12u (%12u - %12u)\n",i0,i1,i,v,lboundary,lboundaries[i0],lboundaries[i1]); if(v=lboundary && v=nintervals){errr("code argh!");} sboundary=lboundary+intervalsizes[i]; // printf("nintervals:%3d i:%3d : %12u ... %12u : %12u\n",nintervals,i,lboundary,sboundary,v); if( (lboundary!=sboundary && v>=lboundary && v0 ? intervals[i]-1 : 0); // if(verbose>1)printf("v:%6d interval:%2d (%5d - %5d) bitsremaining:%2d ",v,i,lboundary,sboundary,bitsremaining); v-=lboundary; // if(verbose>1)printf("remain:%6d totalbits:%2d\n",v,bitsremaining+nlev); bs.put_uint(v,bitsremaining,"rem"); } // get and uncompress a value from the bitstream inline unsigned int get() { // SPEED CRITICAL SECTION int i=bs.get_uint(nlev,"int");// get interval // if(verbose>1)printf("get:interval:%2d ",i); const int bitsremaining=(intervals[i]>0 ? 
intervals[i]-1 : 0); // if(verbose>1)printf("bitsremain:%2d ",bitsremaining); unsigned int v=bs.get_uint(bitsremaining,"rem"); // if(verbose>1)printf("v0:%3d ",v); // unsigned int lboundary=0; v+=lboundaries[i]; // for(int j=0;j1)printf("lboundary:%5d v:%5d \n",lboundaries[i],v); return(v); } // insert the packed probability distrbution into the bitstream void code_begin(); // get the packed probability distrbution from the bitstream void get_begin(); void make_lboundaries(); VlengthCoder(BitStream &nbs,int nverbose=0); ~VlengthCoder() { delete [] lboundaries; delete [] intervals; delete [] intervalsizes; } // create VlengthCoder and its probability distrbution from an array of values VlengthCoder(unsigned int *vals,int n,BitStream &nbs,int nverbose=0); }; void VlengthCoder::code_begin() { int i; bs.add_tag("VlengthCoder:Header"); bs.put_uint(nbits,NBITS_NBITS_VAL,"nbits"); bs.put_uint(nlev,5,"nlev"); for(i=0;i1)printf("get_begin nbits:%d\n",nbits); nlev=bs.get_uint(5,"nlev"); if(verbose>1)printf("get_begin nlev:%d\n",nlev); nintervals=pow2(nlev); intervals=new int [nintervals]; CHECK_MEM(intervals); intervalsizes=new unsigned int [nintervals]; CHECK_MEM(intervalsizes); lboundaries=new unsigned int [nintervals+1]; CHECK_MEM(lboundaries); for(i=0;i1)printf("get_begin intervals:%2d:%2d\n",i,intervals[i]); } make_lboundaries(); } void VlengthCoder::make_lboundaries() { unsigned int lboundary=0; for(int j=0;j<=nintervals;j++) { lboundaries[j]=lboundary; if(j=nbits){nlev=nbits-1;} // nlev at least 1 if(nlev<1){nlev=1;} if(debug_test_nlev>=0){nlev=debug_test_nlev;} nintervals=pow2(nlev); int i; intervals=new int [nintervals]; CHECK_MEM(intervals); intervalsizes=new unsigned int [nintervals]; CHECK_MEM(intervalsizes); lboundaries=new unsigned int [nintervals+1]; CHECK_MEM(lboundaries); if(verbose>1)printf("nbits:%d nlev:%d nintervals:%d \n",nbits,nlev,nintervals); if(verbose>10) { printf("vals;\n"); for(i=0;i1)printf("intnum%02d begin:%5u end:%5u len:%5u (code:%2d) real 
upper boundary: real:%5u\n",i,lboundary,intervalsizes[i]+lboundary,intervalsizes[i],intervals[i],boundary); lboundary+=intervalsizes[i]; } boundary=sorted[n-1]; intervals[i]=1+log2(boundary-lboundary)+1; intervalsizes[i]=intervalsize0(i); if(0 || verbose>1)printf("intnum%02d begin:%5u end:%5u len:%5u (code:%2d) real upper boundary: real:%5u\n",i,lboundary,intervalsizes[i]+lboundary,intervalsizes[i],intervals[i],boundary); if(0 || verbose>1)printf("\n"); make_lboundaries(); int SUM_interval_bit_sizes=0; for(i=0;i>3)-1; // put first buff.back()|=((v & 0xff)<>=nbitsinfirstbyte; // printf(" (v:%x)",v); // put central for(int i=ncentral;i;i--) { buff.push_back(0); buff.back()= v & 0xff ; // ::show_bits(v & 0xff,-8); // printf(" "); v>>=8; } // put last const int nbitsremaining=n-( (ncentral<<3)+nbitsinfirstbyte ); if(nbitsremaining) { buff.push_back(0); buff.back()=v & (pow2(nbitsremaining+1)-1); // printf(" (v:%x:%x)",v & (pow2(nbitsremaining+1)-1),buff.back()); // ::show_bits(v & (pow2(nbitsremaining+1)-1),-nbitsremaining); // printf("\n"); } if(!(nbitsremaining & 0x07)){buff.push_back(0);} bitpos+=n; // printf("nbitsinfirstbyte:%d ncentral:%d nbitsremaining:%d\n",nbitsinfirstbyte,ncentral,nbitsremaining); } // printf("cuurent put order:"); // for(i=0;i>3]>>bpos0) & (pow2(n)-1); bitpos+=n; // printf("simple case:res:%x\n",res); return res; } else { int bytepos=bitpos>>3; const int ncentral=((bpos0 + n)>>3)-1; // put first res=(buff[bytepos]>>bpos0) & 0xff; // printf("normal case:res0:%x\n",res); const int nbitsinfirstbyte=8-bpos0; bytepos++; // put central if(ncentral) { unsigned int v=0; for(int i=ncentral-1;i>=0;i--) { v|=buff[bytepos+i]&0xff; if(i)v<<=8; // printf(" resC%d:v:%x\n",i,v); } bytepos+=ncentral; res|=v<>3)-1)<<3)); // printf(" :resR:%x buff[%d]:%x %d\n",res,bytepos,buff[bytepos], // (nbitsinfirstbyte +((bytepos-(bitpos>>3)-1)<<3))); } bitpos+=n; // printf("nbitsinfirstbyte:%d ncentral:%d nbitsremaining:%d\n",nbitsinfirstbyte,ncentral,nbitsremaining); 
return res; } } #ifdef NOTDEF unsigned int BitStream::get(int n,const char *tag/*=NULL*/) { if(check_tag(tag)==NOTOK){errr("BitStream::get(int) check_tag failed");} unsigned int res=0; for(int i=0;i=0) { printf("ERROR:BitStream:bitpos:%4d:check_tag: found tag %s at %d expected it at %d\n",bitpos,tag,found,pos); } else { printf("ERROR:BitStream:bitpos:%4d:check_tag: tag %s not found, expected it at %d\n",bitpos,tag,pos); } return(NOTOK); } return(OK); } int BitStream::find_tag(const char *tag) { int i; for(i=0;ipos && i>=0;i--); return(i); } void BitStream::show_bits(int a,int n) { for(int b=a;b=pow2(NBITS_NVALS)){errr("Compressor::put(uint *,nvals) : overflow: nvals>2^16");} put_uint_vl(n,NBITS_NVALS,"size"); if(n==0){return NBITS_NVALS;} int sdecr=2; int sfixed=1; int nbits=num_bits(HtMaxMin::max_v(vals,n)); if(verbose)printf("*********************put_vals:n:%3d nbits:%3d\n",n,nbits); int i; if(verbose) { printf("TTT:n:%3d nbits:%3d\n",n,nbits); for(i=1;i<7;i++) { debug_test_nlev=i; printf("trying nlev:%3d\n",debug_test_nlev); freeze(); put_decr(vals,n); int fndsz=unfreeze(); printf("TTT:nlev:%2d try size:%4d\n",i,fndsz); } debug_test_nlev=-1; } if(n>15 && nbits>3) { freeze(); put_decr(vals,n); sdecr=unfreeze(); freeze(); put_fixedbitl(vals,n); sfixed=unfreeze(); } if(verbose)printf("put_vals:n:%3d sdecr:%6d sfixed:%6d rap:%f\n",n,sdecr,sfixed,sdecr/(float)sfixed); if(sdecr1)printf("get_vals n:%d\n",n); if(!n){*pres=NULL;return 0;} if(verbose)printf("get_vals: n:%3d\n",n); unsigned int *res=new unsigned int[n]; CHECK_MEM(res); int comptype=get_uint(2,"put_valsCompType"); if(verbose)printf("get_vals:comptype:%d\n",comptype); switch(comptype) { case 0: get_decr(res,n); break; case 1: get_fixedbitl(res,n); break; default: errr("Compressor::get_vals invalid comptype");break; } // get_fixedbitl(res,n); // get_decr(res,n); *pres=res; return(n); } int Compressor::put_fixedbitl(byte *vals,int n,const char *tag) { int cpos=bitpos; int i,j; add_tag(tag); 
put_uint_vl(n,NBITS_NVALS,"size"); if(n==0){return 0;} byte maxv=vals[0]; for(i=1;imaxv){maxv=v;} } int nbits=num_bits(maxv); if(n>=pow2(NBITS_NVALS)){errr("Compressor::put_fixedbitl(byte *) : overflow: nvals>2^16");} put_uint(nbits,NBITS_NBITS_CHARVAL,"nbits"); add_tag("data"); for(i=0;i1){printf("get_decr:got:%8d\n",res[i]);} } }