//
// WordBitCompress.cc
//
// BitStream: put and get bits into a buffer
// *tagging: add tags to keep track of the position of data
// inside the bitstream for debugging purposes.
// *freezing: saves the current position. Further inserts in the BitStream
// aren't really done. This way you can try different
// compression algorithms and choose the best.
//
// Compressor: BitStream with extended compression functionalities
//
//
// Part of the ht://Dig package
// Copyright (c) 1999-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
//
//
// $Id: WordBitCompress.cc,v 1.5 2004/05/28 13:15:26 lha Exp $
//
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */
#include
#include"WordBitCompress.h"
// ******** HtVector_byte (implementation)
#define GType byte
#define HtVectorGType HtVector_byte
#include "HtVectorGenericCode.h"
// ******** HtVector_charptr (implementation)
#define GType charptr
#define HtVectorGType HtVector_charptr
#include "HtVectorGenericCode.h"
// **************************************************
// *************** misc functions *******************
// **************************************************
// Return a temporary string that merges a name and a number ("<s><n>").
//
// The result lives in a single static buffer: it is overwritten by the next
// call, so callers must use or copy it before calling label_str again.
// Output longer than the buffer is truncated (and still NUL-terminated)
// instead of overrunning the buffer.
char *
label_str(const char *s,int n)
{
    static char buff[1000];
    // snprintf bounds the write to sizeof(buff), including the trailing NUL
    snprintf(buff,sizeof(buff),"%s%d",s,n);
    return buff;
}
// display n bits of value v
void
show_bits(int v,int n/*=16*/)
{
int i;
if(n>0)
{
for(i=0;i (*((unsigned int *)b))) return 1;
else
if((*((unsigned int *)a)) < (*((unsigned int *)b))) return -1;
else
return 0;
// return
// (*((unsigned int *)a)) -
// (*((unsigned int *)b)) ;
}
// Quick sort an array of unsigned int's, in place.
//
// v : array to sort
// n : number of entries in v
//
// The ordering is the one defined by qsort_uint_cmp.
// A NULL array or fewer than two entries is a no-op; this also keeps a
// negative n from being converted into a huge unsigned element count.
void
qsort_uint(unsigned int *v,int n)
{
    if(!v || n<2){return;}
    qsort((void *)v,(size_t)n,sizeof(unsigned int),&qsort_uint_cmp);
}
// Integer base-2 logarithm of v, rounded down: the position of the
// highest set bit of v, or -1 when v is 0.
//   log2(0) -> -1
//   log2(1) ->  0
//   log2(2) ->  1
//   log2(4) ->  2
//   log2(7) ->  2
//   log2(8) ->  3
int
log2(unsigned int v)
{
    int highest=-1;
    // each shift that still leaves bits behind moves the answer up by one
    while(v!=0)
    {
        ++highest;
        v>>=1;
    }
    return highest;
}
// **************************************************
// *************** VlengthCoder *******************
// **************************************************
//
// Compress values into a bitstream based on their probability distribution.
// The probability distribution is reduced to a number of intervals.
// Each interval (generally) has the same probability of occurring;
// values are then coded by: interval_number position_inside_interval.
// This can be seen as a modified version of Shannon-Fano encoding.
//
// Here are some approximate calculations for estimating the final coded size:
//
// n number of entries to code
// nbits maximum size in bits of entries to code
//
// SUM_interval_bit_sizes -> depends on probability dist
// total_size = table_size + coded_size
// table_size = 2^nlev * NBITS_NBITS_VAL
// coded_size = n * (nlev + SUM_interval_bit_sizes / 2^nlev )
//
// example1: flat probability distribution :
// SUM_interval_bit_sizes = 2^nlev * log2( 2^nbits / 2^nlev) = 2^nlev * ( nbits - nlev )
// => coded_size = n * ( nlev + nbits - nlev ) = n*nbits !!
// => coded_size is the same as if we used no compression
// this is normal, because it is not possible to compress random data
//
// example2: probability all focused in first interval except for one entry
// SUM_interval_bit_sizes = 1 + nbits
// the computations above are not valid because of integer round-offs
// => coded_size would actually be = n * 1 + nbits
// (but the code needs a few cleanups to obtain this value)
//
class VlengthCoder
{
int nbits;// min number of bits to code all entries
int nlev;// split proba into 2^nlev parts
int nintervals;// number of intervals
int *intervals;
unsigned int *intervalsizes; // speedup
unsigned int *lboundaries; // speedup
BitStream &bs;
// inline unsigned int intervalsize(int i)
// {
// unsigned int res=((intervals[i] > 0 ? pow2(intervals[i]-1) : 0));
// if(intervalsizes[i]!=res){errr("intervalsizes");}
// return res;
// }
inline unsigned int intervalsize0(int i){return((intervals[i] > 0 ? pow2(intervals[i]-1) : 0));}
public:
int verbose;
// find interval where value v resides
// fast version, this one recursively splits initial interval
inline int find_interval2(const unsigned int v,unsigned int &lboundary)
{
int i0=0;
int i1=nintervals;
int i;
for(;;)
{
if(i1==i0+1){break;}
i=(i0+i1)>>1;
lboundary=lboundaries[i];
// if(verbose)printf("considering i0:%3d i1:%3d : i:%3d v:%12u lboundary:%12u (%12u - %12u)\n",i0,i1,i,v,lboundary,lboundaries[i0],lboundaries[i1]);
if(v=lboundary && v=nintervals){errr("code argh!");}
sboundary=lboundary+intervalsizes[i];
// printf("nintervals:%3d i:%3d : %12u ... %12u : %12u\n",nintervals,i,lboundary,sboundary,v);
if( (lboundary!=sboundary && v>=lboundary && v0 ? intervals[i]-1 : 0);
// if(verbose>1)printf("v:%6d interval:%2d (%5d - %5d) bitsremaining:%2d ",v,i,lboundary,sboundary,bitsremaining);
v-=lboundary;
// if(verbose>1)printf("remain:%6d totalbits:%2d\n",v,bitsremaining+nlev);
bs.put_uint(v,bitsremaining,"rem");
}
// get and uncompress a value from the bitstream
inline unsigned int get()
{
// SPEED CRITICAL SECTION
int i=bs.get_uint(nlev,"int");// get interval
// if(verbose>1)printf("get:interval:%2d ",i);
const int bitsremaining=(intervals[i]>0 ? intervals[i]-1 : 0);
// if(verbose>1)printf("bitsremain:%2d ",bitsremaining);
unsigned int v=bs.get_uint(bitsremaining,"rem");
// if(verbose>1)printf("v0:%3d ",v);
// unsigned int lboundary=0;
v+=lboundaries[i];
// for(int j=0;j1)printf("lboundary:%5d v:%5d \n",lboundaries[i],v);
return(v);
}
// insert the packed probability distribution into the bitstream
void code_begin();
// get the packed probability distribution from the bitstream
void get_begin();
void make_lboundaries();
VlengthCoder(BitStream &nbs,int nverbose=0);
// Release the three tables this coder allocated with new[]:
// the interval codes, the cached interval sizes and the lower boundaries.
~VlengthCoder()
{
    delete [] intervals;
    delete [] intervalsizes;
    delete [] lboundaries;
}
// create VlengthCoder and its probability distribution from an array of values
VlengthCoder(unsigned int *vals,int n,BitStream &nbs,int nverbose=0);
};
void
VlengthCoder::code_begin()
{
int i;
bs.add_tag("VlengthCoder:Header");
bs.put_uint(nbits,NBITS_NBITS_VAL,"nbits");
bs.put_uint(nlev,5,"nlev");
for(i=0;i1)printf("get_begin nbits:%d\n",nbits);
nlev=bs.get_uint(5,"nlev");
if(verbose>1)printf("get_begin nlev:%d\n",nlev);
nintervals=pow2(nlev);
intervals=new int [nintervals];
CHECK_MEM(intervals);
intervalsizes=new unsigned int [nintervals];
CHECK_MEM(intervalsizes);
lboundaries=new unsigned int [nintervals+1];
CHECK_MEM(lboundaries);
for(i=0;i1)printf("get_begin intervals:%2d:%2d\n",i,intervals[i]);
}
make_lboundaries();
}
void
VlengthCoder::make_lboundaries()
{
unsigned int lboundary=0;
for(int j=0;j<=nintervals;j++)
{
lboundaries[j]=lboundary;
if(j=nbits){nlev=nbits-1;}
// nlev at least 1
if(nlev<1){nlev=1;}
if(debug_test_nlev>=0){nlev=debug_test_nlev;}
nintervals=pow2(nlev);
int i;
intervals=new int [nintervals];
CHECK_MEM(intervals);
intervalsizes=new unsigned int [nintervals];
CHECK_MEM(intervalsizes);
lboundaries=new unsigned int [nintervals+1];
CHECK_MEM(lboundaries);
if(verbose>1)printf("nbits:%d nlev:%d nintervals:%d \n",nbits,nlev,nintervals);
if(verbose>10)
{
printf("vals;\n");
for(i=0;i1)printf("intnum%02d begin:%5u end:%5u len:%5u (code:%2d) real upper boundary: real:%5u\n",i,lboundary,intervalsizes[i]+lboundary,intervalsizes[i],intervals[i],boundary);
lboundary+=intervalsizes[i];
}
boundary=sorted[n-1];
intervals[i]=1+log2(boundary-lboundary)+1;
intervalsizes[i]=intervalsize0(i);
if(0 || verbose>1)printf("intnum%02d begin:%5u end:%5u len:%5u (code:%2d) real upper boundary: real:%5u\n",i,lboundary,intervalsizes[i]+lboundary,intervalsizes[i],intervals[i],boundary);
if(0 || verbose>1)printf("\n");
make_lboundaries();
int SUM_interval_bit_sizes=0;
for(i=0;i>3)-1;
// put first
buff.back()|=((v & 0xff)<>=nbitsinfirstbyte;
// printf(" (v:%x)",v);
// put central
for(int i=ncentral;i;i--)
{
buff.push_back(0);
buff.back()= v & 0xff ;
// ::show_bits(v & 0xff,-8);
// printf(" ");
v>>=8;
}
// put last
const int nbitsremaining=n-( (ncentral<<3)+nbitsinfirstbyte );
if(nbitsremaining)
{
buff.push_back(0);
buff.back()=v & (pow2(nbitsremaining+1)-1);
// printf(" (v:%x:%x)",v & (pow2(nbitsremaining+1)-1),buff.back());
// ::show_bits(v & (pow2(nbitsremaining+1)-1),-nbitsremaining);
// printf("\n");
}
if(!(nbitsremaining & 0x07)){buff.push_back(0);}
bitpos+=n;
// printf("nbitsinfirstbyte:%d ncentral:%d nbitsremaining:%d\n",nbitsinfirstbyte,ncentral,nbitsremaining);
}
// printf("cuurent put order:");
// for(i=0;i>3]>>bpos0) & (pow2(n)-1);
bitpos+=n;
// printf("simple case:res:%x\n",res);
return res;
}
else
{
int bytepos=bitpos>>3;
const int ncentral=((bpos0 + n)>>3)-1;
// put first
res=(buff[bytepos]>>bpos0) & 0xff;
// printf("normal case:res0:%x\n",res);
const int nbitsinfirstbyte=8-bpos0;
bytepos++;
// put central
if(ncentral)
{
unsigned int v=0;
for(int i=ncentral-1;i>=0;i--)
{
v|=buff[bytepos+i]&0xff;
if(i)v<<=8;
// printf(" resC%d:v:%x\n",i,v);
}
bytepos+=ncentral;
res|=v<>3)-1)<<3));
// printf(" :resR:%x buff[%d]:%x %d\n",res,bytepos,buff[bytepos],
// (nbitsinfirstbyte +((bytepos-(bitpos>>3)-1)<<3)));
}
bitpos+=n;
// printf("nbitsinfirstbyte:%d ncentral:%d nbitsremaining:%d\n",nbitsinfirstbyte,ncentral,nbitsremaining);
return res;
}
}
#ifdef NOTDEF
unsigned int
BitStream::get(int n,const char *tag/*=NULL*/)
{
if(check_tag(tag)==NOTOK){errr("BitStream::get(int) check_tag failed");}
unsigned int res=0;
for(int i=0;i=0)
{
printf("ERROR:BitStream:bitpos:%4d:check_tag: found tag %s at %d expected it at %d\n",bitpos,tag,found,pos);
}
else
{
printf("ERROR:BitStream:bitpos:%4d:check_tag: tag %s not found, expected it at %d\n",bitpos,tag,pos);
}
return(NOTOK);
}
return(OK);
}
int
BitStream::find_tag(const char *tag)
{
int i;
for(i=0;ipos && i>=0;i--);
return(i);
}
void
BitStream::show_bits(int a,int n)
{
for(int b=a;b=pow2(NBITS_NVALS)){errr("Compressor::put(uint *,nvals) : overflow: nvals>2^16");}
put_uint_vl(n,NBITS_NVALS,"size");
if(n==0){return NBITS_NVALS;}
int sdecr=2;
int sfixed=1;
int nbits=num_bits(HtMaxMin::max_v(vals,n));
if(verbose)printf("*********************put_vals:n:%3d nbits:%3d\n",n,nbits);
int i;
if(verbose)
{
printf("TTT:n:%3d nbits:%3d\n",n,nbits);
for(i=1;i<7;i++)
{
debug_test_nlev=i;
printf("trying nlev:%3d\n",debug_test_nlev);
freeze();
put_decr(vals,n);
int fndsz=unfreeze();
printf("TTT:nlev:%2d try size:%4d\n",i,fndsz);
}
debug_test_nlev=-1;
}
if(n>15 && nbits>3)
{
freeze();
put_decr(vals,n);
sdecr=unfreeze();
freeze();
put_fixedbitl(vals,n);
sfixed=unfreeze();
}
if(verbose)printf("put_vals:n:%3d sdecr:%6d sfixed:%6d rap:%f\n",n,sdecr,sfixed,sdecr/(float)sfixed);
if(sdecr1)printf("get_vals n:%d\n",n);
if(!n){*pres=NULL;return 0;}
if(verbose)printf("get_vals: n:%3d\n",n);
unsigned int *res=new unsigned int[n];
CHECK_MEM(res);
int comptype=get_uint(2,"put_valsCompType");
if(verbose)printf("get_vals:comptype:%d\n",comptype);
switch(comptype)
{
case 0: get_decr(res,n);
break;
case 1: get_fixedbitl(res,n);
break;
default: errr("Compressor::get_vals invalid comptype");break;
}
// get_fixedbitl(res,n);
// get_decr(res,n);
*pres=res;
return(n);
}
int
Compressor::put_fixedbitl(byte *vals,int n,const char *tag)
{
int cpos=bitpos;
int i,j;
add_tag(tag);
put_uint_vl(n,NBITS_NVALS,"size");
if(n==0){return 0;}
byte maxv=vals[0];
for(i=1;imaxv){maxv=v;}
}
int nbits=num_bits(maxv);
if(n>=pow2(NBITS_NVALS)){errr("Compressor::put_fixedbitl(byte *) : overflow: nvals>2^16");}
put_uint(nbits,NBITS_NBITS_CHARVAL,"nbits");
add_tag("data");
for(i=0;i1){printf("get_decr:got:%8d\n",res[i]);}
}
}