//
// HtPack.cc
//
// HtPack: Compress and uncompress data in e.g. simple structures.
// The structure must have the layout defined in the ABI;
// the layout the compiler generates.
//
// Part of the ht://Dig package
// Copyright (c) 1999-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
//
//
// $Id: HtPack.cc,v 1.8 2004/05/28 13:15:20 lha Exp $
//
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */
#include "HtPack.h"
#include
#include
// For the moment, these formats are accepted:
// "i" native int, with most compressed value 0
// "u" unsigned int, with most compressed value 0
// "c" unsigned int, with most compressed value 1.
//
// If someone adds other formats (and uses them), please note
// that structure padding may give surprising effects on some
// (most) platforms, for example if you try to unpack a
// structure with the imagined signature "isi" (int, short, int).
// You will want to solve that portably.
//
// Each field is compressed to a 2-bit description code (the
// overhead) plus variable-sized data.
// Theoretically, different formats could use a different number of
// bits in the description with a few changes.
// The description codes of every group of four "fields" are stored
// together in one byte that precedes the data of those fields.
// Encode one unsigned field.
//
// VALUE is the field to encode and IMPLICIT the value that is
// represented by the code alone (1 for format 'c', 0 for format 'u').
// The bytes to append to the packed string (in native byte order) are
// written to OUT, which must hold at least sizeof(unsigned int) bytes,
// and their count is stored in *OUTLEN.
//
// Returns the 2-bit code:
//   0 - value is IMPLICIT, no data bytes.
//   1 - value fits in unsigned char.
//   2 - value fits in unsigned short.
//   3 - plain unsigned int.
static int
htPackUnsignedField(unsigned int value, unsigned int implicit,
                    char *out, int *outlen)
{
    *outlen = 0;
    if (value == implicit)
        return 0;

    unsigned char charvalue = (unsigned char) value;
    unsigned short shortvalue = (unsigned short) value;

    if (value == charvalue)
    {
        memcpy(out, &charvalue, sizeof charvalue);
        *outlen = (int) sizeof charvalue;
        return 1;
    }
    if (value == shortvalue)
    {
        memcpy(out, &shortvalue, sizeof shortvalue);
        *outlen = (int) sizeof shortvalue;
        return 2;
    }
    memcpy(out, &value, sizeof value);
    *outlen = (int) sizeof value;
    return 3;
}

// Encode one signed field (format 'i'); the value 0 is represented by
// the code alone.  OUT and OUTLEN are used as in htPackUnsignedField.
//
// Returns the 2-bit code:
//   0 - value is 0, no data bytes.
//   1 - value fits in char.
//   2 - value fits in short.
//   3 - plain int.
static int
htPackSignedField(int value, char *out, int *outlen)
{
    *outlen = 0;
    if (value == 0)
        return 0;

    char charvalue = (char) value;
    short shortvalue = (short) value;

    if (value == charvalue)
    {
        memcpy(out, &charvalue, sizeof charvalue);
        *outlen = (int) sizeof charvalue;
        return 1;
    }
    if (value == shortvalue)
    {
        memcpy(out, &shortvalue, sizeof shortvalue);
        *outlen = (int) sizeof shortvalue;
        return 2;
    }
    memcpy(out, &value, sizeof value);
    *outlen = (int) sizeof value;
    return 3;
}

// Compress the fields stored at DATA as described by FORMAT.
//
// FORMAT is a sequence of field characters ('i', 'u' or 'c'), each
// optionally followed by a decimal repeat count (default 1).  Every
// field is read from DATA as one native int / unsigned int (DATA need
// not be aligned) and is replaced by a 2-bit code plus zero or more
// data bytes in native byte order.  The codes of up to four consecutive
// fields are collected in one description byte, lowest bits first, and
// that byte is placed in front of the data of those fields.
//
// Returns the packed bytes.  The result always starts with a
// description byte, even for an empty format.  Unless both DEBUG is
// defined and NOSTREAM is not, an unknown format character is skipped
// silently: it produces no code and consumes no input.
String
htPack(const char format[], const char *data)
{
    const char *s = format;

    // Codes are inserted by position rather than by shifting, which
    // keeps decoding simple.
    int code_no = 0;

    // Initial capacity is only a guess; it has speed effects only.
    String compressed(60);

    // Codes accumulated for the current group of (up to) four fields.
    unsigned int description = 0;

    // Offset of the description byte of the current group.  An offset
    // is kept instead of a pointer because the string may be
    // reallocated and moved while data is appended.
    int code_index = 0;

    // Reserve the description byte of the first group.
    compressed << '\0';

    while (*s)
    {
        int fchar = *s++;
        int n = 1;

        // The cast keeps isdigit() defined for negative char values.
        if (isdigit((unsigned char) *s))
        {
            char *t;
            n = (int) strtol(s, &t, 10);
            s = t;
        }

        // Loop over N in e.g. "iN" (default 1).  "> 0" also stops a
        // count that became negative when narrowed to int.
        while (n-- > 0)
        {
            char field[sizeof(unsigned int)];
            int fieldlen = 0;
            int mycode = 0;

            switch (fchar)
            {
            case 'c':
            case 'u':
                {
                    unsigned int value;
                    memcpy(&value, data, sizeof value);
                    data += sizeof(unsigned int);
                    // 'c' treats 1 as the most compressed value, 'u' 0.
                    mycode = htPackUnsignedField(value,
                                                 fchar == 'c' ? 1u : 0u,
                                                 field, &fieldlen);
                }
                break;

            case 'i':
                {
                    int value;
                    memcpy(&value, data, sizeof value);
                    data += sizeof(int);
                    mycode = htPackSignedField(value, field, &fieldlen);
                }
                break;

            default:
#ifndef NOSTREAM
#ifdef DEBUG
                if (1)
                    cerr << "Invalid char \'" << char(fchar)
                         << "\' in pack format \"" << format << "\""
                         << endl;
                return "";
#endif
#endif
                // No code and no data for an unknown format character.
                continue;
            }

            if (fieldlen > 0)
                compressed.append(field, fieldlen);
            description |= (unsigned int) mycode << (2 * code_no);
            code_no++;

            // Assuming 8-bit chars here: a description byte is full
            // after four 2-bit codes.
            if (code_no == 4)
            {
                char *codepos = compressed.get() + code_index;
                *codepos = (char) description;
                description = 0;
                code_no = 0;

                if (n || *s)
                {
                    // More format to process, so reserve the
                    // description byte of the next group.
                    code_index = compressed.length();
                    compressed << '\0';
                }
            }
        }
    }

    // Store the codes of a final, partially filled group.  Doing this
    // after the loops (rather than on the last loop iteration) also
    // covers formats that end with a zero repeat count or with an
    // unknown character, where the last iteration is never reached.
    if (code_no != 0)
    {
        char *codepos = compressed.get() + code_index;
        *codepos = (char) description;
    }

    return compressed;
}
// Decode one unsigned field.
//
// CODE is the field's 2-bit code and IMPLICIT the value represented by
// code 0 (1 for format 'c', 0 for format 'u').  Data bytes are read
// from *DATAP, which is advanced past the bytes consumed.
//   0 - value is IMPLICIT, no data bytes.
//   1 - unsigned char follows.
//   2 - unsigned short follows.
//   3 - plain unsigned int follows.
static unsigned int
htUnpackUnsignedField(int code, unsigned int implicit, const char **datap)
{
    const char *data = *datap;
    unsigned int value = implicit;

    switch (code)
    {
    case 1:
        {
            unsigned char charvalue;
            memcpy(&charvalue, data, sizeof charvalue);
            value = charvalue;
            data += sizeof charvalue;
        }
        break;
    case 2:
        {
            unsigned short shortvalue;
            memcpy(&shortvalue, data, sizeof shortvalue);
            value = shortvalue;
            data += sizeof shortvalue;
        }
        break;
    case 3:
        memcpy(&value, data, sizeof value);
        data += sizeof value;
        break;
    default:
        // Code 0: the implicit value, nothing to read.
        break;
    }

    *datap = data;
    return value;
}

// Decode one signed field (format 'i'); code 0 stands for the value 0.
// CODE and DATAP are used as in htUnpackUnsignedField, with char,
// short and int in place of the unsigned types.
static int
htUnpackSignedField(int code, const char **datap)
{
    const char *data = *datap;
    int value = 0;

    switch (code)
    {
    case 1:
        {
            char charvalue;
            memcpy(&charvalue, data, sizeof charvalue);
            value = charvalue;
            data += sizeof charvalue;
        }
        break;
    case 2:
        {
            short shortvalue;
            memcpy(&shortvalue, data, sizeof shortvalue);
            value = shortvalue;
            data += sizeof shortvalue;
        }
        break;
    case 3:
        memcpy(&value, data, sizeof value);
        data += sizeof value;
        break;
    default:
        // Code 0: the value 0, nothing to read.
        break;
    }

    *datap = data;
    return value;
}

// Reverse the effect of htPack.
//
// FORMAT must be the format string the data was packed with and DATA
// the packed bytes.  Each field is expanded to a native int or
// unsigned int and appended to the result in order.  No length is
// passed in, so DATA is trusted to contain as many bytes as FORMAT and
// the description codes call for.
//
// Returns the unpacked bytes.  Unless both DEBUG is defined and
// NOSTREAM is not, an unknown format character is skipped silently: it
// uses no description code and reads no input, matching htPack.
String
htUnpack(const char format[], const char *data)
{
    const char *s = format;

    // The value 1 is a sentinel meaning "all codes used up", so the
    // first field fetches a description byte immediately.
    unsigned int description = 1;

    // Initial capacity is only a guess; it has speed effects only.
    String decompressed(60);

    while (*s)
    {
        int fchar = *s++;
        int n = 1;

        // The cast keeps isdigit() defined for negative char values.
        if (isdigit((unsigned char) *s))
        {
            char *t;
            n = (int) strtol(s, &t, 10);
            s = t;
        }

        // Loop over N in e.g. "iN" (default 1).  "> 0" also stops a
        // count that became negative when narrowed to int.
        while (n-- > 0)
        {
            if (fchar != 'c' && fchar != 'i' && fchar != 'u')
            {
#ifndef NOSTREAM
#ifdef DEBUG
                if (1)
                    cerr << "Invalid char \'" << char(fchar)
                         << "\' in unpack format \"" << format << "\""
                         << endl;
                return "";
#endif
#endif
                // htPack emits no code for an unknown character, so
                // none may be consumed here either.
                break;
            }

            // Time to renew description?  Bit 8 is a marker that has
            // moved down to bit 0 once all four codes are shifted out.
            // The byte must be read as unsigned: a sign-extended value
            // would set every upper bit and the marker would be lost.
            if (description == 1)
                description = 256 | (unsigned char) *data++;

            const int code = (int) (description & 3);

            if (fchar == 'i')
            {
                int value = htUnpackSignedField(code, &data);
                decompressed.append((char *) &value, sizeof value);
            }
            else
            {
                // 'c' treats 1 as the most compressed value, 'u' 0.
                unsigned int value =
                    htUnpackUnsignedField(code, fchar == 'c' ? 1u : 0u,
                                          &data);
                decompressed.append((char *) &value, sizeof value);
            }

            description >>= 2;
        }
    }

    return decompressed;
}
// End of HtPack.cc
// End of HtPack.cc