// // HtPack.cc // // HtPack: Compress and uncompress data in e.g. simple structures. // The structure must have the layout defined in the ABI; // the layout the compiler generates. // // Part of the ht://Dig package // Copyright (c) 1999-2004 The ht://Dig Group // For copyright details, see the file COPYING in your distribution // or the GNU Library General Public License (LGPL) version 2 or later // // // $Id: HtPack.cc,v 1.8 2004/05/28 13:15:20 lha Exp $ // #ifdef HAVE_CONFIG_H #include "htconfig.h" #endif /* HAVE_CONFIG_H */ #include "HtPack.h" #include #include // For the moment, these formats are accepted: // "i" native int, with most compressed value 0 // "u" unsigned int, with most compressed value 0 // "c" unsigned int, with most compressed value 1. // // If someone adds other formats (and uses them), please note // that structure padding may give surprising effects on some // (most) platforms, for example if you try to unpack a // structure with the imagined signature "isi" (int, short, int). // You will want to solve that portably. // // Compression is done to 2 bits description (overhead) each, // plus variable-sized data. // Theoretically, different formats can use different number of // bits in the description with a few changes. // The description is located in a byte before every four // "fields". String htPack(const char format[], const char *data) { const char *s = format; // We insert the encodings by number, rather than shifting and // inserting at the "bottom". This should make it faster for // decoding, which presumably is more important than the speed // of encoding. int code_no = 0; // Make a wild guess that we will compress some ordinary sized // struct. This guess only has speed effects. String compressed(60); // Accumulated codes. unsigned int description = 0; // Store the encoding here. We cannot use a char *, as the // string may be reallocated and moved. int code_index = 0; // Make place for the first codes. compressed << '\0'; // Format string loop. while (*s) { int fchar = *s++; int n; if (isdigit(*s)) { char* t; n = strtol(s, &t, 10); s = t; } else n = 1; // Loop over N in e.g. "iN" (default 1). while (n--) { // Format character handling. switch (fchar) { case 'c': { // We compress an unsigned int with the most common // value 1 as this: // 00 - value is 1. // 01 - value fits in unsigned char - appended. // 10 - value fits in unsigned short - appended. // 11 - just plain unsigned int - appended (you lose). unsigned int value; // Initialize, but allow disalignment. memcpy(&value, data, sizeof value); data += sizeof(unsigned int); int mycode; if (value == 1) { mycode = 0; } else { unsigned char charvalue = (unsigned char) value; unsigned short shortvalue = (unsigned short) value; if (value == charvalue) { mycode = 1; compressed << charvalue; } else if (value == shortvalue) { mycode = 2; compressed.append((char *) &shortvalue, sizeof shortvalue); } else { mycode = 3; compressed.append((char *) &value, sizeof value); } } description |= mycode << (2*code_no++); } break; case 'i': { // We compress a (signed) int as follows: // 00 - value is 0. // 01 - value fits in char - appended. // 10 - value fits in short - appended. // 11 - just plain int - appended (you lose). int value; // Initialize, but allow disalignment. memcpy(&value, data, sizeof value); data += sizeof(int); int mycode; if (value == 0) { mycode = 0; } else { char charvalue = char(value); short shortvalue = short(value); if (value == charvalue) { mycode = 1; compressed << charvalue; } else if (value == shortvalue) { mycode = 2; compressed.append((char *) &shortvalue, sizeof shortvalue); } else { mycode = 3; compressed.append((char *) &value, sizeof value); } } description |= mycode << (2*code_no++); } break; case 'u': { // We compress an unsigned int like an int: // 00 - value is 0. // 01 - value fits in unsigned char - appended. // 10 - value fits in unsigned short - appended. // 11 - just plain unsigned int - appended (you lose). unsigned int value; // Initialize, but allow disalignment. memcpy(&value, data, sizeof value); data += sizeof(unsigned int); int mycode; if (value == 0) { mycode = 0; } else { unsigned char charvalue = (unsigned char) value; unsigned short shortvalue = (unsigned short) value; if (value == charvalue) { mycode = 1; compressed << charvalue; } else if (value == shortvalue) { mycode = 2; compressed.append((char *) &shortvalue, sizeof shortvalue); } else { mycode = 3; compressed.append((char *) &value, sizeof value); } } description |= mycode << (2*code_no++); } break; default: #ifndef NOSTREAM #ifdef DEBUG if (1) cerr << "Invalid char \'" << char(fchar) << "\' in pack format \"" << format << "\"" << endl; return ""; #endif #endif ; // Must always have a statement after a label. } // Assuming 8-bit chars here. Flush encodings after 4 (2 bits // each) or when the code-string is consumed. if (code_no == 4 || (n == 0 && *s == 0)) { char *codepos = compressed.get() + code_index; *codepos = description; description = 0; code_no = 0; if (n || *s) { // If more data to be encoded, then we need a new place to // store the encodings. code_index = compressed.length(); compressed << '\0'; } } } } return compressed; } // Reverse the effect of htPack. String htUnpack(const char format[], const char *data) { const char *s = format; // The description needs to be renewed immediately. unsigned int description = 1; // Make a wild guess about that we decompress to some ordinary // sized struct and assume the cost of allocation some extra // memory is much less than the cost of allocating more. // This guess only has speed effects. String decompressed(60); // Format string loop. while (*s) { int fchar = *s++; int n; if (isdigit(*s)) { char* t; n = strtol(s, &t, 10); s = t; } else n = 1; // Loop over N in e.g. "iN" (default 1). while (n--) { // Time to renew description? if (description == 1) description = 256 | *data++; // Format character handling. switch (fchar) { case 'c': { // An unsigned int with the most common value 1 is // compressed as follows: // 00 - value is 1. // 01 - value fits in unsigned char - appended. // 10 - value fits in unsigned short - appended. // 11 - just plain unsigned int - appended (you lose). unsigned int value; switch (description & 3) { case 0: value = 1; break; case 1: { unsigned char charvalue; memcpy(&charvalue, data, sizeof charvalue); value = charvalue; data++; } break; case 2: { unsigned short int shortvalue; memcpy(&shortvalue, data, sizeof shortvalue); value = shortvalue; data += sizeof shortvalue; } break; case 3: { memcpy(&value, data, sizeof value); data += sizeof value; } break; } decompressed.append((char *) &value, sizeof value); } break; case 'i': { // A (signed) int is compressed as follows: // 00 - value is 0. // 01 - value fits in char - appended. // 10 - value fits in short - appended. // 11 - just plain int - appended (you lose). int value; switch (description & 3) { case 0: value = 0; break; case 1: { char charvalue; memcpy(&charvalue, data, sizeof charvalue); value = charvalue; data++; } break; case 2: { short int shortvalue; memcpy(&shortvalue, data, sizeof shortvalue); value = shortvalue; data += sizeof shortvalue; } break; case 3: { memcpy(&value, data, sizeof value); data += sizeof value; } break; } decompressed.append((char *) &value, sizeof value); } break; case 'u': { // An unsigned int is compressed as follows: // 00 - value is 0. // 01 - value fits in unsigned char - appended. // 10 - value fits in unsigned short - appended. // 11 - just plain unsigned int - appended (you lose). unsigned int value; switch (description & 3) { case 0: value = 0; break; case 1: { unsigned char charvalue; memcpy(&charvalue, data, sizeof charvalue); value = charvalue; data++; } break; case 2: { unsigned short int shortvalue; memcpy(&shortvalue, data, sizeof shortvalue); value = shortvalue; data += sizeof shortvalue; } break; case 3: { memcpy(&value, data, sizeof value); data += sizeof value; } break; } decompressed.append((char *) &value, sizeof value); } break; default: #ifndef NOSTREAM #ifdef DEBUG if (1) cerr << "Invalid char \'" << char(fchar) << "\' in unpack format \"" << format << "\"" << endl; return ""; #endif #endif ; // Must always have a statement after a label. } description >>= 2; } } return decompressed; } // End of HtPack.cc