/* MDB Tools - A library for reading MS Access database files
 * Copyright (C) 2000 Brian Bruns
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include "mdbtools.h"
#include "errno.h"

#ifdef DMALLOC
#include "dmalloc.h"
#endif

#ifdef _WIN32
#include <windows.h>
#endif

/*
 * This function is used in reading text data from an MDB table.
 *
 * Converts the slen bytes at src into dest, which has room for dlen bytes
 * including the terminating NUL.  For JET4 files a source that starts with
 * the 0xff 0xfe marker is first expanded from its 'Unicode Compressed'
 * form.  Characters iconv cannot convert are replaced by '?'.
 *
 * Returns the number of bytes stored in dest (terminator not counted), or
 * 0 when src or dest is NULL or dlen is 0.
 */
int
mdb_unicode2ascii(MdbHandle *mdb, char *src, size_t slen, char *dest, size_t dlen)
{
	char *tmp = NULL;
	size_t tlen = 0;
	size_t len_in, len_out;
	char *in_ptr, *out_ptr;

	if ((!src) || (!dest) || (!dlen))
		return 0;

	/* Uncompress 'Unicode Compressed' string into tmp */
	if (IS_JET4(mdb) && (slen >= 2)
	 && ((src[0] & 0xff) == 0xff) && ((src[1] & 0xff) == 0xfe)) {
		unsigned int compress = 1;

		src += 2;
		slen -= 2;
		/* worst case: every input byte expands to two output bytes */
		tmp = (char *)g_malloc(slen * 2);
		while (slen) {
			if (*src == 0) {
				/* a zero byte toggles compressed/uncompressed mode */
				compress = (compress) ? 0 : 1;
				src++;
				slen--;
			} else if (compress) {
				tmp[tlen++] = *src++;
				tmp[tlen++] = 0;
				slen--;
			} else if (slen >= 2) {
				tmp[tlen++] = *src++;
				tmp[tlen++] = *src++;
				slen -= 2;
			} else {
				/* Lone trailing byte in uncompressed mode: nothing
				 * consumes it, so stop instead of looping forever. */
				break;
			}
		}
	}

	in_ptr = (tmp) ? tmp : src;
	out_ptr = dest;
	len_in = (tmp) ? tlen : slen;
	/* keep one byte of dest for the terminating NUL */
	len_out = dlen - 1;

#ifdef HAVE_ICONV
	for (;;) {
		size_t skip;
		size_t rc = iconv(mdb->iconv_in, &in_ptr, &len_in, &out_ptr, &len_out);

		if ((rc != (size_t)-1) || (!len_in) || (errno == E2BIG))
			break;
		/* Don't bail if impossible conversion is encountered */
		if (!len_out)
			break;	/* no room left for the '?' placeholder */
		skip = (IS_JET4(mdb)) ? 2 : 1;
		if (skip > len_in)
			skip = len_in;	/* odd trailing byte: avoid size_t wrap */
		in_ptr += skip;
		len_in -= skip;
		*out_ptr++ = '?';
		len_out--;
	}
	dlen = (dlen - 1) - len_out;
#else
	if (IS_JET3(mdb)) {
		/* never copy more than dest can hold */
		dlen = MIN(len_in, len_out);
		strncpy(out_ptr, in_ptr, dlen);
	} else {
		/* rough UCS-2LE to ISO-8859-1 conversion */
		/* NOTE(review): SOURCE is cut off right after "for (i=0; i";
		 * this loop is reconstructed from the comment above and the
		 * mirror loop in the writer -- confirm against upstream. */
		size_t i;
		size_t nchars = MIN(len_in / 2, len_out);

		for (i = 0; i < nchars; i++)
			out_ptr[i] = (in_ptr[i * 2 + 1] == 0) ? in_ptr[i * 2] : '?';
		dlen = nchars;
	}
#endif

	/* NOTE(review): the function tail (free, terminate, return) is missing
	 * from SOURCE and is reconstructed here -- confirm against upstream. */
	g_free(tmp);
	dest[dlen] = '\0';
	return (int)dlen;
}

/*
 * Counterpart of mdb_unicode2ascii(): converts text into the form stored
 * in the database.
 *
 * NOTE(review): the name, signature and opening statements of this
 * function are missing from SOURCE (everything up to the iconv call); they
 * are reconstructed from the variables the visible remainder uses and from
 * the parallel reader above -- confirm against upstream.
 *
 * Converts src into dest (capacity dlen bytes).  For JET4 the converted
 * text is additionally stored in 'Unicode Compressed' form when it is
 * longer than 4 bytes and the compressed form is shorter.
 *
 * Returns the number of bytes written to dest, or 0 when src or dest is
 * NULL or dlen is 0.
 */
int
mdb_ascii2unicode(MdbHandle *mdb, char *src, size_t slen, char *dest, size_t dlen)
{
	size_t len_in, len_out;
	char *in_ptr, *out_ptr;

	if ((!src) || (!dest) || (!dlen))
		return 0;

	in_ptr = src;
	out_ptr = dest;
	/* NOTE(review): presumably slen == 0 means "use strlen(src)" -- verify */
	len_in = (slen) ? slen : strlen(in_ptr);
	len_out = dlen;

#ifdef HAVE_ICONV
	iconv(mdb->iconv_out, &in_ptr, &len_in, &out_ptr, &len_out);
	dlen -= len_out;
#else
	if (IS_JET3(mdb)) {
		dlen = MIN(len_in, len_out);
		strncpy(out_ptr, in_ptr, dlen);
	} else {
		unsigned int i;

		/* widen each input byte to a little-endian 16-bit unit */
		slen = MIN(len_in, len_out / 2);
		dlen = slen * 2;
		for (i = 0; i < slen; i++) {
			out_ptr[i * 2] = in_ptr[i];
			out_ptr[i * 2 + 1] = 0;
		}
	}
#endif

	/* Unicode Compression */
	if (IS_JET4(mdb) && (dlen > 4)) {
		unsigned char *tmp = g_malloc(dlen);
		unsigned int tptr = 0, dptr = 0;
		int comp = 1;

		/* marker that mdb_unicode2ascii() recognises */
		tmp[tptr++] = 0xff;
		tmp[tptr++] = 0xfe;
		while ((dptr < dlen) && (tptr < dlen)) {
			if (((dest[dptr + 1] == 0) && (comp == 0))
			 || ((dest[dptr + 1] != 0) && (comp == 1))) {
				/* switch encoding mode */
				tmp[tptr++] = 0;
				comp = (comp) ? 0 : 1;
			} else if (dest[dptr] == 0) {
				/* this string cannot be compressed */
				tptr = dlen;
			} else if (comp == 1) {
				/* encode compressed character */
				tmp[tptr++] = dest[dptr];
				dptr += 2;
			} else if (tptr + 1 < dlen) {
				/* encode uncompressed character */
				tmp[tptr++] = dest[dptr];
				tmp[tptr++] = dest[dptr + 1];
				dptr += 2;
			} else {
				/* could not encode uncompressed character
				 * into single byte */
				tptr = dlen;
			}
		}
		/* keep the compressed form only if it is actually shorter */
		if (tptr < dlen) {
			memcpy(dest, tmp, tptr);
			dlen = tptr;
		}
		g_free(tmp);
	}

	return (int)dlen;
}

/*
 * Opens the two iconv descriptors stored in the handle (iconv_in for
 * reading, iconv_out for writing).
 *
 * The application-side encoding comes from the MDBICONV environment
 * variable and defaults to "UTF-8".  The database side is "UCS-2LE" for
 * JET4; for other versions it is, in order of preference, the
 * MDB_JET3_CHARSET environment variable, an already set
 * mdb->jet3_iconv_code, the OS default ANSI code page (Windows only), or
 * "CP1252".
 *
 * Without HAVE_ICONV this function only reads MDBICONV and has no effect.
 */
void
mdb_iconv_init(MdbHandle *mdb)
{
	const char *iconv_code;

	/* check environment variable */
	if (!(iconv_code = getenv("MDBICONV"))) {
		iconv_code = "UTF-8";
	}

#ifdef HAVE_ICONV
	if (IS_JET4(mdb)) {
		mdb->iconv_out = iconv_open("UCS-2LE", iconv_code);
		mdb->iconv_in = iconv_open(iconv_code, "UCS-2LE");
	} else {
		/* According to Microsoft Knowledge Base pages 289525 and */
		/* 202427, code page info is not contained in the database */

		/* check environment variable */
		const char *jet3_iconv_code_from_env = getenv("MDB_JET3_CHARSET");

		if (jet3_iconv_code_from_env)
			mdb_set_encoding(mdb, jet3_iconv_code_from_env);
		else if (!mdb->jet3_iconv_code) {
#ifdef _WIN32
			/* get the default from OS */
			/* "CP" followed by room for the code page digits; the
			 * LOCALE_IDEFAULTANSICODEPAGE string is at most six
			 * characters including its terminating NUL. */
			char default_encoding[2 + 6] = "CP";

			if (GetLocaleInfoA(
					MAKELCID(MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), SORT_DEFAULT),
					LOCALE_IDEFAULTANSICODEPAGE,
					default_encoding + 2,
					sizeof(default_encoding) - 2))
				mdb->jet3_iconv_code = g_strdup(default_encoding);
			else
#endif
				mdb->jet3_iconv_code = g_strdup("CP1252");
		}

		mdb->iconv_out = iconv_open(mdb->jet3_iconv_code, iconv_code);
		mdb->iconv_in = iconv_open(iconv_code, mdb->jet3_iconv_code);
	}
#endif
}

/*
 * Closes the iconv descriptors of the handle.  A descriptor equal to
 * (iconv_t)-1 is skipped.  Does nothing without HAVE_ICONV.
 */
void
mdb_iconv_close(MdbHandle *mdb)
{
#ifdef HAVE_ICONV
	if (mdb->iconv_out != (iconv_t)-1)
		iconv_close(mdb->iconv_out);
	if (mdb->iconv_in != (iconv_t)-1)
		iconv_close(mdb->iconv_in);
#endif
}