You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

279 lines
11 KiB

/***************************************************************************/
/* */
/* Project: OpenSLP - OpenSource implementation of Service Location */
/* Protocol Version 2 */
/* */
/* File: slp_utf8.c */
/* */
/* Abstract: Do conversions between UTF-8 and other character encodings */
/* */
/*-------------------------------------------------------------------------*/
/* */
/* Please submit patches to http://www.openslp.org */
/* */
/*-------------------------------------------------------------------------*/
/* */
/* Copyright (C) 2000 Caldera Systems, Inc */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions are */
/* met: */
/* */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions and the following disclaimer in */
/* the documentation and/or other materials provided with the */
/* distribution. */
/* */
/* Neither the name of Caldera Systems nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* `AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */
/* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR */
/* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CALDERA */
/* SYSTEMS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT */
/* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
/* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
/* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE */
/* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* */
/***************************************************************************/
#include <sys/types.h>
#include "slp_message.h"
#include "slp_v1message.h"
/* The following two routines are adapted from Ken Thompson's fss-utf.c.
* See ftp://ftp.informatik.uni-erlangen.de/pub/doc/ISO/charsets/utf-8.c
*/
typedef struct
{
int cmask;
int cval;
int shift;
long lmask;
long lval;
} Tab;
static Tab tab[] =
{
{ 0x80, 0x00, 0*6, 0x7F, 0}, /* 1 byte sequence */
{ 0xE0, 0xC0, 1*6, 0x7FF, 0x80}, /* 2 byte sequence */
{ 0xF0, 0xE0, 2*6, 0xFFFF, 0x800}, /* 3 byte sequence */
{ 0xF8, 0xF0, 3*6, 0x1FFFFF, 0x10000}, /* 4 byte sequence */
{ 0xFC, 0xF8, 4*6, 0x3FFFFFF, 0x200000}, /* 5 byte sequence */
{ 0xFE, 0xFC, 5*6, 0x7FFFFFFF, 0x4000000}, /* 6 byte sequence */
{ 0, 0, 0, 0, 0} /* end of table */
};
static int
utftouni(unsigned *p, const char *s, size_t n)
{
long l;
int c0, c;
size_t nc;
Tab *t;
if(s == 0)
return 0;
nc = 0;
if(n <= nc)
return -1;
c0 = *s & 0xff;
l = c0;
for(t = tab; t->cmask; t++)
{
nc++;
if((c0 & t->cmask) == t->cval)
{
l &= t->lmask;
if(l < t->lval)
return -1;
*p = l;
return (int)nc;
}
if(n <= nc)
return -1;
s++;
c = (*s ^ 0x80) & 0xFF;
if(c & 0xC0)
return -1;
l = (l << 6) | c;
}
return -1;
}
static int
unitoutf(char *s, unsigned wc)
{
long l;
int c, nc;
Tab *t;
if(s == 0)
return 0;
l = wc;
nc = 0;
for(t=tab; t->cmask; t++)
{
nc++;
if(l <= t->lmask)
{
c = t->shift;
*s = (char)(t->cval | ( l >> c ));
while(c > 0)
{
c -= 6;
s++;
*s = (char)(0x80 | (( l >> c) & 0x3F));
}
return nc;
}
}
return -1;
}
/*=========================================================================*/
int SLPv1AsUTF8(int encoding, char *string, int *len)
/* Converts a SLPv1 encoded string to a UTF-8 character string in */
/* place. If string does not have enough space to hold the encoded string */
/* we are dead. */
/* */
/* encoding - (IN) unicode encoding of the string passed in */
/* */
/* string - (INOUT) IN - pointer to SLPv1 encoded string */
/* OUT - pointer to converted UTF-8 string. */
/* */
/* len - (INOUT) IN - length of SLPv1 encoded string (in bytes) */
/* OUT - length of UTF-8 string (in bytes) */
/* */
/* Returns - Zero on success, SLP_ERROR_PARSE_ERROR, or */
/* SLP_ERROR_INTERNAL_ERROR if out of memory. string and len */
/* invalid if return is not successful. */
/* */
/*=========================================================================*/
{
int nc;
unsigned uni;
char utfchar[6]; /* UTF-8 chars are at most 6 bytes */
char *utfstring = string, *unistring = string;
if(encoding == SLP_CHAR_ASCII || encoding == SLP_CHAR_UTF8)
return 0;
if(encoding != SLP_CHAR_UNICODE16 && encoding != SLP_CHAR_UNICODE32)
return SLP_ERROR_INTERNAL_ERROR;
while(*len)
{
if(encoding == SLP_CHAR_UNICODE16)
{
uni = AsUINT16(unistring);
unistring += 2;
*len -= 2;
}
else
{
uni = AsUINT32(unistring);
unistring += 4;
*len -= 4;
}
if(*len < 0)
return SLP_ERROR_INTERNAL_ERROR;
nc = unitoutf(utfchar, uni);
/* Take care not to overwrite. */
if(nc < 0 || utfstring + nc > unistring)
return SLP_ERROR_INTERNAL_ERROR;
memcpy(utfstring, utfchar, nc);
utfstring += nc;
}
*len = utfstring - string;
return 0;
}
/*=========================================================================*/
int SLPv1ToEncoding(char *string, int *len, int encoding,
const char *utfstring, int utflen)
/* Converts a UTF-8 character string to a SLPv1 encoded string. */
/* When called with string set to null returns number of bytes needed */
/* in string. */
/* */
/* string - (OUT) SLPv1 encoded string. */
/* */
/* len - (INOUT) IN - bytes available in string */
/* OUT - bytes used up in string */
/* */
/* encoding - (IN) encoding of the string passed in */
/* */
/* utfstring - (IN) pointer to UTF-8 string */
/* */
/* utflen - (IN) length of UTF-8 string */
/* */
/* Returns - Zero on success, SLP_ERROR_PARSE_ERROR, or */
/* SLP_ERROR_INTERNAL_ERROR if out of memory. string and len */
/* invalid if return is not successful. */
/* */
/*=========================================================================*/
{
unsigned uni;
int nc, total = 0;
if(encoding == SLP_CHAR_ASCII || encoding == SLP_CHAR_UTF8)
{
if(*len < utflen)
return SLP_ERROR_INTERNAL_ERROR;
*len = utflen;
if(string)
memcpy(string, utfstring, utflen);
return 0;
}
if(encoding != SLP_CHAR_UNICODE16 && encoding != SLP_CHAR_UNICODE32)
return SLP_ERROR_INTERNAL_ERROR;
while(utflen)
{
nc = utftouni(&uni, utfstring, utflen);
utflen -= nc;
if(nc < 0 || utflen < 0)
return SLP_ERROR_INTERNAL_ERROR;
utfstring += nc;
if(encoding == SLP_CHAR_UNICODE16)
{
if(string)
{
ToUINT16(string, uni);
string += 2;
}
total += 2;
}
else
{
if(string)
{
ToUINT32(string, uni);
string += 4;
}
total += 4;
}
if(total > *len)
return SLP_ERROR_INTERNAL_ERROR;
}
*len = total;
return 0;
}