tdelibs/khtml/xml/dom_stringimpl.cpp

/**
 * This file is part of the DOM implementation for KDE.
 *
 * Copyright (C) 1999-2003 Lars Knoll (knoll@kde.org)
 *           (C) 1999 Antti Koivisto (koivisto@kde.org)
 *           (C) 2001-2003 Dirk Mueller ( mueller@kde.org )
 *           (C) 2002 Apple Computer, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#include "dom_stringimpl.h"

#include <kdebug.h>

#include <string.h>
#include <tqstringlist.h>

using namespace DOM;
using namespace khtml;


// Builds a string from a NUL-terminated 8-bit C string, widening every
// byte to a TQChar. A null or empty input yields a zero-length string that
// still owns a one-element, zeroed buffer.
DOMStringImpl::DOMStringImpl(const char *str)
{
    if (!str || !*str)
    {
        // Keep a valid (zeroed) buffer around even for the empty string:
        // crash protection for code that dereferences s unconditionally.
        s = QT_ALLOC_QCHAR_VEC( 1 );
        s[0] = 0x0; // == TQChar::null;
        l = 0;
        return;
    }

    l = strlen(str);
    s = QT_ALLOC_QCHAR_VEC( l );
    for (unsigned int idx = 0; idx < l; ++idx)
        s[idx] = str[idx];
}

// FIXME: should be a cached flag maybe.
bool DOMStringImpl::containsOnlyWhitespace() const
{
    if (!s)
        return true;

    for (uint i = 0; i < l; i++) {
        TQChar c = s[i];
        if (c.tqunicode() <= 0x7F) {
            if (c.tqunicode() > ' ')
                return false;
        } else {
            if (c.direction() != TQChar::DirWS)
                return false;
        }
    }
    return true;
}


// Appends the contents of str to this string, reallocating the buffer.
// A null or empty str leaves this string untouched.
void DOMStringImpl::append(DOMStringImpl *str)
{
    if (!str || str->l == 0)
        return;

    // Lengths are unsigned throughout the class; keep the sum unsigned too.
    const unsigned int newlen = l + str->l;
    TQChar *c = QT_ALLOC_QCHAR_VEC(newlen);

    // s may be null while l == 0 (default-constructed string); passing a
    // null pointer to memcpy is undefined even for a zero-byte copy.
    if (l)
        memcpy(c, s, l*sizeof(TQChar));
    memcpy(c+l, str->s, str->l*sizeof(TQChar));

    if (s) QT_DELETE_QCHAR_VEC(s);
    s = c;
    l = newlen;
}

// Inserts the contents of str before position pos, reallocating the buffer.
// A position beyond the end of the string degrades to append(); a null or
// empty str leaves this string untouched.
void DOMStringImpl::insert(DOMStringImpl *str, unsigned int pos)
{
    if (pos > l)
    {
        append(str);
        return;
    }
    if (!str || str->l == 0)
        return;

    const unsigned int tail = l - pos;          // characters after the insertion point
    const unsigned int newlen = l + str->l;
    TQChar *c = QT_ALLOC_QCHAR_VEC(newlen);

    // s may be null while l == 0 (default-constructed string). Skip the
    // zero-length copies so memcpy never sees a null pointer and no pointer
    // arithmetic is done on a null buffer.
    if (pos)
        memcpy(c, s, pos*sizeof(TQChar));
    memcpy(c+pos, str->s, str->l*sizeof(TQChar));
    if (tail)
        memcpy(c+pos+str->l, s+pos, tail*sizeof(TQChar));

    if (s) QT_DELETE_QCHAR_VEC(s);
    s = c;
    l = newlen;
}

// Shortens the string to its first len characters, reallocating the buffer.
// A len larger than the current length is ignored; a negative len is treated
// as 0. The buffer always keeps at least one (zeroed) element so that s stays
// dereferenceable for an empty string, as in the const char* constructor.
void DOMStringImpl::truncate(int len)
{
    if(len > (int)l) return;

    // A negative length would otherwise be stored into the unsigned l and
    // turn into a huge bogus length.
    if (len < 0)
        len = 0;

    const unsigned int newLen = len;
    TQChar *c = QT_ALLOC_QCHAR_VEC(newLen ? newLen : 1);
    if (newLen)
        // newLen <= l here, so the source holds enough characters and s is
        // non-null.
        memcpy(c, s, newLen*sizeof(TQChar));
    else
        // Nothing to copy: do not read from s, which may be null or a
        // zero-sized buffer when the string is already empty.
        c[0] = 0x0;

    if(s) QT_DELETE_QCHAR_VEC(s);
    s = c;
    l = newLen;
}

void DOMStringImpl::remove(unsigned int pos, int len)
{
  if(pos >= l ) return;
  if(pos+len > l)
    len = l - pos;

  uint newLen = l-len;
  TQChar *c = QT_ALLOC_QCHAR_VEC(newLen);
  memcpy(c, s, pos*sizeof(TQChar));
  memcpy(c+pos, s+pos+len, (l-len-pos)*sizeof(TQChar));
  if(s) QT_DELETE_QCHAR_VEC(s);
  s = c;
  l = newLen;
}

// Splits the string at pos: this string keeps the characters before pos and
// a newly allocated string holding the remainder is returned. When pos is at
// or past the end, this string is left alone and a new default-constructed
// string is returned.
DOMStringImpl *DOMStringImpl::split(unsigned int pos)
{
  if( pos < l ) {
    // Copy the tail first; truncate() replaces the buffer it points into.
    DOMStringImpl *tail = new DOMStringImpl(s + pos, l - pos);
    truncate(pos);
    return tail;
  }

  return new DOMStringImpl();
}

// Returns a newly allocated copy of up to len characters starting at pos.
// A pos at or beyond the end yields a new default-constructed string; a len
// reaching past the end is clamped to the end.
DOMStringImpl *DOMStringImpl::substring(unsigned int pos, unsigned int len)
{
  if( pos >=l ) return new DOMStringImpl();

  // Compare against the remaining length instead of computing pos+len: the
  // sum wraps around for large len values (e.g. UINT_MAX meaning "the rest
  // of the string"), which used to bypass the clamp entirely.
  if(len > l - pos)
    len = l - pos;

  return new DOMStringImpl(s + pos, len);
}

// Collapses white-space according to CSS 2.1 rules
//
// preserveLF: when false, every linefeed ('\n', or '\r' which is mapped to
//             '\n' first) is turned into a space before collapsing.
// preserveWS: when false, each run of spaces/linefeeds is reduced to a single
//             character: '\n' if the run contained a linefeed, ' ' otherwise.
// Tabs are always treated as spaces.
//
// Returns `this` when both flags are set, or when the output has the same
// length as the input and no linefeed was converted to a space; otherwise
// returns a newly allocated DOMStringImpl that takes over the collapsed
// buffer. The result is therefore not necessarily a distinct object.
// NOTE(review): changedLF only tracks the '\n' -> ' ' conversion, so an input
// whose only difference is a '\t' -> ' ' or '\r' -> '\n' substitution (length
// unchanged) is returned as `this`, unmodified -- confirm this is intended.
DOMStringImpl *DOMStringImpl::collapseWhiteSpace(bool preserveLF, bool preserveWS)
{
    if (preserveLF && preserveWS) return this;

    // Notice we are likely allocating more space than needed (worst case)
    TQChar *n = QT_ALLOC_QCHAR_VEC(l);

    unsigned int pos = 0;      // next write index into n
    bool collapsing = false;   // collapsing white-space
    bool collapsingLF = false; // collapsing around linefeed
    bool changedLF = false;    // a linefeed was replaced by a space
    for(unsigned int i=0; i<l; i++) {
        TQChar ch = s[i];

        // We act on \r as we would on \n because CSS uses it to indicate new-line
        if (ch == '\r') ch = '\n';
        else
        // ### The XML parser lets \t through, for now treat them as spaces
        if (ch == '\t') ch = ' ';

        if (!preserveLF && ch == '\n') {
            // ### Not strictly correct according to CSS3 text-module.
            // - In ideographic languages linefeed should be ignored
            // - and in Thai and Khmer it should be treated as a zero-width space
            ch = ' '; // Treat as space
            changedLF = true;
        }

        if (collapsing) {
            // Inside a white-space run: swallow further spaces/linefeeds,
            // remembering whether a linefeed was seen.
            if (ch == ' ')
                continue;
            if (ch == '\n') {
                collapsingLF = true;
                continue;
            }

            // The run ended: emit its single replacement character, then
            // fall through to emit the current non-white-space character.
            n[pos++] = (collapsingLF) ? '\n' : ' ';
            collapsing = false;
            collapsingLF = false;
        }
        else
        if (!preserveWS && ch == ' ') {
            // Start of a run; nothing is written until the run ends.
            collapsing = true;
            continue;
        }
        else
        if (!preserveWS && ch == '\n') {
            collapsing = true;
            collapsingLF = true;
            continue;
        }

        n[pos++] = ch;
    }
    // Flush a run that extends to the end of the string.
    if (collapsing)
        n[pos++] = ((collapsingLF) ? '\n' : ' ');

    if (pos == l && !changedLF) {
        // Same length and no linefeed conversion: discard the scratch buffer.
        QT_DELETE_QCHAR_VEC(n);
        return this;
    }
    else {
        // Hand the scratch buffer over to a fresh string object.
        DOMStringImpl* out = new DOMStringImpl();
        out->s = n;
        out->l = pos;

        return out;
    }
}

// Parses one HTML length token of l characters starting at s.
//
// Accepted shape: optional white-space, optional sign, integer digits, an
// ignored fractional part, optional white-space, then an optional '%' or '*'.
//   - empty token                          -> Length(1, Relative)
//   - integer followed by '%'              -> Length(value, Percent)
//   - integer followed by '*'              -> Length(value, Relative)
//   - integer followed by anything else    -> Length(value, Fixed)
//   - no integer, but '*' or '%' follows   -> Length(1, Relative)
//   - anything else                        -> Length(0, Relative)
static Length parseLength(const TQChar *s, unsigned int l)
{
    if (!l)
        return Length(1, Relative);

    // Locate the end of the "\s*[+-]?\d*" prefix.
    unsigned idx = 0;
    for (; idx < l && s[idx].isSpace(); ++idx) {}
    if (idx < l && (s[idx] == '+' || s[idx] == '-'))
        ++idx;
    for (; idx < l && s[idx].isDigit(); ++idx) {}

    bool parsed;
    const int value = TQConstString(s, idx).string().toInt(&parsed);

    /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */
    for (; idx < l && (s[idx].isDigit() || s[idx] == '.'); ++idx) {}

    /* IE Quirk: Skip any whitespace (20 % => 20%) */
    for (; idx < l && s[idx].isSpace(); ++idx) {}

    const bool hasSuffix = idx < l;

    if (parsed) {
        if (hasSuffix) {
            if (s[idx] == '%')
                return Length(value, Percent);
            if (s[idx] == '*')
                return Length(value, Relative);
        }
        return Length(value, Fixed);
    }

    // No usable number: a bare '*' or '%' still counts as one relative unit.
    if (hasSuffix && (s[idx] == '*' || s[idx] == '%'))
        return Length(1, Relative);

    return Length(0, Relative);
}

// Converts the string into an array of lengths, one per token. Every
// character other than a digit, '-', '*' or '.' acts as a separator.
// On return len holds the number of entries; the array is allocated with
// new[] and handed to the caller.
khtml::Length* DOMStringImpl::toCoordsArray(int& len) const
{
    // Replace every separator character with a space, then squeeze the runs
    // so that tokens are separated by exactly one space.
    TQString text(s, l);
    for (unsigned int idx = 0; idx < l; ++idx) {
        const TQChar cc = s[idx];
        const bool belowDigits = cc < TQChar('0') && cc != '-' && cc != '*' && cc != '.';
        if (cc > TQChar('9') || belowDigits)
            text[idx] = ' ';
    }
    text = text.simplifyWhiteSpace();

    len = text.contains(' ') + 1;
    khtml::Length* result = new khtml::Length[len];

    int count = 0;
    int start = 0;
    for (int sep = text.find(' ', start); sep != -1; sep = text.find(' ', start)) {
        result[count++] = parseLength(text.tqunicode() + start, sep - start);
        start = sep + 1;
    }
    // Whatever follows the last separator is the final token.
    result[count] = parseLength(text.tqunicode() + start, text.length() - start);

    return result;
}

// Converts a comma-separated list into an array of lengths. On return len
// holds the number of entries. Returns 0 (with len == 1) when the string
// contains no comma at all; otherwise the array is allocated with new[] and
// handed to the caller.
khtml::Length* DOMStringImpl::toLengthArray(int& len) const
{
    const TQString text = TQString(s, l).simplifyWhiteSpace();

    len = text.contains(',') + 1;

    // If we have no commas, we have no array.
    if (len == 1)
        return 0L;

    khtml::Length* result = new khtml::Length[len];

    int count = 0;
    int start = 0;
    for (int comma = text.find(',', start); comma != -1; comma = text.find(',', start)) {
        result[count++] = parseLength(text.tqunicode() + start, comma - start);
        start = comma + 1;
    }

    /* IE Quirk: If the last comma is the last char skip it and reduce len by one */
    if (text.length() - start > 0)
        result[count] = parseLength(text.tqunicode() + start, text.length() - start);
    else
        --len;

    return result;
}

bool DOMStringImpl::isLower() const
{
    unsigned int i;
    for (i = 0; i < l; i++)
	if (s[i].lower() != s[i])
	    return false;
    return true;
}

// Returns a newly allocated lower-cased copy of this string. For an empty
// string the result is a default-constructed DOMStringImpl.
DOMStringImpl *DOMStringImpl::lower() const
{
    DOMStringImpl *result = new DOMStringImpl;
    if (l) {
        result->s = QT_ALLOC_QCHAR_VEC(l);
        result->l = l;

        TQChar *dst = result->s;
        for (const TQChar *src = s, *end = s + l; src != end; ++src, ++dst)
            *dst = src->lower();
    }
    return result;
}

// Returns a newly allocated upper-cased copy of this string. For an empty
// string the result is a default-constructed DOMStringImpl.
DOMStringImpl *DOMStringImpl::upper() const
{
    DOMStringImpl *result = new DOMStringImpl;
    if (l) {
        result->s = QT_ALLOC_QCHAR_VEC(l);
        result->l = l;

        unsigned int idx = 0;
        while (idx < l) {
            result->s[idx] = s[idx].upper();
            ++idx;
        }
    }
    return result;
}

// Returns a newly allocated copy in which the first letter or digit of each
// white-space separated word is upper-cased. With noFirstCap set, the first
// word is left as it is and capitalisation starts after the first
// white-space character. For an empty string the result is a
// default-constructed DOMStringImpl.
DOMStringImpl *DOMStringImpl::capitalize(bool noFirstCap) const
{
    DOMStringImpl *result = new DOMStringImpl;
    if (!l)
        return result;

    result->s = QT_ALLOC_QCHAR_VEC(l);
    result->l = l;

    // True while the next letter/digit encountered starts a new word.
    bool atWordStart = !noFirstCap;
    for (unsigned int idx = 0; idx < l; ++idx)
    {
        const TQChar ch = s[idx];
        if (atWordStart && ch.isLetterOrNumber())
        {
            result->s[idx] = ch.upper();
            atWordStart = false;
            continue;
        }

        result->s[idx] = ch;
        if (ch.isSpace())
            atWordStart = true;
    }

    return result;
}

// Returns the contents as a TQString built from the l characters at s.
TQString DOMStringImpl::string() const
{
    TQString copy(s, l);
    return copy;
}

// Converts the leading "\s*[+-]?\d*" portion of the string to an integer,
// ignoring whatever follows it. ok is passed straight through to
// TQString::toInt().
int DOMStringImpl::toInt(bool* ok) const
{
    // Find the end of the numeric prefix.
    unsigned end = 0;
    for (; end < l && s[end].isSpace(); ++end) {}
    if (end < l && (s[end] == '+' || s[end] == '-'))
        ++end;
    for (; end < l && s[end].isDigit(); ++end) {}

    return TQConstString(s, end).string().toInt(ok);
}

// Replacement text for the three characters escaped by escapeHTML(), stored
// as 16-bit code units so that each table can be memcpy()'d straight into a
// TQChar buffer.
// NOTE(review): this relies on TQChar being layout-compatible with
// unsigned short -- confirm for the targeted TQt build.
static const unsigned short amp[] = {'&', 'a', 'm', 'p', ';'};
static const unsigned short lt[] =  {'&', 'l', 't', ';'};
static const unsigned short gt[] =  {'&', 'g', 't', ';'};

// Returns the string with '&', '<' and '>' replaced by "&amp;", "&lt;" and
// "&gt;". When nothing needs escaping `this` is returned; otherwise the
// result is a newly allocated DOMStringImpl.
DOMStringImpl *DOMStringImpl::escapeHTML()
{
    const unsigned ampLen = sizeof(amp) / sizeof(amp[0]); // 5, "&amp;"
    const unsigned ltLen  = sizeof(lt)  / sizeof(lt[0]);  // 4, "&lt;"
    const unsigned gtLen  = sizeof(gt)  / sizeof(gt[0]);  // 4, "&gt;"

    // First pass: work out how long the escaped text will be.
    unsigned escapedLen = 0;
    for (unsigned int idx = 0; idx < l; ++idx) {
        const TQChar ch = s[idx];
        if (ch == '&')
            escapedLen += ampLen;
        else if (ch == '<')
            escapedLen += ltLen;
        else if (ch == '>')
            escapedLen += gtLen;
        else
            escapedLen += 1;
    }

    // Same length means no character had to be expanded.
    if (escapedLen == l)
        return this;

    DOMStringImpl* escaped = new DOMStringImpl();
    escaped->s = QT_ALLOC_QCHAR_VEC(escapedLen);
    escaped->l = escapedLen;

    // Second pass: copy, expanding the special characters.
    TQChar *dst = escaped->s;
    for (unsigned int idx = 0; idx < l; ++idx) {
        const TQChar ch = s[idx];
        if (ch == '&') {
            memcpy(dst, amp, sizeof(amp));
            dst += ampLen;
        } else if (ch == '<') {
            memcpy(dst, lt, sizeof(lt));
            dst += ltLen;
        } else if (ch == '>') {
            memcpy(dst, gt, sizeof(gt));
            dst += gtLen;
        } else {
            *dst++ = ch;
        }
    }
    return escaped;
}