You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
461 lines
11 KiB
461 lines
11 KiB
/**
 * This file is part of the DOM implementation for KDE.
 *
 * Copyright (C) 1999-2003 Lars Knoll (knoll@kde.org)
 *           (C) 1999 Antti Koivisto (koivisto@kde.org)
 *           (C) 2001-2003 Dirk Mueller ( mueller@kde.org )
 *           (C) 2002 Apple Computer, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */
|
|
|
|
#include "dom_stringimpl.h"
|
|
|
|
#include <kdebug.h>
|
|
|
|
#include <string.h>
|
|
#include <qstringlist.h>
|
|
|
|
using namespace DOM;
|
|
using namespace khtml;
|
|
|
|
|
|
/**
 * Builds a string from a NUL-terminated 8-bit C string.
 *
 * Every input char is assigned to one QChar of the new buffer. A null
 * pointer or an empty string produces a zero-length string that still owns
 * a one-element buffer.
 */
DOMStringImpl::DOMStringImpl(const char *str)
{
    const bool hasContent = str && *str;
    if (!hasContent) {
        // Keep a valid buffer around even for the empty string
        // (crash protection).
        l = 0;
        s = QT_ALLOC_QCHAR_VEC( 1 );
        s[0] = 0x0; // == QChar::null;
        return;
    }

    l = strlen(str);
    s = QT_ALLOC_QCHAR_VEC( l );
    for (unsigned int idx = 0; idx < l; ++idx)
        s[idx] = str[idx];
}
|
|
|
|
// FIXME: should be a cached flag maybe.
|
|
bool DOMStringImpl::containsOnlyWhitespace() const
|
|
{
|
|
if (!s)
|
|
return true;
|
|
|
|
for (uint i = 0; i < l; i++) {
|
|
QChar c = s[i];
|
|
if (c.unicode() <= 0x7F) {
|
|
if (c.unicode() > ' ')
|
|
return false;
|
|
} else {
|
|
if (c.direction() != QChar::DirWS)
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
/**
 * Appends the characters of @p str to this string.
 *
 * A null or empty @p str leaves this string untouched. Otherwise a buffer
 * large enough for both parts is allocated, filled, and installed in place
 * of the old one, which is released.
 */
void DOMStringImpl::append(DOMStringImpl *str)
{
    if (!str || str->l == 0)
        return;

    const unsigned int newlen = l + str->l;
    QChar *c = QT_ALLOC_QCHAR_VEC(newlen);

    // s may be null (see the check before the delete below); memcpy() must
    // never be handed a null pointer, not even for a zero byte count.
    if (s)
        memcpy(c, s, l*sizeof(QChar));

    // Copy from str before releasing s: str may be this very object.
    memcpy(c+l, str->s, str->l*sizeof(QChar));

    if (s) QT_DELETE_QCHAR_VEC(s);
    s = c;
    l = newlen;
}
|
|
|
|
/**
 * Inserts the characters of @p str in front of the character at @p pos.
 *
 * A @p pos beyond the end of the string degrades to append(). A null or
 * empty @p str leaves this string untouched. Otherwise a new buffer is
 * assembled from the head of this string, @p str, and the tail of this
 * string, and the old buffer is released.
 */
void DOMStringImpl::insert(DOMStringImpl *str, unsigned int pos)
{
    if (pos > l) {
        append(str);
        return;
    }
    if (!str || str->l == 0)
        return;

    const unsigned int newlen = l + str->l;
    QChar *c = QT_ALLOC_QCHAR_VEC(newlen);

    // s may be null (see the check before the delete below); neither
    // memcpy() nor pointer arithmetic may be applied to a null pointer.
    if (s) {
        memcpy(c, s, pos*sizeof(QChar));
        memcpy(c+pos+str->l, s+pos, (l-pos)*sizeof(QChar));
    }

    // Copy from str before releasing s: str may be this very object.
    memcpy(c+pos, str->s, str->l*sizeof(QChar));

    if (s) QT_DELETE_QCHAR_VEC(s);
    s = c;
    l = newlen;
}
|
|
|
|
void DOMStringImpl::truncate(int len)
|
|
{
|
|
if(len > (int)l) return;
|
|
|
|
int nl = len < 1 ? 1 : len;
|
|
QChar *c = QT_ALLOC_QCHAR_VEC(nl);
|
|
memcpy(c, s, nl*sizeof(QChar));
|
|
if(s) QT_DELETE_QCHAR_VEC(s);
|
|
s = c;
|
|
l = len;
|
|
}
|
|
|
|
void DOMStringImpl::remove(unsigned int pos, int len)
|
|
{
|
|
if(pos >= l ) return;
|
|
if(pos+len > l)
|
|
len = l - pos;
|
|
|
|
uint newLen = l-len;
|
|
QChar *c = QT_ALLOC_QCHAR_VEC(newLen);
|
|
memcpy(c, s, pos*sizeof(QChar));
|
|
memcpy(c+pos, s+pos+len, (l-len-pos)*sizeof(QChar));
|
|
if(s) QT_DELETE_QCHAR_VEC(s);
|
|
s = c;
|
|
l = newLen;
|
|
}
|
|
|
|
/**
 * Cuts the string in two at @p pos.
 *
 * This string keeps the characters in front of @p pos; the characters from
 * @p pos on are returned as a newly allocated DOMStringImpl. When @p pos is
 * at or beyond the end, this string is left alone and a new default
 * constructed DOMStringImpl is returned.
 */
DOMStringImpl *DOMStringImpl::split(unsigned int pos)
{
    if (pos < l) {
        const uint tailLen = l - pos;
        // Build the tail first; truncate() replaces the buffer it reads from.
        DOMStringImpl *tail = new DOMStringImpl(s + pos, tailLen);
        truncate(pos);
        return tail;
    }

    return new DOMStringImpl();
}
|
|
|
|
/**
 * Returns a newly allocated copy of up to @p len characters starting at
 * @p pos.
 *
 * When @p pos is at or beyond the end a new default constructed
 * DOMStringImpl is returned. When fewer than @p len characters follow
 * @p pos, the result runs to the end of the string.
 */
DOMStringImpl *DOMStringImpl::substring(unsigned int pos, unsigned int len)
{
    if (pos >= l)
        return new DOMStringImpl();

    // Compare against the remaining length instead of computing pos+len:
    // the sum wraps around for very large len (e.g. UINT_MAX), which would
    // skip the clamp and read far beyond the buffer.
    const unsigned int remaining = l - pos;
    if (len > remaining)
        len = remaining;

    return new DOMStringImpl(s + pos, len);
}
|
|
|
|
// Collapses white-space according to CSS 2.1 rules
|
|
DOMStringImpl *DOMStringImpl::collapseWhiteSpace(bool preserveLF, bool preserveWS)
|
|
{
|
|
if (preserveLF && preserveWS) return this;
|
|
|
|
// Notice we are likely allocating more space than needed (worst case)
|
|
QChar *n = QT_ALLOC_QCHAR_VEC(l);
|
|
|
|
unsigned int pos = 0;
|
|
bool collapsing = false; // collapsing white-space
|
|
bool collapsingLF = false; // collapsing around linefeed
|
|
bool changedLF = false;
|
|
for(unsigned int i=0; i<l; i++) {
|
|
QChar ch = s[i];
|
|
|
|
// We act on \r as we would on \n because CSS uses it to indicate new-line
|
|
if (ch == '\r') ch = '\n';
|
|
else
|
|
// ### The XML parser lets \t through, for now treat them as spaces
|
|
if (ch == '\t') ch = ' ';
|
|
|
|
if (!preserveLF && ch == '\n') {
|
|
// ### Not strictly correct according to CSS3 text-module.
|
|
// - In ideographic languages linefeed should be ignored
|
|
// - and in Thai and Khmer it should be treated as a zero-width space
|
|
ch = ' '; // Treat as space
|
|
changedLF = true;
|
|
}
|
|
|
|
if (collapsing) {
|
|
if (ch == ' ')
|
|
continue;
|
|
if (ch == '\n') {
|
|
collapsingLF = true;
|
|
continue;
|
|
}
|
|
|
|
n[pos++] = (collapsingLF) ? '\n' : ' ';
|
|
collapsing = false;
|
|
collapsingLF = false;
|
|
}
|
|
else
|
|
if (!preserveWS && ch == ' ') {
|
|
collapsing = true;
|
|
continue;
|
|
}
|
|
else
|
|
if (!preserveWS && ch == '\n') {
|
|
collapsing = true;
|
|
collapsingLF = true;
|
|
continue;
|
|
}
|
|
|
|
n[pos++] = ch;
|
|
}
|
|
if (collapsing)
|
|
n[pos++] = ((collapsingLF) ? '\n' : ' ');
|
|
|
|
if (pos == l && !changedLF) {
|
|
QT_DELETE_QCHAR_VEC(n);
|
|
return this;
|
|
}
|
|
else {
|
|
DOMStringImpl* out = new DOMStringImpl();
|
|
out->s = n;
|
|
out->l = pos;
|
|
|
|
return out;
|
|
}
|
|
}
|
|
|
|
/**
 * Parses one length token of @p l characters starting at @p s.
 *
 * Accepted shape: optional white-space, optional sign, digits, an ignored
 * fractional part, optional white-space, and an optional unit character.
 *
 * Result:
 *  - empty token                          -> Length(1, Relative)
 *  - number followed by '%'               -> Length(number, Percent)
 *  - number followed by '*'               -> Length(number, Relative)
 *  - number followed by anything else     -> Length(number, Fixed)
 *  - no number, next char is '*' or '%'   -> Length(1, Relative)
 *  - no number otherwise                  -> Length(0, Relative)
 */
static Length parseLength(const QChar *s, unsigned int l)
{
    if (l == 0)
        return Length(1, Relative);

    // Find the end of the integer part: \s*[+-]?\d*
    unsigned cursor = 0;
    for (; cursor < l && s[cursor].isSpace(); ++cursor) {}
    if (cursor < l && (s[cursor] == '+' || s[cursor] == '-'))
        ++cursor;
    for (; cursor < l && s[cursor].isDigit(); ++cursor) {}

    bool ok;
    const int value = QConstString(s, cursor).string().toInt(&ok);

    /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */
    for (; cursor < l && (s[cursor].isDigit() || s[cursor] == '.'); ++cursor) {}

    /* IE Quirk: Skip any whitespace (20 % => 20%) */
    for (; cursor < l && s[cursor].isSpace(); ++cursor) {}

    // The character following the number (if any) selects the unit.
    const bool hasNext = cursor < l;
    const bool isPercent = hasNext && s[cursor] == '%';
    const bool isStar = hasNext && s[cursor] == '*';

    if (!ok)
        return Length((isStar || isPercent) ? 1 : 0, Relative);

    if (isPercent)
        return Length(value, Percent);
    if (isStar)
        return Length(value, Relative);
    return Length(value, Fixed);
}
|
|
|
|
/**
 * Splits the string into a list of lengths.
 *
 * Every character other than a digit, '-', '*' or '.' is treated as a
 * separator. The tokens between separators are converted with parseLength().
 *
 * @param len receives the number of entries in the returned array
 * @return array allocated with new[]; presumably the caller is expected to
 *         delete[] it — verify against callers
 */
khtml::Length* DOMStringImpl::toCoordsArray(int& len) const
{
    QString str(s, l);

    // Blank out everything that cannot be part of a length token.
    for (unsigned int idx = 0; idx < l; ++idx) {
        const QChar cc = s[idx];
        const bool outsideDigits = cc > '9' || cc < '0';
        const bool allowedPunct = cc == '-' || cc == '*' || cc == '.';
        if (outsideDigits && !allowedPunct)
            str[idx] = ' ';
    }
    str = str.simplifyWhiteSpace();

    len = str.contains(' ') + 1;
    khtml::Length* r = new khtml::Length[len];

    int count = 0;
    int start = 0;
    for (int sep = str.find(' ', start); sep != -1; sep = str.find(' ', start)) {
        r[count++] = parseLength(str.unicode() + start, sep - start);
        start = sep + 1;
    }
    // The token after the last separator (or the only token).
    r[count] = parseLength(str.unicode() + start, str.length() - start);

    return r;
}
|
|
|
|
/**
 * Splits the string at commas into a list of lengths.
 *
 * Each comma separated token is converted with parseLength().
 *
 * @param len receives the number of valid entries in the returned array;
 *            it is set to 1 when the string holds no comma
 * @return 0 when the string holds no comma, otherwise an array allocated
 *         with new[]; presumably the caller is expected to delete[] it —
 *         verify against callers
 */
khtml::Length* DOMStringImpl::toLengthArray(int& len) const
{
    const QString str = QString(s, l).simplifyWhiteSpace();

    len = str.contains(',') + 1;

    // If we have no commas, we have no array.
    if (len == 1)
        return 0;

    khtml::Length* r = new khtml::Length[len];

    int count = 0;
    int start = 0;
    for (int sep = str.find(',', start); sep != -1; sep = str.find(',', start)) {
        r[count++] = parseLength(str.unicode() + start, sep - start);
        start = sep + 1;
    }

    /* IE Quirk: If the last comma is the last char skip it and reduce len by one */
    const unsigned int remaining = str.length() - start;
    if (remaining > 0)
        r[count] = parseLength(str.unicode() + start, remaining);
    else
        len--;

    return r;
}
|
|
|
|
bool DOMStringImpl::isLower() const
|
|
{
|
|
unsigned int i;
|
|
for (i = 0; i < l; i++)
|
|
if (s[i].lower() != s[i])
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
/**
 * Returns a newly allocated string in which every character has been
 * passed through QChar::lower(). For an empty string a default constructed
 * DOMStringImpl is returned.
 */
DOMStringImpl *DOMStringImpl::lower() const
{
    DOMStringImpl *result = new DOMStringImpl;
    if (l == 0)
        return result;

    result->l = l;
    result->s = QT_ALLOC_QCHAR_VEC(l);

    QChar *dst = result->s;
    const QChar *src = s;
    const QChar *const end = s + l;
    while (src != end) {
        *dst = src->lower();
        ++dst;
        ++src;
    }

    return result;
}
|
|
|
|
/**
 * Returns a newly allocated string in which every character has been
 * passed through QChar::upper(). For an empty string a default constructed
 * DOMStringImpl is returned.
 */
DOMStringImpl *DOMStringImpl::upper() const
{
    DOMStringImpl *result = new DOMStringImpl;
    if (l == 0)
        return result;

    result->l = l;
    result->s = QT_ALLOC_QCHAR_VEC(l);

    QChar *dst = result->s;
    const QChar *src = s;
    const QChar *const end = s + l;
    while (src != end) {
        *dst = src->upper();
        ++dst;
        ++src;
    }

    return result;
}
|
|
|
|
/**
 * Returns a newly allocated copy in which the first letter or number after
 * each white-space character is upper-cased.
 *
 * @param noFirstCap when true, capitalization only becomes possible after
 *                   the first white-space character; when false, the first
 *                   letter or number of the string is capitalized as well
 *
 * For an empty string a default constructed DOMStringImpl is returned.
 */
DOMStringImpl *DOMStringImpl::capitalize(bool noFirstCap) const
{
    DOMStringImpl *result = new DOMStringImpl;
    if (l == 0)
        return result;

    result->l = l;
    result->s = QT_ALLOC_QCHAR_VEC(l);

    bool canCapitalize = !noFirstCap;
    for (unsigned int idx = 0; idx < l; ++idx) {
        const QChar ch = s[idx];
        const bool startsWord = canCapitalize && ch.isLetterOrNumber();

        result->s[idx] = startsWord ? ch.upper() : ch;

        if (startsWord)
            canCapitalize = false;      // only the first character of a word
        else if (ch.isSpace())
            canCapitalize = true;       // the next word may be capitalized
    }

    return result;
}
|
|
|
|
/**
 * Returns the contents as a QString constructed from the character buffer
 * and the length.
 */
QString DOMStringImpl::string() const
{
    const QString contents(s, l);
    return contents;
}
|
|
|
|
int DOMStringImpl::toInt(bool* ok) const
|
|
{
|
|
// match \s*[+-]?\d*
|
|
unsigned i = 0;
|
|
while (i < l && s[i].isSpace())
|
|
++i;
|
|
if (i < l && (s[i] == '+' || s[i] == '-'))
|
|
++i;
|
|
while (i < l && s[i].isDigit())
|
|
++i;
|
|
|
|
return QConstString(s, i).string().toInt(ok);
|
|
}
|
|
|
|
static const unsigned short amp[] = {'&', 'a', 'm', 'p', ';'};
|
|
static const unsigned short lt[] = {'&', 'l', 't', ';'};
|
|
static const unsigned short gt[] = {'&', 'g', 't', ';'};
|
|
|
|
DOMStringImpl *DOMStringImpl::escapeHTML()
|
|
{
|
|
unsigned outL = 0;
|
|
for (unsigned int i = 0; i < l; ++i ) {
|
|
if ( s[i] == '&' )
|
|
outL += 5; //&
|
|
else if (s[i] == '<' || s[i] == '>')
|
|
outL += 4; //>/<
|
|
else
|
|
++outL;
|
|
}
|
|
if (outL == l)
|
|
return this;
|
|
|
|
|
|
DOMStringImpl* toRet = new DOMStringImpl();
|
|
toRet->s = QT_ALLOC_QCHAR_VEC(outL);
|
|
toRet->l = outL;
|
|
|
|
unsigned outP = 0;
|
|
for (unsigned int i = 0; i < l; ++i ) {
|
|
if ( s[i] == '&' ) {
|
|
memcpy(&toRet->s[outP], amp, sizeof(amp));
|
|
outP += 5;
|
|
} else if (s[i] == '<') {
|
|
memcpy(&toRet->s[outP], lt, sizeof(lt));
|
|
outP += 4;
|
|
} else if (s[i] == '>') {
|
|
memcpy(&toRet->s[outP], gt, sizeof(gt));
|
|
outP += 4;
|
|
} else {
|
|
toRet->s[outP] = s[i];
|
|
++outP;
|
|
}
|
|
}
|
|
return toRet;
|
|
}
|
|
|