You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
koffice/filters/kword/rtf/import/rtfimport_tokenizer.cpp

224 lines
4.9 KiB

/*
This file is part of the KDE project
Copyright (C) 2001 Ewald Snel <ewald@rambo.its.tudelft.nl>
Copyright (C) 2001 Tomasz Grobelny <grotk@poczta.onet.pl>
Copyright (C) 2005 Tommi Rantala <tommi.rantala@cs.helsinki.fi>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
*/
#include <climits>

#include <kdebug.h>

#include "rtfimport_tokenizer.h"
/**
 * Creates a tokenizer that is not attached to any input yet.
 *
 * All read and token state is given a defined value here so that the
 * object never holds indeterminate pointers or flags before open() and
 * next() have been called.
 */
RTFTokenizer::RTFTokenizer()
{
    // tokenText must stay larger than fileBuffer: next() copies up to one
    // whole read buffer of plain text behind a one-byte prefix and then
    // appends a terminating NUL.
    tokenText.resize( 4113 );
    fileBuffer.resize( 4096 );

    // No input yet; next() returns immediately while infile is null.
    infile = 0L;

    // Empty read buffer (same state open() establishes).
    fileBufferPtr = 0L;
    fileBufferEnd = 0L;

    // Defined token state instead of indeterminate values.
    text = (tokenText.data() + 1);
    type = RTFTokenizer::PlainText;
    value = 0;
    hasParam = false;
}
/**
 * Attach the tokenizer to an input file.
 *
 * The read buffer is marked empty (both buffer pointers are null), so the
 * next call to nextChar() fetches a fresh block from the file. The current
 * token type is reset to PlainText.
 *
 * @param in the input file
 */
void RTFTokenizer::open( TQFile *in )
{
    infile = in;
    type = RTFTokenizer::PlainText;

    // Equal pointers mean "buffer exhausted".
    fileBufferPtr = fileBufferEnd = 0L;
}
/**
 * Fetch the next byte of input, refilling the read buffer when needed.
 *
 * @return the byte as a non-negative value, or -1 when the file yields no
 *         more data (readBlock() returned zero or a negative count)
 */
int RTFTokenizer::nextChar()
{
    // Common case: bytes are still pending in the buffer.
    if ( fileBufferPtr != fileBufferEnd )
        return *fileBufferPtr++;

    // Buffer exhausted: read the next block from the file.
    const int bytesRead = infile->readBlock( fileBuffer.data(), fileBuffer.size() );

    // Leave the buffer in the "empty" state in case nothing was read.
    fileBufferPtr = ( uchar* ) fileBuffer.data();
    fileBufferEnd = fileBufferPtr;

    if ( bytesRead <= 0 )
        return -1;

    fileBufferEnd += bytesRead;
    return *fileBufferPtr++;
}
/**
* Reads the next token.
*/
void RTFTokenizer::next()
{
int ch;
value=0;
if (!infile)
return;
do {
int n = nextChar();
if ( n <= 0 ) {
ch = '}';
break;
}
ch = n;
}
while (ch == '\n' || ch == '\r' && ch != 0);
// Skip one byte for prepend '@' to destinations
text = (tokenText.data() + 1);
hasParam = false;
uchar *_text = (uchar *)text;
if (ch == '{')
type = RTFTokenizer::OpenGroup;
else if (ch == '}')
type = RTFTokenizer::CloseGroup;
else if (ch == '\\')
{
type = RTFTokenizer::ControlWord;
int n = nextChar();
if ( n <= 0 ) {
// Return CloseGroup on EOF
type = RTFTokenizer::CloseGroup;
return;
}
ch = n;
// Type is either control word or control symbol
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
{
int v = 0;
// Read alphabetic string (command)
while (_text < ( uchar* )tokenText.data()+tokenText.size()-3 &&
((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) )
{
*_text++ = ch;
int n = nextChar();
if ( n <= 0 ) {
ch = ' ';
break;
}
ch = n;
}
// Read numeric parameter (param)
bool isneg = (ch == '-');
if (isneg) {
int n = nextChar();
if ( n <= 0 ) {
type = RTFTokenizer::CloseGroup;
return;
}
ch = n;
}
while (ch >= '0' && ch <= '9') {
v = (10 * v) + ch - '0';
hasParam = true;
int n = nextChar();
if ( n <= 0 )
n = ' ';
ch = n;
}
value = isneg ? -v : v;
// If delimiter is a space, it's part of the control word
if (ch != ' ')
{
--fileBufferPtr;
}
*_text = 0; // Just put an end of string for the test, it can then be over-written again
if ( !memcmp( tokenText.data()+1, "bin", 4 ) )
{ // We have \bin, so we need to read the bytes
kdDebug(30515) << "Token:" << tokenText << endl;
if (value > 0)
{
kdDebug(30515) << "\\bin" << value << endl;
type = RTFTokenizer::BinaryData;
binaryData.resize(value);
for (int i=0; i<value; i++)
{
int n = nextChar();
if ( n <= 0 ) {
type = RTFTokenizer::CloseGroup;
break;
}
binaryData[i] = n;
}
}
}
}
else if (ch=='\'')
{
// Got hex value, for example \'2d
type = RTFTokenizer::ControlWord;
*_text++ = ch;
for(int i=0;i<2;i++)
{
int n = nextChar();
if ( n <= 0 ) {
if ( i == 0 ) {
type = RTFTokenizer::CloseGroup;
return;
} else {
ch = ' ';
break;
}
}
ch = n;
hasParam = true;
value<<=4;
value=value|((ch + ((ch & 16) ? 0 : 9)) & 0xf);
}
}
else
{
type = RTFTokenizer::ControlWord;
*_text++ = ch;
}
}
else
{
type = RTFTokenizer::PlainText;
// Everything until next backslash, opener or closer
while ( ch != '\\' && ch != '{' && ch != '}' && ch != '\n' &&
ch != '\r')
{
*_text++ = ch;
if(fileBufferPtr >= fileBufferEnd)
break;
ch = *fileBufferPtr++;
}
if(fileBufferPtr < fileBufferEnd)
--fileBufferPtr; // give back the last char
}
*_text++ = 0;
}