You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
652 lines
15 KiB
652 lines
15 KiB
/* -*- c++ -*-
|
|
parser/parser.cpp
|
|
|
|
This file is part of KSieve,
|
|
the KDE internet mail/usenet news message filtering library.
|
|
Copyright (c) 2002-2003 Marc Mutz <mutz@kde.org>
|
|
|
|
KSieve is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License, version 2, as
|
|
published by the Free Software Foundation.
|
|
|
|
KSieve is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
In addition, as a special exception, the copyright holders give
|
|
permission to link the code of this program with any edition of
|
|
the TQt library by Trolltech AS, Norway (or with modified versions
|
|
of TQt that use the same license as TQt), and distribute linked
|
|
combinations including the two. You must obey the GNU General
|
|
Public License in all respects for all of the code used other than
|
|
TQt. If you modify this file, you may extend this exception to
|
|
your version of the file, but you are not obligated to do so. If
|
|
you do not wish to do so, delete this exception statement from
|
|
your version.
|
|
*/
|
|
|
|
#include <config.h>
|
|
|
|
#include <ksieve/parser.h>
|
|
#include <impl/parser.h>
|
|
|
|
#include <ksieve/error.h>
|
|
|
|
#include <tqstring.h>
|
|
|
|
#include <assert.h>
|
|
#include <limits.h> // ULONG_MAX
|
|
#include <ctype.h> // isdigit
|
|
|
|
namespace KSieve {
|
|
|
|
//
|
|
//
|
|
// Parser Bridge implementation
|
|
//
|
|
//
|
|
|
|
// Constructs the public parser bridge. All three arguments are handed
// unchanged to a freshly allocated Impl, which this object owns and
// releases again in its destructor.
Parser::Parser( const char * scursor, const char * const send, int options )
  : i( new Impl( scursor, send, options ) )
{
}
|
|
|
|
// Releases the implementation object and resets the pointer to it.
Parser::~Parser()
{
  delete i;
  i = 0;
}
|
|
|
|
// Stores builder in the implementation object's mBuilder member.
// The implementation object must exist (asserted).
void Parser::setScriptBuilder( ScriptBuilder * builder )
{
  assert( i );
  Impl & impl = *i;
  impl.mBuilder = builder;
}
|
|
|
|
// Returns the ScriptBuilder currently stored in the implementation
// object's mBuilder member. The implementation object must exist (asserted).
ScriptBuilder * Parser::scriptBuilder() const
{
  assert( i );
  const Impl & impl = *i;
  return impl.mBuilder;
}
|
|
|
|
const Error & Parser::error() const {
|
|
assert( i );
|
|
return i->error();
|
|
}
|
|
|
|
// Runs the parse by delegating to Impl::parse() and returns its result.
// The implementation object must exist (asserted).
bool Parser::parse()
{
  assert( i );
  const bool parsedOk = i->parse();
  return parsedOk;
}
|
|
|
|
}
|
|
|
|
// Maps a size quantifier character to its multiplier:
//   'k'/'K' -> 1024, 'm'/'M' -> 1024^2, 'g'/'G' -> 1024^3.
// Any other character trips an assertion; when assertions are compiled
// out, 1 is returned instead.
static inline unsigned long factorForQuantifier( char ch ) {
  const unsigned long kilo = 1024;

  if ( ch == 'k' || ch == 'K' )
    return kilo;
  if ( ch == 'm' || ch == 'M' )
    return kilo * kilo;
  if ( ch == 'g' || ch == 'G' )
    return kilo * kilo * kilo;

  assert( 0 ); // lexer should prohibit this
  return 1; // make compiler happy
}
|
|
|
|
// Returns true if computing 10 * result + add would exceed ULONG_MAX,
// i.e. if appending another decimal digit (add) to result overflows.
//
// The bound is computed with integer division on purpose: a double
// cannot represent ULONG_MAX exactly when unsigned long is 64 bits wide,
// so ULONG_MAX / 10.0 rounds to a value larger than the true quotient.
// Results just above ULONG_MAX / 10 would then slip through the first
// check and 10 * result would wrap around unnoticed.
static inline bool willOverflowULong( unsigned long result, unsigned long add ) {
  static const unsigned long maxULongByTen = ULONG_MAX / 10 ;
  // The second operand is only evaluated when 10 * result cannot wrap.
  return result > maxULongByTen || ULONG_MAX - 10 * result < add ;
}
|
|
|
|
namespace KSieve {
|
|
|
|
//
|
|
//
|
|
// Parser Implementation
|
|
//
|
|
//
|
|
|
|
// Sets up the implementation object: no current token (Lexer::None),
// a lexer constructed from the three arguments, and no script builder.
Parser::Impl::Impl( const char * scursor, const char * const send, int options )
  : mToken( Lexer::None ),
    lexer( scursor, send, options ),
    mBuilder( 0 )
{
  // nothing else to do
}
|
|
|
|
bool Parser::Impl::isStringToken() const {
|
|
return token() == Lexer::QuotedString ||
|
|
token() == Lexer::MultiLineString ;
|
|
}
|
|
|
|
|
|
bool Parser::Impl::isArgumentToken() const {
|
|
return isStringToken() ||
|
|
token() == Lexer::Number ||
|
|
token() == Lexer::Tag ||
|
|
token() == Lexer::Special && mTokenValue == "[" ;
|
|
}
|
|
|
|
bool Parser::Impl::obtainToken() {
|
|
while ( !mToken && !lexer.atEnd() && !lexer.error() ) {
|
|
mToken = lexer.nextToken( mTokenValue );
|
|
if ( lexer.error() )
|
|
break;
|
|
// comments and line feeds are semantically invisible and may
|
|
// appear anywhere, so we handle them here centrally:
|
|
switch ( token() ) {
|
|
case Lexer::HashComment:
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->hashComment( tokenValue() );
|
|
consumeToken();
|
|
break;
|
|
case Lexer::BracketComment:
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->bracketComment( tokenValue() );
|
|
consumeToken();
|
|
break;
|
|
case Lexer::LineFeeds:
|
|
for ( unsigned int i = 0, end = tokenValue().toUInt() ; i < end ; ++i )
|
|
if ( scriptBuilder() ) // better check every iteration, b/c
|
|
// we call out to ScriptBuilder,
|
|
// where nasty things might happen!
|
|
scriptBuilder()->lineFeed();
|
|
consumeToken();
|
|
break;
|
|
default: ; // make compiler happy
|
|
}
|
|
}
|
|
if ( lexer.error() && scriptBuilder() )
|
|
scriptBuilder()->error( lexer.error() );
|
|
return !lexer.error();
|
|
}
|
|
|
|
bool Parser::Impl::parse() {
|
|
// this is the entry point: START := command-list
|
|
if ( !parseCommandList() )
|
|
return false;
|
|
if ( !atEnd() ) {
|
|
makeUnexpectedTokenError( Error::ExpectedCommand );
|
|
return false;
|
|
}
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->finished();
|
|
return true;
|
|
}
|
|
|
|
|
|
bool Parser::Impl::parseCommandList() {
|
|
// our ABNF:
|
|
// command-list := *comand
|
|
|
|
while ( !atEnd() ) {
|
|
if ( !obtainToken() )
|
|
return false;
|
|
if ( token() == Lexer::None )
|
|
continue;
|
|
if ( token() != Lexer::Identifier )
|
|
return true;
|
|
if ( !parseCommand() ) {
|
|
assert( error() );
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
bool Parser::Impl::parseCommand() {
|
|
// command := identifier arguments ( ";" / block )
|
|
// arguments := *argument [ test / test-list ]
|
|
// block := "{" *command "}"
|
|
// our ABNF:
|
|
// block := "{" [ command-list ] "}"
|
|
|
|
if ( atEnd() )
|
|
return false;
|
|
|
|
//
|
|
// identifier
|
|
//
|
|
|
|
if ( !obtainToken() || token() != Lexer::Identifier )
|
|
return false;
|
|
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->commandStart( tokenValue() );
|
|
consumeToken();
|
|
|
|
//
|
|
// *argument
|
|
//
|
|
|
|
if ( !obtainToken() )
|
|
return false;
|
|
|
|
if ( atEnd() ) {
|
|
makeError( Error::MissingSemicolonOrBlock );
|
|
return false;
|
|
}
|
|
|
|
if ( isArgumentToken() && !parseArgumentList() ) {
|
|
assert( error() );
|
|
return false;
|
|
}
|
|
|
|
//
|
|
// test / test-list
|
|
//
|
|
|
|
if ( !obtainToken() )
|
|
return false;
|
|
|
|
if ( atEnd() ) {
|
|
makeError( Error::MissingSemicolonOrBlock );
|
|
return false;
|
|
}
|
|
|
|
if ( token() == Lexer::Special && tokenValue() == "(" ) { // test-list
|
|
if ( !parseTestList() ) {
|
|
assert( error() );
|
|
return false;
|
|
}
|
|
} else if ( token() == Lexer::Identifier ) { // should be test:
|
|
if ( !parseTest() ) {
|
|
assert( error() );
|
|
return false;
|
|
}
|
|
}
|
|
|
|
//
|
|
// ";" / block
|
|
//
|
|
|
|
if ( !obtainToken() )
|
|
return false;
|
|
|
|
if ( atEnd() ) {
|
|
makeError( Error::MissingSemicolonOrBlock );
|
|
return false;
|
|
}
|
|
|
|
if ( token() != Lexer::Special ) {
|
|
makeUnexpectedTokenError( Error::ExpectedBlockOrSemicolon );
|
|
return false;
|
|
}
|
|
|
|
if ( tokenValue() == ";" )
|
|
consumeToken();
|
|
else if ( tokenValue() == "{" ) { // block
|
|
if ( !parseBlock() )
|
|
return false; // it's an error since we saw '{'
|
|
} else {
|
|
makeError( Error::MissingSemicolonOrBlock );
|
|
return false;
|
|
}
|
|
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->commandEnd();
|
|
return true;
|
|
}
|
|
|
|
|
|
bool Parser::Impl::parseArgumentList() {
|
|
// our ABNF:
|
|
// argument-list := *argument
|
|
|
|
while ( !atEnd() ) {
|
|
if ( !obtainToken() )
|
|
return false;
|
|
if ( !isArgumentToken() )
|
|
return true;
|
|
if ( !parseArgument() )
|
|
return !error();
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
bool Parser::Impl::parseArgument() {
|
|
// argument := string-list / number / tag
|
|
|
|
if ( !obtainToken() || atEnd() )
|
|
return false;
|
|
|
|
if ( token() == Lexer::Number ) {
|
|
if ( !parseNumber() ) {
|
|
assert( error() );
|
|
return false;
|
|
}
|
|
return true;
|
|
} else if ( token() == Lexer::Tag ) {
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->taggedArgument( tokenValue() );
|
|
consumeToken();
|
|
return true;
|
|
} else if ( isStringToken() ) {
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->stringArgument( tokenValue(), token() == Lexer::MultiLineString, TQString() );
|
|
consumeToken();
|
|
return true;
|
|
} else if ( token() == Lexer::Special && tokenValue() == "[" ) {
|
|
if ( !parseStringList() ) {
|
|
assert( error() );
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
bool Parser::Impl::parseTestList() {
|
|
// test-list := "(" test *("," test) ")"
|
|
|
|
if ( !obtainToken() || atEnd() )
|
|
return false;
|
|
|
|
if ( token() != Lexer::Special || tokenValue() != "(" )
|
|
return false;
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->testListStart();
|
|
consumeToken();
|
|
|
|
// generic while/switch construct for comma-separated lists. See
|
|
// parseStringList() for another one. Any fix here is like to apply there, too.
|
|
bool lastWasComma = true;
|
|
while ( !atEnd() ) {
|
|
if ( !obtainToken() )
|
|
return false;
|
|
|
|
switch ( token() ) {
|
|
case Lexer::None:
|
|
break;
|
|
case Lexer::Special:
|
|
assert( tokenValue().length() == 1 );
|
|
assert( tokenValue()[0].latin1() );
|
|
switch ( tokenValue()[0].latin1() ) {
|
|
case ')':
|
|
consumeToken();
|
|
if ( lastWasComma ) {
|
|
makeError( Error::ConsecutiveCommasInTestList );
|
|
return false;
|
|
}
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->testListEnd();
|
|
return true;
|
|
case ',':
|
|
consumeToken();
|
|
if( lastWasComma ) {
|
|
makeError( Error::ConsecutiveCommasInTestList );
|
|
return false;
|
|
}
|
|
lastWasComma = true;
|
|
break;
|
|
default:
|
|
makeError( Error::NonStringInStringList );
|
|
return false;
|
|
}
|
|
break;
|
|
|
|
case Lexer::Identifier:
|
|
if ( !lastWasComma ) {
|
|
makeError( Error::MissingCommaInTestList );
|
|
return false;
|
|
} else {
|
|
lastWasComma = false;
|
|
if ( !parseTest() ) {
|
|
assert( error() );
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
makeUnexpectedTokenError( Error::NonTestInTestList );
|
|
return false;
|
|
}
|
|
}
|
|
|
|
makeError( Error::PrematureEndOfTestList );
|
|
return false;
|
|
}
|
|
|
|
|
|
bool Parser::Impl::parseTest() {
|
|
// test := identifier arguments
|
|
// arguments := *argument [ test / test-list ]
|
|
|
|
//
|
|
// identifier
|
|
//
|
|
|
|
if ( !obtainToken() || atEnd() )
|
|
return false;
|
|
|
|
if ( token() != Lexer::Identifier )
|
|
return false;
|
|
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->testStart( tokenValue() );
|
|
consumeToken();
|
|
|
|
//
|
|
// *argument
|
|
//
|
|
|
|
if ( !obtainToken() )
|
|
return false;
|
|
|
|
if ( atEnd() ) // a test w/o args
|
|
goto TestEnd;
|
|
|
|
if ( isArgumentToken() && !parseArgumentList() ) {
|
|
assert( error() );
|
|
return false;
|
|
}
|
|
|
|
//
|
|
// test / test-list
|
|
//
|
|
|
|
if ( !obtainToken() )
|
|
return false;
|
|
|
|
if ( atEnd() ) // a test w/o nested tests
|
|
goto TestEnd;
|
|
|
|
if ( token() == Lexer::Special && tokenValue() == "(" ) { // test-list
|
|
if ( !parseTestList() ) {
|
|
assert( error() );
|
|
return false;
|
|
}
|
|
} else if ( token() == Lexer::Identifier ) { // should be test:
|
|
if ( !parseTest() ) {
|
|
assert( error() );
|
|
return false;
|
|
}
|
|
}
|
|
|
|
TestEnd:
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->testEnd();
|
|
return true;
|
|
}
|
|
|
|
|
|
bool Parser::Impl::parseBlock() {
|
|
// our ABNF:
|
|
// block := "{" [ command-list ] "}"
|
|
|
|
if ( !obtainToken() || atEnd() )
|
|
return false;
|
|
|
|
if ( token() != Lexer::Special || tokenValue() != "{" )
|
|
return false;
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->blockStart();
|
|
consumeToken();
|
|
|
|
if ( !obtainToken() )
|
|
return false;
|
|
|
|
if ( atEnd() ) {
|
|
makeError( Error::PrematureEndOfBlock );
|
|
return false;
|
|
}
|
|
|
|
if ( token() == Lexer::Identifier ) {
|
|
if ( !parseCommandList() ) {
|
|
assert( error() );
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if ( !obtainToken() )
|
|
return false;
|
|
|
|
if ( atEnd() ) {
|
|
makeError( Error::PrematureEndOfBlock );
|
|
return false;
|
|
}
|
|
|
|
if ( token() != Lexer::Special || tokenValue() != "}" ) {
|
|
makeError( Error::NonCommandInCommandList );
|
|
return false;
|
|
}
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->blockEnd();
|
|
consumeToken();
|
|
return true;
|
|
}
|
|
|
|
bool Parser::Impl::parseStringList() {
|
|
// string-list := "[" string *("," string) "]" / string
|
|
// ;; if there is only a single string, the brackets are optional
|
|
//
|
|
// However, since strings are already handled separately from
|
|
// string lists in parseArgument(), our ABNF is modified to:
|
|
// string-list := "[" string *("," string) "]"
|
|
|
|
if ( !obtainToken() || atEnd() )
|
|
return false;
|
|
|
|
if ( token() != Lexer::Special || tokenValue() != "[" )
|
|
return false;
|
|
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->stringListArgumentStart();
|
|
consumeToken();
|
|
|
|
// generic while/switch construct for comma-separated lists. See
|
|
// parseTestList() for another one. Any fix here is like to apply there, too.
|
|
bool lastWasComma = true;
|
|
while ( !atEnd() ) {
|
|
if ( !obtainToken() )
|
|
return false;
|
|
|
|
switch ( token() ) {
|
|
case Lexer::None:
|
|
break;
|
|
case Lexer::Special:
|
|
assert( tokenValue().length() == 1 );
|
|
switch ( tokenValue()[0].latin1() ) {
|
|
case ']':
|
|
consumeToken();
|
|
if ( lastWasComma ) {
|
|
makeError( Error::ConsecutiveCommasInStringList );
|
|
return false;
|
|
}
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->stringListArgumentEnd();
|
|
return true;
|
|
case ',':
|
|
consumeToken();
|
|
if ( lastWasComma ) {
|
|
makeError( Error::ConsecutiveCommasInStringList );
|
|
return false;
|
|
}
|
|
lastWasComma = true;
|
|
break;
|
|
default:
|
|
makeError( Error::NonStringInStringList );
|
|
return false;
|
|
}
|
|
break;
|
|
|
|
case Lexer::QuotedString:
|
|
case Lexer::MultiLineString:
|
|
if ( !lastWasComma ) {
|
|
makeError( Error::MissingCommaInStringList );
|
|
return false;
|
|
}
|
|
lastWasComma = false;
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->stringListEntry( tokenValue(), token() == Lexer::MultiLineString, TQString() );
|
|
consumeToken();
|
|
break;
|
|
|
|
default:
|
|
makeError( Error::NonStringInStringList );
|
|
return false;
|
|
}
|
|
}
|
|
|
|
makeError( Error::PrematureEndOfStringList );
|
|
return false;
|
|
}
|
|
|
|
bool Parser::Impl::parseNumber() {
|
|
// The lexer returns the number including the quantifier as a
|
|
// single token value. Here, we split is an check that the number
|
|
// is not out of range:
|
|
|
|
if ( !obtainToken() || atEnd() )
|
|
return false;
|
|
|
|
if ( token() != Lexer::Number )
|
|
return false;
|
|
|
|
// number:
|
|
unsigned long result = 0;
|
|
unsigned int i = 0;
|
|
const TQCString s = tokenValue().latin1();
|
|
for ( const unsigned int len = s.length() ; i < len && isdigit( s[i] ) ; ++i ) {
|
|
const unsigned long digitValue = s[i] - '0' ;
|
|
if ( willOverflowULong( result, digitValue ) ) {
|
|
makeError( Error::NumberOutOfRange );
|
|
return false;
|
|
} else {
|
|
result *= 10 ; result += digitValue ;
|
|
}
|
|
}
|
|
|
|
// optional quantifier:
|
|
char quantifier = '\0';
|
|
if ( i < s.length() ) {
|
|
assert( i + 1 == s.length() );
|
|
quantifier = s[i];
|
|
const unsigned long factor = factorForQuantifier( quantifier );
|
|
if ( result > double(ULONG_MAX) / double(factor) ) {
|
|
makeError( Error::NumberOutOfRange );
|
|
return false;
|
|
}
|
|
result *= factor;
|
|
}
|
|
|
|
if ( scriptBuilder() )
|
|
scriptBuilder()->numberArgument( result, quantifier );
|
|
consumeToken();
|
|
return true;
|
|
}
|
|
|
|
} // namespace KSieve
|