// tdevelop/lib/cppparser/lexer.h

/* This file is part of TDevelop
Copyright (C) 2002,2003 Roberto Raggi <roberto@kdevelop.org>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#ifndef LEXER_H
#define LEXER_H
#include "driver.h"
#include <tqstring.h>
#include <tqmap.h>
#include <tqvaluestack.h>
#include <tqpair.h>
#include <tqptrvector.h>
#include <hashedstring.h>
#include <ext/hash_map>
#define CHARTYPE TQChar
/**
 * Token type codes produced by the lexer.
 *
 * The values are positional: literals/identifiers start at 1000 and
 * operators/keywords start at 2000, each following enumerator being the
 * previous one plus one. Do not reorder or remove entries, as that would
 * shift every later value.
 * NOTE(review): values below 1000 are presumably left free so that
 * single-character tokens can use their character code - confirm in lexer.cpp.
 */
enum Type {
// End of input.
Token_eof = 0,
// Identifiers, literals and other "free text" tokens (1000...).
Token_identifier = 1000,
Token_number_literal,
Token_char_literal,
Token_string_literal,
Token_whitespaces,
Token_comment,
Token_preproc,
// Multi-character operators (2000...); see findOperator2()/findOperator3().
Token_assign = 2000,
Token_ptrmem,
Token_ellipsis,
Token_scope,
Token_shift,
Token_eq,
Token_leq,
Token_geq,
Token_incr,
Token_decr,
Token_arrow,
Token_concat,
// DCOP / Qt specific keywords.
Token_K_DCOP,
Token_k_dcop,
Token_k_dcop_signals,
Token_Q_OBJECT,
// NOTE(review): enumerator with an empty suffix; looks like residue of an
// automated rename. Kept because removing it would renumber all later values.
Token_,
Token_signals,
Token_slots,
Token_emit,
Token_foreach, // qt4 [erbsland]
// C++ keywords and alternative operator spellings, in alphabetical order.
Token_and,
Token_and_eq,
Token_asm,
Token_auto,
Token_bitand,
Token_bitor,
Token_bool,
Token_break,
Token_case,
Token_catch,
Token_char,
Token_class,
Token_compl,
Token_const,
Token_const_cast,
Token_continue,
Token_default,
Token_delete,
Token_do,
Token_double,
Token_dynamic_cast,
Token_else,
Token_enum,
Token_explicit,
Token_export,
Token_extern,
Token_false,
Token_float,
Token_for,
Token_friend,
Token_goto,
Token_if,
Token_inline,
Token_int,
Token_long,
Token_mutable,
Token_namespace,
Token_new,
Token_not,
Token_not_eq,
Token_operator,
Token_or,
Token_or_eq,
Token_private,
Token_protected,
Token_public,
Token_register,
Token_reinterpret_cast,
Token_return,
Token_short,
Token_signed,
Token_sizeof,
Token_static,
Token_static_cast,
Token_struct,
Token_switch,
Token_template,
Token_this,
Token_throw,
Token_true,
Token_try,
Token_typedef,
Token_typeid,
Token_typename,
Token_union,
Token_unsigned,
Token_using,
Token_virtual,
Token_void,
Token_volatile,
Token_wchar_t,
Token_while,
Token_xor,
Token_xor_eq
};
/**
 * How a word registered with Lexer::addSkipWord() is to be skipped.
 * NOTE(review): judging by the names, SkipWord skips just the word and
 * SkipWordAndArguments also skips a following argument list - confirm in lexer.cpp.
 */
enum SkipType {
SkipWord,
SkipWordAndArguments
};
struct LexerData;
/**
 * A single lexical token: a type code, a (position, length) slice of a
 * source string, and start/end line/column coordinates.
 *
 * The token stores a *reference* to the string it was constructed with
 * (m_text); text() extracts the slice on demand. The referenced string must
 * therefore outlive the token.
 *
 * The constructors, assignment, comparison and the int conversion are declared
 * before the first access specifier and are therefore private; they are only
 * reachable from the friend classes Lexer and Parser.
 */
class Token
{
// Private construction/copy interface (usable by Lexer and Parser only).
Token(const TQString &);
Token( int type, int position, int length, const TQString& text );
Token( const Token& source );
Token& operator = ( const Token& source );
bool operator == ( const Token& token ) const;
// Implicit conversion to the type code (returns m_type).
operator int () const;
public:
// True for an end-of-file token or a token of length 0.
bool isNull() const;
int type() const;
void setType( int type );
// The get*Position() functions accept null pointers for values the caller does not need.
void getStartPosition( int* line, int* column ) const;
void setStartPosition( int line, int column );
void getEndPosition( int* line, int* column ) const;
void setEndPosition( int line, int column );
unsigned int length() const;
void setLength( unsigned int length );
// Offset of the token inside the referenced text.
int position() const;
void setPosition( int position );
// Returns a copy of the token's slice of the referenced text.
TQString text() const;
private:
int m_type;
int m_position;
int m_length;
int m_startLine;
int m_startColumn;
int m_endLine;
int m_endColumn;
// Reference, not a copy: cannot be re-bound by operator=.
const TQString & m_text;
friend class Lexer;
friend class Parser;
}; // class Token
/**
 * Tokenizer for C++ source text with a built-in preprocessor.
 *
 * The source is held in m_source and scanned through the raw pointers
 * m_ptr / m_endPtr, with m_currentChar caching the character at m_ptr.
 * Tokens are stored in m_tokens (m_size entries) and handed out through
 * nextToken() / lookAhead() / tokenAt() using the cursor m_index.
 *
 * The copy constructor and assignment operator are declared private at the
 * end of the class (no definition is visible in this header).
 */
class Lexer
{
public:
Lexer( Driver* driver );
~Lexer();
// Configuration flags; the accessors are defined inline below the class.
bool recordComments() const;
void setRecordComments( bool record );
bool recordWhiteSpaces() const;
void setRecordWhiteSpaces( bool record );
bool reportWarnings() const;
void setReportWarnings( bool enable );
bool reportMessages() const;
void setReportMessages( bool enable );
bool skipWordsEnabled() const;
void setSkipWordsEnabled( bool enabled );
bool preprocessorEnabled() const;
void setPreprocessorEnabled( bool enabled );
void resetSkipWords();
void addSkipWord( const TQString& word, SkipType skipType=SkipWord, const TQString& str = TQString() );
TQString source() const;
void setSource( const TQString& source );
// Position of the token cursor used by nextToken()/lookAhead().
int index() const;
void setIndex( int index );
// Returns the number of lines that were skipped due to #ifdef's
int skippedLines() const;
void reset();
// Token access; tokenAt() and lookAhead() clamp the index to the last token.
const Token& tokenAt( int position ) const;
const Token& nextToken();
const Token& lookAhead( int n ) const;
static int toInt( const Token& token );
int tokenPosition( const Token& token ) const;
void getTokenPosition( const Token& token, int* line, int* col );
int currentLine() const { return m_currentLine; }
int currentColumn() const { return m_currentColumn; }
// Pointer to the character at index `offset` of the source buffer.
inline const CHARTYPE* offset( int offset ) const {
return m_source.unicode() + offset;
}
// Inverse of offset(): index of `p` relative to the start of the source buffer.
inline int getOffset( const TQChar* p ) const {
return int(p - (m_source.unicode()));
}
private:
// Moves the logical end of input and refreshes the cached current character,
// which becomes TQChar::null when m_ptr is at or beyond the new end.
void setEndPtr( const TQChar* c ) {
m_endPtr = c;
if( m_ptr < m_endPtr )
m_currentChar = *m_ptr;
else
m_currentChar = TQChar::null;
}
const TQChar currentChar() const;
TQChar peekChar( int n=1 ) const;
int currentPosition() const;
void insertCurrent( const TQString& str );
void tokenize();
void nextToken( Token& token, bool stopOnNewline=false );
void nextChar();
void nextChar( int n );
void skip( int l, int r );
// Low-level scanners; each advances the read pointer over one lexical element.
void readIdentifier();
void readWhiteSpaces( bool skipNewLine=true, bool skipOnlyOnce=false );
void readLineComment();
void readMultiLineComment();
void readCharLiteral();
void readStringLiteral();
void readNumberLiteral();
// Return the Token_* code of a 3- or 2-character operator at the read pointer, or -1.
int findOperator3() const;
int findOperator2() const;
bool eof() const;
// preprocessor (based on an article of Al Stevens on Dr.Dobb's journal)
int testIfLevel();
int macroDefined();
TQString readArgument();
int macroPrimary();
int macroMultiplyDivide();
int macroAddSubtract();
int macroRelational();
int macroEquality();
int macroBoolAnd();
int macroBoolXor();
int macroBoolOr();
int macroLogicalAnd();
int macroLogicalOr();
int macroExpression();
void handleDirective( const TQString& directive );
void processDefine( Macro& macro );
void processElse();
void processElif();
void processEndif();
void processIf();
void processIfdef();
void processIfndef();
void processInclude();
void processUndef();
private:
LexerData* d;
Driver* m_driver;
// Token storage: m_size entries, m_index is the read cursor.
TQPtrVector< Token > m_tokens;
int m_size;
int m_index;
// Source text and scanning state.
TQString m_source;
const TQChar* m_ptr;
const TQChar* m_endPtr;
TQChar m_currentChar;
bool m_recordComments;
bool m_recordWhiteSpaces;
// Set to true by nextChar() whenever a '\n' is consumed.
bool m_startLine;
__gnu_cxx::hash_map< HashedString, TQPair<SkipType, TQString> > m_words;
int m_skippedLines;
int m_currentLine;
int m_currentColumn;
bool m_skipWordsEnabled;
// preprocessor
TQMemArray<bool> m_skipping;
TQMemArray<bool> m_trueTest;
int m_ifLevel;
bool m_preprocessorEnabled;
bool m_inPreproc;
bool m_reportWarnings;
bool m_reportMessages;
private:
// Declared private: Lexer objects are not meant to be copied from outside.
Lexer( const Lexer& source );
void operator = ( const Lexer& source );
};
/**
 * Builds an untyped (type -1), empty token bound to @p text.
 *
 * Only a reference to @p text is stored, so the string must outlive the token.
 * The line/column members are zero-initialised: the copy constructor,
 * operator= and operator== all read them, so leaving them indeterminate
 * would make those operations read uninitialised memory.
 */
inline Token::Token(const TQString & text)
    : m_type( -1 ),
      m_position( 0 ),
      m_length( 0 ),
      m_startLine( 0 ),
      m_startColumn( 0 ),
      m_endLine( 0 ),
      m_endColumn( 0 ),
      m_text( text )
{
}
/**
 * Builds a token of the given @p type covering @p length characters of
 * @p text starting at offset @p position.
 *
 * Only a reference to @p text is stored, so the string must outlive the token.
 * The line/column members are zero-initialised until setStartPosition() /
 * setEndPosition() are called: the copy constructor, operator= and operator==
 * read them, so they must not be left indeterminate.
 */
inline Token::Token( int type, int position, int length, const TQString& text )
    : m_type( type ),
      m_position( position ),
      m_length( length ),
      m_startLine( 0 ),
      m_startColumn( 0 ),
      m_endLine( 0 ),
      m_endColumn( 0 ),
      m_text( text )
{
}
// Copy constructor: copies every field and binds m_text to the same string
// that `source` refers to (the text itself is not copied).
inline Token::Token( const Token& source )
: m_type( source.m_type ),
m_position( source.m_position ),
m_length( source.m_length ),
m_startLine( source.m_startLine ),
m_startColumn( source.m_startColumn ),
m_endLine( source.m_endLine ),
m_endColumn( source.m_endColumn ),
m_text( source.m_text )
{
}
/**
 * Copies type, position, length and line/column data from @p source.
 *
 * The text reference is intentionally left alone: m_text is a reference
 * member and cannot be re-bound, so this token keeps referring to the
 * string it was constructed with.
 */
inline Token& Token::operator = ( const Token& source )
{
    // slice description
    m_type     = source.m_type;
    m_position = source.m_position;
    m_length   = source.m_length;

    // source coordinates
    m_startLine   = source.m_startLine;
    m_startColumn = source.m_startColumn;
    m_endLine     = source.m_endLine;
    m_endColumn   = source.m_endColumn;

    return *this;
}
// Implicit conversion to int: yields the token's type code (same value as type()).
inline Token::operator int () const
{
return m_type;
}
/**
 * Two tokens are equal when type, position, length, all four line/column
 * values and the referenced text compare equal.
 */
inline bool Token::operator == ( const Token& token ) const
{
    // Cheap integer fields first; the string comparison comes last.
    if( m_type != token.m_type )
        return false;
    if( m_position != token.m_position )
        return false;
    if( m_length != token.m_length )
        return false;
    if( m_startLine != token.m_startLine )
        return false;
    if( m_startColumn != token.m_startColumn )
        return false;
    if( m_endLine != token.m_endLine )
        return false;
    if( m_endColumn != token.m_endColumn )
        return false;
    return m_text == token.m_text;
}
/**
 * A token is "null" when it is the end-of-file token or covers no characters.
 */
inline bool Token::isNull() const
{
    if( m_type == Token_eof )
        return true;
    return m_length == 0;
}
// Returns the token's type code; -1 for a token built from text only and never typed.
inline int Token::type() const
{
return m_type;
}
// Sets the token's type code.
inline void Token::setType( int type )
{
m_type = type;
}
// Returns the offset of the token inside the referenced text (used by text()).
inline int Token::position() const
{
return m_position;
}
// Returns a copy of the token's characters: the slice of the referenced text
// starting at m_position and m_length characters long.
inline TQString Token::text() const
{
return m_text.mid(m_position, m_length);
}
// Records the line and column at which the token starts.
inline void Token::setStartPosition( int line, int column )
{
m_startLine = line;
m_startColumn = column;
}
// Records the line and column at which the token ends.
inline void Token::setEndPosition( int line, int column )
{
m_endLine = line;
m_endColumn = column;
}
/**
 * Writes the token's start line/column through the given pointers.
 * Either pointer may be null, in which case that value is not reported.
 */
inline void Token::getStartPosition( int* line, int* column ) const
{
    if( line != 0 ){
        *line = m_startLine;
    }
    if( column != 0 ){
        *column = m_startColumn;
    }
}
/**
 * Writes the token's end line/column through the given pointers.
 * Either pointer may be null, in which case that value is not reported.
 */
inline void Token::getEndPosition( int* line, int* column ) const
{
    if( line != 0 ){
        *line = m_endLine;
    }
    if( column != 0 ){
        *column = m_endColumn;
    }
}
// Sets the offset of the token inside the referenced text.
inline void Token::setPosition( int position )
{
m_position = position;
}
// Returns the number of characters the token covers.
// Note: the value is stored as int (m_length) and converted to unsigned on return.
inline unsigned int Token::length() const
{
return m_length;
}
// Sets the number of characters the token covers.
// Note: the unsigned argument is stored in the int member m_length.
inline void Token::setLength( unsigned int length )
{
m_length = length;
}
// Returns the flag set by setRecordComments().
inline bool Lexer::recordComments() const
{
return m_recordComments;
}
// Stores the "record comments" flag.
// NOTE(review): presumably controls whether comment tokens are kept by tokenize() - confirm in lexer.cpp.
inline void Lexer::setRecordComments( bool record )
{
m_recordComments = record;
}
// Returns the flag set by setRecordWhiteSpaces().
inline bool Lexer::recordWhiteSpaces() const
{
return m_recordWhiteSpaces;
}
// Stores the "record white spaces" flag.
// NOTE(review): presumably controls whether whitespace tokens are kept by tokenize() - confirm in lexer.cpp.
inline void Lexer::setRecordWhiteSpaces( bool record )
{
m_recordWhiteSpaces = record;
}
// Returns the lexer's source text (by value).
inline TQString Lexer::source() const
{
return m_source;
}
// Returns the token cursor: the index of the token nextToken() will hand out next.
inline int Lexer::index() const
{
return m_index;
}
// Moves the token cursor. No range check is performed here.
inline void Lexer::setIndex( int index )
{
m_index = index;
}
/**
 * Returns the token at the cursor and advances the cursor by one.
 *
 * Once the cursor has moved past the last token, the last token is returned
 * again and the cursor is left unchanged. This mirrors the clamping done by
 * tokenAt() and lookAhead(); indexing m_tokens with a cursor >= m_size would
 * dereference a slot past the last stored token.
 * NOTE(review): the last stored token is presumably the Token_eof marker -
 * confirm in tokenize().
 */
inline const Token& Lexer::nextToken()
{
    if( m_index < m_size )
        return *m_tokens[ m_index++ ];

    // Exhausted: stay on the last valid token instead of reading past the end.
    return *m_tokens[ m_size - 1 ];
}
/**
 * Returns the token stored at index @p n; indices beyond the last token are
 * clamped to the last token. No check is made for negative indices.
 */
inline const Token& Lexer::tokenAt( int n ) const
{
    const int clamped = TQMIN(n, m_size-1);
    return *m_tokens[ clamped ];
}
/**
 * Returns the token @p n positions after the cursor without moving the
 * cursor; positions beyond the last token are clamped to the last token.
 */
inline const Token& Lexer::lookAhead( int n ) const
{
    const int clamped = TQMIN(m_index + n, m_size-1);
    return *m_tokens[ clamped ];
}
// Returns the source offset of `token`; simply forwards to Token::position().
inline int Lexer::tokenPosition( const Token& token ) const
{
return token.position();
}
/**
 * Advances the read pointer by one character and updates line/column
 * bookkeeping.
 *
 * Consuming a '\n' increments the line counter, resets the column to 0 and
 * sets m_startLine; any other character just increments the column. The
 * cached current character becomes TQChar::null once the read pointer
 * reaches m_endPtr.
 * Note: the character at m_ptr is read unconditionally, so callers are
 * expected to call this only while m_ptr points at a valid character.
 */
inline void Lexer::nextChar()
{
    const bool atNewline = ( *m_ptr == '\n' );
    if( atNewline ){
        ++m_currentLine;
        m_currentColumn = 0;
        m_startLine = true;
    } else {
        ++m_currentColumn;
    }

    ++m_ptr;
    m_currentChar = ( m_ptr < m_endPtr ) ? *m_ptr : TQChar::null;
}
/**
 * Advances the read pointer by @p n characters at once.
 *
 * Unlike the single-step overload this does not look at the skipped
 * characters: the column is simply increased by @p n and the line counter
 * is never touched, so it must only be used on spans without newlines.
 */
inline void Lexer::nextChar( int n )
{
    m_ptr += n;
    m_currentColumn += n;
    m_currentChar = ( m_ptr < m_endPtr ) ? *m_ptr : TQChar::null;
}
inline void Lexer::readIdentifier()
{
while( currentChar().isLetterOrNumber() || currentChar() == '_' )
nextChar();
}
inline void Lexer::readWhiteSpaces( bool skipNewLine, bool skipOnlyOnce )
{
while( !currentChar().isNull() ){
TQChar ch = currentChar();
if( ch == '\n' && !skipNewLine ){
break;
} else if( ch.isSpace() ){
nextChar();
} else if( m_inPreproc && currentChar() == '\\' ){
nextChar();
readWhiteSpaces( true, true );
} else {
break;
}
if( skipOnlyOnce && ch == '\n' ) {
skipNewLine = false;
}
}
}
//little hack for better performance
inline bool isTodo( const TQString& txt, int position ) {
if( txt.length() < position + 4 ) return false;
return (txt[ position ] == 't' || txt[ position ] == 'T')
&& (txt[ position+1 ] == 'o' || txt[ position+1 ] == 'O')
&& (txt[ position+2 ] == 'd' || txt[ position+2 ] == 'D')
&& (txt[ position+3 ] == 'o' || txt[ position+3 ] == 'O');
}
inline bool isFixme( const TQString& txt, int position ) {
if( txt.length() < position + 5 ) return false;
return (txt[ position ] == 'f' || txt[ position ] == 'F')
&& (txt[ position+1 ] == 'i' || txt[ position+1 ] == 'I')
&& (txt[ position+2 ] == 'x' || txt[ position+2 ] == 'X')
&& (txt[ position+3 ] == 'm' || txt[ position+3 ] == 'M')
&& (txt[ position+4 ] == 'e' || txt[ position+4 ] == 'E');
}
/**
 * Consumes the rest of a line comment, stopping in front of the terminating
 * '\n' (or at the end of input).
 *
 * When message reporting is enabled, every "todo" / "fixme" keyword
 * (any letter case) found on the way is reported to the driver as a
 * Problem of level Level_Todo / Level_Fixme. The problem text is what
 * follows the keyword up to the end of the line or a closing comment
 * marker, and the reported line/column is the position right after the keyword.
 *
 * The message loops test `!currentChar().isNull()` explicitly. Testing the
 * character itself relies on the implicit TQChar-to-char conversion, which
 * is assumed to yield 0 for characters outside Latin-1 (as in Qt 3) and
 * would cut the message at the first such character.
 */
inline void Lexer::readLineComment()
{
    while( !currentChar().isNull() && currentChar() != '\n' ){
        if( m_reportMessages && isTodo( m_source, currentPosition() ) ){
            nextChar( 4 ); // skip the keyword itself
            TQString msg;
            int line = m_currentLine;
            int col = m_currentColumn;
            while( !currentChar().isNull() ){
                if( currentChar() == '*' && peekChar() == '/' )
                    break;
                else if( currentChar() == '\n' )
                    break;
                msg += currentChar();
                nextChar();
            }
            m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Todo) );
        } else
        if( m_reportMessages && isFixme( m_source, currentPosition() ) ){
            nextChar( 5 ); // skip the keyword itself
            TQString msg;
            int line = m_currentLine;
            int col = m_currentColumn;
            while( !currentChar().isNull() ){
                if( currentChar() == '*' && peekChar() == '/' )
                    break;
                else if( currentChar() == '\n' )
                    break;
                msg += currentChar();
                nextChar();
            }
            m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Fixme) );
        } else
            nextChar();
    }
}
/**
 * Consumes a multi-line comment up to and including the closing marker
 * (or up to the end of input when the comment is unterminated).
 *
 * When message reporting is enabled, every "todo" / "fixme" keyword
 * (any letter case) found on the way is reported to the driver as a
 * Problem of level Level_Todo / Level_Fixme. The problem text is what
 * follows the keyword up to the end of the line or the closing marker,
 * and the reported line/column is the position right after the keyword.
 *
 * The message loops test `!currentChar().isNull()` explicitly. Testing the
 * character itself relies on the implicit TQChar-to-char conversion, which
 * is assumed to yield 0 for characters outside Latin-1 (as in Qt 3) and
 * would cut the message at the first such character.
 */
inline void Lexer::readMultiLineComment()
{
    while( !currentChar().isNull() ){
        if( currentChar() == '*' && peekChar() == '/' ){
            nextChar( 2 ); // consume the closing marker
            return;
        } else if( m_reportMessages && isTodo( m_source, currentPosition() ) ){
            nextChar( 4 ); // skip the keyword itself
            TQString msg;
            int line = m_currentLine;
            int col = m_currentColumn;
            while( !currentChar().isNull() ){
                if( currentChar() == '*' && peekChar() == '/' )
                    break;
                else if( currentChar() == '\n' )
                    break;
                msg += currentChar();
                nextChar();
            }
            m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Todo) );
        } else
        if( m_reportMessages && isFixme( m_source, currentPosition() ) ) {
            nextChar( 5 ); // skip the keyword itself
            TQString msg;
            int line = m_currentLine;
            int col = m_currentColumn;
            while( !currentChar().isNull() ){
                if( currentChar() == '*' && peekChar() == '/' )
                    break;
                else if( currentChar() == '\n' )
                    break;
                msg += currentChar();
                nextChar();
            }
            m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Fixme) );
        } else
            nextChar();
    }
}
inline void Lexer::readCharLiteral()
{
if( currentChar() == '\'' )
nextChar(); // skip '
else if( currentChar() == 'L' && peekChar() == '\'' )
nextChar( 2 ); // slip L'
else
return;
while( !currentChar().isNull() ){
int len = getOffset( m_endPtr ) - currentPosition();
if( len>=2 && (currentChar() == '\\' && peekChar() == '\'') ){
nextChar( 2 );
} else if( len>=2 && (currentChar() == '\\' && peekChar() == '\\') ){
nextChar( 2 );
} else if( currentChar() == '\'' ){
nextChar();
break;
} else {
nextChar();
}
}
}
inline void Lexer::readStringLiteral()
{
if( currentChar() != '"' )
return;
nextChar(); // skip "
while( !currentChar().isNull() ){
int len = getOffset( m_endPtr ) - currentPosition();
if( len>=2 && currentChar() == '\\' && peekChar() == '"' ){
nextChar( 2 );
} else if( len>=2 && currentChar() == '\\' && peekChar() == '\\' ){
nextChar( 2 );
} else if( currentChar() == '"' ){
nextChar();
break;
} else {
nextChar();
}
}
}
inline void Lexer::readNumberLiteral()
{
while( currentChar().isLetterOrNumber() || currentChar() == '.' )
nextChar();
}
inline int Lexer::findOperator3() const
{
int n = getOffset( m_endPtr ) - currentPosition();
if( n >= 3){
TQChar ch = currentChar(), ch1=peekChar(), ch2=peekChar(2);
if( ch == '<' && ch1 == '<' && ch2 == '=' ) return Token_assign;
else if( ch == '>' && ch1 == '>' && ch2 == '=' ) return Token_assign;
else if( ch == '-' && ch1 == '>' && ch2 == '*' ) return Token_ptrmem;
else if( ch == '.' && ch1 == '.' && ch2 == '.' ) return Token_ellipsis;
}
return -1;
}
inline int Lexer::findOperator2() const
{
int n = getOffset( m_endPtr ) - currentPosition();
if( n>=2 ){
TQChar ch = currentChar(), ch1=peekChar();
if( ch == ':' && ch1 == ':' ) return Token_scope;
else if( ch == '.' && ch1 == '*' ) return Token_ptrmem;
else if( ch == '+' && ch1 == '=' ) return Token_assign;
else if( ch == '-' && ch1 == '=' ) return Token_assign;
else if( ch == '*' && ch1 == '=' ) return Token_assign;
else if( ch == '/' && ch1 == '=' ) return Token_assign;
else if( ch == '%' && ch1 == '=' ) return Token_assign;
else if( ch == '^' && ch1 == '=' ) return Token_assign;
else if( ch == '&' && ch1 == '=' ) return Token_assign;
else if( ch == '|' && ch1 == '=' ) return Token_assign;
else if( ch == '<' && ch1 == '<' ) return Token_shift;
else if( ch == '>' && ch1 == '>' ) return Token_shift;
else if( ch == '=' && ch1 == '=' ) return Token_eq;
else if( ch == '!' && ch1 == '=' ) return Token_eq;
else if( ch == '<' && ch1 == '=' ) return Token_leq;
else if( ch == '>' && ch1 == '=' ) return Token_geq;
else if( ch == '&' && ch1 == '&' ) return Token_and;
else if( ch == '|' && ch1 == '|' ) return Token_or;
else if( ch == '+' && ch1 == '+' ) return Token_incr;
else if( ch == '-' && ch1 == '-' ) return Token_decr;
else if( ch == '-' && ch1 == '>' ) return Token_arrow;
else if( ch == '#' && ch1 == '#' ) return Token_concat;
}
return -1;
}
// Returns the flag set by setSkipWordsEnabled().
inline bool Lexer::skipWordsEnabled() const
{
return m_skipWordsEnabled;
}
// Stores the "skip words enabled" flag.
// NOTE(review): presumably toggles use of the words registered via addSkipWord() - confirm in lexer.cpp.
inline void Lexer::setSkipWordsEnabled( bool enabled )
{
m_skipWordsEnabled = enabled;
}
// Returns the flag set by setPreprocessorEnabled().
inline bool Lexer::preprocessorEnabled() const
{
return m_preprocessorEnabled;
}
// Stores the "preprocessor enabled" flag.
// NOTE(review): presumably toggles directive handling during tokenizing - confirm in lexer.cpp.
inline void Lexer::setPreprocessorEnabled( bool enabled )
{
m_preprocessorEnabled = enabled;
}
// Returns the index of the read pointer relative to the start of the source buffer.
inline int Lexer::currentPosition() const
{
return getOffset( m_ptr );
}
// Returns the cached character at the read pointer; this is TQChar::null
// once the read pointer has reached m_endPtr.
inline const TQChar Lexer::currentChar() const
{
return m_currentChar;
}
/**
 * Returns the character @p n positions after the read pointer without
 * consuming anything, or TQChar::null when that position is at or beyond
 * m_endPtr. No lower-bound check is made for negative @p n.
 */
inline TQChar Lexer::peekChar( int n ) const
{
    const TQChar* const target = m_ptr + n;
    return ( target < m_endPtr ) ? *target : TQChar::null;
}
// True once the read pointer has reached (or passed) m_endPtr.
inline bool Lexer::eof() const
{
return m_ptr >= m_endPtr;
}
// Returns the flag set by setReportWarnings().
inline bool Lexer::reportWarnings() const
{
return m_reportWarnings;
}
// Stores the "report warnings" flag.
inline void Lexer::setReportWarnings( bool enable )
{
m_reportWarnings = enable;
}
// Returns the flag set by setReportMessages().
inline bool Lexer::reportMessages() const
{
return m_reportMessages;
}
// Stores the "report messages" flag. When set, readLineComment() and
// readMultiLineComment() report todo/fixme notes found in comments to the driver.
inline void Lexer::setReportMessages( bool enable )
{
m_reportMessages = enable;
}
/**
 * Inserts @p str into the source text at the current read position.
 *
 * The read pointer keeps its index (so it ends up on the first inserted
 * character), the end pointer is moved to the end of the enlarged source,
 * and the cached current character is refreshed. Both pointers are
 * re-derived from the source buffer after the insertion rather than reused,
 * since the buffer may have moved.
 */
inline void Lexer::insertCurrent( const TQString& str ) {
    const int insertAt = currentPosition();
    m_source.insert( insertAt, str );

    // Re-anchor the scanning window on the modified buffer.
    m_ptr = offset( insertAt );
    m_endPtr = offset( m_source.length() );

    m_currentChar = ( m_ptr < m_endPtr ) ? *m_ptr : TQChar::null;
}
#endif