You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdesdk/poxml/antlr/antlr/CharScanner.h

268 lines
7.4 KiB

#ifndef INC_CharScanner_hpp__
#define INC_CharScanner_hpp__
/**
* <b>SOFTWARE RIGHTS</b>
* <p>
* ANTLR 2.6.0 MageLang Insitute, 1999
* <p>
* $Id$
* <p>
* We reserve no legal rights to the ANTLR--it is fully in the
* public domain. An individual or company may do whatever
* they wish with source code distributed with ANTLR or the
* code generated by ANTLR, including the incorporation of
* ANTLR, or its output, into commerical software.
* <p>
* We encourage users to develop software with ANTLR. However,
* we do ask that credit is given to us for developing
* ANTLR. By "credit", we mean that if you use ANTLR or
* incorporate any source code into one of your programs
* (commercial product, research project, or otherwise) that
* you acknowledge this fact somewhere in the documentation,
* research report, etc... If you like ANTLR and have
* developed a nice tool with the output, please mention that
* you developed it using ANTLR. In addition, we ask that the
* headers remain intact in our source code. As long as these
* guidelines are kept, we expect to continue enhancing this
* system and expect to make other tools available as they are
* completed.
* <p>
* The ANTLR gang:
* @version ANTLR 2.6.0 MageLang Insitute, 1999
* @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
* @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
* @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a>
*/
#include <cstdio>
#include <functional>
#include <map>
#include "antlr/config.h"
#include "antlr/TokenStream.h"
#include "antlr/RecognitionException.h"
#include "antlr/InputBuffer.h"
#include "antlr/BitSet.h"
#include "antlr/LexerSharedInputState.h"
ANTLR_BEGIN_NAMESPACE(antlr)
class CharScanner;
class CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)function<bool(ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string)> {
private:
const CharScanner* scanner;
public:
#ifdef NO_TEMPLATE_PARTS
CharScannerLiteralsLess(); // not really used
#endif
CharScannerLiteralsLess(const CharScanner* theScanner);
bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
private:
// CharScannerLiteralsLess(const CharScannerLiteralsLess&);
// CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
};
/** Base class for character-level scanners (lexers): a TokenStream that reads
 * its characters through a LexerSharedInputState.
 *
 * Apart from LA() and the nested Tracer helper, the member functions are only
 * declared in this header. Nearly all of them are virtual so that a subclass
 * can replace them; getCaseSensitiveLiterals() is pure virtual, so this class
 * cannot be instantiated directly.
 */
class CharScanner : public TokenStream {
private:
// NO_CHAR is the compile-time constant 0. When NO_STATIC_CONSTS is defined
// an anonymous enum is used instead of an in-class static const initialiser.
#ifndef NO_STATIC_CONSTS
static const int NO_CHAR = 0;
#else
enum {
NO_CHAR = 0
};
#endif
public:
// EOF_CHAR has the value of the C library's EOF macro (from <cstdio>).
// Same static-const / enum split as NO_CHAR above.
#ifndef NO_STATIC_CONSTS
static const int EOF_CHAR = EOF;
#else
enum {
EOF_CHAR = EOF
};
#endif
protected:
ANTLR_USE_NAMESPACE(std)string text; // text of current token
bool saveConsumedInput; // does consume() save characters?
// Pointer to a zero-argument function that returns a RefToken.
typedef RefToken (*factory_type)();
factory_type tokenFactory; // what kind of tokens to create?
// When false, LA() passes every character through toLower().
bool caseSensitive;
// Maps literal text to an int, ordered by CharScannerLiteralsLess.
// NOTE(review): the int is presumably the literal's token type -- confirm.
ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
RefToken _returnToken; // used to return tokens w/o using return val
// Input chars
LexerSharedInputState inputState;
/** Used during filter mode to indicate that path is desired.
* A subsequent scan error will report an error as usual if commitToPath=true.
* NOTE(review): this comment used to name the flag "acceptPath"; no member of
* that name is declared here, so it presumably meant commitToPath.
*/
bool commitToPath;
public:
CharScanner();
CharScanner(InputBuffer& cb);
CharScanner(InputBuffer* cb);
CharScanner(const LexerSharedInputState& state);
virtual ~CharScanner();
virtual void append(char c);
virtual void append(const ANTLR_USE_NAMESPACE(std)string& s);
virtual void commit();
virtual void consume();
/** Consume chars until one matches the given char */
virtual void consumeUntil(int c);
/** Consume chars until one matches the given set */
virtual void consumeUntil(const BitSet& set);
virtual bool getCaseSensitive() const;
/** Pure virtual: every concrete scanner has to provide this. */
virtual bool getCaseSensitiveLiterals() const=0;
virtual int getColumn() const;
virtual void setColumn(int c);
virtual bool getCommitToPath() const;
virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const;
virtual InputBuffer& getInputBuffer();
virtual LexerSharedInputState getInputState();
virtual int getLine() const;
/** Return the current text buffer.
* Note that the declared return type is a const reference, not a copy.
*/
virtual const ANTLR_USE_NAMESPACE(std)string& getText() const;
virtual RefToken getTokenObject() const;
/** Return LA(i) of the input buffer, passed through toLower() when
* caseSensitive is false. Defined inline after the class.
*/
virtual int LA(int i);
protected:
virtual RefToken makeToken(int t);
public:
virtual int mark();
virtual void match(int c);
virtual void match(const BitSet& b);
virtual void match(const ANTLR_USE_NAMESPACE(std)string& s);
virtual void matchNot(int c);
virtual void matchRange(int c1, int c2);
virtual void newline();
virtual void tab();
// The two panic() overloads are not virtual, unlike most of this interface.
void panic();
void panic(const ANTLR_USE_NAMESPACE(std)string& s);
/** Report exception errors caught in nextToken() */
virtual void reportError(const RecognitionException& e);
/** Error-reporting function; can be overridden in subclass */
virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
/** Warning-reporting function; can be overridden in subclass */
virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
virtual void resetText();
virtual void rewind(int pos);
virtual void setCaseSensitive(bool t);
virtual void setCommitToPath(bool commit);
virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f);
virtual void setInputState(LexerSharedInputState state);
virtual void setLine(int l);
virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s);
virtual void setTokenObjectFactory(factory_type factory);
// Test the token text against the literals table
// Override this method to perform a different literals test
virtual int testLiteralsTable(int ttype) const;
// Test the text passed in against the literals table
// Override this method to perform a different literals test
// This is used primarily when you want to test a portion of
// a token
virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& text,int ttype) const;
// Override this method to get more specific case handling
virtual int toLower(int c) const;
protected:
/** Scoped tracing helper: the constructor calls traceIn(text) on the scanner
* it is given and the destructor calls traceOut(text) on the same scanner.
* The scanner pointer is stored in a member named "parser" although it
* points to a CharScanner; it is dereferenced without a null check.
*/
class Tracer {
private:
CharScanner* parser;
ANTLR_USE_NAMESPACE(std)string text;
public:
Tracer(CharScanner* p,const ANTLR_USE_NAMESPACE(std)string& t)
: parser(p), text(t) { parser->traceIn(text); }
~Tracer()
{ parser->traceOut(text); }
};
// NOTE(review): presumably the current nesting depth maintained by
// traceIn()/traceOut() and used by traceIndent() -- confirm in the
// implementation file.
int traceDepth;
public:
virtual void traceIndent();
virtual void traceIn(const ANTLR_USE_NAMESPACE(std)string& rname);
virtual void traceOut(const ANTLR_USE_NAMESPACE(std)string& rname);
/* This method is called by YourLexer::nextToken() when the lexer has
* hit EOF condition. EOF is NOT a character.
* This method is not called if EOF is reached during
* syntactic predicate evaluation or during evaluation
* of normal lexical rules, which presumably would be
* an IOException. This traps the "normal" EOF condition.
*
* uponEOF() is called after the complete evaluation of
* the previous token and only if your parser asks
* for another token beyond that last non-EOF token.
*
* You might want to throw token or char stream exceptions
* like: "Heh, premature eof" or a retry stream exception
* ("I found the end of this file, go back to referencing file").
*/
virtual void uponEOF();
};
/** Return the i-th lookahead character of the shared input buffer.
 * When the scanner is not case sensitive the character is first passed
 * through toLower().
 */
inline int CharScanner::LA(int i)
{
	// One read serves both modes; only the case folding differs.
	const int lookahead = inputState->getInput().LA(i);
	return caseSensitive ? lookahead : toLower(lookahead);
}
ANTLR_END_NAMESPACE
#endif //INC_CharScanner_hpp__