|
|
|
#ifndef INC_CharScanner_hpp__
|
|
|
|
#define INC_CharScanner_hpp__
|
|
|
|
|
|
|
|
/**
|
|
|
|
* <b>SOFTWARE RIGHTS</b>
|
|
|
|
* <p>
|
|
|
|
* ANTLR 2.6.0 MageLang Insitute, 1999
|
|
|
|
* <p>
|
|
|
|
* $Id$
|
|
|
|
* <p>
|
|
|
|
* We reserve no legal rights to the ANTLR--it is fully in the
|
|
|
|
* public domain. An individual or company may do whatever
|
|
|
|
* they wish with source code distributed with ANTLR or the
|
|
|
|
* code generated by ANTLR, including the incorporation of
|
|
|
|
* ANTLR, or its output, into commerical software.
|
|
|
|
* <p>
|
|
|
|
* We encourage users to develop software with ANTLR. However,
|
|
|
|
* we do ask that credit is given to us for developing
|
|
|
|
* ANTLR. By "credit", we mean that if you use ANTLR or
|
|
|
|
* incorporate any source code into one of your programs
|
|
|
|
* (commercial product, research project, or otherwise) that
|
|
|
|
* you acknowledge this fact somewhere in the documentation,
|
|
|
|
* research report, etc... If you like ANTLR and have
|
|
|
|
* developed a nice tool with the output, please mention that
|
|
|
|
* you developed it using ANTLR. In addition, we ask that the
|
|
|
|
* headers remain intact in our source code. As long as these
|
|
|
|
* guidelines are kept, we expect to continue enhancing this
|
|
|
|
* system and expect to make other tools available as they are
|
|
|
|
* completed.
|
|
|
|
* <p>
|
|
|
|
* The ANTLR gang:
|
|
|
|
* @version ANTLR 2.6.0 MageLang Insitute, 1999
|
|
|
|
* @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
|
|
|
|
* @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
|
|
|
|
* @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <cstdio>
|
|
|
|
#include <functional>
|
|
|
|
#include <map>
|
|
|
|
|
|
|
|
#include "antlr/config.h"
|
|
|
|
#include "antlr/TokenStream.h"
|
|
|
|
#include "antlr/RecognitionException.h"
|
|
|
|
#include "antlr/InputBuffer.h"
|
|
|
|
#include "antlr/BitSet.h"
|
|
|
|
#include "antlr/LexerSharedInputState.h"
|
|
|
|
|
|
|
|
ANTLR_BEGIN_NAMESPACE(antlr)
|
|
|
|
|
|
|
|
class CharScanner;
|
|
|
|
|
|
|
|
class CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)function<bool(ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string)> {
|
|
|
|
private:
|
|
|
|
const CharScanner* scanner;
|
|
|
|
public:
|
|
|
|
#ifdef NO_TEMPLATE_PARTS
|
|
|
|
CharScannerLiteralsLess(); // not really used
|
|
|
|
#endif
|
|
|
|
CharScannerLiteralsLess(const CharScanner* theScanner);
|
|
|
|
bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
|
|
|
|
private:
|
|
|
|
// CharScannerLiteralsLess(const CharScannerLiteralsLess&);
|
|
|
|
// CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
|
|
|
|
};
|
|
|
|
|
|
|
|
class CharScanner : public TokenStream {
|
|
|
|
private:
|
|
|
|
#ifndef NO_STATIC_CONSTS
|
|
|
|
static const int NO_CHAR = 0;
|
|
|
|
#else
|
|
|
|
enum {
|
|
|
|
NO_CHAR = 0
|
|
|
|
};
|
|
|
|
#endif
|
|
|
|
|
|
|
|
public:
|
|
|
|
#ifndef NO_STATIC_CONSTS
|
|
|
|
static const int EOF_CHAR = EOF;
|
|
|
|
#else
|
|
|
|
enum {
|
|
|
|
EOF_CHAR = EOF
|
|
|
|
};
|
|
|
|
#endif
|
|
|
|
|
|
|
|
protected:
|
|
|
|
ANTLR_USE_NAMESPACE(std)string text; // text of current token
|
|
|
|
|
|
|
|
bool saveConsumedInput; // does consume() save characters?
|
|
|
|
|
|
|
|
typedef RefToken (*factory_type)();
|
|
|
|
factory_type tokenFactory; // what kind of tokens to create?
|
|
|
|
|
|
|
|
bool caseSensitive;
|
|
|
|
ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
|
|
|
|
|
|
|
|
RefToken _returnToken; // used to return tokens w/o using return val
|
|
|
|
|
|
|
|
// Input chars
|
|
|
|
LexerSharedInputState inputState;
|
|
|
|
|
|
|
|
/** Used during filter mode to indicate that path is desired.
|
|
|
|
* A subsequent scan error will report an error as usual if acceptPath=true;
|
|
|
|
*/
|
|
|
|
bool commitToPath;
|
|
|
|
|
|
|
|
public:
|
|
|
|
CharScanner();
|
|
|
|
|
|
|
|
CharScanner(InputBuffer& cb);
|
|
|
|
CharScanner(InputBuffer* cb);
|
|
|
|
|
|
|
|
CharScanner(const LexerSharedInputState& state);
|
|
|
|
|
|
|
|
virtual ~CharScanner();
|
|
|
|
|
|
|
|
virtual void append(char c);
|
|
|
|
|
|
|
|
virtual void append(const ANTLR_USE_NAMESPACE(std)string& s);
|
|
|
|
|
|
|
|
virtual void commit();
|
|
|
|
|
|
|
|
virtual void consume();
|
|
|
|
|
|
|
|
/** Consume chars until one matches the given char */
|
|
|
|
virtual void consumeUntil(int c);
|
|
|
|
|
|
|
|
/** Consume chars until one matches the given set */
|
|
|
|
virtual void consumeUntil(const BitSet& set);
|
|
|
|
|
|
|
|
virtual bool getCaseSensitive() const;
|
|
|
|
|
|
|
|
virtual bool getCaseSensitiveLiterals() const=0;
|
|
|
|
|
|
|
|
virtual int getColumn() const;
|
|
|
|
|
|
|
|
virtual void setColumn(int c);
|
|
|
|
|
|
|
|
virtual bool getCommitToPath() const;
|
|
|
|
|
|
|
|
virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const;
|
|
|
|
|
|
|
|
virtual InputBuffer& getInputBuffer();
|
|
|
|
|
|
|
|
virtual LexerSharedInputState getInputState();
|
|
|
|
|
|
|
|
virtual int getLine() const;
|
|
|
|
|
|
|
|
/** return a copy of the current text buffer */
|
|
|
|
virtual const ANTLR_USE_NAMESPACE(std)string& getText() const;
|
|
|
|
|
|
|
|
virtual RefToken getTokenObject() const;
|
|
|
|
|
|
|
|
virtual int LA(int i);
|
|
|
|
|
|
|
|
protected:
|
|
|
|
virtual RefToken makeToken(int t);
|
|
|
|
|
|
|
|
public:
|
|
|
|
virtual int mark();
|
|
|
|
|
|
|
|
virtual void match(int c);
|
|
|
|
|
|
|
|
virtual void match(const BitSet& b);
|
|
|
|
|
|
|
|
virtual void match(const ANTLR_USE_NAMESPACE(std)string& s);
|
|
|
|
|
|
|
|
virtual void matchNot(int c);
|
|
|
|
|
|
|
|
virtual void matchRange(int c1, int c2);
|
|
|
|
|
|
|
|
virtual void newline();
|
|
|
|
|
|
|
|
virtual void tab();
|
|
|
|
|
|
|
|
void panic();
|
|
|
|
|
|
|
|
void panic(const ANTLR_USE_NAMESPACE(std)string& s);
|
|
|
|
|
|
|
|
/** Report exception errors caught in nextToken() */
|
|
|
|
virtual void reportError(const RecognitionException& e);
|
|
|
|
|
|
|
|
/** Parser error-reporting function can be overridden in subclass */
|
|
|
|
virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
|
|
|
|
|
|
|
|
/** Parser warning-reporting function can be overridden in subclass */
|
|
|
|
virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
|
|
|
|
|
|
|
|
virtual void resetText();
|
|
|
|
|
|
|
|
virtual void rewind(int pos);
|
|
|
|
|
|
|
|
virtual void setCaseSensitive(bool t);
|
|
|
|
|
|
|
|
virtual void setCommitToPath(bool commit);
|
|
|
|
|
|
|
|
virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f);
|
|
|
|
|
|
|
|
virtual void setInputState(LexerSharedInputState state);
|
|
|
|
|
|
|
|
virtual void setLine(int l);
|
|
|
|
|
|
|
|
virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s);
|
|
|
|
|
|
|
|
virtual void setTokenObjectFactory(factory_type factory);
|
|
|
|
|
|
|
|
// Test the token text against the literals table
|
|
|
|
// Override this method to perform a different literals test
|
|
|
|
virtual int testLiteralsTable(int ttype) const;
|
|
|
|
|
|
|
|
// Test the text passed in against the literals table
|
|
|
|
// Override this method to perform a different literals test
|
|
|
|
// This is used primarily when you want to test a portion of
|
|
|
|
// a token
|
|
|
|
virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& text,int ttype) const;
|
|
|
|
|
|
|
|
// Override this method to get more specific case handling
|
|
|
|
virtual int toLower(int c) const;
|
|
|
|
|
|
|
|
protected:
|
|
|
|
class Tracer {
|
|
|
|
private:
|
|
|
|
CharScanner* parser;
|
|
|
|
ANTLR_USE_NAMESPACE(std)string text;
|
|
|
|
public:
|
|
|
|
Tracer(CharScanner* p,const ANTLR_USE_NAMESPACE(std)string& t)
|
|
|
|
: parser(p), text(t) { parser->traceIn(text); }
|
|
|
|
~Tracer()
|
|
|
|
{ parser->traceOut(text); }
|
|
|
|
};
|
|
|
|
|
|
|
|
int traceDepth;
|
|
|
|
public:
|
|
|
|
virtual void traceIndent();
|
|
|
|
virtual void traceIn(const ANTLR_USE_NAMESPACE(std)string& rname);
|
|
|
|
virtual void traceOut(const ANTLR_USE_NAMESPACE(std)string& rname);
|
|
|
|
|
|
|
|
/* This method is called by YourLexer::nextToken() when the lexer has
|
|
|
|
* hit EOF condition. EOF is NOT a character.
|
|
|
|
* This method is not called if EOF is reached during
|
|
|
|
* syntactic predicate evaluation or during evaluation
|
|
|
|
* of normal lexical rules, which presumably would be
|
|
|
|
* an IOException. This traps the "normal" EOF condition.
|
|
|
|
*
|
|
|
|
* uponEOF() is called after the complete evaluation of
|
|
|
|
* the previous token and only if your parser asks
|
|
|
|
* for another token beyond that last non-EOF token.
|
|
|
|
*
|
|
|
|
* You might want to throw token or char stream exceptions
|
|
|
|
* like: "Heh, premature eof" or a retry stream exception
|
|
|
|
* ("I found the end of this file, go back to referencing file").
|
|
|
|
*/
|
|
|
|
virtual void uponEOF();
|
|
|
|
};
|
|
|
|
|
|
|
|
inline int CharScanner::LA(int i)
|
|
|
|
{
|
|
|
|
if ( caseSensitive ) {
|
|
|
|
return inputState->getInput().LA(i);
|
|
|
|
} else {
|
|
|
|
return toLower(inputState->getInput().LA(i));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ANTLR_END_NAMESPACE
|
|
|
|
|
|
|
|
#endif //INC_CharScanner_hpp__
|