tdesdk/poxml/antlr/antlr/CharScanner.h

#ifndef INC_CharScanner_hpp__
#define INC_CharScanner_hpp__

/**
 * <b>SOFTWARE RIGHTS</b>
 * <p>
 * ANTLR 2.6.0 MageLang Insitute, 1999
 * <p>
 * $Id$
 * <p>
 * We reserve no legal rights to the ANTLR--it is fully in the
 * public domain. An individual or company may do whatever
 * they wish with source code distributed with ANTLR or the
 * code generated by ANTLR, including the incorporation of
 * ANTLR, or its output, into commerical software.
 * <p>
 * We encourage users to develop software with ANTLR. However,
 * we do ask that credit is given to us for developing
 * ANTLR. By "credit", we mean that if you use ANTLR or
 * incorporate any source code into one of your programs
 * (commercial product, research project, or otherwise) that
 * you acknowledge this fact somewhere in the documentation,
 * research report, etc... If you like ANTLR and have
 * developed a nice tool with the output, please mention that
 * you developed it using ANTLR. In addition, we ask that the
 * headers remain intact in our source code. As long as these
 * guidelines are kept, we expect to continue enhancing this
 * system and expect to make other tools available as they are
 * completed.
 * <p>
 * The ANTLR gang:
 * @version ANTLR 2.6.0 MageLang Insitute, 1999
 * @author Terence Parr, <a href=http://www.MageLang.com>MageLang Institute</a>
 * @author <br>John Lilley, <a href=http://www.Empathy.com>Empathy Software</a>
 * @author <br><a href="mailto:pete@yamuna.demon.co.uk">Pete Wells</a>
 */

#include <cstdio>
#include <functional>
#include <map>

#include "antlr/config.h"
#include "antlr/TokenStream.h"
#include "antlr/RecognitionException.h"
#include "antlr/InputBuffer.h"
#include "antlr/BitSet.h"
#include "antlr/LexerSharedInputState.h"

ANTLR_BEGIN_NAMESPACE(antlr)

class CharScanner;

class CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)function<bool(ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string)> {
private:
	const CharScanner* scanner;
public:
#ifdef NO_TEMPLATE_PARTS
	CharScannerLiteralsLess(); // not really used
#endif
	CharScannerLiteralsLess(const CharScanner* theScanner);
	bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
private:
//	CharScannerLiteralsLess(const CharScannerLiteralsLess&);
//	CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
};

/**
 * Base class for lexers (the "YourLexer" mentioned in the uponEOF()
 * notes below derives from it).  It is a TokenStream that reads
 * characters through a shared LexerSharedInputState and accumulates the
 * text of the token currently being matched.
 *
 * The class is abstract: getCaseSensitiveLiterals() is pure virtual and
 * must be supplied by the subclass.
 *
 * NOTE(review): the declaration order of virtual functions and data
 * members determines the vtable/object layout seen by separately
 * compiled lexers -- do not reorder members casually.
 */
class CharScanner : public TokenStream {
private:
	// NO_CHAR is 0.  Compilers without in-class static const initialisers
	// (NO_STATIC_CONSTS defined) get an equivalent anonymous enum.
#ifndef NO_STATIC_CONSTS
	static const int NO_CHAR = 0;
#else
	enum {
		NO_CHAR = 0
	};
#endif

public:
	// EOF_CHAR has the value of the C library's EOF macro (<cstdio>).
	// Same static-const / enum fallback as NO_CHAR.
#ifndef NO_STATIC_CONSTS
	static const int EOF_CHAR = EOF;
#else
	enum {
		EOF_CHAR = EOF
	};
#endif

protected:
	ANTLR_USE_NAMESPACE(std)string text;		// text of current token

	bool saveConsumedInput; // does consume() save characters?

	// Pointer to a function taking no arguments and returning a RefToken.
	typedef RefToken (*factory_type)();
	factory_type tokenFactory; // what kind of tokens to create?

	bool caseSensitive; // when false, LA() passes each character through toLower()
	// Literal text -> int value, ordered by CharScannerLiteralsLess.
	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass

	RefToken _returnToken; // used to return tokens w/o using return val

	// Input chars
	LexerSharedInputState inputState;

	/** Used during filter mode to indicate that path is desired.
	 *  A subsequent scan error will report an error as usual if
	 *  commitToPath=true.
	 */
	bool commitToPath;

public:
	CharScanner();

	// Construct from an input buffer, by reference or by pointer.
	// NOTE(review): who owns the buffer in the pointer form is not visible
	// from this header -- confirm against the implementation.
	CharScanner(InputBuffer& cb);
	CharScanner(InputBuffer* cb);

	// Construct from an existing input state object.
	CharScanner(const LexerSharedInputState& state);

	virtual ~CharScanner();

	// append() overloads: take a single character or a whole string.
	// NOTE(review): presumably they extend the token text buffer -- confirm.
	virtual void append(char c);

	virtual void append(const ANTLR_USE_NAMESPACE(std)string& s);

	virtual void commit();

	virtual void consume();

	/** Consume chars until one matches the given char */
	virtual void consumeUntil(int c);

	/** Consume chars until one matches the given set */
	virtual void consumeUntil(const BitSet& set);

	virtual bool getCaseSensitive() const;

	// Pure virtual: every concrete lexer must implement this.
	virtual bool getCaseSensitiveLiterals() const=0;

	virtual int getColumn() const;

	virtual void setColumn(int c);

	virtual bool getCommitToPath() const;

	virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const;

	virtual InputBuffer& getInputBuffer();

	virtual LexerSharedInputState getInputState();

	virtual int getLine() const;

	/** Return the current text buffer.  The result is a const reference,
	 *  not a copy; copy it if the value must be kept.
	 */
	virtual const ANTLR_USE_NAMESPACE(std)string& getText() const;

	virtual RefToken getTokenObject() const;

	// Character lookahead; defined inline after the class.  The value
	// comes from the input buffer and is passed through toLower() when
	// caseSensitive is false.
	virtual int LA(int i);

protected:
	// NOTE(review): presumably creates a token of type t via tokenFactory
	// -- confirm in the implementation.
	virtual RefToken makeToken(int t);

public:
	virtual int mark();

	// match() overloads: a single character, a character set, or a string.
	virtual void match(int c);

	virtual void match(const BitSet& b);

	virtual void match(const ANTLR_USE_NAMESPACE(std)string& s);

	virtual void matchNot(int c);

	virtual void matchRange(int c1, int c2);

	virtual void newline();

	virtual void tab();

	// The two panic() overloads are not virtual, unlike the rest of the
	// public interface.
	void panic();

	void panic(const ANTLR_USE_NAMESPACE(std)string& s);

	/** Report exception errors caught in nextToken() */
	virtual void reportError(const RecognitionException& e);

	/** Error-reporting function (message form); can be overridden in subclass */
	virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);

	/** Warning-reporting function; can be overridden in subclass */
	virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);

	virtual void resetText();

	virtual void rewind(int pos);

	virtual void setCaseSensitive(bool t);

	virtual void setCommitToPath(bool commit);

	virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f);

	virtual void setInputState(LexerSharedInputState state);

	virtual void setLine(int l);

	virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s);

	virtual void setTokenObjectFactory(factory_type factory);

	// Test the token text against the literals table
	// Override this method to perform a different literals test
	virtual int testLiteralsTable(int ttype) const;

	// Test the text passed in against the literals table
	// Override this method to perform a different literals test
	// This is used primarily when you want to test a portion of
	// a token
	virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& text,int ttype) const;

	// Override this method to get more specific case handling
	virtual int toLower(int c) const;

protected:
	/** Scope guard for rule tracing: the constructor calls traceIn() on
	 *  the scanner with the given text and the destructor calls
	 *  traceOut() with the same text.
	 *
	 *  The copy operations are the implicit ones, so every copy of a
	 *  Tracer issues its own traceOut() when destroyed; construct it
	 *  directly as a local object and do not copy it.
	 */
	class Tracer {
	private:
		CharScanner* parser; // the scanner being traced (not owned)
		ANTLR_USE_NAMESPACE(std)string text; // text passed to traceIn()/traceOut()
	public:
		Tracer(CharScanner* p,const ANTLR_USE_NAMESPACE(std)string& t)
			: parser(p), text(t) { parser->traceIn(text); }
		~Tracer()
			{ parser->traceOut(text); }
	};

	// NOTE(review): presumably the current traceIn()/traceOut() nesting
	// level used by traceIndent() -- confirm in the implementation.
	int traceDepth;
public:
	virtual void traceIndent();
	virtual void traceIn(const ANTLR_USE_NAMESPACE(std)string& rname);
	virtual void traceOut(const ANTLR_USE_NAMESPACE(std)string& rname);

	/* This method is called by YourLexer::nextToken() when the lexer has
	*  hit EOF condition.  EOF is NOT a character.
	*  This method is not called if EOF is reached during
	*  syntactic predicate evaluation or during evaluation
	*  of normal lexical rules, which presumably would be
	*  an IOException.  This traps the "normal" EOF condition.
	*
	*  uponEOF() is called after the complete evaluation of
	*  the previous token and only if your parser asks
	*  for another token beyond that last non-EOF token.
	*
	*  You might want to throw token or char stream exceptions
	*  like: "Heh, premature eof" or a retry stream exception
	*  ("I found the end of this file, go back to referencing file").
	*/
	virtual void uponEOF();
};

/** Look ahead i characters in the input buffer of the current input
 *  state.  The character is returned unchanged when the scanner is case
 *  sensitive; otherwise it is first mapped through toLower().
 */
inline int CharScanner::LA(int i)
{
	// Fetch once; only the optional case folding differs between modes.
	const int lookahead = inputState->getInput().LA(i);
	return caseSensitive ? lookahead : toLower(lookahead);
}

ANTLR_END_NAMESPACE

#endif //INC_CharScanner_hpp__