You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdevelop/lib/antlr/antlr/TokenStreamRewriteEngine.h

440 lines
12 KiB

#ifndef INC_TokenStreamRewriteEngine_h__
#define INC_TokenStreamRewriteEngine_h__
/* ANTLR Translator Generator
* Project led by Terence Parr at http://www.jGuru.com
* Software rights: http://www.antlr.org/license.html
*/
#include <string>
#include <list>
#include <vector>
#include <map>
#include <utility>
#include <iostream>
#include <iterator>
#include <cassert>
#include <algorithm>
#include <antlr/config.h>
#include <antlr/TokenStream.h>
#include <antlr/TokenWithIndex.h>
#include <antlr/BitSet.h>
#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
namespace antlr {
#endif
/** This token stream tracks the *entire* token stream coming from
* a lexer, but does not pass on the whitespace (or whatever else
* you want to discard) to the parser.
*
* This class can then be asked for the ith token in the input stream.
* Useful for dumping out the input stream exactly after doing some
* augmentation or other manipulations. Tokens are indexed from 0..n-1
*
* You can insert stuff, replace, and delete chunks. Note that the
* operations are done lazily--only if you convert the buffer to a
* String. This is very efficient because you are not moving data around
* all the time. As the buffer of tokens is converted to strings, the
* toString() method(s) check to see if there is an operation at the
* current index. If so, the operation is done and then normal String
* rendering continues on the buffer. This is like having multiple Turing
* machine instruction streams (programs) operating on a single input tape. :)
*
* Since the operations are done lazily at toString-time, operations do not
* screw up the token index values. That is, an insert operation at token
* index i does not change the index values for tokens i+1..n-1.
*
* Because operations never actually alter the buffer, you may always get
* the original token stream back without undoing anything. Since
* the instructions are queued up, you can easily simulate transactions and
* roll back any changes if there is an error just by removing instructions.
* For example,
*
* TokenStreamRewriteEngine rewriteEngine =
* new TokenStreamRewriteEngine(lexer);
* JavaRecognizer parser = new JavaRecognizer(rewriteEngine);
* ...
* rewriteEngine.insertAfter("pass1", t, "foobar");
* rewriteEngine.insertAfter("pass2", u, "start");
* System.out.println(rewriteEngine.toString("pass1"));
* System.out.println(rewriteEngine.toString("pass2"));
*
* You can also have multiple "instruction streams" and get multiple
* rewrites from a single pass over the input. Just name the instruction
* streams and use that name again when printing the buffer. This could be
* useful for generating a C file and also its header file--all from the
* same buffer.
*
* If you don't use named rewrite streams, a "default" stream is used.
*
* Terence Parr, parrt@cs.usfca.edu
* University of San Francisco
* February 2004
*/
class TokenStreamRewriteEngine : public TokenStream
{
public:
	/// Container type used for the recorded token buffer (see `tokens`).
	// The element type is spelled unqualified, like every other use in this
	// class, so the typedef also compiles when ANTLR_CXX_SUPPORTS_NAMESPACE is
	// not defined and no `antlr` namespace exists.
	typedef ANTLR_USE_NAMESPACE(std)vector<RefTokenWithIndex> token_list;

	/// Program name used by every overload that does not take a program name.
	static const char* DEFAULT_PROGRAM_NAME;
#ifndef NO_STATIC_CONSTS
	// Values are defined out of line; the enum fallback below uses 0 and 100.
	static const size_t MIN_TOKEN_INDEX;
	static const int PROGRAM_INIT_SIZE;
#else
	enum {
		MIN_TOKEN_INDEX = 0,
		PROGRAM_INIT_SIZE = 100
	};
#endif

	/** Functor that writes the text of each visited token to an output
	 * stream. The argument only needs to support `t->getText()`.
	 */
	struct tokenToStream {
		tokenToStream( ANTLR_USE_NAMESPACE(std)ostream& o ) : out(o) {}
		template <typename T> void operator() ( const T& t ) {
			out << t->getText();
		}
		ANTLR_USE_NAMESPACE(std)ostream& out;
	};

	/** Base class of all rewrite instructions. An instruction is anchored
	 * at a token index and carries the text it may emit.
	 */
	class RewriteOperation {
	protected:
		/** Only derived operations may be constructed.
		 * @param idx token index this operation is anchored at
		 * @param txt text associated with the operation
		 */
		RewriteOperation( size_t idx, const ANTLR_USE_NAMESPACE(std)string& txt )
		: index(idx), text(txt)
		{
		}
	public:
		virtual ~RewriteOperation()
		{
		}
		/** Execute the rewrite operation by possibly adding to the buffer.
		 * Return the index of the next token to operate on.
		 * The base implementation writes nothing and returns `index`.
		 */
		virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& /* out */ ) {
			return index;
		}
		/// Token index this operation is anchored at.
		virtual size_t getIndex() const {
			return index;
		}
		/// Name of the concrete operation class, as a string literal.
		virtual const char* type() const {
			return "RewriteOperation";
		}
	protected:
		size_t index;
		ANTLR_USE_NAMESPACE(std)string text;
	};

	/** Functor that calls execute() on each visited operation, writing to
	 * the stream given at construction. The index returned by execute()
	 * is discarded.
	 */
	struct executeOperation {
		ANTLR_USE_NAMESPACE(std)ostream& out;
		executeOperation( ANTLR_USE_NAMESPACE(std)ostream& s ) : out(s) {}
		void operator () ( RewriteOperation* t ) {
			t->execute(out);
		}
	};

	/// list of rewrite operations
	// NOTE(review): the list stores raw pointers; insertBefore() and replace()
	// create the operations with `new`, and nothing in this header deletes
	// them (the class declares no destructor). Confirm ownership and cleanup
	// in the out-of-line code.
	typedef ANTLR_USE_NAMESPACE(std)list<RewriteOperation*> operation_list;
	/// map program name to the list of rewrite operations of that program
	typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,operation_list> program_map;

	/** Operation that emits its text and then continues with the token at
	 * its own index.
	 */
	class InsertBeforeOp : public RewriteOperation
	{
	public:
		InsertBeforeOp( size_t index, const ANTLR_USE_NAMESPACE(std)string& text )
		: RewriteOperation(index, text)
		{
		}
		virtual ~InsertBeforeOp() {}
		/// Write the text and return the anchor index unchanged.
		virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out )
		{
			out << text;
			return index;
		}
		virtual const char* type() const {
			return "InsertBeforeOp";
		}
	};

	/** Operation that emits its text in place of the token range
	 * [from, to] (both inclusive).
	 */
	class ReplaceOp : public RewriteOperation
	{
	public:
		/** @param from first token index of the replaced range
		 * @param to last token index of the replaced range
		 * @param text replacement text; taken by const reference, matching
		 *             InsertBeforeOp and the base class constructor
		 */
		ReplaceOp(size_t from, size_t to, const ANTLR_USE_NAMESPACE(std)string& text)
		: RewriteOperation(from,text)
		, lastIndex(to)
		{
		}
		virtual ~ReplaceOp() {}
		/// Write the text and return the index just past the replaced range.
		virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) {
			out << text;
			return lastIndex+1;
		}
		virtual const char* type() const {
			return "ReplaceOp";
		}
	protected:
		/// Last token index (inclusive) covered by the replacement.
		size_t lastIndex;
	};

	/// A ReplaceOp whose replacement text is empty.
	class DeleteOp : public ReplaceOp {
	public:
		DeleteOp(size_t from, size_t to)
		: ReplaceOp(from,to,"")
		{
		}
		virtual const char* type() const {
			return "DeleteOp";
		}
	};

	TokenStreamRewriteEngine(TokenStream& upstream);

	TokenStreamRewriteEngine(TokenStream& upstream, size_t initialSize);

	RefToken nextToken( void );

	/// Roll back the default program; see the two-argument overload.
	void rollback(size_t instructionIndex) {
		rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
	}

	/** Rollback the instruction stream for a program so that
	 * the indicated instruction (via instructionIndex) is no
	 * longer in the stream. UNTESTED!
	 */
	void rollback(const ANTLR_USE_NAMESPACE(std)string& programName,
					  size_t instructionIndex );

	/// Reset the default program so that no instructions exist.
	void deleteProgram() {
		deleteProgram(DEFAULT_PROGRAM_NAME);
	}

	/** Reset the program so that no instructions exist.
	 * Implemented as a rollback to MIN_TOKEN_INDEX.
	 */
	void deleteProgram(const ANTLR_USE_NAMESPACE(std)string& programName) {
		rollback(programName, MIN_TOKEN_INDEX);
	}

	/// Insert text after token t in the default program.
	void insertAfter( RefTokenWithIndex t,
							const ANTLR_USE_NAMESPACE(std)string& text )
	{
		insertAfter(DEFAULT_PROGRAM_NAME, t, text);
	}

	/// Insert text after the token at index in the default program.
	void insertAfter(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
		insertAfter(DEFAULT_PROGRAM_NAME, index, text);
	}

	/// Insert text after token t in the named program.
	void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
							RefTokenWithIndex t,
							const ANTLR_USE_NAMESPACE(std)string& text )
	{
		insertAfter(programName, t->getIndex(), text);
	}

	/// Insert text after the token at index in the named program.
	void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
							size_t index,
							const ANTLR_USE_NAMESPACE(std)string& text )
	{
		// to insert after, just insert before next index (even if past end)
		insertBefore(programName,index+1, text);
	}

	/// Insert text before token t in the default program.
	void insertBefore( RefTokenWithIndex t,
							 const ANTLR_USE_NAMESPACE(std)string& text )
	{
		// std::cout << "insertBefore index " << t->getIndex() << " " << text << std::endl;
		insertBefore(DEFAULT_PROGRAM_NAME, t, text);
	}

	/// Insert text before the token at index in the default program.
	void insertBefore(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
		insertBefore(DEFAULT_PROGRAM_NAME, index, text);
	}

	/// Insert text before token t in the named program.
	void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
							 RefTokenWithIndex t,
							 const ANTLR_USE_NAMESPACE(std)string& text )
	{
		insertBefore(programName, t->getIndex(), text);
	}

	/** Queue an InsertBeforeOp at index in the named program.
	 * The operation is allocated with `new` and handed to
	 * addToSortedRewriteList().
	 */
	void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
							 size_t index,
							 const ANTLR_USE_NAMESPACE(std)string& text )
	{
		addToSortedRewriteList(programName, new InsertBeforeOp(index,text));
	}

	/// Replace the single token at index with text (default program).
	void replace(size_t index, const ANTLR_USE_NAMESPACE(std)string& text)
	{
		replace(DEFAULT_PROGRAM_NAME, index, index, text);
	}

	/// Replace tokens from..to (inclusive) with text (default program).
	void replace( size_t from, size_t to,
					  const ANTLR_USE_NAMESPACE(std)string& text)
	{
		replace(DEFAULT_PROGRAM_NAME, from, to, text);
	}

	/// Replace the single token indexT with text (default program).
	void replace( RefTokenWithIndex indexT,
					  const ANTLR_USE_NAMESPACE(std)string& text )
	{
		replace(DEFAULT_PROGRAM_NAME, indexT->getIndex(), indexT->getIndex(), text);
	}

	/// Replace the tokens from..to (inclusive) with text (default program).
	void replace( RefTokenWithIndex from,
					  RefTokenWithIndex to,
					  const ANTLR_USE_NAMESPACE(std)string& text )
	{
		replace(DEFAULT_PROGRAM_NAME, from, to, text);
	}

	/** Queue a ReplaceOp covering from..to (inclusive) in the named program.
	 * The operation is allocated with `new` and handed to
	 * addToSortedRewriteList().
	 */
	void replace(const ANTLR_USE_NAMESPACE(std)string& programName,
					 size_t from, size_t to,
					 const ANTLR_USE_NAMESPACE(std)string& text )
	{
		addToSortedRewriteList(programName,new ReplaceOp(from, to, text));
	}

	/// Replace the tokens from..to (inclusive) with text in the named program.
	void replace( const ANTLR_USE_NAMESPACE(std)string& programName,
					  RefTokenWithIndex from,
					  RefTokenWithIndex to,
					  const ANTLR_USE_NAMESPACE(std)string& text )
	{
		replace(programName,
				  from->getIndex(),
				  to->getIndex(),
				  text);
	}

	/// Remove the single token at index (default program).
	void remove(size_t index) {
		remove(DEFAULT_PROGRAM_NAME, index, index);
	}

	/// Remove tokens from..to (inclusive) (default program).
	void remove(size_t from, size_t to) {
		remove(DEFAULT_PROGRAM_NAME, from, to);
	}

	/// Remove the single token indexT (default program).
	void remove(RefTokenWithIndex indexT) {
		remove(DEFAULT_PROGRAM_NAME, indexT, indexT);
	}

	/// Remove the tokens from..to (inclusive) (default program).
	void remove(RefTokenWithIndex from, RefTokenWithIndex to) {
		remove(DEFAULT_PROGRAM_NAME, from, to);
	}

	/// Remove tokens from..to in the named program: a replace with empty text.
	void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
					 size_t from, size_t to)
	{
		replace(programName,from,to,"");
	}

	/// Remove tokens from..to in the named program: a replace with empty text.
	void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
					 RefTokenWithIndex from, RefTokenWithIndex to )
	{
		replace(programName,from,to,"");
	}

	/// Add token type ttype to the discard mask.
	void discard(int ttype) {
		discardMask.add(ttype);
	}

	/** Return the i-th recorded token. Uses the bounds-checked vector
	 * accessor `at()`, so an out-of-range i raises std::out_of_range.
	 */
	RefToken getToken( size_t i )
	{
		return RefToken(tokens.at(i));
	}

	/// Number of tokens currently recorded in the buffer.
	size_t getTokenStreamSize() const {
		return tokens.size();
	}

	/// Write the text of every recorded token to out, in buffer order.
	void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
		ANTLR_USE_NAMESPACE(std)for_each( tokens.begin(), tokens.end(), tokenToStream(out) );
	}

	void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out,
								  size_t start, size_t end ) const;

	/// Render the default program over the whole buffer.
	void toStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
		toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
	}

	/// Render the named program over the whole buffer.
	void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
						const ANTLR_USE_NAMESPACE(std)string& programName ) const
	{
		toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize());
	}

	/// Render the default program for the token range start..end.
	void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
						size_t start, size_t end ) const
	{
		toStream(out, DEFAULT_PROGRAM_NAME, start, end);
	}

	void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
						const ANTLR_USE_NAMESPACE(std)string& programName,
						size_t firstToken, size_t lastToken ) const;

	/// Debug rendering of the whole buffer; see the ranged overload.
	void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
		toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
	}

	void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out,
							  size_t start, size_t end ) const;

	/// Last rewrite token index of the default program.
	size_t getLastRewriteTokenIndex() const {
		return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
	}

	/** Return the last index for the program named programName
	 * return 0 if the program does not exist or the program is empty.
	 * (Note this is different from the java implementation that returns -1)
	 */
	size_t getLastRewriteTokenIndex(const ANTLR_USE_NAMESPACE(std)string& programName) const {
		program_map::const_iterator rewrites = programs.find(programName);

		// Unknown program and empty program are both reported as 0.
		if( rewrites == programs.end() || rewrites->second.empty() )
			return 0;

		// The reported index is the one of the last operation in the list.
		return rewrites->second.back()->getIndex();
	}

protected:
	/** If op.index > lastRewriteTokenIndexes, just add to the end.
	 * Otherwise, do linear
	 * (Original note; the implementation is out of line.)
	 * This overload targets the default program.
	 */
	void addToSortedRewriteList(RewriteOperation* op) {
		addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op);
	}

	void addToSortedRewriteList( const ANTLR_USE_NAMESPACE(std)string& programName,
										  RewriteOperation* op );

protected:
	/** Who do we suck tokens from? */
	TokenStream& stream;
	/** track index of tokens */
	size_t index;

	/** Track the incoming list of tokens */
	token_list tokens;

	/** You may have multiple, named streams of rewrite operations.
	 * I'm calling these things "programs."
	 * Maps String (name) -> rewrite (List)
	 */
	program_map programs;

	/** Which (whitespace) token(s) to throw out */
	BitSet discardMask;
};
#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
}
#endif
#endif