You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

327 lines
9.5 KiB

#=======================================================================
#
# Python Lexical Analyser
#
# Classes for building NFAs and DFAs
#
#=======================================================================
import string
import sys
from sys import maxint
from types import TupleType
from Transitions import TransitionMap
LOWEST_PRIORITY = -sys.maxint
class Machine:
"""A collection of Nodes representing an NFA or DFA."""
states = None # [Node]
next_state_number = 1
initial_states = None # {(name, bol): Node}
def __init__(self):
self.states = []
self.initial_states = {}
def __del__(self):
#print "Destroying", self ###
for state in self.states:
state.destroy()
def new_state(self):
"""Add a new state to the machine and return it."""
s = Node()
n = self.next_state_number
self.next_state_number = n + 1
s.number = n
self.states.append(s)
return s
def new_initial_state(self, name):
state = self.new_state()
self.make_initial_state(name, state)
return state
def make_initial_state(self, name, state):
self.initial_states[name] = state
def get_initial_state(self, name):
return self.initial_states[name]
def dump(self, file):
file.write("Plex.Machine:\n")
if self.initial_states is not None:
file.write(" Initial states:\n")
for (name, state) in self.initial_states.items():
file.write(" '%s': %d\n" % (name, state.number))
for s in self.states:
s.dump(file)
class Node:
"""A state of an NFA or DFA."""
transitions = None # TransitionMap
action = None # Action
action_priority = None # integer
number = 0 # for debug output
epsilon_closure = None # used by nfa_to_dfa()
def __init__(self):
# Preinitialise the list of empty transitions, because
# the nfa-to-dfa algorithm needs it
#self.transitions = {'':[]}
self.transitions = TransitionMap()
self.action_priority = LOWEST_PRIORITY
def destroy(self):
#print "Destroying", self ###
self.transitions = None
self.action = None
self.epsilon_closure = None
def add_transition(self, event, new_state):
self.transitions.add(event, new_state)
def link_to(self, state):
"""Add an epsilon-move from this state to another state."""
self.add_transition('', state)
def set_action(self, action, priority):
"""Make this an accepting state with the given action. If
there is already an action, choose the action with highest
priority."""
if priority > self.action_priority:
self.action = action
self.action_priority = priority
def get_action(self):
return self.action
def get_action_priority(self):
return self.action_priority
# def merge_actions(self, other_state):
# """Merge actions of other state into this state according
# to their priorities."""
# action = other_state.get_action()
# priority = other_state.get_action_priority()
# self.set_action(action, priority)
def is_accepting(self):
return self.action is not None
def __str__(self):
return "State %d" % self.number
def dump(self, file):
import string
# Header
file.write(" State %d:\n" % self.number)
# Transitions
# self.dump_transitions(file)
self.transitions.dump(file)
# Action
action = self.action
priority = self.action_priority
if action is not None:
file.write(" %s [priority %d]\n" % (action, priority))
class FastMachine:
"""
FastMachine is a deterministic machine represented in a way that
allows fast scanning.
"""
initial_states = None # {state_name:state}
states = None # [state]
# where state = {event:state, 'else':state, 'action':Action}
next_number = 1 # for debugging
new_state_template = {
'':None, 'bol':None, 'eol':None, 'eof':None, 'else':None
}
def __init__(self, old_machine = None):
self.initial_states = initial_states = {}
self.states = []
if old_machine:
self.old_to_new = old_to_new = {}
for old_state in old_machine.states:
new_state = self.new_state()
old_to_new[old_state] = new_state
for name, old_state in old_machine.initial_states.items():
initial_states[name] = old_to_new[old_state]
for old_state in old_machine.states:
new_state = old_to_new[old_state]
for event, old_state_set in old_state.transitions.items():
if old_state_set:
new_state[event] = old_to_new[old_state_set.keys()[0]]
else:
new_state[event] = None
new_state['action'] = old_state.action
def __del__(self):
for state in self.states:
state.clear()
def new_state(self, action = None):
number = self.next_number
self.next_number = number + 1
result = self.new_state_template.copy()
result['number'] = number
result['action'] = action
self.states.append(result)
return result
def make_initial_state(self, name, state):
self.initial_states[name] = state
def add_transitions(self, state, event, new_state):
if type(event) == TupleType:
code0, code1 = event
if code0 == -maxint:
state['else'] = new_state
elif code1 <> maxint:
while code0 < code1:
state[chr(code0)] = new_state
code0 = code0 + 1
else:
state[event] = new_state
def get_initial_state(self, name):
return self.initial_states[name]
def dump(self, file):
file.write("Plex.FastMachine:\n")
file.write(" Initial states:\n")
for name, state in self.initial_states.items():
file.write(" %s: %s\n" % (repr(name), state['number']))
for state in self.states:
self.dump_state(state, file)
def dump_state(self, state, file):
import string
# Header
file.write(" State %d:\n" % state['number'])
# Transitions
self.dump_transitions(state, file)
# Action
action = state['action']
if action is not None:
file.write(" %s\n" % action)
def dump_transitions(self, state, file):
chars_leading_to_state = {}
special_to_state = {}
for (c, s) in state.items():
if len(c) == 1:
chars = chars_leading_to_state.get(id(s), None)
if chars is None:
chars = []
chars_leading_to_state[id(s)] = chars
chars.append(c)
elif len(c) <= 4:
special_to_state[c] = s
ranges_to_state = {}
for state in self.states:
char_list = chars_leading_to_state.get(id(state), None)
if char_list:
ranges = self.chars_to_ranges(char_list)
ranges_to_state[ranges] = state
ranges_list = ranges_to_state.keys()
ranges_list.sort()
for ranges in ranges_list:
key = self.ranges_to_string(ranges)
state = ranges_to_state[ranges]
file.write(" %s --> State %d\n" % (key, state['number']))
for key in ('bol', 'eol', 'eof', 'else'):
state = special_to_state.get(key, None)
if state:
file.write(" %s --> State %d\n" % (key, state['number']))
def chars_to_ranges(self, char_list):
char_list.sort()
i = 0
n = len(char_list)
result = []
while i < n:
c1 = ord(char_list[i])
c2 = c1
i = i + 1
while i < n and ord(char_list[i]) == c2 + 1:
i = i + 1
c2 = c2 + 1
result.append((chr(c1), chr(c2)))
return tuple(result)
def ranges_to_string(self, range_list):
return string.join(map(self.range_to_string, range_list), ",")
def range_to_string(self, (c1, c2)):
if c1 == c2:
return repr(c1)
else:
return "%s..%s" % (repr(c1), repr(c2))
##
## (Superseded by Machines.FastMachine)
##
## class StateTableMachine:
## """
## StateTableMachine is an alternative representation of a Machine
## that can be run more efficiently.
## """
## initial_states = None # {state_name:state_index}
## states = None # [([state] indexed by char code, Action)]
## special_map = {'bol':256, 'eol':257, 'eof':258}
## def __init__(self, m):
## """
## Initialise StateTableMachine from Machine |m|.
## """
## initial_states = self.initial_states = {}
## states = self.states = [None]
## old_to_new = {}
## i = 1
## for old_state in m.states:
## new_state = ([0] * 259, old_state.get_action())
## states.append(new_state)
## old_to_new[old_state] = i # new_state
## i = i + 1
## for name, old_state in m.initial_states.items():
## initial_states[name] = old_to_new[old_state]
## for old_state in m.states:
## new_state_index = old_to_new[old_state]
## new_table = states[new_state_index][0]
## transitions = old_state.transitions
## for c, old_targets in transitions.items():
## if old_targets:
## old_target = old_targets[0]
## new_target_index = old_to_new[old_target]
## if len(c) == 1:
## a = ord(c)
## else:
## a = self.special_map[c]
## new_table[a] = states[new_target_index]
## def dump(self, f):
## f.write("Plex.StateTableMachine:\n")
## f.write(" Initial states:\n")
## for name, index in self.initial_states.items():
## f.write(" %s: State %d\n" % (
## repr(name), id(self.states[index])))
## for i in xrange(1, len(self.states)):
## table, action = self.states[i]
## f.write(" State %d:" % i)
## if action:
## f.write("%s" % action)
## f.write("\n")
## f.write(" %s\n" % map(id,table))