Update ply

This commit is contained in:
sagudev 2023-02-19 13:38:01 +01:00
parent 4d393612b4
commit 2acdeabaf5
60 changed files with 9624 additions and 297 deletions

View file

@ -0,0 +1,79 @@
Inspired by a September 14, 2006 Salon article "Why Johnny Can't Code" by
David Brin (http://www.salon.com/tech/feature/2006/09/14/basic/index.html),
I thought that a fully working BASIC interpreter might be an interesting,
if not questionable, PLY example. Uh, okay, so maybe it's just a bad idea,
but in any case, here it is.
In this example, you'll find a rough implementation of 1964 Dartmouth BASIC
as described in the manual at:
http://www.bitsavers.org/pdf/dartmouth/BASIC_Oct64.pdf
See also:
http://en.wikipedia.org/wiki/Dartmouth_BASIC
This dialect is downright primitive---there are no string variables
and no facilities for interactive input. Moreover, subroutines and functions
are even more brain-dead than they usually are in BASIC. Of course,
the GOTO statement is provided.
Nevertheless, there are a few interesting aspects of this example:
- It illustrates a fully working interpreter including lexing, parsing,
and interpretation of instructions.
- The parser shows how to catch and report various kinds of parsing
errors in a more graceful way.
- The example parses both files (supplied on the command line) and
interactive input entered line by line.
- It shows how you might represent parsed information. In this case,
each BASIC statement is encoded into a Python tuple containing the
statement type and parameters. These tuples are then stored in
a dictionary indexed by program line numbers (a short example appears
after this list).
- Even though it's just BASIC, the parser contains more than 80
rules and 150 parsing states. Thus, it's a little more meaty than
the calculator example.
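For example, after parsing, the three-line program

    10 LET X = 5
    20 PRINT "X IS" X
    30 END

is held as a dictionary of tuples roughly like this (see basparse.py in
this commit for the exact tuple shapes):

    {10: ('LET', ('X', None, None), ('NUM', 5)),
     20: ('PRINT', [('X IS', ('VAR', ('X', None, None)))], None),
     30: ('END',)}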
To use the example, run it as follows:
% python basic.py hello.bas
HELLO WORLD
%
or use it interactively:
% python basic.py
[BASIC] 10 PRINT "HELLO WORLD"
[BASIC] 20 END
[BASIC] RUN
HELLO WORLD
[BASIC]
The following files are defined:
basic.py - High level script that controls everything
basiclex.py - BASIC tokenizer
basparse.py - BASIC parser
basinterp.py - BASIC interpreter that runs parsed programs.
In addition, a number of sample BASIC programs (.bas suffix) are
provided. These were taken out of the Dartmouth manual.
Disclaimer: I haven't spent a ton of time testing this and it's likely that
I've skimped here and there on a few finer details (e.g., strictly enforcing
variable naming rules). However, the interpreter seems to be able to run
the examples in the BASIC manual.
Have fun!
-Dave

View file

@ -0,0 +1,65 @@
# An implementation of Dartmouth BASIC (1964)
#
import sys
sys.path.insert(0, "../..")
if sys.version_info[0] >= 3:
raw_input = input
import basiclex
import basparse
import basinterp
# If a filename has been specified, we try to run it.
# If a runtime error occurs, we bail out and enter
# interactive mode below
if len(sys.argv) == 2:
data = open(sys.argv[1]).read()
prog = basparse.parse(data)
if not prog:
raise SystemExit
b = basinterp.BasicInterpreter(prog)
try:
b.run()
raise SystemExit
except RuntimeError:
pass
else:
b = basinterp.BasicInterpreter({})
# Interactive mode. This incrementally adds/deletes statements
# from the program stored in the BasicInterpreter object. In
# addition, special commands 'NEW', 'LIST', and 'RUN' are added.
# Specifying a line number with no code deletes that line from
# the program.
while 1:
try:
line = raw_input("[BASIC] ")
except EOFError:
raise SystemExit
if not line:
continue
line += "\n"
prog = basparse.parse(line)
if not prog:
continue
keys = list(prog)
if keys[0] > 0:
b.add_statements(prog)
else:
stat = prog[keys[0]]
if stat[0] == 'RUN':
try:
b.run()
except RuntimeError:
pass
elif stat[0] == 'LIST':
b.list()
elif stat[0] == 'BLANK':
b.del_line(stat[1])
elif stat[0] == 'NEW':
b.new()

View file

@ -0,0 +1,61 @@
# An implementation of Dartmouth BASIC (1964)
from ply import *
keywords = (
'LET', 'READ', 'DATA', 'PRINT', 'GOTO', 'IF', 'THEN', 'FOR', 'NEXT', 'TO', 'STEP',
'END', 'STOP', 'DEF', 'GOSUB', 'DIM', 'REM', 'RETURN', 'RUN', 'LIST', 'NEW',
)
tokens = keywords + (
'EQUALS', 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'POWER',
'LPAREN', 'RPAREN', 'LT', 'LE', 'GT', 'GE', 'NE',
'COMMA', 'SEMI', 'INTEGER', 'FLOAT', 'STRING',
'ID', 'NEWLINE'
)
t_ignore = ' \t'
def t_REM(t):
r'REM .*'
return t
def t_ID(t):
r'[A-Z][A-Z0-9]*'
if t.value in keywords:
t.type = t.value
return t
t_EQUALS = r'='
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_POWER = r'\^'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LT = r'<'
t_LE = r'<='
t_GT = r'>'
t_GE = r'>='
t_NE = r'<>'
t_COMMA = r'\,'
t_SEMI = r';'
t_INTEGER = r'\d+'
t_FLOAT = r'((\d*\.\d+)(E[\+-]?\d+)?|([1-9]\d*E[\+-]?\d+))'
t_STRING = r'\".*?\"'
def t_NEWLINE(t):
r'\n'
t.lexer.lineno += 1
return t
def t_error(t):
print("Illegal character %s" % t.value[0])
t.lexer.skip(1)
lex.lex(debug=0)

View file

@ -0,0 +1,73 @@
# An implementation of Dartmouth BASIC (1964)
#
import sys
sys.path.insert(0, "../..")
if sys.version_info[0] >= 3:
raw_input = input
import logging
logging.basicConfig(
level=logging.INFO,
filename="parselog.txt",
filemode="w"
)
log = logging.getLogger()
import basiclex
import basparse
import basinterp
# If a filename has been specified, we try to run it.
# If a runtime error occurs, we bail out and enter
# interactive mode below
if len(sys.argv) == 2:
data = open(sys.argv[1]).read()
prog = basparse.parse(data, debug=log)
if not prog:
raise SystemExit
b = basinterp.BasicInterpreter(prog)
try:
b.run()
raise SystemExit
except RuntimeError:
pass
else:
b = basinterp.BasicInterpreter({})
# Interactive mode. This incrementally adds/deletes statements
# from the program stored in the BasicInterpreter object. In
# addition, special commands 'NEW', 'LIST', and 'RUN' are added.
# Specifying a line number with no code deletes that line from
# the program.
while 1:
try:
line = raw_input("[BASIC] ")
except EOFError:
raise SystemExit
if not line:
continue
line += "\n"
prog = basparse.parse(line, debug=log)
if not prog:
continue
keys = list(prog)
if keys[0] > 0:
b.add_statements(prog)
else:
stat = prog[keys[0]]
if stat[0] == 'RUN':
try:
b.run()
except RuntimeError:
pass
elif stat[0] == 'LIST':
b.list()
elif stat[0] == 'BLANK':
b.del_line(stat[1])
elif stat[0] == 'NEW':
b.new()

View file

@ -0,0 +1,496 @@
# This file provides the runtime support for running a basic program
# Assumes the program has been parsed using basparse.py
import sys
import math
import random
class BasicInterpreter:
# Initialize the interpreter. prog is a dictionary
# containing (line,statement) mappings
def __init__(self, prog):
self.prog = prog
self.functions = { # Built-in function table
'SIN': lambda z: math.sin(self.eval(z)),
'COS': lambda z: math.cos(self.eval(z)),
'TAN': lambda z: math.tan(self.eval(z)),
'ATN': lambda z: math.atan(self.eval(z)),
'EXP': lambda z: math.exp(self.eval(z)),
'ABS': lambda z: abs(self.eval(z)),
'LOG': lambda z: math.log(self.eval(z)),
'SQR': lambda z: math.sqrt(self.eval(z)),
'INT': lambda z: int(self.eval(z)),
'RND': lambda z: random.random()
}
# Collect all data statements
def collect_data(self):
self.data = []
for lineno in self.stat:
if self.prog[lineno][0] == 'DATA':
self.data = self.data + self.prog[lineno][1]
self.dc = 0 # Initialize the data counter
# Check for end statements
def check_end(self):
has_end = 0
for lineno in self.stat:
if self.prog[lineno][0] == 'END' and not has_end:
has_end = lineno
if not has_end:
print("NO END INSTRUCTION")
self.error = 1
return
if has_end != lineno:
print("END IS NOT LAST")
self.error = 1
# Check loops
def check_loops(self):
for pc in range(len(self.stat)):
lineno = self.stat[pc]
if self.prog[lineno][0] == 'FOR':
forinst = self.prog[lineno]
loopvar = forinst[1]
for i in range(pc + 1, len(self.stat)):
if self.prog[self.stat[i]][0] == 'NEXT':
nextvar = self.prog[self.stat[i]][1]
if nextvar != loopvar:
continue
self.loopend[pc] = i
break
else:
print("FOR WITHOUT NEXT AT LINE %s" % self.stat[pc])
self.error = 1
# Evaluate an expression
def eval(self, expr):
etype = expr[0]
if etype == 'NUM':
return expr[1]
elif etype == 'GROUP':
return self.eval(expr[1])
elif etype == 'UNARY':
if expr[1] == '-':
return -self.eval(expr[2])
elif etype == 'BINOP':
if expr[1] == '+':
return self.eval(expr[2]) + self.eval(expr[3])
elif expr[1] == '-':
return self.eval(expr[2]) - self.eval(expr[3])
elif expr[1] == '*':
return self.eval(expr[2]) * self.eval(expr[3])
elif expr[1] == '/':
return float(self.eval(expr[2])) / self.eval(expr[3])
elif expr[1] == '^':
return abs(self.eval(expr[2]))**self.eval(expr[3])
elif etype == 'VAR':
var, dim1, dim2 = expr[1]
if not dim1 and not dim2:
if var in self.vars:
return self.vars[var]
else:
print("UNDEFINED VARIABLE %s AT LINE %s" %
(var, self.stat[self.pc]))
raise RuntimeError
# May be a list lookup or a function evaluation
if dim1 and not dim2:
if var in self.functions:
# A function
return self.functions[var](dim1)
else:
# A list evaluation
if var in self.lists:
dim1val = self.eval(dim1)
if dim1val < 1 or dim1val > len(self.lists[var]):
print("LIST INDEX OUT OF BOUNDS AT LINE %s" %
self.stat[self.pc])
raise RuntimeError
return self.lists[var][dim1val - 1]
if dim1 and dim2:
if var in self.tables:
dim1val = self.eval(dim1)
dim2val = self.eval(dim2)
if dim1val < 1 or dim1val > len(self.tables[var]) or dim2val < 1 or dim2val > len(self.tables[var][0]):
print("TABLE INDEX OUT OUT BOUNDS AT LINE %s" %
self.stat[self.pc])
raise RuntimeError
return self.tables[var][dim1val - 1][dim2val - 1]
print("UNDEFINED VARIABLE %s AT LINE %s" %
(var, self.stat[self.pc]))
raise RuntimeError
# Evaluate a relational expression
def releval(self, expr):
etype = expr[1]
lhs = self.eval(expr[2])
rhs = self.eval(expr[3])
if etype == '<':
if lhs < rhs:
return 1
else:
return 0
elif etype == '<=':
if lhs <= rhs:
return 1
else:
return 0
elif etype == '>':
if lhs > rhs:
return 1
else:
return 0
elif etype == '>=':
if lhs >= rhs:
return 1
else:
return 0
elif etype == '=':
if lhs == rhs:
return 1
else:
return 0
elif etype == '<>':
if lhs != rhs:
return 1
else:
return 0
# Assignment
def assign(self, target, value):
var, dim1, dim2 = target
if not dim1 and not dim2:
self.vars[var] = self.eval(value)
elif dim1 and not dim2:
# List assignment
dim1val = self.eval(dim1)
if not var in self.lists:
self.lists[var] = [0] * 10
if dim1val > len(self.lists[var]):
print ("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc])
raise RuntimeError
self.lists[var][dim1val - 1] = self.eval(value)
elif dim1 and dim2:
dim1val = self.eval(dim1)
dim2val = self.eval(dim2)
if not var in self.tables:
temp = [0] * 10
v = []
for i in range(10):
v.append(temp[:])
self.tables[var] = v
# Variable already exists
if dim1val > len(self.tables[var]) or dim2val > len(self.tables[var][0]):
print("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc])
raise RuntimeError
self.tables[var][dim1val - 1][dim2val - 1] = self.eval(value)
# Change the current line number
def goto(self, linenum):
if not linenum in self.prog:
print("UNDEFINED LINE NUMBER %d AT LINE %d" %
(linenum, self.stat[self.pc]))
raise RuntimeError
self.pc = self.stat.index(linenum)
# Run it
def run(self):
self.vars = {} # All variables
self.lists = {} # List variables
self.tables = {} # Tables
self.loops = [] # Currently active loops
self.loopend = {} # Mapping saying where loops end
self.gosub = None # Gosub return point (if any)
self.error = 0 # Indicates program error
self.stat = list(self.prog) # Ordered list of all line numbers
self.stat.sort()
self.pc = 0 # Current program counter
# Processing prior to running
self.collect_data() # Collect all of the data statements
self.check_end()
self.check_loops()
if self.error:
raise RuntimeError
while 1:
line = self.stat[self.pc]
instr = self.prog[line]
op = instr[0]
# END and STOP statements
if op == 'END' or op == 'STOP':
break # We're done
# GOTO statement
elif op == 'GOTO':
newline = instr[1]
self.goto(newline)
continue
# PRINT statement
elif op == 'PRINT':
plist = instr[1]
out = ""
for label, val in plist:
if out:
out += ' ' * (15 - (len(out) % 15))
out += label
if val:
if label:
out += " "
eval = self.eval(val)
out += str(eval)
sys.stdout.write(out)
end = instr[2]
if not (end == ',' or end == ';'):
sys.stdout.write("\n")
if end == ',':
sys.stdout.write(" " * (15 - (len(out) % 15)))
if end == ';':
sys.stdout.write(" " * (3 - (len(out) % 3)))
# LET statement
elif op == 'LET':
target = instr[1]
value = instr[2]
self.assign(target, value)
# READ statement
elif op == 'READ':
for target in instr[1]:
if self.dc < len(self.data):
value = ('NUM', self.data[self.dc])
self.assign(target, value)
self.dc += 1
else:
# No more data. Program ends
return
elif op == 'IF':
relop = instr[1]
newline = instr[2]
if (self.releval(relop)):
self.goto(newline)
continue
elif op == 'FOR':
loopvar = instr[1]
initval = instr[2]
finval = instr[3]
stepval = instr[4]
# Check to see if this is a new loop
if not self.loops or self.loops[-1][0] != self.pc:
# Looks like a new loop. Make the initial assignment
newvalue = initval
self.assign((loopvar, None, None), initval)
if not stepval:
stepval = ('NUM', 1)
stepval = self.eval(stepval) # Evaluate step here
self.loops.append((self.pc, stepval))
else:
# It's a repeat of the previous loop
# Update the value of the loop variable according to the
# step
stepval = ('NUM', self.loops[-1][1])
newvalue = (
'BINOP', '+', ('VAR', (loopvar, None, None)), stepval)
if self.loops[-1][1] < 0:
relop = '>='
else:
relop = '<='
if not self.releval(('RELOP', relop, newvalue, finval)):
# Loop is done. Jump to the NEXT
self.pc = self.loopend[self.pc]
self.loops.pop()
else:
self.assign((loopvar, None, None), newvalue)
elif op == 'NEXT':
if not self.loops:
print("NEXT WITHOUT FOR AT LINE %s" % line)
return
nextvar = instr[1]
self.pc = self.loops[-1][0]
loopinst = self.prog[self.stat[self.pc]]
forvar = loopinst[1]
if nextvar != forvar:
print("NEXT DOESN'T MATCH FOR AT LINE %s" % line)
return
continue
elif op == 'GOSUB':
newline = instr[1]
if self.gosub:
print("ALREADY IN A SUBROUTINE AT LINE %s" % line)
return
self.gosub = self.stat[self.pc]
self.goto(newline)
continue
elif op == 'RETURN':
if not self.gosub:
print("RETURN WITHOUT A GOSUB AT LINE %s" % line)
return
self.goto(self.gosub)
self.gosub = None
elif op == 'FUNC':
fname = instr[1]
pname = instr[2]
expr = instr[3]
def eval_func(pvalue, name=pname, self=self, expr=expr):
# The default arguments capture the current pname/expr values; use the
# captured 'name' so a later DEF statement doesn't rebind this function's parameter.
self.assign((name, None, None), pvalue)
return self.eval(expr)
self.functions[fname] = eval_func
elif op == 'DIM':
for vname, x, y in instr[1]:
if y == 0:
# Single dimension variable
self.lists[vname] = [0] * x
else:
# Double dimension variable
temp = [0] * y
v = []
for i in range(x):
v.append(temp[:])
self.tables[vname] = v
self.pc += 1
# Utility functions for program listing
def expr_str(self, expr):
etype = expr[0]
if etype == 'NUM':
return str(expr[1])
elif etype == 'GROUP':
return "(%s)" % self.expr_str(expr[1])
elif etype == 'UNARY':
if expr[1] == '-':
return "-" + str(expr[2])
elif etype == 'BINOP':
return "%s %s %s" % (self.expr_str(expr[2]), expr[1], self.expr_str(expr[3]))
elif etype == 'VAR':
return self.var_str(expr[1])
def relexpr_str(self, expr):
return "%s %s %s" % (self.expr_str(expr[2]), expr[1], self.expr_str(expr[3]))
def var_str(self, var):
varname, dim1, dim2 = var
if not dim1 and not dim2:
return varname
if dim1 and not dim2:
return "%s(%s)" % (varname, self.expr_str(dim1))
return "%s(%s,%s)" % (varname, self.expr_str(dim1), self.expr_str(dim2))
# Create a program listing
def list(self):
stat = list(self.prog) # Ordered list of all line numbers
stat.sort()
for line in stat:
instr = self.prog[line]
op = instr[0]
if op in ['END', 'STOP', 'RETURN']:
print("%s %s" % (line, op))
continue
elif op == 'REM':
print("%s %s" % (line, instr[1]))
elif op == 'PRINT':
_out = "%s %s " % (line, op)
first = 1
for p in instr[1]:
if not first:
_out += ", "
if p[0] and p[1]:
_out += '"%s"%s' % (p[0], self.expr_str(p[1]))
elif p[1]:
_out += self.expr_str(p[1])
else:
_out += '"%s"' % (p[0],)
first = 0
if instr[2]:
_out += instr[2]
print(_out)
elif op == 'LET':
print("%s LET %s = %s" %
(line, self.var_str(instr[1]), self.expr_str(instr[2])))
elif op == 'READ':
_out = "%s READ " % line
first = 1
for r in instr[1]:
if not first:
_out += ","
_out += self.var_str(r)
first = 0
print(_out)
elif op == 'IF':
print("%s IF %s THEN %d" %
(line, self.relexpr_str(instr[1]), instr[2]))
elif op == 'GOTO' or op == 'GOSUB':
print("%s %s %s" % (line, op, instr[1]))
elif op == 'FOR':
_out = "%s FOR %s = %s TO %s" % (
line, instr[1], self.expr_str(instr[2]), self.expr_str(instr[3]))
if instr[4]:
_out += " STEP %s" % (self.expr_str(instr[4]))
print(_out)
elif op == 'NEXT':
print("%s NEXT %s" % (line, instr[1]))
elif op == 'FUNC':
print("%s DEF %s(%s) = %s" %
(line, instr[1], instr[2], self.expr_str(instr[3])))
elif op == 'DIM':
_out = "%s DIM " % line
first = 1
for vname, x, y in instr[1]:
if not first:
_out += ","
first = 0
if y == 0:
_out += "%s(%d)" % (vname, x)
else:
_out += "%s(%d,%d)" % (vname, x, y)
print(_out)
elif op == 'DATA':
_out = "%s DATA " % line
first = 1
for v in instr[1]:
if not first:
_out += ","
first = 0
_out += v
print(_out)
# Erase the current program
def new(self):
self.prog = {}
# Insert statements
def add_statements(self, prog):
for line, stat in prog.items():
self.prog[line] = stat
# Delete a statement
def del_line(self, lineno):
try:
del self.prog[lineno]
except KeyError:
pass

View file

@ -0,0 +1,474 @@
# An implementation of Dartmouth BASIC (1964)
#
from ply import *
import basiclex
tokens = basiclex.tokens
precedence = (
('left', 'PLUS', 'MINUS'),
('left', 'TIMES', 'DIVIDE'),
('left', 'POWER'),
('right', 'UMINUS')
)
# A BASIC program is a series of statements. We represent the program as a
# dictionary of tuples indexed by line number.
def p_program(p):
'''program : program statement
| statement'''
if len(p) == 2 and p[1]:
p[0] = {}
line, stat = p[1]
p[0][line] = stat
elif len(p) == 3:
p[0] = p[1]
if not p[0]:
p[0] = {}
if p[2]:
line, stat = p[2]
p[0][line] = stat
# This catch-all rule is used for any catastrophic errors. In this case,
# we simply return nothing
def p_program_error(p):
'''program : error'''
p[0] = None
p.parser.error = 1
# Format of all BASIC statements.
def p_statement(p):
'''statement : INTEGER command NEWLINE'''
if isinstance(p[2], str):
print("%s %s %s" % (p[2], "AT LINE", p[1]))
p[0] = None
p.parser.error = 1
else:
lineno = int(p[1])
p[0] = (lineno, p[2])
# Interactive statements.
def p_statement_interactive(p):
'''statement : RUN NEWLINE
| LIST NEWLINE
| NEW NEWLINE'''
p[0] = (0, (p[1], 0))
# Blank line number
def p_statement_blank(p):
'''statement : INTEGER NEWLINE'''
p[0] = (0, ('BLANK', int(p[1])))
# Error handling for malformed statements
def p_statement_bad(p):
'''statement : INTEGER error NEWLINE'''
print("MALFORMED STATEMENT AT LINE %s" % p[1])
p[0] = None
p.parser.error = 1
# Blank line
def p_statement_newline(p):
'''statement : NEWLINE'''
p[0] = None
# LET statement
def p_command_let(p):
'''command : LET variable EQUALS expr'''
p[0] = ('LET', p[2], p[4])
def p_command_let_bad(p):
'''command : LET variable EQUALS error'''
p[0] = "BAD EXPRESSION IN LET"
# READ statement
def p_command_read(p):
'''command : READ varlist'''
p[0] = ('READ', p[2])
def p_command_read_bad(p):
'''command : READ error'''
p[0] = "MALFORMED VARIABLE LIST IN READ"
# DATA statement
def p_command_data(p):
'''command : DATA numlist'''
p[0] = ('DATA', p[2])
def p_command_data_bad(p):
'''command : DATA error'''
p[0] = "MALFORMED NUMBER LIST IN DATA"
# PRINT statement
def p_command_print(p):
'''command : PRINT plist optend'''
p[0] = ('PRINT', p[2], p[3])
def p_command_print_bad(p):
'''command : PRINT error'''
p[0] = "MALFORMED PRINT STATEMENT"
# Optional ending on PRINT. Either a comma (,) or semicolon (;)
def p_optend(p):
'''optend : COMMA
| SEMI
|'''
if len(p) == 2:
p[0] = p[1]
else:
p[0] = None
# PRINT statement with no arguments
def p_command_print_empty(p):
'''command : PRINT'''
p[0] = ('PRINT', [], None)
# GOTO statement
def p_command_goto(p):
'''command : GOTO INTEGER'''
p[0] = ('GOTO', int(p[2]))
def p_command_goto_bad(p):
'''command : GOTO error'''
p[0] = "INVALID LINE NUMBER IN GOTO"
# IF-THEN statement
def p_command_if(p):
'''command : IF relexpr THEN INTEGER'''
p[0] = ('IF', p[2], int(p[4]))
def p_command_if_bad(p):
'''command : IF error THEN INTEGER'''
p[0] = "BAD RELATIONAL EXPRESSION"
def p_command_if_bad2(p):
'''command : IF relexpr THEN error'''
p[0] = "INVALID LINE NUMBER IN THEN"
# FOR statement
def p_command_for(p):
'''command : FOR ID EQUALS expr TO expr optstep'''
p[0] = ('FOR', p[2], p[4], p[6], p[7])
def p_command_for_bad_initial(p):
'''command : FOR ID EQUALS error TO expr optstep'''
p[0] = "BAD INITIAL VALUE IN FOR STATEMENT"
def p_command_for_bad_final(p):
'''command : FOR ID EQUALS expr TO error optstep'''
p[0] = "BAD FINAL VALUE IN FOR STATEMENT"
def p_command_for_bad_step(p):
'''command : FOR ID EQUALS expr TO expr STEP error'''
p[0] = "MALFORMED STEP IN FOR STATEMENT"
# Optional STEP qualifier on FOR statement
def p_optstep(p):
'''optstep : STEP expr
| empty'''
if len(p) == 3:
p[0] = p[2]
else:
p[0] = None
# NEXT statement
def p_command_next(p):
'''command : NEXT ID'''
p[0] = ('NEXT', p[2])
def p_command_next_bad(p):
'''command : NEXT error'''
p[0] = "MALFORMED NEXT"
# END statement
def p_command_end(p):
'''command : END'''
p[0] = ('END',)
# REM statement
def p_command_rem(p):
'''command : REM'''
p[0] = ('REM', p[1])
# STOP statement
def p_command_stop(p):
'''command : STOP'''
p[0] = ('STOP',)
# DEF statement
def p_command_def(p):
'''command : DEF ID LPAREN ID RPAREN EQUALS expr'''
p[0] = ('FUNC', p[2], p[4], p[7])
def p_command_def_bad_rhs(p):
'''command : DEF ID LPAREN ID RPAREN EQUALS error'''
p[0] = "BAD EXPRESSION IN DEF STATEMENT"
def p_command_def_bad_arg(p):
'''command : DEF ID LPAREN error RPAREN EQUALS expr'''
p[0] = "BAD ARGUMENT IN DEF STATEMENT"
# GOSUB statement
def p_command_gosub(p):
'''command : GOSUB INTEGER'''
p[0] = ('GOSUB', int(p[2]))
def p_command_gosub_bad(p):
'''command : GOSUB error'''
p[0] = "INVALID LINE NUMBER IN GOSUB"
# RETURN statement
def p_command_return(p):
'''command : RETURN'''
p[0] = ('RETURN',)
# DIM statement
def p_command_dim(p):
'''command : DIM dimlist'''
p[0] = ('DIM', p[2])
def p_command_dim_bad(p):
'''command : DIM error'''
p[0] = "MALFORMED VARIABLE LIST IN DIM"
# List of variables supplied to DIM statement
def p_dimlist(p):
'''dimlist : dimlist COMMA dimitem
| dimitem'''
if len(p) == 4:
p[0] = p[1]
p[0].append(p[3])
else:
p[0] = [p[1]]
# DIM items
def p_dimitem_single(p):
'''dimitem : ID LPAREN INTEGER RPAREN'''
p[0] = (p[1], eval(p[3]), 0)
def p_dimitem_double(p):
'''dimitem : ID LPAREN INTEGER COMMA INTEGER RPAREN'''
p[0] = (p[1], eval(p[3]), eval(p[5]))
# Arithmetic expressions
def p_expr_binary(p):
'''expr : expr PLUS expr
| expr MINUS expr
| expr TIMES expr
| expr DIVIDE expr
| expr POWER expr'''
p[0] = ('BINOP', p[2], p[1], p[3])
def p_expr_number(p):
'''expr : INTEGER
| FLOAT'''
p[0] = ('NUM', eval(p[1]))
def p_expr_variable(p):
'''expr : variable'''
p[0] = ('VAR', p[1])
def p_expr_group(p):
'''expr : LPAREN expr RPAREN'''
p[0] = ('GROUP', p[2])
def p_expr_unary(p):
'''expr : MINUS expr %prec UMINUS'''
p[0] = ('UNARY', '-', p[2])
# Relational expressions
def p_relexpr(p):
'''relexpr : expr LT expr
| expr LE expr
| expr GT expr
| expr GE expr
| expr EQUALS expr
| expr NE expr'''
p[0] = ('RELOP', p[2], p[1], p[3])
# Variables
def p_variable(p):
'''variable : ID
| ID LPAREN expr RPAREN
| ID LPAREN expr COMMA expr RPAREN'''
if len(p) == 2:
p[0] = (p[1], None, None)
elif len(p) == 5:
p[0] = (p[1], p[3], None)
else:
p[0] = (p[1], p[3], p[5])
# Builds a list of variable targets as a Python list
def p_varlist(p):
'''varlist : varlist COMMA variable
| variable'''
if len(p) > 2:
p[0] = p[1]
p[0].append(p[3])
else:
p[0] = [p[1]]
# Builds a list of numbers as a Python list
def p_numlist(p):
'''numlist : numlist COMMA number
| number'''
if len(p) > 2:
p[0] = p[1]
p[0].append(p[3])
else:
p[0] = [p[1]]
# A number. May be an integer or a float
def p_number(p):
'''number : INTEGER
| FLOAT'''
p[0] = eval(p[1])
# A signed number.
def p_number_signed(p):
'''number : MINUS INTEGER
| MINUS FLOAT'''
p[0] = eval("-" + p[2])
# List of targets for a print statement
# Returns a list of tuples (label,expr)
def p_plist(p):
'''plist : plist COMMA pitem
| pitem'''
if len(p) > 3:
p[0] = p[1]
p[0].append(p[3])
else:
p[0] = [p[1]]
def p_item_string(p):
'''pitem : STRING'''
p[0] = (p[1][1:-1], None)
def p_item_string_expr(p):
'''pitem : STRING expr'''
p[0] = (p[1][1:-1], p[2])
def p_item_expr(p):
'''pitem : expr'''
p[0] = ("", p[1])
# Empty
def p_empty(p):
'''empty : '''
# Catastrophic error handler
def p_error(p):
if not p:
print("SYNTAX ERROR AT EOF")
bparser = yacc.yacc()
def parse(data, debug=0):
bparser.error = 0
p = bparser.parse(data, debug=debug)
if bparser.error:
return None
return p

View file

@ -0,0 +1,14 @@
5 DIM A(50,15)
10 FOR I = 1 TO 50
20 FOR J = 1 TO 15
30 LET A(I,J) = I + J
35 REM PRINT I,J, A(I,J)
40 NEXT J
50 NEXT I
100 FOR I = 1 TO 50
110 FOR J = 1 TO 15
120 PRINT A(I,J),
130 NEXT J
140 PRINT
150 NEXT I
999 END

View file

@ -0,0 +1,5 @@
10 DEF FDX(X) = 2*X
20 FOR I = 0 TO 100
30 PRINT FDX(I)
40 NEXT I
50 END

View file

@ -0,0 +1,22 @@
10 PRINT "A","B","C","GCD"
20 READ A,B,C
30 LET X = A
40 LET Y = B
50 GOSUB 200
60 LET X = G
70 LET Y = C
80 GOSUB 200
90 PRINT A, B, C, G
100 GOTO 20
110 DATA 60, 90, 120
120 DATA 38456, 64872, 98765
130 DATA 32, 384, 72
200 LET Q = INT(X/Y)
210 LET R = X - Q*Y
220 IF R = 0 THEN 300
230 LET X = Y
240 LET Y = R
250 GOTO 200
300 LET G = Y
310 RETURN
999 END

View file

@ -0,0 +1,13 @@
100 LET X = 3
110 GOSUB 400
120 PRINT U, V, W
200 LET X = 5
210 GOSUB 400
220 LET Z = U + 2*V + 3*W
230 PRINT Z
240 GOTO 999
400 LET U = X*X
410 LET V = X*X*X
420 LET W = X*X*X*X + X*X*X + X*X + X
430 RETURN
999 END

View file

@ -0,0 +1,4 @@
5 REM HELLO WORLD PROGRAM
10 PRINT "HELLO WORLD"
99 END

View file

@ -0,0 +1,17 @@
1 REM ::: SOLVE A SYSTEM OF LINEAR EQUATIONS
2 REM ::: A1*X1 + A2*X2 = B1
3 REM ::: A3*X1 + A4*X2 = B2
4 REM --------------------------------------
10 READ A1, A2, A3, A4
15 LET D = A1 * A4 - A3 * A2
20 IF D = 0 THEN 65
30 READ B1, B2
37 LET X1 = (B1*A4 - B2*A2) / D
42 LET X2 = (A1*B2 - A3*B1) / D
55 PRINT X1, X2
60 GOTO 30
65 PRINT "NO UNIQUE SOLUTION"
70 DATA 1, 2, 4
80 DATA 2, -7, 5
85 DATA 1, 3, 4, -7
90 END

View file

@ -0,0 +1,12 @@
5 PRINT "X VALUE", "SINE", "RESOLUTION"
10 READ D
20 LET M = -1
30 FOR X = 0 TO 3 STEP D
40 IF SIN(X) <= M THEN 80
50 LET X0 = X
60 LET M = SIN(X)
80 NEXT X
85 PRINT X0, M, D
90 GOTO 10
100 DATA .1, .01, .001
110 END

View file

@ -0,0 +1,13 @@
5 PRINT "THIS PROGRAM COMPUTES AND PRINTS THE NTH POWERS"
6 PRINT "OF THE NUMBERS LESS THAN OR EQUAL TO N FOR VARIOUS"
7 PRINT "N FROM 1 THROUGH 7"
8 PRINT
10 FOR N = 1 TO 7
15 PRINT "N = "N
20 FOR I = 1 TO N
30 PRINT I^N,
40 NEXT I
50 PRINT
60 PRINT
70 NEXT N
80 END

View file

@ -0,0 +1,4 @@
10 FOR I = 1 TO 20
20 PRINT INT(10*RND(0))
30 NEXT I
40 END

View file

@ -0,0 +1,20 @@
10 FOR I = 1 TO 3
20 READ P(I)
30 NEXT I
40 FOR I = 1 TO 3
50 FOR J = 1 TO 5
60 READ S(I,J)
70 NEXT J
80 NEXT I
90 FOR J = 1 TO 5
100 LET S = 0
110 FOR I = 1 TO 3
120 LET S = S + P(I) * S(I,J)
130 NEXT I
140 PRINT "TOTAL SALES FOR SALESMAN"J, "$"S
150 NEXT J
200 DATA 1.25, 4.30, 2.50
210 DATA 40, 20, 37, 29, 42
220 DATA 10, 16, 3, 21, 8
230 DATA 35, 47, 29, 16, 33
300 END

View file

@ -0,0 +1,18 @@
1 REM :: THIS PROGRAM COMPUTES HOW MANY TIMES YOU HAVE TO FOLD
2 REM :: A PIECE OF PAPER SO THAT IT IS TALLER THAN THE
3 REM :: SEARS TOWER.
4 REM :: S = HEIGHT OF TOWER (METERS)
5 REM :: T = THICKNESS OF PAPER (MILLIMETERS)
10 LET S = 442
20 LET T = 0.1
30 REM CONVERT T TO METERS
40 LET T = T * .001
50 LET F = 1
60 LET H = T
100 IF H > S THEN 200
120 LET H = 2 * H
125 LET F = F + 1
130 GOTO 100
200 PRINT "NUMBER OF FOLDS ="F
220 PRINT "FINAL HEIGHT ="H
999 END

View file

@ -0,0 +1,5 @@
10 LET X = 0
20 LET X = X + 1
30 PRINT X, SQR(X)
40 IF X < 100 THEN 20
50 END

View file

@ -0,0 +1,4 @@
10 FOR X = 1 TO 100
20 PRINT X, SQR(X)
30 NEXT X
40 END

View file

@ -0,0 +1,777 @@
# GardenSnake - a parser generator demonstration program
#
# This implements a modified version of a subset of Python:
# - only 'def', 'return' and 'if' statements
# - 'if' only has 'then' clause (no elif nor else)
# - single-quoted strings only, content in raw format
# - numbers are decimal.Decimal instances (not integers or floats)
# - no print statement; use the built-in 'print' function
# - only < > == + - / * implemented (and unary + -)
# - assignment and tuple assignment work
# - no generators of any sort
# - no ... well, no quite a lot
# Why? I'm thinking about a new indentation-based configuration
# language for a project and wanted to figure out how to do it. Once
# I got that working I needed a way to test it out. My original AST
# was dumb so I decided to target Python's AST and compile it into
# Python code. Plus, it's pretty cool that it only took a day or so
# from sitting down with Ply to having working code.
# This uses David Beazley's Ply from http://www.dabeaz.com/ply/
# This work is hereby released into the Public Domain. To view a copy of
# the public domain dedication, visit
# http://creativecommons.org/licenses/publicdomain/ or send a letter to
# Creative Commons, 543 Howard Street, 5th Floor, San Francisco,
# California, 94105, USA.
#
# Portions of this work are derived from Python's Grammar definition
# and may be covered under the Python copyright and license
#
# Andrew Dalke / Dalke Scientific Software, LLC
# 30 August 2006 / Cape Town, South Africa
# Changelog:
# 30 August - added link to CC license; removed the "swapcase" encoding
# Modifications for inclusion in PLY distribution
import sys
sys.path.insert(0, "../..")
from ply import *
##### Lexer ######
#import lex
import decimal
tokens = (
'DEF',
'IF',
'NAME',
'NUMBER', # Python decimals
'STRING', # single quoted strings only; syntax of raw strings
'LPAR',
'RPAR',
'COLON',
'EQ',
'ASSIGN',
'LT',
'GT',
'PLUS',
'MINUS',
'MULT',
'DIV',
'RETURN',
'WS',
'NEWLINE',
'COMMA',
'SEMICOLON',
'INDENT',
'DEDENT',
'ENDMARKER',
)
#t_NUMBER = r'\d+'
# taken from decimal.py but without the leading sign
def t_NUMBER(t):
r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
t.value = decimal.Decimal(t.value)
return t
def t_STRING(t):
r"'([^\\']+|\\'|\\\\)*'" # I think this is right ...
t.value = t.value[1:-1].decode("string-escape") # .swapcase() # for fun
return t
t_COLON = r':'
t_EQ = r'=='
t_ASSIGN = r'='
t_LT = r'<'
t_GT = r'>'
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'
t_COMMA = r','
t_SEMICOLON = r';'
# Ply nicely documented how to do this.
RESERVED = {
"def": "DEF",
"if": "IF",
"return": "RETURN",
}
def t_NAME(t):
r'[a-zA-Z_][a-zA-Z0-9_]*'
t.type = RESERVED.get(t.value, "NAME")
return t
# Putting this before t_WS lets it consume lines containing only comments,
# so the later code never sees the WS part. The newline is not consumed,
# which is needed for "if 1: #comment"
def t_comment(t):
r"[ ]*\043[^\n]*" # \043 is '#'
pass
# Whitespace
def t_WS(t):
r' [ ]+ '
if t.lexer.at_line_start and t.lexer.paren_count == 0:
return t
# Don't generate newline tokens when inside of parenthesis, eg
# a = (1,
# 2, 3)
def t_newline(t):
r'\n+'
t.lexer.lineno += len(t.value)
t.type = "NEWLINE"
if t.lexer.paren_count == 0:
return t
def t_LPAR(t):
r'\('
t.lexer.paren_count += 1
return t
def t_RPAR(t):
r'\)'
# check for underflow? should be the job of the parser
t.lexer.paren_count -= 1
return t
def t_error(t):
raise SyntaxError("Unknown symbol %r" % (t.value[0],))
print "Skipping", repr(t.value[0])
t.lexer.skip(1)
# I implemented INDENT / DEDENT generation as a post-processing filter
# The original lex token stream contains WS and NEWLINE characters.
# WS will only occur before any other tokens on a line.
# I have three filters. One tags tokens by adding two attributes:
# "must_indent" is True if the token must be indented relative to the
# previous code, and "at_line_start" is True for WS and for the first
# non-WS/non-NEWLINE token on a line; it flags where to check whether
# the line's indentation level has changed.
# Python's syntax has three INDENT states
# 0) no colon hence no need to indent
# 1) "if 1: go()" - simple statements have a COLON but no need for an indent
# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent
NO_INDENT = 0
MAY_INDENT = 1
MUST_INDENT = 2
# only care about whitespace at the start of a line
def track_tokens_filter(lexer, tokens):
lexer.at_line_start = at_line_start = True
indent = NO_INDENT
saw_colon = False
for token in tokens:
token.at_line_start = at_line_start
if token.type == "COLON":
at_line_start = False
indent = MAY_INDENT
token.must_indent = False
elif token.type == "NEWLINE":
at_line_start = True
if indent == MAY_INDENT:
indent = MUST_INDENT
token.must_indent = False
elif token.type == "WS":
assert token.at_line_start == True
at_line_start = True
token.must_indent = False
else:
# A real token; only indent after COLON NEWLINE
if indent == MUST_INDENT:
token.must_indent = True
else:
token.must_indent = False
at_line_start = False
indent = NO_INDENT
yield token
lexer.at_line_start = at_line_start
def _new_token(type, lineno):
tok = lex.LexToken()
tok.type = type
tok.value = None
tok.lineno = lineno
return tok
# Synthesize a DEDENT tag
def DEDENT(lineno):
return _new_token("DEDENT", lineno)
# Synthesize an INDENT tag
def INDENT(lineno):
return _new_token("INDENT", lineno)
# Track the indentation level and emit the right INDENT / DEDENT events.
def indentation_filter(tokens):
# A stack of indentation levels; will never pop item 0
levels = [0]
token = None
depth = 0
prev_was_ws = False
for token in tokens:
# if 1:
# print "Process", token,
# if token.at_line_start:
# print "at_line_start",
# if token.must_indent:
# print "must_indent",
# print
# WS only occurs at the start of the line
# There may be WS followed by NEWLINE so
# only track the depth here. Don't indent/dedent
# until there's something real.
if token.type == "WS":
assert depth == 0
depth = len(token.value)
prev_was_ws = True
# WS tokens are never passed to the parser
continue
if token.type == "NEWLINE":
depth = 0
if prev_was_ws or token.at_line_start:
# ignore blank lines
continue
# pass the other cases on through
yield token
continue
# then it must be a real token (not WS, not NEWLINE)
# which can affect the indentation level
prev_was_ws = False
if token.must_indent:
# The current depth must be larger than the previous level
if not (depth > levels[-1]):
raise IndentationError("expected an indented block")
levels.append(depth)
yield INDENT(token.lineno)
elif token.at_line_start:
# Must be on the same level or one of the previous levels
if depth == levels[-1]:
# At the same level
pass
elif depth > levels[-1]:
raise IndentationError(
"indentation increase but not in new block")
else:
# Back up; but only if it matches a previous level
try:
i = levels.index(depth)
except ValueError:
raise IndentationError("inconsistent indentation")
for _ in range(i + 1, len(levels)):
yield DEDENT(token.lineno)
levels.pop()
yield token
### Finished processing ###
# Must dedent any remaining levels
if len(levels) > 1:
assert token is not None
for _ in range(1, len(levels)):
yield DEDENT(token.lineno)
# The top-level filter adds an ENDMARKER, if requested.
# Python's grammar uses it.
def filter(lexer, add_endmarker=True):
token = None
tokens = iter(lexer.token, None)
tokens = track_tokens_filter(lexer, tokens)
for token in indentation_filter(tokens):
yield token
if add_endmarker:
lineno = 1
if token is not None:
lineno = token.lineno
yield _new_token("ENDMARKER", lineno)
# Combine Ply and my filters into a new lexer
class IndentLexer(object):
def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
self.lexer = lex.lex(debug=debug, optimize=optimize,
lextab=lextab, reflags=reflags)
self.token_stream = None
def input(self, s, add_endmarker=True):
self.lexer.paren_count = 0
self.lexer.input(s)
self.token_stream = filter(self.lexer, add_endmarker)
def token(self):
try:
return self.token_stream.next()
except StopIteration:
return None
########## Parser (tokens -> AST) ######
# also part of Ply
#import yacc
# I use the Python AST
from compiler import ast
# Helper function
def Assign(left, right):
names = []
if isinstance(left, ast.Name):
# Single assignment on left
return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right)
elif isinstance(left, ast.Tuple):
# List of things - make sure they are Name nodes
names = []
for child in left.getChildren():
if not isinstance(child, ast.Name):
raise SyntaxError("that assignment not supported")
names.append(child.name)
ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
return ast.Assign([ast.AssTuple(ass_list)], right)
else:
raise SyntaxError("Can't do that yet")
# The grammar comments come from Python's Grammar/Grammar file
# NB: compound_stmt in single_input is followed by extra NEWLINE!
# file_input: (NEWLINE | stmt)* ENDMARKER
def p_file_input_end(p):
"""file_input_end : file_input ENDMARKER"""
p[0] = ast.Stmt(p[1])
def p_file_input(p):
"""file_input : file_input NEWLINE
| file_input stmt
| NEWLINE
| stmt"""
if isinstance(p[len(p) - 1], basestring):
if len(p) == 3:
p[0] = p[1]
else:
p[0] = [] # p == 2 --> only a blank line
else:
if len(p) == 3:
p[0] = p[1] + p[2]
else:
p[0] = p[1]
# funcdef: [decorators] 'def' NAME parameters ':' suite
# ignoring decorators
def p_funcdef(p):
"funcdef : DEF NAME parameters COLON suite"
p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5])
# parameters: '(' [varargslist] ')'
def p_parameters(p):
"""parameters : LPAR RPAR
| LPAR varargslist RPAR"""
if len(p) == 3:
p[0] = []
else:
p[0] = p[2]
# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) |
# highly simplified
def p_varargslist(p):
"""varargslist : varargslist COMMA NAME
| NAME"""
if len(p) == 4:
p[0] = p[1] + [p[3]]
else:
p[0] = [p[1]]
# stmt: simple_stmt | compound_stmt
def p_stmt_simple(p):
"""stmt : simple_stmt"""
# simple_stmt is a list
p[0] = p[1]
def p_stmt_compound(p):
"""stmt : compound_stmt"""
p[0] = [p[1]]
# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
def p_simple_stmt(p):
"""simple_stmt : small_stmts NEWLINE
| small_stmts SEMICOLON NEWLINE"""
p[0] = p[1]
def p_small_stmts(p):
"""small_stmts : small_stmts SEMICOLON small_stmt
| small_stmt"""
if len(p) == 4:
p[0] = p[1] + [p[3]]
else:
p[0] = [p[1]]
# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
# import_stmt | global_stmt | exec_stmt | assert_stmt
def p_small_stmt(p):
"""small_stmt : flow_stmt
| expr_stmt"""
p[0] = p[1]
# expr_stmt: testlist (augassign (yield_expr|testlist) |
# ('=' (yield_expr|testlist))*)
# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
# '<<=' | '>>=' | '**=' | '//=')
def p_expr_stmt(p):
"""expr_stmt : testlist ASSIGN testlist
| testlist """
if len(p) == 2:
# a list of expressions
p[0] = ast.Discard(p[1])
else:
p[0] = Assign(p[1], p[3])
def p_flow_stmt(p):
"flow_stmt : return_stmt"
p[0] = p[1]
# return_stmt: 'return' [testlist]
def p_return_stmt(p):
"return_stmt : RETURN testlist"
p[0] = ast.Return(p[2])
def p_compound_stmt(p):
"""compound_stmt : if_stmt
| funcdef"""
p[0] = p[1]
def p_if_stmt(p):
'if_stmt : IF test COLON suite'
p[0] = ast.If([(p[2], p[4])], None)
def p_suite(p):
"""suite : simple_stmt
| NEWLINE INDENT stmts DEDENT"""
if len(p) == 2:
p[0] = ast.Stmt(p[1])
else:
p[0] = ast.Stmt(p[3])
def p_stmts(p):
"""stmts : stmts stmt
| stmt"""
if len(p) == 3:
p[0] = p[1] + p[2]
else:
p[0] = p[1]
# Not using Python's approach because Ply supports precedence
# comparison: expr (comp_op expr)*
# arith_expr: term (('+'|'-') term)*
# term: factor (('*'|'/'|'%'|'//') factor)*
# factor: ('+'|'-'|'~') factor | power
# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
def make_lt_compare((left, right)):
return ast.Compare(left, [('<', right), ])
def make_gt_compare((left, right)):
return ast.Compare(left, [('>', right), ])
def make_eq_compare((left, right)):
return ast.Compare(left, [('==', right), ])
binary_ops = {
"+": ast.Add,
"-": ast.Sub,
"*": ast.Mul,
"/": ast.Div,
"<": make_lt_compare,
">": make_gt_compare,
"==": make_eq_compare,
}
unary_ops = {
"+": ast.UnaryAdd,
"-": ast.UnarySub,
}
precedence = (
("left", "EQ", "GT", "LT"),
("left", "PLUS", "MINUS"),
("left", "MULT", "DIV"),
)
def p_comparison(p):
"""comparison : comparison PLUS comparison
| comparison MINUS comparison
| comparison MULT comparison
| comparison DIV comparison
| comparison LT comparison
| comparison EQ comparison
| comparison GT comparison
| PLUS comparison
| MINUS comparison
| power"""
if len(p) == 4:
p[0] = binary_ops[p[2]]((p[1], p[3]))
elif len(p) == 3:
p[0] = unary_ops[p[1]](p[2])
else:
p[0] = p[1]
# power: atom trailer* ['**' factor]
# trailers enables function calls. I only allow one level of calls
# so this is 'trailer'
def p_power(p):
"""power : atom
| atom trailer"""
if len(p) == 2:
p[0] = p[1]
else:
if p[2][0] == "CALL":
p[0] = ast.CallFunc(p[1], p[2][1], None, None)
else:
raise AssertionError("not implemented")
def p_atom_name(p):
"""atom : NAME"""
p[0] = ast.Name(p[1])
def p_atom_number(p):
"""atom : NUMBER
| STRING"""
p[0] = ast.Const(p[1])
def p_atom_tuple(p):
"""atom : LPAR testlist RPAR"""
p[0] = p[2]
# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
def p_trailer(p):
"trailer : LPAR arglist RPAR"
p[0] = ("CALL", p[2])
# testlist: test (',' test)* [',']
# Contains shift/reduce error
def p_testlist(p):
"""testlist : testlist_multi COMMA
| testlist_multi """
if len(p) == 2:
p[0] = p[1]
else:
# May need to promote singleton to tuple
if isinstance(p[1], list):
p[0] = p[1]
else:
p[0] = [p[1]]
# Convert into a tuple?
if isinstance(p[0], list):
p[0] = ast.Tuple(p[0])
def p_testlist_multi(p):
"""testlist_multi : testlist_multi COMMA test
| test"""
if len(p) == 2:
# singleton
p[0] = p[1]
else:
if isinstance(p[1], list):
p[0] = p[1] + [p[3]]
else:
# singleton -> tuple
p[0] = [p[1], p[3]]
# test: or_test ['if' or_test 'else' test] | lambdef
# as I don't support 'and', 'or', and 'not' this works down to 'comparison'
def p_test(p):
"test : comparison"
p[0] = p[1]
# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
# XXX INCOMPLETE: this doesn't allow the trailing comma
def p_arglist(p):
"""arglist : arglist COMMA argument
| argument"""
if len(p) == 4:
p[0] = p[1] + [p[3]]
else:
p[0] = [p[1]]
# argument: test [gen_for] | test '=' test # Really [keyword '='] test
def p_argument(p):
"argument : test"
p[0] = p[1]
def p_error(p):
# print "Error!", repr(p)
raise SyntaxError(p)
class GardenSnakeParser(object):
def __init__(self, lexer=None):
if lexer is None:
lexer = IndentLexer()
self.lexer = lexer
self.parser = yacc.yacc(start="file_input_end")
def parse(self, code):
self.lexer.input(code)
result = self.parser.parse(lexer=self.lexer)
return ast.Module(None, result)
###### Code generation ######
from compiler import misc, syntax, pycodegen
class GardenSnakeCompiler(object):
def __init__(self):
self.parser = GardenSnakeParser()
def compile(self, code, filename="<string>"):
tree = self.parser.parse(code)
# print tree
misc.set_filename(filename, tree)
syntax.check(tree)
gen = pycodegen.ModuleCodeGenerator(tree)
code = gen.getCode()
return code
####### Test code #######
compile = GardenSnakeCompiler().compile
code = r"""
print('LET\'S TRY THIS \\OUT')
#Comment here
def x(a):
print('called with',a)
if a == 1:
return 2
if a*2 > 10: return 999 / 4
# Another comment here
return a+2*3
ints = (1, 2,
3, 4,
5)
print('multiline-expression', ints)
t = 4+1/3*2+6*(9-5+1)
print('precedence test; should be 34+2/3:', t, t==(34+2/3))
print('numbers', 1,2,3,4,5)
if 1:
8
a=9
print(x(a))
print(x(1))
print(x(2))
print(x(8),'3')
print('this is decimal', 1/5)
print('BIG DECIMAL', 1.234567891234567e12345)
"""
# Set up the GardenSnake run-time environment
def print_(*args):
print "-->", " ".join(map(str, args))
globals()["print"] = print_
compiled_code = compile(code)
exec compiled_code in globals()
print "Done"

View file

@ -0,0 +1,5 @@
This example is Andrew Dalke's GardenSnake language. It shows how to process an
indentation-based language like Python. Further details can be found here:
http://dalkescientific.com/writings/diary/archive/2006/08/30/gardensnake_language.html

View file

@ -0,0 +1,10 @@
Simple examples:
calc - Simple calculator
classcalc - Simple calculator defined as a class
Complex examples:
ansic - ANSI C grammar from K&R
BASIC - A small BASIC interpreter
GardenSnake - A simple Python-like language
yply - Converts Unix yacc files to PLY programs.

View file

@ -0,0 +1,2 @@
This example is incomplete. It was intended to specify an ANSI C parser;
this is part of it.

View file

@ -0,0 +1,168 @@
# ----------------------------------------------------------------------
# clex.py
#
# A lexer for ANSI C.
# ----------------------------------------------------------------------
import sys
sys.path.insert(0, "../..")
import ply.lex as lex
# Reserved words
reserved = (
'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
)
tokens = reserved + (
# Literals (identifier, integer constant, float constant, string constant,
# char const)
'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',
# Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
'LOR', 'LAND', 'LNOT',
'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
# Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
# Increment/decrement (++,--)
'PLUSPLUS', 'MINUSMINUS',
# Structure dereference (->)
'ARROW',
# Conditional operator (?)
'CONDOP',
# Delimiters ( ) [ ] { } , . ; :
'LPAREN', 'RPAREN',
'LBRACKET', 'RBRACKET',
'LBRACE', 'RBRACE',
'COMMA', 'PERIOD', 'SEMI', 'COLON',
# Ellipsis (...)
'ELLIPSIS',
)
# Completely ignored characters
t_ignore = ' \t\x0c'
# Newlines
def t_NEWLINE(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
# Operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MOD = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='
# Assignment operators
t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'\^='
# Increment/decrement
t_PLUSPLUS = r'\+\+'
t_MINUSMINUS = r'--'
# ->
t_ARROW = r'->'
# ?
t_CONDOP = r'\?'
# Delimiters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
# Identifiers and reserved words
reserved_map = {}
for r in reserved:
reserved_map[r.lower()] = r
def t_ID(t):
r'[A-Za-z_][\w_]*'
t.type = reserved_map.get(t.value, "ID")
return t
# Integer literal
t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
# Floating literal
t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
# String literal
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'
# Character constant 'c' or L'c'
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
# Comments
def t_comment(t):
r'/\*(.|\n)*?\*/'
t.lexer.lineno += t.value.count('\n')
# Preprocessor directive (ignored)
def t_preprocessor(t):
r'\#(.)*?\n'
t.lexer.lineno += 1
def t_error(t):
print("Illegal character %s" % repr(t.value[0]))
t.lexer.skip(1)
lexer = lex.lex()
if __name__ == "__main__":
lex.runmain(lexer)

File diff suppressed because it is too large

View file

@ -0,0 +1,123 @@
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0, "../..")
if sys.version_info[0] >= 3:
raw_input = input
tokens = (
'NAME', 'NUMBER',
)
literals = ['=', '+', '-', '*', '/', '(', ')']
# Tokens
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
t.value = int(t.value)
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
# Parsing rules
precedence = (
('left', '+', '-'),
('left', '*', '/'),
('right', 'UMINUS'),
)
# dictionary of names
names = {}
def p_statement_assign(p):
'statement : NAME "=" expression'
names[p[1]] = p[3]
def p_statement_expr(p):
'statement : expression'
print(p[1])
def p_expression_binop(p):
'''expression : expression '+' expression
| expression '-' expression
| expression '*' expression
| expression '/' expression'''
if p[2] == '+':
p[0] = p[1] + p[3]
elif p[2] == '-':
p[0] = p[1] - p[3]
elif p[2] == '*':
p[0] = p[1] * p[3]
elif p[2] == '/':
p[0] = p[1] / p[3]
def p_expression_uminus(p):
"expression : '-' expression %prec UMINUS"
p[0] = -p[2]
def p_expression_group(p):
"expression : '(' expression ')'"
p[0] = p[2]
def p_expression_number(p):
"expression : NUMBER"
p[0] = p[1]
def p_expression_name(p):
"expression : NAME"
try:
p[0] = names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
import ply.yacc as yacc
yacc.yacc()
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s:
continue
yacc.parse(s)

View file

@ -0,0 +1,129 @@
# -----------------------------------------------------------------------------
# calc.py
#
# This example shows how to run the parser in a debugging mode
# with output routed to a logging object.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0, "../..")
if sys.version_info[0] >= 3:
raw_input = input
tokens = (
'NAME', 'NUMBER',
)
literals = ['=', '+', '-', '*', '/', '(', ')']
# Tokens
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
t.value = int(t.value)
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
# Parsing rules
precedence = (
('left', '+', '-'),
('left', '*', '/'),
('right', 'UMINUS'),
)
# dictionary of names
names = {}
def p_statement_assign(p):
'statement : NAME "=" expression'
names[p[1]] = p[3]
def p_statement_expr(p):
'statement : expression'
print(p[1])
def p_expression_binop(p):
'''expression : expression '+' expression
| expression '-' expression
| expression '*' expression
| expression '/' expression'''
if p[2] == '+':
p[0] = p[1] + p[3]
elif p[2] == '-':
p[0] = p[1] - p[3]
elif p[2] == '*':
p[0] = p[1] * p[3]
elif p[2] == '/':
p[0] = p[1] / p[3]
def p_expression_uminus(p):
"expression : '-' expression %prec UMINUS"
p[0] = -p[2]
def p_expression_group(p):
"expression : '(' expression ')'"
p[0] = p[2]
def p_expression_number(p):
"expression : NUMBER"
p[0] = p[1]
def p_expression_name(p):
"expression : NAME"
try:
p[0] = names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
import ply.yacc as yacc
yacc.yacc()
import logging
logging.basicConfig(
level=logging.INFO,
filename="parselog.txt"
)
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s:
continue
yacc.parse(s, debug=logging.getLogger())

View file

@ -0,0 +1,132 @@
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. Asks the user for more input and
# demonstrates the use of the t_eof() rule.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0, "../..")
if sys.version_info[0] >= 3:
raw_input = input
tokens = (
'NAME', 'NUMBER',
)
literals = ['=', '+', '-', '*', '/', '(', ')']
# Tokens
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
t.value = int(t.value)
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_eof(t):
more = raw_input('... ')
if more:
t.lexer.input(more + '\n')
return t.lexer.token()
else:
return None
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
# Parsing rules
precedence = (
('left', '+', '-'),
('left', '*', '/'),
('right', 'UMINUS'),
)
# dictionary of names
names = {}
def p_statement_assign(p):
'statement : NAME "=" expression'
names[p[1]] = p[3]
def p_statement_expr(p):
'statement : expression'
print(p[1])
def p_expression_binop(p):
'''expression : expression '+' expression
| expression '-' expression
| expression '*' expression
| expression '/' expression'''
if p[2] == '+':
p[0] = p[1] + p[3]
elif p[2] == '-':
p[0] = p[1] - p[3]
elif p[2] == '*':
p[0] = p[1] * p[3]
elif p[2] == '/':
p[0] = p[1] / p[3]
def p_expression_uminus(p):
"expression : '-' expression %prec UMINUS"
p[0] = -p[2]
def p_expression_group(p):
"expression : '(' expression ')'"
p[0] = p[2]
def p_expression_number(p):
"expression : NUMBER"
p[0] = p[1]
def p_expression_name(p):
"expression : NAME"
try:
p[0] = names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
import ply.yacc as yacc
yacc.yacc()
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s:
continue
yacc.parse(s + '\n')

View file

@ -0,0 +1,165 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
#
# Class-based example contributed to PLY by David McNab
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0, "../..")
if sys.version_info[0] >= 3:
raw_input = input
import ply.lex as lex
import ply.yacc as yacc
import os
class Parser:
"""
Base class for a lexer/parser that has the rules defined as methods
"""
tokens = ()
precedence = ()
def __init__(self, **kw):
self.debug = kw.get('debug', 0)
self.names = {}
try:
modname = os.path.split(os.path.splitext(__file__)[0])[
1] + "_" + self.__class__.__name__
except:
modname = "parser" + "_" + self.__class__.__name__
self.debugfile = modname + ".dbg"
self.tabmodule = modname + "_" + "parsetab"
# print self.debugfile, self.tabmodule
# Build the lexer and parser
lex.lex(module=self, debug=self.debug)
yacc.yacc(module=self,
debug=self.debug,
debugfile=self.debugfile,
tabmodule=self.tabmodule)
def run(self):
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s:
continue
yacc.parse(s)
class Calc(Parser):
tokens = (
'NAME', 'NUMBER',
'PLUS', 'MINUS', 'EXP', 'TIMES', 'DIVIDE', 'EQUALS',
'LPAREN', 'RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_EXP = r'\*\*'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(self, t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
# print "parsed number %s" % repr(t.value)
return t
t_ignore = " \t"
def t_newline(self, t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(self, t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Parsing rules
precedence = (
('left', 'PLUS', 'MINUS'),
('left', 'TIMES', 'DIVIDE'),
('left', 'EXP'),
('right', 'UMINUS'),
)
def p_statement_assign(self, p):
'statement : NAME EQUALS expression'
self.names[p[1]] = p[3]
def p_statement_expr(self, p):
'statement : expression'
print(p[1])
def p_expression_binop(self, p):
"""
expression : expression PLUS expression
| expression MINUS expression
| expression TIMES expression
| expression DIVIDE expression
| expression EXP expression
"""
# print [repr(p[i]) for i in range(0,4)]
if p[2] == '+':
p[0] = p[1] + p[3]
elif p[2] == '-':
p[0] = p[1] - p[3]
elif p[2] == '*':
p[0] = p[1] * p[3]
elif p[2] == '/':
p[0] = p[1] / p[3]
elif p[2] == '**':
p[0] = p[1] ** p[3]
def p_expression_uminus(self, p):
'expression : MINUS expression %prec UMINUS'
p[0] = -p[2]
def p_expression_group(self, p):
'expression : LPAREN expression RPAREN'
p[0] = p[2]
def p_expression_number(self, p):
'expression : NUMBER'
p[0] = p[1]
def p_expression_name(self, p):
'expression : NAME'
try:
p[0] = self.names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(self, p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
if __name__ == '__main__':
calc = Calc()
calc.run()

View file

@ -0,0 +1,2 @@
#!/bin/sh
rm -f */*.pyc */parsetab.py */parser.out */*~ */*.class

View file

@ -0,0 +1,132 @@
# -----------------------------------------------------------------------------
# calc.py
#
# A calculator parser that makes use of closures. The function make_calculator()
# returns a function that accepts an input string and returns a result. All
# lexing rules, parsing rules, and internal state are held inside the function.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0, "../..")
if sys.version_info[0] >= 3:
raw_input = input
# Make a calculator function
def make_calculator():
import ply.lex as lex
import ply.yacc as yacc
# ------- Internal calculator state
variables = {} # Dictionary of stored variables
# ------- Calculator tokenizing rules
tokens = (
'NAME', 'NUMBER',
)
literals = ['=', '+', '-', '*', '/', '(', ')']
t_ignore = " \t"
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
t.value = int(t.value)
return t
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
lexer = lex.lex()
# ------- Calculator parsing rules
precedence = (
('left', '+', '-'),
('left', '*', '/'),
('right', 'UMINUS'),
)
def p_statement_assign(p):
'statement : NAME "=" expression'
variables[p[1]] = p[3]
p[0] = None
def p_statement_expr(p):
'statement : expression'
p[0] = p[1]
def p_expression_binop(p):
'''expression : expression '+' expression
| expression '-' expression
| expression '*' expression
| expression '/' expression'''
if p[2] == '+':
p[0] = p[1] + p[3]
elif p[2] == '-':
p[0] = p[1] - p[3]
elif p[2] == '*':
p[0] = p[1] * p[3]
elif p[2] == '/':
p[0] = p[1] / p[3]
def p_expression_uminus(p):
"expression : '-' expression %prec UMINUS"
p[0] = -p[2]
def p_expression_group(p):
"expression : '(' expression ')'"
p[0] = p[2]
def p_expression_number(p):
"expression : NUMBER"
p[0] = p[1]
def p_expression_name(p):
"expression : NAME"
try:
p[0] = variables[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
# Build the parser
parser = yacc.yacc()
# ------- Input function
def input(text):
result = parser.parse(text, lexer=lexer)
return result
return input
# Make a calculator object and use it
calc = make_calculator()
while True:
try:
s = raw_input("calc > ")
except EOFError:
break
r = calc(s)
if r:
print(r)
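
Because every bit of state lives inside the closure returned by
make_calculator(), each call to it produces an independent calculator. A small
hedged sketch of what that buys you (variable names are illustrative):

c1 = make_calculator()
c2 = make_calculator()
c1("x = 3")
print(c1("x"))    # 3
print(c2("x"))    # c2 has its own variables: prints "Undefined name 'x'", then 0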

View file

@ -0,0 +1,48 @@
# -----------------------------------------------------------------------------
# hedit.py
#
# Parsing of Fortran H Edit descriptors (Contributed by Pearu Peterson)
#
# These tokens can't be handled by an ordinary regular expression alone
# because they have the following form:
#
# nHc1...cn
#
# where n is a positive integer and c1 ... cn are characters.
#
# This example shows how to modify the state of the lexer to parse
# such tokens
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0, "../..")
tokens = (
'H_EDIT_DESCRIPTOR',
)
# Tokens
t_ignore = " \t\n"
def t_H_EDIT_DESCRIPTOR(t):
r"\d+H.*" # This grabs all of the remaining text
i = t.value.index('H')
    n = int(t.value[:i])          # length prefix before the 'H'
# Adjust the tokenizing position
t.lexer.lexpos -= len(t.value) - (i + 1 + n)
t.value = t.value[i + 1:i + 1 + n]
return t
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
lex.runmain()
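
For reference, here is a stand-alone sketch of the same lexpos trick with a
small driver showing the expected result (an illustration, not part of
hedit.py):

import ply.lex as lex

tokens = ('H_EDIT_DESCRIPTOR',)
t_ignore = ' \t\n'

def t_H_EDIT_DESCRIPTOR(t):
    r'\d+H.*'                                     # grabs the rest of the line
    i = t.value.index('H')
    n = int(t.value[:i])                          # length prefix before 'H'
    t.lexer.lexpos -= len(t.value) - (i + 1 + n)  # push back the extra text
    t.value = t.value[i + 1:i + 1 + n]
    return t

def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

lexer = lex.lex()
lexer.input("3HABC 2HXY")
print([tok.value for tok in lexer])   # expected: ['ABC', 'XY']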

View file

@ -0,0 +1,167 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
#
# Class-based example contributed to PLY by David McNab.
#
# Modified to use new-style classes. Test case.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0, "../..")
if sys.version_info[0] >= 3:
raw_input = input
import ply.lex as lex
import ply.yacc as yacc
import os
class Parser(object):
"""
Base class for a lexer/parser that has the rules defined as methods
"""
tokens = ()
precedence = ()
def __init__(self, **kw):
self.debug = kw.get('debug', 0)
self.names = {}
try:
modname = os.path.split(os.path.splitext(__file__)[0])[
1] + "_" + self.__class__.__name__
except:
modname = "parser" + "_" + self.__class__.__name__
self.debugfile = modname + ".dbg"
self.tabmodule = modname + "_" + "parsetab"
# print self.debugfile, self.tabmodule
# Build the lexer and parser
lex.lex(module=self, debug=self.debug)
yacc.yacc(module=self,
debug=self.debug,
debugfile=self.debugfile,
tabmodule=self.tabmodule)
def run(self):
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s:
continue
yacc.parse(s)
class Calc(Parser):
tokens = (
'NAME', 'NUMBER',
'PLUS', 'MINUS', 'EXP', 'TIMES', 'DIVIDE', 'EQUALS',
'LPAREN', 'RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_EXP = r'\*\*'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(self, t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
# print "parsed number %s" % repr(t.value)
return t
t_ignore = " \t"
def t_newline(self, t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(self, t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Parsing rules
precedence = (
('left', 'PLUS', 'MINUS'),
('left', 'TIMES', 'DIVIDE'),
('left', 'EXP'),
('right', 'UMINUS'),
)
def p_statement_assign(self, p):
'statement : NAME EQUALS expression'
self.names[p[1]] = p[3]
def p_statement_expr(self, p):
'statement : expression'
print(p[1])
def p_expression_binop(self, p):
"""
expression : expression PLUS expression
| expression MINUS expression
| expression TIMES expression
| expression DIVIDE expression
| expression EXP expression
"""
# print [repr(p[i]) for i in range(0,4)]
if p[2] == '+':
p[0] = p[1] + p[3]
elif p[2] == '-':
p[0] = p[1] - p[3]
elif p[2] == '*':
p[0] = p[1] * p[3]
elif p[2] == '/':
p[0] = p[1] / p[3]
elif p[2] == '**':
p[0] = p[1] ** p[3]
def p_expression_uminus(self, p):
'expression : MINUS expression %prec UMINUS'
p[0] = -p[2]
def p_expression_group(self, p):
'expression : LPAREN expression RPAREN'
p[0] = p[2]
def p_expression_number(self, p):
'expression : NUMBER'
p[0] = p[1]
def p_expression_name(self, p):
'expression : NAME'
try:
p[0] = self.names[p[1]]
except LookupError:
print("Undefined name '%s'" % p[1])
p[0] = 0
def p_error(self, p):
if p:
print("Syntax error at '%s'" % p.value)
else:
print("Syntax error at EOF")
if __name__ == '__main__':
calc = Calc()
calc.run()

View file

@ -0,0 +1,9 @@
An example showing how to use Python optimized mode.
To run:
- First run 'python calc.py'
- Then run 'python -OO calc.py'
If working correctly, the second version should run the
same way.
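
The reason this two-step process matters is that PLY normally reads its lexing
and parsing rules from docstrings, which 'python -OO' strips. With optimize=1,
the tables built on a normal first run are cached (lextab.py / parsetab.py by
default) and simply reloaded on later runs. A minimal stand-alone sketch of the
same idea (separate from the example file below; names are illustrative):

import ply.lex as lex

tokens = ('NUMBER',)
t_ignore = ' \t'

def t_NUMBER(t):
    r'\d+'
    t.value = int(t.value)
    return t

def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

# A plain 'python' run builds the tables and writes lextab.py;
# a later 'python -OO' run just reloads them.
lexer = lex.lex(optimize=1)
lexer.input("12 34")
print([tok.value for tok in lexer])   # [12, 34]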

View file

@ -0,0 +1,134 @@
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0, "../..")
if sys.version_info[0] >= 3:
raw_input = input
tokens = (
'NAME', 'NUMBER',
'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'EQUALS',
'LPAREN', 'RPAREN',
)
# Tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_EQUALS = r'='
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
r'\d+'
try:
t.value = int(t.value)
except ValueError:
print("Integer value too large %s" % t.value)
t.value = 0
return t
t_ignore = " \t"
def t_newline(t):
r'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex(optimize=1)
# Parsing rules
precedence = (
('left', 'PLUS', 'MINUS'),
('left', 'TIMES', 'DIVIDE'),
('right', 'UMINUS'),
)
# dictionary of names
names = {}
def p_statement_assign(t):
'statement : NAME EQUALS expression'
names[t[1]] = t[3]
def p_statement_expr(t):
'statement : expression'
print(t[1])
def p_expression_binop(t):
'''expression : expression PLUS expression
| expression MINUS expression
| expression TIMES expression
| expression DIVIDE expression'''
if t[2] == '+':
t[0] = t[1] + t[3]
elif t[2] == '-':
t[0] = t[1] - t[3]
elif t[2] == '*':
t[0] = t[1] * t[3]
elif t[2] == '/':
t[0] = t[1] / t[3]
elif t[2] == '<':
t[0] = t[1] < t[3]
def p_expression_uminus(t):
'expression : MINUS expression %prec UMINUS'
t[0] = -t[2]
def p_expression_group(t):
'expression : LPAREN expression RPAREN'
t[0] = t[2]
def p_expression_number(t):
'expression : NUMBER'
t[0] = t[1]
def p_expression_name(t):
'expression : NAME'
try:
t[0] = names[t[1]]
except LookupError:
print("Undefined name '%s'" % t[1])
t[0] = 0
def p_error(t):
if t:
print("Syntax error at '%s'" % t.value)
else:
print("Syntax error at EOF")
import ply.yacc as yacc
yacc.yacc(optimize=1)
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
yacc.parse(s)

View file

@ -0,0 +1,133 @@
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables. This is from O'Reilly's
# "Lex and Yacc", p. 63.
#
# This example uses unicode strings for tokens, docstrings, and input.
# It is Python 2 only, since it relies on ur'...' literals and print statements.
# -----------------------------------------------------------------------------
import sys
sys.path.insert(0, "../..")
tokens = (
'NAME', 'NUMBER',
'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'EQUALS',
'LPAREN', 'RPAREN',
)
# Tokens
t_PLUS = ur'\+'
t_MINUS = ur'-'
t_TIMES = ur'\*'
t_DIVIDE = ur'/'
t_EQUALS = ur'='
t_LPAREN = ur'\('
t_RPAREN = ur'\)'
t_NAME = ur'[a-zA-Z_][a-zA-Z0-9_]*'
def t_NUMBER(t):
ur'\d+'
try:
t.value = int(t.value)
except ValueError:
print "Integer value too large", t.value
t.value = 0
return t
t_ignore = u" \t"
def t_newline(t):
ur'\n+'
t.lexer.lineno += t.value.count("\n")
def t_error(t):
print "Illegal character '%s'" % t.value[0]
t.lexer.skip(1)
# Build the lexer
import ply.lex as lex
lex.lex()
# Parsing rules
precedence = (
('left', 'PLUS', 'MINUS'),
('left', 'TIMES', 'DIVIDE'),
('right', 'UMINUS'),
)
# dictionary of names
names = {}
def p_statement_assign(p):
'statement : NAME EQUALS expression'
names[p[1]] = p[3]
def p_statement_expr(p):
'statement : expression'
print p[1]
def p_expression_binop(p):
'''expression : expression PLUS expression
| expression MINUS expression
| expression TIMES expression
| expression DIVIDE expression'''
if p[2] == u'+':
p[0] = p[1] + p[3]
elif p[2] == u'-':
p[0] = p[1] - p[3]
elif p[2] == u'*':
p[0] = p[1] * p[3]
elif p[2] == u'/':
p[0] = p[1] / p[3]
def p_expression_uminus(p):
'expression : MINUS expression %prec UMINUS'
p[0] = -p[2]
def p_expression_group(p):
'expression : LPAREN expression RPAREN'
p[0] = p[2]
def p_expression_number(p):
'expression : NUMBER'
p[0] = p[1]
def p_expression_name(p):
'expression : NAME'
try:
p[0] = names[p[1]]
except LookupError:
print "Undefined name '%s'" % p[1]
p[0] = 0
def p_error(p):
if p:
print "Syntax error at '%s'" % p.value
else:
print "Syntax error at EOF"
import ply.yacc as yacc
yacc.yacc()
while 1:
try:
s = raw_input('calc > ')
except EOFError:
break
if not s:
continue
yacc.parse(unicode(s))

View file

@ -0,0 +1,41 @@
yply.py
This example implements a program yply.py that converts a UNIX-yacc
specification file into a PLY-compatible program. To use, simply
run it like this:
% python yply.py [-nocode] inputfile.y >myparser.py
The output of this program is Python code. In the output,
any C code in the original file is included, but is commented out.
If you use the -nocode option, then all of the C code in the
original file is just discarded.
To use the resulting grammar with PLY, you'll need to edit the
myparser.py file. Within this file, some stub code is included that
can be used to test the construction of the parsing tables. However,
you'll need to do more editing to make a workable parser.
Disclaimer: This is just an example I threw together in an afternoon.
It might have some bugs. However, it worked when I tried it on
a yacc-specified C++ parser containing 442 rules and 855 parsing
states.
Comments:
1. This example does not parse specification files meant for lex/flex.
You'll need to specify the tokenizer on your own.
2. This example shows a number of interesting PLY features including
- Parsing of literal text delimited by nested braces
- Some interaction between the parser and the lexer.
- Use of literals in the grammar specification
- One pass compilation. The program just emits the result;
there is no intermediate parse tree.
3. This program could probably be cleaned up and enhanced a lot.
It would be great if someone wanted to work on this (hint).
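
To make the conversion concrete, here is a rough before/after sketch. The
input grammar is hypothetical and not shipped with this example:

%token NUMBER
%%
expr : expr '+' NUMBER   { $$ = $1 + $3; }
     | NUMBER
     ;
%%

Running 'python yply.py expr.y >myparser.py' on it should emit rule functions
of roughly this shape, with the C actions kept as comments:

tokens =  ['NUMBER']

precedence =  []

# -------------- RULES ----------------
def p_expr_1(p):
    '''expr : expr '+' NUMBER'''
    # { $$ = $1 + $3; }

def p_expr_2(p):
    '''expr : NUMBER'''

# -------------- RULES END ----------------

followed by the stub code mentioned above for building the parsing tables.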
-Dave

View file

@ -0,0 +1,119 @@
# lexer for yacc-grammars
#
# Author: David Beazley (dave@dabeaz.com)
# Date : October 2, 2006
import sys
sys.path.append("../..")
from ply import *
tokens = (
'LITERAL', 'SECTION', 'TOKEN', 'LEFT', 'RIGHT', 'PREC', 'START', 'TYPE', 'NONASSOC', 'UNION', 'CODE',
'ID', 'QLITERAL', 'NUMBER',
)
states = (('code', 'exclusive'),)
literals = [';', ',', '<', '>', '|', ':']
t_ignore = ' \t'
t_TOKEN = r'%token'
t_LEFT = r'%left'
t_RIGHT = r'%right'
t_NONASSOC = r'%nonassoc'
t_PREC = r'%prec'
t_START = r'%start'
t_TYPE = r'%type'
t_UNION = r'%union'
t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*'
t_QLITERAL = r'''(?P<quote>['"]).*?(?P=quote)'''
t_NUMBER = r'\d+'
def t_SECTION(t):
r'%%'
if getattr(t.lexer, "lastsection", 0):
t.value = t.lexer.lexdata[t.lexpos + 2:]
t.lexer.lexpos = len(t.lexer.lexdata)
else:
t.lexer.lastsection = 0
return t
# Comments
def t_ccomment(t):
r'/\*(.|\n)*?\*/'
t.lexer.lineno += t.value.count('\n')
t_ignore_cppcomment = r'//.*'
def t_LITERAL(t):
r'%\{(.|\n)*?%\}'
t.lexer.lineno += t.value.count("\n")
return t
def t_NEWLINE(t):
r'\n'
t.lexer.lineno += 1
def t_code(t):
r'\{'
t.lexer.codestart = t.lexpos
t.lexer.level = 1
t.lexer.begin('code')
def t_code_ignore_string(t):
r'\"([^\\\n]|(\\.))*?\"'
def t_code_ignore_char(t):
r'\'([^\\\n]|(\\.))*?\''
def t_code_ignore_comment(t):
r'/\*(.|\n)*?\*/'
def t_code_ignore_cppcom(t):
r'//.*'
def t_code_lbrace(t):
r'\{'
t.lexer.level += 1
def t_code_rbrace(t):
r'\}'
t.lexer.level -= 1
if t.lexer.level == 0:
t.type = 'CODE'
t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos + 1]
t.lexer.begin('INITIAL')
t.lexer.lineno += t.value.count('\n')
return t
t_code_ignore_nonspace = r'[^\s\}\'\"\{]+'
t_code_ignore_whitespace = r'\s+'
t_code_ignore = ""
def t_code_error(t):
raise RuntimeError
def t_error(t):
print("%d: Illegal character '%s'" % (t.lexer.lineno, t.value[0]))
print(t.value)
t.lexer.skip(1)
lex.lex()
if __name__ == '__main__':
lex.runmain()

View file

@ -0,0 +1,244 @@
# parser for Unix yacc-based grammars
#
# Author: David Beazley (dave@dabeaz.com)
# Date : October 2, 2006
import ylex
tokens = ylex.tokens
from ply import *
tokenlist = []
preclist = []
emit_code = 1
def p_yacc(p):
'''yacc : defsection rulesection'''
def p_defsection(p):
'''defsection : definitions SECTION
| SECTION'''
p.lexer.lastsection = 1
print("tokens = ", repr(tokenlist))
print()
print("precedence = ", repr(preclist))
print()
print("# -------------- RULES ----------------")
print()
def p_rulesection(p):
'''rulesection : rules SECTION'''
print("# -------------- RULES END ----------------")
print_code(p[2], 0)
def p_definitions(p):
'''definitions : definitions definition
| definition'''
def p_definition_literal(p):
'''definition : LITERAL'''
print_code(p[1], 0)
def p_definition_start(p):
'''definition : START ID'''
print("start = '%s'" % p[2])
def p_definition_token(p):
'''definition : toktype opttype idlist optsemi '''
for i in p[3]:
if i[0] not in "'\"":
tokenlist.append(i)
if p[1] == '%left':
preclist.append(('left',) + tuple(p[3]))
elif p[1] == '%right':
preclist.append(('right',) + tuple(p[3]))
elif p[1] == '%nonassoc':
preclist.append(('nonassoc',) + tuple(p[3]))
def p_toktype(p):
'''toktype : TOKEN
| LEFT
| RIGHT
| NONASSOC'''
p[0] = p[1]
def p_opttype(p):
'''opttype : '<' ID '>'
| empty'''
def p_idlist(p):
'''idlist : idlist optcomma tokenid
| tokenid'''
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1]
p[1].append(p[3])
def p_tokenid(p):
'''tokenid : ID
| ID NUMBER
| QLITERAL
| QLITERAL NUMBER'''
p[0] = p[1]
def p_optsemi(p):
'''optsemi : ';'
| empty'''
def p_optcomma(p):
'''optcomma : ','
| empty'''
def p_definition_type(p):
'''definition : TYPE '<' ID '>' namelist optsemi'''
# type declarations are ignored
def p_namelist(p):
'''namelist : namelist optcomma ID
| ID'''
def p_definition_union(p):
'''definition : UNION CODE optsemi'''
# Union declarations are ignored
def p_rules(p):
'''rules : rules rule
| rule'''
if len(p) == 2:
rule = p[1]
else:
rule = p[2]
# Print out a Python equivalent of this rule
embedded = [] # Embedded actions (a mess)
embed_count = 0
rulename = rule[0]
rulecount = 1
for r in rule[1]:
# r contains one of the rule possibilities
print("def p_%s_%d(p):" % (rulename, rulecount))
prod = []
prodcode = ""
for i in range(len(r)):
item = r[i]
if item[0] == '{': # A code block
if i == len(r) - 1:
prodcode = item
break
else:
# an embedded action
embed_name = "_embed%d_%s" % (embed_count, rulename)
prod.append(embed_name)
embedded.append((embed_name, item))
embed_count += 1
else:
prod.append(item)
print(" '''%s : %s'''" % (rulename, " ".join(prod)))
# Emit code
print_code(prodcode, 4)
print()
rulecount += 1
for e, code in embedded:
print("def p_%s(p):" % e)
print(" '''%s : '''" % e)
print_code(code, 4)
print()
def p_rule(p):
'''rule : ID ':' rulelist ';' '''
p[0] = (p[1], [p[3]])
def p_rule2(p):
'''rule : ID ':' rulelist morerules ';' '''
p[4].insert(0, p[3])
p[0] = (p[1], p[4])
def p_rule_empty(p):
'''rule : ID ':' ';' '''
p[0] = (p[1], [[]])
def p_rule_empty2(p):
'''rule : ID ':' morerules ';' '''
p[3].insert(0, [])
p[0] = (p[1], p[3])
def p_morerules(p):
'''morerules : morerules '|' rulelist
| '|' rulelist
| '|' '''
if len(p) == 2:
p[0] = [[]]
elif len(p) == 3:
p[0] = [p[2]]
else:
p[0] = p[1]
p[0].append(p[3])
# print("morerules", len(p), p[0])
def p_rulelist(p):
'''rulelist : rulelist ruleitem
| ruleitem'''
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1]
p[1].append(p[2])
def p_ruleitem(p):
'''ruleitem : ID
| QLITERAL
| CODE
| PREC'''
p[0] = p[1]
def p_empty(p):
'''empty : '''
def p_error(p):
pass
yacc.yacc(debug=0)
def print_code(code, indent):
if not emit_code:
return
codelines = code.splitlines()
for c in codelines:
print("%s# %s" % (" " * indent, c))

View file

@ -0,0 +1,51 @@
#!/usr/local/bin/python
# yply.py
#
# Author: David Beazley (dave@dabeaz.com)
# Date : October 2, 2006
#
# Converts a UNIX-yacc specification file into a PLY-compatible
# specification. To use, simply do this:
#
# % python yply.py [-nocode] inputfile.y >myparser.py
#
# The output of this program is Python code. In the output,
# any C code in the original file is included, but is commented.
# If you use the -nocode option, then all of the C code in the
# original file is discarded.
#
# Disclaimer: This is just an example I threw together in an afternoon.
# It might have some bugs. However, it worked when I tried it on
# a yacc-specified C++ parser containing 442 rules and 855 parsing
# states.
#
import sys
sys.path.insert(0, "../..")
import ylex
import yparse
from ply import *
if len(sys.argv) == 1:
print("usage : yply.py [-nocode] inputfile")
raise SystemExit
if len(sys.argv) == 3:
if sys.argv[1] == '-nocode':
yparse.emit_code = 0
else:
print("Unknown option '%s'" % sys.argv[1])
raise SystemExit
filename = sys.argv[2]
else:
filename = sys.argv[1]
yacc.parse(open(filename).read())
print("""
if __name__ == '__main__':
from ply import *
yacc.yacc()
""")