mirror of
https://github.com/servo/servo.git
synced 2025-08-06 14:10:11 +01:00
Update ply
This commit is contained in:
parent
4d393612b4
commit
2acdeabaf5
60 changed files with 9624 additions and 297 deletions
|
@ -0,0 +1,41 @@
|
|||
yply.py
|
||||
|
||||
This example implements a program yply.py that converts a UNIX-yacc
|
||||
specification file into a PLY-compatible program. To use, simply
|
||||
run it like this:
|
||||
|
||||
% python yply.py [-nocode] inputfile.y >myparser.py
|
||||
|
||||
The output of this program is Python code. In the output,
|
||||
any C code in the original file is included, but is commented out.
|
||||
If you use the -nocode option, then all of the C code in the
|
||||
original file is just discarded.
|
||||
|
||||
To use the resulting grammar with PLY, you'll need to edit the
|
||||
myparser.py file. Within this file, some stub code is included that
|
||||
can be used to test the construction of the parsing tables. However,
|
||||
you'll need to do more editing to make a workable parser.
|
||||
|
||||
Disclaimer: This is just an example I threw together in an afternoon.
|
||||
It might have some bugs. However, it worked when I tried it on
|
||||
a yacc-specified C++ parser containing 442 rules and 855 parsing
|
||||
states.
|
||||
|
||||
Comments:
|
||||
|
||||
1. This example does not parse specification files meant for lex/flex.
|
||||
You'll need to specify the tokenizer on your own.
|
||||
|
||||
2. This example shows a number of interesting PLY features including
|
||||
|
||||
- Parsing of literal text delimited by nested parentheses
|
||||
- Some interaction between the parser and the lexer.
|
||||
- Use of literals in the grammar specification
|
||||
- One pass compilation. The program just emits the result,
|
||||
there is no intermediate parse tree.
|
||||
|
||||
3. This program could probably be cleaned up and enhanced a lot.
|
||||
It would be great if someone wanted to work on this (hint).
|
||||
|
||||
-Dave
|
||||
|
119
components/script/dom/bindings/codegen/ply/example/yply/ylex.py
Normal file
119
components/script/dom/bindings/codegen/ply/example/yply/ylex.py
Normal file
|
@ -0,0 +1,119 @@
|
|||
# lexer for yacc-grammars
|
||||
#
|
||||
# Author: David Beazley (dave@dabeaz.com)
|
||||
# Date : October 2, 2006
|
||||
|
||||
import sys
|
||||
sys.path.append("../..")
|
||||
|
||||
from ply import *
|
||||
|
||||
# Token types produced by this lexer.
tokens = (
    'LITERAL',
    'SECTION',
    'TOKEN',
    'LEFT',
    'RIGHT',
    'PREC',
    'START',
    'TYPE',
    'NONASSOC',
    'UNION',
    'CODE',
    'ID',
    'QLITERAL',
    'NUMBER',
)

# One extra, exclusive lexer state named 'code'; the t_code_* rules below
# belong to it.
states = (('code', 'exclusive'),)

# Single characters listed as literals.
literals = [';', ',', '<', '>', '|', ':']

# Blanks and tabs are skipped in the default state.
t_ignore = ' \t'

# yacc directives.
t_TOKEN = r'%token'
t_LEFT = r'%left'
t_RIGHT = r'%right'
t_NONASSOC = r'%nonassoc'
t_PREC = r'%prec'
t_START = r'%start'
t_TYPE = r'%type'
t_UNION = r'%union'

# Identifiers, quoted literals (single or double quotes) and integers.
t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*'
t_QLITERAL = r'''(?P<quote>['"]).*?(?P=quote)'''
t_NUMBER = r'\d+'
|
||||
|
||||
|
||||
def t_SECTION(t):
    r'%%'
    # The '%%' separator.  While the lexer's `lastsection` flag is unset or
    # false the token is returned unchanged (and the flag is initialised to
    # 0).  Once the flag is true -- the parser sets it to 1 in p_defsection --
    # the token's value becomes everything after the '%%' and the scan
    # position is moved to the end of the input.
    lexer = t.lexer
    if not getattr(lexer, "lastsection", 0):
        lexer.lastsection = 0
        return t
    t.value = lexer.lexdata[t.lexpos + 2:]
    lexer.lexpos = len(lexer.lexdata)
    return t
|
||||
|
||||
# Comments
|
||||
|
||||
|
||||
def t_ccomment(t):
    r'/\*(.|\n)*?\*/'
    # C-style comment: no token is returned, but the line counter is advanced
    # by the number of newlines the comment spans.
    newline_count = t.value.count('\n')
    t.lexer.lineno += newline_count
|
||||
|
||||
t_ignore_cppcomment = r'//.*'
|
||||
|
||||
|
||||
def t_LITERAL(t):
    r'%\{(.|\n)*?%\}'
    # A %{ ... %} block is returned as a single token; the line counter is
    # advanced by the newlines it contains.
    lexer = t.lexer
    lexer.lineno = lexer.lineno + t.value.count("\n")
    return t
|
||||
|
||||
|
||||
def t_NEWLINE(t):
    r'\n'
    # A newline only bumps the line counter; no token is returned.
    lexer = t.lexer
    lexer.lineno = lexer.lineno + 1
|
||||
|
||||
|
||||
def t_code(t):
    r'\{'
    # Opening brace seen in the default state: remember where the block
    # starts, set the nesting depth to 1 and switch to the 'code' state.
    # No token is returned here; t_code_rbrace returns the CODE token.
    lexer = t.lexer
    lexer.codestart, lexer.level = t.lexpos, 1
    lexer.begin('code')
|
||||
|
||||
|
||||
def t_code_ignore_string(t):
    r'\"([^\\\n]|(\\.))*?\"'
    # Pattern-only rule for the 'code' state: a double-quoted string (with
    # backslash escapes).  Nothing is returned -- presumably so that braces
    # inside a string do not change the nesting depth.
|
||||
|
||||
|
||||
def t_code_ignore_char(t):
    r'\'([^\\\n]|(\\.))*?\''
    # Pattern-only rule for the 'code' state: a single-quoted character
    # constant (with backslash escapes).  Nothing is returned -- presumably
    # so that a quoted brace does not change the nesting depth.
|
||||
|
||||
|
||||
def t_code_ignore_comment(t):
    r'/\*(.|\n)*?\*/'
    # Pattern-only rule for the 'code' state: a C-style comment.  Nothing is
    # returned and the line counter is not touched here; t_code_rbrace counts
    # the newlines of the whole code block when it ends.
|
||||
|
||||
|
||||
def t_code_ignore_cppcom(t):
    r'//.*'
    # Pattern-only rule for the 'code' state: a '//' comment running to the
    # end of the line.  Nothing is returned.
|
||||
|
||||
|
||||
def t_code_lbrace(t):
    r'\{'
    # A nested opening brace inside a code block: one level deeper.
    lexer = t.lexer
    lexer.level = lexer.level + 1
|
||||
|
||||
|
||||
def t_code_rbrace(t):
    r'\}'
    # A closing brace inside a code block.  Inner braces only reduce the
    # nesting depth.  The brace that brings the depth back to 0 ends the
    # block: the token becomes a CODE token whose value is the input text
    # from the recorded start position through this brace, the lexer goes
    # back to the INITIAL state, and the line counter is advanced by the
    # newlines inside the block.
    lexer = t.lexer
    lexer.level -= 1
    if lexer.level != 0:
        return None

    t.type = 'CODE'
    t.value = lexer.lexdata[lexer.codestart:t.lexpos + 1]
    lexer.begin('INITIAL')
    lexer.lineno += t.value.count('\n')
    return t
|
||||
|
||||
t_code_ignore_nonspace = r'[^\s\}\'\"\{]+'
|
||||
t_code_ignore_whitespace = r'\s+'
|
||||
t_code_ignore = ""
|
||||
|
||||
|
||||
def t_code_error(t):
    # Error hook for the 'code' state.  Scanning a code block is aborted with
    # RuntimeError, as before, but the exception now says where the problem
    # is: the current line number and the first character of the text passed
    # in t.value.
    raise RuntimeError(
        "%d: Illegal character %r inside code block" % (t.lexer.lineno, t.value[:1])
    )
|
||||
|
||||
|
||||
def t_error(t):
    # Error hook for the default state: report the offending character and
    # the text in t.value, then skip one character and carry on.
    #
    # The report goes to stderr rather than stdout: yply.py is run as
    # "python yply.py inputfile.y >myparser.py", so anything printed on
    # stdout would end up inside the generated parser.
    sys.stderr.write("%d: Illegal character '%s'\n" % (t.lexer.lineno, t.value[0]))
    sys.stderr.write("%s\n" % (t.value,))
    t.lexer.skip(1)
|
||||
|
||||
lex.lex()
|
||||
|
||||
if __name__ == '__main__':
|
||||
lex.runmain()
|
|
@ -0,0 +1,244 @@
|
|||
# parser for Unix yacc-based grammars
|
||||
#
|
||||
# Author: David Beazley (dave@dabeaz.com)
|
||||
# Date : October 2, 2006
|
||||
|
||||
import ylex
|
||||
tokens = ylex.tokens
|
||||
|
||||
from ply import *
|
||||
|
||||
tokenlist = []
|
||||
preclist = []
|
||||
|
||||
emit_code = 1
|
||||
|
||||
|
||||
def p_yacc(p):
    '''yacc : defsection rulesection'''
    # Top-level rule; no action is attached.  All output is printed by the
    # rules underneath it.
|
||||
|
||||
|
||||
def p_defsection(p):
    '''defsection : definitions SECTION
                  | SECTION'''
    # The first '%%' has been seen.  Setting `lastsection` makes t_SECTION
    # treat the next '%%' as the start of the trailing section.
    p.lexer.lastsection = 1

    # Print the collected token and precedence tables, each followed by a
    # blank line, then the banner that opens the rules part of the output.
    print("tokens = ", repr(tokenlist), end="\n\n")
    print("precedence = ", repr(preclist), end="\n\n")
    print("# -------------- RULES ----------------", end="\n\n")
|
||||
|
||||
|
||||
def p_rulesection(p):
    '''rulesection : rules SECTION'''
    # p[2] is the SECTION token value; it is passed to print_code with no
    # indentation after the closing banner.
    trailer = p[2]
    print("# -------------- RULES END ----------------")
    print_code(trailer, 0)
|
||||
|
||||
|
||||
def p_definitions(p):
    '''definitions : definitions definition
                   | definition'''
    # One or more definitions; each `definition` rule does its own work, so
    # nothing is done here.
|
||||
|
||||
|
||||
def p_definition_literal(p):
    '''definition : LITERAL'''
    # Pass the LITERAL token's text to print_code at column 0.
    literal_text = p[1]
    print_code(literal_text, 0)
|
||||
|
||||
|
||||
def p_definition_start(p):
    '''definition : START ID'''
    # "%start name" is printed as a `start = 'name'` assignment.
    start_symbol = p[2]
    print("start = '%s'" % start_symbol)
|
||||
|
||||
|
||||
def p_definition_token(p):
    '''definition : toktype opttype idlist optsemi '''
    # p[1] is the directive text ('%token', '%left', '%right' or
    # '%nonassoc'); p[3] is the list of names / quoted literals it declares.
    names = p[3]

    # Every unquoted name is a token.  A name may be declared more than once
    # (for example by %token and again by %left); it is recorded only the
    # first time so that the emitted `tokens` list has no duplicates.
    for name in names:
        if name[0] not in "'\"" and name not in tokenlist:
            tokenlist.append(name)

    # The associativity directives additionally add one precedence level.
    # Quoted literals are stored without their quotes: PLY precedence tables
    # name a literal by its bare character (e.g. ('left', '+', '-')).
    assoc = {'%left': 'left', '%right': 'right', '%nonassoc': 'nonassoc'}.get(p[1])
    if assoc is not None:
        terms = tuple(n[1:-1] if n[0] in "'\"" else n for n in names)
        preclist.append((assoc,) + terms)
|
||||
|
||||
|
||||
def p_toktype(p):
    '''toktype : TOKEN
               | LEFT
               | RIGHT
               | NONASSOC'''
    # The result is the directive token's own value.
    directive = p[1]
    p[0] = directive
|
||||
|
||||
|
||||
def p_opttype(p):
    '''opttype : '<' ID '>'
               | empty'''
    # Optional <type> tag after a directive; matched but not used.
|
||||
|
||||
|
||||
def p_idlist(p):
    '''idlist : idlist optcomma tokenid
              | tokenid'''
    # Builds the list of declared ids.  The recursive form appends to (and
    # returns) the very list produced so far; the base form starts a new one.
    if len(p) != 2:
        ids = p[1]
        ids.append(p[3])
    else:
        ids = [p[1]]
    p[0] = ids
|
||||
|
||||
|
||||
def p_tokenid(p):
    '''tokenid : ID
               | ID NUMBER
               | QLITERAL
               | QLITERAL NUMBER'''
    # Only the name or quoted literal is kept; a trailing NUMBER is dropped.
    symbol = p[1]
    p[0] = symbol
|
||||
|
||||
|
||||
def p_optsemi(p):
    '''optsemi : ';'
               | empty'''
    # Optional trailing semicolon; no action.
|
||||
|
||||
|
||||
def p_optcomma(p):
    '''optcomma : ','
                | empty'''
    # Optional comma between list entries; no action.
|
||||
|
||||
|
||||
def p_definition_type(p):
    '''definition : TYPE '<' ID '>' namelist optsemi'''
    # %type declarations are matched and then ignored: nothing is printed.
|
||||
|
||||
|
||||
def p_namelist(p):
    '''namelist : namelist optcomma ID
                | ID'''
    # List of names; matched only, no value is built.
|
||||
|
||||
|
||||
def p_definition_union(p):
    '''definition : UNION CODE optsemi'''
    # %union declarations are matched and then ignored: nothing is printed.
|
||||
|
||||
|
||||
def p_rules(p):
    '''rules : rules rule
             | rule'''
    # The rule that has just been reduced is the last symbol of either
    # alternative.  It is a (name, alternatives) pair, where each alternative
    # is a list of item strings (see the p_rule* functions).
    rulename, alternatives = p[len(p) - 1]

    # Embedded (mid-rule) actions: each one is replaced in the production by
    # a generated empty nonterminal, and collected here as
    # (generated_name, code) so its function can be printed afterwards.
    embedded = []

    # Print one Python function per alternative, numbered from 1.
    for rulecount, alternative in enumerate(alternatives, 1):
        print("def p_%s_%d(p):" % (rulename, rulecount))
        prod = []
        prodcode = ""
        last = len(alternative) - 1
        for index, item in enumerate(alternative):
            if item[0] != '{':
                # Ordinary grammar symbol.
                prod.append(item)
            elif index == last:
                # A code block at the very end is the production's action.
                prodcode = item
            else:
                # A code block anywhere else is an embedded action.
                embed_name = "_embed%d_%s" % (len(embedded), rulename)
                prod.append(embed_name)
                embedded.append((embed_name, item))
        print("    '''%s : %s'''" % (rulename, " ".join(prod)))
        # Emit the action code, commented out, at the same indentation.
        print_code(prodcode, 4)
        print()

    # Print the empty productions that stand in for the embedded actions.
    for embed_name, code in embedded:
        print("def p_%s(p):" % embed_name)
        print("    '''%s : '''" % embed_name)
        print_code(code, 4)
        print()
|
||||
|
||||
|
||||
def p_rule(p):
    '''rule : ID ':' rulelist ';' '''
    # A rule with exactly one alternative: (name, [items]).
    name, only_alternative = p[1], p[3]
    p[0] = (name, [only_alternative])
|
||||
|
||||
|
||||
def p_rule2(p):
    '''rule : ID ':' rulelist morerules ';' '''
    # First alternative followed by '|' alternatives: the first one is put
    # at the front of the list built by morerules, which is reused as-is.
    alternatives = p[4]
    alternatives.insert(0, p[3])
    p[0] = (p[1], alternatives)
|
||||
|
||||
|
||||
def p_rule_empty(p):
    '''rule : ID ':' ';' '''
    # A rule whose single alternative is empty.
    no_items = []
    p[0] = (p[1], [no_items])
|
||||
|
||||
|
||||
def p_rule_empty2(p):
    '''rule : ID ':' morerules ';' '''
    # The first alternative is empty and is followed by '|' alternatives:
    # an empty item list is put at the front of the morerules list.
    alternatives = p[3]
    alternatives.insert(0, [])
    p[0] = (p[1], alternatives)
|
||||
|
||||
|
||||
def p_morerules(p):
    '''morerules : morerules '|' rulelist
                 | '|' rulelist
                 | '|' '''
    # Builds the list of alternatives that follow a '|':
    #   a lone '|'           -> one empty alternative
    #   '|' rulelist         -> a new list holding that alternative
    #   morerules '|' ...    -> the existing list, extended in place
    size = len(p)
    if size == 2:
        alternatives = [[]]
    elif size == 3:
        alternatives = [p[2]]
    else:
        alternatives = p[1]
        alternatives.append(p[3])
    p[0] = alternatives
|
||||
|
||||
|
||||
def p_rulelist(p):
    '''rulelist : rulelist ruleitem
                | ruleitem'''
    # Builds the item list of one alternative.  The recursive form appends to
    # (and returns) the list built so far; the base form starts a new one.
    if len(p) != 2:
        items = p[1]
        items.append(p[2])
    else:
        items = [p[1]]
    p[0] = items
|
||||
|
||||
|
||||
def p_ruleitem(p):
    '''ruleitem : ID
                | QLITERAL
                | CODE
                | PREC'''
    # An item is represented by the text of its token.
    text = p[1]
    p[0] = text
|
||||
|
||||
|
||||
def p_empty(p):
    '''empty : '''
    # The empty production used by the optional rules; no action.
|
||||
|
||||
|
||||
def p_error(p):
    # Syntax-error hook.  Errors used to be dropped silently, which left the
    # user with truncated output and no hint why.  They are now reported on
    # stderr (stdout carries the generated parser).  Nothing is returned, as
    # before.
    #
    # NOTE(review): assumes PLY's convention that `p` is the offending token
    # (with .type/.value/.lineno) or None at end of input -- confirm against
    # the bundled ply version.
    import sys

    if p is None:
        sys.stderr.write("Syntax error: unexpected end of input\n")
        return
    sys.stderr.write(
        "%s: Syntax error at %s token %r\n"
        % (getattr(p, 'lineno', '?'), getattr(p, 'type', '?'), getattr(p, 'value', None))
    )
|
||||
|
||||
yacc.yacc(debug=0)
|
||||
|
||||
|
||||
def print_code(code, indent):
    # Print `code` line by line as '#' comments, each preceded by `indent`
    # spaces.  Nothing is printed when the module-level `emit_code` flag is
    # false (yply.py clears it for the -nocode option).
    if emit_code:
        prefix = " " * indent
        for line in code.splitlines():
            print("%s# %s" % (prefix, line))
|
51
components/script/dom/bindings/codegen/ply/example/yply/yply.py
Executable file
51
components/script/dom/bindings/codegen/ply/example/yply/yply.py
Executable file
|
@ -0,0 +1,51 @@
|
|||
#!/usr/local/bin/python
|
||||
# yply.py
|
||||
#
|
||||
# Author: David Beazley (dave@dabeaz.com)
|
||||
# Date : October 2, 2006
|
||||
#
|
||||
# Converts a UNIX-yacc specification file into a PLY-compatible
|
||||
# specification. To use, simply do this:
|
||||
#
|
||||
# % python yply.py [-nocode] inputfile.y >myparser.py
|
||||
#
|
||||
# The output of this program is Python code. In the output,
|
||||
# any C code in the original file is included, but is commented.
|
||||
# If you use the -nocode option, then all of the C code in the
|
||||
# original file is discarded.
|
||||
#
|
||||
# Disclaimer: This is just an example I threw together in an afternoon.
|
||||
# It might have some bugs. However, it worked when I tried it on
|
||||
# a yacc-specified C++ parser containing 442 rules and 855 parsing
|
||||
# states.
|
||||
#
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, "../..")
|
||||
|
||||
import ylex
|
||||
import yparse
|
||||
|
||||
from ply import *
|
||||
|
||||
# Command line: yply.py [-nocode] inputfile
#
# Errors are raised as SystemExit with a message: the message is written to
# stderr and the exit status is non-zero, so a usage mistake neither ends up
# in the redirected output file nor looks like success to the caller.
if len(sys.argv) not in (2, 3):
    raise SystemExit("usage : yply.py [-nocode] inputfile")

if len(sys.argv) == 3:
    if sys.argv[1] != '-nocode':
        raise SystemExit("Unknown option '%s'" % sys.argv[1])
    # -nocode: print_code() in yparse prints nothing when this flag is 0.
    yparse.emit_code = 0
    filename = sys.argv[2]
else:
    filename = sys.argv[1]

# Read the whole grammar, closing the file before parsing output begins.
with open(filename) as grammar_file:
    grammar_text = grammar_file.read()

# The converted grammar is printed on stdout as a side effect of parsing.
yacc.parse(grammar_text)

# Stub appended to the generated file so that it can be run directly to
# build the parsing tables.
print("""
if __name__ == '__main__':
    from ply import *
    yacc.yacc()
""")
|
Loading…
Add table
Add a link
Reference in a new issue