Update ply

sagudev 2023-02-19 13:38:01 +01:00
parent 4d393612b4
commit 2acdeabaf5
60 changed files with 9624 additions and 297 deletions


@@ -0,0 +1,41 @@
yply.py
This example implements a program yply.py that converts a UNIX-yacc
specification file into a PLY-compatible program. To use, simply
run it like this:
% python yply.py [-nocode] inputfile.y >myparser.py
The output of this program is Python code. In the output,
any C code in the original file is included, but is commented out.
If you use the -nocode option, then all of the C code in the
original file is just discarded.
To use the resulting grammar with PLY, you'll need to edit the
myparser.py file. Within this file, some stub code is included that
can be used to test the construction of the parsing tables. However,
you'll need to do more editing to make a workable parser.
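To give a rough idea, the emitted file looks something like the sketch
below (the rule and token names are purely illustrative; the layout follows
what yparse.py prints): a tokens list, a precedence list, one p_* function
per grammar rule with the original C action commented out, and a small stub
at the end that builds the parsing tables.

    tokens = ['ID', 'NUMBER']

    precedence = [('left', 'PLUS', 'MINUS')]

    # -------------- RULES ----------------
    def p_expr_1(p):
        '''expr : expr PLUS expr'''
        #  { $$ = $1 + $3; }

    # -------------- RULES END ----------------

    if __name__ == '__main__':
        from ply import *
        yacc.yacc()
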
Disclaimer: This is just an example I threw together in an afternoon.
It might have some bugs. However, it worked when I tried it on
a yacc-specified C++ parser containing 442 rules and 855 parsing
states.
Comments:
1. This example does not parse specification files meant for lex/flex.
You'll need to specify the tokenizer on your own (a minimal sketch is
shown after this list).
2. This example shows a number of interesting PLY features including
- Parsing of literal text delimited by nested braces
- Some interaction between the parser and the lexer.
- Use of literals in the grammar specification
- One pass compilation. The program just emits the result,
there is no intermediate parse tree.
3. This program could probably be cleaned up and enhanced a lot.
It would be great if someone wanted to work on this (hint).
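
Regarding comment 1: a minimal hand-rolled PLY lexer for the generated
parser might look something like the sketch below. The token names and
patterns are only placeholders; they must match the tokens list that
yply.py emitted for your particular grammar.

    # mylexer.py -- illustrative sketch only
    from ply import lex

    tokens = ('ID', 'NUMBER')      # must agree with the generated tokens list
    literals = ['+', '-', '(', ')']

    t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*'
    t_NUMBER = r'\d+'
    t_ignore = ' \t'

    def t_newline(t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    def t_error(t):
        print("Illegal character %r" % t.value[0])
        t.lexer.skip(1)

    lexer = lex.lex()

You would then build the parser with yacc.yacc() in myparser.py and call
parser.parse(data, lexer=lexer).
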
-Dave


@@ -0,0 +1,119 @@
# lexer for yacc-grammars
#
# Author: David Beazley (dave@dabeaz.com)
# Date : October 2, 2006
import sys
sys.path.append("../..")
from ply import *
tokens = (
    'LITERAL', 'SECTION', 'TOKEN', 'LEFT', 'RIGHT', 'PREC', 'START', 'TYPE', 'NONASSOC', 'UNION', 'CODE',
    'ID', 'QLITERAL', 'NUMBER',
)
states = (('code', 'exclusive'),)
literals = [';', ',', '<', '>', '|', ':']
t_ignore = ' \t'
t_TOKEN = r'%token'
t_LEFT = r'%left'
t_RIGHT = r'%right'
t_NONASSOC = r'%nonassoc'
t_PREC = r'%prec'
t_START = r'%start'
t_TYPE = r'%type'
t_UNION = r'%union'
t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*'
t_QLITERAL = r'''(?P<quote>['"]).*?(?P=quote)'''
t_NUMBER = r'\d+'
def t_SECTION(t):
    r'%%'
    # Once the parser flags that we are past the rules section, swallow the
    # remainder of the file as this token's value.
    if getattr(t.lexer, "lastsection", 0):
        t.value = t.lexer.lexdata[t.lexpos + 2:]
        t.lexer.lexpos = len(t.lexer.lexdata)
    else:
        t.lexer.lastsection = 0
    return t

# Comments
def t_ccomment(t):
    r'/\*(.|\n)*?\*/'
    t.lexer.lineno += t.value.count('\n')

t_ignore_cppcomment = r'//.*'

def t_LITERAL(t):
    r'%\{(.|\n)*?%\}'
    t.lexer.lineno += t.value.count("\n")
    return t

def t_NEWLINE(t):
    r'\n'
    t.lexer.lineno += 1

# An opening brace switches the lexer into the exclusive 'code' state, where
# braces are counted so an entire C action block comes back as one CODE token.
def t_code(t):
    r'\{'
    t.lexer.codestart = t.lexpos
    t.lexer.level = 1
    t.lexer.begin('code')

def t_code_ignore_string(t):
    r'\"([^\\\n]|(\\.))*?\"'

def t_code_ignore_char(t):
    r'\'([^\\\n]|(\\.))*?\''

def t_code_ignore_comment(t):
    r'/\*(.|\n)*?\*/'

def t_code_ignore_cppcom(t):
    r'//.*'

def t_code_lbrace(t):
    r'\{'
    t.lexer.level += 1

def t_code_rbrace(t):
    r'\}'
    t.lexer.level -= 1
    if t.lexer.level == 0:
        t.type = 'CODE'
        t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos + 1]
        t.lexer.begin('INITIAL')
        t.lexer.lineno += t.value.count('\n')
        return t
t_code_ignore_nonspace = r'[^\s\}\'\"\{]+'
t_code_ignore_whitespace = r'\s+'
t_code_ignore = ""
def t_code_error(t):
    raise RuntimeError

def t_error(t):
    print("%d: Illegal character '%s'" % (t.lexer.lineno, t.value[0]))
    print(t.value)
    t.lexer.skip(1)
lex.lex()
if __name__ == '__main__':
    lex.runmain()


@@ -0,0 +1,244 @@
# parser for Unix yacc-based grammars
#
# Author: David Beazley (dave@dabeaz.com)
# Date : October 2, 2006
import ylex
tokens = ylex.tokens
from ply import *
tokenlist = []
preclist = []
emit_code = 1
def p_yacc(p):
    '''yacc : defsection rulesection'''

def p_defsection(p):
    '''defsection : definitions SECTION
                  | SECTION'''
    p.lexer.lastsection = 1
    print("tokens = ", repr(tokenlist))
    print()
    print("precedence = ", repr(preclist))
    print()
    print("# -------------- RULES ----------------")
    print()

def p_rulesection(p):
    '''rulesection : rules SECTION'''
    print("# -------------- RULES END ----------------")
    print_code(p[2], 0)

def p_definitions(p):
    '''definitions : definitions definition
                   | definition'''

def p_definition_literal(p):
    '''definition : LITERAL'''
    print_code(p[1], 0)

def p_definition_start(p):
    '''definition : START ID'''
    print("start = '%s'" % p[2])

def p_definition_token(p):
    '''definition : toktype opttype idlist optsemi '''
    for i in p[3]:
        if i[0] not in "'\"":
            tokenlist.append(i)
    if p[1] == '%left':
        preclist.append(('left',) + tuple(p[3]))
    elif p[1] == '%right':
        preclist.append(('right',) + tuple(p[3]))
    elif p[1] == '%nonassoc':
        preclist.append(('nonassoc',) + tuple(p[3]))
def p_toktype(p):
    '''toktype : TOKEN
               | LEFT
               | RIGHT
               | NONASSOC'''
    p[0] = p[1]

def p_opttype(p):
    '''opttype : '<' ID '>'
               | empty'''

def p_idlist(p):
    '''idlist : idlist optcomma tokenid
              | tokenid'''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        p[1].append(p[3])

def p_tokenid(p):
    '''tokenid : ID
               | ID NUMBER
               | QLITERAL
               | QLITERAL NUMBER'''
    p[0] = p[1]

def p_optsemi(p):
    '''optsemi : ';'
               | empty'''

def p_optcomma(p):
    '''optcomma : ','
                | empty'''

def p_definition_type(p):
    '''definition : TYPE '<' ID '>' namelist optsemi'''
    # type declarations are ignored

def p_namelist(p):
    '''namelist : namelist optcomma ID
                | ID'''

def p_definition_union(p):
    '''definition : UNION CODE optsemi'''
    # Union declarations are ignored
def p_rules(p):
    '''rules : rules rule
             | rule'''
    if len(p) == 2:
        rule = p[1]
    else:
        rule = p[2]

    # Print out a Python equivalent of this rule
    embedded = []      # Embedded actions (a mess)
    embed_count = 0

    rulename = rule[0]
    rulecount = 1
    for r in rule[1]:
        # r contains one of the rule possibilities
        print("def p_%s_%d(p):" % (rulename, rulecount))
        prod = []
        prodcode = ""
        for i in range(len(r)):
            item = r[i]
            if item[0] == '{':    # A code block
                if i == len(r) - 1:
                    prodcode = item
                    break
                else:
                    # an embedded action
                    embed_name = "_embed%d_%s" % (embed_count, rulename)
                    prod.append(embed_name)
                    embedded.append((embed_name, item))
                    embed_count += 1
            else:
                prod.append(item)
        print("    '''%s : %s'''" % (rulename, " ".join(prod)))
        # Emit code
        print_code(prodcode, 4)
        print()
        rulecount += 1

    for e, code in embedded:
        print("def p_%s(p):" % e)
        print("    '''%s : '''" % e)
        print_code(code, 4)
        print()
def p_rule(p):
    '''rule : ID ':' rulelist ';' '''
    p[0] = (p[1], [p[3]])

def p_rule2(p):
    '''rule : ID ':' rulelist morerules ';' '''
    p[4].insert(0, p[3])
    p[0] = (p[1], p[4])

def p_rule_empty(p):
    '''rule : ID ':' ';' '''
    p[0] = (p[1], [[]])

def p_rule_empty2(p):
    '''rule : ID ':' morerules ';' '''
    p[3].insert(0, [])
    p[0] = (p[1], p[3])

def p_morerules(p):
    '''morerules : morerules '|' rulelist
                 | '|' rulelist
                 | '|' '''
    if len(p) == 2:
        p[0] = [[]]
    elif len(p) == 3:
        p[0] = [p[2]]
    else:
        p[0] = p[1]
        p[0].append(p[3])
    # print("morerules", len(p), p[0])
def p_rulelist(p):
    '''rulelist : rulelist ruleitem
                | ruleitem'''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        p[1].append(p[2])

def p_ruleitem(p):
    '''ruleitem : ID
                | QLITERAL
                | CODE
                | PREC'''
    p[0] = p[1]

def p_empty(p):
    '''empty : '''

def p_error(p):
    pass
yacc.yacc(debug=0)
def print_code(code, indent):
    if not emit_code:
        return
    codelines = code.splitlines()
    for c in codelines:
        print("%s# %s" % (" " * indent, c))


@@ -0,0 +1,51 @@
#!/usr/local/bin/python
# yply.py
#
# Author: David Beazley (dave@dabeaz.com)
# Date : October 2, 2006
#
# Converts a UNIX-yacc specification file into a PLY-compatible
# specification. To use, simply do this:
#
# % python yply.py [-nocode] inputfile.y >myparser.py
#
# The output of this program is Python code. In the output,
# any C code in the original file is included, but is commented.
# If you use the -nocode option, then all of the C code in the
# original file is discarded.
#
# Disclaimer: This is just an example I threw together in an afternoon.
# It might have some bugs. However, it worked when I tried it on
# a yacc-specified C++ parser containing 442 rules and 855 parsing
# states.
#
import sys
sys.path.insert(0, "../..")
import ylex
import yparse
from ply import *
if len(sys.argv) == 1:
    print("usage : yply.py [-nocode] inputfile")
    raise SystemExit

if len(sys.argv) == 3:
    if sys.argv[1] == '-nocode':
        yparse.emit_code = 0
    else:
        print("Unknown option '%s'" % sys.argv[1])
        raise SystemExit
    filename = sys.argv[2]
else:
    filename = sys.argv[1]
yacc.parse(open(filename).read())
print("""
if __name__ == '__main__':
from ply import *
yacc.yacc()
""")