mirror of
https://github.com/servo/servo.git
synced 2025-06-24 17:14:33 +01:00
88 lines
2.7 KiB
Python
88 lines
2.7 KiB
Python
import json
|
|
|
|
import html5lib
|
|
|
|
def parse(path="html5ents.xml"):
    """Parse the entity definition document at ``path`` with html5lib.

    The document is parsed with html5lib's "lxml" tree builder and the
    resulting tree is returned.  The file is opened inside a ``with`` block
    so the handle is closed as soon as parsing finishes instead of being
    left for the garbage collector.
    """
    with open(path) as source:
        return html5lib.parse(source, treebuilder="lxml")
|
|
|
|
def entity_table(tree):
    """Build a mapping of entity name to replacement characters.

    Every ``tr`` under a ``tbody`` in the XHTML namespace contributes one
    entry: the key is the concatenated text of the row's first child passed
    through ``entity_name``, the value is the ``.text`` of the second child
    passed through ``entity_characters``.
    """
    xhtml = {"h":"http://www.w3.org/1999/xhtml"}
    table = {}
    for row in tree.xpath("//h:tbody/h:tr", namespaces=xhtml):
        name_text = "".join(row[0].xpath(".//text()"))
        key = entity_name(name_text)
        table[key] = entity_characters(row[1].text)
    return table
|
|
|
|
def entity_name(inp):
    """Return ``inp`` with leading and trailing whitespace removed."""
    cleaned = inp.strip()
    return cleaned
|
|
|
|
def entity_characters(inp):
    """Turn a whitespace-separated list of code point labels into a string.

    Each whitespace-delimited token of ``inp`` is converted with
    ``codepoint_to_character`` and the results are concatenated in order.
    """
    # str.split() with no arguments never yields empty tokens, so no
    # additional filtering is needed before converting.
    tokens = inp.split()
    return "".join(map(codepoint_to_character, tokens))
|
|
|
|
def codepoint_to_character(inp):
    """Convert a code point label such as ``U+000C1`` into its character.

    The first two characters of ``inp`` (the ``U+`` prefix) are skipped and
    the remainder is read as a hexadecimal number with any number of digits.

    The value is rendered as an eight-digit escape sequence and decoded with
    the ``unicode-escape`` codec.  Going through ASCII bytes keeps this
    working on both Python 2 and Python 3 (where ``str`` has no ``decode``
    method and a truncated escape inside a string literal is a syntax
    error), and zero-padding the parsed number removes the old requirement
    that the label carry exactly five hex digits.

    Raises ``ValueError`` (or its ``UnicodeDecodeError`` subclass) when the
    label is not valid hexadecimal or lies outside the Unicode range.
    """
    codepoint = int(inp[2:], 16)
    escape = "\\U%08x" % codepoint
    return escape.encode("ascii").decode("unicode-escape")
|
|
|
|
def make_tests_json(entities):
    """Build the JSON-serialisable test document for ``entities``.

    Returns a dict with a single ``"tests"`` key holding one ``make_test``
    result per tuple produced by ``make_test_list``.
    """
    cases = []
    for case in make_test_list(entities):
        cases.append(make_test(*case))
    return {"tests": cases}
|
|
|
|
def make_test(name, characters, good):
    """Assemble a single test record for the entity ``name``.

    The record carries a description, the input text (``name`` prefixed
    with an ampersand) and the expected output for that input.
    """
    record = {}
    record["description"] = test_description(name, good)
    record["input"] = "&%s"%name
    record["output"] = test_expected(name, characters, good)
    return record
|
|
|
|
def test_description(name, good):
|
|
with_semicolon = name.endswith(";")
|
|
semicolon_text = {True:"with a semi-colon",
|
|
False:"without a semi-colon"}[with_semicolon]
|
|
if good:
|
|
text = "Named entity: %s %s"%(name, semicolon_text)
|
|
else:
|
|
text = "Bad named entity: %s %s"%(name, semicolon_text)
|
|
return text
|
|
|
|
def test_expected(name, characters, good):
|
|
rv = []
|
|
if not good or not name.endswith(";"):
|
|
rv.append("ParseError")
|
|
rv.append(["Character", characters])
|
|
return rv
|
|
|
|
def make_test_list(entities):
    """Return the sorted list of ``(name, characters, good)`` test tuples.

    Every entity yields a good test.  An entity whose name ends with a
    semi-colon and for which ``subentity_exists`` is false additionally
    yields a bad test for the name without the semi-colon; its expected
    characters are the literal ampersand followed by that shortened name.
    """
    cases = []
    for name, characters in entities.items():
        needs_bad_case = name.endswith(";") and not subentity_exists(name, entities)
        if needs_bad_case:
            bare_name = name[:-1]
            cases.append((bare_name, "&" + bare_name, False))
        cases.append((name, characters, True))
    cases.sort()
    return cases
|
|
|
|
def subentity_exists(entity_name, entities):
    """Report whether a proper, non-empty prefix of ``entity_name`` is an entity.

    Prefixes are formed by dropping between one and ``len(entity_name) - 1``
    trailing characters; the full name and the empty string are never tried.
    """
    return any(entity_name[:-dropped] in entities
               for dropped in range(1, len(entity_name)))
|
|
|
|
def make_entities_code(entities):
    """Render ``entities`` as Python source defining an ``entities`` dict.

    One line is emitted per entity, sorted by name.  Each value is written
    as a ``u"..."`` literal whose content is the ``unicode-escape`` encoding
    of the entity's characters with double quotes backslash-escaped.

    The escaped bytes are decoded back to text before the quote replacement
    and formatting: on Python 3 ``encode`` returns ``bytes``, and calling
    ``replace`` on it with ``str`` arguments raises ``TypeError``.  The
    escape output is pure ASCII, so the decode cannot fail.
    """
    lines = []
    for name in sorted(entities):
        escaped = entities[name].encode("unicode-escape").decode("ascii")
        escaped = escaped.replace("\"", "\\\"")
        lines.append(" \"%s\": u\"%s\"," % (name, escaped))
    entities_text = "\n".join(lines)
    return """entities = {
%s
}"""%entities_text
|
|
|
|
def main():
    """Regenerate the entity test file and the entity constants module.

    Parses the default entity document, writes the JSON test document to
    ``namedEntities.test`` and the generated Python source to
    ``entities_constants.py`` in the current working directory.  Both
    outputs are written inside ``with`` blocks so they are flushed and
    closed deterministically.
    """
    entities = entity_table(parse())
    tests_json = make_tests_json(entities)
    with open("namedEntities.test", "w") as tests_file:
        json.dump(tests_json, tests_file, indent=4)
    code = make_entities_code(entities)
    with open("entities_constants.py", "w") as code_file:
        code_file.write(code)
|
|
|
|
# Run the generator only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()
|
|
|