mirror of
https://github.com/servo/servo.git
synced 2025-08-06 14:10:11 +01:00
Update web-platform-tests to revision 0d318188757a9c996e20b82db201fd04de5aa255
This commit is contained in:
parent
b2a5225831
commit
1a81b18b9f
12321 changed files with 544385 additions and 6 deletions
177
tests/wpt/web-platform-tests/tools/html5lib/doc/Makefile
Normal file
177
tests/wpt/web-platform-tests/tools/html5lib/doc/Makefile
Normal file
|
@ -0,0 +1,177 @@
|
|||
# Makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line.
|
||||
SPHINXOPTS =
|
||||
SPHINXBUILD = sphinx-build
|
||||
PAPER =
|
||||
BUILDDIR = _build
|
||||
|
||||
# User-friendly check for sphinx-build
|
||||
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
|
||||
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
|
||||
endif
|
||||
|
||||
# Internal variables.
|
||||
PAPEROPT_a4 = -D latex_paper_size=a4
|
||||
PAPEROPT_letter = -D latex_paper_size=letter
|
||||
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||
# the i18n builder cannot share the environment and doctrees with the others
|
||||
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||
|
||||
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
|
||||
|
||||
help:
|
||||
@echo "Please use \`make <target>' where <target> is one of"
|
||||
@echo " html to make standalone HTML files"
|
||||
@echo " dirhtml to make HTML files named index.html in directories"
|
||||
@echo " singlehtml to make a single large HTML file"
|
||||
@echo " pickle to make pickle files"
|
||||
@echo " json to make JSON files"
|
||||
@echo " htmlhelp to make HTML files and a HTML help project"
|
||||
@echo " qthelp to make HTML files and a qthelp project"
|
||||
@echo " devhelp to make HTML files and a Devhelp project"
|
||||
@echo " epub to make an epub"
|
||||
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
|
||||
@echo " latexpdf to make LaTeX files and run them through pdflatex"
|
||||
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
|
||||
@echo " text to make text files"
|
||||
@echo " man to make manual pages"
|
||||
@echo " texinfo to make Texinfo files"
|
||||
@echo " info to make Texinfo files and run them through makeinfo"
|
||||
@echo " gettext to make PO message catalogs"
|
||||
@echo " changes to make an overview of all changed/added/deprecated items"
|
||||
@echo " xml to make Docutils-native XML files"
|
||||
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
|
||||
@echo " linkcheck to check all external links for integrity"
|
||||
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
|
||||
|
||||
clean:
|
||||
rm -rf $(BUILDDIR)/*
|
||||
|
||||
html:
|
||||
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
|
||||
@echo
|
||||
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
|
||||
|
||||
dirhtml:
|
||||
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
|
||||
@echo
|
||||
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
|
||||
|
||||
singlehtml:
|
||||
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
|
||||
@echo
|
||||
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
|
||||
|
||||
pickle:
|
||||
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
|
||||
@echo
|
||||
@echo "Build finished; now you can process the pickle files."
|
||||
|
||||
json:
|
||||
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
|
||||
@echo
|
||||
@echo "Build finished; now you can process the JSON files."
|
||||
|
||||
htmlhelp:
|
||||
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
|
||||
@echo
|
||||
@echo "Build finished; now you can run HTML Help Workshop with the" \
|
||||
".hhp project file in $(BUILDDIR)/htmlhelp."
|
||||
|
||||
qthelp:
|
||||
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
|
||||
@echo
|
||||
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
|
||||
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
|
||||
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/html5lib.qhcp"
|
||||
@echo "To view the help file:"
|
||||
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/html5lib.qhc"
|
||||
|
||||
devhelp:
|
||||
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
|
||||
@echo
|
||||
@echo "Build finished."
|
||||
@echo "To view the help file:"
|
||||
@echo "# mkdir -p $$HOME/.local/share/devhelp/html5lib"
|
||||
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/html5lib"
|
||||
@echo "# devhelp"
|
||||
|
||||
epub:
|
||||
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
|
||||
@echo
|
||||
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
|
||||
|
||||
latex:
|
||||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||
@echo
|
||||
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
|
||||
@echo "Run \`make' in that directory to run these through (pdf)latex" \
|
||||
"(use \`make latexpdf' here to do that automatically)."
|
||||
|
||||
latexpdf:
|
||||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||
@echo "Running LaTeX files through pdflatex..."
|
||||
$(MAKE) -C $(BUILDDIR)/latex all-pdf
|
||||
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
|
||||
|
||||
latexpdfja:
|
||||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||
@echo "Running LaTeX files through platex and dvipdfmx..."
|
||||
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
|
||||
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
|
||||
|
||||
text:
|
||||
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
|
||||
@echo
|
||||
@echo "Build finished. The text files are in $(BUILDDIR)/text."
|
||||
|
||||
man:
|
||||
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
|
||||
@echo
|
||||
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
|
||||
|
||||
texinfo:
|
||||
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
|
||||
@echo
|
||||
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
|
||||
@echo "Run \`make' in that directory to run these through makeinfo" \
|
||||
"(use \`make info' here to do that automatically)."
|
||||
|
||||
info:
|
||||
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
|
||||
@echo "Running Texinfo files through makeinfo..."
|
||||
make -C $(BUILDDIR)/texinfo info
|
||||
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
|
||||
|
||||
gettext:
|
||||
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
|
||||
@echo
|
||||
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
|
||||
|
||||
changes:
|
||||
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
|
||||
@echo
|
||||
@echo "The overview file is in $(BUILDDIR)/changes."
|
||||
|
||||
linkcheck:
|
||||
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
|
||||
@echo
|
||||
@echo "Link check complete; look for any errors in the above output " \
|
||||
"or in $(BUILDDIR)/linkcheck/output.txt."
|
||||
|
||||
doctest:
|
||||
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
|
||||
@echo "Testing of doctests in the sources finished, look at the " \
|
||||
"results in $(BUILDDIR)/doctest/output.txt."
|
||||
|
||||
xml:
|
||||
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
|
||||
@echo
|
||||
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
|
||||
|
||||
pseudoxml:
|
||||
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
|
||||
@echo
|
||||
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
|
|
@ -0,0 +1,3 @@
|
|||
.. :changelog:
|
||||
|
||||
.. include:: ../CHANGES.rst
|
280
tests/wpt/web-platform-tests/tools/html5lib/doc/conf.py
Normal file
280
tests/wpt/web-platform-tests/tools/html5lib/doc/conf.py
Normal file
|
@ -0,0 +1,280 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# html5lib documentation build configuration file, created by
|
||||
# sphinx-quickstart on Wed May 8 00:04:49 2013.
|
||||
#
|
||||
# This file is execfile()d with the current directory set to its containing dir.
|
||||
#
|
||||
# Note that not all possible configuration values are present in this
|
||||
# autogenerated file.
|
||||
#
|
||||
# All configuration values have a default; values that are commented out
|
||||
# serve to show the default.
|
||||
|
||||
import sys, os
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#sys.path.insert(0, os.path.abspath('.'))
|
||||
|
||||
# -- General configuration -----------------------------------------------------
|
||||
|
||||
# If your documentation needs a minimal Sphinx version, state it here.
|
||||
#needs_sphinx = '1.0'
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be extensions
|
||||
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
|
||||
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.viewcode']
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['_templates']
|
||||
|
||||
# The suffix of source filenames.
|
||||
source_suffix = '.rst'
|
||||
|
||||
# The encoding of source files.
|
||||
#source_encoding = 'utf-8-sig'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
|
||||
# General information about the project.
|
||||
project = 'html5lib'
|
||||
copyright = '2006 - 2013, James Graham, Geoffrey Sneddon, and contributors'
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = '1.0'
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
sys.path.append(os.path.abspath('..'))
|
||||
from html5lib import __version__
|
||||
release = __version__
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
#language = 'en'
|
||||
|
||||
# There are two options for replacing |today|: either, you set today to some
|
||||
# non-false value, then it is used:
|
||||
#today = ''
|
||||
# Else, today_fmt is used as the format for a strftime call.
|
||||
#today_fmt = '%B %d, %Y'
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
exclude_patterns = ['_build', 'theme']
|
||||
|
||||
# The reST default role (used for this markup: `text`) to use for all documents.
|
||||
#default_role = None
|
||||
|
||||
# If true, '()' will be appended to :func: etc. cross-reference text.
|
||||
#add_function_parentheses = True
|
||||
|
||||
# If true, the current module name will be prepended to all description
|
||||
# unit titles (such as .. function::).
|
||||
#add_module_names = True
|
||||
|
||||
# If true, sectionauthor and moduleauthor directives will be shown in the
|
||||
# output. They are ignored by default.
|
||||
#show_authors = False
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = 'sphinx'
|
||||
|
||||
# A list of ignored prefixes for module index sorting.
|
||||
#modindex_common_prefix = []
|
||||
|
||||
# If true, keep warnings as "system message" paragraphs in the built documents.
|
||||
#keep_warnings = False
|
||||
|
||||
|
||||
# -- Options for HTML output ---------------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
html_theme = 'default'
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
#html_theme_options = {}
|
||||
|
||||
# Add any paths that contain custom themes here, relative to this directory.
|
||||
#html_theme_path = []
|
||||
|
||||
# The name for this set of Sphinx documents. If None, it defaults to
|
||||
# "<project> v<release> documentation".
|
||||
#html_title = None
|
||||
|
||||
# A shorter title for the navigation bar. Default is the same as html_title.
|
||||
#html_short_title = None
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top
|
||||
# of the sidebar.
|
||||
#html_logo = None
|
||||
|
||||
# The name of an image file (within the static path) to use as favicon of the
|
||||
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
||||
# pixels large.
|
||||
#html_favicon = None
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ['_static']
|
||||
|
||||
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
||||
# using the given strftime format.
|
||||
#html_last_updated_fmt = '%b %d, %Y'
|
||||
|
||||
# If true, SmartyPants will be used to convert quotes and dashes to
|
||||
# typographically correct entities.
|
||||
#html_use_smartypants = True
|
||||
|
||||
# Custom sidebar templates, maps document names to template names.
|
||||
#html_sidebars = {}
|
||||
|
||||
# Additional templates that should be rendered to pages, maps page names to
|
||||
# template names.
|
||||
#html_additional_pages = {}
|
||||
|
||||
# If false, no module index is generated.
|
||||
#html_domain_indices = True
|
||||
|
||||
# If false, no index is generated.
|
||||
#html_use_index = True
|
||||
|
||||
# If true, the index is split into individual pages for each letter.
|
||||
#html_split_index = False
|
||||
|
||||
# If true, links to the reST sources are added to the pages.
|
||||
#html_show_sourcelink = True
|
||||
|
||||
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
||||
#html_show_sphinx = True
|
||||
|
||||
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
|
||||
#html_show_copyright = True
|
||||
|
||||
# If true, an OpenSearch description file will be output, and all pages will
|
||||
# contain a <link> tag referring to it. The value of this option must be the
|
||||
# base URL from which the finished HTML is served.
|
||||
#html_use_opensearch = ''
|
||||
|
||||
# This is the file name suffix for HTML files (e.g. ".xhtml").
|
||||
#html_file_suffix = None
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = 'html5libdoc'
|
||||
|
||||
|
||||
# -- Options for LaTeX output --------------------------------------------------
|
||||
|
||||
latex_elements = {
|
||||
# The paper size ('letterpaper' or 'a4paper').
|
||||
#'papersize': 'letterpaper',
|
||||
|
||||
# The font size ('10pt', '11pt' or '12pt').
|
||||
#'pointsize': '10pt',
|
||||
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
#'preamble': '',
|
||||
}
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title, author, documentclass [howto/manual]).
|
||||
latex_documents = [
|
||||
('index', 'html5lib.tex', 'html5lib Documentation',
|
||||
'James Graham, Geoffrey Sneddon, and contributors', 'manual'),
|
||||
]
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top of
|
||||
# the title page.
|
||||
#latex_logo = None
|
||||
|
||||
# For "manual" documents, if this is true, then toplevel headings are parts,
|
||||
# not chapters.
|
||||
#latex_use_parts = False
|
||||
|
||||
# If true, show page references after internal links.
|
||||
#latex_show_pagerefs = False
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
#latex_show_urls = False
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
#latex_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
#latex_domain_indices = True
|
||||
|
||||
|
||||
# -- Options for manual page output --------------------------------------------
|
||||
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [
|
||||
('index', 'html5lib', 'html5lib Documentation',
|
||||
['James Graham, Geoffrey Sneddon, and contributors'], 1)
|
||||
]
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
#man_show_urls = False
|
||||
|
||||
|
||||
# -- Options for Texinfo output ------------------------------------------------
|
||||
|
||||
# Grouping the document tree into Texinfo files. List of tuples
|
||||
# (source start file, target name, title, author,
|
||||
# dir menu entry, description, category)
|
||||
texinfo_documents = [
|
||||
('index', 'html5lib', 'html5lib Documentation',
|
||||
'James Graham, Geoffrey Sneddon, and contributors', 'html5lib', 'One line description of project.',
|
||||
'Miscellaneous'),
|
||||
]
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
#texinfo_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
#texinfo_domain_indices = True
|
||||
|
||||
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
||||
#texinfo_show_urls = 'footnote'
|
||||
|
||||
# If true, do not generate a @detailmenu in the "Top" node's menu.
|
||||
#texinfo_no_detailmenu = False
|
||||
|
||||
class CExtMock(object):
|
||||
"""Required for autodoc on readthedocs.org where you cannot build C extensions."""
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
return CExtMock()
|
||||
|
||||
@classmethod
|
||||
def __getattr__(cls, name):
|
||||
if name in ('__file__', '__path__'):
|
||||
return '/dev/null'
|
||||
else:
|
||||
return CExtMock()
|
||||
|
||||
try:
|
||||
import lxml # flake8: noqa
|
||||
except ImportError:
|
||||
sys.modules['lxml'] = CExtMock()
|
||||
sys.modules['lxml.etree'] = CExtMock()
|
||||
print("warning: lxml modules mocked.")
|
||||
|
||||
try:
|
||||
import genshi # flake8: noqa
|
||||
except ImportError:
|
||||
sys.modules['genshi'] = CExtMock()
|
||||
sys.modules['genshi.core'] = CExtMock()
|
||||
print("warning: genshi modules mocked.")
|
|
@ -0,0 +1,59 @@
|
|||
filters Package
|
||||
===============
|
||||
|
||||
:mod:`_base` Module
|
||||
-------------------
|
||||
|
||||
.. automodule:: html5lib.filters._base
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`alphabeticalattributes` Module
|
||||
------------------------------------
|
||||
|
||||
.. automodule:: html5lib.filters.alphabeticalattributes
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`inject_meta_charset` Module
|
||||
---------------------------------
|
||||
|
||||
.. automodule:: html5lib.filters.inject_meta_charset
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`lint` Module
|
||||
------------------
|
||||
|
||||
.. automodule:: html5lib.filters.lint
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`optionaltags` Module
|
||||
--------------------------
|
||||
|
||||
.. automodule:: html5lib.filters.optionaltags
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`sanitizer` Module
|
||||
-----------------------
|
||||
|
||||
.. automodule:: html5lib.filters.sanitizer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`whitespace` Module
|
||||
------------------------
|
||||
|
||||
.. automodule:: html5lib.filters.whitespace
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
77
tests/wpt/web-platform-tests/tools/html5lib/doc/html5lib.rst
Normal file
77
tests/wpt/web-platform-tests/tools/html5lib/doc/html5lib.rst
Normal file
|
@ -0,0 +1,77 @@
|
|||
html5lib Package
|
||||
================
|
||||
|
||||
:mod:`html5lib` Package
|
||||
-----------------------
|
||||
|
||||
.. automodule:: html5lib.__init__
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`constants` Module
|
||||
-----------------------
|
||||
|
||||
.. automodule:: html5lib.constants
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`html5parser` Module
|
||||
-------------------------
|
||||
|
||||
.. automodule:: html5lib.html5parser
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`ihatexml` Module
|
||||
----------------------
|
||||
|
||||
.. automodule:: html5lib.ihatexml
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`inputstream` Module
|
||||
-------------------------
|
||||
|
||||
.. automodule:: html5lib.inputstream
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`sanitizer` Module
|
||||
-----------------------
|
||||
|
||||
.. automodule:: html5lib.sanitizer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`tokenizer` Module
|
||||
-----------------------
|
||||
|
||||
.. automodule:: html5lib.tokenizer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`utils` Module
|
||||
-------------------
|
||||
|
||||
.. automodule:: html5lib.utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
|
||||
html5lib.filters
|
||||
html5lib.serializer
|
||||
html5lib.treebuilders
|
||||
html5lib.treewalkers
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
serializer Package
|
||||
==================
|
||||
|
||||
:mod:`serializer` Package
|
||||
-------------------------
|
||||
|
||||
.. automodule:: html5lib.serializer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`htmlserializer` Module
|
||||
----------------------------
|
||||
|
||||
.. automodule:: html5lib.serializer.htmlserializer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
treebuilders Package
|
||||
====================
|
||||
|
||||
:mod:`treebuilders` Package
|
||||
---------------------------
|
||||
|
||||
.. automodule:: html5lib.treebuilders
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`_base` Module
|
||||
-------------------
|
||||
|
||||
.. automodule:: html5lib.treebuilders._base
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`dom` Module
|
||||
-----------------
|
||||
|
||||
.. automodule:: html5lib.treebuilders.dom
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`etree` Module
|
||||
-------------------
|
||||
|
||||
.. automodule:: html5lib.treebuilders.etree
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`etree_lxml` Module
|
||||
------------------------
|
||||
|
||||
.. automodule:: html5lib.treebuilders.etree_lxml
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
treewalkers Package
|
||||
===================
|
||||
|
||||
:mod:`treewalkers` Package
|
||||
--------------------------
|
||||
|
||||
.. automodule:: html5lib.treewalkers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`_base` Module
|
||||
-------------------
|
||||
|
||||
.. automodule:: html5lib.treewalkers._base
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`dom` Module
|
||||
-----------------
|
||||
|
||||
.. automodule:: html5lib.treewalkers.dom
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`etree` Module
|
||||
-------------------
|
||||
|
||||
.. automodule:: html5lib.treewalkers.etree
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`genshistream` Module
|
||||
--------------------------
|
||||
|
||||
.. automodule:: html5lib.treewalkers.genshistream
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`lxmletree` Module
|
||||
-----------------------
|
||||
|
||||
.. automodule:: html5lib.treewalkers.lxmletree
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
:mod:`pulldom` Module
|
||||
---------------------
|
||||
|
||||
.. automodule:: html5lib.treewalkers.pulldom
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
21
tests/wpt/web-platform-tests/tools/html5lib/doc/index.rst
Normal file
21
tests/wpt/web-platform-tests/tools/html5lib/doc/index.rst
Normal file
|
@ -0,0 +1,21 @@
|
|||
Overview
|
||||
========
|
||||
|
||||
.. include:: ../README.rst
|
||||
:start-line: 6
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
movingparts
|
||||
changes
|
||||
License <license>
|
||||
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
License
|
||||
=======
|
||||
|
||||
.. include:: ../LICENSE
|
242
tests/wpt/web-platform-tests/tools/html5lib/doc/make.bat
Normal file
242
tests/wpt/web-platform-tests/tools/html5lib/doc/make.bat
Normal file
|
@ -0,0 +1,242 @@
|
|||
@ECHO OFF
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set BUILDDIR=_build
|
||||
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
|
||||
set I18NSPHINXOPTS=%SPHINXOPTS% .
|
||||
if NOT "%PAPER%" == "" (
|
||||
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
|
||||
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
|
||||
)
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
if "%1" == "help" (
|
||||
:help
|
||||
echo.Please use `make ^<target^>` where ^<target^> is one of
|
||||
echo. html to make standalone HTML files
|
||||
echo. dirhtml to make HTML files named index.html in directories
|
||||
echo. singlehtml to make a single large HTML file
|
||||
echo. pickle to make pickle files
|
||||
echo. json to make JSON files
|
||||
echo. htmlhelp to make HTML files and a HTML help project
|
||||
echo. qthelp to make HTML files and a qthelp project
|
||||
echo. devhelp to make HTML files and a Devhelp project
|
||||
echo. epub to make an epub
|
||||
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
|
||||
echo. text to make text files
|
||||
echo. man to make manual pages
|
||||
echo. texinfo to make Texinfo files
|
||||
echo. gettext to make PO message catalogs
|
||||
echo. changes to make an overview over all changed/added/deprecated items
|
||||
echo. xml to make Docutils-native XML files
|
||||
echo. pseudoxml to make pseudoxml-XML files for display purposes
|
||||
echo. linkcheck to check all external links for integrity
|
||||
echo. doctest to run all doctests embedded in the documentation if enabled
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "clean" (
|
||||
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
|
||||
del /q /s %BUILDDIR%\*
|
||||
goto end
|
||||
)
|
||||
|
||||
|
||||
%SPHINXBUILD% 2> nul
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.http://sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if "%1" == "html" (
|
||||
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "dirhtml" (
|
||||
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "singlehtml" (
|
||||
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "pickle" (
|
||||
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished; now you can process the pickle files.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "json" (
|
||||
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished; now you can process the JSON files.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "htmlhelp" (
|
||||
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished; now you can run HTML Help Workshop with the ^
|
||||
.hhp project file in %BUILDDIR%/htmlhelp.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "qthelp" (
|
||||
%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished; now you can run "qcollectiongenerator" with the ^
|
||||
.qhcp project file in %BUILDDIR%/qthelp, like this:
|
||||
echo.^> qcollectiongenerator %BUILDDIR%\qthelp\html5lib.qhcp
|
||||
echo.To view the help file:
|
||||
echo.^> assistant -collectionFile %BUILDDIR%\qthelp\html5lib.ghc
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "devhelp" (
|
||||
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "epub" (
|
||||
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished. The epub file is in %BUILDDIR%/epub.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "latex" (
|
||||
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "latexpdf" (
|
||||
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
|
||||
cd %BUILDDIR%/latex
|
||||
make all-pdf
|
||||
cd %BUILDDIR%/..
|
||||
echo.
|
||||
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "latexpdfja" (
|
||||
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
|
||||
cd %BUILDDIR%/latex
|
||||
make all-pdf-ja
|
||||
cd %BUILDDIR%/..
|
||||
echo.
|
||||
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "text" (
|
||||
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished. The text files are in %BUILDDIR%/text.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "man" (
|
||||
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished. The manual pages are in %BUILDDIR%/man.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "texinfo" (
|
||||
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "gettext" (
|
||||
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "changes" (
|
||||
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.The overview file is in %BUILDDIR%/changes.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "linkcheck" (
|
||||
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Link check complete; look for any errors in the above output ^
|
||||
or in %BUILDDIR%/linkcheck/output.txt.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "doctest" (
|
||||
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Testing of doctests in the sources finished, look at the ^
|
||||
results in %BUILDDIR%/doctest/output.txt.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "xml" (
|
||||
%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished. The XML files are in %BUILDDIR%/xml.
|
||||
goto end
|
||||
)
|
||||
|
||||
if "%1" == "pseudoxml" (
|
||||
%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
|
||||
if errorlevel 1 exit /b 1
|
||||
echo.
|
||||
echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
|
||||
goto end
|
||||
)
|
||||
|
||||
:end
|
|
@ -0,0 +1,7 @@
|
|||
html5lib
|
||||
========
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
html5lib
|
209
tests/wpt/web-platform-tests/tools/html5lib/doc/movingparts.rst
Normal file
209
tests/wpt/web-platform-tests/tools/html5lib/doc/movingparts.rst
Normal file
|
@ -0,0 +1,209 @@
|
|||
The moving parts
|
||||
================
|
||||
|
||||
html5lib consists of a number of components, which are responsible for
|
||||
handling its features.
|
||||
|
||||
|
||||
Tree builders
|
||||
-------------
|
||||
|
||||
The parser reads HTML by tokenizing the content and building a tree that
|
||||
the user can later access. There are three main types of trees that
|
||||
html5lib can build:
|
||||
|
||||
* ``etree`` - this is the default; builds a tree based on ``xml.etree``,
|
||||
which can be found in the standard library. Whenever possible, the
|
||||
accelerated ``ElementTree`` implementation (i.e.
|
||||
``xml.etree.cElementTree`` on Python 2.x) is used.
|
||||
|
||||
* ``dom`` - builds a tree based on ``xml.dom.minidom``.
|
||||
|
||||
* ``lxml.etree`` - uses lxml's implementation of the ``ElementTree``
|
||||
API. The performance gains are relatively small compared to using the
|
||||
accelerated ``ElementTree`` module.
|
||||
|
||||
You can specify the builder by name when using the shorthand API:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
with open("mydocument.html", "rb") as f:
|
||||
lxml_etree_document = html5lib.parse(f, treebuilder="lxml")
|
||||
|
||||
When instantiating a parser object, you have to pass a tree builder
|
||||
class in the ``tree`` keyword attribute:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
parser = html5lib.HTMLParser(tree=SomeTreeBuilder)
|
||||
document = parser.parse("<p>Hello World!")
|
||||
|
||||
To get a builder class by name, use the ``getTreeBuilder`` function:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
|
||||
minidom_document = parser.parse("<p>Hello World!")
|
||||
|
||||
The implementation of builders can be found in `html5lib/treebuilders/
|
||||
<https://github.com/html5lib/html5lib-python/tree/master/html5lib/treebuilders>`_.
|
||||
|
||||
|
||||
Tree walkers
|
||||
------------
|
||||
|
||||
Once a tree is ready, you can work on it either manually, or using
|
||||
a tree walker, which provides a streaming view of the tree. html5lib
|
||||
provides walkers for all three supported types of trees (``etree``,
|
||||
``dom`` and ``lxml``).
|
||||
|
||||
The implementation of walkers can be found in `html5lib/treewalkers/
|
||||
<https://github.com/html5lib/html5lib-python/tree/master/html5lib/treewalkers>`_.
|
||||
|
||||
Walkers make consuming HTML easier. html5lib uses them to provide you
|
||||
with has a couple of handy tools.
|
||||
|
||||
|
||||
HTMLSerializer
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
The serializer lets you write HTML back as a stream of bytes.
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
>>> import html5lib
|
||||
>>> element = html5lib.parse('<p xml:lang="pl">Witam wszystkich')
|
||||
>>> walker = html5lib.getTreeWalker("etree")
|
||||
>>> stream = walker(element)
|
||||
>>> s = html5lib.serializer.HTMLSerializer()
|
||||
>>> output = s.serialize(stream)
|
||||
>>> for item in output:
|
||||
... print("%r" % item)
|
||||
'<p'
|
||||
' '
|
||||
'xml:lang'
|
||||
'='
|
||||
'pl'
|
||||
'>'
|
||||
'Witam wszystkich'
|
||||
|
||||
You can customize the serializer behaviour in a variety of ways, consult
|
||||
the :class:`~html5lib.serializer.htmlserializer.HTMLSerializer`
|
||||
documentation.
|
||||
|
||||
|
||||
Filters
|
||||
~~~~~~~
|
||||
|
||||
You can alter the stream content with filters provided by html5lib:
|
||||
|
||||
* :class:`alphabeticalattributes.Filter
|
||||
<html5lib.filters.alphabeticalattributes.Filter>` sorts attributes on
|
||||
tags to be in alphabetical order
|
||||
|
||||
* :class:`inject_meta_charset.Filter
|
||||
<html5lib.filters.inject_meta_charset.Filter>` sets a user-specified
|
||||
encoding in the correct ``<meta>`` tag in the ``<head>`` section of
|
||||
the document
|
||||
|
||||
* :class:`lint.Filter <html5lib.filters.lint.Filter>` raises
|
||||
``LintError`` exceptions on invalid tag and attribute names, invalid
|
||||
PCDATA, etc.
|
||||
|
||||
* :class:`optionaltags.Filter <html5lib.filters.optionaltags.Filter>`
|
||||
removes tags from the stream which are not necessary to produce valid
|
||||
HTML
|
||||
|
||||
* :class:`sanitizer.Filter <html5lib.filters.sanitizer.Filter>` removes
|
||||
unsafe markup and CSS. Elements that are known to be safe are passed
|
||||
through and the rest is converted to visible text. The default
|
||||
configuration of the sanitizer follows the `WHATWG Sanitization Rules
|
||||
<http://wiki.whatwg.org/wiki/Sanitization_rules>`_.
|
||||
|
||||
* :class:`whitespace.Filter <html5lib.filters.whitespace.Filter>`
|
||||
collapses all whitespace characters to single spaces unless they're in
|
||||
``<pre/>`` or ``textarea`` tags.
|
||||
|
||||
To use a filter, simply wrap it around a stream:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> import html5lib
|
||||
>>> from html5lib.filters import sanitizer
|
||||
>>> dom = html5lib.parse("<p><script>alert('Boo!')", treebuilder="dom")
|
||||
>>> walker = html5lib.getTreeWalker("dom")
|
||||
>>> stream = walker(dom)
|
||||
>>> sane_stream = sanitizer.Filter(stream) clean_stream = sanitizer.Filter(stream)
|
||||
|
||||
|
||||
Tree adapters
|
||||
-------------
|
||||
|
||||
Used to translate one type of tree to another. More documentation
|
||||
pending, sorry.
|
||||
|
||||
|
||||
Encoding discovery
|
||||
------------------
|
||||
|
||||
Parsed trees are always Unicode. However a large variety of input
|
||||
encodings are supported. The encoding of the document is determined in
|
||||
the following way:
|
||||
|
||||
* The encoding may be explicitly specified by passing the name of the
|
||||
encoding as the encoding parameter to the
|
||||
:meth:`~html5lib.html5parser.HTMLParser.parse` method on
|
||||
``HTMLParser`` objects.
|
||||
|
||||
* If no encoding is specified, the parser will attempt to detect the
|
||||
encoding from a ``<meta>`` element in the first 512 bytes of the
|
||||
document (this is only a partial implementation of the current HTML
|
||||
5 specification).
|
||||
|
||||
* If no encoding can be found and the chardet library is available, an
|
||||
attempt will be made to sniff the encoding from the byte pattern.
|
||||
|
||||
* If all else fails, the default encoding will be used. This is usually
|
||||
`Windows-1252 <http://en.wikipedia.org/wiki/Windows-1252>`_, which is
|
||||
a common fallback used by Web browsers.
|
||||
|
||||
|
||||
Tokenizers
|
||||
----------
|
||||
|
||||
The part of the parser responsible for translating a raw input stream
|
||||
into meaningful tokens is the tokenizer. Currently html5lib provides
|
||||
two.
|
||||
|
||||
To set up a tokenizer, simply pass it when instantiating
|
||||
a :class:`~html5lib.html5parser.HTMLParser`:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import html5lib
|
||||
from html5lib import sanitizer
|
||||
|
||||
p = html5lib.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
|
||||
p.parse("<p>Surprise!<script>alert('Boo!');</script>")
|
||||
|
||||
HTMLTokenizer
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
This is the default tokenizer, the heart of html5lib. The implementation
|
||||
can be found in `html5lib/tokenizer.py
|
||||
<https://github.com/html5lib/html5lib-python/blob/master/html5lib/tokenizer.py>`_.
|
||||
|
||||
HTMLSanitizer
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
This is a tokenizer that removes unsafe markup and CSS styles from the
|
||||
input. Elements that are known to be safe are passed through and the
|
||||
rest is converted to visible text. The default configuration of the
|
||||
sanitizer follows the `WHATWG Sanitization Rules
|
||||
<http://wiki.whatwg.org/wiki/Sanitization_rules>`_.
|
||||
|
||||
The implementation can be found in `html5lib/sanitizer.py
|
||||
<https://github.com/html5lib/html5lib-python/blob/master/html5lib/sanitizer.py>`_.
|
Loading…
Add table
Add a link
Reference in a new issue