Update web-platform-tests to revision 0d318188757a9c996e20b82db201fd04de5aa255

This commit is contained in:
James Graham 2015-03-27 09:15:38 +00:00
parent b2a5225831
commit 1a81b18b9f
12321 changed files with 544385 additions and 6 deletions

View file

@ -0,0 +1,177 @@
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build
# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
clean:
rm -rf $(BUILDDIR)/*
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/html5lib.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/html5lib.qhc"
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/html5lib"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/html5lib"
@echo "# devhelp"
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
latexpdfja:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through platex and dvipdfmx..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
info:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo "Running Texinfo files through makeinfo..."
make -C $(BUILDDIR)/texinfo info
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
xml:
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
@echo
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
pseudoxml:
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

View file

@ -0,0 +1,3 @@
.. :changelog:
.. include:: ../CHANGES.rst

View file

@ -0,0 +1,280 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# html5lib documentation build configuration file, created by
# sphinx-quickstart on Wed May 8 00:04:49 2013.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys, os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# -- General configuration -----------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.viewcode']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = 'html5lib'
copyright = '2006 - 2013, James Graham, Geoffrey Sneddon, and contributors'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '1.0'
# The full version, including alpha/beta/rc tags.
sys.path.append(os.path.abspath('..'))
from html5lib import __version__
release = __version__
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = 'en'
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build', 'theme']
# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'default'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Output file base name for HTML help builder.
htmlhelp_basename = 'html5libdoc'
# -- Options for LaTeX output --------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#'preamble': '',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'html5lib.tex', 'html5lib Documentation',
'James Graham, Geoffrey Sneddon, and contributors', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output --------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'html5lib', 'html5lib Documentation',
['James Graham, Geoffrey Sneddon, and contributors'], 1)
]
# If true, show URL addresses after external links.
#man_show_urls = False
# -- Options for Texinfo output ------------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
('index', 'html5lib', 'html5lib Documentation',
'James Graham, Geoffrey Sneddon, and contributors', 'html5lib', 'One line description of project.',
'Miscellaneous'),
]
# Documents to append as an appendix to all manuals.
#texinfo_appendices = []
# If false, no module index is generated.
#texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'
# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False
class CExtMock(object):
"""Required for autodoc on readthedocs.org where you cannot build C extensions."""
def __init__(self, *args, **kwargs):
pass
def __call__(self, *args, **kwargs):
return CExtMock()
@classmethod
def __getattr__(cls, name):
if name in ('__file__', '__path__'):
return '/dev/null'
else:
return CExtMock()
try:
import lxml # flake8: noqa
except ImportError:
sys.modules['lxml'] = CExtMock()
sys.modules['lxml.etree'] = CExtMock()
print("warning: lxml modules mocked.")
try:
import genshi # flake8: noqa
except ImportError:
sys.modules['genshi'] = CExtMock()
sys.modules['genshi.core'] = CExtMock()
print("warning: genshi modules mocked.")

View file

@ -0,0 +1,59 @@
filters Package
===============
:mod:`_base` Module
-------------------
.. automodule:: html5lib.filters._base
:members:
:undoc-members:
:show-inheritance:
:mod:`alphabeticalattributes` Module
------------------------------------
.. automodule:: html5lib.filters.alphabeticalattributes
:members:
:undoc-members:
:show-inheritance:
:mod:`inject_meta_charset` Module
---------------------------------
.. automodule:: html5lib.filters.inject_meta_charset
:members:
:undoc-members:
:show-inheritance:
:mod:`lint` Module
------------------
.. automodule:: html5lib.filters.lint
:members:
:undoc-members:
:show-inheritance:
:mod:`optionaltags` Module
--------------------------
.. automodule:: html5lib.filters.optionaltags
:members:
:undoc-members:
:show-inheritance:
:mod:`sanitizer` Module
-----------------------
.. automodule:: html5lib.filters.sanitizer
:members:
:undoc-members:
:show-inheritance:
:mod:`whitespace` Module
------------------------
.. automodule:: html5lib.filters.whitespace
:members:
:undoc-members:
:show-inheritance:

View file

@ -0,0 +1,77 @@
html5lib Package
================
:mod:`html5lib` Package
-----------------------
.. automodule:: html5lib.__init__
:members:
:undoc-members:
:show-inheritance:
:mod:`constants` Module
-----------------------
.. automodule:: html5lib.constants
:members:
:undoc-members:
:show-inheritance:
:mod:`html5parser` Module
-------------------------
.. automodule:: html5lib.html5parser
:members:
:undoc-members:
:show-inheritance:
:mod:`ihatexml` Module
----------------------
.. automodule:: html5lib.ihatexml
:members:
:undoc-members:
:show-inheritance:
:mod:`inputstream` Module
-------------------------
.. automodule:: html5lib.inputstream
:members:
:undoc-members:
:show-inheritance:
:mod:`sanitizer` Module
-----------------------
.. automodule:: html5lib.sanitizer
:members:
:undoc-members:
:show-inheritance:
:mod:`tokenizer` Module
-----------------------
.. automodule:: html5lib.tokenizer
:members:
:undoc-members:
:show-inheritance:
:mod:`utils` Module
-------------------
.. automodule:: html5lib.utils
:members:
:undoc-members:
:show-inheritance:
Subpackages
-----------
.. toctree::
html5lib.filters
html5lib.serializer
html5lib.treebuilders
html5lib.treewalkers

View file

@ -0,0 +1,19 @@
serializer Package
==================
:mod:`serializer` Package
-------------------------
.. automodule:: html5lib.serializer
:members:
:undoc-members:
:show-inheritance:
:mod:`htmlserializer` Module
----------------------------
.. automodule:: html5lib.serializer.htmlserializer
:members:
:undoc-members:
:show-inheritance:

View file

@ -0,0 +1,43 @@
treebuilders Package
====================
:mod:`treebuilders` Package
---------------------------
.. automodule:: html5lib.treebuilders
:members:
:undoc-members:
:show-inheritance:
:mod:`_base` Module
-------------------
.. automodule:: html5lib.treebuilders._base
:members:
:undoc-members:
:show-inheritance:
:mod:`dom` Module
-----------------
.. automodule:: html5lib.treebuilders.dom
:members:
:undoc-members:
:show-inheritance:
:mod:`etree` Module
-------------------
.. automodule:: html5lib.treebuilders.etree
:members:
:undoc-members:
:show-inheritance:
:mod:`etree_lxml` Module
------------------------
.. automodule:: html5lib.treebuilders.etree_lxml
:members:
:undoc-members:
:show-inheritance:

View file

@ -0,0 +1,59 @@
treewalkers Package
===================
:mod:`treewalkers` Package
--------------------------
.. automodule:: html5lib.treewalkers
:members:
:undoc-members:
:show-inheritance:
:mod:`_base` Module
-------------------
.. automodule:: html5lib.treewalkers._base
:members:
:undoc-members:
:show-inheritance:
:mod:`dom` Module
-----------------
.. automodule:: html5lib.treewalkers.dom
:members:
:undoc-members:
:show-inheritance:
:mod:`etree` Module
-------------------
.. automodule:: html5lib.treewalkers.etree
:members:
:undoc-members:
:show-inheritance:
:mod:`genshistream` Module
--------------------------
.. automodule:: html5lib.treewalkers.genshistream
:members:
:undoc-members:
:show-inheritance:
:mod:`lxmletree` Module
-----------------------
.. automodule:: html5lib.treewalkers.lxmletree
:members:
:undoc-members:
:show-inheritance:
:mod:`pulldom` Module
---------------------
.. automodule:: html5lib.treewalkers.pulldom
:members:
:undoc-members:
:show-inheritance:

View file

@ -0,0 +1,21 @@
Overview
========
.. include:: ../README.rst
:start-line: 6
.. toctree::
:maxdepth: 2
movingparts
changes
License <license>
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View file

@ -0,0 +1,4 @@
License
=======
.. include:: ../LICENSE

View file

@ -0,0 +1,242 @@
@ECHO OFF
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set BUILDDIR=_build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
set I18NSPHINXOPTS=%SPHINXOPTS% .
if NOT "%PAPER%" == "" (
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)
if "%1" == "" goto help
if "%1" == "help" (
:help
echo.Please use `make ^<target^>` where ^<target^> is one of
echo. html to make standalone HTML files
echo. dirhtml to make HTML files named index.html in directories
echo. singlehtml to make a single large HTML file
echo. pickle to make pickle files
echo. json to make JSON files
echo. htmlhelp to make HTML files and a HTML help project
echo. qthelp to make HTML files and a qthelp project
echo. devhelp to make HTML files and a Devhelp project
echo. epub to make an epub
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
echo. text to make text files
echo. man to make manual pages
echo. texinfo to make Texinfo files
echo. gettext to make PO message catalogs
echo. changes to make an overview over all changed/added/deprecated items
echo. xml to make Docutils-native XML files
echo. pseudoxml to make pseudoxml-XML files for display purposes
echo. linkcheck to check all external links for integrity
echo. doctest to run all doctests embedded in the documentation if enabled
goto end
)
if "%1" == "clean" (
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
del /q /s %BUILDDIR%\*
goto end
)
%SPHINXBUILD% 2> nul
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
if "%1" == "html" (
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
goto end
)
if "%1" == "dirhtml" (
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
goto end
)
if "%1" == "singlehtml" (
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
goto end
)
if "%1" == "pickle" (
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the pickle files.
goto end
)
if "%1" == "json" (
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the JSON files.
goto end
)
if "%1" == "htmlhelp" (
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
goto end
)
if "%1" == "qthelp" (
%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
echo.^> qcollectiongenerator %BUILDDIR%\qthelp\html5lib.qhcp
echo.To view the help file:
echo.^> assistant -collectionFile %BUILDDIR%\qthelp\html5lib.ghc
goto end
)
if "%1" == "devhelp" (
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished.
goto end
)
if "%1" == "epub" (
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The epub file is in %BUILDDIR%/epub.
goto end
)
if "%1" == "latex" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
echo.
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "latexpdf" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
cd %BUILDDIR%/latex
make all-pdf
cd %BUILDDIR%/..
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "latexpdfja" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
cd %BUILDDIR%/latex
make all-pdf-ja
cd %BUILDDIR%/..
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "text" (
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The text files are in %BUILDDIR%/text.
goto end
)
if "%1" == "man" (
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The manual pages are in %BUILDDIR%/man.
goto end
)
if "%1" == "texinfo" (
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
goto end
)
if "%1" == "gettext" (
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
goto end
)
if "%1" == "changes" (
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
if errorlevel 1 exit /b 1
echo.
echo.The overview file is in %BUILDDIR%/changes.
goto end
)
if "%1" == "linkcheck" (
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
if errorlevel 1 exit /b 1
echo.
echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
goto end
)
if "%1" == "doctest" (
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
if errorlevel 1 exit /b 1
echo.
echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
goto end
)
if "%1" == "xml" (
%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The XML files are in %BUILDDIR%/xml.
goto end
)
if "%1" == "pseudoxml" (
%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
goto end
)
:end

View file

@ -0,0 +1,7 @@
html5lib
========
.. toctree::
:maxdepth: 4
html5lib

View file

@ -0,0 +1,209 @@
The moving parts
================
html5lib consists of a number of components, which are responsible for
handling its features.
Tree builders
-------------
The parser reads HTML by tokenizing the content and building a tree that
the user can later access. There are three main types of trees that
html5lib can build:
* ``etree`` - this is the default; builds a tree based on ``xml.etree``,
which can be found in the standard library. Whenever possible, the
accelerated ``ElementTree`` implementation (i.e.
``xml.etree.cElementTree`` on Python 2.x) is used.
* ``dom`` - builds a tree based on ``xml.dom.minidom``.
* ``lxml.etree`` - uses lxml's implementation of the ``ElementTree``
API. The performance gains are relatively small compared to using the
accelerated ``ElementTree`` module.
You can specify the builder by name when using the shorthand API:
.. code-block:: python
import html5lib
with open("mydocument.html", "rb") as f:
lxml_etree_document = html5lib.parse(f, treebuilder="lxml")
When instantiating a parser object, you have to pass a tree builder
class in the ``tree`` keyword attribute:
.. code-block:: python
import html5lib
parser = html5lib.HTMLParser(tree=SomeTreeBuilder)
document = parser.parse("<p>Hello World!")
To get a builder class by name, use the ``getTreeBuilder`` function:
.. code-block:: python
import html5lib
parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
minidom_document = parser.parse("<p>Hello World!")
The implementation of builders can be found in `html5lib/treebuilders/
<https://github.com/html5lib/html5lib-python/tree/master/html5lib/treebuilders>`_.
Tree walkers
------------
Once a tree is ready, you can work on it either manually, or using
a tree walker, which provides a streaming view of the tree. html5lib
provides walkers for all three supported types of trees (``etree``,
``dom`` and ``lxml``).
The implementation of walkers can be found in `html5lib/treewalkers/
<https://github.com/html5lib/html5lib-python/tree/master/html5lib/treewalkers>`_.
Walkers make consuming HTML easier. html5lib uses them to provide you
with has a couple of handy tools.
HTMLSerializer
~~~~~~~~~~~~~~
The serializer lets you write HTML back as a stream of bytes.
.. code-block:: pycon
>>> import html5lib
>>> element = html5lib.parse('<p xml:lang="pl">Witam wszystkich')
>>> walker = html5lib.getTreeWalker("etree")
>>> stream = walker(element)
>>> s = html5lib.serializer.HTMLSerializer()
>>> output = s.serialize(stream)
>>> for item in output:
... print("%r" % item)
'<p'
' '
'xml:lang'
'='
'pl'
'>'
'Witam wszystkich'
You can customize the serializer behaviour in a variety of ways, consult
the :class:`~html5lib.serializer.htmlserializer.HTMLSerializer`
documentation.
Filters
~~~~~~~
You can alter the stream content with filters provided by html5lib:
* :class:`alphabeticalattributes.Filter
<html5lib.filters.alphabeticalattributes.Filter>` sorts attributes on
tags to be in alphabetical order
* :class:`inject_meta_charset.Filter
<html5lib.filters.inject_meta_charset.Filter>` sets a user-specified
encoding in the correct ``<meta>`` tag in the ``<head>`` section of
the document
* :class:`lint.Filter <html5lib.filters.lint.Filter>` raises
``LintError`` exceptions on invalid tag and attribute names, invalid
PCDATA, etc.
* :class:`optionaltags.Filter <html5lib.filters.optionaltags.Filter>`
removes tags from the stream which are not necessary to produce valid
HTML
* :class:`sanitizer.Filter <html5lib.filters.sanitizer.Filter>` removes
unsafe markup and CSS. Elements that are known to be safe are passed
through and the rest is converted to visible text. The default
configuration of the sanitizer follows the `WHATWG Sanitization Rules
<http://wiki.whatwg.org/wiki/Sanitization_rules>`_.
* :class:`whitespace.Filter <html5lib.filters.whitespace.Filter>`
collapses all whitespace characters to single spaces unless they're in
``<pre/>`` or ``textarea`` tags.
To use a filter, simply wrap it around a stream:
.. code-block:: python
>>> import html5lib
>>> from html5lib.filters import sanitizer
>>> dom = html5lib.parse("<p><script>alert('Boo!')", treebuilder="dom")
>>> walker = html5lib.getTreeWalker("dom")
>>> stream = walker(dom)
>>> sane_stream = sanitizer.Filter(stream) clean_stream = sanitizer.Filter(stream)
Tree adapters
-------------
Used to translate one type of tree to another. More documentation
pending, sorry.
Encoding discovery
------------------
Parsed trees are always Unicode. However a large variety of input
encodings are supported. The encoding of the document is determined in
the following way:
* The encoding may be explicitly specified by passing the name of the
encoding as the encoding parameter to the
:meth:`~html5lib.html5parser.HTMLParser.parse` method on
``HTMLParser`` objects.
* If no encoding is specified, the parser will attempt to detect the
encoding from a ``<meta>`` element in the first 512 bytes of the
document (this is only a partial implementation of the current HTML
5 specification).
* If no encoding can be found and the chardet library is available, an
attempt will be made to sniff the encoding from the byte pattern.
* If all else fails, the default encoding will be used. This is usually
`Windows-1252 <http://en.wikipedia.org/wiki/Windows-1252>`_, which is
a common fallback used by Web browsers.
Tokenizers
----------
The part of the parser responsible for translating a raw input stream
into meaningful tokens is the tokenizer. Currently html5lib provides
two.
To set up a tokenizer, simply pass it when instantiating
a :class:`~html5lib.html5parser.HTMLParser`:
.. code-block:: python
import html5lib
from html5lib import sanitizer
p = html5lib.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
p.parse("<p>Surprise!<script>alert('Boo!');</script>")
HTMLTokenizer
~~~~~~~~~~~~~
This is the default tokenizer, the heart of html5lib. The implementation
can be found in `html5lib/tokenizer.py
<https://github.com/html5lib/html5lib-python/blob/master/html5lib/tokenizer.py>`_.
HTMLSanitizer
~~~~~~~~~~~~~
This is a tokenizer that removes unsafe markup and CSS styles from the
input. Elements that are known to be safe are passed through and the
rest is converted to visible text. The default configuration of the
sanitizer follows the `WHATWG Sanitization Rules
<http://wiki.whatwg.org/wiki/Sanitization_rules>`_.
The implementation can be found in `html5lib/sanitizer.py
<https://github.com/html5lib/html5lib-python/blob/master/html5lib/sanitizer.py>`_.