Update web-platform-tests to revision af43e2eb32555059316b67fba4a1d7df6ea3148d

Author: WPT Sync Bot
Date:   2018-08-15 21:29:12 -04:00
parent 97c6246385
commit 2f89d25484
296 changed files with 21168 additions and 821 deletions


@@ -22,6 +22,7 @@ sys.path.insert(0, os.path.join(here, "third_party", "h2"))
sys.path.insert(0, os.path.join(here, "third_party", "hpack"))
sys.path.insert(0, os.path.join(here, "third_party", "hyperframe"))
sys.path.insert(0, os.path.join(here, "third_party", "certifi"))
sys.path.insert(0, os.path.join(here, "third_party", "hyper"))
sys.path.insert(0, os.path.join(here, "webdriver"))
sys.path.insert(0, os.path.join(here, "wptrunner"))


@@ -230,7 +230,6 @@ class Encoder(object):
# are already in the header table we can represent them using the
# indexed representation: the same is true if they are in the static
# table. Otherwise, a literal representation will be used.
log.debug("HPACK encoding %s", headers)
header_block = []
# Turn the headers into a list of tuples if possible. This is the


@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
"""
hyper
~~~~~~
A module for providing an abstraction layer over the differences between
HTTP/1.1 and HTTP/2.
"""
import logging
from .common.connection import HTTPConnection
from .http20.connection import HTTP20Connection
from .http20.response import HTTP20Response, HTTP20Push
from .http11.connection import HTTP11Connection
from .http11.response import HTTP11Response
# Throw import errors on Python <2.7 and 3.0-3.2.
import sys as _sys
if _sys.version_info < (2, 7) or (3, 0) <= _sys.version_info < (3, 3):
raise ImportError(
"hyper only supports Python 2.7 and Python 3.3 or higher."
)
__all__ = [
    'HTTPConnection',
    'HTTP20Response',
    'HTTP20Push',
    'HTTP20Connection',
    'HTTP11Connection',
    'HTTP11Response',
]
# Set default logging handler.
logging.getLogger(__name__).addHandler(logging.NullHandler())
__version__ = '0.7.0'
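
For context, a minimal usage sketch of the API exported above (illustrative, not part of the commit; http2bin.org is the example host used in hyper's own docstrings):

from hyper import HTTPConnection

conn = HTTPConnection('http2bin.org:443', secure=True)  # the port may ride along in the host string
conn.request('GET', '/get')
resp = conn.get_response()
print(resp.status)   # e.g. 200
print(resp.read())   # raw response body as bytes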

File diff suppressed because it is too large.


@@ -0,0 +1,264 @@
# -*- coding: utf-8 -*-
"""
hyper/cli
~~~~~~~~~
Command line interface for Hyper inspired by Httpie.
"""
import json
import locale
import logging
import sys
from argparse import ArgumentParser, RawTextHelpFormatter
from argparse import OPTIONAL, ZERO_OR_MORE
from pprint import pformat
from textwrap import dedent
from hyper import HTTPConnection, HTTP20Connection
from hyper import __version__
from hyper.compat import is_py2, urlencode, urlsplit, write_to_stdout
from hyper.common.util import to_host_port_tuple
log = logging.getLogger('hyper')
PREFERRED_ENCODING = locale.getpreferredencoding()
# Various separators used in args
SEP_HEADERS = ':'
SEP_QUERY = '=='
SEP_DATA = '='
SEP_GROUP_ITEMS = [
SEP_HEADERS,
SEP_QUERY,
SEP_DATA,
]
class KeyValue(object):
"""Base key-value pair parsed from CLI."""
def __init__(self, key, value, sep, orig):
self.key = key
self.value = value
self.sep = sep
self.orig = orig
class KeyValueArgType(object):
"""A key-value pair argument type used with `argparse`.
Parses a key-value arg and constructs a `KeyValue` instance.
Used for headers, form data, and other key-value pair types.
    This class is inspired by httpie and implements a simple tokenizer only.
"""
def __init__(self, *separators):
self.separators = separators
def __call__(self, string):
for sep in self.separators:
splitted = string.split(sep, 1)
if len(splitted) == 2:
key, value = splitted
return KeyValue(key, value, sep, string)
def make_positional_argument(parser):
parser.add_argument(
'method', metavar='METHOD', nargs=OPTIONAL, default='GET',
help=dedent("""
The HTTP method to be used for the request
(GET, POST, PUT, DELETE, ...).
"""))
parser.add_argument(
'_url', metavar='URL',
help=dedent("""
The scheme defaults to 'https://' if the URL does not include one.
"""))
parser.add_argument(
'items',
metavar='REQUEST_ITEM',
nargs=ZERO_OR_MORE,
type=KeyValueArgType(*SEP_GROUP_ITEMS),
help=dedent("""
Optional key-value pairs to be included in the request.
The separator used determines the type:
':' HTTP headers:
Referer:http://httpie.org Cookie:foo=bar User-Agent:bacon/1.0
'==' URL parameters to be appended to the request URI:
search==hyper
'=' Data fields to be serialized into a JSON object:
name=Hyper language=Python description='CLI HTTP client'
"""))
def make_troubleshooting_argument(parser):
parser.add_argument(
'--version', action='version', version=__version__,
help='Show version and exit.')
parser.add_argument(
'--debug', action='store_true', default=False,
help='Show debugging information (loglevel=DEBUG)')
parser.add_argument(
'--h2', action='store_true', default=False,
help="Do HTTP/2 directly, skipping plaintext upgrade and ignoring "
"NPN/ALPN."
)
def split_host_and_port(hostname):
if ':' in hostname:
return to_host_port_tuple(hostname, default_port=443)
return hostname, None
class UrlInfo(object):
def __init__(self):
self.fragment = None
self.host = 'localhost'
self.netloc = None
self.path = '/'
self.port = 443
self.query = None
self.scheme = 'https'
self.secure = False
def set_url_info(args):
info = UrlInfo()
_result = urlsplit(args._url)
for attr in vars(info).keys():
value = getattr(_result, attr, None)
if value:
setattr(info, attr, value)
if info.scheme == 'http' and not _result.port:
info.port = 80
    # Set the secure arg if the scheme is HTTPS; otherwise stay unsecured.
info.secure = info.scheme == 'https'
if info.netloc:
hostname, _ = split_host_and_port(info.netloc)
info.host = hostname # ensure stripping port number
else:
if _result.path:
_path = _result.path.split('/', 1)
hostname, port = split_host_and_port(_path[0])
info.host = hostname
if info.path == _path[0]:
info.path = '/'
elif len(_path) == 2 and _path[1]:
info.path = '/' + _path[1]
if port is not None:
info.port = port
log.debug('Url Info: %s', vars(info))
args.url = info
def set_request_data(args):
body, headers, params = {}, {}, {}
for i in args.items:
if i.sep == SEP_HEADERS:
if i.key:
headers[i.key] = i.value
else:
                # when overriding an HTTP/2 special header there will be a
# leading colon, which tricks the command line parser into
# thinking the header is empty
k, v = i.value.split(':', 1)
headers[':' + k] = v
elif i.sep == SEP_QUERY:
params[i.key] = i.value
elif i.sep == SEP_DATA:
value = i.value
if is_py2: # pragma: no cover
value = value.decode(PREFERRED_ENCODING)
body[i.key] = value
if params:
args.url.path += '?' + urlencode(params)
if body:
content_type = 'application/json'
headers.setdefault('content-type', content_type)
args.body = json.dumps(body)
if args.method is None:
args.method = 'POST' if args.body else 'GET'
args.method = args.method.upper()
args.headers = headers
def parse_argument(argv=None):
parser = ArgumentParser(formatter_class=RawTextHelpFormatter)
parser.set_defaults(body=None, headers={})
make_positional_argument(parser)
make_troubleshooting_argument(parser)
args = parser.parse_args(sys.argv[1:] if argv is None else argv)
if args.debug:
handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)
log.addHandler(handler)
log.setLevel(logging.DEBUG)
set_url_info(args)
set_request_data(args)
return args
def get_content_type_and_charset(response):
charset = 'utf-8'
content_type = response.headers.get('content-type')
if content_type is None:
return 'unknown', charset
content_type = content_type[0].decode('utf-8').lower()
type_and_charset = content_type.split(';', 1)
ctype = type_and_charset[0].strip()
if len(type_and_charset) == 2:
charset = type_and_charset[1].strip().split('=')[1]
return ctype, charset
def request(args):
if not args.h2:
conn = HTTPConnection(
args.url.host, args.url.port, secure=args.url.secure
)
else: # pragma: no cover
conn = HTTP20Connection(
args.url.host,
args.url.port,
secure=args.url.secure,
force_proto='h2'
)
conn.request(args.method, args.url.path, args.body, args.headers)
response = conn.get_response()
log.debug('Response Headers:\n%s', pformat(response.headers))
ctype, charset = get_content_type_and_charset(response)
data = response.read()
return data
def main(argv=None):
args = parse_argument(argv)
log.debug('Commandline Argument: %s', args)
data = request(args)
write_to_stdout(data)
if __name__ == '__main__': # pragma: no cover
main()
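
As a rough sketch of the parsing pipeline above (illustrative, not part of the commit), parse_argument wires the separators into headers, query parameters and a JSON body; calling request(args) afterwards would perform the actual network round trip:

from hyper.cli import parse_argument

args = parse_argument(['GET', 'http2bin.org/get', 'search==hyper'])
print(args.method)     # 'GET'
print(args.url.host)   # 'http2bin.org'
print(args.url.path)   # '/get?search=hyper' -- '==' items become query params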


@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""
hyper/common
~~~~~~~~~~~~
Common code in hyper.
"""


@@ -0,0 +1,240 @@
# -*- coding: utf-8 -*-
"""
hyper/common/bufsocket
~~~~~~~~~~~~~~~~~~~~~~
This file implements a buffered socket wrapper.
The purpose of this is to avoid the overhead of unnecessary syscalls while
allowing small reads from the network. This represents a potentially massive
performance optimisation at the cost of burning some memory in the userspace
process.
"""
import select
from .exceptions import ConnectionResetError, LineTooLongError
class BufferedSocket(object):
"""
A buffered socket wrapper.
The purpose of this is to avoid the overhead of unnecessary syscalls while
allowing small reads from the network. This represents a potentially
massive performance optimisation at the cost of burning some memory in the
userspace process.
"""
def __init__(self, sck, buffer_size=1000):
"""
Create the buffered socket.
:param sck: The socket to wrap.
:param buffer_size: The size of the backing buffer in bytes. This
parameter should be set to an appropriate value for your use case.
Small values of ``buffer_size`` increase the overhead of buffer
management: large values cause more memory to be used.
"""
# The wrapped socket.
self._sck = sck
# The buffer we're using.
self._backing_buffer = bytearray(buffer_size)
self._buffer_view = memoryview(self._backing_buffer)
# The size of the buffer.
self._buffer_size = buffer_size
# The start index in the memory view.
self._index = 0
# The number of bytes in the buffer.
self._bytes_in_buffer = 0
@property
def _remaining_capacity(self):
"""
The maximum number of bytes the buffer could still contain.
"""
return self._buffer_size - self._index
@property
def _buffer_end(self):
"""
The index of the first free byte in the buffer.
"""
return self._index + self._bytes_in_buffer
@property
def can_read(self):
"""
Whether or not there is more data to read from the socket.
"""
read = select.select([self._sck], [], [], 0)[0]
if read:
return True
return False
@property
def buffer(self):
"""
Get access to the buffer itself.
"""
return self._buffer_view[self._index:self._buffer_end]
def advance_buffer(self, count):
"""
Advances the buffer by the amount of data consumed outside the socket.
"""
self._index += count
self._bytes_in_buffer -= count
def new_buffer(self):
"""
This method moves all the data in the backing buffer to the start of
a new, fresh buffer. This gives the ability to read much more data.
"""
def read_all_from_buffer():
end = self._index + self._bytes_in_buffer
return self._buffer_view[self._index:end]
new_buffer = bytearray(self._buffer_size)
new_buffer_view = memoryview(new_buffer)
new_buffer_view[0:self._bytes_in_buffer] = read_all_from_buffer()
self._index = 0
self._backing_buffer = new_buffer
self._buffer_view = new_buffer_view
return
def recv(self, amt):
"""
Read some data from the socket.
:param amt: The amount of data to read.
:returns: A ``memoryview`` object containing the appropriate number of
bytes. The data *must* be copied out by the caller before the next
call to this function.
"""
# In this implementation you can never read more than the number of
# bytes in the buffer.
if amt > self._buffer_size:
amt = self._buffer_size
# If the amount of data we've been asked to read is less than the
# remaining space in the buffer, we need to clear out the buffer and
# start over.
if amt > self._remaining_capacity:
self.new_buffer()
# If there's still some room in the buffer, opportunistically attempt
# to read into it.
# If we don't actually _need_ the data (i.e. there's enough in the
# buffer to satisfy the request), use select to work out if the read
# attempt will block. If it will, don't bother reading. If we need the
# data, always do the read.
if self._bytes_in_buffer >= amt:
should_read = select.select([self._sck], [], [], 0)[0]
else:
should_read = True
if (self._remaining_capacity > self._bytes_in_buffer and should_read):
count = self._sck.recv_into(self._buffer_view[self._buffer_end:])
# The socket just got closed. We should throw an exception if we
# were asked for more data than we can return.
if not count and amt > self._bytes_in_buffer:
raise ConnectionResetError()
self._bytes_in_buffer += count
# Read out the bytes and update the index.
amt = min(amt, self._bytes_in_buffer)
data = self._buffer_view[self._index:self._index+amt]
self._index += amt
self._bytes_in_buffer -= amt
return data
def fill(self):
"""
Attempts to fill the buffer as much as possible. It will block for at
most the time required to have *one* ``recv_into`` call return.
"""
if not self._remaining_capacity:
self.new_buffer()
count = self._sck.recv_into(self._buffer_view[self._buffer_end:])
if not count:
raise ConnectionResetError()
self._bytes_in_buffer += count
return
def readline(self):
"""
        Reads up to a newline from the network and returns it. The implicit
maximum line length is the buffer size of the buffered socket.
Note that, unlike recv, this method absolutely *does* block until it
can read the line.
:returns: A ``memoryview`` object containing the appropriate number of
bytes. The data *must* be copied out by the caller before the next
call to this function.
"""
# First, check if there's anything in the buffer. This is one of those
# rare circumstances where this will work correctly on all platforms.
index = self._backing_buffer.find(
b'\n',
self._index,
self._index + self._bytes_in_buffer
)
if index != -1:
length = index + 1 - self._index
data = self._buffer_view[self._index:self._index+length]
self._index += length
self._bytes_in_buffer -= length
return data
# In this case, we didn't find a newline in the buffer. To fix that,
# read some data into the buffer. To do our best to satisfy the read,
# we should shunt the data down in the buffer so that it's right at
# the start. We don't bother if we're already at the start of the
# buffer.
if self._index != 0:
self.new_buffer()
while self._bytes_in_buffer < self._buffer_size:
count = self._sck.recv_into(self._buffer_view[self._buffer_end:])
if not count:
raise ConnectionResetError()
# We have some more data. Again, look for a newline in that gap.
first_new_byte = self._buffer_end
self._bytes_in_buffer += count
index = self._backing_buffer.find(
b'\n',
first_new_byte,
first_new_byte + count,
)
if index != -1:
# The length of the buffer is the index into the
# buffer at which we found the newline plus 1, minus the start
# index of the buffer, which really should be zero.
assert not self._index
length = index + 1
data = self._buffer_view[:length]
self._index += length
self._bytes_in_buffer -= length
return data
# If we got here, it means we filled the buffer without ever getting
# a newline. Time to throw an exception.
raise LineTooLongError()
def __getattr__(self, name):
return getattr(self._sck, name)
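
A small sketch of the buffering behaviour (illustrative, not part of the commit), using a local socketpair so nothing touches the network; the import path assumes the module lands at hyper/common/bufsocket alongside the other common files:

import socket
from hyper.common.bufsocket import BufferedSocket

a, b = socket.socketpair()
b.sendall(b'HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n')

buf = BufferedSocket(a)
# readline returns memoryviews, so copy with bytes() before the next call.
print(bytes(buf.readline()))   # b'HTTP/1.1 200 OK\r\n' -- one syscall buffered everything
print(bytes(buf.readline()))   # b'Content-Length: 0\r\n' -- served straight from the buffer
print(bytes(buf.readline()))   # b'\r\n'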


@@ -0,0 +1,157 @@
# -*- coding: utf-8 -*-
"""
hyper/common/connection
~~~~~~~~~~~~~~~~~~~~~~~
Hyper's HTTP/1.1 and HTTP/2 abstraction layer.
"""
from .exceptions import TLSUpgrade, HTTPUpgrade
from ..http11.connection import HTTP11Connection
from ..http20.connection import HTTP20Connection
from ..tls import H2_NPN_PROTOCOLS, H2C_PROTOCOL
class HTTPConnection(object):
"""
An object representing a single HTTP connection to a server.
This object behaves similarly to the Python standard library's
``HTTPConnection`` object, with a few critical differences.
Most of the standard library's arguments to the constructor are not
supported by hyper. Most optional parameters apply to *either* HTTP/1.1 or
HTTP/2.
:param host: The host to connect to. This may be an IP address or a
hostname, and optionally may include a port: for example,
``'http2bin.org'``, ``'http2bin.org:443'`` or ``'127.0.0.1'``.
:param port: (optional) The port to connect to. If not provided and one
also isn't provided in the ``host`` parameter, defaults to 80.
:param secure: (optional) Whether the request should use TLS.
Defaults to ``False`` for most requests, but to ``True`` for any
request issued to port 443.
:param window_manager: (optional) The class to use to manage flow control
windows. This needs to be a subclass of the
:class:`BaseFlowControlManager
<hyper.http20.window.BaseFlowControlManager>`. If not provided,
:class:`FlowControlManager <hyper.http20.window.FlowControlManager>`
will be used.
:param enable_push: (optional) Whether the server is allowed to push
resources to the client (see
:meth:`get_pushes() <hyper.HTTP20Connection.get_pushes>`).
:param ssl_context: (optional) A class with custom certificate settings.
If not provided then hyper's default ``SSLContext`` is used instead.
:param proxy_host: (optional) The proxy to connect to. This can be an IP
address or a host name and may include a port.
:param proxy_port: (optional) The proxy port to connect to. If not provided
        and one also isn't provided in the ``proxy_host`` parameter, defaults to
8080.
"""
def __init__(self,
host,
port=None,
secure=None,
window_manager=None,
enable_push=False,
ssl_context=None,
proxy_host=None,
proxy_port=None,
**kwargs):
self._host = host
self._port = port
self._h1_kwargs = {
'secure': secure, 'ssl_context': ssl_context,
'proxy_host': proxy_host, 'proxy_port': proxy_port
}
self._h2_kwargs = {
'window_manager': window_manager, 'enable_push': enable_push,
'secure': secure, 'ssl_context': ssl_context,
'proxy_host': proxy_host, 'proxy_port': proxy_port
}
# Add any unexpected kwargs to both dictionaries.
self._h1_kwargs.update(kwargs)
self._h2_kwargs.update(kwargs)
self._conn = HTTP11Connection(
self._host, self._port, **self._h1_kwargs
)
def request(self, method, url, body=None, headers=None):
"""
This will send a request to the server using the HTTP request method
``method`` and the selector ``url``. If the ``body`` argument is
        present, it should be a string or bytes object of data to send after the
headers are finished. Strings are encoded as UTF-8. To use other
encodings, pass a bytes object. The Content-Length header is set to the
length of the body field.
:param method: The request method, e.g. ``'GET'``.
:param url: The URL to contact, e.g. ``'/path/segment'``.
:param body: (optional) The request body to send. Must be a bytestring
or a file-like object.
:param headers: (optional) The headers to send on the request.
:returns: A stream ID for the request, or ``None`` if the request is
made over HTTP/1.1.
"""
headers = headers or {}
try:
return self._conn.request(
method=method, url=url, body=body, headers=headers
)
except TLSUpgrade as e:
# We upgraded in the NPN/ALPN handshake. We can just go straight to
# the world of HTTP/2. Replace the backing object and insert the
# socket into it.
assert e.negotiated in H2_NPN_PROTOCOLS
self._conn = HTTP20Connection(
self._host, self._port, **self._h2_kwargs
)
self._conn._sock = e.sock
# Because we skipped the connecting logic, we need to send the
# HTTP/2 preamble.
self._conn._send_preamble()
return self._conn.request(
method=method, url=url, body=body, headers=headers
)
def get_response(self, *args, **kwargs):
"""
Returns a response object.
"""
try:
return self._conn.get_response(*args, **kwargs)
except HTTPUpgrade as e:
# We upgraded via the HTTP Upgrade mechanism. We can just
# go straight to the world of HTTP/2. Replace the backing object
# and insert the socket into it.
assert e.negotiated == H2C_PROTOCOL
self._conn = HTTP20Connection(
self._host, self._port, **self._h2_kwargs
)
self._conn._connect_upgrade(e.sock)
# stream id 1 is used by the upgrade request and response
# and is half-closed by the client
return self._conn.get_response(1)
# The following two methods are the implementation of the context manager
# protocol.
def __enter__(self): # pragma: no cover
return self
def __exit__(self, type, value, tb): # pragma: no cover
self._conn.close()
return False # Never swallow exceptions.
# Can anyone say 'proxy object pattern'?
def __getattr__(self, name):
return getattr(self._conn, name)
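
Sketch of the abstraction in use (illustrative, not part of the commit): the caller writes one code path, and the TLSUpgrade/HTTPUpgrade handling above swaps in an HTTP20Connection behind the scenes when the handshake negotiates h2:

from hyper.common.connection import HTTPConnection

with HTTPConnection('http2bin.org', 443, secure=True) as conn:
    conn.request('GET', '/get')
    resp = conn.get_response()
    print(resp.status)   # same call sites whether HTTP/1.1 or HTTP/2 was negotiated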


@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
"""
hyper/common/decoder
~~~~~~~~~~~~~~~~~~~~
Contains hyper's code for handling compressed bodies.
"""
import zlib
class DeflateDecoder(object):
"""
This is a decoding object that wraps ``zlib`` and is used for decoding
deflated content.
    The rationale for the existence of this object is pretty unpleasant.
The HTTP RFC specifies that 'deflate' is a valid content encoding. However,
the spec _meant_ the zlib encoding form. Unfortunately, people who didn't
read the RFC very carefully actually implemented a different form of
'deflate'. Insanely, ``zlib`` handles them using two wbits values. This is
such a mess it's hard to adequately articulate.
This class was lovingly borrowed from the excellent urllib3 library under
license: see NOTICES. If you ever see @shazow, you should probably buy him
a drink or something.
"""
def __init__(self):
self._first_try = True
self._data = b''
self._obj = zlib.decompressobj(zlib.MAX_WBITS)
def __getattr__(self, name):
return getattr(self._obj, name)
def decompress(self, data):
if not self._first_try:
return self._obj.decompress(data)
self._data += data
try:
return self._obj.decompress(data)
except zlib.error:
self._first_try = False
self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
try:
return self.decompress(self._data)
finally:
self._data = None
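
A quick sketch of why the two-wbits dance exists (illustrative, not part of the commit): the same decoder object copes with both the zlib-wrapped and the raw form of 'deflate':

import zlib
from hyper.common.decoder import DeflateDecoder

payload = b'hello deflate'
zlib_form = zlib.compress(payload)                     # RFC 1950 zlib wrapper
c = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS)
raw_form = c.compress(payload) + c.flush()             # bare RFC 1951 stream

for body in (zlib_form, raw_form):
    d = DeflateDecoder()
    print(d.decompress(body) + d.flush())              # b'hello deflate' both times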


@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
"""
hyper/common/exceptions
~~~~~~~~~~~~~~~~~~~~~~~
Contains hyper's exceptions.
"""
class ChunkedDecodeError(Exception):
"""
An error was encountered while decoding a chunked response.
"""
pass
class InvalidResponseError(Exception):
"""
A problem was found with the response that makes it invalid.
"""
pass
class SocketError(Exception):
"""
An error occurred during socket operation.
"""
pass
class LineTooLongError(Exception):
"""
An attempt to read a line from a socket failed because no newline was
found.
"""
pass
# Create our own ConnectionResetError.
try: # pragma: no cover
ConnectionResetError = ConnectionResetError
except NameError: # pragma: no cover
class ConnectionResetError(Exception):
"""
        An HTTP connection was unexpectedly reset.
"""
class TLSUpgrade(Exception):
"""
We upgraded to a new protocol in the NPN/ALPN handshake.
"""
def __init__(self, negotiated, sock):
super(TLSUpgrade, self).__init__()
self.negotiated = negotiated
self.sock = sock
class HTTPUpgrade(Exception):
"""
We upgraded to a new protocol via the HTTP Upgrade response.
"""
def __init__(self, negotiated, sock):
super(HTTPUpgrade, self).__init__()
self.negotiated = negotiated
self.sock = sock
class MissingCertFile(Exception):
"""
The certificate file could not be found.
"""
pass
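
Because of the name-shadowing try/except above, callers can catch ConnectionResetError under one name on both Python 2 and 3; a sketch (illustrative, not part of the commit):

from hyper.common.exceptions import ConnectionResetError

def recv_or_none(buffered_sock, amt):
    # Works on Python 2 (hyper's shim) and Python 3 (the builtin) alike.
    try:
        return buffered_sock.recv(amt)
    except ConnectionResetError:
        return None   # peer went away mid-read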


@@ -0,0 +1,260 @@
# -*- coding: utf-8 -*-
"""
hyper/common/headers
~~~~~~~~~~~~~~~~~~~~~
Contains hyper's structures for storing and working with HTTP headers.
"""
import collections
from hyper.common.util import to_bytestring, to_bytestring_tuple
class HTTPHeaderMap(collections.MutableMapping):
"""
A structure that contains HTTP headers.
HTTP headers are a curious beast. At the surface level they look roughly
like a name-value set, but in practice they have many variations that
make them tricky:
- duplicate keys are allowed
- keys are compared case-insensitively
- duplicate keys are isomorphic to comma-separated values, *except when
they aren't*!
- they logically contain a form of ordering
This data structure is an attempt to preserve all of that information
while being as user-friendly as possible. It retains all of the mapping
convenience methods (allowing by-name indexing), while avoiding using a
dictionary for storage.
When iterated over, this structure returns headers in 'canonical form'.
This form is a tuple, where the first entry is the header name (in
lower-case), and the second entry is a list of header values (in original
case).
The mapping always emits both names and values in the form of bytestrings:
    never unicode strings. It can accept names and values in unicode form, which
    will automatically be encoded to bytestrings using UTF-8. The reason for
what appears to be a user-unfriendly decision here is primarily to allow
the broadest-possible compatibility (to make it possible to send headers in
unusual encodings) while ensuring that users are never confused about what
type of data they will receive.
.. warning:: Note that this data structure makes none of the performance
guarantees of a dictionary. Lookup and deletion is not an O(1)
operation. Inserting a new value *is* O(1), all other
operations are O(n), including *replacing* a header entirely.
"""
def __init__(self, *args, **kwargs):
# The meat of the structure. In practice, headers are an ordered list
# of tuples. This early version of the data structure simply uses this
# directly under the covers.
#
# An important curiosity here is that the headers are not stored in
# 'canonical form', but are instead stored in the form they were
# provided in. This is to ensure that it is always possible to
# reproduce the original header structure if necessary. This leads to
# some unfortunate performance costs on structure access where it is
# often necessary to transform the data into canonical form on access.
# This cost is judged acceptable in low-level code like `hyper`, but
# higher-level abstractions should consider if they really require this
# logic.
self._items = []
for arg in args:
self._items.extend(map(lambda x: to_bytestring_tuple(*x), arg))
for k, v in kwargs.items():
self._items.append(to_bytestring_tuple(k, v))
def __getitem__(self, key):
"""
Unlike the dict __getitem__, this returns a list of items in the order
they were added. These items are returned in 'canonical form', meaning
that comma-separated values are split into multiple values.
"""
key = to_bytestring(key)
values = []
for k, v in self._items:
if _keys_equal(k, key):
values.extend(x[1] for x in canonical_form(k, v))
if not values:
raise KeyError("Nonexistent header key: {}".format(key))
return values
def __setitem__(self, key, value):
"""
Unlike the dict __setitem__, this appends to the list of items.
"""
self._items.append(to_bytestring_tuple(key, value))
def __delitem__(self, key):
"""
Sadly, __delitem__ is kind of stupid here, but the best we can do is
delete all headers with a given key. To correctly achieve the 'KeyError
on missing key' logic from dictionaries, we need to do this slowly.
"""
key = to_bytestring(key)
indices = []
for (i, (k, v)) in enumerate(self._items):
if _keys_equal(k, key):
indices.append(i)
if not indices:
raise KeyError("Nonexistent header key: {}".format(key))
for i in indices[::-1]:
self._items.pop(i)
def __iter__(self):
"""
This mapping iterates like the list of tuples it is. The headers are
returned in canonical form.
"""
for pair in self._items:
for value in canonical_form(*pair):
yield value
def __len__(self):
"""
The length of this mapping is the number of individual headers in
canonical form. Sadly, this is a somewhat expensive operation.
"""
size = 0
for _ in self:
size += 1
return size
def __contains__(self, key):
"""
If any header is present with this key, returns True.
"""
key = to_bytestring(key)
return any(_keys_equal(key, k) for k, _ in self._items)
def keys(self):
"""
Returns an iterable of the header keys in the mapping. This explicitly
does not filter duplicates, ensuring that it's the same length as
len().
"""
for n, _ in self:
yield n
def items(self):
"""
This mapping iterates like the list of tuples it is.
"""
return self.__iter__()
def values(self):
"""
This is an almost nonsensical query on a header dictionary, but we
satisfy it in the exact same way we satisfy 'keys'.
"""
for _, v in self:
yield v
def get(self, name, default=None):
"""
Unlike the dict get, this returns a list of items in the order
they were added.
"""
try:
return self[name]
except KeyError:
return default
def iter_raw(self):
"""
Allows iterating over the headers in 'raw' form: that is, the form in
which they were added to the structure. This iteration is in order,
and can be used to rebuild the original headers (e.g. to determine
exactly what a server sent).
"""
for item in self._items:
yield item
def replace(self, key, value):
"""
        Replace an existing header with a new value. If the header doesn't exist,
        this method works like ``__setitem__``. Replacing leads to deletion of all
existing headers with the same name.
"""
key, value = to_bytestring_tuple(key, value)
indices = []
for (i, (k, v)) in enumerate(self._items):
if _keys_equal(k, key):
indices.append(i)
# If the key isn't present, this is easy: just append and abort early.
if not indices:
self._items.append((key, value))
return
# Delete all but the first. I swear, this is the correct slicing
# syntax!
base_index = indices[0]
for i in indices[:0:-1]:
self._items.pop(i)
del self._items[base_index]
self._items.insert(base_index, (key, value))
def merge(self, other):
"""
Merge another header set or any other dict-like into this one.
"""
# Short circuit to avoid infinite loops in case we try to merge into
# ourselves.
if other is self:
return
if isinstance(other, HTTPHeaderMap):
self._items.extend(other.iter_raw())
return
for k, v in other.items():
self._items.append(to_bytestring_tuple(k, v))
def __eq__(self, other):
return self._items == other._items
def __ne__(self, other):
return self._items != other._items
def __str__(self): # pragma: no cover
return 'HTTPHeaderMap(%s)' % self._items
def __repr__(self): # pragma: no cover
return str(self)
def canonical_form(k, v):
"""
Returns an iterable of key-value-pairs corresponding to the header in
canonical form. This means that the header is split on commas unless for
any reason it's a super-special snowflake (I'm looking at you Set-Cookie).
"""
SPECIAL_SNOWFLAKES = set([b'set-cookie', b'set-cookie2'])
k = k.lower()
if k in SPECIAL_SNOWFLAKES:
yield k, v
else:
for sub_val in v.split(b','):
yield k, sub_val.strip()
def _keys_equal(x, y):
"""
Returns 'True' if the two keys are equal by the laws of HTTP headers.
"""
return x.lower() == y.lower()
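
A sketch of the canonical-form behaviour described in the docstring (illustrative, not part of the commit):

from hyper.common.headers import HTTPHeaderMap

h = HTTPHeaderMap()
h['Accept'] = 'text/html, application/json'   # __setitem__ appends, never replaces
h['ACCEPT'] = 'text/plain'                    # keys compare case-insensitively

print(h['accept'])         # [b'text/html', b'application/json', b'text/plain']
print(list(h.iter_raw()))  # original case and comma-joined values preserved
h.replace('accept', 'application/xml')
print(h['accept'])         # [b'application/xml']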


@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
"""
hyper/common/util
~~~~~~~~~~~~~~~~~
General utility functions for use with hyper.
"""
from hyper.compat import unicode, bytes, imap
from ..packages.rfc3986.uri import URIReference
from ..compat import is_py3
def to_bytestring(element):
"""
Converts a single string to a bytestring, encoding via UTF-8 if needed.
"""
if isinstance(element, unicode):
return element.encode('utf-8')
elif isinstance(element, bytes):
return element
else:
raise ValueError("Non string type.")
def to_bytestring_tuple(*x):
"""
Converts the given strings to a bytestring if necessary, returning a
tuple. Uses ``to_bytestring``.
"""
return tuple(imap(to_bytestring, x))
def to_host_port_tuple(host_port_str, default_port=80):
"""
Converts the given string containing a host and possibly a port
to a tuple.
"""
uri = URIReference(
scheme=None,
authority=host_port_str,
path=None,
query=None,
fragment=None
)
host = uri.host.strip('[]')
if not uri.port:
port = default_port
else:
port = int(uri.port)
return (host, port)
def to_native_string(string, encoding='utf-8'):
if isinstance(string, str):
return string
return string.decode(encoding) if is_py3 else string.encode(encoding)
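
Sketch of the helpers above (illustrative, not part of the commit); the bracket-stripping in to_host_port_tuple is what makes the IPv6 case come out clean:

from hyper.common.util import to_bytestring, to_host_port_tuple

print(to_bytestring(u'caf\xe9'))           # b'caf\xc3\xa9' -- unicode encoded as UTF-8
print(to_host_port_tuple('example.com'))   # ('example.com', 80) -- default port applied
print(to_host_port_tuple('[::1]:8080'))    # ('::1', 8080) -- brackets stripped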


@@ -0,0 +1,84 @@
# -*- coding: utf-8 -*-
# flake8: noqa
"""
hyper/compat
~~~~~~~~~~~~
Normalizes the Python 2/3 API for internal use.
"""
from contextlib import contextmanager
import sys
import zlib
try:
from . import ssl_compat
except ImportError:
# TODO log?
ssl_compat = None
_ver = sys.version_info
is_py2 = _ver[0] == 2
is_py2_7_9_or_later = _ver[0] >= 2 and _ver[1] >= 7 and _ver[2] >= 9
is_py3 = _ver[0] == 3
is_py3_3 = is_py3 and _ver[1] == 3
@contextmanager
def ignore_missing():
try:
yield
except (AttributeError, NotImplementedError): # pragma: no cover
pass
if is_py2:
if is_py2_7_9_or_later:
import ssl
else:
ssl = ssl_compat
from urllib import urlencode
from urlparse import urlparse, urlsplit
from itertools import imap
def to_byte(char):
return ord(char)
def decode_hex(b):
return b.decode('hex')
def write_to_stdout(data):
sys.stdout.write(data + '\n')
sys.stdout.flush()
# The standard zlib.compressobj() accepts only positional arguments.
def zlib_compressobj(level=6, method=zlib.DEFLATED, wbits=15, memlevel=8,
strategy=zlib.Z_DEFAULT_STRATEGY):
return zlib.compressobj(level, method, wbits, memlevel, strategy)
unicode = unicode
bytes = str
elif is_py3:
from urllib.parse import urlencode, urlparse, urlsplit
imap = map
def to_byte(char):
return char
def decode_hex(b):
return bytes.fromhex(b)
def write_to_stdout(data):
sys.stdout.buffer.write(data + b'\n')
sys.stdout.buffer.flush()
zlib_compressobj = zlib.compressobj
if is_py3_3:
ssl = ssl_compat
else:
import ssl
unicode = str
bytes = bytes
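
The net effect of the shims above is that the rest of hyper can stay version-agnostic; a sketch (illustrative, not part of the commit):

from hyper.compat import urlsplit, urlencode, zlib_compressobj, to_byte

print(urlsplit('https://example.com/a?b=1').path)   # '/a' on both majors
print(urlencode({'q': 'hyper'}))                    # 'q=hyper'
c = zlib_compressobj(wbits=-15)   # keyword args work even on Python 2
data = c.compress(b'abc') + c.flush()
print(to_byte(b'A'[0]))           # 65, whether indexing yields an int or a str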


@@ -0,0 +1,159 @@
# -*- coding: utf-8 -*-
"""
hyper/contrib
~~~~~~~~~~~~~
Contains a few utilities for use with other HTTP libraries.
"""
try:
from requests.adapters import HTTPAdapter
from requests.models import Response
from requests.structures import CaseInsensitiveDict
from requests.utils import get_encoding_from_headers
from requests.cookies import extract_cookies_to_jar
except ImportError: # pragma: no cover
HTTPAdapter = object
from hyper.common.connection import HTTPConnection
from hyper.compat import urlparse
from hyper.tls import init_context
class HTTP20Adapter(HTTPAdapter):
"""
A Requests Transport Adapter that uses hyper to send requests over
HTTP/2. This implements some degree of connection pooling to maximise the
HTTP/2 gain.
"""
def __init__(self, *args, **kwargs):
#: A mapping between HTTP netlocs and ``HTTP20Connection`` objects.
self.connections = {}
def get_connection(self, host, port, scheme, cert=None):
"""
Gets an appropriate HTTP/2 connection object based on
host/port/scheme/cert tuples.
"""
secure = (scheme == 'https')
if port is None: # pragma: no cover
port = 80 if not secure else 443
ssl_context = None
if cert is not None:
ssl_context = init_context(cert=cert)
try:
conn = self.connections[(host, port, scheme, cert)]
except KeyError:
conn = HTTPConnection(
host,
port,
secure=secure,
ssl_context=ssl_context)
self.connections[(host, port, scheme, cert)] = conn
return conn
def send(self, request, stream=False, cert=None, **kwargs):
"""
        Sends an HTTP message to the server.
"""
parsed = urlparse(request.url)
conn = self.get_connection(
parsed.hostname,
parsed.port,
parsed.scheme,
cert=cert)
# Build the selector.
selector = parsed.path
selector += '?' + parsed.query if parsed.query else ''
selector += '#' + parsed.fragment if parsed.fragment else ''
conn.request(
request.method,
selector,
request.body,
request.headers
)
resp = conn.get_response()
r = self.build_response(request, resp)
if not stream:
r.content
return r
def build_response(self, request, resp):
"""
Builds a Requests' response object. This emulates most of the logic of
        the standard function but deals with the lack of the ``.headers``
property on the HTTP20Response object.
Additionally, this function builds in a number of features that are
purely for HTTPie. This is to allow maximum compatibility with what
urllib3 does, so that HTTPie doesn't fall over when it uses us.
"""
response = Response()
response.status_code = resp.status
response.headers = CaseInsensitiveDict(resp.headers.iter_raw())
response.raw = resp
response.reason = resp.reason
response.encoding = get_encoding_from_headers(response.headers)
extract_cookies_to_jar(response.cookies, request, response)
response.url = request.url
response.request = request
response.connection = self
        # First horrible patch: Requests expects its raw responses to have a
        # release_conn method, which ours don't. We monkeypatch a no-op on.
resp.release_conn = lambda: None
# Next, add the things HTTPie needs. It needs the following things:
#
# - The `raw` object has a property called `_original_response` that is
# a `httplib` response object.
# - `raw._original_response` has three simple properties: `version`,
# `status`, `reason`.
# - `raw._original_response.version` has one of three values: `9`,
# `10`, `11`.
# - `raw._original_response.msg` exists.
# - `raw._original_response.msg._headers` exists and is an iterable of
# two-tuples.
#
# We fake this out. Most of this exists on our response object already,
# and the rest can be faked.
#
# All of this exists for httpie, which I don't have any tests for,
# so I'm not going to bother adding test coverage for it.
class FakeOriginalResponse(object): # pragma: no cover
def __init__(self, headers):
self._headers = headers
def get_all(self, name, default=None):
values = []
for n, v in self._headers:
if n == name.lower():
values.append(v)
if not values:
return default
return values
def getheaders(self, name):
return self.get_all(name, [])
response.raw._original_response = orig = FakeOriginalResponse(None)
orig.version = 20
orig.status = resp.status
orig.reason = resp.reason
orig.msg = FakeOriginalResponse(resp.headers.iter_raw())
return response
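
For context (illustrative, not part of the commit), the adapter is meant to be mounted on a requests Session, at which point matching URLs are routed through hyper:

import requests
from hyper.contrib import HTTP20Adapter

s = requests.Session()
s.mount('https://http2bin.org', HTTP20Adapter())   # prefix routing
r = s.get('https://http2bin.org/get')
print(r.status_code)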


@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
"""
h2
~~
An HTTP/2 implementation.
"""
__version__ = '2.6.2'


@@ -0,0 +1,146 @@
# -*- coding: utf-8 -*-
"""
h2/config
~~~~~~~~~
Objects for controlling the configuration of the HTTP/2 stack.
"""
class _BooleanConfigOption(object):
"""
Descriptor for handling a boolean config option. This will block
attempts to set boolean config options to non-bools.
"""
def __init__(self, name):
self.name = name
self.attr_name = '_%s' % self.name
def __get__(self, instance, owner):
return getattr(instance, self.attr_name)
def __set__(self, instance, value):
if not isinstance(value, bool):
raise ValueError("%s must be a bool" % self.name)
setattr(instance, self.attr_name, value)
class DummyLogger(object):
"""
    A Logger object that does no actual logging, hence a DummyLogger.
    For this class the log operation is merely a no-op. The intent is to avoid
conditionals being sprinkled throughout the hyper-h2 code for calls to
logging functions when no logger is passed into the corresponding object.
"""
def __init__(self, *vargs):
pass
def debug(self, *vargs, **kwargs):
"""
No-op logging. Only level needed for now.
"""
pass
class H2Configuration(object):
"""
An object that controls the way a single HTTP/2 connection behaves.
This object allows the users to customize behaviour. In particular, it
allows users to enable or disable optional features, or to otherwise handle
various unusual behaviours.
This object has very little behaviour of its own: it mostly just ensures
that configuration is self-consistent.
:param client_side: Whether this object is to be used on the client side of
a connection, or on the server side. Affects the logic used by the
state machine, the default settings values, the allowable stream IDs,
and several other properties. Defaults to ``True``.
:type client_side: ``bool``
:param header_encoding: Controls whether the headers emitted by this object
in events are transparently decoded to ``unicode`` strings, and what
encoding is used to do that decoding. For historical reasons, this
defaults to ``'utf-8'``. To prevent the decoding of headers (that is,
to force them to be returned as bytestrings), this can be set to
``False`` or the empty string.
:type header_encoding: ``str``, ``False``, or ``None``
:param validate_outbound_headers: Controls whether the headers emitted
by this object are validated against the rules in RFC 7540.
Disabling this setting will cause outbound header validation to
be skipped, and allow the object to emit headers that may be illegal
according to RFC 7540. Defaults to ``True``.
:type validate_outbound_headers: ``bool``
:param normalize_outbound_headers: Controls whether the headers emitted
by this object are normalized before sending. Disabling this setting
will cause outbound header normalization to be skipped, and allow
the object to emit headers that may be illegal according to
RFC 7540. Defaults to ``True``.
:type normalize_outbound_headers: ``bool``
:param validate_inbound_headers: Controls whether the headers received
by this object are validated against the rules in RFC 7540.
Disabling this setting will cause inbound header validation to
be skipped, and allow the object to receive headers that may be illegal
according to RFC 7540. Defaults to ``True``.
:type validate_inbound_headers: ``bool``
:param logger: A logger that conforms to the requirements for this module,
those being no I/O and no context switches, which is needed in order
to run in asynchronous operation.
.. versionadded:: 2.6.0
:type logger: ``logging.Logger``
"""
client_side = _BooleanConfigOption('client_side')
validate_outbound_headers = _BooleanConfigOption(
'validate_outbound_headers'
)
normalize_outbound_headers = _BooleanConfigOption(
'normalize_outbound_headers'
)
validate_inbound_headers = _BooleanConfigOption(
'validate_inbound_headers'
)
def __init__(self,
client_side=True,
header_encoding='utf-8',
validate_outbound_headers=True,
normalize_outbound_headers=True,
validate_inbound_headers=True,
logger=None):
self.client_side = client_side
self.header_encoding = header_encoding
self.validate_outbound_headers = validate_outbound_headers
self.normalize_outbound_headers = normalize_outbound_headers
self.validate_inbound_headers = validate_inbound_headers
self.logger = logger or DummyLogger(__name__)
@property
def header_encoding(self):
"""
Controls whether the headers emitted by this object in events are
transparently decoded to ``unicode`` strings, and what encoding is used
to do that decoding. For historical reasons, this defaults to
``'utf-8'``. To prevent the decoding of headers (that is, to force them
to be returned as bytestrings), this can be set to ``False`` or the
empty string.
"""
return self._header_encoding
@header_encoding.setter
def header_encoding(self, value):
"""
Enforces constraints on the value of header encoding.
"""
if not isinstance(value, (bool, str, type(None))):
raise ValueError("header_encoding must be bool, string, or None")
if value is True:
raise ValueError("header_encoding cannot be True")
self._header_encoding = value
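
A sketch of the validation the descriptor and setter above enforce (illustrative, not part of the commit):

from h2.config import H2Configuration

config = H2Configuration(client_side=False, header_encoding='utf-8')

try:
    config.client_side = 'yes'        # _BooleanConfigOption rejects non-bools
except ValueError as exc:
    print(exc)                        # client_side must be a bool

try:
    config.header_encoding = True     # explicitly disallowed by the setter
except ValueError as exc:
    print(exc)                        # header_encoding cannot be True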

File diff suppressed because it is too large.


@@ -0,0 +1,183 @@
# -*- coding: utf-8 -*-
"""
h2/errors
~~~~~~~~~~~~~~~~~~~
Global error code registry containing the established HTTP/2 error codes.
The current registry is available at:
https://tools.ietf.org/html/rfc7540#section-11.4
"""
import enum
class ErrorCodes(enum.IntEnum):
"""
All known HTTP/2 error codes.
.. versionadded:: 2.5.0
"""
#: Graceful shutdown.
NO_ERROR = 0x0
#: Protocol error detected.
PROTOCOL_ERROR = 0x1
#: Implementation fault.
INTERNAL_ERROR = 0x2
#: Flow-control limits exceeded.
FLOW_CONTROL_ERROR = 0x3
#: Settings not acknowledged.
SETTINGS_TIMEOUT = 0x4
#: Frame received for closed stream.
STREAM_CLOSED = 0x5
#: Frame size incorrect.
FRAME_SIZE_ERROR = 0x6
#: Stream not processed.
REFUSED_STREAM = 0x7
#: Stream cancelled.
CANCEL = 0x8
#: Compression state not updated.
COMPRESSION_ERROR = 0x9
#: TCP connection error for CONNECT method.
CONNECT_ERROR = 0xa
#: Processing capacity exceeded.
ENHANCE_YOUR_CALM = 0xb
#: Negotiated TLS parameters not acceptable.
INADEQUATE_SECURITY = 0xc
#: Use HTTP/1.1 for the request.
HTTP_1_1_REQUIRED = 0xd
def _error_code_from_int(code):
"""
Given an integer error code, returns either one of :class:`ErrorCodes
<h2.errors.ErrorCodes>` or, if not present in the known set of codes,
returns the integer directly.
"""
try:
return ErrorCodes(code)
except ValueError:
return code
#: Graceful shutdown.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.NO_ERROR
#: <h2.errors.ErrorCodes.NO_ERROR>`.
NO_ERROR = ErrorCodes.NO_ERROR
#: Protocol error detected.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.PROTOCOL_ERROR
#: <h2.errors.ErrorCodes.PROTOCOL_ERROR>`.
PROTOCOL_ERROR = ErrorCodes.PROTOCOL_ERROR
#: Implementation fault.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.INTERNAL_ERROR
#: <h2.errors.ErrorCodes.INTERNAL_ERROR>`.
INTERNAL_ERROR = ErrorCodes.INTERNAL_ERROR
#: Flow-control limits exceeded.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.FLOW_CONTROL_ERROR
#: <h2.errors.ErrorCodes.FLOW_CONTROL_ERROR>`.
FLOW_CONTROL_ERROR = ErrorCodes.FLOW_CONTROL_ERROR
#: Settings not acknowledged.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.SETTINGS_TIMEOUT
#: <h2.errors.ErrorCodes.SETTINGS_TIMEOUT>`.
SETTINGS_TIMEOUT = ErrorCodes.SETTINGS_TIMEOUT
#: Frame received for closed stream.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.STREAM_CLOSED
#: <h2.errors.ErrorCodes.STREAM_CLOSED>`.
STREAM_CLOSED = ErrorCodes.STREAM_CLOSED
#: Frame size incorrect.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.FRAME_SIZE_ERROR
#: <h2.errors.ErrorCodes.FRAME_SIZE_ERROR>`.
FRAME_SIZE_ERROR = ErrorCodes.FRAME_SIZE_ERROR
#: Stream not processed.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.REFUSED_STREAM
#: <h2.errors.ErrorCodes.REFUSED_STREAM>`.
REFUSED_STREAM = ErrorCodes.REFUSED_STREAM
#: Stream cancelled.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.CANCEL
#: <h2.errors.ErrorCodes.CANCEL>`.
CANCEL = ErrorCodes.CANCEL
#: Compression state not updated.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.COMPRESSION_ERROR
#: <h2.errors.ErrorCodes.COMPRESSION_ERROR>`.
COMPRESSION_ERROR = ErrorCodes.COMPRESSION_ERROR
#: TCP connection error for CONNECT method.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.CONNECT_ERROR
#: <h2.errors.ErrorCodes.CONNECT_ERROR>`.
CONNECT_ERROR = ErrorCodes.CONNECT_ERROR
#: Processing capacity exceeded.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.ENHANCE_YOUR_CALM
#: <h2.errors.ErrorCodes.ENHANCE_YOUR_CALM>`.
ENHANCE_YOUR_CALM = ErrorCodes.ENHANCE_YOUR_CALM
#: Negotiated TLS parameters not acceptable.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.INADEQUATE_SECURITY
#: <h2.errors.ErrorCodes.INADEQUATE_SECURITY>`.
INADEQUATE_SECURITY = ErrorCodes.INADEQUATE_SECURITY
#: Use HTTP/1.1 for the request.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes.HTTP_1_1_REQUIRED
#: <h2.errors.ErrorCodes.HTTP_1_1_REQUIRED>`.
HTTP_1_1_REQUIRED = ErrorCodes.HTTP_1_1_REQUIRED
#: All known HTTP/2 error codes.
#:
#: .. deprecated:: 2.5.0
#: Deprecated in favour of :class:`ErrorCodes <h2.errors.ErrorCodes>`.
H2_ERRORS = list(ErrorCodes)
__all__ = ['H2_ERRORS', 'NO_ERROR', 'PROTOCOL_ERROR', 'INTERNAL_ERROR',
'FLOW_CONTROL_ERROR', 'SETTINGS_TIMEOUT', 'STREAM_CLOSED',
'FRAME_SIZE_ERROR', 'REFUSED_STREAM', 'CANCEL', 'COMPRESSION_ERROR',
'CONNECT_ERROR', 'ENHANCE_YOUR_CALM', 'INADEQUATE_SECURITY',
'HTTP_1_1_REQUIRED', 'ErrorCodes']
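
Sketch of the registry in use (illustrative, not part of the commit); _error_code_from_int is the internal helper defined above:

from h2.errors import ErrorCodes, _error_code_from_int

print(_error_code_from_int(0x1))           # ErrorCodes.PROTOCOL_ERROR
print(_error_code_from_int(0xFF))          # 255 -- unknown codes pass through as plain ints
print(int(ErrorCodes.ENHANCE_YOUR_CALM))   # 11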


@@ -0,0 +1,597 @@
# -*- coding: utf-8 -*-
"""
h2/events
~~~~~~~~~
Defines Event types for HTTP/2.
Events are returned by the H2 state machine to allow implementations to keep
track of events triggered by receiving data. Each time data is provided to the
H2 state machine it processes the data and returns a list of Event objects.
"""
import binascii
from .settings import ChangedSetting, _setting_code_from_int
class Event(object):
"""
Base class for h2 events.
"""
pass
class RequestReceived(Event):
"""
The RequestReceived event is fired whenever request headers are received.
This event carries the HTTP headers for the given request and the stream ID
of the new stream.
.. versionchanged:: 2.3.0
Changed the type of ``headers`` to :class:`HeaderTuple
<hpack:hpack.HeaderTuple>`. This has no effect on current users.
.. versionchanged:: 2.4.0
Added ``stream_ended`` and ``priority_updated`` properties.
"""
def __init__(self):
#: The Stream ID for the stream this request was made on.
self.stream_id = None
#: The request headers.
self.headers = None
#: If this request also ended the stream, the associated
#: :class:`StreamEnded <h2.events.StreamEnded>` event will be available
#: here.
#:
#: .. versionadded:: 2.4.0
self.stream_ended = None
#: If this request also had associated priority information, the
#: associated :class:`PriorityUpdated <h2.events.PriorityUpdated>`
#: event will be available here.
#:
#: .. versionadded:: 2.4.0
self.priority_updated = None
def __repr__(self):
return "<RequestReceived stream_id:%s, headers:%s>" % (
self.stream_id, self.headers
)
class ResponseReceived(Event):
"""
The ResponseReceived event is fired whenever response headers are received.
This event carries the HTTP headers for the given response and the stream
ID of the new stream.
.. versionchanged:: 2.3.0
Changed the type of ``headers`` to :class:`HeaderTuple
<hpack:hpack.HeaderTuple>`. This has no effect on current users.
.. versionchanged:: 2.4.0
Added ``stream_ended`` and ``priority_updated`` properties.
"""
def __init__(self):
#: The Stream ID for the stream this response was made on.
self.stream_id = None
#: The response headers.
self.headers = None
#: If this response also ended the stream, the associated
#: :class:`StreamEnded <h2.events.StreamEnded>` event will be available
#: here.
#:
#: .. versionadded:: 2.4.0
self.stream_ended = None
#: If this response also had associated priority information, the
#: associated :class:`PriorityUpdated <h2.events.PriorityUpdated>`
#: event will be available here.
#:
#: .. versionadded:: 2.4.0
self.priority_updated = None
def __repr__(self):
return "<ResponseReceived stream_id:%s, headers:%s>" % (
self.stream_id, self.headers
)
class TrailersReceived(Event):
"""
The TrailersReceived event is fired whenever trailers are received on a
stream. Trailers are a set of headers sent after the body of the
request/response, and are used to provide information that wasn't known
ahead of time (e.g. content-length). This event carries the HTTP header
fields that form the trailers and the stream ID of the stream on which they
were received.
.. versionchanged:: 2.3.0
Changed the type of ``headers`` to :class:`HeaderTuple
<hpack:hpack.HeaderTuple>`. This has no effect on current users.
.. versionchanged:: 2.4.0
Added ``stream_ended`` and ``priority_updated`` properties.
"""
def __init__(self):
#: The Stream ID for the stream on which these trailers were received.
self.stream_id = None
#: The trailers themselves.
self.headers = None
#: Trailers always end streams. This property has the associated
#: :class:`StreamEnded <h2.events.StreamEnded>` in it.
#:
#: .. versionadded:: 2.4.0
self.stream_ended = None
#: If the trailers also set associated priority information, the
#: associated :class:`PriorityUpdated <h2.events.PriorityUpdated>`
#: event will be available here.
#:
#: .. versionadded:: 2.4.0
self.priority_updated = None
def __repr__(self):
return "<TrailersReceived stream_id:%s, headers:%s>" % (
self.stream_id, self.headers
)
class _HeadersSent(Event):
"""
The _HeadersSent event is fired whenever headers are sent.
This is an internal event, used to determine validation steps on
outgoing header blocks.
"""
pass
class _ResponseSent(_HeadersSent):
"""
The _ResponseSent event is fired whenever response headers are sent
on a stream.
This is an internal event, used to determine validation steps on
outgoing header blocks.
"""
pass
class _RequestSent(_HeadersSent):
"""
The _RequestSent event is fired whenever request headers are sent
on a stream.
This is an internal event, used to determine validation steps on
outgoing header blocks.
"""
pass
class _TrailersSent(_HeadersSent):
"""
The _TrailersSent event is fired whenever trailers are sent on a
stream. Trailers are a set of headers sent after the body of the
request/response, and are used to provide information that wasn't known
ahead of time (e.g. content-length).
This is an internal event, used to determine validation steps on
outgoing header blocks.
"""
pass
class _PushedRequestSent(_HeadersSent):
"""
The _PushedRequestSent event is fired whenever pushed request headers are
sent.
This is an internal event, used to determine validation steps on outgoing
header blocks.
"""
pass
class InformationalResponseReceived(Event):
"""
The InformationalResponseReceived event is fired when an informational
response (that is, one whose status code is a 1XX code) is received from
the remote peer.
The remote peer may send any number of these, from zero upwards. These
responses are most commonly sent in response to requests that have the
``expect: 100-continue`` header field present. Most users can safely
ignore this event unless you are intending to use the
``expect: 100-continue`` flow, or are for any reason expecting a different
1XX status code.
.. versionadded:: 2.2.0
.. versionchanged:: 2.3.0
Changed the type of ``headers`` to :class:`HeaderTuple
<hpack:hpack.HeaderTuple>`. This has no effect on current users.
.. versionchanged:: 2.4.0
Added ``priority_updated`` property.
"""
def __init__(self):
#: The Stream ID for the stream this informational response was made
#: on.
self.stream_id = None
#: The headers for this informational response.
self.headers = None
#: If this response also had associated priority information, the
#: associated :class:`PriorityUpdated <h2.events.PriorityUpdated>`
#: event will be available here.
#:
#: .. versionadded:: 2.4.0
self.priority_updated = None
def __repr__(self):
return "<InformationalResponseReceived stream_id:%s, headers:%s>" % (
self.stream_id, self.headers
)
class DataReceived(Event):
"""
The DataReceived event is fired whenever data is received on a stream from
the remote peer. The event carries the data itself, and the stream ID on
which the data was received.
.. versionchanged:: 2.4.0
Added ``stream_ended`` property.
"""
def __init__(self):
#: The Stream ID for the stream this data was received on.
self.stream_id = None
#: The data itself.
self.data = None
#: The amount of data received that counts against the flow control
#: window. Note that padding counts against the flow control window, so
#: when adjusting flow control you should always use this field rather
#: than ``len(data)``.
self.flow_controlled_length = None
#: If this data chunk also completed the stream, the associated
#: :class:`StreamEnded <h2.events.StreamEnded>` event will be available
#: here.
#:
#: .. versionadded:: 2.4.0
self.stream_ended = None
def __repr__(self):
return (
"<DataReceived stream_id:%s, "
"flow_controlled_length:%s, "
"data:%s>" % (
self.stream_id,
self.flow_controlled_length,
_bytes_representation(self.data[:20]),
)
)
class WindowUpdated(Event):
"""
The WindowUpdated event is fired whenever a flow control window changes
size. HTTP/2 defines flow control windows for connections and streams: this
event fires for both connections and streams. The event carries the ID of
the stream to which it applies (set to zero if the window update applies to
the connection), and the delta in the window size.
"""
def __init__(self):
#: The Stream ID of the stream whose flow control window was changed.
#: May be ``0`` if the connection window was changed.
self.stream_id = None
#: The window delta.
self.delta = None
def __repr__(self):
return "<WindowUpdated stream_id:%s, delta:%s>" % (
self.stream_id, self.delta
)
class RemoteSettingsChanged(Event):
"""
The RemoteSettingsChanged event is fired whenever the remote peer changes
its settings. It contains a complete inventory of changed settings,
including their previous values.
In HTTP/2, settings changes need to be acknowledged. hyper-h2 automatically
acknowledges settings changes for efficiency. However, it is possible that
the caller may not be happy with the changed setting.
When this event is received, the caller should confirm that the new
settings are acceptable. If they are not acceptable, the user should close
the connection with the error code :data:`PROTOCOL_ERROR
<h2.errors.ErrorCodes.PROTOCOL_ERROR>`.
.. versionchanged:: 2.0.0
Prior to this version the user needed to acknowledge settings changes.
This is no longer the case: hyper-h2 now automatically acknowledges
them.
"""
def __init__(self):
#: A dictionary of setting byte to
#: :class:`ChangedSetting <h2.settings.ChangedSetting>`, representing
#: the changed settings.
self.changed_settings = {}
@classmethod
def from_settings(cls, old_settings, new_settings):
"""
Build a RemoteSettingsChanged event from a set of changed settings.
:param old_settings: A complete collection of old settings, in the form
of a dictionary of ``{setting: value}``.
:param new_settings: All the changed settings and their new values, in
the form of a dictionary of ``{setting: value}``.
"""
e = cls()
for setting, new_value in new_settings.items():
setting = _setting_code_from_int(setting)
original_value = old_settings.get(setting)
change = ChangedSetting(setting, original_value, new_value)
e.changed_settings[setting] = change
return e
def __repr__(self):
return "<RemoteSettingsChanged changed_settings:{%s}>" % (
", ".join(repr(cs) for cs in self.changed_settings.values()),
)
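# Editor's illustration (not part of the vendored module): a caller that
# finds a remote settings change unacceptable should tear the connection
# down with PROTOCOL_ERROR, as the docstring above describes. The minimum
# of 10 concurrent streams is an arbitrary example threshold.
def _example_check_remote_settings(conn, event):
    from h2.errors import ErrorCodes
    from h2.settings import SettingCodes

    changed = event.changed_settings.get(
        SettingCodes.MAX_CONCURRENT_STREAMS
    )
    if changed is not None and changed.new_value < 10:
        conn.close_connection(error_code=ErrorCodes.PROTOCOL_ERROR)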
class PingAcknowledged(Event):
"""
The PingAcknowledged event is fired whenever a user-emitted PING is
acknowledged. This contains the data in the ACK'ed PING, allowing the
user to correlate PINGs and calculate RTT.
"""
def __init__(self):
#: The data included on the ping.
self.ping_data = None
def __repr__(self):
return "<PingAcknowledged ping_data:%s>" % (
_bytes_representation(self.ping_data),
)
class StreamEnded(Event):
"""
The StreamEnded event is fired whenever a stream is ended by a remote
party. The stream may not be fully closed if it has not been closed
locally, but no further data or headers should be expected on that stream.
"""
def __init__(self):
#: The Stream ID of the stream that was closed.
self.stream_id = None
def __repr__(self):
return "<StreamEnded stream_id:%s>" % self.stream_id
class StreamReset(Event):
"""
The StreamReset event is fired in two situations. The first is when the
remote party forcefully resets the stream. The second is when the remote
party has made a protocol error which only affects a single stream. In this
case, Hyper-h2 will terminate the stream early and return this event.
.. versionchanged:: 2.0.0
This event is now fired when Hyper-h2 automatically resets a stream.
"""
def __init__(self):
#: The Stream ID of the stream that was reset.
self.stream_id = None
#: The error code given. Either one of :class:`ErrorCodes
#: <h2.errors.ErrorCodes>` or ``int``
self.error_code = None
#: Whether the remote peer sent a RST_STREAM or we did.
self.remote_reset = True
def __repr__(self):
return "<StreamReset stream_id:%s, error_code:%s, remote_reset:%s>" % (
self.stream_id, self.error_code, self.remote_reset
)
class PushedStreamReceived(Event):
"""
The PushedStreamReceived event is fired whenever a pushed stream has been
received from a remote peer. The event carries on it the new stream ID, the
ID of the parent stream, and the request headers pushed by the remote peer.
"""
def __init__(self):
#: The Stream ID of the stream created by the push.
self.pushed_stream_id = None
#: The Stream ID of the stream that the push is related to.
self.parent_stream_id = None
#: The request headers, sent by the remote party in the push.
self.headers = None
def __repr__(self):
return (
"<PushedStreamReceived pushed_stream_id:%s, parent_stream_id:%s, "
"headers:%s>" % (
self.pushed_stream_id,
self.parent_stream_id,
self.headers,
)
)
class SettingsAcknowledged(Event):
"""
The SettingsAcknowledged event is fired whenever a settings ACK is received
from the remote peer. The event carries on it the settings that were
acknowledged, in the same format as
:class:`h2.events.RemoteSettingsChanged`.
"""
def __init__(self):
#: A dictionary of setting byte to
#: :class:`ChangedSetting <h2.settings.ChangedSetting>`, representing
#: the changed settings.
self.changed_settings = {}
def __repr__(self):
return "<SettingsAcknowledged changed_settings:{%s}>" % (
", ".join(repr(cs) for cs in self.changed_settings.values()),
)
class PriorityUpdated(Event):
"""
The PriorityUpdated event is fired whenever a stream sends updated priority
information. This can occur when the stream is opened, or at any time
during the stream lifetime.
This event is purely advisory, and does not need to be acted on.
.. versionadded:: 2.0.0
"""
def __init__(self):
#: The ID of the stream whose priority information is being updated.
self.stream_id = None
#: The new stream weight. May be the same as the original stream
#: weight. An integer between 1 and 256.
self.weight = None
#: The stream ID this stream now depends on. May be ``0``.
self.depends_on = None
#: Whether the stream *exclusively* depends on the parent stream. If it
#: does, this stream should inherit the current children of its new
#: parent.
self.exclusive = None
def __repr__(self):
return (
"<PriorityUpdated stream_id:%s, weight:%s, depends_on:%s, "
"exclusive:%s>" % (
self.stream_id,
self.weight,
self.depends_on,
self.exclusive
)
)
class ConnectionTerminated(Event):
"""
The ConnectionTerminated event is fired when a connection is torn down by
the remote peer using a GOAWAY frame. Once received, no further action may
be taken on the connection: a new connection must be established.
"""
def __init__(self):
#: The error code cited when tearing down the connection. Should be
#: one of :class:`ErrorCodes <h2.errors.ErrorCodes>`, but may not be if
#: unknown HTTP/2 extensions are being used.
self.error_code = None
#: The stream ID of the last stream the remote peer saw. This can
#: provide an indication of what data, if any, never reached the remote
#: peer and so can safely be resent.
self.last_stream_id = None
#: Additional debug data that can be appended to GOAWAY frame.
self.additional_data = None
def __repr__(self):
return (
"<ConnectionTerminated error_code:%s, last_stream_id:%s, "
"additional_data:%s>" % (
self.error_code,
self.last_stream_id,
_bytes_representation(
self.additional_data[:20]
if self.additional_data else None)
)
)
class AlternativeServiceAvailable(Event):
"""
The AlternativeServiceAvailable event is fired when the remote peer
advertises an `RFC 7838 <https://tools.ietf.org/html/rfc7838>`_ Alternative
Service using an ALTSVC frame.
This event always carries the origin to which the ALTSVC information
applies. That origin is either supplied by the server directly, or inferred
by hyper-h2 from the ``:authority`` pseudo-header field that was sent by
the user when initiating a given stream.
This event also carries what RFC 7838 calls the "Alternative Service Field
Value", which is formatted like a HTTP header field and contains the
relevant alternative service information. Hyper-h2 does not parse or in any
way modify that information: the user is required to do that.
This event can only be fired on the client end of a connection.
.. versionadded:: 2.3.0
"""
def __init__(self):
#: The origin to which the alternative service field value applies.
#: This field is either supplied by the server directly, or inferred by
#: hyper-h2 from the ``:authority`` pseudo-header field that was sent
#: by the user when initiating the stream on which the frame was
#: received.
self.origin = None
#: The ALTSVC field value. This contains information about the HTTP
#: alternative service being advertised by the server. Hyper-h2 does
#: not parse this field: it is left exactly as sent by the server. The
#: structure of the data in this field is given by `RFC 7838 Section 3
#: <https://tools.ietf.org/html/rfc7838#section-3>`_.
self.field_value = None
def __repr__(self):
return (
"<AlternativeServiceAvailable origin:%s, field_value:%s>" % (
self.origin.decode('utf-8', 'ignore'),
self.field_value.decode('utf-8', 'ignore'),
)
)
def _bytes_representation(data):
"""
Converts a bytestring into something that is safe to print on all Python
platforms.
This function is relatively expensive, so it should not be called on the
mainline of the code. It's safe to use in things like object repr methods
though.
"""
if data is None:
return None
hex = binascii.hexlify(data)
# This is moderately clever: on all Python versions hexlify returns a byte
# string. On Python 3 we want an actual string, so we just check whether
# that's what we have.
if not isinstance(hex, str): # pragma: no cover
hex = hex.decode('ascii')
return hex

View file

@ -0,0 +1,186 @@
# -*- coding: utf-8 -*-
"""
h2/exceptions
~~~~~~~~~~~~~
Exceptions for the HTTP/2 module.
"""
import h2.errors
class H2Error(Exception):
"""
The base class for all exceptions for the HTTP/2 module.
"""
class ProtocolError(H2Error):
"""
An action was attempted in violation of the HTTP/2 protocol.
"""
#: The error code that corresponds to this kind of Protocol Error.
error_code = h2.errors.ErrorCodes.PROTOCOL_ERROR
class FrameTooLargeError(ProtocolError):
"""
The frame that we tried to send or that we received was too large.
"""
#: The error code that corresponds to this kind of Protocol Error.
error_code = h2.errors.ErrorCodes.FRAME_SIZE_ERROR
class FrameDataMissingError(ProtocolError):
"""
The frame that we received is missing some data.
.. versionadded:: 2.0.0
"""
#: The error code that corresponds to this kind of Protocol Error
error_code = h2.errors.ErrorCodes.FRAME_SIZE_ERROR
class TooManyStreamsError(ProtocolError):
"""
An attempt was made to open a stream that would lead to too many concurrent
streams.
"""
pass
class FlowControlError(ProtocolError):
"""
An attempted action violates flow control constraints.
"""
#: The error code that corresponds to this kind of
#: :class:`ProtocolError <h2.exceptions.ProtocolError>`
error_code = h2.errors.ErrorCodes.FLOW_CONTROL_ERROR
class StreamIDTooLowError(ProtocolError):
"""
An attempt was made to open a stream that had an ID that is lower than the
highest ID we have seen on this connection.
"""
def __init__(self, stream_id, max_stream_id):
#: The ID of the stream that we attempted to open.
self.stream_id = stream_id
#: The current highest-seen stream ID.
self.max_stream_id = max_stream_id
def __str__(self):
return "StreamIDTooLowError: %d is lower than %d" % (
self.stream_id, self.max_stream_id
)
class NoAvailableStreamIDError(ProtocolError):
"""
There are no available stream IDs left to the connection. All stream IDs
have been exhausted.
.. versionadded:: 2.0.0
"""
pass
class NoSuchStreamError(ProtocolError):
"""
A stream-specific action referenced a stream that does not exist.
.. versionchanged:: 2.0.0
Became a subclass of :class:`ProtocolError
<h2.exceptions.ProtocolError>`
"""
def __init__(self, stream_id):
#: The stream ID that corresponds to the non-existent stream.
self.stream_id = stream_id
class StreamClosedError(NoSuchStreamError):
"""
A more specific form of
:class:`NoSuchStreamError <h2.exceptions.NoSuchStreamError>`. Indicates
that the stream has since been closed, and that all state relating to that
stream has been removed.
"""
def __init__(self, stream_id):
#: The stream ID that corresponds to the nonexistent stream.
self.stream_id = stream_id
#: The relevant HTTP/2 error code.
self.error_code = h2.errors.ErrorCodes.STREAM_CLOSED
# Any events that internal code may need to fire. Not relevant to
# external users that may receive a StreamClosedError.
self._events = []
class InvalidSettingsValueError(ProtocolError, ValueError):
"""
An attempt was made to set an invalid Settings value.
.. versionadded:: 2.0.0
"""
def __init__(self, msg, error_code):
super(InvalidSettingsValueError, self).__init__(msg)
self.error_code = error_code
class InvalidBodyLengthError(ProtocolError):
"""
The remote peer sent more or less data than the Content-Length header
indicated.
.. versionadded:: 2.0.0
"""
def __init__(self, expected, actual):
self.expected_length = expected
self.actual_length = actual
def __str__(self):
return "InvalidBodyLengthError: Expected %d bytes, received %d" % (
self.expected_length, self.actual_length
)
class UnsupportedFrameError(ProtocolError, KeyError):
"""
The remote peer sent a frame that is unsupported in this context.
.. versionadded:: 2.1.0
"""
# TODO: Remove the KeyError in 3.0.0
pass
class RFC1122Error(H2Error):
"""
Emitted when users attempt to do something that is literally allowed by the
relevant RFC, but is sufficiently ill-defined that it's unwise to allow
users to actually do it.
While there is some disagreement about whether or not we should be liberal
in what we accept, it is a truth universally acknowledged that we should be
conservative in what we emit.
.. versionadded:: 2.4.0
"""
# shazow says I'm going to regret naming the exception this way. If that
# turns out to be true, TELL HIM NOTHING.
pass
class DenialOfServiceError(ProtocolError):
"""
Emitted when the remote peer exhibits a behaviour that is likely to be an
attempt to perform a Denial of Service attack on the implementation. This
is a form of ProtocolError that carries a different error code, and allows
more easy detection of this kind of behaviour.
.. versionadded:: 2.5.0
"""
#: The error code that corresponds to this kind of
#: :class:`ProtocolError <h2.exceptions.ProtocolError>`
error_code = h2.errors.ErrorCodes.ENHANCE_YOUR_CALM
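As a hedged usage sketch (an editor's illustration, not part of the vendored file): callers generally treat ``ProtocolError`` as fatal to the connection. The connection and socket names below are assumptions; by the time the exception propagates out of ``receive_data``, h2 has already queued a GOAWAY carrying the matching error code, so flushing ``data_to_send()`` is usually all that remains.

import h2.connection
import h2.exceptions

def feed(conn, sock, data):
    try:
        return conn.receive_data(data)
    except h2.exceptions.ProtocolError:
        # The GOAWAY with the right error code is already queued.
        sock.sendall(conn.data_to_send())
        sock.close()
        return []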

View file

@ -0,0 +1,184 @@
# -*- coding: utf-8 -*-
"""
h2/frame_buffer
~~~~~~~~~~~~~~~
A data structure that provides a way to iterate over a byte buffer in terms of
frames.
"""
from hyperframe.exceptions import UnknownFrameError, InvalidFrameError
from hyperframe.frame import (
Frame, HeadersFrame, ContinuationFrame, PushPromiseFrame
)
from .exceptions import (
ProtocolError, FrameTooLargeError, FrameDataMissingError
)
# To avoid a DOS attack based on sending loads of continuation frames, we limit
# the maximum number we're prepared to receive. In this case, we'll set the
# limit to 64, which means the largest encoded header block we can receive by
# default is 262144 bytes long, and the largest possible *at all* is 1073741760
# bytes long.
#
# This value seems reasonable for now, but in future we may want to evaluate
# making it configurable.
CONTINUATION_BACKLOG = 64
class FrameBuffer(object):
"""
This is a data structure that expects to act as a buffer for HTTP/2 data
that allows iteration in terms of H2 frames.
"""
def __init__(self, server=False):
self.data = b''
self.max_frame_size = 0
self._preamble = b'PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n' if server else b''
self._preamble_len = len(self._preamble)
self._headers_buffer = []
def add_data(self, data):
"""
Add more data to the frame buffer.
:param data: A bytestring containing the byte buffer.
"""
if self._preamble_len:
data_len = len(data)
of_which_preamble = min(self._preamble_len, data_len)
if self._preamble[:of_which_preamble] != data[:of_which_preamble]:
raise ProtocolError("Invalid HTTP/2 preamble.")
data = data[of_which_preamble:]
self._preamble_len -= of_which_preamble
self._preamble = self._preamble[of_which_preamble:]
self.data += data
def _parse_frame_header(self, data):
"""
Parses the frame header from the data. Either returns a tuple of
(frame, length), or throws an exception. The returned frame may be None
if the frame is of unknown type.
"""
try:
frame, length = Frame.parse_frame_header(data[:9])
except UnknownFrameError as e: # Platform-specific: Hyperframe < 5.0
# Here we do something a bit odd. We want to consume the frame data
# as consistently as possible, but we also don't ever want to yield
# None. Instead, we make sure that, if there is no frame, we
# recurse into ourselves.
# This can only happen now on older versions of hyperframe.
# TODO: Remove in 3.0
length = e.length
frame = None
except ValueError as e:
# The frame header is invalid. This is a ProtocolError
raise ProtocolError("Invalid frame header received: %s" % str(e))
return frame, length
def _validate_frame_length(self, length):
"""
Confirm that the frame is an appropriate length.
"""
if length > self.max_frame_size:
raise FrameTooLargeError(
"Received overlong frame: length %d, max %d" %
(length, self.max_frame_size)
)
def _update_header_buffer(self, f):
"""
Updates the internal header buffer. Returns a frame that should replace
the current one. May throw exceptions if this frame is invalid.
"""
# Check if we're in the middle of a headers block. If we are, this
# frame *must* be a CONTINUATION frame with the same stream ID as the
# leading HEADERS or PUSH_PROMISE frame. Anything else is a
# ProtocolError. If the frame *is* valid, append it to the header
# buffer.
if self._headers_buffer:
stream_id = self._headers_buffer[0].stream_id
valid_frame = (
f is not None and
isinstance(f, ContinuationFrame) and
f.stream_id == stream_id
)
if not valid_frame:
raise ProtocolError("Invalid frame during header block.")
# Append the frame to the buffer.
self._headers_buffer.append(f)
if len(self._headers_buffer) > CONTINUATION_BACKLOG:
raise ProtocolError("Too many continuation frames received.")
# If this is the end of the header block, then we want to build a
# mutant HEADERS frame that's massive. Use the original one we got,
# then set END_HEADERS and set its data appropriately. If it's not
# the end of the block, lose the current frame: we can't yield it.
if 'END_HEADERS' in f.flags:
f = self._headers_buffer[0]
f.flags.add('END_HEADERS')
f.data = b''.join(x.data for x in self._headers_buffer)
self._headers_buffer = []
else:
f = None
elif (isinstance(f, (HeadersFrame, PushPromiseFrame)) and
'END_HEADERS' not in f.flags):
# This is the start of a headers block! Save the frame off and then
# act like we didn't receive one.
self._headers_buffer.append(f)
f = None
return f
# The methods below support the iterator protocol.
def __iter__(self):
return self
def next(self): # Python 2
# First, check that we have enough data to successfully parse the
# next frame header. If not, bail. Otherwise, parse it.
if len(self.data) < 9:
raise StopIteration()
try:
f, length = self._parse_frame_header(self.data)
except InvalidFrameError: # pragma: no cover
raise ProtocolError("Received frame with invalid frame header.")
# Next, check that we have enough length to parse the frame body. If
# not, bail, leaving the frame header data in the buffer for next time.
if len(self.data) < length + 9:
raise StopIteration()
# Confirm the frame has an appropriate length.
self._validate_frame_length(length)
# Don't try to parse the body if we didn't get a frame we know about:
# there's nothing we can do with it anyway.
if f is not None:
try:
f.parse_body(memoryview(self.data[9:9+length]))
except InvalidFrameError:
raise FrameDataMissingError("Frame data missing or invalid")
# At this point, as we know we'll use or discard the entire frame, we
# can update the data.
self.data = self.data[9+length:]
# Pass the frame through the header buffer.
f = self._update_header_buffer(f)
# If we got a frame we didn't understand or shouldn't yield, rather
# than return None it'd be better if we just tried to get the next
# frame in the sequence instead. Recurse back into ourselves to do
# that. This is safe because the amount of work we have to do here is
# strictly bounded by the length of the buffer.
return f if f is not None else self.next()
def __next__(self): # Python 3
return self.next()
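For illustration only (not part of the vendored file), this is roughly how the buffer is driven. ``chunks`` is assumed to be an iterable of bytestrings read off the wire, and ``max_frame_size`` must be set before iterating:

from h2.frame_buffer import FrameBuffer

def frames_from_wire(chunks):
    buf = FrameBuffer(server=False)
    buf.max_frame_size = 16384
    for chunk in chunks:
        buf.add_data(chunk)
        # Iteration stops once no complete frame remains buffered;
        # partial frames wait in the buffer for the next add_data().
        for frame in buf:
            yield frame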

View file

@ -0,0 +1,376 @@
# -*- coding: utf-8 -*-
"""
h2/settings
~~~~~~~~~~~
This module contains an HTTP/2 settings object. This object provides a simple
API for manipulating HTTP/2 settings, keeping track of both the current active
state of the settings and the unacknowledged future values of the settings.
"""
import collections
import enum
from hyperframe.frame import SettingsFrame
from h2.errors import ErrorCodes
from h2.exceptions import InvalidSettingsValueError
class SettingCodes(enum.IntEnum):
"""
All known HTTP/2 setting codes.
.. versionadded:: 2.6.0
"""
#: Allows the sender to inform the remote endpoint of the maximum size of
#: the header compression table used to decode header blocks, in octets.
HEADER_TABLE_SIZE = SettingsFrame.HEADER_TABLE_SIZE
#: This setting can be used to disable server push. To disable server push
#: on a client, set this to 0.
ENABLE_PUSH = SettingsFrame.ENABLE_PUSH
#: Indicates the maximum number of concurrent streams that the sender will
#: allow.
MAX_CONCURRENT_STREAMS = SettingsFrame.MAX_CONCURRENT_STREAMS
#: Indicates the sender's initial window size (in octets) for stream-level
#: flow control.
INITIAL_WINDOW_SIZE = SettingsFrame.INITIAL_WINDOW_SIZE
try: # Platform-specific: Hyperframe < 4.0.0
_max_frame_size = SettingsFrame.SETTINGS_MAX_FRAME_SIZE
except AttributeError: # Platform-specific: Hyperframe >= 4.0.0
_max_frame_size = SettingsFrame.MAX_FRAME_SIZE
#: Indicates the size of the largest frame payload that the sender is
#: willing to receive, in octets.
MAX_FRAME_SIZE = _max_frame_size
try: # Platform-specific: Hyperframe < 4.0.0
_max_header_list_size = SettingsFrame.SETTINGS_MAX_HEADER_LIST_SIZE
except AttributeError: # Platform-specific: Hyperframe >= 4.0.0
_max_header_list_size = SettingsFrame.MAX_HEADER_LIST_SIZE
#: This advisory setting informs a peer of the maximum size of header list
#: that the sender is prepared to accept, in octets. The value is based on
#: the uncompressed size of header fields, including the length of the name
#: and value in octets plus an overhead of 32 octets for each header field.
MAX_HEADER_LIST_SIZE = _max_header_list_size
def _setting_code_from_int(code):
"""
Given an integer setting code, returns either one of :class:`SettingCodes
<h2.settings.SettingCodes>` or, if not present in the known set of codes,
returns the integer directly.
"""
try:
return SettingCodes(code)
except ValueError:
return code
# Aliases for all the settings values.
#: Allows the sender to inform the remote endpoint of the maximum size of the
#: header compression table used to decode header blocks, in octets.
#:
#: .. deprecated:: 2.6.0
#: Deprecated in favour of :data:`SettingCodes.HEADER_TABLE_SIZE
#: <h2.settings.SettingCodes.HEADER_TABLE_SIZE>`.
HEADER_TABLE_SIZE = SettingCodes.HEADER_TABLE_SIZE
#: This setting can be used to disable server push. To disable server push on
#: a client, set this to 0.
#:
#: .. deprecated:: 2.6.0
#: Deprecated in favour of :data:`SettingCodes.ENABLE_PUSH
#: <h2.settings.SettingCodes.ENABLE_PUSH>`.
ENABLE_PUSH = SettingCodes.ENABLE_PUSH
#: Indicates the maximum number of concurrent streams that the sender will
#: allow.
#:
#: .. deprecated:: 2.6.0
#: Deprecated in favour of :data:`SettingCodes.MAX_CONCURRENT_STREAMS
#: <h2.settings.SettingCodes.MAX_CONCURRENT_STREAMS>`.
MAX_CONCURRENT_STREAMS = SettingCodes.MAX_CONCURRENT_STREAMS
#: Indicates the sender's initial window size (in octets) for stream-level flow
#: control.
#:
#: .. deprecated:: 2.6.0
#: Deprecated in favour of :data:`SettingCodes.INITIAL_WINDOW_SIZE
#: <h2.settings.SettingCodes.INITIAL_WINDOW_SIZE>`.
INITIAL_WINDOW_SIZE = SettingCodes.INITIAL_WINDOW_SIZE
#: Indicates the size of the largest frame payload that the sender is willing
#: to receive, in octets.
#:
#: .. deprecated:: 2.6.0
#: Deprecated in favour of :data:`SettingCodes.MAX_FRAME_SIZE
#: <h2.settings.SettingCodes.MAX_FRAME_SIZE>`.
MAX_FRAME_SIZE = SettingCodes.MAX_FRAME_SIZE
#: This advisory setting informs a peer of the maximum size of header list that
#: the sender is prepared to accept, in octets. The value is based on the
#: uncompressed size of header fields, including the length of the name and
#: value in octets plus an overhead of 32 octets for each header field.
#:
#: .. deprecated:: 2.6.0
#: Deprecated in favour of :data:`SettingCodes.MAX_HEADER_LIST_SIZE
#: <h2.settings.SettingCodes.MAX_HEADER_LIST_SIZE>`.
MAX_HEADER_LIST_SIZE = SettingCodes.MAX_HEADER_LIST_SIZE
class ChangedSetting:
def __init__(self, setting, original_value, new_value):
#: The setting code given. Either one of :class:`SettingCodes
#: <h2.settings.SettingCodes>` or ``int``
#:
#: .. versionchanged:: 2.6.0
self.setting = setting
#: The original value before being changed.
self.original_value = original_value
#: The new value after being changed.
self.new_value = new_value
def __repr__(self):
return (
"ChangedSetting(setting=%s, original_value=%s, "
"new_value=%s)"
) % (
self.setting,
self.original_value,
self.new_value
)
class Settings(collections.MutableMapping):
"""
An object that encapsulates HTTP/2 settings state.
HTTP/2 Settings are a complex beast. Each party, remote and local, has its
own settings and a view of the other party's settings. When a settings
frame is emitted by a peer it cannot assume that the new settings values
are in place until the remote peer acknowledges the setting. In principle,
multiple settings changes can be "in flight" at the same time, all with
different values.
This object encapsulates this mess. It provides a dict-like interface to
settings, which return the *current* values of the settings in question.
Additionally, it keeps track of the stack of proposed values: each time an
acknowledgement is sent/received, it updates the current values with the
stack of proposed values. On top of all that, it validates the values to
make sure they're allowed, and raises :class:`InvalidSettingsValueError
<h2.exceptions.InvalidSettingsValueError>` if they are not.
Finally, this object understands what the default values of the HTTP/2
settings are, and sets those defaults appropriately.
.. versionchanged:: 2.2.0
Added the ``initial_values`` parameter.
.. versionchanged:: 2.5.0
Added the ``max_header_list_size`` property.
:param client: (optional) Whether these settings should be defaulted for a
client implementation or a server implementation. Defaults to ``True``.
:type client: ``bool``
:param initial_values: (optional) Any initial values the user would like
set, rather than RFC 7540's defaults.
:type initial_values: ``MutableMapping``
"""
def __init__(self, client=True, initial_values=None):
# Backing object for the settings. This is a dictionary of
# (setting: [list of values]), where the first value in the list is the
# current value of the setting. Strictly this doesn't use lists but
# instead uses collections.deque to avoid repeated memory allocations.
#
# This contains the default values for HTTP/2.
self._settings = {
SettingCodes.HEADER_TABLE_SIZE: collections.deque([4096]),
SettingCodes.ENABLE_PUSH: collections.deque([int(client)]),
SettingCodes.INITIAL_WINDOW_SIZE: collections.deque([65535]),
SettingCodes.MAX_FRAME_SIZE: collections.deque([16384]),
}
if initial_values is not None:
for key, value in initial_values.items():
invalid = _validate_setting(key, value)
if invalid:
raise InvalidSettingsValueError(
"Setting %d has invalid value %d" % (key, value),
error_code=invalid
)
self._settings[key] = collections.deque([value])
def acknowledge(self):
"""
The settings have been acknowledged, either by the user (remote
settings) or by the remote peer (local settings).
:returns: A dict of {setting: ChangedSetting} that were applied.
"""
changed_settings = {}
# If there is more than one setting in the list, we have a setting
# value outstanding. Update them.
for k, v in self._settings.items():
if len(v) > 1:
old_setting = v.popleft()
new_setting = v[0]
changed_settings[k] = ChangedSetting(
k, old_setting, new_setting
)
return changed_settings
# Provide easy-access to well known settings.
@property
def header_table_size(self):
"""
The current value of the :data:`HEADER_TABLE_SIZE
<h2.settings.SettingCodes.HEADER_TABLE_SIZE>` setting.
"""
return self[SettingCodes.HEADER_TABLE_SIZE]
@header_table_size.setter
def header_table_size(self, value):
self[SettingCodes.HEADER_TABLE_SIZE] = value
@property
def enable_push(self):
"""
The current value of the :data:`ENABLE_PUSH
<h2.settings.SettingCodes.ENABLE_PUSH>` setting.
"""
return self[SettingCodes.ENABLE_PUSH]
@enable_push.setter
def enable_push(self, value):
self[SettingCodes.ENABLE_PUSH] = value
@property
def initial_window_size(self):
"""
The current value of the :data:`INITIAL_WINDOW_SIZE
<h2.settings.SettingCodes.INITIAL_WINDOW_SIZE>` setting.
"""
return self[SettingCodes.INITIAL_WINDOW_SIZE]
@initial_window_size.setter
def initial_window_size(self, value):
self[SettingCodes.INITIAL_WINDOW_SIZE] = value
@property
def max_frame_size(self):
"""
The current value of the :data:`MAX_FRAME_SIZE
<h2.settings.SettingCodes.MAX_FRAME_SIZE>` setting.
"""
return self[SettingCodes.MAX_FRAME_SIZE]
@max_frame_size.setter
def max_frame_size(self, value):
self[SettingCodes.MAX_FRAME_SIZE] = value
@property
def max_concurrent_streams(self):
"""
The current value of the :data:`MAX_CONCURRENT_STREAMS
<h2.settings.SettingCodes.MAX_CONCURRENT_STREAMS>` setting.
"""
return self.get(SettingCodes.MAX_CONCURRENT_STREAMS, 2**32+1)
@max_concurrent_streams.setter
def max_concurrent_streams(self, value):
self[SettingCodes.MAX_CONCURRENT_STREAMS] = value
@property
def max_header_list_size(self):
"""
The current value of the :data:`MAX_HEADER_LIST_SIZE
<h2.settings.SettingCodes.MAX_HEADER_LIST_SIZE>` setting. If not set,
returns ``None``, which means unlimited.
.. versionadded:: 2.5.0
"""
return self.get(SettingCodes.MAX_HEADER_LIST_SIZE, None)
@max_header_list_size.setter
def max_header_list_size(self, value):
self[SettingCodes.MAX_HEADER_LIST_SIZE] = value
# Implement the MutableMapping API.
def __getitem__(self, key):
val = self._settings[key][0]
# Things that were created when a setting was received should stay
# KeyError'd.
if val is None:
raise KeyError
return val
def __setitem__(self, key, value):
invalid = _validate_setting(key, value)
if invalid:
raise InvalidSettingsValueError(
"Setting %d has invalid value %d" % (key, value),
error_code=invalid
)
try:
items = self._settings[key]
except KeyError:
items = collections.deque([None])
self._settings[key] = items
items.append(value)
def __delitem__(self, key):
del self._settings[key]
def __iter__(self):
return self._settings.__iter__()
def __len__(self):
return len(self._settings)
def __eq__(self, other):
if isinstance(other, Settings):
return self._settings == other._settings
else:
return NotImplemented
def __ne__(self, other):
if isinstance(other, Settings):
return not self == other
else:
return NotImplemented
def _validate_setting(setting, value):
"""
Confirms that a specific setting has a well-formed value. If the setting is
invalid, returns an error code. Otherwise, returns 0 (NO_ERROR).
"""
if setting == SettingCodes.ENABLE_PUSH:
if value not in (0, 1):
return ErrorCodes.PROTOCOL_ERROR
elif setting == SettingCodes.INITIAL_WINDOW_SIZE:
if not 0 <= value <= 2147483647: # 2^31 - 1
return ErrorCodes.FLOW_CONTROL_ERROR
elif setting == SettingCodes.MAX_FRAME_SIZE:
if not 16384 <= value <= 16777215: # 2^14 and 2^24 - 1
return ErrorCodes.PROTOCOL_ERROR
elif setting == SettingCodes.MAX_HEADER_LIST_SIZE:
if value < 0:
return ErrorCodes.PROTOCOL_ERROR
return 0
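A minimal sketch of the two-phase behaviour described above (an editor's illustration, not part of the vendored file):

from h2.settings import Settings, SettingCodes

s = Settings(client=True)
assert s.initial_window_size == 65535    # RFC 7540 default

# Assignment only *proposes* a value; it does not become current until
# the peer's acknowledgement is processed.
s.initial_window_size = 131072
assert s.initial_window_size == 65535

changed = s.acknowledge()
assert s.initial_window_size == 131072
assert changed[SettingCodes.INITIAL_WINDOW_SIZE].original_value == 65535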

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,585 @@
# -*- coding: utf-8 -*-
"""
h2/utilities
~~~~~~~~~~~~
Utility functions that do not belong in a separate module.
"""
import collections
import re
from string import whitespace
import sys
from hpack import HeaderTuple, NeverIndexedHeaderTuple
from .exceptions import ProtocolError, FlowControlError
UPPER_RE = re.compile(b"[A-Z]")
# A set of headers that are hop-by-hop or connection-specific and thus
# forbidden in HTTP/2. This list comes from RFC 7540 § 8.1.2.2.
CONNECTION_HEADERS = frozenset([
b'connection', u'connection',
b'proxy-connection', u'proxy-connection',
b'keep-alive', u'keep-alive',
b'transfer-encoding', u'transfer-encoding',
b'upgrade', u'upgrade',
])
_ALLOWED_PSEUDO_HEADER_FIELDS = frozenset([
b':method', u':method',
b':scheme', u':scheme',
b':authority', u':authority',
b':path', u':path',
b':status', u':status',
])
_SECURE_HEADERS = frozenset([
# May have basic credentials which are vulnerable to dictionary attacks.
b'authorization', u'authorization',
b'proxy-authorization', u'proxy-authorization',
])
_REQUEST_ONLY_HEADERS = frozenset([
b':scheme', u':scheme',
b':path', u':path',
b':authority', u':authority',
b':method', u':method'
])
_RESPONSE_ONLY_HEADERS = frozenset([b':status', u':status'])
if sys.version_info[0] == 2: # Python 2.X
_WHITESPACE = frozenset(whitespace)
else: # Python 3.3+
_WHITESPACE = frozenset(map(ord, whitespace))
def _secure_headers(headers, hdr_validation_flags):
"""
Certain headers are at risk of being attacked during the header compression
phase, and so need to be kept out of header compression contexts. This
function automatically transforms certain specific headers into HPACK
never-indexed fields to ensure they don't get added to header compression
contexts.
This function currently implements two rules:
- 'authorization' and 'proxy-authorization' fields are automatically made
never-indexed.
- Any 'cookie' header field shorter than 20 bytes is made
never-indexed.
These fields are the most at-risk. These rules are inspired by Firefox
and nghttp2.
"""
for header in headers:
if header[0] in _SECURE_HEADERS:
yield NeverIndexedHeaderTuple(*header)
elif header[0] in (b'cookie', u'cookie') and len(header[1]) < 20:
yield NeverIndexedHeaderTuple(*header)
else:
yield header
def extract_method_header(headers):
"""
Extracts the request method from the headers list.
"""
for k, v in headers:
if k in (b':method', u':method'):
if not isinstance(v, bytes):
return v.encode('utf-8')
else:
return v
def is_informational_response(headers):
"""
Searches a header block for a :status header to confirm that a given
collection of headers are an informational response. Assumes the header
block is well formed: that is, that the HTTP/2 special headers are first
in the block, and so that it can stop looking when it finds the first
header field whose name does not begin with a colon.
:param headers: The HTTP/2 header block.
:returns: A boolean indicating if this is an informational response.
"""
for n, v in headers:
if isinstance(n, bytes):
sigil = b':'
status = b':status'
informational_start = b'1'
else:
sigil = u':'
status = u':status'
informational_start = u'1'
# If we find a non-special header, we're done here: stop looping.
if not n.startswith(sigil):
return False
# This isn't the status header, bail.
if n != status:
continue
# If the first digit is a 1, we've got informational headers.
return v.startswith(informational_start)
def guard_increment_window(current, increment):
"""
Increments a flow control window, guarding against that window becoming too
large.
:param current: The current value of the flow control window.
:param increment: The increment to apply to that window.
:returns: The new value of the window.
:raises: ``FlowControlError``
"""
# The largest value the flow control window may take.
LARGEST_FLOW_CONTROL_WINDOW = 2**31 - 1
new_size = current + increment
if new_size > LARGEST_FLOW_CONTROL_WINDOW:
raise FlowControlError(
"May not increment flow control window past %d" %
LARGEST_FLOW_CONTROL_WINDOW
)
return new_size
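# Editor's illustration (not part of the vendored module): the guard
# simply refuses to let a window cross the 2**31 - 1 protocol ceiling.
def _example_guard_increment():
    assert guard_increment_window(65535, 1024) == 66559
    try:
        guard_increment_window(2 ** 31 - 1, 1)
    except FlowControlError:
        pass  # expected: the window may never exceed 2**31 - 1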
def authority_from_headers(headers):
"""
Given a header set, searches for the authority header and returns the
value.
Note that this doesn't terminate early, so should only be called if the
headers are for a client request. Otherwise, will loop over the entire
header set, which is potentially unwise.
:param headers: The HTTP header set.
:returns: The value of the authority header, or ``None``.
:rtype: ``bytes`` or ``None``.
"""
for n, v in headers:
# This gets run against headers that come both from HPACK and from the
# user, so we may have unicode floating around in here. We only want
# bytes.
if n in (b':authority', u':authority'):
return v.encode('utf-8') if not isinstance(v, bytes) else v
return None
# Flags used by the validate_headers pipeline to determine which checks
# should be applied to a given set of headers.
HeaderValidationFlags = collections.namedtuple(
'HeaderValidationFlags',
['is_client', 'is_trailer', 'is_response_header', 'is_push_promise']
)
def validate_headers(headers, hdr_validation_flags):
"""
Validates a header sequence against a set of constraints from RFC 7540.
:param headers: The HTTP header set.
:param hdr_validation_flags: An instance of HeaderValidationFlags.
"""
# This validation logic is built on a sequence of generators that are
# iterated over to provide the final header list. This reduces some of the
# overhead of doing this checking. However, it's worth noting that this
# checking remains somewhat expensive, and attempts should be made wherever
# possible to reduce the time spent doing them.
#
# For example, we avoid tuple unpacking in loops because it represents a
# fixed cost that we don't want to spend, instead indexing into the header
# tuples.
headers = _reject_uppercase_header_fields(
headers, hdr_validation_flags
)
headers = _reject_surrounding_whitespace(
headers, hdr_validation_flags
)
headers = _reject_te(
headers, hdr_validation_flags
)
headers = _reject_connection_header(
headers, hdr_validation_flags
)
headers = _reject_pseudo_header_fields(
headers, hdr_validation_flags
)
headers = _check_host_authority_header(
headers, hdr_validation_flags
)
headers = _check_path_header(headers, hdr_validation_flags)
return list(headers)
def _reject_uppercase_header_fields(headers, hdr_validation_flags):
"""
Raises a ProtocolError if any uppercase character is found in a header
block.
"""
for header in headers:
if UPPER_RE.search(header[0]):
raise ProtocolError(
"Received uppercase header name %s." % header[0])
yield header
def _reject_surrounding_whitespace(headers, hdr_validation_flags):
"""
Raises a ProtocolError if any header name or value is surrounded by
whitespace characters.
"""
# For compatibility with RFC 7230 header fields, we need to allow the field
# value to be an empty string. This is ludicrous, but technically allowed.
# The field name may not be empty, though, so we can safely assume that it
# must have at least one character in it and throw exceptions if it
# doesn't.
for header in headers:
if header[0][0] in _WHITESPACE or header[0][-1] in _WHITESPACE:
raise ProtocolError(
"Received header name surrounded by whitespace %r" % header[0])
if header[1] and ((header[1][0] in _WHITESPACE) or
(header[1][-1] in _WHITESPACE)):
raise ProtocolError(
"Received header value surrounded by whitespace %r" % header[1]
)
yield header
def _reject_te(headers, hdr_validation_flags):
"""
Raises a ProtocolError if the TE header is present in a header block and
its value is anything other than "trailers".
"""
for header in headers:
if header[0] in (b'te', u'te'):
if header[1].lower() not in (b'trailers', u'trailers'):
raise ProtocolError(
"Invalid value for Transfer-Encoding header: %s" %
header[1]
)
yield header
def _reject_connection_header(headers, hdr_validation_flags):
"""
Raises a ProtocolError if the Connection header is present in a header
block.
"""
for header in headers:
if header[0] in CONNECTION_HEADERS:
raise ProtocolError(
"Connection-specific header field present: %s." % header[0]
)
yield header
def _custom_startswith(test_string, bytes_prefix, unicode_prefix):
"""
Given a string that might be a bytestring or a Unicode string,
return True if it starts with the appropriate prefix.
"""
if isinstance(test_string, bytes):
return test_string.startswith(bytes_prefix)
else:
return test_string.startswith(unicode_prefix)
def _assert_header_in_set(string_header, bytes_header, header_set):
"""
Given a set of header names, checks whether the string or byte version of
the header name is present. Raises a Protocol error with the appropriate
error if it's missing.
"""
if not (string_header in header_set or bytes_header in header_set):
raise ProtocolError(
"Header block missing mandatory %s header" % string_header
)
def _reject_pseudo_header_fields(headers, hdr_validation_flags):
"""
Raises a ProtocolError if duplicate pseudo-header fields are found in a
header block or if a pseudo-header field appears in a block after an
ordinary header field.
Raises a ProtocolError if pseudo-header fields are found in trailers.
"""
seen_pseudo_header_fields = set()
seen_regular_header = False
for header in headers:
if _custom_startswith(header[0], b':', u':'):
if header[0] in seen_pseudo_header_fields:
raise ProtocolError(
"Received duplicate pseudo-header field %s" % header[0]
)
seen_pseudo_header_fields.add(header[0])
if seen_regular_header:
raise ProtocolError(
"Received pseudo-header field out of sequence: %s" %
header[0]
)
if header[0] not in _ALLOWED_PSEUDO_HEADER_FIELDS:
raise ProtocolError(
"Received custom pseudo-header field %s" % header[0]
)
else:
seen_regular_header = True
yield header
# Check the pseudo-headers we got to confirm they're acceptable.
_check_pseudo_header_field_acceptability(
seen_pseudo_header_fields, hdr_validation_flags
)
def _check_pseudo_header_field_acceptability(pseudo_headers,
hdr_validation_flags):
"""
Given the set of pseudo-headers present in a header block and the
validation flags, confirms that RFC 7540 allows them.
"""
# Pseudo-header fields MUST NOT appear in trailers - RFC 7540 § 8.1.2.1
if hdr_validation_flags.is_trailer and pseudo_headers:
raise ProtocolError(
"Received pseudo-header in trailer %s" % pseudo_headers
)
# If ':status' pseudo-header is not there in a response header, reject it.
# Similarly, if ':path', ':method', or ':scheme' are not there in a request
# header, reject it. Additionally, if a response contains any request-only
# headers or vice-versa, reject it.
# Relevant RFC section: RFC 7540 § 8.1.2.4
# https://tools.ietf.org/html/rfc7540#section-8.1.2.4
if hdr_validation_flags.is_response_header:
_assert_header_in_set(u':status', b':status', pseudo_headers)
invalid_response_headers = pseudo_headers & _REQUEST_ONLY_HEADERS
if invalid_response_headers:
raise ProtocolError(
"Encountered request-only headers %s" %
invalid_response_headers
)
elif (not hdr_validation_flags.is_response_header and
not hdr_validation_flags.is_trailer):
# This is a request, so we need to have seen :path, :method, and
# :scheme.
_assert_header_in_set(u':path', b':path', pseudo_headers)
_assert_header_in_set(u':method', b':method', pseudo_headers)
_assert_header_in_set(u':scheme', b':scheme', pseudo_headers)
invalid_request_headers = pseudo_headers & _RESPONSE_ONLY_HEADERS
if invalid_request_headers:
raise ProtocolError(
"Encountered response-only headers %s" %
invalid_request_headers
)
def _validate_host_authority_header(headers):
"""
Given the :authority and Host headers from a request block that isn't
a trailer, check that:
1. At least one of these headers is set.
2. If both headers are set, they match.
:param headers: The HTTP header set.
:raises: ``ProtocolError``
"""
# We use None as a sentinel value. Iterate over the list of headers,
# and record the value of these headers (if present). We don't need
# to worry about receiving duplicate :authority headers, as this is
# enforced by the _reject_pseudo_header_fields() pipeline.
#
# TODO: We should also guard against receiving duplicate Host headers,
# and against sending duplicate headers.
authority_header_val = None
host_header_val = None
for header in headers:
if header[0] in (b':authority', u':authority'):
authority_header_val = header[1]
elif header[0] in (b'host', u'host'):
host_header_val = header[1]
yield header
# If we have not-None values for these variables, then we know we saw
# the corresponding header.
authority_present = (authority_header_val is not None)
host_present = (host_header_val is not None)
# It is an error for a request header block to contain neither
# an :authority header nor a Host header.
if not authority_present and not host_present:
raise ProtocolError(
"Request header block does not have an :authority or Host header."
)
# If we receive both headers, they should definitely match.
if authority_present and host_present:
if authority_header_val != host_header_val:
raise ProtocolError(
"Request header block has mismatched :authority and "
"Host headers: %r / %r"
% (authority_header_val, host_header_val)
)
def _check_host_authority_header(headers, hdr_validation_flags):
"""
Raises a ProtocolError if a header block arrives that does not contain an
:authority or a Host header, or if a header block contains both fields,
but their values do not match.
"""
# We only expect to see :authority and Host headers on request header
# blocks that aren't trailers, so skip this validation if this is a
# response header or we're looking at trailer blocks.
skip_validation = (
hdr_validation_flags.is_response_header or
hdr_validation_flags.is_trailer
)
if skip_validation:
return headers
return _validate_host_authority_header(headers)
def _check_path_header(headers, hdr_validation_flags):
"""
Raise a ProtocolError if a header block arrives or is sent that contains an
empty :path header.
"""
def inner():
for header in headers:
if header[0] in (b':path', u':path'):
if not header[1]:
raise ProtocolError("An empty :path header is forbidden")
yield header
# We only expect to see :authority and Host headers on request header
# blocks that aren't trailers, so skip this validation if this is a
# response header or we're looking at trailer blocks.
skip_validation = (
hdr_validation_flags.is_response_header or
hdr_validation_flags.is_trailer
)
if skip_validation:
return headers
else:
return inner()
def _lowercase_header_names(headers, hdr_validation_flags):
"""
Given an iterable of header two-tuples, rebuilds that iterable with the
header names lowercased. This generator produces tuples that preserve the
original type of the header tuple for tuple and any ``HeaderTuple``.
"""
for header in headers:
if isinstance(header, HeaderTuple):
yield header.__class__(header[0].lower(), header[1])
else:
yield (header[0].lower(), header[1])
def _strip_surrounding_whitespace(headers, hdr_validation_flags):
"""
Given an iterable of header two-tuples, strip both leading and trailing
whitespace from both header names and header values. This generator
produces tuples that preserve the original type of the header tuple for
tuple and any ``HeaderTuple``.
"""
for header in headers:
if isinstance(header, HeaderTuple):
yield header.__class__(header[0].strip(), header[1].strip())
else:
yield (header[0].strip(), header[1].strip())
def _strip_connection_headers(headers, hdr_validation_flags):
"""
Strip any connection headers as per RFC7540 § 8.1.2.2.
"""
for header in headers:
if header[0] not in CONNECTION_HEADERS:
yield header
def _check_sent_host_authority_header(headers, hdr_validation_flags):
"""
Raises an InvalidHeaderBlockError if we try to send a header block
that does not contain an :authority or a Host header, or if
the header block contains both fields, but their values do not match.
"""
# We only expect to see :authority and Host headers on request header
# blocks that aren't trailers, so skip this validation if this is a
# response header or we're looking at trailer blocks.
skip_validation = (
hdr_validation_flags.is_response_header or
hdr_validation_flags.is_trailer
)
if skip_validation:
return headers
return _validate_host_authority_header(headers)
def normalize_outbound_headers(headers, hdr_validation_flags):
"""
Normalizes a header sequence that we are about to send.
:param headers: The HTTP header set.
:param hdr_validation_flags: An instance of HeaderValidationFlags.
"""
headers = _lowercase_header_names(headers, hdr_validation_flags)
headers = _strip_surrounding_whitespace(headers, hdr_validation_flags)
headers = _strip_connection_headers(headers, hdr_validation_flags)
headers = _secure_headers(headers, hdr_validation_flags)
return headers
def validate_outbound_headers(headers, hdr_validation_flags):
"""
Validates and normalizes a header sequence that we are about to send.
:param headers: The HTTP header set.
:param hdr_validation_flags: An instance of HeaderValidationFlags.
"""
headers = _reject_te(
headers, hdr_validation_flags
)
headers = _reject_connection_header(
headers, hdr_validation_flags
)
headers = _reject_pseudo_header_fields(
headers, hdr_validation_flags
)
headers = _check_sent_host_authority_header(
headers, hdr_validation_flags
)
headers = _check_path_header(headers, hdr_validation_flags)
return headers
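A hedged illustration of the inbound validation entry point (not part of the vendored file; the header values are placeholders):

from h2.utilities import HeaderValidationFlags, validate_headers

flags = HeaderValidationFlags(
    is_client=True,
    is_trailer=False,
    is_response_header=False,
    is_push_promise=False,
)
request_headers = [
    (b':method', b'GET'),
    (b':scheme', b'https'),
    (b':authority', b'example.com'),
    (b':path', b'/'),
]
# Returns the validated header list, or raises ProtocolError on any
# violation of the RFC 7540 rules enforced above.
validated = validate_headers(request_headers, flags)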

View file

@ -0,0 +1,139 @@
# -*- coding: utf-8 -*-
"""
h2/windows
~~~~~~~~~~
Defines tools for managing HTTP/2 flow control windows.
The objects defined in this module are used to automatically manage HTTP/2
flow control windows. Specifically, they keep track of what the size of the
window is, how much data has been consumed from that window, and how much data
the user has already used. It then implements a basic algorithm that attempts
to manage the flow control window without user input, trying to ensure that it
does not emit too many WINDOW_UPDATE frames.
"""
from __future__ import division
from .exceptions import FlowControlError
# The largest acceptable value for an HTTP/2 flow control window.
LARGEST_FLOW_CONTROL_WINDOW = 2**31 - 1
class WindowManager(object):
"""
A basic HTTP/2 window manager.
:param max_window_size: The maximum size of the flow control window.
:type max_window_size: ``int``
"""
def __init__(self, max_window_size):
assert max_window_size <= LARGEST_FLOW_CONTROL_WINDOW
self.max_window_size = max_window_size
self.current_window_size = max_window_size
self._bytes_processed = 0
def window_consumed(self, size):
"""
We have received a certain number of bytes from the remote peer. This
necessarily shrinks the flow control window!
:param size: The number of flow controlled bytes we received from the
remote peer.
:type size: ``int``
:returns: Nothing.
:rtype: ``None``
"""
self.current_window_size -= size
if self.current_window_size < 0:
raise FlowControlError("Flow control window shrunk below 0")
def window_opened(self, size):
"""
The flow control window has been incremented, either because of manual
flow control management or because of the user changing the flow
control settings. This can have the effect of increasing what we
consider to be the "maximum" flow control window size.
This does not increase our view of how many bytes have been processed,
only of how much space is in the window.
:param size: The increment to the flow control window we received.
:type size: ``int``
:returns: Nothing
:rtype: ``None``
"""
self.current_window_size += size
if self.current_window_size > LARGEST_FLOW_CONTROL_WINDOW:
raise FlowControlError(
"Flow control window mustn't exceed %d" %
LARGEST_FLOW_CONTROL_WINDOW
)
if self.current_window_size > self.max_window_size:
self.max_window_size = self.current_window_size
def process_bytes(self, size):
"""
The application has informed us that it has processed a certain number
of bytes. This may cause us to want to emit a window update frame. If
we do want to emit a window update frame, this method will return the
number of bytes that we should increment the window by.
:param size: The number of flow controlled bytes that the application
has processed.
:type size: ``int``
:returns: The number of bytes to increment the flow control window by,
or ``None``.
:rtype: ``int`` or ``None``
"""
self._bytes_processed += size
return self._maybe_update_window()
def _maybe_update_window(self):
"""
Run the algorithm.
Our current algorithm can be described like this.
1. If no bytes have been processed, we immediately return 0. There is
no meaningful way for us to hand space in the window back to the
remote peer, so let's not even try.
2. If there is no space in the flow control window, and we have
processed at least 1024 bytes (or 1/4 of the window, if the window
is smaller), we will emit a window update frame. This is to avoid
the risk of blocking a stream altogether.
3. If there is space in the flow control window, and we have processed
at least 1/2 of the window worth of bytes, we will emit a window
update frame. This is to minimise the number of window update frames
we have to emit.
In a healthy system with large flow control windows, this will
irregularly emit WINDOW_UPDATE frames. This prevents us starving the
connection by emitting eleventy bajillion WINDOW_UPDATE frames,
especially in situations where the remote peer is sending a lot of very
small DATA frames.
"""
# TODO: Can the window be smaller than 1024 bytes? If not, we can
# streamline this algorithm.
if not self._bytes_processed:
return None
max_increment = (self.max_window_size - self.current_window_size)
increment = 0
# Note that, even though we may increment less than _bytes_processed,
# we still want to set it to zero whenever we emit an increment. This
# is because we'll always increment up to the maximum we can.
if (self.current_window_size == 0) and (
self._bytes_processed > min(1024, self.max_window_size // 4)):
increment = min(self._bytes_processed, max_increment)
self._bytes_processed = 0
elif self._bytes_processed >= (self.max_window_size // 2):
increment = min(self._bytes_processed, max_increment)
self._bytes_processed = 0
self.current_window_size += increment
return increment
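For illustration (an editor's sketch, not part of the vendored file), the algorithm in practice with the default 64 kB window:

from h2.windows import WindowManager

wm = WindowManager(max_window_size=65535)

wm.window_consumed(4096)
# A 4 kB nibble out of a 64 kB window is not worth a WINDOW_UPDATE yet.
assert not wm.process_bytes(4096)

wm.window_consumed(30000)
# Once at least half the window's worth of bytes has been processed,
# the manager hands back an increment covering everything so far.
assert wm.process_bytes(30000) == 34096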

View file

@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""
hyper/http11
~~~~~~~~~~~~
The HTTP/1.1 submodule that powers hyper.
"""

View file

@ -0,0 +1,384 @@
# -*- coding: utf-8 -*-
"""
hyper/http11/connection
~~~~~~~~~~~~~~~~~~~~~~~
Objects that build hyper's connection-level HTTP/1.1 abstraction.
"""
import logging
import os
import socket
import base64
from collections import Iterable, Mapping
import collections
from hyperframe.frame import SettingsFrame
from .response import HTTP11Response
from ..tls import wrap_socket, H2C_PROTOCOL
from ..common.bufsocket import BufferedSocket
from ..common.exceptions import TLSUpgrade, HTTPUpgrade
from ..common.headers import HTTPHeaderMap
from ..common.util import to_bytestring, to_host_port_tuple
from ..compat import bytes
# We prefer pycohttpparser to the pure-Python interpretation
try: # pragma: no cover
from pycohttpparser.api import Parser
except ImportError: # pragma: no cover
from .parser import Parser
log = logging.getLogger(__name__)
BODY_CHUNKED = 1
BODY_FLAT = 2
class HTTP11Connection(object):
"""
An object representing a single HTTP/1.1 connection to a server.
:param host: The host to connect to. This may be an IP address or a
hostname, and optionally may include a port: for example,
``'twitter.com'``, ``'twitter.com:443'`` or ``'127.0.0.1'``.
:param port: (optional) The port to connect to. If not provided and one
also isn't provided in the ``host`` parameter, defaults to 80.
:param secure: (optional) Whether the request should use TLS. Defaults to
``False`` for most requests, but to ``True`` for any request issued to
port 443.
:param ssl_context: (optional) A class with custom certificate settings.
If not provided then hyper's default ``SSLContext`` is used instead.
:param proxy_host: (optional) The proxy to connect to. This can be an IP
address or a host name and may include a port.
:param proxy_port: (optional) The proxy port to connect to. If not provided
and one also isn't provided in the ``proxy_host`` parameter,
defaults to 8080.
"""
def __init__(self, host, port=None, secure=None, ssl_context=None,
proxy_host=None, proxy_port=None, **kwargs):
if port is None:
self.host, self.port = to_host_port_tuple(host, default_port=80)
else:
self.host, self.port = host, port
# Record whether we plan to secure the request. In future this should
# be extended to a security profile, but a bool will do for now.
# TODO: Actually do something with this!
if secure is not None:
self.secure = secure
elif self.port == 443:
self.secure = True
else:
self.secure = False
# only send http upgrade headers for non-secure connection
self._send_http_upgrade = not self.secure
self.ssl_context = ssl_context
self._sock = None
# Setup proxy details if applicable.
if proxy_host:
if proxy_port is None:
self.proxy_host, self.proxy_port = to_host_port_tuple(
proxy_host, default_port=8080
)
else:
self.proxy_host, self.proxy_port = proxy_host, proxy_port
else:
self.proxy_host = None
self.proxy_port = None
#: The size of the in-memory buffer used to store data from the
#: network. This is used as a performance optimisation. Increase buffer
#: size to improve performance: decrease it to conserve memory.
#: Defaults to 64kB.
self.network_buffer_size = 65536
#: The object used to perform HTTP/1.1 parsing. Needs to conform to
#: the standard hyper parsing interface.
self.parser = Parser()
def connect(self):
"""
Connect to the server specified when the object was created. This is a
no-op if we're already connected.
:returns: Nothing.
"""
if self._sock is None:
if not self.proxy_host:
host = self.host
port = self.port
else:
host = self.proxy_host
port = self.proxy_port
sock = socket.create_connection((host, port), 5)
proto = None
if self.secure:
assert not self.proxy_host, "Proxy with HTTPS not supported."
sock, proto = wrap_socket(sock, host, self.ssl_context)
log.debug("Selected protocol: %s", proto)
sock = BufferedSocket(sock, self.network_buffer_size)
if proto not in ('http/1.1', None):
raise TLSUpgrade(proto, sock)
self._sock = sock
return
def request(self, method, url, body=None, headers=None):
"""
This will send a request to the server using the HTTP request method
``method`` and the selector ``url``. If the ``body`` argument is
present, it should be a string or bytes object of data to send after the
headers are finished. Strings are encoded as UTF-8. To use other
encodings, pass a bytes object. The Content-Length header is set to the
length of the body field.
:param method: The request method, e.g. ``'GET'``.
:param url: The URL to contact, e.g. ``'/path/segment'``.
:param body: (optional) The request body to send. Must be a bytestring,
an iterable of bytestring, or a file-like object.
:param headers: (optional) The headers to send on the request.
:returns: Nothing.
"""
headers = headers or {}
method = to_bytestring(method)
url = to_bytestring(url)
if not isinstance(headers, HTTPHeaderMap):
if isinstance(headers, Mapping):
headers = HTTPHeaderMap(headers.items())
elif isinstance(headers, Iterable):
headers = HTTPHeaderMap(headers)
else:
raise ValueError(
'Header argument must be a dictionary or an iterable'
)
if self._sock is None:
self.connect()
if self._send_http_upgrade:
self._add_upgrade_headers(headers)
self._send_http_upgrade = False
# We may need extra headers.
if body:
body_type = self._add_body_headers(headers, body)
if b'host' not in headers:
headers[b'host'] = self.host
# Begin by emitting the header block.
self._send_headers(method, url, headers)
# Next, send the request body.
if body:
self._send_body(body, body_type)
return
def get_response(self):
"""
Returns a response object.
This is an early beta, so the response object is pretty stupid. That's
ok, we'll fix it later.
"""
headers = HTTPHeaderMap()
response = None
while response is None:
# 'encourage' the socket to receive data.
self._sock.fill()
response = self.parser.parse_response(self._sock.buffer)
for n, v in response.headers:
headers[n.tobytes()] = v.tobytes()
self._sock.advance_buffer(response.consumed)
if (response.status == 101 and
b'upgrade' in headers['connection'] and
H2C_PROTOCOL.encode('utf-8') in headers['upgrade']):
raise HTTPUpgrade(H2C_PROTOCOL, self._sock)
return HTTP11Response(
response.status,
response.msg.tobytes(),
headers,
self._sock,
self
)
def _send_headers(self, method, url, headers):
"""
Handles the logic of sending the header block.
"""
self._sock.send(b' '.join([method, url, b'HTTP/1.1\r\n']))
for name, value in headers.iter_raw():
name, value = to_bytestring(name), to_bytestring(value)
header = b''.join([name, b': ', value, b'\r\n'])
self._sock.send(header)
self._sock.send(b'\r\n')
def _add_body_headers(self, headers, body):
"""
Adds any headers needed for sending the request body. This will always
defer to the user-supplied header content.
:returns: One of (BODY_CHUNKED, BODY_FLAT), indicating what type of
request body should be used.
"""
if b'content-length' in headers:
return BODY_FLAT
if b'chunked' in headers.get(b'transfer-encoding', []):
return BODY_CHUNKED
# For bytestring bodies we upload the content with a fixed length.
# For file objects, we use the length of the file object.
if isinstance(body, bytes):
length = str(len(body)).encode('utf-8')
elif hasattr(body, 'fileno'):
length = str(os.fstat(body.fileno()).st_size).encode('utf-8')
else:
length = None
if length:
headers[b'content-length'] = length
return BODY_FLAT
headers[b'transfer-encoding'] = b'chunked'
return BODY_CHUNKED
def _add_upgrade_headers(self, headers):
# Add HTTP Upgrade headers.
headers[b'connection'] = b'Upgrade, HTTP2-Settings'
headers[b'upgrade'] = H2C_PROTOCOL
# Encode SETTINGS frame payload in Base64 and put into the HTTP-2
# Settings header.
http2_settings = SettingsFrame(0)
http2_settings.settings[SettingsFrame.INITIAL_WINDOW_SIZE] = 65535
encoded_settings = base64.urlsafe_b64encode(
http2_settings.serialize_body()
)
headers[b'HTTP2-Settings'] = encoded_settings.rstrip(b'=')
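# For reference, a hand-computed sanity check of the value above (this
# assumes the standard 6-byte id/value wire format for a single HTTP/2
# setting; illustrative only, not part of the library):
#
#     payload = b'\x00\x04\x00\x00\xff\xff'  # INITIAL_WINDOW_SIZE = 65535
#     base64.urlsafe_b64encode(payload)      # -> b'AAQAAP__'
#
# so the request carries ``HTTP2-Settings: AAQAAP__``.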
def _send_body(self, body, body_type):
"""
Handles the HTTP/1.1 logic for sending HTTP bodies. This does
magically different things in different cases.
"""
if body_type == BODY_FLAT:
# Special case for files and other 'readable' objects.
if hasattr(body, 'read'):
return self._send_file_like_obj(body)
# Case for bytestrings.
elif isinstance(body, bytes):
self._sock.send(body)
return
# Iterables that set a specific content length.
elif isinstance(body, collections.Iterable):
for item in body:
try:
self._sock.send(item)
except TypeError:
raise ValueError(
"Elements in iterable body must be bytestrings. "
"Illegal element: {}".format(item)
)
return
else:
raise ValueError(
'Request body must be a bytestring, a file-like object '
'returning bytestrings or an iterable of bytestrings. '
'Got: {}'.format(type(body))
)
# Chunked!
return self._send_chunked(body)
def _send_chunked(self, body):
"""
Handles the HTTP/1.1 logic for sending a chunk-encoded body.
"""
# Chunked! For chunked bodies we don't special-case, we just iterate
# over what we have and send stuff out.
for chunk in body:
length = '{0:x}'.format(len(chunk)).encode('ascii')
# For now write this as four 'send' calls. That's probably
# inefficient, let's come back to it.
try:
self._sock.send(length)
self._sock.send(b'\r\n')
self._sock.send(chunk)
self._sock.send(b'\r\n')
except TypeError:
raise ValueError(
"Iterable bodies must always iterate in bytestrings"
)
self._sock.send(b'0\r\n\r\n')
return
def _send_file_like_obj(self, fobj):
"""
Handles streaming a file-like object to the network.
"""
while True:
block = fobj.read(16*1024)
if not block:
break
try:
self._sock.send(block)
except TypeError:
raise ValueError(
"File-like bodies must return bytestrings. Got: "
"{}".format(type(block))
)
return
def close(self):
"""
Closes the connection. This closes the socket and then abandons the
reference to it. After calling this method, any outstanding
:class:`Response <hyper.http11.response.Response>` objects will throw
exceptions if attempts are made to read their bodies.
In some cases this method will automatically be called.
.. warning:: This method should absolutely only be called when you are
certain the connection object is no longer needed.
"""
self._sock.close()
self._sock = None
# The following two methods are the implementation of the context manager
# protocol.
def __enter__(self):
return self
def __exit__(self, type, value, tb):
self.close()
return False # Never swallow exceptions.
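# A minimal usage sketch (the host is illustrative and error handling is
# omitted); using the connection as a context manager ensures the socket
# is released:
#
#     with HTTP11Connection('httpbin.org:80') as conn:
#         conn.request('GET', '/get')
#         resp = conn.get_response()
#         body = resp.read()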

View file

@@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
"""
hyper/http11/parser
~~~~~~~~~~~~~~~~~~~
This module contains hyper's pure-Python HTTP/1.1 parser. This module defines
an abstraction layer for HTTP/1.1 parsing that allows for dropping in other
modules if needed, in order to obtain speedups on your chosen platform.
"""
from collections import namedtuple
Response = namedtuple(
'Response', ['status', 'msg', 'minor_version', 'headers', 'consumed']
)
class ParseError(Exception):
"""
An invalid HTTP message was passed to the parser.
"""
pass
class Parser(object):
"""
A single HTTP parser object.
This object is not thread-safe, and it does maintain state that is shared
across parsing requests. For this reason, make sure that access to this
object is synchronized if you use it across multiple threads.
"""
def __init__(self):
pass
def parse_response(self, buffer):
"""
Parses a single HTTP response from a buffer.
:param buffer: A ``memoryview`` object wrapping a buffer containing an
HTTP response.
:returns: A :class:`Response <hyper.http11.parser.Response>` object, or
``None`` if there is not enough data in the buffer.
"""
# Begin by copying the data out of the buffer. This is necessary
# because as much as possible we want to use the built-in bytestring
# methods, rather than looping over the data in Python.
temp_buffer = buffer.tobytes()
index = temp_buffer.find(b'\n')
if index == -1:
return None
version, status, reason = temp_buffer[0:index].split(None, 2)
if not version.startswith(b'HTTP/1.'):
raise ParseError("Not HTTP/1.X!")
minor_version = int(version[7:])
status = int(status)
reason = memoryview(reason.strip())
# Chomp the newline.
index += 1
# Now, parse the headers out.
end_index = index
headers = []
while True:
end_index = temp_buffer.find(b'\n', index)
if end_index == -1:
return None
elif (end_index - index) <= 1:
# Chomp the newline
end_index += 1
break
name, value = temp_buffer[index:end_index].split(b':', 1)
value = value.strip()
headers.append((memoryview(name), memoryview(value)))
index = end_index + 1
resp = Response(status, reason, minor_version, headers, end_index)
return resp
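# A quick sketch of driving the parser by hand (the bytes are
# illustrative):
#
#     p = Parser()
#     raw = memoryview(b'HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nhi')
#     resp = p.parse_response(raw)
#     # resp.status == 200, resp.minor_version == 1; resp.consumed covers
#     # the status line and headers only, not the 2-byte body.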

View file

@@ -0,0 +1,322 @@
# -*- coding: utf-8 -*-
"""
hyper/http11/response
~~~~~~~~~~~~~~~~~~~~~
Contains the HTTP/1.1 equivalent of the HTTPResponse object defined in
httplib/http.client.
"""
import logging
import weakref
import zlib
from ..common.decoder import DeflateDecoder
from ..common.exceptions import ChunkedDecodeError, InvalidResponseError
from ..common.exceptions import ConnectionResetError
log = logging.getLogger(__name__)
class HTTP11Response(object):
"""
An ``HTTP11Response`` wraps the HTTP/1.1 response from the server. It
provides access to the response headers and the entity body. The response
is an iterable object and can be used in a with statement.
"""
def __init__(self, code, reason, headers, sock, connection=None):
#: The reason phrase returned by the server.
self.reason = reason
#: The status code returned by the server.
self.status = code
#: The response headers. These are determined upon creation, assigned
#: once, and never assigned again.
self.headers = headers
#: The response trailers. These are always initially ``None``.
self.trailers = None
# The socket this response is being sent over.
self._sock = sock
# Whether we expect the connection to be closed. If we do, we don't
# bother checking for content-length, we just keep reading until
# we no longer can.
self._expect_close = False
if b'close' in self.headers.get(b'connection', []):
self._expect_close = True
# The expected length of the body.
try:
self._length = int(self.headers[b'content-length'][0])
except KeyError:
self._length = None
# Whether we expect a chunked response.
self._chunked = (
b'chunked' in self.headers.get(b'transfer-encoding', [])
)
# One of the following must be true: we must expect that the connection
# will be closed following the body, or that a content-length was sent,
# or that we're getting a chunked response.
# FIXME: Remove naked assert, replace with something better.
assert self._expect_close or self._length is not None or self._chunked
# This object is used for decompressing gzipped response bodies. Right
# now we only support gzip because that's all the RFC mandates of us.
# Later we'll add support for more encodings.
# This 16 + MAX_WBITS nonsense is to force gzip. See this
# Stack Overflow answer for more:
# http://stackoverflow.com/a/2695466/1401686
if b'gzip' in self.headers.get(b'content-encoding', []):
self._decompressobj = zlib.decompressobj(16 + zlib.MAX_WBITS)
elif b'deflate' in self.headers.get(b'content-encoding', []):
self._decompressobj = DeflateDecoder()
else:
self._decompressobj = None
# This is a reference that allows for the Response class to tell the
# parent connection object to throw away its socket object. This is to
# be used when the connection is genuinely closed, so that the user
# can keep using the Connection object.
# Strictly, we take a weakreference to this so that we don't set up a
# reference cycle.
if connection is not None:
self._parent = weakref.ref(connection)
else:
self._parent = None
self._buffered_data = b''
self._chunker = None
def read(self, amt=None, decode_content=True):
"""
Reads the response body, or up to the next ``amt`` bytes.
:param amt: (optional) The amount of data to read. If not provided, all
the data will be read from the response.
:param decode_content: (optional) If ``True``, will transparently
decode the response data.
:returns: The read data. Note that if ``decode_content`` is set to
``True``, the actual amount of data returned may be different to
the amount requested.
"""
# Return early if we've lost our connection.
if self._sock is None:
return b''
if self._chunked:
return self._normal_read_chunked(amt, decode_content)
# If we're asked to do a read without a length, we need to read
# everything. That means either the entire content length, or until the
# socket is closed, depending.
if amt is None:
if self._length is not None:
amt = self._length
elif self._expect_close:
return self._read_expect_closed(decode_content)
else: # pragma: no cover
raise InvalidResponseError(
"Response must either have length or Connection: close"
)
# Otherwise, we've been asked to do a bounded read. We should read no
# more than the remaining length, obviously.
# FIXME: Handle cases without _length
if self._length is not None:
amt = min(amt, self._length)
# Now, issue reads until we read that length. This is to account for
# the fact that it's possible that we'll be asked to read more than
# 65kB in one shot.
to_read = amt
chunks = []
# Ideally I'd like this to read 'while to_read', but I want to be
# defensive against the admittedly unlikely case that the socket
# returns *more* data than I want.
while to_read > 0:
chunk = self._sock.recv(amt).tobytes()
# If we got an empty read, but were expecting more, the remote end
# has hung up. Raise an exception if we were expecting more data,
# but if we were expecting the remote end to close then it's ok.
if not chunk:
if self._length is not None or not self._expect_close:
self.close(socket_close=True)
raise ConnectionResetError("Remote end hung up!")
break
to_read -= len(chunk)
chunks.append(chunk)
data = b''.join(chunks)
if self._length is not None:
self._length -= len(data)
# If we're at the end of the request, we have some cleaning up to do.
# Close the stream, and if necessary flush the buffer. Checking that
# we're at the end is actually obscenely complex: either we've read the
# full content-length or, if we were expecting a closed connection,
# we've had a read shorter than the requested amount. We also have to
# do this before we try to decompress the body.
end_of_request = (self._length == 0 or
(self._expect_close and len(data) < amt))
# We may need to decode the body.
if decode_content and self._decompressobj and data:
data = self._decompressobj.decompress(data)
if decode_content and self._decompressobj and end_of_request:
data += self._decompressobj.flush()
# We're at the end. Close the connection. Explicit check for zero here
# because self._length might be None.
if end_of_request:
self.close(socket_close=self._expect_close)
return data
def read_chunked(self, decode_content=True):
"""
Reads chunked transfer encoded bodies. This method returns a generator,
each iteration of which yields one chunk *unless* the chunks are
compressed, in which case it yields whatever the decompressor provides
for each chunk.
.. warning:: This may yield the empty string, without that being the
end of the body!
"""
if not self._chunked:
raise ChunkedDecodeError(
"Attempted chunked read of non-chunked body."
)
# Return early if possible.
if self._sock is None:
return
while True:
# Read to the newline to get the chunk length. This is a
# hexadecimal integer.
chunk_length = int(self._sock.readline().tobytes().strip(), 16)
data = b''
# If the chunk length is zero, consume the newline and then we're
# done. If we were decompressing data, return the remaining data.
if not chunk_length:
self._sock.readline()
if decode_content and self._decompressobj:
yield self._decompressobj.flush()
self.close(socket_close=self._expect_close)
break
# Then read that many bytes.
while chunk_length > 0:
chunk = self._sock.recv(chunk_length).tobytes()
data += chunk
chunk_length -= len(chunk)
assert chunk_length == 0
# Now, consume the newline.
self._sock.readline()
# We may need to decode the body.
if decode_content and self._decompressobj and data:
data = self._decompressobj.decompress(data)
yield data
return
def close(self, socket_close=False):
"""
Close the response. This causes the Response to lose access to the
backing socket. In some cases, it can also cause the backing connection
to be torn down.
:param socket_close: Whether to close the backing socket.
:returns: Nothing.
"""
if socket_close and self._parent is not None:
# The double call is necessary because we need to dereference the
# weakref. If the weakref is no longer valid, that's fine, there's
# no connection object to tell.
parent = self._parent()
if parent is not None:
parent.close()
self._sock = None
def _read_expect_closed(self, decode_content):
"""
Implements the logic for an unbounded read on a socket that we expect
to be closed by the remote end.
"""
# In this case, just read until we cannot read anymore. Then, close the
# socket, because we know we have to.
chunks = []
while True:
try:
chunk = self._sock.recv(65535).tobytes()
if not chunk:
break
except ConnectionResetError:
break
else:
chunks.append(chunk)
self.close(socket_close=True)
# We may need to decompress the data.
data = b''.join(chunks)
if decode_content and self._decompressobj:
data = self._decompressobj.decompress(data)
data += self._decompressobj.flush()
return data
def _normal_read_chunked(self, amt, decode_content):
"""
Implements the logic for calling ``read()`` on a chunked response.
"""
# If we're doing a full read, read it as chunked and then just join
# the chunks together!
if amt is None:
return self._buffered_data + b''.join(self.read_chunked())
if self._chunker is None:
self._chunker = self.read_chunked()
# Otherwise, we have a certain amount of data we want to read.
current_amount = len(self._buffered_data)
extra_data = [self._buffered_data]
while current_amount < amt:
try:
chunk = next(self._chunker)
except StopIteration:
self.close(socket_close=self._expect_close)
break
current_amount += len(chunk)
extra_data.append(chunk)
data = b''.join(extra_data)
self._buffered_data = data[amt:]
return data[:amt]
# The following methods implement the context manager protocol.
def __enter__(self):
return self
def __exit__(self, *args):
self.close()
return False # Never swallow exceptions.
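# Reading sketch (assumes ``conn`` is an HTTP11Connection that has already
# sent a request; ``process`` is a placeholder): bounded reads drain the
# body incrementally, and ``read`` returns b'' once the response is done.
#
#     with conn.get_response() as resp:
#         while True:
#             block = resp.read(4096)
#             if not block:
#                 break
#             process(block)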

View file

@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""
hyper/http20
~~~~~~~~~~~~
The HTTP/2 submodule that powers hyper.
"""

View file

@@ -0,0 +1,850 @@
# -*- coding: utf-8 -*-
"""
hyper/http20/connection
~~~~~~~~~~~~~~~~~~~~~~~
Objects that build hyper's connection-level HTTP/2 abstraction.
"""
from ..h2 import connection as h2Connection
from ..h2 import events as h2Events
from ..h2 import settings as h2Settings
from ..compat import ssl
from ..tls import wrap_socket, H2_NPN_PROTOCOLS, H2C_PROTOCOL
from ..common.exceptions import ConnectionResetError
from ..common.bufsocket import BufferedSocket
from ..common.headers import HTTPHeaderMap
from ..common.util import to_host_port_tuple, to_native_string, to_bytestring
from ..compat import unicode, bytes
from .stream import Stream
from .response import HTTP20Response, HTTP20Push
from .window import FlowControlManager
from .exceptions import ConnectionError, StreamResetError
from . import errors
import errno
import logging
import socket
import time
import threading
log = logging.getLogger(__name__)
DEFAULT_WINDOW_SIZE = 65535
TRANSIENT_SSL_ERRORS = (ssl.SSL_ERROR_WANT_READ, ssl.SSL_ERROR_WANT_WRITE)
class _LockedObject(object):
"""
A wrapper class that hides a specific object behind a lock.
The goal here is to provide a simple way to protect access to an object
that cannot safely be simultaneously accessed from multiple threads. The
intended use of this class is simple: take hold of it with a context
manager, which returns the protected object.
"""
def __init__(self, obj):
self.lock = threading.RLock()
self._obj = obj
def __enter__(self):
self.lock.acquire()
return self._obj
def __exit__(self, _exc_type, _exc_val, _exc_tb):
self.lock.release()
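# Usage sketch: the wrapped object is only reachable through the context
# manager, so every touch of the h2 state machine is serialized. This is
# exactly how the connection below holds its state machine:
#
#     conn = _LockedObject(h2Connection.H2Connection())
#     with conn as c:
#         c.initiate_connection()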
class HTTP20Connection(object):
"""
An object representing a single HTTP/2 connection to a server.
This object behaves similarly to the Python standard library's
``HTTPConnection`` object, with a few critical differences.
Most of the standard library's arguments to the constructor are irrelevant
for HTTP/2 or not supported by hyper.
:param host: The host to connect to. This may be an IP address or a
hostname, and optionally may include a port: for example,
``'http2bin.org'``, ``'http2bin.org:443'`` or ``'127.0.0.1'``.
:param port: (optional) The port to connect to. If not provided and one
also isn't provided in the ``host`` parameter, defaults to 443.
:param secure: (optional) Whether the request should use TLS. Defaults to
``False`` for most requests, but to ``True`` for any request issued to
port 443.
:param window_manager: (optional) The class to use to manage flow control
windows. This needs to be a subclass of the
:class:`BaseFlowControlManager
<hyper.http20.window.BaseFlowControlManager>`. If not provided,
:class:`FlowControlManager <hyper.http20.window.FlowControlManager>`
will be used.
:param enable_push: (optional) Whether the server is allowed to push
resources to the client (see
:meth:`get_pushes() <hyper.HTTP20Connection.get_pushes>`).
:param ssl_context: (optional) A class with custom certificate settings.
If not provided then hyper's default ``SSLContext`` is used instead.
:param proxy_host: (optional) The proxy to connect to. This can be an IP
address or a host name and may include a port.
:param proxy_port: (optional) The proxy port to connect to. If not provided
and one also isn't provided in the ``proxy_host`` parameter, defaults to
8080.
"""
def __init__(self, host, port=None, secure=None, window_manager=None,
enable_push=False, ssl_context=None, proxy_host=None,
proxy_port=None, force_proto=None, **kwargs):
"""
Creates an HTTP/2 connection to a specific server.
"""
if port is None:
self.host, self.port = to_host_port_tuple(host, default_port=443)
else:
self.host, self.port = host, port
if secure is not None:
self.secure = secure
elif self.port == 443:
self.secure = True
else:
self.secure = False
self._enable_push = enable_push
self.ssl_context = ssl_context
# Setup proxy details if applicable.
if proxy_host:
if proxy_port is None:
self.proxy_host, self.proxy_port = to_host_port_tuple(
proxy_host, default_port=8080
)
else:
self.proxy_host, self.proxy_port = proxy_host, proxy_port
else:
self.proxy_host = None
self.proxy_port = None
#: The size of the in-memory buffer used to store data from the
#: network. This is used as a performance optimisation. Increase buffer
#: size to improve performance; decrease it to conserve memory.
#: Defaults to 64kB.
self.network_buffer_size = 65536
self.force_proto = force_proto
# Concurrency
#
# Use one lock (_lock) to synchronize any interaction with global
# connection state, e.g. stream creation/deletion.
#
# It's ok to use the same lock in all these cases as they occur at
# different/linked points in the connection's lifecycle.
#
# Use another 2 locks (_write_lock, _read_lock) to synchronize
# - _send_cb
# - _recv_cb
# respectively.
#
# I.e., send/receive on the connection and its streams are serialized
# separately across the threads accessing the connection. This is a
# simple way of providing thread-safety.
#
# _write_lock and _read_lock synchronize all interactions between
# streams and the connection. There is a third I/O callback,
# _close_stream, passed to a stream's constructor. It does not need to
# be synchronized, it uses _send_cb internally (which is serialized);
# its other activity (safe deletion of the stream from self.streams)
# does not require synchronization.
#
# _read_lock may be acquired when already holding the _write_lock; when
# both are held, _write_lock is always acquired first.
#
# Either _read_lock or _write_lock may be acquired whilst holding _lock
# which should always be acquired before either of the other two.
self._lock = threading.RLock()
self._write_lock = threading.RLock()
self._read_lock = threading.RLock()
# Create the mutable state.
self.__wm_class = window_manager or FlowControlManager
self.__init_state()
return
def __init_state(self):
"""
Initializes the 'mutable state' portions of the HTTP/2 connection
object.
This method exists to enable HTTP20Connection objects to be reused if
they're closed, by resetting the connection object to its basic state
whenever it ends up closed. Any situation that needs to recreate the
connection can call this method and it will be done.
This is one of the only methods in hyper that is truly private, as
users should be strongly discouraged from messing about with connection
objects themselves.
"""
self._conn = _LockedObject(h2Connection.H2Connection())
# Streams are stored in a dictionary keyed off their stream IDs. We
# also save the most recent one for easy access without having to walk
# the dictionary.
#
# We add a set of all streams that we or the remote party forcefully
# closed with RST_STREAM, to avoid encountering issues where frames
# were already in flight before the RST was processed.
#
# Finally, we add a set of streams that recently received data. When
# using multiple threads, this avoids reading on threads that have just
# acquired the I/O lock whose streams have already had their data read
# for them by prior threads.
self.streams = {}
self.recent_stream = None
self.next_stream_id = 1
self.reset_streams = set()
self.recent_recv_streams = set()
# The socket used to send data.
self._sock = None
# Instantiate a window manager.
self.window_manager = self.__wm_class(65535)
return
def ping(self, opaque_data):
"""
Send a PING frame.
Concurrency
-----------
This method is thread-safe.
:param opaque_data: A bytestring of length 8 that will be sent in the
PING frame.
:returns: Nothing
"""
self.connect()
with self._write_lock:
with self._conn as conn:
conn.ping(to_bytestring(opaque_data))
self._send_outstanding_data()
def request(self, method, url, body=None, headers=None):
"""
This will send a request to the server using the HTTP request method
``method`` and the selector ``url``. If the ``body`` argument is
present, it should be a string or bytes object of data to send after the
headers are finished. Strings are encoded as UTF-8. To use other
encodings, pass a bytes object. The Content-Length header is set to the
length of the body field.
Concurrency
-----------
This method is thread-safe.
:param method: The request method, e.g. ``'GET'``.
:param url: The URL to contact, e.g. ``'/path/segment'``.
:param body: (optional) The request body to send. Must be a bytestring
or a file-like object.
:param headers: (optional) The headers to send on the request.
:returns: A stream ID for the request.
"""
headers = headers or {}
# Concurrency
#
# It's necessary to hold a lock while this method runs to satisfy H2
# protocol requirements.
#
# - putrequest obtains the next valid new stream_id
# - endheaders sends an HTTP/2 message using the new stream_id
#
# If threads interleave these operations, it could result in messages
# being sent in the wrong order, which can lead to streams with lower
# IDs being closed prematurely.
with self._write_lock:
stream_id = self.putrequest(method, url)
default_headers = (':method', ':scheme', ':authority', ':path')
for name, value in headers.items():
is_default = to_native_string(name) in default_headers
self.putheader(name, value, stream_id, replace=is_default)
# Convert the body to bytes if needed.
if body and isinstance(body, (unicode, bytes)):
body = to_bytestring(body)
self.endheaders(message_body=body, final=True, stream_id=stream_id)
return stream_id
def _get_stream(self, stream_id):
if stream_id is None:
return self.recent_stream
elif stream_id in self.reset_streams or stream_id not in self.streams:
raise StreamResetError("Stream forcefully closed")
else:
return self.streams[stream_id]
def get_response(self, stream_id=None):
"""
Should be called after a request is sent to get a response from the
server. If sending multiple parallel requests, pass the stream ID of
the request whose response you want. Returns a
:class:`HTTP20Response <hyper.HTTP20Response>` instance.
If you pass no ``stream_id``, you will receive the oldest
:class:`HTTP20Response <hyper.HTTP20Response>` still outstanding.
Concurrency
-----------
This method is thread-safe.
:param stream_id: (optional) The stream ID of the request for which to
get a response.
:returns: A :class:`HTTP20Response <hyper.HTTP20Response>` object.
"""
stream = self._get_stream(stream_id)
return HTTP20Response(stream.getheaders(), stream)
def get_pushes(self, stream_id=None, capture_all=False):
"""
Returns a generator that yields push promises from the server. **Note
that this method is not idempotent**: promises returned in one call
will not be returned in subsequent calls. Iterating through generators
returned by multiple calls to this method simultaneously results in
undefined behavior.
:param stream_id: (optional) The stream ID of the request for which to
get push promises.
:param capture_all: (optional) If ``False``, the generator will yield
all buffered push promises without blocking. If ``True``, the
generator will first yield all buffered push promises, then yield
additional ones as they arrive, and terminate when the original
stream closes.
:returns: A generator of :class:`HTTP20Push <hyper.HTTP20Push>` objects
corresponding to the streams pushed by the server.
"""
stream = self._get_stream(stream_id)
for promised_stream_id, headers in stream.get_pushes(capture_all):
yield HTTP20Push(
HTTPHeaderMap(headers), self.streams[promised_stream_id]
)
def connect(self):
"""
Connect to the server specified when the object was created. This is a
no-op if we're already connected.
Concurrency
-----------
This method is thread-safe. It may be called from multiple threads, and
is a no-op for all threads apart from the first.
:returns: Nothing.
"""
with self._lock:
if self._sock is not None:
return
if not self.proxy_host:
host = self.host
port = self.port
else:
host = self.proxy_host
port = self.proxy_port
sock = socket.create_connection((host, port))
if self.secure:
assert not self.proxy_host, "Proxy with HTTPS not supported."
sock, proto = wrap_socket(sock, host, self.ssl_context,
force_proto=self.force_proto)
else:
proto = H2C_PROTOCOL
log.debug("Selected NPN protocol: %s", proto)
assert proto in H2_NPN_PROTOCOLS or proto == H2C_PROTOCOL
self._sock = BufferedSocket(sock, self.network_buffer_size)
self._send_preamble()
def _connect_upgrade(self, sock):
"""
Called by the generic HTTP connection when we're being upgraded. Locks
in a new socket and places the backing state machine into an upgrade
state, then sends the preamble.
"""
self._sock = sock
with self._conn as conn:
conn.initiate_upgrade_connection()
conn.update_settings(
{h2Settings.ENABLE_PUSH: int(self._enable_push)}
)
self._send_outstanding_data()
# The server will also send an initial settings frame, so get it.
# However, we need to make sure our stream state is set up properly
# first, or any extra data we receive might cause us problems.
s = self._new_stream(local_closed=True)
self.recent_stream = s
self._recv_cb()
def _send_preamble(self):
"""
Sends the necessary HTTP/2 preamble.
"""
# We need to send the connection header immediately on this
# connection, followed by an initial settings frame.
with self._conn as conn:
conn.initiate_connection()
conn.update_settings(
{h2Settings.ENABLE_PUSH: int(self._enable_push)}
)
self._send_outstanding_data()
# The server will also send an initial settings frame, so get it.
self._recv_cb()
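# For reference: the client connection preface queued by
# initiate_connection() begins with the fixed octets
# b'PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n' (RFC 7540, section 3.5), followed
# by our initial SETTINGS frame.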
def close(self, error_code=None):
"""
Close the connection to the server.
Concurrency
-----------
This method is thread-safe.
:param error_code: (optional) The error code to reset all streams with.
:returns: Nothing.
"""
# Concurrency
#
# It's necessary to hold the lock here to ensure that threads closing
# the connection see consistent state, and to prevent creation of
# new streams while the connection is being closed.
#
# I/O occurs while the lock is held; waiting threads will see a delay.
with self._lock:
# Close all streams
for stream in list(self.streams.values()):
log.debug("Close stream %d" % stream.stream_id)
stream.close(error_code)
# Send GoAway frame to the server
try:
with self._conn as conn:
conn.close_connection(error_code or 0)
self._send_outstanding_data(tolerate_peer_gone=True)
except Exception as e: # pragma: no cover
log.warn("GoAway frame could not be sent: %s" % e)
if self._sock is not None:
self._sock.close()
self.__init_state()
def _send_outstanding_data(self, tolerate_peer_gone=False,
send_empty=True):
# Concurrency
#
# Hold _write_lock; getting and writing data from _conn is synchronized
#
# I/O occurs while the lock is held; waiting threads will see a delay.
with self._write_lock:
with self._conn as conn:
data = conn.data_to_send()
if data or send_empty:
self._send_cb(data, tolerate_peer_gone=tolerate_peer_gone)
def putrequest(self, method, selector, **kwargs):
"""
This should be the first call for sending a given HTTP request to a
server. It returns a stream ID for the given connection that should be
passed to all subsequent request building calls.
Concurrency
-----------
This method is thread-safe. It can be called from multiple threads,
and each thread should receive a unique stream ID.
:param method: The request method, e.g. ``'GET'``.
:param selector: The path selector.
:returns: A stream ID for the request.
"""
# Create a new stream.
s = self._new_stream()
# To this stream we need to immediately add a few headers that are
# HTTP/2 specific. These are: ":method", ":scheme", ":authority" and
# ":path". We can set all of these now.
s.add_header(":method", method)
s.add_header(":scheme", "https" if self.secure else "http")
s.add_header(":authority", self.host)
s.add_header(":path", selector)
# Save the stream.
self.recent_stream = s
return s.stream_id
def putheader(self, header, argument, stream_id=None, replace=False):
"""
Sends an HTTP header to the server, with name ``header`` and value
``argument``.
Unlike the ``httplib`` version of this function, this version does not
actually send anything when called. Instead, it queues the headers up
to be sent when you call
:meth:`endheaders() <hyper.HTTP20Connection.endheaders>`.
This method ensures that headers conform to the HTTP/2 specification.
In particular, it strips out the ``Connection`` header, as that header
is no longer valid in HTTP/2. This is to make it easy to write code
that runs correctly in both HTTP/1.1 and HTTP/2.
:param header: The name of the header.
:param argument: The value of the header.
:param stream_id: (optional) The stream ID of the request to add the
header to.
:returns: Nothing.
"""
stream = self._get_stream(stream_id)
stream.add_header(header, argument, replace)
return
def endheaders(self, message_body=None, final=False, stream_id=None):
"""
Sends the prepared headers to the server. If the ``message_body``
argument is provided it will also be sent to the server as the body of
the request, and the stream will immediately be closed. If the
``final`` argument is set to True, the stream will also immediately
be closed: otherwise, the stream will be left open and subsequent calls
to ``send()`` will be required.
:param message_body: (optional) The body to send. May not be provided
assuming that ``send()`` will be called.
:param final: (optional) If the ``message_body`` parameter is provided,
should be set to ``True`` if no further data will be provided via
calls to :meth:`send() <hyper.HTTP20Connection.send>`.
:param stream_id: (optional) The stream ID of the request to finish
sending the headers on.
:returns: Nothing.
"""
self.connect()
stream = self._get_stream(stream_id)
headers_only = (message_body is None and final)
# Concurrency:
#
# Hold _write_lock: synchronize access to the connection's HPACK
# encoder and decoder and the subsequent write to the connection
with self._write_lock:
stream.send_headers(headers_only)
# Send whatever data we have.
if message_body is not None:
stream.send_data(message_body, final)
self._send_outstanding_data()
return
def send(self, data, final=False, stream_id=None):
"""
Sends some data to the server. This data will be sent immediately
(excluding the normal HTTP/2 flow control rules). If this is the last
data that will be sent as part of this request, the ``final`` argument
should be set to ``True``. This will cause the stream to be closed.
:param data: The data to send.
:param final: (optional) Whether this is the last bit of data to be
sent on this request.
:param stream_id: (optional) The stream ID of the request to send the
data on.
:returns: Nothing.
"""
stream = self._get_stream(stream_id)
stream.send_data(data, final)
return
def _new_stream(self, stream_id=None, local_closed=False):
"""
Returns a new stream object for this connection.
"""
# Concurrency
#
# Hold _lock: ensure that threads accessing the connection see
# self.next_stream_id in a consistent state
#
# No I/O occurs, the delay in waiting threads depends on their number.
with self._lock:
s = Stream(
stream_id or self.next_stream_id,
self.__wm_class(DEFAULT_WINDOW_SIZE),
self._conn,
self._send_outstanding_data,
self._recv_cb,
self._stream_close_cb,
)
s.local_closed = local_closed
self.streams[s.stream_id] = s
self.next_stream_id += 2
return s
def _send_cb(self, data, tolerate_peer_gone=False):
"""
This is the callback used by streams to send data on the connection.
This acts as a dumb wrapper around the socket send method.
"""
# Concurrency
#
# Hold _write_lock: ensures only writer at a time
#
# I/O occurs while the lock is held; waiting threads will see a delay.
with self._write_lock:
try:
self._sock.sendall(data)
except socket.error as e:
if (not tolerate_peer_gone or
e.errno not in (errno.EPIPE, errno.ECONNRESET)):
raise
def _adjust_receive_window(self, frame_len):
"""
Adjusts the window size in response to receiving a DATA frame of length
``frame_len``. May send a WINDOW_UPDATE frame if necessary.
"""
# Concurrency
#
# Hold _write_lock; synchronize the window manager update and the
# subsequent potential write to the connection
#
# I/O may occur while the lock is held; waiting threads may see a
# delay.
with self._write_lock:
increment = self.window_manager._handle_frame(frame_len)
if increment:
with self._conn as conn:
conn.increment_flow_control_window(increment)
self._send_outstanding_data(tolerate_peer_gone=True)
return
def _single_read(self):
"""
Performs a single read from the socket and hands the data off to the
h2 connection object.
"""
# Begin by reading what we can from the socket.
#
# Concurrency
#
# Synchronizes reading the data
#
# I/O occurs while the lock is held; waiting threads will see a delay.
with self._read_lock:
if self._sock is None:
raise ConnectionError('tried to read after connection close')
self._sock.fill()
data = self._sock.buffer.tobytes()
self._sock.advance_buffer(len(data))
with self._conn as conn:
events = conn.receive_data(data)
stream_ids = set(getattr(e, 'stream_id', -1) for e in events)
stream_ids.discard(-1) # sentinel
stream_ids.discard(0) # connection events
self.recent_recv_streams |= stream_ids
for event in events:
if isinstance(event, h2Events.DataReceived):
self._adjust_receive_window(event.flow_controlled_length)
self.streams[event.stream_id].receive_data(event)
elif isinstance(event, h2Events.PushedStreamReceived):
if self._enable_push:
self._new_stream(event.pushed_stream_id, local_closed=True)
self.streams[event.parent_stream_id].receive_push(event)
else:
# Servers are forbidden from sending push promises when
# the ENABLE_PUSH setting is 0, but the spec leaves the
# client action undefined when they do it anyway. So we
# just refuse the stream and go about our business.
self._send_rst_frame(event.pushed_stream_id, 7)
elif isinstance(event, h2Events.ResponseReceived):
self.streams[event.stream_id].receive_response(event)
elif isinstance(event, h2Events.TrailersReceived):
self.streams[event.stream_id].receive_trailers(event)
elif isinstance(event, h2Events.StreamEnded):
self.streams[event.stream_id].receive_end_stream(event)
elif isinstance(event, h2Events.StreamReset):
if event.stream_id not in self.reset_streams:
self.reset_streams.add(event.stream_id)
self.streams[event.stream_id].receive_reset(event)
elif isinstance(event, h2Events.ConnectionTerminated):
# If we get GoAway with error code zero, we are doing a
# graceful shutdown and all is well. Otherwise, throw an
# exception.
self.close()
# If an error occurred, try to read the error description from
# code registry otherwise use the frame's additional data.
if event.error_code != 0:
try:
name, number, description = errors.get_data(
event.error_code
)
except ValueError:
error_string = (
"Encountered error code %d" % event.error_code
)
else:
error_string = (
"Encountered error %s %s: %s" %
(name, number, description)
)
raise ConnectionError(error_string)
else:
log.info("Received unhandled event %s", event)
self._send_outstanding_data(tolerate_peer_gone=True, send_empty=False)
def _recv_cb(self, stream_id=0):
"""
This is the callback used by streams to read data from the connection.
It reads what data it can, and hands it to the underlying h2
connection, before farming out any events that fire to the relevant
streams. If the socket remains readable, it will then optimistically
continue to attempt to read.
This is generally called by a stream, not by the connection itself, and
it's likely that streams will read a frame that doesn't belong to them.
:param stream_id: (optional) The stream ID of the stream reading data
from the connection.
"""
# Begin by reading what we can from the socket.
#
# Concurrency
#
# Ignore this read if some other thread has recently read data from
# the requested stream.
#
# The lock here looks broad, but is needed to ensure correct behavior
# when there are multiple readers of the same stream. It is
# re-acquired in the calls to self._single_read.
#
# I/O occurs while the lock is held; waiting threads will see a delay.
with self._read_lock:
log.debug('recv for stream %d with %s already present',
stream_id,
self.recent_recv_streams)
if stream_id in self.recent_recv_streams:
self.recent_recv_streams.discard(stream_id)
return
# make sure to validate the stream is readable.
# if the connection was reset, this stream id won't appear in
# self.streams and will cause this call to raise an exception.
if stream_id:
self._get_stream(stream_id)
# TODO: Re-evaluate this.
self._single_read()
count = 9
retry_wait = 0.05  # delaying the retry can improve responsiveness
while count and self._sock is not None and self._sock.can_read:
# If the connection has been closed, bail out, but retry
# on transient errors.
try:
self._single_read()
except ConnectionResetError:
break
except ssl.SSLError as e: # pragma: no cover
# these are transient errors that can occur while reading
# from ssl connections.
if e.args[0] in TRANSIENT_SSL_ERRORS:
continue
else:
raise
except socket.error as e: # pragma: no cover
if e.errno in (errno.EINTR, errno.EAGAIN):
# if 'interrupted' or 'try again', continue
time.sleep(retry_wait)
continue
elif e.errno == errno.ECONNRESET:
break
else:
raise
count -= 1
def _send_rst_frame(self, stream_id, error_code):
"""
Send reset stream frame with error code and remove stream from map.
"""
# Concurrency
#
# Hold _write_lock; synchronize generating the reset frame and writing
# it
#
# I/O occurs while the lock is held; waiting threads will see a delay.
with self._write_lock:
with self._conn as conn:
conn.reset_stream(stream_id, error_code=error_code)
self._send_outstanding_data()
# Concurrency
#
# Hold _lock; the stream storage is being updated. No I/O occurs, any
# delay is proportional to the number of waiting threads.
with self._lock:
try:
del self.streams[stream_id]
self.recent_recv_streams.discard(stream_id)
except KeyError as e: # pragma: no cover
log.warn(
"Stream with id %d does not exist: %s",
stream_id, e)
# Keep track of the fact that we reset this stream in case there
# are other frames in flight.
self.reset_streams.add(stream_id)
def _stream_close_cb(self, stream_id):
"""
Called by a stream when it is closing, so that state can be cleared.
"""
try:
del self.streams[stream_id]
self.recent_recv_streams.discard(stream_id)
except KeyError:
pass
# The following two methods are the implementation of the context manager
# protocol.
def __enter__(self):
return self
def __exit__(self, type, value, tb):
self.close()
return False # Never swallow exceptions.
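# A minimal multiplexing sketch (the host is illustrative): several
# requests are issued up front, then the responses are collected by
# stream ID.
#
#     with HTTP20Connection('http2bin.org', secure=True) as conn:
#         ids = [conn.request('GET', '/get') for _ in range(3)]
#         bodies = [conn.get_response(i).read() for i in ids]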

View file

@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
# flake8: noqa
"""
hyper/http20/errors
~~~~~~~~~~~~~~~~~~~
Global error code registry containing the established HTTP/2 error codes.
The registry is based on a 32-bit space so we use the error code to index into
the array.
The current registry is available at:
https://tools.ietf.org/html/rfc7540#section-11.4
"""
NO_ERROR = {'Name': 'NO_ERROR',
'Code': '0x0',
'Description': 'Graceful shutdown'}
PROTOCOL_ERROR = {'Name': 'PROTOCOL_ERROR',
'Code': '0x1',
'Description': 'Protocol error detected'}
INTERNAL_ERROR = {'Name': 'INTERNAL_ERROR',
'Code': '0x2',
'Description': 'Implementation fault'}
FLOW_CONTROL_ERROR = {'Name': 'FLOW_CONTROL_ERROR',
'Code': '0x3',
'Description': 'Flow control limits exceeded'}
SETTINGS_TIMEOUT = {'Name': 'SETTINGS_TIMEOUT',
'Code': '0x4',
'Description': 'Settings not acknowledged'}
STREAM_CLOSED = {'Name': 'STREAM_CLOSED',
'Code': '0x5',
'Description': 'Frame received for closed stream'}
FRAME_SIZE_ERROR = {'Name': 'FRAME_SIZE_ERROR',
'Code': '0x6',
'Description': 'Frame size incorrect'}
REFUSED_STREAM = {'Name': 'REFUSED_STREAM',
'Code': '0x7',
'Description': 'Stream not processed'}
CANCEL = {'Name': 'CANCEL',
'Code': '0x8',
'Description': 'Stream cancelled'}
COMPRESSION_ERROR = {'Name': 'COMPRESSION_ERROR',
'Code': '0x9',
'Description': 'Compression state not updated'}
CONNECT_ERROR = {'Name': 'CONNECT_ERROR',
'Code': '0xa',
'Description':
'TCP connection error for CONNECT method'}
ENHANCE_YOUR_CALM = {'Name': 'ENHANCE_YOUR_CALM',
'Code': '0xb',
'Description': 'Processing capacity exceeded'}
INADEQUATE_SECURITY = {'Name': 'INADEQUATE_SECURITY',
'Code': '0xc',
'Description':
'Negotiated TLS parameters not acceptable'}
HTTP_1_1_REQUIRED = {'Name': 'HTTP_1_1_REQUIRED',
'Code': '0xd',
'Description': 'Use HTTP/1.1 for the request'}
H2_ERRORS = [NO_ERROR, PROTOCOL_ERROR, INTERNAL_ERROR, FLOW_CONTROL_ERROR,
SETTINGS_TIMEOUT, STREAM_CLOSED, FRAME_SIZE_ERROR, REFUSED_STREAM,
CANCEL, COMPRESSION_ERROR, CONNECT_ERROR, ENHANCE_YOUR_CALM,
INADEQUATE_SECURITY, HTTP_1_1_REQUIRED]
def get_data(error_code):
"""
Look up the error code description; raise a ``ValueError`` if it is not available.
"""
if error_code < 0 or error_code >= len(H2_ERRORS):
raise ValueError("Error code is invalid")
name = H2_ERRORS[error_code]['Name']
number = H2_ERRORS[error_code]['Code']
description = H2_ERRORS[error_code]['Description']
return name, number, description
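# Example lookups (values taken from the registry above):
#
#     get_data(7)   # -> ('REFUSED_STREAM', '0x7', 'Stream not processed')
#     get_data(99)  # raises ValueError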

View file

@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
"""
hyper/http20/exceptions
~~~~~~~~~~~~~~~~~~~~~~~
This defines exceptions used in the HTTP/2 portion of hyper.
"""
class HTTP20Error(Exception):
"""
The base class for all of ``hyper``'s HTTP/2-related exceptions.
"""
pass
class HPACKEncodingError(HTTP20Error):
"""
An error has been encountered while performing HPACK encoding.
"""
pass
class HPACKDecodingError(HTTP20Error):
"""
An error has been encountered while performing HPACK decoding.
"""
pass
class ConnectionError(HTTP20Error):
"""
The remote party signalled an error affecting the entire HTTP/2
connection, and the connection has been closed.
"""
pass
class ProtocolError(HTTP20Error):
"""
The remote party violated the HTTP/2 protocol.
"""
pass
class StreamResetError(HTTP20Error):
"""
A stream was forcefully reset by the remote party.
"""
pass
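# Handling sketch (``conn`` and ``stream_id`` are illustrative): a reset
# surfaces when the affected stream is next touched.
#
#     try:
#         resp = conn.get_response(stream_id)
#     except StreamResetError:
#         pass  # e.g. retry the request on a fresh stream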

View file

@@ -0,0 +1,228 @@
# -*- coding: utf-8 -*-
"""
hyper/http20/response
~~~~~~~~~~~~~~~~~~~~~
Contains the HTTP/2 equivalent of the HTTPResponse object defined in
httplib/http.client.
"""
import logging
import zlib
from ..common.decoder import DeflateDecoder
from ..common.headers import HTTPHeaderMap
log = logging.getLogger(__name__)
def strip_headers(headers):
"""
Strips the headers attached to the instance of any header beginning
with a colon that ``hyper`` doesn't understand. This method logs at
warning level about the deleted headers, for discoverability.
"""
# Convert to list to ensure that we don't mutate the headers while
# we iterate over them.
for name in list(headers.keys()):
if name.startswith(b':'):
del headers[name]
class HTTP20Response(object):
"""
An ``HTTP20Response`` wraps the HTTP/2 response from the server. It
provides access to the response headers and the entity body. The response
is an iterable object and can be used in a with statement (though due to
the persistent connections used in HTTP/2 this has no effect, and is done
solely for compatibility).
"""
def __init__(self, headers, stream):
#: The reason phrase returned by the server. This is not used in
#: HTTP/2, and so is always the empty string.
self.reason = ''
status = headers[b':status'][0]
strip_headers(headers)
#: The status code returned by the server.
self.status = int(status)
#: The response headers. These are determined upon creation, assigned
#: once, and never assigned again.
self.headers = headers
# The response trailers. These are always initially ``None``.
self._trailers = None
# The stream this response is being sent over.
self._stream = stream
# We always read in one-data-frame increments from the stream, so we
# may need to buffer some for incomplete reads.
self._data_buffer = b''
# This object is used for decompressing gzipped response bodies. Right
# now we only support gzip because that's all the RFC mandates of us.
# Later we'll add support for more encodings.
# This 16 + MAX_WBITS nonsense is to force gzip. See this
# Stack Overflow answer for more:
# http://stackoverflow.com/a/2695466/1401686
if b'gzip' in self.headers.get(b'content-encoding', []):
self._decompressobj = zlib.decompressobj(16 + zlib.MAX_WBITS)
elif b'deflate' in self.headers.get(b'content-encoding', []):
self._decompressobj = DeflateDecoder()
else:
self._decompressobj = None
@property
def trailers(self):
"""
Trailers on the HTTP message, if any.
.. warning:: Note that this property requires that the stream is
totally exhausted. This means that, if you have not
completely read from the stream, all stream data will be
read into memory.
"""
if self._trailers is None:
self._trailers = self._stream.gettrailers() or HTTPHeaderMap()
strip_headers(self._trailers)
return self._trailers
def read(self, amt=None, decode_content=True):
"""
Reads the response body, or up to the next ``amt`` bytes.
:param amt: (optional) The amount of data to read. If not provided, all
the data will be read from the response.
:param decode_content: (optional) If ``True``, will transparently
decode the response data.
:returns: The read data. Note that if ``decode_content`` is set to
``True``, the actual amount of data returned may be different to
the amount requested.
"""
if amt is not None and amt <= len(self._data_buffer):
data = self._data_buffer[:amt]
self._data_buffer = self._data_buffer[amt:]
response_complete = False
elif amt is not None:
read_amt = amt - len(self._data_buffer)
self._data_buffer += self._stream._read(read_amt)
data = self._data_buffer[:amt]
self._data_buffer = self._data_buffer[amt:]
response_complete = len(data) < amt
else:
data = b''.join([self._data_buffer, self._stream._read()])
response_complete = True
# We may need to decode the body.
if decode_content and self._decompressobj and data:
data = self._decompressobj.decompress(data)
# If we're at the end of the request, we have some cleaning up to do.
# Close the stream, and if necessary flush the buffer.
if response_complete:
if decode_content and self._decompressobj:
data += self._decompressobj.flush()
if self._stream.response_headers:
self.headers.merge(self._stream.response_headers)
# We're at the end, close the connection.
if response_complete:
self.close()
return data
def read_chunked(self, decode_content=True):
"""
Reads chunked transfer encoded bodies. This method returns a generator,
each iteration of which yields one data frame *unless* the frames
contain compressed data and ``decode_content`` is ``True``, in which
case it yields whatever the decompressor provides for each chunk.
.. warning:: This may yield the empty string, without that being the
end of the body!
"""
while True:
data = self._stream._read_one_frame()
if data is None:
break
if decode_content and self._decompressobj:
data = self._decompressobj.decompress(data)
yield data
if decode_content and self._decompressobj:
yield self._decompressobj.flush()
self.close()
return
def fileno(self):
"""
Return the ``fileno`` of the underlying socket. This function is
currently not implemented.
"""
raise NotImplementedError("Not currently implemented.")
def close(self):
"""
Close the response. In effect this closes the backing HTTP/2 stream.
:returns: Nothing.
"""
self._stream.close()
# The following methods implement the context manager protocol.
def __enter__(self):
return self
def __exit__(self, *args):
self.close()
return False # Never swallow exceptions.
class HTTP20Push(object):
"""
Represents a request-response pair sent by the server through the server
push mechanism.
"""
def __init__(self, request_headers, stream):
#: The scheme of the simulated request
self.scheme = request_headers[b':scheme'][0]
#: The method of the simulated request (must be safe and cacheable,
#: e.g. GET)
self.method = request_headers[b':method'][0]
#: The authority of the simulated request (usually host:port)
self.authority = request_headers[b':authority'][0]
#: The path of the simulated request
self.path = request_headers[b':path'][0]
strip_headers(request_headers)
#: The headers the server attached to the simulated request.
self.request_headers = request_headers
self._stream = stream
def get_response(self):
"""
Get the pushed response provided by the server.
:returns: A :class:`HTTP20Response <hyper.HTTP20Response>` object
representing the pushed response.
"""
return HTTP20Response(self._stream.getheaders(), self._stream)
def cancel(self):
"""
Cancel the pushed response and close the stream.
:returns: Nothing.
"""
self._stream.close(8) # CANCEL
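# Sketch of consuming pushes alongside the original response (the host and
# path are illustrative; ``enable_push=True`` must be set on the
# connection):
#
#     conn = HTTP20Connection('http2bin.org', enable_push=True)
#     stream_id = conn.request('GET', '/')
#     for push in conn.get_pushes(stream_id):
#         pushed_body = push.get_response().read()
#     body = conn.get_response(stream_id).read()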

View file

@@ -0,0 +1,341 @@
# -*- coding: utf-8 -*-
"""
hyper/http20/stream
~~~~~~~~~~~~~~~~~~~
Objects that make up the stream-level abstraction of hyper's HTTP/2 support.
These objects are not expected to be part of the public HTTP/2 API: they're
intended purely for use inside hyper's HTTP/2 abstraction.
Conceptually, a single HTTP/2 connection is made up of many streams: each
stream is an independent, bi-directional sequence of HTTP headers and data.
Each stream is identified by a monotonically increasing integer, assigned to
the stream by the endpoint that initiated the stream.
"""
from ..h2 import exceptions as h2Exceptions
from ..common.headers import HTTPHeaderMap
from .util import h2_safe_headers
import logging
log = logging.getLogger(__name__)
# Define the largest chunk of data we'll send in one go. Realistically, we
# should take the MSS into account but that's pretty dull, so let's just say
# 1kB and call it a day.
MAX_CHUNK = 1024
class Stream(object):
"""
A single HTTP/2 stream.
A stream is an independent, bi-directional sequence of HTTP headers and
data. Each stream is identified by a single integer. From an HTTP
perspective, a stream _approximately_ matches a single request-response
pair.
"""
def __init__(self,
stream_id,
window_manager,
connection,
send_outstanding_data,
recv_cb,
close_cb):
self.stream_id = stream_id
self.headers = HTTPHeaderMap()
# Set to a key-value set of the response headers once their
# HEADERS..CONTINUATION frame sequence finishes.
self.response_headers = None
# Set to a key-value set of the response trailers once their
# HEADERS..CONTINUATION frame sequence finishes.
self.response_trailers = None
# A dict mapping the promised stream ID of a pushed resource to a
# key-value set of its request headers. Entries are added once their
# PUSH_PROMISE..CONTINUATION frame sequence finishes.
self.promised_headers = {}
# Unconsumed response data chunks. Empties after every call to _read().
self.data = []
# Whether the remote side has completed the stream.
self.remote_closed = False
# Whether we have closed the stream.
self.local_closed = False
# There are two flow control windows: one for data we're sending,
# one for data being sent to us.
self._in_window_manager = window_manager
# Save off a reference to the state machine wrapped with lock.
self._conn = connection
# Save off a data callback.
self._send_outstanding_data = send_outstanding_data
self._recv_cb = recv_cb
self._close_cb = close_cb
def add_header(self, name, value, replace=False):
"""
Adds a single HTTP header to the headers to be sent on the request.
"""
if not replace:
self.headers[name] = value
else:
self.headers.replace(name, value)
def send_headers(self, end_stream=False):
"""
Sends the complete saved header block on the stream.
"""
headers = self.get_headers()
with self._conn as conn:
conn.send_headers(self.stream_id, headers, end_stream)
self._send_outstanding_data()
if end_stream:
self.local_closed = True
def send_data(self, data, final):
"""
Send some data on the stream. If this is the end of the data to be
sent, the ``final`` flag _must_ be set to True. If no data is to be
sent, set ``data`` to ``None``.
"""
# Define a utility iterator for file objects.
def file_iterator(fobj):
while True:
data = fobj.read(MAX_CHUNK)
yield data
if len(data) < MAX_CHUNK:
break
# Build the appropriate iterator for the data, in chunks of CHUNK_SIZE.
if hasattr(data, 'read'):
chunks = file_iterator(data)
else:
chunks = (data[i:i+MAX_CHUNK]
for i in range(0, len(data), MAX_CHUNK))
for chunk in chunks:
self._send_chunk(chunk, final)
def _read(self, amt=None):
"""
Read data from the stream. Unlike normal read behaviour, this
function returns _at least_ ``amt`` bytes of data, but may return more.
"""
def listlen(lst):
return sum(map(len, lst))
# Keep reading until the stream is closed or we get enough data.
while (not self.remote_closed and
(amt is None or listlen(self.data) < amt)):
self._recv_cb(stream_id=self.stream_id)
result = b''.join(self.data)
self.data = []
return result
def _read_one_frame(self):
"""
Reads a single data frame from the stream and returns it.
"""
# Keep reading until the stream is closed or we have a data frame.
while not self.remote_closed and not self.data:
self._recv_cb(stream_id=self.stream_id)
try:
return self.data.pop(0)
except IndexError:
return None
def receive_response(self, event):
"""
Receive response headers.
"""
# TODO: If this is called while we're still sending data, we may want
# to stop sending that data and check the response. Early responses to
# big uploads are almost always a problem.
self.response_headers = HTTPHeaderMap(event.headers)
def receive_trailers(self, event):
"""
Receive response trailers.
"""
self.response_trailers = HTTPHeaderMap(event.headers)
def receive_push(self, event):
"""
Receive the request headers for a pushed stream.
"""
self.promised_headers[event.pushed_stream_id] = event.headers
def receive_data(self, event):
"""
Receive a chunk of data.
"""
size = event.flow_controlled_length
increment = self._in_window_manager._handle_frame(size)
# Append the data to the buffer.
self.data.append(event.data)
if increment:
try:
with self._conn as conn:
conn.increment_flow_control_window(
increment, stream_id=self.stream_id
)
except h2Exceptions.StreamClosedError:
# We haven't got to it yet, but the stream is already
# closed. We don't need to increment the window in this
# case!
pass
else:
self._send_outstanding_data()
def receive_end_stream(self, event):
"""
Called when the remote endpoint has finished sending data on this stream.
"""
self.remote_closed = True
def receive_reset(self, event):
"""
Stream forcefully reset.
"""
self.remote_closed = True
self._close_cb(self.stream_id)
def get_headers(self):
"""
Provides the headers to the connection object.
"""
# Strip any headers invalid in H2.
return h2_safe_headers(self.headers)
def getheaders(self):
"""
Blocks until the response headers have been received, then returns them
as a key-value set.
"""
# Keep reading until all headers are received.
while self.response_headers is None:
self._recv_cb(stream_id=self.stream_id)
# Find the Content-Length header if present.
self._in_window_manager.document_size = (
int(self.response_headers.get(b'content-length', [0])[0])
)
return self.response_headers
def gettrailers(self):
"""
Once the stream has been exhausted, returns a key-value set of the
trailers of the response to the original request.
.. warning:: Note that this method requires that the stream is
totally exhausted. This means that, if you have not
completely read from the stream, all stream data will be
read into memory.
:returns: The key-value set of the trailers, or ``None`` if no trailers
were sent.
"""
# Keep reading until the stream is done.
while not self.remote_closed:
self._recv_cb(stream_id=self.stream_id)
return self.response_trailers
def get_pushes(self, capture_all=False):
"""
Returns a generator that yields push promises from the server. Note
that this method is not idempotent; promises returned in one call will
not be returned in subsequent calls. Iterating through generators
returned by multiple calls to this method simultaneously results in
undefined behavior.
:param capture_all: If ``False``, the generator will yield all buffered
push promises without blocking. If ``True``, the generator will
first yield all buffered push promises, then yield additional ones
as they arrive, and terminate when the original stream closes.
"""
while True:
for pair in self.promised_headers.items():
yield pair
self.promised_headers = {}
if not capture_all or self.remote_closed:
break
self._recv_cb(stream_id=self.stream_id)
def close(self, error_code=None):
"""
Closes the stream. If the stream is currently open, attempts to close
it as gracefully as possible.
:param error_code: (optional) The error code to reset the stream with.
:returns: Nothing.
"""
# FIXME: I think this is overbroad, but for now it's probably ok.
if not (self.remote_closed and self.local_closed):
try:
with self._conn as conn:
conn.reset_stream(self.stream_id, error_code or 0)
except h2Exceptions.ProtocolError:
# If for any reason we can't reset the stream, just
# tolerate it.
pass
else:
self._send_outstanding_data(tolerate_peer_gone=True)
self.remote_closed = True
self.local_closed = True
self._close_cb(self.stream_id)
@property
def _out_flow_control_window(self):
"""
The size of our outbound flow control window.
"""
with self._conn as conn:
return conn.local_flow_control_window(self.stream_id)
def _send_chunk(self, data, final):
"""
Implements most of the sending logic.
Takes a single chunk of size at most MAX_CHUNK, wraps it in a frame and
sends it. Optionally sets the END_STREAM flag if this is the last chunk
(determined by being of size less than MAX_CHUNK) and no more data is
to be sent.
"""
# If we don't fit in the connection window, try popping frames off the
# connection in hope that one might be a window update frame.
while len(data) > self._out_flow_control_window:
self._recv_cb()
# If the length of the data is less than MAX_CHUNK, we're probably
# at the end of the file. If this is the end of the data, mark it
# as END_STREAM.
end_stream = False
if len(data) < MAX_CHUNK and final:
end_stream = True
# Send the frame and decrement the flow control window.
with self._conn as conn:
conn.send_data(
stream_id=self.stream_id, data=data, end_stream=end_stream
)
self._send_outstanding_data()
if end_stream:
self.local_closed = True
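# Illustrative sketch (not part of the vendored module): send_data() slices
# a bytes-like body into MAX_CHUNK-sized pieces, so a 2500-byte body becomes
# three DATA frames:
#
#     >>> body = b'x' * 2500
#     >>> [len(body[i:i+MAX_CHUNK]) for i in range(0, len(body), MAX_CHUNK)]
#     [1024, 1024, 452]
#
# Only the last chunk is shorter than MAX_CHUNK, which is how _send_chunk()
# (together with the ``final`` flag) decides when to set END_STREAM.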

View file

@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
"""
hyper/http20/util
~~~~~~~~~~~~~~~~~
Utility functions for use with hyper.
"""
from collections import defaultdict
def combine_repeated_headers(kvset):
"""
Given a list of key-value pairs (like for HTTP headers!), combines pairs
with the same key together, separating the values with NULL bytes. This
function maintains the order of input keys, because it's awesome.
"""
def set_pop(set, item):
set.remove(item)
return item
headers = defaultdict(list)
keys = set()
for key, value in kvset:
headers[key].append(value)
keys.add(key)
return [(set_pop(keys, k), b'\x00'.join(headers[k])) for k, v in kvset
if k in keys]
def split_repeated_headers(kvset):
"""
Given a set of key-value pairs (like for HTTP headers!), finds values that
have NULL bytes in them and splits them into a dictionary whose values are
lists.
"""
headers = defaultdict(list)
for key, value in kvset:
headers[key] = value.split(b'\x00')
return dict(headers)
def h2_safe_headers(headers):
"""
This method takes a set of headers that are provided by the user and
transforms them into a form that is safe for emitting over HTTP/2.
Currently, this strips the Connection header and any header it refers to.
"""
stripped = {
i.lower().strip()
for k, v in headers if k == 'connection'
for i in v.split(',')
}
stripped.add('connection')
return [header for header in headers if header[0] not in stripped]
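# Doctest-style sketch of the helpers above (illustrative only):
#
#     >>> combine_repeated_headers([(b'cookie', b'a=1'), (b'cookie', b'b=2')])
#     [(b'cookie', b'a=1\x00b=2')]
#     >>> split_repeated_headers([(b'cookie', b'a=1\x00b=2')])
#     {b'cookie': [b'a=1', b'b=2']}
#     >>> h2_safe_headers([('connection', 'keep-alive'),
#     ...                  ('keep-alive', 'timeout=5'),
#     ...                  ('host', 'example.com')])
#     [('host', 'example.com')]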

View file

@@ -0,0 +1,154 @@
# -*- coding: utf-8 -*-
"""
hyper/http20/window
~~~~~~~~~~~~~~~~~~~
Objects that understand flow control in hyper.
HTTP/2 implements connection- and stream-level flow control. This flow
control is mandatory. Unfortunately, it's difficult for hyper to be
all that intelligent about how it manages flow control in a general case.
This module defines an interface for pluggable flow-control managers. These
managers will define a flow-control policy. This policy will determine when to
send WINDOW_UPDATE frames.
"""
class BaseFlowControlManager(object):
"""
The abstract base class for flow control managers.
This class defines the interface for pluggable flow-control managers. A
flow-control manager defines a flow-control policy, which basically boils
down to deciding when to increase the flow control window.
This decision can be based on a number of factors:
- the initial window size,
- the size of the document being retrieved,
- the size of the received data frames,
- any other information the manager can obtain
A flow-control manager may be defined at the connection level or at the
stream level. If no stream-level flow-control manager is defined, an
instance of the connection-level flow control manager is used.
A class that inherits from this one must not adjust the member variables
defined in this class. They are updated and set by methods on this class.
"""
def __init__(self, initial_window_size, document_size=None):
#: The initial size of the connection window in bytes. This is set at
#: creation time.
self.initial_window_size = initial_window_size
#: The current size of the connection window. Any methods overridden
#: by the user must not adjust this value.
self.window_size = initial_window_size
#: The size of the document being retrieved, in bytes. This is
#: retrieved from the Content-Length header, if provided. Note that
#: the total number of bytes that will be received may be larger than
#: this value due to HTTP/2 padding. It should not be assumed that,
#: simply because the document size is smaller than the initial window
#: size, there will never be a need to increase the window size.
self.document_size = document_size
def increase_window_size(self, frame_size):
"""
Determine whether or not to emit a WINDOW_UPDATE frame.
This method should be overridden to determine, based on the state of
the system and the size of the received frame, whether or not a
WINDOW_UPDATE frame should be sent for the stream.
This method should *not* adjust any of the member variables of this
class.
Note that this method is called before the window size is decremented
as a result of the frame being handled.
:param frame_size: The size of the received frame. Note that this *may*
be zero. When this parameter is zero, it may still be appropriate to
emit a WINDOW_UPDATE frame. A zero-length frame size is usually
associated with a change in the size of the receive window due to a
SETTINGS frame.
:returns: The amount to increase the receive window by. Return zero if
the window should not be increased.
"""
raise NotImplementedError(
"FlowControlManager is an abstract base class"
)
def blocked(self):
"""
Called whenever the remote endpoint reports that it is blocked behind
the flow control window.
When this method is called the remote endpoint is signaling that it
has more data to send and that the transport layer is capable of
transmitting it, but that the HTTP/2 flow control window prevents it
being sent.
This method should return the size by which the window should be
incremented, which may be zero. This method should *not* adjust any
of the member variables of this class.
:returns: The amount to increase the receive window by. Return zero if
the window should not be increased.
"""
# TODO: Is this method necessary?
raise NotImplementedError(
"FlowControlManager is an abstract base class"
)
def _handle_frame(self, frame_size):
"""
This internal method is called by the connection or stream that owns
the flow control manager. It handles the generic behaviour of flow
control managers: namely, keeping track of the window size.
"""
rc = self.increase_window_size(frame_size)
self.window_size -= frame_size
self.window_size += rc
return rc
def _blocked(self):
"""
This internal method is called by the connection or stream that owns
the flow control manager. It handles the generic behaviour of receiving
BLOCKED frames.
"""
rc = self.blocked()
self.window_size += rc
return rc
class FlowControlManager(BaseFlowControlManager):
"""
``hyper``'s default flow control manager.
This implements hyper's default flow control algorithm, which attempts to
reduce the number of WINDOW_UPDATE frames we send without blocking the
remote endpoint behind the flow control window.
This algorithm will become more complicated over time. In its current form,
the algorithm is very simple:
- When the flow control window drops below 1/4 of the maximum size,
increment back to the maximum.
- Likewise, when the flow control window drops below 1kB, increment back
to the maximum.
"""
def increase_window_size(self, frame_size):
future_window_size = self.window_size - frame_size
if ((future_window_size < (self.initial_window_size / 4)) or
(future_window_size < 1000)):
return self.initial_window_size - future_window_size
return 0
def blocked(self):
return self.initial_window_size - self.window_size
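# Worked sketch of the default policy (illustrative only): with the usual
# 65535-byte initial window, nothing is emitted until the window would drop
# below a quarter of the maximum, at which point it is topped back up.
#
#     >>> fcm = FlowControlManager(65535)
#     >>> fcm._handle_frame(16384)   # window 65535 -> 49151: no update
#     0
#     >>> fcm._handle_frame(16384)   # window -> 32767: still above 1/4
#     0
#     >>> fcm._handle_frame(16384)   # window would hit 16383 < 65535/4
#     49152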

View file

@@ -0,0 +1,116 @@
# -*- coding: utf-8 -*-
"""
hyper/httplib_compat
~~~~~~~~~~~~~~~~~~~~
This file defines the publicly-accessible API for hyper. This API also
constitutes the abstraction layer between HTTP/1.1 and HTTP/2.
This API doesn't currently work, and is a lower priority than the HTTP/2
stack at this time.
"""
import socket
try:
import http.client as httplib
except ImportError:
import httplib
from .compat import ssl
from .http20.tls import wrap_socket
# If there's no NPN support, we're going to drop all support for HTTP/2.
try:
support_20 = ssl.HAS_NPN
except AttributeError:
support_20 = False
# The HTTPConnection object is currently always the underlying one.
HTTPConnection = httplib.HTTPConnection
HTTPSConnection = httplib.HTTPSConnection
# If we have NPN support, define our custom one, otherwise just use the
# default.
if support_20:
class HTTPSConnection(object):
"""
An object representing a single HTTPS connection, whether HTTP/1.1 or
HTTP/2.
More specifically, this object represents an abstraction over the
distinction. This object encapsulates a connection object for one of
the specific types of connection, and delegates most of the work to
that object.
"""
def __init__(self, *args, **kwargs):
# Whatever arguments and keyword arguments are passed to this
# object need to be saved off for when we initialise one of our
# subsidiary objects.
self._original_args = args
self._original_kwargs = kwargs
# Set up some variables we're going to use later.
self._sock = None
self._conn = None
# Prepare our backlog of method calls.
self._call_queue = []
def __getattr__(self, name):
# Anything that can't be found on this instance is presumably a
# property of the underlying connection object.
# We need to be a little bit careful here. There are a few methods
# that can act on an HTTPSConnection before it actually connects to
# the remote server. We don't want to change the semantics of the
# HTTPSConnection, so we need to spot these and queue them up. When
# we actually create the backing Connection, we'll apply them
# immediately. These methods can't throw exceptions, so we should
# be fine.
delay_methods = ["set_tunnel", "set_debuglevel"]
if self._conn is None and name in delay_methods:
# Return a little closure that saves off the method call to
# apply later.
# Note: __getattr__ returns this function directly, so there is
# no implicit first argument; capture everything the caller
# passes.
def capture(*args, **kwargs):
self._call_queue.append((name, args, kwargs))
return capture
elif self._conn is None:
# We're being told to do something! We can now connect to the
# remote server and build the connection object.
self._delayed_connect()
# Call through to the underlying object.
return getattr(self._conn, name)
def _delayed_connect(self):
"""
Called when we need to work out what kind of HTTPS connection we're
actually going to use.
"""
# As a quick hack, we're going to create a temporary
# HTTPConnection object to parse the args and kwargs for us, and
# grab the values out.
tempconn = httplib.HTTPConnection(*self._original_args,
**self._original_kwargs)
host = tempconn.host
port = tempconn.port
timeout = tempconn.timeout
source_address = tempconn.source_address
# Connect to the remote server.
sock = socket.create_connection(
(host, port),
timeout,
source_address
)
# Wrap it in TLS. This needs to be looked at in future when I pull
# in the TLS verification logic from urllib3, but right now we
# accept insecurity because no-one's using this anyway.
sock = wrap_socket(sock, host)
# At this early stage the library can't do HTTP/2, so who cares?
tempconn.sock = sock
self._sock = sock
self._conn = tempconn
return
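# Sketch of the queue-then-replay behaviour above (illustrative only):
# calls made before any request are captured rather than executed.
#
#     >>> conn = HTTPSConnection('example.com', 443)
#     >>> conn.set_debuglevel(1)     # no socket yet, so this is queued
#     >>> conn._call_queue
#     [('set_debuglevel', (1,), {})]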

View file

@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""
hyper/packages
~~~~~~~~~~~~~~
This module contains external packages that are vendored into hyper.
"""

View file

@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
"""
hpack
~~~~~
HTTP/2 header encoding for Python.
"""
__version__ = '1.0.1'

View file

@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
"""
hpack/compat
~~~~~~~~~~~~
Normalizes the Python 2/3 API for internal use.
"""
import sys
_ver = sys.version_info
is_py2 = _ver[0] == 2
is_py3 = _ver[0] == 3
if is_py2:
def to_byte(char):
return ord(char)
def decode_hex(b):
return b.decode('hex')
unicode = unicode
bytes = str
elif is_py3:
def to_byte(char):
return char
def decode_hex(b):
return bytes.fromhex(b)
unicode = str
bytes = bytes
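# Quick sanity check (illustrative only; output shown for Python 3, where
# decode_hex returns bytes rather than a str):
#
#     >>> to_byte(b'A'[0])
#     65
#     >>> decode_hex('ff')
#     b'\xff'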

View file

@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
"""
hyper/http20/exceptions
~~~~~~~~~~~~~~~~~~~~~~~
This defines exceptions used in the HTTP/2 portion of hyper.
"""
class HTTP20Error(Exception):
"""
The base class for all of ``hyper``'s HTTP/2-related exceptions.
"""
pass
class HPACKEncodingError(HTTP20Error):
"""
An error has been encountered while performing HPACK encoding.
"""
pass
class HPACKDecodingError(HTTP20Error):
"""
An error has been encountered while performing HPACK decoding.
"""
pass
class ConnectionError(HTTP20Error):
"""
The remote party signalled an error affecting the entire HTTP/2
connection, and the connection has been closed.
"""
pass
class ProtocolError(HTTP20Error):
"""
The remote party violated the HTTP/2 protocol.
"""
pass
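# Since everything above inherits from HTTP20Error, callers can catch the
# whole family at once (illustrative sketch; ``conn`` is assumed to be an
# HTTP20Connection):
#
#     try:
#         response = conn.get_response()
#     except HTTP20Error:
#         ...  # covers ConnectionError, ProtocolError and the HPACK errors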

View file

@@ -0,0 +1,662 @@
# -*- coding: utf-8 -*-
"""
hpack/hpack
~~~~~~~~~~~
Implements the HPACK header compression algorithm as detailed by the IETF.
"""
import collections
import logging
from .compat import to_byte
from .huffman import HuffmanDecoder, HuffmanEncoder
from .huffman_constants import (
REQUEST_CODES, REQUEST_CODES_LENGTH
)
log = logging.getLogger(__name__)
def encode_integer(integer, prefix_bits):
"""
This encodes an integer according to the wacky integer encoding rules
defined in the HPACK spec.
"""
log.debug("Encoding %d with %d bits", integer, prefix_bits)
max_number = (2 ** prefix_bits) - 1
if (integer < max_number):
return bytearray([integer]) # Seriously?
else:
elements = [max_number]
integer = integer - max_number
while integer >= 128:
elements.append((integer % 128) + 128)
integer = integer // 128 # We need integer division
elements.append(integer)
return bytearray(elements)
def decode_integer(data, prefix_bits):
"""
This decodes an integer according to the wacky integer encoding rules
defined in the HPACK spec. Returns a tuple of the decoded integer and the
number of bytes that were consumed from ``data`` in order to get that
integer.
"""
multiple = lambda index: 128 ** (index - 1)
max_number = (2 ** prefix_bits) - 1
mask = 0xFF >> (8 - prefix_bits)
index = 0
number = to_byte(data[index]) & mask
if (number == max_number):
while True:
index += 1
next_byte = to_byte(data[index])
if next_byte >= 128:
number += (next_byte - 128) * multiple(index)
else:
number += next_byte * multiple(index)
break
log.debug("Decoded %d, consumed %d bytes", number, index + 1)
return (number, index + 1)
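# Worked example (illustrative only, matching RFC 7541 Appendix C.1):
# encoding 1337 with a 5-bit prefix fills the prefix with 31, then emits
# base-128 continuation bytes for the remainder:
#
#     >>> encode_integer(1337, 5)
#     bytearray(b'\x1f\x9a\n')       # the bytes [31, 154, 10]
#     >>> decode_integer(b'\x1f\x9a\n', 5)
#     (1337, 3)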
def _to_bytes(string):
"""
Convert string to bytes.
"""
if not isinstance(string, (str, bytes)): # pragma: no cover
string = str(string)
return string if isinstance(string, bytes) else string.encode('utf-8')
def header_table_size(table):
"""
Calculates the 'size' of the header table as defined by the HTTP/2
specification.
"""
# It's phenomenally frustrating that the specification feels it is able to
# tell me how large the header table is, considering that its calculations
# assume a very particular layout that most implementations will not have.
# I appreciate it's an attempt to prevent DoS attacks by sending lots of
# large headers in the header table, but it seems like a better approach
# would be to limit the size of headers. Ah well.
return sum(32 + len(name) + len(value) for name, value in table)
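# For instance (illustrative only), a table holding the single entry
# (b'cookie', b'x=1') has size 32 + 6 + 3 = 41 octets:
#
#     >>> header_table_size([(b'cookie', b'x=1')])
#     41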
class Encoder(object):
"""
An HPACK encoder object. This object takes HTTP headers and emits encoded
HTTP/2 header blocks.
"""
# This is the static table of header fields.
static_table = [
(b':authority', b''),
(b':method', b'GET'),
(b':method', b'POST'),
(b':path', b'/'),
(b':path', b'/index.html'),
(b':scheme', b'http'),
(b':scheme', b'https'),
(b':status', b'200'),
(b':status', b'204'),
(b':status', b'206'),
(b':status', b'304'),
(b':status', b'400'),
(b':status', b'404'),
(b':status', b'500'),
(b'accept-charset', b''),
(b'accept-encoding', b'gzip, deflate'),
(b'accept-language', b''),
(b'accept-ranges', b''),
(b'accept', b''),
(b'access-control-allow-origin', b''),
(b'age', b''),
(b'allow', b''),
(b'authorization', b''),
(b'cache-control', b''),
(b'content-disposition', b''),
(b'content-encoding', b''),
(b'content-language', b''),
(b'content-length', b''),
(b'content-location', b''),
(b'content-range', b''),
(b'content-type', b''),
(b'cookie', b''),
(b'date', b''),
(b'etag', b''),
(b'expect', b''),
(b'expires', b''),
(b'from', b''),
(b'host', b''),
(b'if-match', b''),
(b'if-modified-since', b''),
(b'if-none-match', b''),
(b'if-range', b''),
(b'if-unmodified-since', b''),
(b'last-modified', b''),
(b'link', b''),
(b'location', b''),
(b'max-forwards', b''),
(b'proxy-authenticate', b''),
(b'proxy-authorization', b''),
(b'range', b''),
(b'referer', b''),
(b'refresh', b''),
(b'retry-after', b''),
(b'server', b''),
(b'set-cookie', b''),
(b'strict-transport-security', b''),
(b'transfer-encoding', b''),
(b'user-agent', b''),
(b'vary', b''),
(b'via', b''),
(b'www-authenticate', b''),
]
def __init__(self):
self.header_table = collections.deque()
self._header_table_size = 4096 # This value is set by the standard.
self.huffman_coder = HuffmanEncoder(
REQUEST_CODES, REQUEST_CODES_LENGTH
)
# We need to keep track of whether the header table size has been
# changed since we last encoded anything. If it has, we need to signal
# that change in the HPACK block.
self._table_size_changed = False
@property
def header_table_size(self):
return self._header_table_size
@header_table_size.setter
def header_table_size(self, value):
log.debug(
"Setting header table size to %d from %d",
value,
self._header_table_size
)
# If the new value is larger than the current one, no worries!
# Otherwise, we may need to shrink the header table.
if value < self._header_table_size:
current_size = header_table_size(self.header_table)
while value < current_size:
header = self.header_table.pop()
n, v = header
current_size -= (
32 + len(n) + len(v)
)
log.debug(
"Removed %s: %s from the encoder header table", n, v
)
if value != self._header_table_size:
self._table_size_changed = True
self._header_table_size = value
def encode(self, headers, huffman=True):
"""
Takes a set of headers and encodes them into a HPACK-encoded header
block.
Transforming the headers into a header block is a procedure that can
be modeled as a chain or pipe. First, the headers are encoded. This
encoding can be done in a number of ways. If the header name-value pair
is already in the header table we can represent it using the indexed
representation: the same is true if it is in the static table.
Otherwise, a literal representation will be used.
"""
log.debug("HPACK encoding %s", headers)
header_block = []
# Turn the headers into a list of tuples if possible. This is the
# natural way to interact with them in HPACK.
if isinstance(headers, dict):
headers = headers.items()
# Next, walk across the headers and turn them all into bytestrings.
headers = [(_to_bytes(n), _to_bytes(v)) for n, v in headers]
# Before we begin, if the header table size has been changed we need
# to signal that appropriately.
if self._table_size_changed:
header_block.append(self._encode_table_size_change())
self._table_size_changed = False
# We can now encode each header in the block.
header_block.extend(
(self.add(header, huffman) for header in headers)
)
header_block = b''.join(header_block)
log.debug("Encoded header block to %s", header_block)
return header_block
def add(self, to_add, huffman=False):
"""
This function takes a header key-value tuple and serializes it.
"""
log.debug("Adding %s to the header table", to_add)
name, value = to_add
# Search for a matching header in the header table.
match = self.matching_header(name, value)
if match is None:
# Not in the header table. Encode using the literal syntax,
# and add it to the header table.
encoded = self._encode_literal(name, value, True, huffman)
self._add_to_header_table(to_add)
return encoded
# The header is in the table, break out the values. If we matched
# perfectly, we can use the indexed representation: otherwise we
# can use the indexed literal.
index, perfect = match
if perfect:
# Indexed representation.
encoded = self._encode_indexed(index)
else:
# Indexed literal. We are going to add the header to the
# header table unconditionally. It is a future todo to
# filter out headers which are known to be ineffective for
# indexing, since they just take space in the table and
# push out other valuable headers.
encoded = self._encode_indexed_literal(index, value, huffman)
self._add_to_header_table(to_add)
return encoded
def matching_header(self, name, value):
"""
Scans the header table and the static table. Returns a tuple, where the
first value is the index of the match, and the second is whether there
was a full match or not. Prefers full matches to partial ones.
Upsettingly, the header table is one-indexed, not zero-indexed.
"""
partial_match = None
static_table_len = len(Encoder.static_table)
for (i, (n, v)) in enumerate(Encoder.static_table):
if n == name:
if v == value:
return (i + 1, Encoder.static_table[i])
elif partial_match is None:
partial_match = (i + 1, None)
for (i, (n, v)) in enumerate(self.header_table):
if n == name:
if v == value:
return (i + static_table_len + 1, self.header_table[i])
elif partial_match is None:
partial_match = (i + static_table_len + 1, None)
return partial_match
def _add_to_header_table(self, header):
"""
Adds a header to the header table, evicting old ones if necessary.
"""
# Be optimistic: add the header straight away.
self.header_table.appendleft(header)
# Now, work out how big the header table is.
actual_size = header_table_size(self.header_table)
# Loop and remove whatever we need to.
while actual_size > self.header_table_size:
header = self.header_table.pop()
n, v = header
actual_size -= (
32 + len(n) + len(v)
)
log.debug("Evicted %s: %s from the header table", n, v)
def _encode_indexed(self, index):
"""
Encodes a header using the indexed representation.
"""
field = encode_integer(index, 7)
field[0] = field[0] | 0x80 # we set the top bit
return bytes(field)
def _encode_literal(self, name, value, indexing, huffman=False):
"""
Encodes a header with a literal name and literal value. If ``indexing``
is True, the header will be added to the header table: otherwise it
will not.
"""
prefix = b'\x40' if indexing else b'\x00'
if huffman:
name = self.huffman_coder.encode(name)
value = self.huffman_coder.encode(value)
name_len = encode_integer(len(name), 7)
value_len = encode_integer(len(value), 7)
if huffman:
name_len[0] |= 0x80
value_len[0] |= 0x80
return b''.join([prefix, bytes(name_len), name, bytes(value_len), value])
def _encode_indexed_literal(self, index, value, huffman=False):
"""
Encodes a header with an indexed name and a literal value and performs
incremental indexing.
"""
prefix = encode_integer(index, 6)
prefix[0] |= 0x40
if huffman:
value = self.huffman_coder.encode(value)
value_len = encode_integer(len(value), 7)
if huffman:
value_len[0] |= 0x80
return b''.join([bytes(prefix), bytes(value_len), value])
def _encode_table_size_change(self):
"""
Produces the encoded form of a header table size change context update.
"""
size_bytes = encode_integer(self.header_table_size, 5)
size_bytes[0] |= 0x20
return bytes(size_bytes)
class Decoder(object):
"""
An HPACK decoder object.
"""
static_table = [
(b':authority', b''),
(b':method', b'GET'),
(b':method', b'POST'),
(b':path', b'/'),
(b':path', b'/index.html'),
(b':scheme', b'http'),
(b':scheme', b'https'),
(b':status', b'200'),
(b':status', b'204'),
(b':status', b'206'),
(b':status', b'304'),
(b':status', b'400'),
(b':status', b'404'),
(b':status', b'500'),
(b'accept-charset', b''),
(b'accept-encoding', b'gzip, deflate'),
(b'accept-language', b''),
(b'accept-ranges', b''),
(b'accept', b''),
(b'access-control-allow-origin', b''),
(b'age', b''),
(b'allow', b''),
(b'authorization', b''),
(b'cache-control', b''),
(b'content-disposition', b''),
(b'content-encoding', b''),
(b'content-language', b''),
(b'content-length', b''),
(b'content-location', b''),
(b'content-range', b''),
(b'content-type', b''),
(b'cookie', b''),
(b'date', b''),
(b'etag', b''),
(b'expect', b''),
(b'expires', b''),
(b'from', b''),
(b'host', b''),
(b'if-match', b''),
(b'if-modified-since', b''),
(b'if-none-match', b''),
(b'if-range', b''),
(b'if-unmodified-since', b''),
(b'last-modified', b''),
(b'link', b''),
(b'location', b''),
(b'max-forwards', b''),
(b'proxy-authenticate', b''),
(b'proxy-authorization', b''),
(b'range', b''),
(b'referer', b''),
(b'refresh', b''),
(b'retry-after', b''),
(b'server', b''),
(b'set-cookie', b''),
(b'strict-transport-security', b''),
(b'transfer-encoding', b''),
(b'user-agent', b''),
(b'vary', b''),
(b'via', b''),
(b'www-authenticate', b''),
]
def __init__(self):
self.header_table = collections.deque()
self._header_table_size = 4096 # This value is set by the standard.
self.huffman_coder = HuffmanDecoder(
REQUEST_CODES, REQUEST_CODES_LENGTH
)
@property
def header_table_size(self):
return self._header_table_size
@header_table_size.setter
def header_table_size(self, value):
log.debug(
"Resizing decoder header table to %d from %d",
value,
self._header_table_size
)
# If the new value is larger than the current one, no worries!
# Otherwise, we may need to shrink the header table.
if value < self._header_table_size:
current_size = header_table_size(self.header_table)
while value < current_size:
header = self.header_table.pop()
n, v = header
current_size -= (
32 + len(n) + len(v)
)
log.debug("Evicting %s: %s from the header table", n, v)
self._header_table_size = value
def decode(self, data):
"""
Takes an HPACK-encoded header block and decodes it into a header set.
"""
log.debug("Decoding %s", data)
headers = []
data_len = len(data)
current_index = 0
while current_index < data_len:
# Work out what kind of header we're decoding.
# If the high bit is 1, it's an indexed field.
current = to_byte(data[current_index])
indexed = bool(current & 0x80)
# Otherwise, if the second-highest bit is 1 it's a field that does
# alter the header table.
literal_index = bool(current & 0x40)
# Otherwise, if the third-highest bit is 1 it's an encoding context
# update.
encoding_update = bool(current & 0x20)
if indexed:
header, consumed = self._decode_indexed(data[current_index:])
elif literal_index:
# It's a literal header that does affect the header table.
header, consumed = self._decode_literal_index(
data[current_index:]
)
elif encoding_update:
# It's an update to the encoding context.
consumed = self._update_encoding_context(data[current_index:])
header = None
else:
# It's a literal header that does not affect the header table.
header, consumed = self._decode_literal_no_index(
data[current_index:]
)
if header:
headers.append(header)
current_index += consumed
return [(n.decode('utf-8'), v.decode('utf-8')) for n, v in headers]
def _add_to_header_table(self, new_header):
"""
Adds a header to the header table, evicting old ones if necessary.
"""
# Be optimistic: add the header straight away.
self.header_table.appendleft(new_header)
# Now, work out how big the header table is.
actual_size = header_table_size(self.header_table)
# Loop and remove whatever we need to.
while actual_size > self.header_table_size:
header = self.header_table.pop()
n, v = header
actual_size -= (
32 + len(n) + len(v)
)
log.debug("Evicting %s: %s from the header table", n, v)
def _update_encoding_context(self, data):
"""
Handles a byte that updates the encoding context.
"""
# We've been asked to resize the header table.
new_size, consumed = decode_integer(data, 5)
self.header_table_size = new_size
return consumed
def _decode_indexed(self, data):
"""
Decodes a header represented using the indexed representation.
"""
index, consumed = decode_integer(data, 7)
index -= 1 # Because this idiot table is 1-indexed. Ugh.
if index >= len(Decoder.static_table):
index -= len(Decoder.static_table)
header = self.header_table[index]
else:
header = Decoder.static_table[index]
log.debug("Decoded %s, consumed %d", header, consumed)
return header, consumed
def _decode_literal_no_index(self, data):
return self._decode_literal(data, False)
def _decode_literal_index(self, data):
return self._decode_literal(data, True)
def _decode_literal(self, data, should_index):
"""
Decodes a header represented with a literal.
"""
total_consumed = 0
# When should_index is true, if the low six bits of the first byte are
# nonzero, the header name is indexed.
# When should_index is false, if the low four bits of the first byte
# are nonzero the header name is indexed.
if should_index:
indexed_name = to_byte(data[0]) & 0x3F
name_len = 6
else:
indexed_name = to_byte(data[0]) & 0x0F
name_len = 4
if indexed_name:
# Indexed header name.
index, consumed = decode_integer(data, name_len)
index -= 1
if index >= len(Decoder.static_table):
index -= len(Decoder.static_table)
name = self.header_table[index][0]
else:
name = Decoder.static_table[index][0]
total_consumed = consumed
length = 0
else:
# Literal header name. The first byte was consumed, so we need to
# move forward.
data = data[1:]
length, consumed = decode_integer(data, 7)
name = data[consumed:consumed + length]
if to_byte(data[0]) & 0x80:
name = self.huffman_coder.decode(name)
total_consumed = consumed + length + 1 # Since we moved forward 1.
data = data[consumed + length:]
# The header value is definitely length-based.
length, consumed = decode_integer(data, 7)
value = data[consumed:consumed + length]
if to_byte(data[0]) & 0x80:
value = self.huffman_coder.decode(value)
# Update the total consumed length.
total_consumed += length + consumed
# If we've been asked to index this, add it to the header table.
header = (name, value)
if should_index:
self._add_to_header_table(header)
log.debug(
"Decoded %s, total consumed %d bytes, indexed %s",
header,
total_consumed,
should_index
)
return header, total_consumed
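# End-to-end sketch (illustrative only): a block produced by the Encoder
# decodes back to the original headers. Both of these hit the static table,
# so the block is just the two bytes b'\x82\x84'.
#
#     >>> e, d = Encoder(), Decoder()
#     >>> block = e.encode([(':method', 'GET'), (':path', '/')])
#     >>> d.decode(block)
#     [(':method', 'GET'), (':path', '/')]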

View file

@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-
"""
hpack/hpack_compat
~~~~~~~~~~~~~~~~~~
Provides an abstraction layer over two HPACK implementations.
This module has a pure-Python greenfield HPACK implementation that can be used
on all Python platforms. However, this implementation is both slower and more
memory-hungry than could be achieved with a C-language version. Additionally,
nghttp2's HPACK implementation currently achieves better compression ratios
than hyper's in almost all benchmarks.
For those who care about efficiency and speed in HPACK, this module allows you
to use nghttp2's HPACK implementation instead of ours. This module detects
whether the nghttp2 bindings are installed, and if they are it wraps them in
an hpack-compatible API and uses them instead of its own. If not, it falls
back to the built-in pure-Python implementation.
"""
import logging
from .hpack import _to_bytes
log = logging.getLogger(__name__)
# Attempt to import nghttp2.
try:
import nghttp2
USE_NGHTTP2 = True
log.debug("Using nghttp2's HPACK implementation.")
except ImportError:
USE_NGHTTP2 = False
log.debug("Using our pure-Python HPACK implementation.")
if USE_NGHTTP2:
class Encoder(object):
"""
An HPACK encoder object. This object takes HTTP headers and emits
encoded HTTP/2 header blocks.
"""
def __init__(self):
self._e = nghttp2.HDDeflater()
@property
def header_table_size(self):
"""
Returns the header table size. For the moment this isn't
useful, so we don't use it.
"""
raise NotImplementedError()
@header_table_size.setter
def header_table_size(self, value):
log.debug("Setting header table size to %d", value)
self._e.change_table_size(value)
def encode(self, headers, huffman=True):
"""
Encode the headers. The huffman parameter has no effect, it is
simply present for compatibility.
"""
log.debug("HPACK encoding %s", headers)
# Turn the headers into a list of tuples if possible. This is the
# natural way to interact with them in HPACK.
if isinstance(headers, dict):
headers = headers.items()
# Next, walk across the headers and turn them all into bytestrings.
headers = [(_to_bytes(n), _to_bytes(v)) for n, v in headers]
# Now, let nghttp2 do its thing.
header_block = self._e.deflate(headers)
return header_block
class Decoder(object):
"""
An HPACK decoder object.
"""
def __init__(self):
self._d = nghttp2.HDInflater()
@property
def header_table_size(self):
"""
Returns the header table size. For the moment this isn't
useful, so we don't use it.
"""
raise NotImplementedError()
@header_table_size.setter
def header_table_size(self, value):
log.debug("Setting header table size to %d", value)
self._d.change_table_size(value)
def decode(self, data):
"""
Takes an HPACK-encoded header block and decodes it into a header
set.
"""
log.debug("Decoding %s", data)
headers = self._d.inflate(data)
return [(n.decode('utf-8'), v.decode('utf-8')) for n, v in headers]
else:
# Grab the built-in encoder and decoder.
from .hpack import Encoder, Decoder
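# Whichever backend is selected, the module exposes the same Encoder and
# Decoder API (illustrative sketch; the import path assumes this module is
# installed as the ``hpack`` package):
#
#     from hpack import hpack_compat
#     enc = hpack_compat.Encoder()
#     block = enc.encode({':authority': 'example.com'})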

View file

@@ -0,0 +1,136 @@
# -*- coding: utf-8 -*-
"""
hpack/huffman_decoder
~~~~~~~~~~~~~~~~~~~~~
An implementation of a bitwise prefix tree specially built for decoding
Huffman-coded content where we already know the Huffman table.
"""
from .compat import to_byte, decode_hex
from .exceptions import HPACKDecodingError
def _pad_binary(bin_str, req_len=8):
"""
Given a binary string (returned by bin()), pad it to a full byte length.
"""
bin_str = bin_str[2:] # Strip the 0b prefix
return max(0, req_len - len(bin_str)) * '0' + bin_str
def _hex_to_bin_str(hex_string):
"""
Given a Python bytestring, returns a unicode string containing the bits
of those bytes, e.g. b'\x0f' becomes '00001111'.
"""
unpadded_bin_string_list = (bin(to_byte(c)) for c in hex_string)
padded_bin_string_list = map(_pad_binary, unpadded_bin_string_list)
bitwise_message = "".join(padded_bin_string_list)
return bitwise_message
class HuffmanDecoder(object):
"""
Decodes a Huffman-coded bytestream according to the Huffman table laid out
in the HPACK specification.
"""
class _Node(object):
def __init__(self, data):
self.data = data
self.mapping = {}
def __init__(self, huffman_code_list, huffman_code_list_lengths):
self.root = self._Node(None)
for index, (huffman_code, code_length) in enumerate(zip(huffman_code_list, huffman_code_list_lengths)):
self._insert(huffman_code, code_length, index)
def _insert(self, hex_number, hex_length, letter):
"""
Inserts a Huffman code point into the tree.
"""
hex_number = _pad_binary(bin(hex_number), hex_length)
cur_node = self.root
for digit in hex_number:
if digit not in cur_node.mapping:
cur_node.mapping[digit] = self._Node(None)
cur_node = cur_node.mapping[digit]
cur_node.data = letter
def decode(self, encoded_string):
"""
Decode the given Huffman coded string.
"""
number = _hex_to_bin_str(encoded_string)
cur_node = self.root
decoded_message = bytearray()
try:
for digit in number:
cur_node = cur_node.mapping[digit]
if cur_node.data is not None:
# If we get EOS, everything else is padding.
if cur_node.data == 256:
break
decoded_message.append(cur_node.data)
cur_node = self.root
except KeyError:
# We have a Huffman-coded string that doesn't match our trie. This
# is pretty bad: raise a useful exception.
raise HPACKDecodingError("Invalid Huffman-coded string received.")
return bytes(decoded_message)
class HuffmanEncoder(object):
"""
Encodes a string according to the Huffman encoding table defined in the
HPACK specification.
"""
def __init__(self, huffman_code_list, huffman_code_list_lengths):
self.huffman_code_list = huffman_code_list
self.huffman_code_list_lengths = huffman_code_list_lengths
def encode(self, bytes_to_encode):
"""
Given a string of bytes, encodes them according to the HPACK Huffman
specification.
"""
# If handed the empty string, just immediately return.
if not bytes_to_encode:
return b''
final_num = 0
final_int_len = 0
# Turn each byte into its huffman code. These codes aren't necessarily
# octet aligned, so keep track of how far through an octet we are. To
# handle this cleanly, just use a single giant integer.
for char in bytes_to_encode:
byte = to_byte(char)
bin_int_len = self.huffman_code_list_lengths[byte]
bin_int = self.huffman_code_list[byte] & (2 ** (bin_int_len + 1) - 1)
final_num <<= bin_int_len
final_num |= bin_int
final_int_len += bin_int_len
# Pad out to an octet with ones.
bits_to_be_padded = (8 - (final_int_len % 8)) % 8
final_num <<= bits_to_be_padded
final_num |= (1 << (bits_to_be_padded)) - 1
# Convert the number to hex and strip off the leading '0x' and the
# trailing 'L', if present.
final_num = hex(final_num)[2:].rstrip('L')
# If this is odd, prepend a zero.
final_num = '0' + final_num if len(final_num) % 2 != 0 else final_num
# This number should have twice as many digits as bytes. If not, we're
# missing some leading zeroes. Work out how many bytes we want and how
# many digits we have, then add the missing zero digits to the front.
total_bytes = (final_int_len + bits_to_be_padded) // 8
expected_digits = total_bytes * 2
if len(final_num) != expected_digits:
missing_digits = expected_digits - len(final_num)
final_num = ('0' * missing_digits) + final_num
return decode_hex(final_num)
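# Round-trip sketch using the HPACK request table (illustrative only; the
# expected output is the RFC 7541 Appendix C.4.1 example):
#
#     >>> from .huffman_constants import REQUEST_CODES, REQUEST_CODES_LENGTH
#     >>> enc = HuffmanEncoder(REQUEST_CODES, REQUEST_CODES_LENGTH)
#     >>> dec = HuffmanDecoder(REQUEST_CODES, REQUEST_CODES_LENGTH)
#     >>> enc.encode(b'www.example.com')
#     b'\xf1\xe3\xc2\xe5\xf2:k\xa0\xab\x90\xf4\xff'
#     >>> dec.decode(enc.encode(b'www.example.com'))
#     b'www.example.com'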

View file

@@ -0,0 +1,288 @@
# -*- coding: utf-8 -*-
"""
hpack/huffman_constants
~~~~~~~~~~~~~~~~~~~~~~~
Defines the constant Huffman table. This takes up an upsetting amount of space,
but c'est la vie.
"""
REQUEST_CODES = [
0x1ff8,
0x7fffd8,
0xfffffe2,
0xfffffe3,
0xfffffe4,
0xfffffe5,
0xfffffe6,
0xfffffe7,
0xfffffe8,
0xffffea,
0x3ffffffc,
0xfffffe9,
0xfffffea,
0x3ffffffd,
0xfffffeb,
0xfffffec,
0xfffffed,
0xfffffee,
0xfffffef,
0xffffff0,
0xffffff1,
0xffffff2,
0x3ffffffe,
0xffffff3,
0xffffff4,
0xffffff5,
0xffffff6,
0xffffff7,
0xffffff8,
0xffffff9,
0xffffffa,
0xffffffb,
0x14,
0x3f8,
0x3f9,
0xffa,
0x1ff9,
0x15,
0xf8,
0x7fa,
0x3fa,
0x3fb,
0xf9,
0x7fb,
0xfa,
0x16,
0x17,
0x18,
0x0,
0x1,
0x2,
0x19,
0x1a,
0x1b,
0x1c,
0x1d,
0x1e,
0x1f,
0x5c,
0xfb,
0x7ffc,
0x20,
0xffb,
0x3fc,
0x1ffa,
0x21,
0x5d,
0x5e,
0x5f,
0x60,
0x61,
0x62,
0x63,
0x64,
0x65,
0x66,
0x67,
0x68,
0x69,
0x6a,
0x6b,
0x6c,
0x6d,
0x6e,
0x6f,
0x70,
0x71,
0x72,
0xfc,
0x73,
0xfd,
0x1ffb,
0x7fff0,
0x1ffc,
0x3ffc,
0x22,
0x7ffd,
0x3,
0x23,
0x4,
0x24,
0x5,
0x25,
0x26,
0x27,
0x6,
0x74,
0x75,
0x28,
0x29,
0x2a,
0x7,
0x2b,
0x76,
0x2c,
0x8,
0x9,
0x2d,
0x77,
0x78,
0x79,
0x7a,
0x7b,
0x7ffe,
0x7fc,
0x3ffd,
0x1ffd,
0xffffffc,
0xfffe6,
0x3fffd2,
0xfffe7,
0xfffe8,
0x3fffd3,
0x3fffd4,
0x3fffd5,
0x7fffd9,
0x3fffd6,
0x7fffda,
0x7fffdb,
0x7fffdc,
0x7fffdd,
0x7fffde,
0xffffeb,
0x7fffdf,
0xffffec,
0xffffed,
0x3fffd7,
0x7fffe0,
0xffffee,
0x7fffe1,
0x7fffe2,
0x7fffe3,
0x7fffe4,
0x1fffdc,
0x3fffd8,
0x7fffe5,
0x3fffd9,
0x7fffe6,
0x7fffe7,
0xffffef,
0x3fffda,
0x1fffdd,
0xfffe9,
0x3fffdb,
0x3fffdc,
0x7fffe8,
0x7fffe9,
0x1fffde,
0x7fffea,
0x3fffdd,
0x3fffde,
0xfffff0,
0x1fffdf,
0x3fffdf,
0x7fffeb,
0x7fffec,
0x1fffe0,
0x1fffe1,
0x3fffe0,
0x1fffe2,
0x7fffed,
0x3fffe1,
0x7fffee,
0x7fffef,
0xfffea,
0x3fffe2,
0x3fffe3,
0x3fffe4,
0x7ffff0,
0x3fffe5,
0x3fffe6,
0x7ffff1,
0x3ffffe0,
0x3ffffe1,
0xfffeb,
0x7fff1,
0x3fffe7,
0x7ffff2,
0x3fffe8,
0x1ffffec,
0x3ffffe2,
0x3ffffe3,
0x3ffffe4,
0x7ffffde,
0x7ffffdf,
0x3ffffe5,
0xfffff1,
0x1ffffed,
0x7fff2,
0x1fffe3,
0x3ffffe6,
0x7ffffe0,
0x7ffffe1,
0x3ffffe7,
0x7ffffe2,
0xfffff2,
0x1fffe4,
0x1fffe5,
0x3ffffe8,
0x3ffffe9,
0xffffffd,
0x7ffffe3,
0x7ffffe4,
0x7ffffe5,
0xfffec,
0xfffff3,
0xfffed,
0x1fffe6,
0x3fffe9,
0x1fffe7,
0x1fffe8,
0x7ffff3,
0x3fffea,
0x3fffeb,
0x1ffffee,
0x1ffffef,
0xfffff4,
0xfffff5,
0x3ffffea,
0x7ffff4,
0x3ffffeb,
0x7ffffe6,
0x3ffffec,
0x3ffffed,
0x7ffffe7,
0x7ffffe8,
0x7ffffe9,
0x7ffffea,
0x7ffffeb,
0xffffffe,
0x7ffffec,
0x7ffffed,
0x7ffffee,
0x7ffffef,
0x7fffff0,
0x3ffffee,
0x3fffffff,
]
REQUEST_CODES_LENGTH = [
13, 23, 28, 28, 28, 28, 28, 28, 28, 24, 30, 28, 28, 30, 28, 28,
28, 28, 28, 28, 28, 28, 30, 28, 28, 28, 28, 28, 28, 28, 28, 28,
6, 10, 10, 12, 13, 6, 8, 11, 10, 10, 8, 11, 8, 6, 6, 6,
5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 8, 15, 6, 12, 10,
13, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 8, 7, 8, 13, 19, 13, 14, 6,
15, 5, 6, 5, 6, 5, 6, 6, 6, 5, 7, 7, 6, 6, 6, 5,
6, 7, 6, 5, 5, 6, 7, 7, 7, 7, 7, 15, 11, 14, 13, 28,
20, 22, 20, 20, 22, 22, 22, 23, 22, 23, 23, 23, 23, 23, 24, 23,
24, 24, 22, 23, 24, 23, 23, 23, 23, 21, 22, 23, 22, 23, 23, 24,
22, 21, 20, 22, 22, 23, 23, 21, 23, 22, 22, 24, 21, 22, 23, 23,
21, 21, 22, 21, 23, 22, 23, 23, 20, 22, 22, 22, 23, 22, 22, 23,
26, 26, 20, 19, 22, 23, 22, 25, 26, 26, 26, 27, 27, 26, 24, 25,
19, 21, 26, 27, 27, 26, 27, 24, 21, 21, 26, 26, 28, 27, 27, 27,
20, 24, 20, 21, 22, 21, 21, 23, 22, 22, 25, 25, 24, 24, 26, 23,
26, 27, 26, 26, 27, 27, 27, 27, 27, 28, 27, 27, 27, 27, 27, 26,
30,
]

View file

@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
"""
hyperframe
~~~~~~~~~~
A module for providing a pure-Python HTTP/2 framing layer.
"""
__version__ = '2.1.0'

View file

@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
"""
hyperframe/flags
~~~~~~~~~~~~~~~~
Defines basic Flag and Flags data structures.
"""
import collections
Flag = collections.namedtuple("Flag", ["name", "bit"])
class Flags(collections.MutableSet):
"""
A simple MutableSet implementation that will only accept known flags as elements.
Will behave like a regular set(), except that a ValueError will be thrown when .add()ing
unexpected flags.
"""
def __init__(self, defined_flags):
self._valid_flags = set(flag.name for flag in defined_flags)
self._flags = set()
def __contains__(self, x):
return self._flags.__contains__(x)
def __iter__(self):
return self._flags.__iter__()
def __len__(self):
return self._flags.__len__()
def discard(self, value):
return self._flags.discard(value)
def add(self, value):
if value not in self._valid_flags:
raise ValueError("Unexpected flag: {}".format(value))
return self._flags.add(value)
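# Quick illustration (not part of the vendored module):
#
#     >>> flags = Flags([Flag('END_STREAM', 0x01), Flag('PADDED', 0x08)])
#     >>> flags.add('END_STREAM')
#     >>> 'END_STREAM' in flags
#     True
#     >>> flags.add('BOGUS')
#     Traceback (most recent call last):
#     ...
#     ValueError: Unexpected flag: BOGUS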

View file

@@ -0,0 +1,625 @@
# -*- coding: utf-8 -*-
"""
hyperframe/frame
~~~~~~~~~~~~~~~~
Defines framing logic for HTTP/2. Provides both classes to represent framed
data and logic for aiding the connection when it comes to reading from the
socket.
"""
import collections
import struct
from .flags import Flag, Flags
# The maximum initial length of a frame. Some frames have shorter maximum lengths.
FRAME_MAX_LEN = (2 ** 14)
# The maximum allowed length of a frame.
FRAME_MAX_ALLOWED_LEN = (2 ** 24) - 1
class Frame(object):
"""
The base class for all HTTP/2 frames.
"""
# The flags defined on this type of frame.
defined_flags = []
# The type of the frame.
type = None
# If 'has-stream', the frame's stream_id must be non-zero. If 'no-stream',
# it must be zero. If 'either', it's not checked.
stream_association = None
def __init__(self, stream_id, flags=()):
self.stream_id = stream_id
self.flags = Flags(self.defined_flags)
self.body_len = 0
for flag in flags:
self.flags.add(flag)
if self.stream_association == 'has-stream' and not self.stream_id:
raise ValueError('Stream ID must be non-zero')
if self.stream_association == 'no-stream' and self.stream_id:
raise ValueError('Stream ID must be zero')
def __repr__(self):
flags = ", ".join(self.flags) or "None"
body = self.serialize_body()
if len(body) > 100:
body = str(body[:100]) + "..."
return (
"{type}(Stream: {stream}; Flags: {flags}): {body}"
).format(type=type(self).__name__, stream=self.stream_id, flags=flags, body=body)
@staticmethod
def parse_frame_header(header):
"""
Takes a 9-byte frame header and returns a tuple of the appropriate
Frame object and the length that needs to be read from the socket.
"""
fields = struct.unpack("!HBBBL", header)
# First 24 bits are frame length.
length = (fields[0] << 8) + fields[1]
type = fields[2]
flags = fields[3]
stream_id = fields[4]
if type not in FRAMES:
raise ValueError("Unknown frame type %d" % type)
frame = FRAMES[type](stream_id)
frame.parse_flags(flags)
return (frame, length)
def parse_flags(self, flag_byte):
for flag, flag_bit in self.defined_flags:
if flag_byte & flag_bit:
self.flags.add(flag)
return self.flags
def serialize(self):
body = self.serialize_body()
self.body_len = len(body)
# Build the common frame header.
# First, get the flags.
flags = 0
for flag, flag_bit in self.defined_flags:
if flag in self.flags:
flags |= flag_bit
header = struct.pack(
"!HBBBL",
(self.body_len & 0xFFFF00) >> 8, # Length is spread over top 24 bits
self.body_len & 0x0000FF,
self.type,
flags,
self.stream_id & 0x7FFFFFFF # Stream ID is 32 bits.
)
return header + body
def serialize_body(self):
raise NotImplementedError()
def parse_body(self, data):
raise NotImplementedError()
class Padding(object):
"""
Mixin for frames that contain padding.
"""
def __init__(self, stream_id, pad_length=0, **kwargs):
super(Padding, self).__init__(stream_id, **kwargs)
self.pad_length = pad_length
def serialize_padding_data(self):
if 'PADDED' in self.flags:
return struct.pack('!B', self.pad_length)
return b''
def parse_padding_data(self, data):
if 'PADDED' in self.flags:
self.pad_length = struct.unpack('!B', data[:1])[0]
return 1
return 0
@property
def total_padding(self):
"""Return the total length of the padding, if any."""
return self.pad_length
class Priority(object):
"""
Mixin for frames that contain priority data.
"""
def __init__(self, stream_id, depends_on=None, stream_weight=None, exclusive=None, **kwargs):
super(Priority, self).__init__(stream_id, **kwargs)
# The stream ID of the stream on which this stream depends.
self.depends_on = depends_on
# The weight of the stream. On the wire this is a single byte, so it
# takes values between 0 and 255 (representing weights 1 to 256).
self.stream_weight = stream_weight
# Whether the exclusive bit was set.
self.exclusive = exclusive
def serialize_priority_data(self):
return struct.pack(
"!LB",
self.depends_on | (int(self.exclusive) << 31),
self.stream_weight
)
def parse_priority_data(self, data):
MASK = 0x80000000
self.depends_on, self.stream_weight = struct.unpack(
"!LB", data[:5]
)
self.exclusive = bool(self.depends_on & MASK)
self.depends_on &= ~MASK
return 5
class DataFrame(Padding, Frame):
"""
DATA frames convey arbitrary, variable-length sequences of octets
associated with a stream. One or more DATA frames are used, for instance,
to carry HTTP request or response payloads.
"""
defined_flags = [
Flag('END_STREAM', 0x01),
Flag('PADDED', 0x08),
]
type = 0x0
stream_association = 'has-stream'
def __init__(self, stream_id, data=b'', **kwargs):
super(DataFrame, self).__init__(stream_id, **kwargs)
self.data = data
def serialize_body(self):
padding_data = self.serialize_padding_data()
padding = b'\0' * self.total_padding
return b''.join([padding_data, self.data, padding])
def parse_body(self, data):
padding_data_length = self.parse_padding_data(data)
self.data = data[padding_data_length:len(data)-self.total_padding].tobytes()
self.body_len = len(data)
@property
def flow_controlled_length(self):
"""
If the frame is padded we need to include the padding length byte in
the flow control used.
"""
padding_len = self.total_padding + 1 if self.total_padding else 0
return len(self.data) + padding_len
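# Wire-format sketch (illustrative only): a two-byte DATA frame with
# END_STREAM set serializes to the 9-byte common header followed by the
# payload, and parse_frame_header() reverses the first step.
#
#     >>> f = DataFrame(1, data=b'hi', flags=('END_STREAM',))
#     >>> f.serialize()
#     b'\x00\x00\x02\x00\x01\x00\x00\x00\x01hi'
#     >>> frame, length = Frame.parse_frame_header(f.serialize()[:9])
#     >>> type(frame).__name__, length
#     ('DataFrame', 2)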
class PriorityFrame(Priority, Frame):
"""
The PRIORITY frame specifies the sender-advised priority of a stream. It
can be sent at any time for an existing stream. This enables
reprioritisation of existing streams.
"""
defined_flags = []
type = 0x02
stream_association = 'has-stream'
def serialize_body(self):
return self.serialize_priority_data()
def parse_body(self, data):
self.parse_priority_data(data)
self.body_len = len(data)
class RstStreamFrame(Frame):
"""
The RST_STREAM frame allows for abnormal termination of a stream. When sent
by the initiator of a stream, it indicates that they wish to cancel the
stream or that an error condition has occurred. When sent by the receiver
of a stream, it indicates that either the receiver is rejecting the stream,
requesting that the stream be cancelled or that an error condition has
occurred.
"""
defined_flags = []
type = 0x03
stream_association = 'has-stream'
def __init__(self, stream_id, error_code=0, **kwargs):
super(RstStreamFrame, self).__init__(stream_id, **kwargs)
self.error_code = error_code
def serialize_body(self):
return struct.pack("!L", self.error_code)
def parse_body(self, data):
if len(data) != 4:
raise ValueError()
self.error_code = struct.unpack("!L", data)[0]
self.body_len = len(data)
class SettingsFrame(Frame):
"""
The SETTINGS frame conveys configuration parameters that affect how
endpoints communicate. The parameters are either constraints on peer
behavior or preferences.
Settings are not negotiated. Settings describe characteristics of the
sending peer, which are used by the receiving peer. Different values for
the same setting can be advertised by each peer. For example, a client
might set a high initial flow control window, whereas a server might set a
lower value to conserve resources.
"""
defined_flags = [Flag('ACK', 0x01)]
type = 0x04
stream_association = 'no-stream'
# We need to define the known settings, they may as well be class
# attributes.
HEADER_TABLE_SIZE = 0x01
ENABLE_PUSH = 0x02
MAX_CONCURRENT_STREAMS = 0x03
INITIAL_WINDOW_SIZE = 0x04
SETTINGS_MAX_FRAME_SIZE = 0x05
SETTINGS_MAX_HEADER_LIST_SIZE = 0x06
def __init__(self, stream_id=0, settings=None, **kwargs):
super(SettingsFrame, self).__init__(stream_id, **kwargs)
if settings and "ACK" in kwargs.get("flags", ()):
raise ValueError("Settings must be empty if ACK flag is set.")
# A dictionary of the setting type byte to the value.
self.settings = settings or {}
def serialize_body(self):
settings = [struct.pack("!HL", setting & 0xFF, value)
for setting, value in self.settings.items()]
return b''.join(settings)
def parse_body(self, data):
for i in range(0, len(data), 6):
name, value = struct.unpack("!HL", data[i:i+6])
self.settings[name] = value
self.body_len = len(data)
class PushPromiseFrame(Padding, Frame):
"""
The PUSH_PROMISE frame is used to notify the peer endpoint in advance of
streams the sender intends to initiate.
"""
defined_flags = [
Flag('END_HEADERS', 0x04),
Flag('PADDED', 0x08)
]
type = 0x05
stream_association = 'has-stream'
def __init__(self, stream_id, promised_stream_id=0, data=b'', **kwargs):
super(PushPromiseFrame, self).__init__(stream_id, **kwargs)
self.promised_stream_id = promised_stream_id
self.data = data
def serialize_body(self):
padding_data = self.serialize_padding_data()
padding = b'\0' * self.total_padding
data = struct.pack("!L", self.promised_stream_id)
return b''.join([padding_data, data, self.data, padding])
def parse_body(self, data):
padding_data_length = self.parse_padding_data(data)
self.promised_stream_id = struct.unpack("!L", data[padding_data_length:padding_data_length + 4])[0]
self.data = data[padding_data_length + 4:].tobytes()
self.body_len = len(data)
class PingFrame(Frame):
"""
The PING frame is a mechanism for measuring a minimal round-trip time from
the sender, as well as determining whether an idle connection is still
functional. PING frames can be sent from any endpoint.
"""
defined_flags = [Flag('ACK', 0x01)]
type = 0x06
stream_association = 'no-stream'
def __init__(self, stream_id=0, opaque_data=b'', **kwargs):
super(PingFrame, self).__init__(stream_id, **kwargs)
self.opaque_data = opaque_data
def serialize_body(self):
if len(self.opaque_data) > 8:
raise ValueError()
data = self.opaque_data
data += b'\x00' * (8 - len(self.opaque_data))
return data
def parse_body(self, data):
if len(data) > 8:
raise ValueError()
self.opaque_data = data.tobytes()
self.body_len = len(data)
class GoAwayFrame(Frame):
"""
The GOAWAY frame informs the remote peer to stop creating streams on this
connection. It can be sent from the client or the server. Once sent, the
sender will ignore frames sent on new streams for the remainder of the
connection.
"""
type = 0x07
stream_association = 'no-stream'
def __init__(self, stream_id=0, last_stream_id=0, error_code=0, additional_data=b'', **kwargs):
super(GoAwayFrame, self).__init__(stream_id, **kwargs)
self.last_stream_id = last_stream_id
self.error_code = error_code
self.additional_data = additional_data
def serialize_body(self):
data = struct.pack(
"!LL",
self.last_stream_id & 0x7FFFFFFF,
self.error_code
)
data += self.additional_data
return data
def parse_body(self, data):
self.last_stream_id, self.error_code = struct.unpack("!LL", data[:8])
self.body_len = len(data)
if len(data) > 8:
self.additional_data = data[8:].tobytes()
class WindowUpdateFrame(Frame):
"""
The WINDOW_UPDATE frame is used to implement flow control.
Flow control operates at two levels: on each individual stream and on the
entire connection.
Both types of flow control are hop by hop; that is, only between the two
endpoints. Intermediaries do not forward WINDOW_UPDATE frames between
dependent connections. However, throttling of data transfer by any receiver
can indirectly cause the propagation of flow control information toward the
original sender.
"""
type = 0x08
stream_association = 'either'
def __init__(self, stream_id, window_increment=0, **kwargs):
super(WindowUpdateFrame, self).__init__(stream_id, **kwargs)
self.window_increment = window_increment
def serialize_body(self):
return struct.pack("!L", self.window_increment & 0x7FFFFFFF)
def parse_body(self, data):
self.window_increment = struct.unpack("!L", data)[0]
self.body_len = len(data)
class HeadersFrame(Padding, Priority, Frame):
"""
The HEADERS frame carries name-value pairs. It is used to open a stream.
HEADERS frames can be sent on a stream in the "open" or "half closed
(remote)" states.
    The HeadersFrame class is essentially a data frame in this
    implementation, because the frame-building code needs to control the
    sizes of the frames it emits. A header block fragment that doesn't fit
    in a single HEADERS frame must be followed by CONTINUATION frames, so
    from the perspective of the frame-building code the header block is an
    opaque data segment.
"""
type = 0x01
stream_association = 'has-stream'
defined_flags = [
Flag('END_STREAM', 0x01),
Flag('END_HEADERS', 0x04),
Flag('PADDED', 0x08),
Flag('PRIORITY', 0x20),
]
def __init__(self, stream_id, data=b'', **kwargs):
super(HeadersFrame, self).__init__(stream_id, **kwargs)
self.data = data
def serialize_body(self):
padding_data = self.serialize_padding_data()
padding = b'\0' * self.total_padding
if 'PRIORITY' in self.flags:
priority_data = self.serialize_priority_data()
else:
priority_data = b''
return b''.join([padding_data, priority_data, self.data, padding])
def parse_body(self, data):
padding_data_length = self.parse_padding_data(data)
data = data[padding_data_length:]
if 'PRIORITY' in self.flags:
priority_data_length = self.parse_priority_data(data)
else:
priority_data_length = 0
self.body_len = len(data)
self.data = data[priority_data_length:len(data)-self.total_padding].tobytes()
class ContinuationFrame(Frame):
"""
The CONTINUATION frame is used to continue a sequence of header block
fragments. Any number of CONTINUATION frames can be sent on an existing
stream, as long as the preceding frame on the same stream is one of
HEADERS, PUSH_PROMISE or CONTINUATION without the END_HEADERS flag set.
Much like the HEADERS frame, hyper treats this as an opaque data frame with
different flags and a different type.
"""
type = 0x09
stream_association = 'has-stream'
    defined_flags = [Flag('END_HEADERS', 0x04)]
def __init__(self, stream_id, data=b'', **kwargs):
super(ContinuationFrame, self).__init__(stream_id, **kwargs)
self.data = data
def serialize_body(self):
return self.data
def parse_body(self, data):
self.data = data.tobytes()
self.body_len = len(data)
Origin = collections.namedtuple('Origin', ['scheme', 'host', 'port'])
class AltSvcFrame(Frame):
"""
The ALTSVC frame is used to advertise alternate services that the current
host, or a different one, can understand.
"""
type = 0xA
stream_association = 'no-stream'
def __init__(self, stream_id=0, host=b'', port=0, protocol_id=b'', max_age=0, origin=None, **kwargs):
super(AltSvcFrame, self).__init__(stream_id, **kwargs)
self.host = host
self.port = port
self.protocol_id = protocol_id
self.max_age = max_age
self.origin = origin
def serialize_origin(self):
if self.origin is not None:
if self.origin.port is None:
hostport = self.origin.host
else:
hostport = self.origin.host + b':' + str(self.origin.port).encode('ascii')
return self.origin.scheme + b'://' + hostport
return b''
def parse_origin(self, data):
if len(data) > 0:
data = data.tobytes()
scheme, hostport = data.split(b'://')
host, _, port = hostport.partition(b':')
self.origin = Origin(scheme=scheme, host=host,
port=int(port) if len(port) > 0 else None)
def serialize_body(self):
first = struct.pack("!LHxB", self.max_age, self.port, len(self.protocol_id))
host_length = struct.pack("!B", len(self.host))
return b''.join([first, self.protocol_id, host_length, self.host,
self.serialize_origin()])
def parse_body(self, data):
self.body_len = len(data)
self.max_age, self.port, protocol_id_length = struct.unpack("!LHxB", data[:8])
pos = 8
self.protocol_id = data[pos:pos+protocol_id_length].tobytes()
pos += protocol_id_length
host_length = struct.unpack("!B", data[pos:pos+1])[0]
pos += 1
self.host = data[pos:pos+host_length].tobytes()
pos += host_length
self.parse_origin(data[pos:])
class BlockedFrame(Frame):
"""
The BLOCKED frame indicates that the sender is unable to send data due to a
closed flow control window.
    The BLOCKED frame is used to provide feedback about the performance of
    flow control for the purposes of performance tuning and debugging. The
    BLOCKED frame can be sent by a peer when flow-controlled data cannot be
    sent due to connection- or stream-level flow control. This frame MUST
    NOT be sent if there are other reasons preventing data from being sent,
    such as a lack of available data or the underlying transport being
    blocked.
"""
type = 0x0B
stream_association = 'both'
defined_flags = []
def serialize_body(self):
return b''
def parse_body(self, data):
pass
# A map of type byte to frame class.
_FRAME_CLASSES = [
DataFrame,
HeadersFrame,
PriorityFrame,
RstStreamFrame,
SettingsFrame,
PushPromiseFrame,
PingFrame,
GoAwayFrame,
WindowUpdateFrame,
ContinuationFrame,
AltSvcFrame,
BlockedFrame
]
FRAMES = {cls.type: cls for cls in _FRAME_CLASSES}
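# --- Editorial usage sketch (not part of the vendored file) ---
# A minimal round trip through the body-level methods defined above. The
# Frame base class, which adds the 9-octet frame header, is elided from
# this diff, so only serialize_body/parse_body are exercised here. Note
# that parse_body expects a memoryview, since it calls data.tobytes().
if __name__ == '__main__':
    ping = PingFrame(opaque_data=b'12345678')
    body = ping.serialize_body()  # exactly 8 octets, zero-padded if short
    echo = PingFrame()
    echo.parse_body(memoryview(body))
    assert echo.opaque_data == b'12345678'
    assert echo.body_len == 8
    assert FRAMES[0x06] is PingFrame  # the type-byte lookup defined above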

View file

@@ -0,0 +1,13 @@
Copyright 2014 Ian Cordasco, Rackspace
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
rfc3986
=======
An implementation of semantics and validations described in RFC 3986. See
http://rfc3986.rtfd.org/ for documentation.
:copyright: (c) 2014 Rackspace
:license: Apache v2.0, see LICENSE for details
"""
__title__ = 'rfc3986'
__author__ = 'Ian Cordasco'
__author_email__ = 'ian.cordasco@rackspace.com'
__license__ = 'Apache v2.0'
__copyright__ = 'Copyright 2014 Rackspace'
__version__ = '0.3.0'
from .api import (URIReference, uri_reference, is_valid_uri, normalize_uri,
urlparse)
from .parseresult import ParseResult
__all__ = (
'ParseResult',
'URIReference',
'is_valid_uri',
'normalize_uri',
'uri_reference',
'urlparse',
)

View file

@@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
rfc3986.api
~~~~~~~~~~~
This defines the simple API to rfc3986. This module defines four
functions and provides access to the class ``URIReference``.
"""
from .uri import URIReference
from .parseresult import ParseResult
def uri_reference(uri, encoding='utf-8'):
"""Parse a URI string into a URIReference.
This is a convenience function. You could achieve the same end by using
``URIReference.from_string(uri)``.
:param str uri: The URI which needs to be parsed into a reference.
:param str encoding: The encoding of the string provided
:returns: A parsed URI
:rtype: :class:`URIReference`
"""
return URIReference.from_string(uri, encoding)
def is_valid_uri(uri, encoding='utf-8', **kwargs):
"""Determine if the URI given is valid.
This is a convenience function. You could use either
``uri_reference(uri).is_valid()`` or
``URIReference.from_string(uri).is_valid()`` to achieve the same result.
:param str uri: The URI to be validated.
:param str encoding: The encoding of the string provided
:param bool require_scheme: Set to ``True`` if you wish to require the
presence of the scheme component.
:param bool require_authority: Set to ``True`` if you wish to require the
presence of the authority component.
:param bool require_path: Set to ``True`` if you wish to require the
presence of the path component.
:param bool require_query: Set to ``True`` if you wish to require the
presence of the query component.
:param bool require_fragment: Set to ``True`` if you wish to require the
presence of the fragment component.
:returns: ``True`` if the URI is valid, ``False`` otherwise.
:rtype: bool
"""
return URIReference.from_string(uri, encoding).is_valid(**kwargs)
def normalize_uri(uri, encoding='utf-8'):
"""Normalize the given URI.
This is a convenience function. You could use either
``uri_reference(uri).normalize().unsplit()`` or
``URIReference.from_string(uri).normalize().unsplit()`` instead.
:param str uri: The URI to be normalized.
:param str encoding: The encoding of the string provided
:returns: The normalized URI.
:rtype: str
"""
normalized_reference = URIReference.from_string(uri, encoding).normalize()
return normalized_reference.unsplit()
def urlparse(uri, encoding='utf-8'):
"""Parse a given URI and return a ParseResult.
This is a partial replacement of the standard library's urlparse function.
:param str uri: The URI to be parsed.
:param str encoding: The encoding of the string provided.
:returns: A parsed URI
:rtype: :class:`~rfc3986.parseresult.ParseResult`
"""
return ParseResult.from_string(uri, encoding, strict=False)
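# --- Editorial usage sketch (not part of the vendored module) ---
# What the four convenience functions return. The expected values follow
# from the normalizers later in this package: scheme and host are
# lowercased and percent-encodings are uppercased, but nothing is decoded.
if __name__ == '__main__':
    ref = uri_reference('HTTP://Example.COM/%7efoo')
    assert ref.scheme == 'HTTP'  # parsing alone does not normalize
    assert is_valid_uri('http://example.com/', require_scheme=True)
    assert normalize_uri('HTTP://Example.COM/%7efoo') == 'http://example.com/%7Efoo'
    assert urlparse('http://example.com:8080/a?b=c').port == 8080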

View file

@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
if sys.version_info >= (3, 0):
unicode = str # Python 3.x
def to_str(b, encoding):
    if hasattr(b, 'decode') and not isinstance(b, unicode):
        b = b.decode(encoding)
    return b
def to_bytes(s, encoding):
    if hasattr(s, 'encode') and not isinstance(s, bytes):
        s = s.encode(encoding)
    return s
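# --- Editorial sanity sketch (not part of the vendored module) ---
# Both helpers are pass-throughs when the value is already of the target
# type, and use the supplied encoding otherwise.
if __name__ == '__main__':
    assert to_str(b'caf\xc3\xa9', 'utf-8') == u'caf\xe9'
    assert to_bytes(u'caf\xe9', 'utf-8') == b'caf\xc3\xa9'
    assert to_bytes(b'raw', 'utf-8') == b'raw'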

View file

@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
class RFC3986Exception(Exception):
pass
class InvalidAuthority(RFC3986Exception):
def __init__(self, authority):
super(InvalidAuthority, self).__init__(
"The authority ({0}) is not valid.".format(authority))
class InvalidPort(RFC3986Exception):
def __init__(self, port):
super(InvalidPort, self).__init__(
'The port ("{0}") is not valid.'.format(port))
class ResolutionError(RFC3986Exception):
def __init__(self, uri):
super(ResolutionError, self).__init__(
"{0} is not an absolute URI.".format(uri.unsplit()))

View file

@@ -0,0 +1,214 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
rfc3986.misc
~~~~~~~~~~~~
This module contains important constants, patterns, and compiled regular
expressions for parsing and validating URIs and their components.
"""
import re
# These are enumerated for the named tuple used as a superclass of
# URIReference
URI_COMPONENTS = ['scheme', 'authority', 'path', 'query', 'fragment']
important_characters = {
'generic_delimiters': ":/?#[]@",
'sub_delimiters': "!$&'()*+,;=",
# We need to escape the '*' in this case
're_sub_delimiters': "!$&'()\*+,;=",
'unreserved_chars': ('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
'0123456789._~-'),
# We need to escape the '-' in this case:
're_unreserved': 'A-Za-z0-9._~\-',
}
# For details about delimiters and reserved characters, see:
# http://tools.ietf.org/html/rfc3986#section-2.2
GENERIC_DELIMITERS = set(important_characters['generic_delimiters'])
SUB_DELIMITERS = set(important_characters['sub_delimiters'])
RESERVED_CHARS = GENERIC_DELIMITERS.union(SUB_DELIMITERS)
# For details about unreserved characters, see:
# http://tools.ietf.org/html/rfc3986#section-2.3
UNRESERVED_CHARS = set(important_characters['unreserved_chars'])
NON_PCT_ENCODED = RESERVED_CHARS.union(UNRESERVED_CHARS).union('%')
# Extracted from http://tools.ietf.org/html/rfc3986#appendix-B
component_pattern_dict = {
'scheme': '[^:/?#]+',
'authority': '[^/?#]*',
'path': '[^?#]*',
'query': '[^#]*',
'fragment': '.*',
}
# See http://tools.ietf.org/html/rfc3986#appendix-B
# In this case, we name each of the important matches so we can use
# SRE_Match#groupdict to parse the values out if we so choose. This is also
# modified to ignore other matches that are not important to the parsing of
# the reference so we can also simply use SRE_Match#groups.
expression = ('(?:(?P<scheme>{scheme}):)?(?://(?P<authority>{authority}))?'
'(?P<path>{path})(?:\?(?P<query>{query}))?'
'(?:#(?P<fragment>{fragment}))?'
).format(**component_pattern_dict)
URI_MATCHER = re.compile(expression)
# #########################
# Authority Matcher Section
# #########################
# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2
# The pattern for a regular name, e.g., www.google.com, api.github.com
reg_name = '(({0})*|[{1}]*)'.format(
'%[0-9A-Fa-f]{2}',
important_characters['re_sub_delimiters'] +
important_characters['re_unreserved']
)
# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1,
ipv4 = '(\d{1,3}\.){3}\d{1,3}'
# Hexadecimal characters used in each piece of an IPv6 address
hexdig = '[0-9A-Fa-f]{1,4}'
# Least-significant 32 bits of an IPv6 address
ls32 = '({hex}:{hex}|{ipv4})'.format(hex=hexdig, ipv4=ipv4)
# Substitutions into the following patterns for IPv6 patterns defined
# http://tools.ietf.org/html/rfc3986#page-20
subs = {'hex': hexdig, 'ls32': ls32}
# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details
# about ABNF (Augmented Backus-Naur Form) use in the comments
variations = [
# 6( h16 ":" ) ls32
'(%(hex)s:){6}%(ls32)s' % subs,
# "::" 5( h16 ":" ) ls32
'::(%(hex)s:){5}%(ls32)s' % subs,
# [ h16 ] "::" 4( h16 ":" ) ls32
'(%(hex)s)?::(%(hex)s:){4}%(ls32)s' % subs,
# [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
'((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s' % subs,
# [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
'((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s' % subs,
# [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
'((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s' % subs,
# [ *4( h16 ":" ) h16 ] "::" ls32
'((%(hex)s:){0,4}%(hex)s)?::%(ls32)s' % subs,
# [ *5( h16 ":" ) h16 ] "::" h16
'((%(hex)s:){0,5}%(hex)s)?::%(hex)s' % subs,
# [ *6( h16 ":" ) h16 ] "::"
'((%(hex)s:){0,6}%(hex)s)?::' % subs,
]
ipv6 = '(({0})|({1})|({2})|({3})|({4})|({5})|({6})|({7}))'.format(*variations)
ipv_future = 'v[0-9A-Fa-f]+\.[%s]+' % (
important_characters['re_unreserved'] +
important_characters['re_sub_delimiters'] +
':')
ip_literal = '\[({0}|{1})\]'.format(ipv6, ipv_future)
# Pattern for matching the host piece of the authority
HOST_PATTERN = '({0}|{1}|{2})'.format(reg_name, ipv4, ip_literal)
SUBAUTHORITY_MATCHER = re.compile((
'^(?:(?P<userinfo>[A-Za-z0-9_.~\-%:]+)@)?' # userinfo
'(?P<host>{0}?)' # host
':?(?P<port>\d+)?$' # port
).format(HOST_PATTERN))
IPv4_MATCHER = re.compile('^' + ipv4 + '$')
# ####################
# Path Matcher Section
# ####################
# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information
# about the path patterns defined below.
# Percent encoded character values
pct_encoded = '%[A-Fa-f0-9]{2}'
pchar = ('([' + important_characters['re_unreserved']
+ important_characters['re_sub_delimiters']
+ ':@]|%s)' % pct_encoded)
segments = {
'segment': pchar + '*',
# Non-zero length segment
'segment-nz': pchar + '+',
# Non-zero length segment without ":"
'segment-nz-nc': pchar.replace(':', '') + '+'
}
# Path types taken from Section 3.3 (linked above)
path_empty = '^$'
path_rootless = '%(segment-nz)s(/%(segment)s)*' % segments
path_noscheme = '%(segment-nz-nc)s(/%(segment)s)*' % segments
path_absolute = '/(%s)?' % path_rootless
path_abempty = '(/%(segment)s)*' % segments
# Matcher used to validate path components
PATH_MATCHER = re.compile('^(%s|%s|%s|%s|%s)$' % (
path_abempty, path_absolute, path_noscheme, path_rootless, path_empty
))
# ##################################
# Query and Fragment Matcher Section
# ##################################
QUERY_MATCHER = re.compile(
'^([/?:@' + important_characters['re_unreserved']
+ important_characters['re_sub_delimiters']
+ ']|%s)*$' % pct_encoded)
FRAGMENT_MATCHER = QUERY_MATCHER
# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1
SCHEME_MATCHER = re.compile('^[A-Za-z][A-Za-z0-9+.\-]*$')
# Relative reference matcher
# See http://tools.ietf.org/html/rfc3986#section-4.2 for details
relative_part = '(//%s%s|%s|%s|%s)' % (
component_pattern_dict['authority'], path_abempty, path_absolute,
path_noscheme, path_empty
)
RELATIVE_REF_MATCHER = re.compile('^%s(\?%s)?(#%s)?$' % (
relative_part, QUERY_MATCHER.pattern, FRAGMENT_MATCHER.pattern
))
# See http://tools.ietf.org/html/rfc3986#section-3 for definition
hier_part = '(//%s%s|%s|%s|%s)' % (
component_pattern_dict['authority'], path_abempty, path_absolute,
path_rootless, path_empty
)
# See http://tools.ietf.org/html/rfc3986#section-4.3
ABSOLUTE_URI_MATCHER = re.compile('^%s:%s(\?%s)?$' % (
component_pattern_dict['scheme'], hier_part, QUERY_MATCHER.pattern[1:-1]
))
# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3
def merge_paths(base_uri, relative_path):
"""Merge a base URI's path with a relative URI's path."""
if base_uri.path is None and base_uri.authority is not None:
return '/' + relative_path
else:
path = base_uri.path or ''
index = path.rfind('/')
return path[:index] + '/' + relative_path
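# --- Editorial worked example (not part of the vendored module) ---
# The two merge_paths branches, driven by a stand-in object that carries
# just the two attributes the function reads; a real URIReference behaves
# the same way.
if __name__ == '__main__':
    import collections
    Base = collections.namedtuple('Base', ['authority', 'path'])
    # Authority present but no path: the merged path is rooted.
    assert merge_paths(Base('example.com', None), 'g') == '/g'
    # Otherwise drop everything after the base path's last '/'.
    assert merge_paths(Base('example.com', '/b/c/d;p'), 'g') == '/b/c/g'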

View file

@@ -0,0 +1,115 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from .compat import to_bytes
from .misc import NON_PCT_ENCODED
def normalize_scheme(scheme):
return scheme.lower()
def normalize_authority(authority):
userinfo, host, port = authority
result = ''
if userinfo:
result += normalize_percent_characters(userinfo) + '@'
if host:
result += host.lower()
if port:
result += ':' + port
return result
def normalize_path(path):
if not path:
return path
path = normalize_percent_characters(path)
return remove_dot_segments(path)
def normalize_query(query):
return normalize_percent_characters(query)
def normalize_fragment(fragment):
return normalize_percent_characters(fragment)
PERCENT_MATCHER = re.compile('%[A-Fa-f0-9]{2}')
def normalize_percent_characters(s):
"""All percent characters should be upper-cased.
For example, ``"%3afoo%DF%ab"`` should be turned into ``"%3Afoo%DF%AB"``.
"""
matches = set(PERCENT_MATCHER.findall(s))
for m in matches:
if not m.isupper():
s = s.replace(m, m.upper())
return s
def remove_dot_segments(s):
# See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
segments = s.split('/') # Turn the path into a list of segments
output = [] # Initialize the variable to use to store output
for segment in segments:
# '.' is the current directory, so ignore it, it is superfluous
if segment == '.':
continue
# Anything other than '..', should be appended to the output
elif segment != '..':
output.append(segment)
# In this case segment == '..', if we can, we should pop the last
# element
elif output:
output.pop()
# If the path starts with '/' and the output is empty or the first string
# is non-empty
if s.startswith('/') and (not output or output[0]):
output.insert(0, '')
# If the path starts with '/.' or '/..' ensure we add one more empty
# string to add a trailing '/'
if s.endswith(('/.', '/..')):
output.append('')
return '/'.join(output)
def encode_component(uri_component, encoding):
if uri_component is None:
return uri_component
uri_bytes = to_bytes(uri_component, encoding)
encoded_uri = bytearray()
for i in range(0, len(uri_bytes)):
# Will return a single character bytestring on both Python 2 & 3
byte = uri_bytes[i:i+1]
byte_ord = ord(byte)
if byte_ord < 128 and byte.decode() in NON_PCT_ENCODED:
encoded_uri.extend(byte)
continue
encoded_uri.extend('%{0:02x}'.format(byte_ord).encode())
return encoded_uri.decode(encoding)
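# --- Editorial worked examples (not part of the vendored module) ---
# The dot-segment case is the worked example from RFC 3986 Section 5.2.4;
# the percent case is the docstring's own example.
if __name__ == '__main__':
    assert normalize_percent_characters('%3afoo%DF%ab') == '%3Afoo%DF%AB'
    assert remove_dot_segments('/a/b/c/./../../g') == '/a/g'
    assert encode_component('a b', 'utf-8') == 'a%20b'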

View file

@@ -0,0 +1,303 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Ian Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple
from . import compat
from . import exceptions
from . import normalizers
from . import uri
__all__ = ('ParseResult', 'ParseResultBytes')
PARSED_COMPONENTS = ('scheme', 'userinfo', 'host', 'port', 'path', 'query',
'fragment')
class ParseResultMixin(object):
def _generate_authority(self, attributes):
# I swear I did not align the comparisons below. That's just how they
# happened to align based on pep8 and attribute lengths.
userinfo, host, port = (attributes[p]
for p in ('userinfo', 'host', 'port'))
if (self.userinfo != userinfo or
self.host != host or
self.port != port):
if port:
port = '{0}'.format(port)
return normalizers.normalize_authority(
(compat.to_str(userinfo, self.encoding),
compat.to_str(host, self.encoding),
port)
)
return self.authority
def geturl(self):
"""Standard library shim to the unsplit method."""
return self.unsplit()
@property
def hostname(self):
"""Standard library shim for the host portion of the URI."""
return self.host
@property
def netloc(self):
"""Standard library shim for the authority portion of the URI."""
return self.authority
@property
def params(self):
"""Standard library shim for the query portion of the URI."""
return self.query
class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS),
ParseResultMixin):
slots = ()
def __new__(cls, scheme, userinfo, host, port, path, query, fragment,
uri_ref, encoding='utf-8'):
parse_result = super(ParseResult, cls).__new__(
cls,
scheme or None,
userinfo or None,
host,
port or None,
path or None,
query or None,
fragment or None)
parse_result.encoding = encoding
parse_result.reference = uri_ref
return parse_result
@classmethod
def from_string(cls, uri_string, encoding='utf-8', strict=True):
"""Parse a URI from the given unicode URI string.
:param str uri_string: Unicode URI to be parsed into a reference.
:param str encoding: The encoding of the string provided
:param bool strict: Parse strictly according to :rfc:`3986` if True.
If False, parse similarly to the standard library's urlparse
function.
:returns: :class:`ParseResult` or subclass thereof
"""
reference = uri.URIReference.from_string(uri_string, encoding)
try:
subauthority = reference.authority_info()
except exceptions.InvalidAuthority:
if strict:
raise
userinfo, host, port = split_authority(reference.authority)
else:
# Thanks to Richard Barrell for this idea:
# https://twitter.com/0x2ba22e11/status/617338811975139328
userinfo, host, port = (subauthority.get(p)
for p in ('userinfo', 'host', 'port'))
if port:
try:
port = int(port)
except ValueError:
raise exceptions.InvalidPort(port)
return cls(scheme=reference.scheme,
userinfo=userinfo,
host=host,
port=port,
path=reference.path,
query=reference.query,
fragment=reference.fragment,
uri_ref=reference,
encoding=encoding)
@property
def authority(self):
"""Normalized authority generated from the subauthority parts."""
return self.reference.authority
def copy_with(self, scheme=None, userinfo=None, host=None, port=None,
path=None, query=None, fragment=None):
attributes = zip(PARSED_COMPONENTS,
(scheme, userinfo, host, port, path, query, fragment))
attrs_dict = {}
for name, value in attributes:
if value is None:
value = getattr(self, name)
attrs_dict[name] = value
authority = self._generate_authority(attrs_dict)
ref = self.reference.copy_with(scheme=attrs_dict['scheme'],
authority=authority,
path=attrs_dict['path'],
query=attrs_dict['query'],
fragment=attrs_dict['fragment'])
return ParseResult(uri_ref=ref, encoding=self.encoding, **attrs_dict)
def encode(self, encoding=None):
encoding = encoding or self.encoding
attrs = dict(
zip(PARSED_COMPONENTS,
(attr.encode(encoding) if hasattr(attr, 'encode') else attr
for attr in self)))
return ParseResultBytes(
uri_ref=self.reference,
encoding=encoding,
**attrs
)
def unsplit(self, use_idna=False):
"""Create a URI string from the components.
:returns: The parsed URI reconstituted as a string.
:rtype: str
"""
parse_result = self
if use_idna and self.host:
hostbytes = self.host.encode('idna')
host = hostbytes.decode(self.encoding)
parse_result = self.copy_with(host=host)
return parse_result.reference.unsplit()
class ParseResultBytes(namedtuple('ParseResultBytes', PARSED_COMPONENTS),
ParseResultMixin):
def __new__(cls, scheme, userinfo, host, port, path, query, fragment,
uri_ref, encoding='utf-8'):
parse_result = super(ParseResultBytes, cls).__new__(
cls,
scheme or None,
userinfo or None,
host,
port or None,
path or None,
query or None,
fragment or None)
parse_result.encoding = encoding
parse_result.reference = uri_ref
return parse_result
@classmethod
def from_string(cls, uri_string, encoding='utf-8', strict=True):
"""Parse a URI from the given unicode URI string.
:param str uri_string: Unicode URI to be parsed into a reference.
:param str encoding: The encoding of the string provided
:param bool strict: Parse strictly according to :rfc:`3986` if True.
If False, parse similarly to the standard library's urlparse
function.
:returns: :class:`ParseResultBytes` or subclass thereof
"""
reference = uri.URIReference.from_string(uri_string, encoding)
try:
subauthority = reference.authority_info()
except exceptions.InvalidAuthority:
if strict:
raise
userinfo, host, port = split_authority(reference.authority)
else:
# Thanks to Richard Barrell for this idea:
# https://twitter.com/0x2ba22e11/status/617338811975139328
userinfo, host, port = (subauthority.get(p)
for p in ('userinfo', 'host', 'port'))
if port:
try:
port = int(port)
except ValueError:
raise exceptions.InvalidPort(port)
to_bytes = compat.to_bytes
return cls(scheme=to_bytes(reference.scheme, encoding),
userinfo=to_bytes(userinfo, encoding),
host=to_bytes(host, encoding),
port=port,
path=to_bytes(reference.path, encoding),
query=to_bytes(reference.query, encoding),
fragment=to_bytes(reference.fragment, encoding),
uri_ref=reference,
encoding=encoding)
@property
def authority(self):
"""Normalized authority generated from the subauthority parts."""
return self.reference.authority.encode(self.encoding)
def copy_with(self, scheme=None, userinfo=None, host=None, port=None,
path=None, query=None, fragment=None):
attributes = zip(PARSED_COMPONENTS,
(scheme, userinfo, host, port, path, query, fragment))
attrs_dict = {}
for name, value in attributes:
if value is None:
value = getattr(self, name)
if not isinstance(value, bytes) and hasattr(value, 'encode'):
value = value.encode(self.encoding)
attrs_dict[name] = value
authority = self._generate_authority(attrs_dict)
to_str = compat.to_str
ref = self.reference.copy_with(
scheme=to_str(attrs_dict['scheme'], self.encoding),
authority=authority,
path=to_str(attrs_dict['path'], self.encoding),
query=to_str(attrs_dict['query'], self.encoding),
fragment=to_str(attrs_dict['fragment'], self.encoding)
)
return ParseResultBytes(
uri_ref=ref,
encoding=self.encoding,
**attrs_dict
)
def unsplit(self, use_idna=False):
"""Create a URI bytes object from the components.
:returns: The parsed URI reconstituted as a string.
:rtype: bytes
"""
parse_result = self
if use_idna and self.host:
# self.host is bytes, to encode to idna, we need to decode it
# first
host = self.host.decode(self.encoding)
hostbytes = host.encode('idna')
parse_result = self.copy_with(host=hostbytes)
uri = parse_result.reference.unsplit()
return uri.encode(self.encoding)
def split_authority(authority):
# Initialize our expected return values
userinfo = host = port = None
# Initialize an extra var we may need to use
extra_host = None
# Set-up rest in case there is no userinfo portion
rest = authority
if '@' in authority:
userinfo, rest = authority.rsplit('@', 1)
# Handle IPv6 host addresses
if rest.startswith('['):
host, rest = rest.split(']', 1)
host += ']'
if ':' in rest:
extra_host, port = rest.split(':', 1)
elif not host and rest:
host = rest
if extra_host and not host:
host = extra_host
return userinfo, host, port
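# --- Editorial sketch (not part of the vendored module) ---
# split_authority is the lenient fallback used when strict=False: it only
# partitions the string, it does not validate it.
if __name__ == '__main__':
    assert split_authority('user:pass@host.example:8080') == \
        ('user:pass', 'host.example', '8080')
    assert split_authority('[::1]:443') == (None, '[::1]', '443')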

View file

@@ -0,0 +1,385 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Copyright (c) 2015 Ian Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple
from .compat import to_str
from .exceptions import InvalidAuthority, ResolutionError
from .misc import (
ABSOLUTE_URI_MATCHER, FRAGMENT_MATCHER, IPv4_MATCHER, PATH_MATCHER,
QUERY_MATCHER, SCHEME_MATCHER, SUBAUTHORITY_MATCHER, URI_MATCHER,
URI_COMPONENTS, merge_paths
)
from .normalizers import (
encode_component, normalize_scheme, normalize_authority, normalize_path,
normalize_query, normalize_fragment
)
class URIReference(namedtuple('URIReference', URI_COMPONENTS)):
slots = ()
def __new__(cls, scheme, authority, path, query, fragment,
encoding='utf-8'):
ref = super(URIReference, cls).__new__(
cls,
scheme or None,
authority or None,
path or None,
query or None,
fragment or None)
ref.encoding = encoding
return ref
def __eq__(self, other):
other_ref = other
if isinstance(other, tuple):
other_ref = URIReference(*other)
elif not isinstance(other, URIReference):
try:
other_ref = URIReference.from_string(other)
except TypeError:
raise TypeError(
'Unable to compare URIReference() to {0}()'.format(
type(other).__name__))
# See http://tools.ietf.org/html/rfc3986#section-6.2
naive_equality = tuple(self) == tuple(other_ref)
return naive_equality or self.normalized_equality(other_ref)
@classmethod
def from_string(cls, uri_string, encoding='utf-8'):
"""Parse a URI reference from the given unicode URI string.
:param str uri_string: Unicode URI to be parsed into a reference.
:param str encoding: The encoding of the string provided
:returns: :class:`URIReference` or subclass thereof
"""
uri_string = to_str(uri_string, encoding)
split_uri = URI_MATCHER.match(uri_string).groupdict()
return cls(split_uri['scheme'], split_uri['authority'],
encode_component(split_uri['path'], encoding),
encode_component(split_uri['query'], encoding),
encode_component(split_uri['fragment'], encoding), encoding)
def authority_info(self):
"""Returns a dictionary with the ``userinfo``, ``host``, and ``port``.
        If the authority is not valid, it will raise an ``InvalidAuthority``
        exception.
:returns:
``{'userinfo': 'username:password', 'host': 'www.example.com',
'port': '80'}``
:rtype: dict
        :raises InvalidAuthority: If the authority is not ``None`` and cannot
            be parsed.
"""
if not self.authority:
return {'userinfo': None, 'host': None, 'port': None}
match = SUBAUTHORITY_MATCHER.match(self.authority)
if match is None:
# In this case, we have an authority that was parsed from the URI
# Reference, but it cannot be further parsed by our
# SUBAUTHORITY_MATCHER. In this case it must not be a valid
# authority.
raise InvalidAuthority(self.authority.encode(self.encoding))
# We had a match, now let's ensure that it is actually a valid host
# address if it is IPv4
matches = match.groupdict()
host = matches.get('host')
if (host and IPv4_MATCHER.match(host) and not
valid_ipv4_host_address(host)):
# If we have a host, it appears to be IPv4 and it does not have
# valid bytes, it is an InvalidAuthority.
raise InvalidAuthority(self.authority.encode(self.encoding))
return matches
@property
def host(self):
"""If present, a string representing the host."""
try:
authority = self.authority_info()
except InvalidAuthority:
return None
return authority['host']
@property
def port(self):
"""If present, the port (as a string) extracted from the authority."""
try:
authority = self.authority_info()
except InvalidAuthority:
return None
return authority['port']
@property
def userinfo(self):
"""If present, the userinfo extracted from the authority."""
try:
authority = self.authority_info()
except InvalidAuthority:
return None
return authority['userinfo']
def is_absolute(self):
"""Determine if this URI Reference is an absolute URI.
See http://tools.ietf.org/html/rfc3986#section-4.3 for explanation.
:returns: ``True`` if it is an absolute URI, ``False`` otherwise.
:rtype: bool
"""
return bool(ABSOLUTE_URI_MATCHER.match(self.unsplit()))
def is_valid(self, **kwargs):
"""Determines if the URI is valid.
:param bool require_scheme: Set to ``True`` if you wish to require the
presence of the scheme component.
:param bool require_authority: Set to ``True`` if you wish to require
the presence of the authority component.
:param bool require_path: Set to ``True`` if you wish to require the
presence of the path component.
:param bool require_query: Set to ``True`` if you wish to require the
presence of the query component.
:param bool require_fragment: Set to ``True`` if you wish to require
the presence of the fragment component.
:returns: ``True`` if the URI is valid. ``False`` otherwise.
:rtype: bool
"""
validators = [
(self.scheme_is_valid, kwargs.get('require_scheme', False)),
(self.authority_is_valid, kwargs.get('require_authority', False)),
(self.path_is_valid, kwargs.get('require_path', False)),
(self.query_is_valid, kwargs.get('require_query', False)),
(self.fragment_is_valid, kwargs.get('require_fragment', False)),
]
return all(v(r) for v, r in validators)
def _is_valid(self, value, matcher, require):
if require:
return (value is not None
and matcher.match(value))
        # require is False, so a missing component is acceptable here
return value is None or matcher.match(value)
def authority_is_valid(self, require=False):
"""Determines if the authority component is valid.
:param str require: Set to ``True`` to require the presence of this
component.
:returns: ``True`` if the authority is valid. ``False`` otherwise.
:rtype: bool
"""
try:
self.authority_info()
except InvalidAuthority:
return False
is_valid = self._is_valid(self.authority,
SUBAUTHORITY_MATCHER,
require)
# Ensure that IPv4 addresses have valid bytes
if is_valid and self.host and IPv4_MATCHER.match(self.host):
return valid_ipv4_host_address(self.host)
# Perhaps the host didn't exist or if it did, it wasn't an IPv4-like
# address. In either case, we want to rely on the `_is_valid` check,
# so let's return that.
return is_valid
def scheme_is_valid(self, require=False):
"""Determines if the scheme component is valid.
:param str require: Set to ``True`` to require the presence of this
component.
:returns: ``True`` if the scheme is valid. ``False`` otherwise.
:rtype: bool
"""
return self._is_valid(self.scheme, SCHEME_MATCHER, require)
def path_is_valid(self, require=False):
"""Determines if the path component is valid.
:param str require: Set to ``True`` to require the presence of this
component.
:returns: ``True`` if the path is valid. ``False`` otherwise.
:rtype: bool
"""
return self._is_valid(self.path, PATH_MATCHER, require)
def query_is_valid(self, require=False):
"""Determines if the query component is valid.
:param str require: Set to ``True`` to require the presence of this
component.
:returns: ``True`` if the query is valid. ``False`` otherwise.
:rtype: bool
"""
return self._is_valid(self.query, QUERY_MATCHER, require)
def fragment_is_valid(self, require=False):
"""Determines if the fragment component is valid.
:param str require: Set to ``True`` to require the presence of this
component.
:returns: ``True`` if the fragment is valid. ``False`` otherwise.
:rtype: bool
"""
return self._is_valid(self.fragment, FRAGMENT_MATCHER, require)
def normalize(self):
"""Normalize this reference as described in Section 6.2.2
This is not an in-place normalization. Instead this creates a new
URIReference.
:returns: A new reference object with normalized components.
:rtype: URIReference
"""
# See http://tools.ietf.org/html/rfc3986#section-6.2.2 for logic in
# this method.
return URIReference(normalize_scheme(self.scheme or ''),
normalize_authority(
(self.userinfo, self.host, self.port)),
normalize_path(self.path or ''),
normalize_query(self.query or ''),
normalize_fragment(self.fragment or ''))
def normalized_equality(self, other_ref):
"""Compare this URIReference to another URIReference.
:param URIReference other_ref: (required), The reference with which
we're comparing.
:returns: ``True`` if the references are equal, ``False`` otherwise.
:rtype: bool
"""
return tuple(self.normalize()) == tuple(other_ref.normalize())
def resolve_with(self, base_uri, strict=False):
"""Use an absolute URI Reference to resolve this relative reference.
Assuming this is a relative reference that you would like to resolve,
use the provided base URI to resolve it.
See http://tools.ietf.org/html/rfc3986#section-5 for more information.
:param base_uri: Either a string or URIReference. It must be an
absolute URI or it will raise an exception.
:returns: A new URIReference which is the result of resolving this
reference using ``base_uri``.
:rtype: :class:`URIReference`
:raises ResolutionError: If the ``base_uri`` is not an absolute URI.
"""
if not isinstance(base_uri, URIReference):
base_uri = URIReference.from_string(base_uri)
if not base_uri.is_absolute():
raise ResolutionError(base_uri)
# This is optional per
# http://tools.ietf.org/html/rfc3986#section-5.2.1
base_uri = base_uri.normalize()
# The reference we're resolving
resolving = self
if not strict and resolving.scheme == base_uri.scheme:
resolving = resolving.copy_with(scheme=None)
# http://tools.ietf.org/html/rfc3986#page-32
if resolving.scheme is not None:
target = resolving.copy_with(path=normalize_path(resolving.path))
else:
if resolving.authority is not None:
target = resolving.copy_with(
scheme=base_uri.scheme,
path=normalize_path(resolving.path)
)
else:
if resolving.path is None:
if resolving.query is not None:
query = resolving.query
else:
query = base_uri.query
target = resolving.copy_with(
scheme=base_uri.scheme,
authority=base_uri.authority,
path=base_uri.path,
query=query
)
else:
if resolving.path.startswith('/'):
path = normalize_path(resolving.path)
else:
path = normalize_path(
merge_paths(base_uri, resolving.path)
)
target = resolving.copy_with(
scheme=base_uri.scheme,
authority=base_uri.authority,
path=path,
query=resolving.query
)
return target
def unsplit(self):
"""Create a URI string from the components.
:returns: The URI Reference reconstituted as a string.
:rtype: str
"""
# See http://tools.ietf.org/html/rfc3986#section-5.3
result_list = []
if self.scheme:
result_list.extend([self.scheme, ':'])
if self.authority:
result_list.extend(['//', self.authority])
if self.path:
result_list.append(self.path)
if self.query:
result_list.extend(['?', self.query])
if self.fragment:
result_list.extend(['#', self.fragment])
return ''.join(result_list)
def copy_with(self, scheme=None, authority=None, path=None, query=None,
fragment=None):
attributes = {
'scheme': scheme,
'authority': authority,
'path': path,
'query': query,
'fragment': fragment,
}
for key, value in list(attributes.items()):
if value is None:
del attributes[key]
return self._replace(**attributes)
def valid_ipv4_host_address(host):
# If the host exists, and it might be IPv4, check each byte in the
# address.
return all([0 <= int(byte, base=10) <= 255 for byte in host.split('.')])
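# --- Editorial worked example (not part of the vendored module) ---
# Reference resolution, using two of the "normal" examples from RFC 3986
# Section 5.4.1.
if __name__ == '__main__':
    base = URIReference.from_string('http://a/b/c/d;p?q')
    rel = URIReference.from_string('g')
    assert rel.resolve_with(base).unsplit() == 'http://a/b/c/g'
    assert URIReference.from_string('../..').resolve_with(base).unsplit() == \
        'http://a/'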

View file

@@ -0,0 +1,307 @@
# -*- coding: utf-8 -*-
"""
hyper/ssl_compat
~~~~~~~~~~~~~~~~
Shoves pyOpenSSL into an API that looks like the standard Python 3.x ssl
module.
Currently exposes exactly those attributes, classes, and methods that we
actually use in hyper (all method signatures are complete, however). May be
expanded to something more general-purpose in the future.
"""
try:
import StringIO as BytesIO
except ImportError:
from io import BytesIO
import errno
import socket
import time
from OpenSSL import SSL as ossl
from service_identity.pyopenssl import verify_hostname as _verify
CERT_NONE = ossl.VERIFY_NONE
CERT_REQUIRED = ossl.VERIFY_PEER | ossl.VERIFY_FAIL_IF_NO_PEER_CERT
_OPENSSL_ATTRS = dict(
OP_NO_COMPRESSION='OP_NO_COMPRESSION',
PROTOCOL_TLSv1_2='TLSv1_2_METHOD',
PROTOCOL_SSLv23='SSLv23_METHOD',
)
for external, internal in _OPENSSL_ATTRS.items():
value = getattr(ossl, internal, None)
if value:
locals()[external] = value
OP_ALL = 0
# TODO: Find out the names of these other flags.
for bit in [31] + list(range(10)):
OP_ALL |= 1 << bit
HAS_NPN = True
def _proxy(method):
def inner(self, *args, **kwargs):
return getattr(self._conn, method)(*args, **kwargs)
return inner
# Referenced in hyper/http20/connection.py. These values come
# from the python ssl package, and must be defined in this file
# for hyper to work in python versions <2.7.9
SSL_ERROR_WANT_READ = 2
SSL_ERROR_WANT_WRITE = 3
# TODO missing some attributes
class SSLError(OSError):
pass
class CertificateError(SSLError):
pass
def verify_hostname(ssl_sock, server_hostname):
"""
A method nearly compatible with the stdlib's match_hostname.
"""
if isinstance(server_hostname, bytes):
server_hostname = server_hostname.decode('ascii')
return _verify(ssl_sock._conn, server_hostname)
class SSLSocket(object):
SSL_TIMEOUT = 3
SSL_RETRY = .01
def __init__(self, conn, server_side, do_handshake_on_connect,
suppress_ragged_eofs, server_hostname, check_hostname):
self._conn = conn
self._do_handshake_on_connect = do_handshake_on_connect
self._suppress_ragged_eofs = suppress_ragged_eofs
self._check_hostname = check_hostname
if server_side:
self._conn.set_accept_state()
else:
if server_hostname:
self._conn.set_tlsext_host_name(
server_hostname.encode('utf-8')
)
self._server_hostname = server_hostname
# FIXME does this override do_handshake_on_connect=False?
self._conn.set_connect_state()
if self.connected and self._do_handshake_on_connect:
self.do_handshake()
@property
def connected(self):
try:
self._conn.getpeername()
except socket.error as e:
if e.errno != errno.ENOTCONN:
                # Any error other than "not connected" (ENOTCONN) is
                # unexpected here, so re-raise it.
raise
return False
return True
# Lovingly stolen from CherryPy
# (http://svn.cherrypy.org/tags/cherrypy-3.2.1/cherrypy/wsgiserver/ssl_pyopenssl.py).
def _safe_ssl_call(self, suppress_ragged_eofs, call, *args, **kwargs):
"""Wrap the given call with SSL error-trapping."""
start = time.time()
while True:
try:
return call(*args, **kwargs)
except (ossl.WantReadError, ossl.WantWriteError):
# Sleep and try again. This is dangerous, because it means
# the rest of the stack has no way of differentiating
# between a "new handshake" error and "client dropped".
# Note this isn't an endless loop: there's a timeout below.
time.sleep(self.SSL_RETRY)
except ossl.Error as e:
if suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'):
return b''
raise socket.error(e.args[0])
if time.time() - start > self.SSL_TIMEOUT:
raise socket.timeout('timed out')
def connect(self, address):
self._conn.connect(address)
if self._do_handshake_on_connect:
self.do_handshake()
def do_handshake(self):
self._safe_ssl_call(False, self._conn.do_handshake)
if self._check_hostname:
verify_hostname(self, self._server_hostname)
def recv(self, bufsize, flags=None):
return self._safe_ssl_call(
self._suppress_ragged_eofs,
self._conn.recv,
bufsize,
flags
)
def recv_into(self, buffer, bufsize=None, flags=None):
# A temporary recv_into implementation. Should be replaced when
# PyOpenSSL has merged pyca/pyopenssl#121.
if bufsize is None:
bufsize = len(buffer)
data = self.recv(bufsize, flags)
data_len = len(data)
buffer[0:data_len] = data
return data_len
def send(self, data, flags=None):
return self._safe_ssl_call(False, self._conn.send, data, flags)
def sendall(self, data, flags=None):
return self._safe_ssl_call(False, self._conn.sendall, data, flags)
def selected_npn_protocol(self):
proto = self._conn.get_next_proto_negotiated()
if isinstance(proto, bytes):
proto = proto.decode('ascii')
return proto if proto else None
def selected_alpn_protocol(self):
proto = self._conn.get_alpn_proto_negotiated()
if isinstance(proto, bytes):
proto = proto.decode('ascii')
return proto if proto else None
def getpeercert(self):
def resolve_alias(alias):
return dict(
C='countryName',
ST='stateOrProvinceName',
L='localityName',
O='organizationName',
OU='organizationalUnitName',
CN='commonName',
).get(alias, alias)
def to_components(name):
# TODO Verify that these are actually *supposed* to all be
# single-element tuples, and that's not just a quirk of the
# examples I've seen.
            return tuple(
                [
                    ((resolve_alias(k.decode('utf-8')), v.decode('utf-8')),)
                    for k, v in name.get_components()
                ]
            )
# The standard getpeercert() takes the nice X509 object tree returned
# by OpenSSL and turns it into a dict according to some format it seems
# to have made up on the spot. Here, we do our best to emulate that.
cert = self._conn.get_peer_certificate()
result = dict(
issuer=to_components(cert.get_issuer()),
subject=to_components(cert.get_subject()),
            version=cert.get_version(),
serialNumber=cert.get_serial_number(),
notBefore=cert.get_notBefore(),
notAfter=cert.get_notAfter(),
)
# TODO extensions, including subjectAltName
# (see _decode_certificate in _ssl.c)
return result
# a dash of magic to reduce boilerplate
methods = ['accept', 'bind', 'close', 'getsockname', 'listen', 'fileno']
for method in methods:
locals()[method] = _proxy(method)
class SSLContext(object):
def __init__(self, protocol):
self.protocol = protocol
self._ctx = ossl.Context(protocol)
self.options = OP_ALL
self.check_hostname = False
self.npn_protos = []
@property
def options(self):
return self._options
@options.setter
def options(self, value):
self._options = value
self._ctx.set_options(value)
@property
def verify_mode(self):
return self._ctx.get_verify_mode()
@verify_mode.setter
def verify_mode(self, value):
# TODO verify exception is raised on failure
self._ctx.set_verify(
value, lambda conn, cert, errnum, errdepth, ok: ok
)
def set_default_verify_paths(self):
self._ctx.set_default_verify_paths()
def load_verify_locations(self, cafile=None, capath=None, cadata=None):
# TODO factor out common code
if cafile is not None:
cafile = cafile.encode('utf-8')
if capath is not None:
capath = capath.encode('utf-8')
self._ctx.load_verify_locations(cafile, capath)
if cadata is not None:
self._ctx.load_verify_locations(BytesIO(cadata))
def load_cert_chain(self, certfile, keyfile=None, password=None):
self._ctx.use_certificate_file(certfile)
if password is not None:
self._ctx.set_passwd_cb(
lambda max_length, prompt_twice, userdata: password
)
self._ctx.use_privatekey_file(keyfile or certfile)
def set_npn_protocols(self, protocols):
self.protocols = list(map(lambda x: x.encode('ascii'), protocols))
def cb(conn, protos):
# Detect the overlapping set of protocols.
overlap = set(protos) & set(self.protocols)
# Select the option that comes last in the list in the overlap.
for p in self.protocols:
if p in overlap:
return p
else:
return b''
self._ctx.set_npn_select_callback(cb)
def set_alpn_protocols(self, protocols):
protocols = list(map(lambda x: x.encode('ascii'), protocols))
self._ctx.set_alpn_protos(protocols)
def wrap_socket(self,
sock,
server_side=False,
do_handshake_on_connect=True,
suppress_ragged_eofs=True,
server_hostname=None):
conn = ossl.Connection(self._ctx, sock)
return SSLSocket(conn, server_side, do_handshake_on_connect,
suppress_ragged_eofs, server_hostname,
# TODO what if this is changed after the fact?
self.check_hostname)
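# --- Editorial usage sketch (not part of the vendored module) ---
# How hyper is expected to drive this shim. Assumptions: pyOpenSSL and
# service_identity are installed, the host is reachable, and
# PROTOCOL_SSLv23/CERT_REQUIRED are the re-exports defined above (socket
# is already imported at the top of this module).
if __name__ == '__main__':
    ctx = SSLContext(PROTOCOL_SSLv23)
    ctx.set_default_verify_paths()
    ctx.verify_mode = CERT_REQUIRED
    ctx.check_hostname = True
    ctx.set_alpn_protocols(['h2', 'http/1.1'])
    raw = socket.create_connection(('example.com', 443))
    ssl_sock = ctx.wrap_socket(raw, server_hostname='example.com')
    print(ssl_sock.selected_alpn_protocol())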

View file

@@ -0,0 +1,133 @@
# -*- coding: utf-8 -*-
"""
hyper/tls
~~~~~~~~~
Contains the TLS/SSL logic for use in hyper.
"""
import os.path as path
from .common.exceptions import MissingCertFile
from .compat import ignore_missing, ssl
NPN_PROTOCOL = 'h2'
H2_NPN_PROTOCOLS = [NPN_PROTOCOL, 'h2-16', 'h2-15', 'h2-14']
SUPPORTED_NPN_PROTOCOLS = H2_NPN_PROTOCOLS + ['http/1.1']
H2C_PROTOCOL = 'h2c'
# We have a singleton SSLContext object. There's no reason to be creating one
# per connection.
_context = None
# Work out where our certificates are.
cert_loc = path.join(path.dirname(__file__), 'certs.pem')
def wrap_socket(sock, server_hostname, ssl_context=None, force_proto=None):
"""
A vastly simplified SSL wrapping function. We'll probably extend this to
do more things later.
"""
global _context
if ssl_context:
# if an SSLContext is provided then use it instead of default context
_ssl_context = ssl_context
else:
# create the singleton SSLContext we use
if _context is None: # pragma: no cover
_context = init_context()
_ssl_context = _context
# the spec requires SNI support
ssl_sock = _ssl_context.wrap_socket(sock, server_hostname=server_hostname)
# Setting SSLContext.check_hostname to True only verifies that the
# post-handshake servername matches that of the certificate. We also need
# to check that it matches the requested one.
if _ssl_context.check_hostname: # pragma: no cover
try:
ssl.match_hostname(ssl_sock.getpeercert(), server_hostname)
except AttributeError:
ssl.verify_hostname(ssl_sock, server_hostname) # pyopenssl
# Allow for the protocol to be forced externally.
proto = force_proto
    # ALPN is newer, so we prefer it over NPN. The odds of us getting
    # different answers are pretty low, but let's be sure.
with ignore_missing():
if proto is None:
proto = ssl_sock.selected_alpn_protocol()
with ignore_missing():
if proto is None:
proto = ssl_sock.selected_npn_protocol()
return (ssl_sock, proto)
def init_context(cert_path=None, cert=None, cert_password=None):
"""
Create a new ``SSLContext`` that is correctly set up for an HTTP/2
connection. This SSL context object can be customized and passed as a
parameter to the :class:`HTTPConnection <hyper.HTTPConnection>` class.
    Provide your own certificate file if you don't want to use hyper's
    default certificate. The path to the certificate can be absolute or
    relative to your working directory.
:param cert_path: (optional) The path to the certificate file of
certification authority (CA) certificates
:param cert: (optional) if string, path to ssl client cert file (.pem).
If tuple, ('cert', 'key') pair.
        The certfile string must be the path to a single file in PEM format
        containing the certificate as well as any number of CA certificates
        needed to establish the certificate's authenticity. The keyfile
        string, if present, must point to a file containing the private key.
        Otherwise the private key will be taken from certfile as well.
:param cert_password: (optional) The password argument may be a function to
call to get the password for decrypting the private key. It will only
be called if the private key is encrypted and a password is necessary.
It will be called with no arguments, and it should return a string,
bytes, or bytearray. If the return value is a string it will be
encoded as UTF-8 before using it to decrypt the key. Alternatively a
string, bytes, or bytearray value may be supplied directly as the
password argument. It will be ignored if the private key is not
encrypted and no password is needed.
:returns: An ``SSLContext`` correctly set up for HTTP/2.
"""
cafile = cert_path or cert_loc
if not cafile or not path.exists(cafile):
err_msg = ("No certificate found at " + str(cafile) + ". Either " +
"ensure the default cert.pem file is included in the " +
"distribution or provide a custom certificate when " +
"creating the connection.")
raise MissingCertFile(err_msg)
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
context.set_default_verify_paths()
context.load_verify_locations(cafile=cafile)
context.verify_mode = ssl.CERT_REQUIRED
context.check_hostname = True
with ignore_missing():
context.set_npn_protocols(SUPPORTED_NPN_PROTOCOLS)
with ignore_missing():
context.set_alpn_protocols(SUPPORTED_NPN_PROTOCOLS)
# required by the spec
context.options |= ssl.OP_NO_COMPRESSION
if cert is not None:
try:
basestring
except NameError:
basestring = (str, bytes)
if not isinstance(cert, basestring):
context.load_cert_chain(cert[0], cert[1], cert_password)
else:
context.load_cert_chain(cert, password=cert_password)
return context
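# --- Editorial usage sketch (not part of the vendored module) ---
# Tying the two helpers together; 'example.com' stands in for any
# h2-capable host and the sketch assumes network access plus the bundled
# certs.pem.
if __name__ == '__main__':
    import socket
    raw = socket.create_connection(('example.com', 443))
    ssl_sock, proto = wrap_socket(raw, 'example.com',
                                  ssl_context=init_context())
    print(proto)  # 'h2' when HTTP/2 is negotiated, else 'http/1.1' or None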

View file

@@ -238,15 +238,10 @@ def run_tests(config, test_paths, product, **kwargs):
if test_type == "testharness":
run_tests = {"testharness": []}
for test in test_loader.tests["testharness"]:
if test.testdriver and not executor_cls.supports_testdriver:
if (test.testdriver and not executor_cls.supports_testdriver) or (
test.jsshell and not executor_cls.supports_jsshell):
logger.test_start(test.id)
logger.test_end(test.id, status="SKIP")
elif test.jsshell and not executor_cls.supports_jsshell:
# We expect that tests for JavaScript shells
# will not be run along with tests that run in
# a full web browser, so we silently skip them
# here.
pass
else:
run_tests["testharness"].append(test)
else:

View file

@@ -11,6 +11,10 @@ from six.moves.urllib.request import Request as BaseRequest
from six.moves.urllib.request import urlopen
from six import binary_type, iteritems
from hyper import HTTP20Connection, tls
import ssl
from localpaths import repo_root
wptserve = pytest.importorskip("wptserve")
logging.basicConfig()
@@ -74,3 +78,28 @@ class TestUsingServer(unittest.TestCase):
req.add_header("Authorization", "Basic %s" % base64.b64encode('%s:%s' % auth))
return urlopen(req)
@pytest.mark.skipif(not wptserve.utils.http2_compatible(), reason="h2 server only works in python 2.7.15")
class TestUsingH2Server:
def setup_method(self, test_method):
self.server = wptserve.server.WebTestHttpd(host="localhost",
port=0,
use_ssl=True,
doc_root=doc_root,
key_file=os.path.join(repo_root, "tools", "certs", "web-platform.test.key"),
certificate=os.path.join(repo_root, "tools", "certs", "web-platform.test.pem"),
handler_cls=wptserve.server.Http2WebTestRequestHandler,
http2=True)
self.server.start(False)
context = tls.init_context()
context.check_hostname = False
context.verify_mode = ssl.CERT_NONE
context.set_alpn_protocols(['h2'])
self.conn = HTTP20Connection('%s:%i' % (self.server.host, self.server.port), enable_push=True, secure=True, ssl_context=context)
self.conn.connect()
def teardown_method(self, test_method):
self.conn.close()
self.server.stop()

View file

@@ -0,0 +1,2 @@
def handle_data(frame, request, response):
response.content = frame.data[::-1]

View file

@@ -0,0 +1,3 @@
def handle_headers(frame, request, response):
response.status = 203
response.headers.update([('test', 'passed')])

View file

@@ -0,0 +1,6 @@
def handle_headers(frame, request, response):
response.status = 203
response.headers.update([('test', 'passed')])
def handle_data(frame, request, response):
response.content = frame.data[::-1]
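
These per-frame hooks can keep state between frames on the request object. A hedged sketch of a script that accumulates DATA frames before responding (the `accumulated` attribute is a hypothetical name; `frame.stream_ended` is the h2 event attribute the server checks, as the server changes later in this diff show):

def handle_headers(frame, request, response):
    response.status = 200
    request.accumulated = b''

def handle_data(frame, request, response):
    # Called once per DATA frame; concatenate the chunks in order.
    request.accumulated += frame.data
    if frame.stream_ended:
        response.content = request.accumulated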

View file

@@ -8,7 +8,7 @@ import pytest
from six.moves.urllib.error import HTTPError
wptserve = pytest.importorskip("wptserve")
from .base import TestUsingServer, doc_root
from .base import TestUsingServer, TestUsingH2Server, doc_root
class TestFileHandler(TestUsingServer):
@@ -323,5 +323,79 @@ class TestAsIsHandler(TestUsingServer):
self.assertEqual("Content", resp.read())
# Add a check that the response is actually sane
class TestH2Handler(TestUsingH2Server):
def test_handle_headers(self):
self.conn.request("GET", '/test_h2_headers.py')
resp = self.conn.get_response()
assert resp.status == 203
assert resp.headers['test'][0] == 'passed'
assert resp.read() == ''
def test_only_main(self):
self.conn.request("GET", '/test_tuple_3.py')
resp = self.conn.get_response()
assert resp.status == 202
assert resp.headers['Content-Type'][0] == 'text/html'
assert resp.headers['X-Test'][0] == 'PASS'
assert resp.read() == b'PASS'
def test_handle_data(self):
self.conn.request("POST", '/test_h2_data.py', body="hello world!")
resp = self.conn.get_response()
assert resp.status == 200
assert resp.read() == b'!dlrow olleh'
def test_handle_headers_data(self):
self.conn.request("POST", '/test_h2_headers_data.py', body="hello world!")
resp = self.conn.get_response()
assert resp.status == 203
assert resp.headers['test'][0] == 'passed'
assert resp.read() == b'!dlrow olleh'
def test_no_main_or_handlers(self):
self.conn.request("GET", '/no_main.py')
resp = self.conn.get_response()
assert resp.status == 500
assert "No main function or handlers in script " in json.loads(resp.read())["error"]["message"]
def test_not_found(self):
self.conn.request("GET", '/no_exist.py')
resp = self.conn.get_response()
assert resp.status == 404
def test_requesting_multiple_resources(self):
# 1st .py resource
self.conn.request("GET", '/test_h2_headers.py')
resp = self.conn.get_response()
assert resp.status == 203
assert resp.headers['test'][0] == 'passed'
assert resp.read() == ''
# 2nd .py resource
self.conn.request("GET", '/test_tuple_3.py')
resp = self.conn.get_response()
assert resp.status == 202
assert resp.headers['Content-Type'][0] == 'text/html'
assert resp.headers['X-Test'][0] == 'PASS'
assert resp.read() == b'PASS'
# 3rd .py resource
self.conn.request("GET", '/test_h2_headers.py')
resp = self.conn.get_response()
assert resp.status == 203
assert resp.headers['test'][0] == 'passed'
assert resp.read() == ''
if __name__ == '__main__':
unittest.main()

View file

@@ -1,13 +1,15 @@
import sys
import os
import unittest
import json
from types import MethodType
from io import BytesIO
import pytest
wptserve = pytest.importorskip("wptserve")
from .base import TestUsingServer, doc_root
from .base import TestUsingServer, TestUsingH2Server, doc_root
from hyper.h2.exceptions import ProtocolError
def send_body_as_header(self):
if self._response.add_required_headers:
@@ -182,5 +184,213 @@ class TestResponse(TestUsingServer):
resp = self.request(route[1])
assert resp.read() == resp_content
class TestH2Response(TestUsingH2Server):
def test_write_without_ending_stream(self):
data = b"TEST"
@wptserve.handlers.handler
def handler(request, response):
headers = [
('server', 'test-h2'),
('test', 'PASS'),
]
response.writer.write_headers(headers, 202)
response.writer.write_data_frame(data, False)
# Should detect stream isn't ended and call `writer.end_stream()`
route = ("GET", "/h2test/test", handler)
self.server.router.register(*route)
self.conn.request(route[0], route[1])
resp = self.conn.get_response()
assert resp.status == 202
assert [x for x in resp.headers.items()] == [('server', 'test-h2'), ('test', 'PASS')]
assert resp.read() == data
def test_push(self):
data = b"TEST"
push_data = b"PUSH TEST"
@wptserve.handlers.handler
def handler(request, response):
headers = [
('server', 'test-h2'),
('test', 'PASS'),
]
response.writer.write_headers(headers, 202)
promise_headers = [
(':method', 'GET'),
(':path', '/push-test'),
(':scheme', 'https'),
(':authority', '%s:%i' % (self.server.host, self.server.port))
]
push_headers = [
('server', 'test-h2'),
('content-length', str(len(push_data))),
('content-type', 'text'),
]
response.writer.write_push(
promise_headers,
push_stream_id=10,
status=203,
response_headers=push_headers,
response_data=push_data
)
response.writer.write_data_frame(data, True)
route = ("GET", "/h2test/test_push", handler)
self.server.router.register(*route)
self.conn.request(route[0], route[1])
resp = self.conn.get_response()
assert resp.status == 202
assert [x for x in resp.headers.items()] == [('server', 'test-h2'), ('test', 'PASS')]
assert resp.read() == data
push_promise = next(self.conn.get_pushes())
push = push_promise.get_response()
assert push_promise.path == '/push-test'
assert push.status == 203
assert push.read() == push_data
def test_set_error(self):
@wptserve.handlers.handler
def handler(request, response):
response.set_error(503, message="Test error")
route = ("GET", "/h2test/test_set_error", handler)
self.server.router.register(*route)
self.conn.request(route[0], route[1])
resp = self.conn.get_response()
assert resp.status == 503
assert json.loads(resp.read()) == json.loads("{\"error\": {\"message\": \"Test error\", \"code\": 503}}")
def test_file_like_response(self):
@wptserve.handlers.handler
def handler(request, response):
content = BytesIO("Hello, world!")
response.content = content
route = ("GET", "/h2test/test_file_like_response", handler)
self.server.router.register(*route)
self.conn.request(route[0], route[1])
resp = self.conn.get_response()
assert resp.status == 200
assert resp.read() == "Hello, world!"
def test_list_response(self):
@wptserve.handlers.handler
def handler(request, response):
response.content = ['hello', 'world']
route = ("GET", "/h2test/test_file_like_response", handler)
self.server.router.register(*route)
self.conn.request(route[0], route[1])
resp = self.conn.get_response()
assert resp.status == 200
assert resp.read() == "helloworld"
def test_content_longer_than_frame_size(self):
@wptserve.handlers.handler
def handler(request, response):
size = response.writer.get_max_payload_size()
content = "a" * (size + 5)
return [('payload_size', size)], content
route = ("GET", "/h2test/test_content_longer_than_frame_size", handler)
self.server.router.register(*route)
self.conn.request(route[0], route[1])
resp = self.conn.get_response()
assert resp.status == 200
payload_size = int(resp.headers['payload_size'][0])
assert payload_size
assert resp.read() == "a" * (payload_size + 5)
def test_encode(self):
@wptserve.handlers.handler
def handler(request, response):
response.encoding = "utf8"
t = response.writer.encode(u"hello")
assert t == "hello"
with pytest.raises(ValueError):
response.writer.encode(None)
route = ("GET", "/h2test/test_content_longer_than_frame_size", handler)
self.server.router.register(*route)
self.conn.request(route[0], route[1])
self.conn.get_response()
def test_raw_header_frame(self):
@wptserve.handlers.handler
def handler(request, response):
response.writer.write_raw_header_frame([
(':status', '204'),
('server', 'TEST-H2')
], end_headers=True)
route = ("GET", "/h2test/test_file_like_response", handler)
self.server.router.register(*route)
self.conn.request(route[0], route[1])
resp = self.conn.get_response()
assert resp.status == 204
assert resp.headers['server'][0] == 'TEST-H2'
assert resp.read() == ''
def test_raw_header_frame_invalid(self):
@wptserve.handlers.handler
def handler(request, response):
response.writer.write_raw_header_frame([
('server', 'TEST-H2'),
(':status', '204')
], end_headers=True)
route = ("GET", "/h2test/test_file_like_response", handler)
self.server.router.register(*route)
self.conn.request(route[0], route[1])
with pytest.raises(ProtocolError):
# The server can send an invalid HEADERS frame, which will cause a protocol error in the client
self.conn.get_response()
def test_raw_data_frame(self):
@wptserve.handlers.handler
def handler(request, response):
response.writer.write_raw_data_frame(data=b'Hello world', end_stream=True)
route = ("GET", "/h2test/test_file_like_response", handler)
self.server.router.register(*route)
sid = self.conn.request(route[0], route[1])
assert self.conn.streams[sid]._read() == 'Hello world'
def test_raw_header_continuation_frame(self):
@wptserve.handlers.handler
def handler(request, response):
response.writer.write_raw_header_frame([
(':status', '204')
])
response.writer.write_raw_continuation_frame([
('server', 'TEST-H2')
], end_headers=True)
route = ("GET", "/h2test/test_file_like_response", handler)
self.server.router.register(*route)
self.conn.request(route[0], route[1])
resp = self.conn.get_response()
assert resp.status == 204
assert resp.headers['server'][0] == 'TEST-H2'
assert resp.read() == ''
if __name__ == '__main__':
unittest.main()

View file

@@ -7,6 +7,8 @@ import traceback
from six.moves.urllib.parse import parse_qs, quote, unquote, urljoin
from six import iteritems
from h2.events import RequestReceived, DataReceived
from .constants import content_types
from .pipes import Pipeline, template
from .ranges import RangeParser
@@ -230,7 +232,17 @@ class PythonScriptHandler(object):
def __repr__(self):
return "<%s base_path:%s url_base:%s>" % (self.__class__.__name__, self.base_path, self.url_base)
def __call__(self, request, response):
def _set_path_and_load_file(self, request, response, func):
"""
This modifies `sys.path` and executes the requested Python file, collecting its top-level names into an environ dict.
Once the environ is loaded, the passed `func` is run with it.
:param request: The request object
:param response: The response object
:param func: The function to run with the loaded environ and the resolved path. Signature: (request, response, environ, path)
:return: The return value of func
"""
path = filesystem_path(self.base_path, request, self.url_base)
sys_path = sys.path[:]
@@ -240,18 +252,54 @@
sys.path.insert(0, os.path.dirname(path))
with open(path, 'rb') as f:
exec(compile(f.read(), path, 'exec'), environ, environ)
if "main" in environ:
handler = FunctionHandler(environ["main"])
handler(request, response)
wrap_pipeline(path, request, response)
else:
raise HTTPException(500, "No main function in script %s" % path)
if func is not None:
return func(request, response, environ, path)
except IOError:
raise HTTPException(404)
finally:
sys.path = sys_path
sys.modules = sys_modules
def __call__(self, request, response):
def func(request, response, environ, path):
if "main" in environ:
handler = FunctionHandler(environ["main"])
handler(request, response)
wrap_pipeline(path, request, response)
else:
raise HTTPException(500, "No main function in script %s" % path)
self._set_path_and_load_file(request, response, func)
def frame_handler(self, request):
"""
This creates a FunctionHandler with one or more of the handling functions.
Used by the H2 server.
:param request: The request object used to generate the handler.
:return: A FunctionHandler object with one or more of these functions: `handle_headers`, `handle_data` or `main`
"""
def func(request, response, environ, path):
def _main(req, resp):
pass
handler = FunctionHandler(_main)
if "main" in environ:
handler.func = environ["main"]
if "handle_headers" in environ:
handler.handle_headers = environ["handle_headers"]
if "handle_data" in environ:
handler.handle_data = environ["handle_data"]
if handler.func is _main and not hasattr(handler, "handle_headers") and not hasattr(handler, "handle_data"):
raise HTTPException(500, "No main function or handlers in script %s" % path)
return handler
return self._set_path_and_load_file(request, None, func)
python_script_handler = PythonScriptHandler()
@@ -287,7 +335,6 @@ class FunctionHandler(object):
def handler(func):
return FunctionHandler(func)
class JsonHandler(object):
def __init__(self, func):
self.func = func
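
The environ-exec pattern that `_set_path_and_load_file` builds on, in isolation (a hedged sketch; the path is a hypothetical stand-in for the resolved script location):

path = '/doc_root/handler.py'  # hypothetical resolved path
environ = {'__file__': path}
with open(path, 'rb') as f:
    # Execute the script with `environ` as both globals and locals, so every
    # top-level name (main, handle_headers, handle_data, ...) lands in the dict.
    exec(compile(f.read(), path, 'exec'), environ, environ)
main = environ.get('main')  # None if the script defines no main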

View file

@@ -278,6 +278,7 @@ class Request(object):
self.raw_input = InputFile(request_handler.rfile,
int(self.headers.get("Content-Length", 0)))
self._body = None
self._GET = None
@@ -285,8 +286,6 @@
self._cookies = None
self._auth = None
self.h2_stream_id = request_handler.h2_stream_id if hasattr(request_handler, 'h2_stream_id') else None
self.server = Server(self)
def __repr__(self):
@@ -349,6 +348,13 @@ class Request(object):
return self._auth
class H2Request(Request):
def __init__(self, request_handler):
self.h2_stream_id = request_handler.h2_stream_id
self.frames = []
super(H2Request, self).__init__(request_handler)
class RequestHeaders(dict):
"""Dictionary-like API for accessing request headers."""
def __init__(self, items):

View file

@@ -9,6 +9,8 @@ from .logger import get_logger
from io import BytesIO
from six import binary_type, text_type, itervalues
from hyperframe.frame import HeadersFrame, DataFrame, ContinuationFrame
from hpack.struct import HeaderTuple
missing = object()
@@ -369,11 +371,11 @@
item = item_iter.next()
while True:
check_last = item_iter.next()
self.writer.write_content(item, last=False)
self.writer.write_data(item, last=False)
item = check_last
except StopIteration:
if item:
self.writer.write_content(item, last=True)
self.writer.write_data(item, last=True)
class H2ResponseWriter(object):
@@ -383,11 +385,12 @@
self.h2conn = handler.conn
self._response = response
self._handler = handler
self.stream_ended = False
self.content_written = False
self.request = response.request
self.logger = response.logger
def write_headers(self, headers, status_code, status_message=None):
def write_headers(self, headers, status_code, status_message=None, stream_id=None, last=False):
formatted_headers = []
secondary_headers = [] # Non ':' prefixed headers are to be added afterwards
@@ -403,13 +406,14 @@
with self.h2conn as connection:
connection.send_headers(
stream_id=self.request.h2_stream_id,
stream_id=self.request.h2_stream_id if stream_id is None else stream_id,
headers=formatted_headers,
end_stream=last or self.request.method == "HEAD"
)
self.write(connection)
def write_content(self, item, last=False):
def write_data(self, item, last=False, stream_id=None):
if isinstance(item, (text_type, binary_type)):
data = BytesIO(self.encode(item))
else:
@@ -423,30 +427,113 @@
# If the data is longer than max payload size, need to write it in chunks
payload_size = self.get_max_payload_size()
while data_len > payload_size:
self.write_content_frame(data.read(payload_size), False)
self.write_data_frame(data.read(payload_size), False, stream_id)
data_len -= payload_size
payload_size = self.get_max_payload_size()
self.write_content_frame(data.read(), last)
self.write_data_frame(data.read(), last, stream_id)
def write_content_frame(self, data, last):
def write_data_frame(self, data, last, stream_id=None):
with self.h2conn as connection:
connection.send_data(
stream_id=self.request.h2_stream_id,
stream_id=self.request.h2_stream_id if stream_id is None else stream_id,
data=data,
end_stream=last,
)
self.write(connection)
self.content_written = last
self.stream_ended = last
def write_push(self, promise_headers, push_stream_id=None, status=None, response_headers=None, response_data=None):
"""Write a push promise, and optionally write the push content.
This will write a push promise to the request stream. If you do not provide headers and data for the
response, no response is pushed; push it yourself using the stream ID returned from this function.
:param promise_headers: A list of header tuples that matches what the client would use to
request the pushed response
:param push_stream_id: The ID of the stream the response should be pushed to. If none is given,
the next available ID is used.
:param status: The status code of the response; REQUIRED if response_headers is given
:param response_headers: The headers of the response
:param response_data: The response data.
:return: The ID of the push stream
"""
with self.h2conn as connection:
push_stream_id = push_stream_id if push_stream_id is not None else connection.get_next_available_stream_id()
connection.push_stream(self.request.h2_stream_id, push_stream_id, promise_headers)
self.write(connection)
has_data = response_data is not None
if response_headers is not None:
assert status is not None
self.write_headers(response_headers, status, stream_id=push_stream_id, last=not has_data)
if has_data:
self.write_data(response_data, last=True, stream_id=push_stream_id)
return push_stream_id
def end_stream(self, stream_id=None):
with self.h2conn as connection:
connection.end_stream(stream_id if stream_id is not None else self.request.h2_stream_id)
self.write(connection)
self.stream_ended = True
def write_raw_header_frame(self, headers, stream_id=None, end_stream=False, end_headers=False, frame_cls=HeadersFrame):
"""This bypasses state checking and such, and sends a header regardless"""
if not stream_id:
stream_id = self.request.h2_stream_id
header_t = []
for header, value in headers:
header_t.append(HeaderTuple(header, value))
with self.h2conn as connection:
frame = frame_cls(stream_id, data=connection.encoder.encode(header_t))
if end_stream:
self.stream_ended = True
frame.flags.add('END_STREAM')
if end_headers:
frame.flags.add('END_HEADERS')
data = frame.serialize()
self.write_raw(data)
def write_raw_data_frame(self, data, stream_id=None, end_stream=False):
"""This bypasses state checking and such, and sends a data frame regardless"""
if not stream_id:
stream_id = self.request.h2_stream_id
frame = DataFrame(stream_id, data=data)
if end_stream:
self.stream_ended = True
frame.flags.add('END_STREAM')
data = frame.serialize()
self.write_raw(data)
def write_raw_continuation_frame(self, headers, stream_id=None, end_headers=False):
"""This bypasses state checking and such, and sends a continuation frame regardless"""
self.write_raw_header_frame(headers, stream_id=stream_id, end_headers=end_headers, frame_cls=ContinuationFrame)
def get_max_payload_size(self):
with self.h2conn as connection:
return min(connection.remote_settings.max_frame_size, connection.local_flow_control_window(self.request.h2_stream_id)) - 9
def write(self, connection):
self.content_written = True
data = connection.data_to_send()
self.socket.sendall(data)
def write_raw(self, raw_data):
"""Used for sending raw bytes/data through the socket"""
self.content_written = True
self.socket.sendall(raw_data)
def encode(self, data):
"""Convert unicode to bytes according to response.encoding."""
if isinstance(data, binary_type):
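
A hedged sketch of driving `write_push` from a wptserve handler, mirroring the push test earlier in this diff (path, authority, and payloads are hypothetical):

@wptserve.handlers.handler
def handler(request, response):
    response.writer.write_headers([('content-type', 'text/plain')], 200)
    response.writer.write_push(
        promise_headers=[(':method', 'GET'),
                         (':path', '/pushed.txt'),
                         (':scheme', 'https'),
                         (':authority', 'web-platform.test:8443')],
        status=200,
        response_headers=[('content-type', 'text/plain')],
        response_data=b'pushed body')
    response.writer.write_data(b'main body', last=True)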

View file

@@ -12,16 +12,18 @@ from six import binary_type, text_type
import uuid
from collections import OrderedDict
from six.moves.queue import Queue
from h2.config import H2Configuration
from h2.connection import H2Connection
from h2.events import RequestReceived, ConnectionTerminated
from h2.events import RequestReceived, ConnectionTerminated, DataReceived, StreamReset, StreamEnded
from six.moves.urllib.parse import urlsplit, urlunsplit
from . import routes as default_routes
from .config import ConfigBuilder
from .logger import get_logger
from .request import Server, Request
from .request import Server, Request, H2Request
from .response import Response, H2Response
from .router import Router
from .utils import HTTPException
@@ -220,78 +222,76 @@ class BaseWebTestRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
self.logger = get_logger()
BaseHTTPServer.BaseHTTPRequestHandler.__init__(self, *args, **kwargs)
def finish_handling(self, request_line_is_valid, response_cls):
self.server.rewriter.rewrite(self)
def finish_handling_h1(self, request_line_is_valid):
request = Request(self)
response = response_cls(self, request)
self.server.rewriter.rewrite(self)
if request.method == "CONNECT":
self.handle_connect(response)
return
request = Request(self)
response = Response(self, request)
if not request_line_is_valid:
response.set_error(414)
response.write()
return
if request.method == "CONNECT":
self.handle_connect(response)
return
self.logger.debug("%s %s" % (request.method, request.request_path))
handler = self.server.router.get_handler(request)
if not request_line_is_valid:
response.set_error(414)
response.write()
return
# If the handler we used for the request had a non-default base path
# set update the doc_root of the request to reflect this
if hasattr(handler, "base_path") and handler.base_path:
request.doc_root = handler.base_path
if hasattr(handler, "url_base") and handler.url_base != "/":
request.url_base = handler.url_base
self.logger.debug("%s %s" % (request.method, request.request_path))
handler = self.server.router.get_handler(request)
self.finish_handling(request, response, handler)
if self.server.latency is not None:
if callable(self.server.latency):
latency = self.server.latency()
else:
latency = self.server.latency
self.logger.warning("Latency enabled. Sleeping %i ms" % latency)
time.sleep(latency / 1000.)
def finish_handling(self, request, response, handler):
# If the handler we used for the request had a non-default base path
# set update the doc_root of the request to reflect this
if hasattr(handler, "base_path") and handler.base_path:
request.doc_root = handler.base_path
if hasattr(handler, "url_base") and handler.url_base != "/":
request.url_base = handler.url_base
if handler is None:
response.set_error(404)
if self.server.latency is not None:
if callable(self.server.latency):
latency = self.server.latency()
else:
try:
handler(request, response)
except HTTPException as e:
response.set_error(e.code, e.message)
except Exception as e:
message = str(e)
if message:
err = [message]
else:
err = []
err.append(traceback.format_exc())
response.set_error(500, "\n".join(err))
self.logger.debug("%i %s %s (%s) %i" % (response.status[0],
request.method,
request.request_path,
request.headers.get('Referer'),
request.raw_input.length))
latency = self.server.latency
self.logger.warning("Latency enabled. Sleeping %i ms" % latency)
time.sleep(latency / 1000.)
if not response.writer.content_written:
response.write()
if handler is None:
self.logger.debug("No Handler found!")
response.set_error(404)
else:
try:
handler(request, response)
except HTTPException as e:
response.set_error(e.code, e.message)
except Exception as e:
self.respond_with_error(response, e)
self.logger.debug("%i %s %s (%s) %i" % (response.status[0],
request.method,
request.request_path,
request.headers.get('Referer'),
request.raw_input.length))
# If a Python handler has been used, old handlers won't send an END_STREAM data frame, so this
# allows for backwards compatibility by accounting for handlers that don't close their streams
if isinstance(response, H2Response) and not response.writer.content_written:
response.writer.write_content('', last=True)
if not response.writer.content_written:
response.write()
# If we want to remove this in the future, a solution is needed for
# scripts that produce a non-string iterable of content, since these
# can't set a Content-Length header. A notable example of this kind of
# problem is with the trickle pipe i.e. foo.js?pipe=trickle(d1)
if response.close_connection:
self.close_connection = True
# If a Python handler has been used, old handlers won't send an END_STREAM data frame, so this
# allows for backwards compatibility by accounting for handlers that don't close their streams
if isinstance(response, H2Response) and not response.writer.stream_ended:
response.writer.end_stream()
if not self.close_connection:
# Ensure that the whole request has been read from the socket
request.raw_input.read()
# If we want to remove this in the future, a solution is needed for
# scripts that produce a non-string iterable of content, since these
# can't set a Content-Length header. A notable example of this kind of
# problem is with the trickle pipe i.e. foo.js?pipe=trickle(d1)
if response.close_connection:
self.close_connection = True
if not self.close_connection:
# Ensure that the whole request has been read from the socket
request.raw_input.read()
def handle_connect(self, response):
self.logger.debug("Got CONNECT")
@@ -306,6 +306,15 @@ class BaseWebTestRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
self.setup()
return
def respond_with_error(self, response, e):
message = str(e)
if message:
err = [message]
else:
err = []
err.append(traceback.format_exc())
response.set_error(500, "\n".join(err))
class Http2WebTestRequestHandler(BaseWebTestRequestHandler):
protocol_version = "HTTP/2.0"
@@ -322,9 +331,9 @@ class Http2WebTestRequestHandler(BaseWebTestRequestHandler):
self.close_connection = False
# Generate a UUID to make it easier to distinguish different H2 connection debug messages
uid = uuid.uuid4()
self.uid = str(uuid.uuid4())[:8]
self.logger.debug('(%s) Initiating h2 Connection' % uid)
self.logger.debug('(%s) Initiating h2 Connection' % self.uid)
with self.conn as connection:
connection.initiate_connection()
@@ -332,53 +341,129 @@
self.request.sendall(data)
self.request_threads = []
# Dict of { stream_id: (thread, queue) }
stream_queues = {}
# TODO: the multithreading needs some major work. The current idea is to have a thread per stream
# so that processing of a request can start from its first frame.
while not self.close_connection:
try:
try:
while not self.close_connection:
# This size may need to be made variable based on remote settings?
data = self.request.recv(65535)
with self.conn as connection:
events = connection.receive_data(data)
frames = connection.receive_data(data)
self.logger.debug('(%s) Events: ' % (uid) + str(events))
self.logger.debug('(%s) Frames Received: ' % self.uid + str(frames))
for event in events:
if isinstance(event, RequestReceived):
self.logger.debug('(%s) Parsing RequestReceived' % (uid))
self._h2_parse_request(event)
t = threading.Thread(target=BaseWebTestRequestHandler.finish_handling, args=(self, True, H2Response))
self.request_threads.append(t)
t.start()
if isinstance(event, ConnectionTerminated):
self.logger.debug('(%s) Connection terminated by remote peer ' % (uid))
for frame in frames:
if isinstance(frame, ConnectionTerminated):
self.logger.debug('(%s) Connection terminated by remote peer ' % self.uid)
self.close_connection = True
except (socket.timeout, socket.error) as e:
self.logger.debug('(%s) ERROR - Closing Connection - \n%s' % (uid, str(e)))
# Flood all the streams with the ConnectionTerminated frame; this will cause them to stop
for stream_id, (thread, queue) in stream_queues.items():
queue.put(frame)
elif hasattr(frame, 'stream_id'):
if frame.stream_id not in stream_queues:
queue = Queue()
stream_queues[frame.stream_id] = (self.start_stream_thread(frame, queue), queue)
stream_queues[frame.stream_id][1].put(frame)
if isinstance(frame, StreamEnded) or (hasattr(frame, "stream_ended") and frame.stream_ended):
del stream_queues[frame.stream_id]
except (socket.timeout, socket.error) as e:
self.logger.error('(%s) Closing Connection - \n%s' % (self.uid, str(e)))
if not self.close_connection:
self.close_connection = True
for t in self.request_threads:
t.join()
for stream_id, (thread, queue) in stream_queues.items():
queue.put(None)
except Exception as e:
self.logger.error('(%s) Unexpected Error - \n%s' % (self.uid, str(e)))
finally:
for stream_id, (thread, queue) in stream_queues.items():
thread.join()
def _h2_parse_request(self, event):
self.headers = H2Headers(event.headers)
self.command = self.headers['method']
self.path = self.headers['path']
self.h2_stream_id = event.stream_id
def start_stream_thread(self, frame, queue):
t = threading.Thread(
target=Http2WebTestRequestHandler._stream_thread,
args=(self, frame.stream_id, queue)
)
t.start()
return t
# TODO: figure out what to do with raw_requestline, as it is no longer used.
# For now it can be left as-is, since it does not affect anything.
self.raw_requestline = ''
def _stream_thread(self, stream_id, queue):
"""
This thread processes frames for a specific stream. It waits for frames to be placed
in the queue, and processes them. When it receives a request frame, it will start processing
immediately, even if there are data frames to follow. One of the reasons for this is that it
can detect invalid requests before needing to read the rest of the frames.
"""
# The file-like pipe object used to share received data with the request object
wfile = None
request = None
response = None
req_handler = None
while not self.close_connection:
# Wait for next frame, blocking
frame = queue.get(True, None)
self.logger.debug('(%s - %s) %s' % (self.uid, stream_id, str(frame)))
if isinstance(frame, RequestReceived):
rfile, wfile = os.pipe()
rfile, wfile = os.fdopen(rfile, 'rb'), os.fdopen(wfile, 'wb')
stream_handler = H2HandlerCopy(self, frame, rfile)
stream_handler.server.rewriter.rewrite(stream_handler)
request = H2Request(stream_handler)
response = H2Response(stream_handler, request)
req_handler = stream_handler.server.router.get_handler(request)
if hasattr(req_handler, "frame_handler"):
# Convert this to a handler that will utilise H2-specific functionality, such as handling individual frames
req_handler = self.frame_handler(request, response, req_handler)
if hasattr(req_handler, 'handle_headers'):
req_handler.handle_headers(frame, request, response)
elif isinstance(frame, DataReceived):
wfile.write(frame.data)
if hasattr(req_handler, 'handle_data'):
req_handler.handle_data(frame, request, response)
if frame.stream_ended:
wfile.close()
elif frame is None or isinstance(frame, (StreamReset, StreamEnded, ConnectionTerminated)):
self.logger.debug('(%s - %s) Stream Reset, Thread Closing' % (self.uid, stream_id))
break
if request is not None:
request.frames.append(frame)
if hasattr(frame, "stream_ended") and frame.stream_ended:
self.finish_handling(request, response, req_handler)
def frame_handler(self, request, response, handler):
try:
return handler.frame_handler(request)
except HTTPException as e:
response.set_error(e.code, e.message)
response.write()
except Exception as e:
self.respond_with_error(response, e)
response.write()
class H2ConnectionGuard(object):
"""H2Connection objects are not threadsafe, so this keeps thread safety"""
lock = threading.Lock()
def __init__(self, obj):
assert isinstance(obj, H2Connection)
self.obj = obj
def __enter__(self):
@@ -407,6 +492,20 @@ class H2Headers(dict):
return ['dummy function']
class H2HandlerCopy(object):
def __init__(self, handler, req_frame, rfile):
self.headers = H2Headers(req_frame.headers)
self.command = self.headers['method']
self.path = self.headers['path']
self.h2_stream_id = req_frame.stream_id
self.server = handler.server
self.protocol_version = handler.protocol_version
self.raw_requestline = ''
self.rfile = rfile
self.request = handler.request
self.conn = handler.conn
class Http1WebTestRequestHandler(BaseWebTestRequestHandler):
protocol_version = "HTTP/1.1"
@@ -426,7 +525,7 @@ class Http1WebTestRequestHandler(BaseWebTestRequestHandler):
# parse_request() actually sends its own error responses
return
self.finish_handling(request_line_is_valid, Response)
self.finish_handling_h1(request_line_is_valid)
except socket.timeout as e:
self.log_error("Request timed out: %r", e)
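
The thread-per-stream dispatch implemented above, reduced to its core (a hedged sketch using stdlib primitives only; all names are illustrative):

import threading
from six.moves.queue import Queue

stream_queues = {}  # stream_id -> (thread, queue)

def dispatch(frame):
    # Route every stream-scoped frame to that stream's worker thread,
    # creating the worker on the first frame seen for the stream.
    sid = frame.stream_id
    if sid not in stream_queues:
        q = Queue()
        t = threading.Thread(target=worker, args=(sid, q))
        t.start()
        stream_queues[sid] = (t, q)
    stream_queues[sid][1].put(frame)

def worker(sid, q):
    while True:
        frame = q.get()
        if frame is None:  # sentinel pushed on shutdown
            break
        # ... process `frame` for stream `sid` ...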

View file

@@ -110,5 +110,8 @@ def get_port(host=''):
return port
def http2_compatible():
# Currently, the HTTP/2.0 server is only working in python 2.7.15
return (sys.version_info[0] == 2 and sys.version_info[1] == 7 and sys.version_info[2] == 15)
# Currently, the HTTP/2.0 server is only working in python 2.7.10+ and OpenSSL 1.0.2+
import ssl
ssl_v = ssl.OPENSSL_VERSION_INFO
return ((sys.version_info[0] == 2 and sys.version_info[1] == 7 and sys.version_info[2] >= 10) and
(ssl_v[0] == 1 and (ssl_v[1] == 1 or (ssl_v[1] == 0 and ssl_v[2] >= 2))))
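
For reference, a quick way to check whether an interpreter passes this gate; `ssl.OPENSSL_VERSION_INFO` is a five-tuple of (major, minor, fix, patch, status), so e.g. OpenSSL 1.0.2k reports (1, 0, 2, 11, 15):

import ssl
import sys

print(sys.version_info[:3])      # wants (2, 7, >= 10)
print(ssl.OPENSSL_VERSION_INFO)  # wants 1.0.2+ or 1.1.x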