mirror of
https://github.com/servo/servo.git
synced 2025-10-16 16:29:18 +01:00
422 lines
15 KiB
Python
422 lines
15 KiB
Python
import io
|
|
import itertools
|
|
import json
|
|
import os
|
|
from atomicwrites import atomic_write
|
|
from copy import deepcopy
|
|
from multiprocessing import Pool, cpu_count
|
|
from six import (
|
|
PY3,
|
|
binary_type,
|
|
ensure_text,
|
|
iteritems,
|
|
itervalues,
|
|
string_types,
|
|
text_type,
|
|
)
|
|
|
|
from . import vcs
|
|
from .item import (ConformanceCheckerTest,
|
|
CrashTest,
|
|
ManifestItem,
|
|
ManualTest,
|
|
PrintRefTest,
|
|
RefTest,
|
|
SupportFile,
|
|
TestharnessTest,
|
|
VisualTest,
|
|
WebDriverSpecTest)
|
|
from .log import get_logger
|
|
from .sourcefile import SourceFile
|
|
from .typedata import TypeData
|
|
|
|
MYPY = False
|
|
if MYPY:
|
|
# MYPY is set to True when run under Mypy.
|
|
from logging import Logger
|
|
from typing import Any
|
|
from typing import Container
|
|
from typing import Dict
|
|
from typing import IO
|
|
from typing import Iterator
|
|
from typing import Iterable
|
|
from typing import Optional
|
|
from typing import Set
|
|
from typing import Text
|
|
from typing import Tuple
|
|
from typing import Type
|
|
from typing import Union
|
|
|
|
try:
    # Prefer ujson when it is installed: it parses and serialises JSON
    # considerably faster than the stdlib json module, which matters for
    # the multi-megabyte manifest files this module reads and writes.
    import ujson
    fast_json = ujson
except ImportError:
    fast_json = json  # type: ignore

# Version of the serialised manifest format; from_json refuses (via
# ManifestVersionMismatch) to load a manifest with any other version,
# forcing a rebuild.
CURRENT_VERSION = 8  # type: int
|
|
|
|
|
|
class ManifestError(Exception):
    """Base class for errors raised while loading or parsing a manifest."""
    pass
|
|
|
|
|
|
class ManifestVersionMismatch(ManifestError):
    """Raised when a serialised manifest's version is not CURRENT_VERSION."""
    pass
|
|
|
|
|
|
# Mapping from the test-type name used in the serialised manifest to the
# ManifestItem subclass that represents items of that type.
item_classes = {"testharness": TestharnessTest,
                "reftest": RefTest,
                "print-reftest": PrintRefTest,
                "crashtest": CrashTest,
                "manual": ManualTest,
                "wdspec": WebDriverSpecTest,
                "conformancechecker": ConformanceCheckerTest,
                "visual": VisualTest,
                "support": SupportFile}  # type: Dict[str, Type[ManifestItem]]
|
|
|
|
|
|
def compute_manifest_items(source_file):
    # type: (SourceFile) -> Tuple[Tuple[Text, ...], Text, Set[ManifestItem], Text]
    """Compute the manifest data for a single source file.

    Returns a tuple of the file's relative path parts, its test type,
    the set of manifest items it defines, and its content hash.  Kept as
    a module-level function so it can be dispatched to a
    multiprocessing worker pool.
    """
    item_type, items = source_file.manifest_items()
    return (source_file.rel_path_parts, item_type, set(items), source_file.hash)
|
|
|
|
if MYPY:
    # Under type checking ManifestData is declared as a Dict[Any, TypeData];
    # at runtime it is a plain dict subclass, avoiding any typing machinery.
    ManifestDataType = Dict[Any, TypeData]
else:
    ManifestDataType = dict
|
|
|
|
class ManifestData(ManifestDataType):
    """Dictionary subclass mapping each test type name to a TypeData
    instance for that type.

    The mapping is populated once at construction time and is immutable
    afterwards: assigning a key after initialisation raises
    AttributeError.
    """

    def __init__(self, manifest):
        # type: (Manifest) -> None
        """Create one TypeData entry per known test type, keyed by type name."""
        self.initialized = False  # type: bool
        for type_name, item_cls in iteritems(item_classes):
            self[type_name] = TypeData(manifest, item_cls)
        self.initialized = True
        self.json_obj = None  # type: None

    def __setitem__(self, key, value):
        # type: (str, TypeData) -> None
        # Reject mutation once __init__ has finished populating the dict.
        if self.initialized:
            raise AttributeError
        dict.__setitem__(self, key, value)

    def paths(self):
        # type: () -> Set[Text]
        """Get a list of all paths containing test items
        without actually constructing all the items"""
        return {os.path.sep.join(path_parts)
                for type_data in itervalues(self)
                for path_parts in type_data}

    def type_by_path(self):
        # type: () -> Dict[Tuple[Text, ...], str]
        """Return a mapping from each known path tuple to its test type name."""
        return {path_parts: type_name
                for type_name, type_data in iteritems(self)
                for path_parts in type_data}
|
|
|
|
|
|
|
|
class Manifest(object):
    """In-memory representation of a wpt manifest.

    Holds a ManifestData (test type -> TypeData mapping of path tuples
    to sets of ManifestItem) together with the URL base and the local
    tests root the manifest describes.
    """

    def __init__(self, tests_root=None, url_base="/"):
        # type: (Optional[str], Text) -> None
        assert url_base is not None
        self._data = ManifestData(self)  # type: ManifestData
        self.tests_root = tests_root  # type: Optional[str]
        self.url_base = url_base  # type: Text

    def __iter__(self):
        # type: () -> Iterator[Tuple[str, Text, Set[ManifestItem]]]
        return self.itertypes()

    def itertypes(self, *types):
        # type: (*str) -> Iterator[Tuple[str, Text, Set[ManifestItem]]]
        """Yield (test_type, rel_path, tests) triples, restricted to the
        given test types; all types (in sorted order) when none are given."""
        for item_type in (types or sorted(self._data.keys())):
            for path in self._data[item_type]:
                str_path = os.sep.join(path)
                tests = self._data[item_type][path]
                yield item_type, str_path, tests

    def iterpath(self, path):
        # type: (Text) -> Iterable[ManifestItem]
        """Yield every test item defined by the source file at the given
        relative path, across all test types."""
        tpath = tuple(path.split(os.path.sep))

        for type_tests in self._data.values():
            i = type_tests.get(tpath, set())
            assert i is not None
            for test in i:
                yield test

    def iterdir(self, dir_name):
        # type: (Text) -> Iterable[ManifestItem]
        """Yield every test item whose source file lives under the given
        relative directory path, across all test types."""
        tpath = tuple(dir_name.split(os.path.sep))
        tpath_len = len(tpath)

        for type_tests in self._data.values():
            for path, tests in iteritems(type_tests):
                # Prefix comparison on the path tuple selects the directory.
                if path[:tpath_len] == tpath:
                    for test in tests:
                        yield test

    def update(self, tree, parallel=True):
        # type: (Iterable[Tuple[Union[SourceFile, bytes], bool]], bool) -> bool
        """Update the manifest given an iterable of items that make up the updated manifest.

        The iterable must either generate tuples of the form (SourceFile, True) for paths
        that are to be updated, or (path, False) for items that are not to be updated. This
        unusual API is designed as an optimistaion meaning that SourceFile items need not be
        constructed in the case we are not updating a path, but the absence of an item from
        the iterator may be used to remove defunct entries from the manifest.

        Returns True if any entry changed (added, removed, or rehashed)."""

        changed = False

        # Create local variable references to these dicts so we avoid the
        # attribute access in the hot loop below
        data = self._data

        types = data.type_by_path()
        # Every previously-known path starts out presumed deleted; paths seen
        # in the tree are removed from this set as we go.
        deleted = set(types)

        to_update = []

        for source_file_or_path, update in tree:
            if not update:
                assert isinstance(source_file_or_path, (binary_type, text_type))
                path = ensure_text(source_file_or_path)
                deleted.remove(tuple(path.split(os.path.sep)))
            else:
                assert not isinstance(source_file_or_path, (binary_type, text_type))
                source_file = source_file_or_path
                rel_path_parts = source_file.rel_path_parts
                assert isinstance(rel_path_parts, tuple)

                is_new = rel_path_parts not in deleted  # type: bool
                hash_changed = False  # type: bool

                if not is_new:
                    deleted.remove(rel_path_parts)
                    old_type = types[rel_path_parts]
                    old_hash = data[old_type].hashes[rel_path_parts]
                    file_hash = source_file.hash  # type: Text
                    if old_hash != file_hash:
                        hash_changed = True
                        # Drop the stale entry; it is re-added below with
                        # freshly computed items (the type may have changed).
                        del data[old_type][rel_path_parts]

                if is_new or hash_changed:
                    to_update.append(source_file)

        if to_update:
            changed = True

        if parallel and len(to_update) > 25 and cpu_count() > 1:
            # 25 derived experimentally (2020-01) to be approximately
            # the point at which it is quicker to create Pool and
            # parallelize this
            pool = Pool()
            try:
                # chunksize set > 1 when more than 10000 tests, because
                # chunking is a net-gain once we get to very large numbers
                # of items (again, experimentally, 2020-01)
                #
                # Materialize the results before leaving the try so the pool
                # can be shut down; previously the Pool was never closed or
                # joined, leaking its worker processes.
                results = list(pool.imap_unordered(compute_manifest_items,
                                                   to_update,
                                                   chunksize=max(1, len(to_update) // 10000)
                                                   ))  # type: Iterable[Tuple[Tuple[Text, ...], Text, Set[ManifestItem], Text]]
            finally:
                pool.close()
                pool.join()
        elif PY3:
            results = map(compute_manifest_items, to_update)
        else:
            results = itertools.imap(compute_manifest_items, to_update)

        for result in results:
            rel_path_parts, new_type, manifest_items, file_hash = result
            data[new_type][rel_path_parts] = manifest_items
            data[new_type].hashes[rel_path_parts] = file_hash

        if deleted:
            changed = True
            for rel_path_parts in deleted:
                for test_data in itervalues(data):
                    if rel_path_parts in test_data:
                        del test_data[rel_path_parts]

        return changed

    def to_json(self, caller_owns_obj=True):
        # type: (bool) -> Dict[Text, Any]
        """Dump a manifest into a object which can be serialized as JSON

        If caller_owns_obj is False, then the return value remains
        owned by the manifest; it is _vitally important_ that _no_
        (even read) operation is done on the manifest, as otherwise
        objects within the object graph rooted at the return value can
        be mutated. This essentially makes this mode very dangerous
        and only to be used under extreme care.

        """
        out_items = {
            test_type: type_paths.to_json()
            for test_type, type_paths in iteritems(self._data) if type_paths
        }

        if caller_owns_obj:
            # Deep-copy so the caller's mutations cannot corrupt self._data.
            out_items = deepcopy(out_items)

        rv = {"url_base": self.url_base,
              "items": out_items,
              "version": CURRENT_VERSION}  # type: Dict[Text, Any]
        return rv

    @classmethod
    def from_json(cls, tests_root, obj, types=None, callee_owns_obj=False):
        # type: (str, Dict[Text, Any], Optional[Container[Text]], bool) -> Manifest
        """Load a manifest from a JSON object

        This loads a manifest for a given local test_root path from an
        object obj, potentially partially loading it to only load the
        types given by types.

        If callee_owns_obj is True, then ownership of obj transfers
        to this function when called, and the caller must never mutate
        the obj or anything referred to in the object graph rooted at
        obj.

        Raises ManifestVersionMismatch for a stale format version and
        ManifestError for a malformed object.
        """
        version = obj.get("version")
        if version != CURRENT_VERSION:
            raise ManifestVersionMismatch

        self = cls(tests_root, url_base=obj.get("url_base", "/"))
        if not hasattr(obj, "items"):
            raise ManifestError

        for test_type, type_paths in iteritems(obj["items"]):
            if test_type not in item_classes:
                raise ManifestError

            if types and test_type not in types:
                continue

            if not callee_owns_obj:
                # The caller keeps obj, so work on a private copy.
                type_paths = deepcopy(type_paths)

            self._data[test_type].set_json(type_paths)

        return self
|
|
|
|
|
|
def load(tests_root, manifest, types=None):
    # type: (str, Union[IO[bytes], str], Optional[Container[Text]]) -> Optional[Manifest]
    """Deprecated: load a manifest without updating it.

    Kept for backwards compatibility; prefer load_and_update, which can
    also rebuild a stale manifest.  Returns None when no usable manifest
    can be loaded.
    """
    logger = get_logger()
    logger.warning("Prefer load_and_update instead")
    return _load(logger, tests_root, manifest, types)
|
|
|
|
|
|
# Process-wide cache of already-loaded manifests, keyed by manifest file path.
# Consulted and populated by _load when allow_cached is True.
__load_cache = {}  # type: Dict[str, Manifest]
|
|
|
|
|
|
def _load(logger,  # type: Logger
          tests_root,  # type: str
          manifest,  # type: Union[IO[bytes], str]
          types=None,  # type: Optional[Container[Text]]
          allow_cached=True  # type: bool
          ):
    # type: (...) -> Optional[Manifest]
    """Load a Manifest from either a filesystem path or an open file.

    Successful loads are cached per path in __load_cache (and served
    from it) when allow_cached is True.  Returns None when the manifest
    file is missing or its JSON cannot be parsed.
    """
    if isinstance(manifest, string_types):
        manifest_path = manifest
    else:
        manifest_path = manifest.name

    if allow_cached and manifest_path in __load_cache:
        return __load_cache[manifest_path]

    if not isinstance(manifest, string_types):
        # Already-open file object: parse it directly.
        rv = Manifest.from_json(tests_root,
                                fast_json.load(manifest),
                                types=types,
                                callee_owns_obj=True)
    else:
        if os.path.exists(manifest):
            logger.debug("Opening manifest at %s" % manifest)
        else:
            logger.debug("Creating new manifest at %s" % manifest)
        try:
            with io.open(manifest, "r", encoding="utf-8") as f:
                rv = Manifest.from_json(tests_root,
                                        fast_json.load(f),
                                        types=types,
                                        callee_owns_obj=True)
        except IOError:
            # No manifest on disk yet; the caller will build a fresh one.
            return None
        except ValueError:
            logger.warning("%r may be corrupted", manifest)
            return None

    if allow_cached:
        __load_cache[manifest_path] = rv
    return rv
|
|
|
|
|
|
def load_and_update(tests_root,  # type: bytes
                    manifest_path,  # type: bytes
                    url_base,  # type: Text
                    update=True,  # type: bool
                    rebuild=False,  # type: bool
                    metadata_path=None,  # type: Optional[bytes]
                    cache_root=None,  # type: Optional[bytes]
                    working_copy=True,  # type: bool
                    types=None,  # type: Optional[Container[Text]]
                    write_manifest=True,  # type: bool
                    allow_cached=True,  # type: bool
                    parallel=True  # type: bool
                    ):
    # type: (...) -> Manifest
    """Load the manifest for tests_root, rebuilding or updating it as needed.

    A fresh manifest is built when rebuild is True, when the on-disk
    manifest is missing/corrupt/outdated, or when its url_base does not
    match.  When write_manifest is True any changes are written back to
    manifest_path.  metadata_path is accepted for interface compatibility
    but is not used by this implementation.

    Returns the (possibly freshly built) Manifest.
    """
    logger = get_logger()

    manifest = None  # type: Optional[Manifest]
    if not rebuild:
        try:
            manifest = _load(logger,
                             tests_root,
                             manifest_path,
                             types=types,
                             allow_cached=allow_cached)
        except ManifestVersionMismatch:
            logger.info("Manifest version changed, rebuilding")
        except ManifestError:
            logger.warning("Failed to load manifest, rebuilding")

        # A manifest for a different URL base is useless to us.
        if manifest is not None and manifest.url_base != url_base:
            logger.info("Manifest url base did not match, rebuilding")
            manifest = None

    if manifest is None:
        manifest = Manifest(tests_root, url_base)
        rebuild = update = True

    if update or rebuild:
        tree = vcs.get_tree(tests_root, manifest, manifest_path, cache_root,
                            working_copy, rebuild)
        changed = manifest.update(tree, parallel)
        if changed and write_manifest:
            write(manifest, manifest_path)
        tree.dump_caches()

    return manifest
|
|
|
|
|
|
def write(manifest, manifest_path):
    # type: (Manifest, bytes) -> None
    """Serialise *manifest* as JSON to *manifest_path*.

    The destination directory is created if missing, and the file is
    written atomically so concurrent readers never see a partial
    manifest.
    """
    dir_name = os.path.dirname(manifest_path)
    # dirname() is "" for a bare filename; there is nothing to create then
    # (the original code would have crashed calling makedirs("")).
    if dir_name and not os.path.exists(dir_name):
        try:
            os.makedirs(dir_name)
        except OSError:
            # Another process may have created the directory between the
            # exists() check and makedirs(); only re-raise if it is still
            # genuinely missing.  (No exist_ok: this file supports Python 2.)
            if not os.path.isdir(dir_name):
                raise
    with atomic_write(manifest_path, overwrite=True) as f:
        # Use ',' instead of the default ', ' separator to prevent trailing
        # spaces: https://docs.python.org/2/library/json.html#json.dump
        json.dump(manifest.to_json(caller_owns_obj=True), f,
                  sort_keys=True, indent=1, separators=(',', ': '))
        f.write("\n")
|