from __future__ import print_function, unicode_literals

import abc
import argparse
import ast
import itertools
import json
import os
import re
import subprocess
import sys

from collections import defaultdict

from . import fnmatch
from .. import localpaths
from ..gitignore.gitignore import PathFilter
from ..wpt import testfiles

from manifest.sourcefile import SourceFile, js_meta_re, python_meta_re, space_chars
from six import binary_type, iteritems, itervalues
from six.moves import range
from six.moves.urllib.parse import urlsplit, urljoin

import logging

logger = None


def setup_logging(prefix=False):
    global logger
    if logger is None:
        logger = logging.getLogger(os.path.basename(os.path.splitext(__file__)[0]))
        handler = logging.StreamHandler(sys.stdout)
        logger.addHandler(handler)
    if prefix:
        format = logging.BASIC_FORMAT
    else:
        format = "%(message)s"
    formatter = logging.Formatter(format)
    for handler in logger.handlers:
        handler.setFormatter(formatter)
    logger.setLevel(logging.DEBUG)


setup_logging()

ERROR_MSG = """You must fix all errors; for details on how to fix them, see
http://web-platform-tests.org/writing-tests/lint-tool.html

However, instead of fixing a particular error, it's sometimes
OK to add a line to the lint.whitelist file in the root of the
web-platform-tests directory to make the lint tool ignore it.

For example, to make the lint tool ignore all '%s'
errors in the %s file,
you could add the following line to the lint.whitelist file.

%s: %s"""


def all_filesystem_paths(repo_root):
    path_filter = PathFilter(repo_root, extras=[".git/*"])
    for dirpath, dirnames, filenames in os.walk(repo_root):
        for filename in filenames:
            path = os.path.relpath(os.path.join(dirpath, filename), repo_root)
            if path_filter(path):
                yield path
        dirnames[:] = [item for item in dirnames if
                       path_filter(os.path.relpath(os.path.join(dirpath, item) + "/",
                                                   repo_root))]


def _all_files_equal(paths):
    """
    Checks all the paths are files that are byte-for-byte identical

    :param paths: the list of paths to compare
    :returns: True if they are all identical
    """
    paths = list(paths)
    if len(paths) < 2:
        return True

    first = paths.pop()
    size = os.path.getsize(first)
    if any(os.path.getsize(path) != size for path in paths):
        return False

    # Chunk this to avoid eating up memory and file descriptors
    bufsize = 4096*4  # 16KB, a "reasonable" number of disk sectors
    groupsize = 8  # Hypothesised to be large enough in the common case that
                   # everything fits in one group

    with open(first, "rb") as first_f:
        for start in range(0, len(paths), groupsize):
            path_group = paths[start:start+groupsize]
            first_f.seek(0)
            files = []
            try:
                # Open the group one file at a time so that, if an open()
                # fails, the finally block can still close whatever was
                # actually opened.
                for x in path_group:
                    files.append(open(x, "rb"))
                for _ in range(0, size, bufsize):
                    a = first_f.read(bufsize)
                    for f in files:
                        b = f.read(bufsize)
                        if a != b:
                            return False
            finally:
                for f in files:
                    f.close()

    return True


def check_path_length(repo_root, path):
    if len(path) + 1 > 150:
        return [("PATH LENGTH",
                 "/%s longer than maximum path length (%d > 150)" % (path, len(path) + 1),
                 path,
                 None)]
    return []


def check_worker_collision(repo_root, path):
    endings = [(".any.html", ".any.js"),
               (".any.worker.html", ".any.js"),
               (".worker.html", ".worker.js")]
    for path_ending, generated in endings:
        if path.endswith(path_ending):
            return [("WORKER COLLISION",
                     "path ends with %s which collides with generated tests from %s files" % (path_ending, generated),
                     path,
                     None)]
    return []


def check_ahem_copy(repo_root, path):
    lpath = path.lower()
    if "ahem" in lpath and lpath.endswith(".ttf"):
        return [("AHEM COPY",
                 "Don't add extra copies of Ahem, use /fonts/Ahem.ttf",
                 path,
                 None)]
    return []

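# Every check_* function in this module returns a list of 4-tuples of the
# form (error_type, description, path, line_number).  A sketch of one such
# tuple, with illustrative values rather than output from a real run:
#
#     ("AHEM COPY",                                            # error type
#      "Don't add extra copies of Ahem, use /fonts/Ahem.ttf",  # description
#      "css/fonts/my-ahem.ttf",                                # hypothetical offending path
#      None)                                                   # line number, if applicable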

drafts_csswg_re = re.compile(r"https?\:\/\/drafts\.csswg\.org\/([^/?#]+)")
w3c_tr_re = re.compile(r"https?\:\/\/www\.w3c?\.org\/TR\/([^/?#]+)")
w3c_dev_re = re.compile(r"https?\:\/\/dev\.w3c?\.org\/[^/?#]+\/([^/?#]+)")


def check_css_globally_unique(repo_root, paths):
    """
    Checks that CSS filenames are sufficiently unique

    This groups files by path, classifying them as "test", "reference", or
    "support".

    "test" files must have a unique name across files that share links to the
    same spec. "reference" and "support" files, on the other hand, must have
    globally unique names.

    :param repo_root: the repository root
    :param paths: list of all paths
    :returns: a list of errors found in ``paths``
    """
    test_files = defaultdict(set)
    ref_files = defaultdict(set)
    support_files = defaultdict(set)

    for path in paths:
        if os.name == "nt":
            path = path.replace("\\", "/")

        if not path.startswith("css/"):
            continue

        source_file = SourceFile(repo_root, path, "/")
        if source_file.name_is_non_test:
            # If we're name_is_non_test for a reason apart from support, ignore it.
            # We care about support because of the requirement that all support
            # files in css/ be in a support directory; see the start of check_parsed.
            offset = path.find("/support/")
            if offset == -1:
                continue

            parts = source_file.dir_path.split(os.path.sep)
            if (parts[0] in source_file.root_dir_non_test or
                any(item in source_file.dir_non_test - {"support"} for item in parts) or
                any(parts[:len(non_test_path)] == list(non_test_path)
                    for non_test_path in source_file.dir_path_non_test)):
                continue

            name = path[offset+1:]
            support_files[name].add(path)
        elif source_file.name_is_reference:
            ref_files[source_file.name].add(path)
        else:
            test_files[source_file.name].add(path)

    errors = []

    for name, colliding in iteritems(test_files):
        if len(colliding) > 1:
            if not _all_files_equal([os.path.join(repo_root, x) for x in colliding]):
                # Only compute by_spec if there are prima-facie collisions,
                # because of the cost of computing it.
                by_spec = defaultdict(set)
                for path in colliding:
                    source_file = SourceFile(repo_root, path, "/")
                    for link in source_file.spec_links:
                        for r in (drafts_csswg_re, w3c_tr_re, w3c_dev_re):
                            m = r.match(link)
                            if m:
                                spec = m.group(1)
                                break
                        else:
                            continue
                        by_spec[spec].add(path)

                for spec, paths in iteritems(by_spec):
                    if not _all_files_equal([os.path.join(repo_root, x) for x in paths]):
                        for x in paths:
                            errors.append(("CSS-COLLIDING-TEST-NAME",
                                           "The filename %s in the %s testsuite is shared by: %s" %
                                           (name, spec, ", ".join(sorted(paths))),
                                           x,
                                           None))

    for error_name, d in [("CSS-COLLIDING-REF-NAME", ref_files),
                          ("CSS-COLLIDING-SUPPORT-NAME", support_files)]:
        for name, colliding in iteritems(d):
            if len(colliding) > 1:
                if not _all_files_equal([os.path.join(repo_root, x) for x in colliding]):
                    for x in colliding:
                        errors.append((error_name,
                                       "The filename %s is shared by: %s" %
                                       (name, ", ".join(sorted(colliding))),
                                       x,
                                       None))

    return errors

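# The whitelist file consumed by parse_whitelist below is line-based.  Each
# entry has the form "ERROR TYPE: file-glob" with an optional ": line-number"
# suffix; several comma-separated error types may share an entry, an error
# type of "*" ignores the file entirely, and "#" starts a comment.  A
# hypothetical example (these entries are illustrative, not taken from a
# real lint.whitelist):
#
#     # Ignore the console API in this one legacy test
#     CONSOLE: old-tests/console-test.html
#     TRAILING WHITESPACE, INDENT TABS: generated/*.html: 12
#     *: vendor/imported-library.js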
""" data = defaultdict(lambda:defaultdict(set)) ignored_files = set() for line in f: line = line.strip() if not line or line.startswith("#"): continue parts = [item.strip() for item in line.split(":")] if len(parts) == 2: parts.append(None) else: parts[-1] = int(parts[-1]) error_types, file_match, line_number = parts error_types = {item.strip() for item in error_types.split(",")} file_match = os.path.normcase(file_match) if "*" in error_types: ignored_files.add(file_match) else: for error_type in error_types: data[error_type][file_match].add(line_number) return data, ignored_files def filter_whitelist_errors(data, errors): """ Filter out those errors that are whitelisted in `data`. """ if not errors: return [] whitelisted = [False for item in range(len(errors))] for i, (error_type, msg, path, line) in enumerate(errors): normpath = os.path.normcase(path) if error_type in data: wl_files = data[error_type] for file_match, allowed_lines in iteritems(wl_files): if None in allowed_lines or line in allowed_lines: if fnmatch.fnmatchcase(normpath, file_match): whitelisted[i] = True return [item for i, item in enumerate(errors) if not whitelisted[i]] class Regexp(object): pattern = None file_extensions = None error = None _re = None def __init__(self): self._re = re.compile(self.pattern) def applies(self, path): return (self.file_extensions is None or os.path.splitext(path)[1] in self.file_extensions) def search(self, line): return self._re.search(line) class TrailingWhitespaceRegexp(Regexp): pattern = b"[ \t\f\v]$" error = "TRAILING WHITESPACE" description = "Whitespace at EOL" class TabsRegexp(Regexp): pattern = b"^\t" error = "INDENT TABS" description = "Tabs used for indentation" class CRRegexp(Regexp): pattern = b"\r$" error = "CR AT EOL" description = "CR character in line separator" class SetTimeoutRegexp(Regexp): pattern = b"setTimeout\s*\(" error = "SET TIMEOUT" file_extensions = [".html", ".htm", ".js", ".xht", ".xhtml", ".svg"] description = "setTimeout used; step_timeout should typically be used instead" class W3CTestOrgRegexp(Regexp): pattern = b"w3c\-test\.org" error = "W3C-TEST.ORG" description = "External w3c-test.org domain used" class WebPlatformTestRegexp(Regexp): pattern = b"web\-platform\.test" error = "WEB-PLATFORM.TEST" description = "Internal web-platform.test domain used" class Webidl2Regexp(Regexp): pattern = b"webidl2\.js" error = "WEBIDL2.JS" description = "Legacy webidl2.js script used" class ConsoleRegexp(Regexp): pattern = b"console\.[a-zA-Z]+\s*\(" error = "CONSOLE" file_extensions = [".html", ".htm", ".js", ".xht", ".xhtml", ".svg"] description = "Console logging API used" class GenerateTestsRegexp(Regexp): pattern = b"generate_tests\s*\(" error = "GENERATE_TESTS" file_extensions = [".html", ".htm", ".js", ".xht", ".xhtml", ".svg"] description = "generate_tests used" class PrintRegexp(Regexp): pattern = b"print(?:\s|\s*\()" error = "PRINT STATEMENT" file_extensions = [".py"] description = "Print function used" class LayoutTestsRegexp(Regexp): pattern = b"eventSender|testRunner|window\.internals" error = "LAYOUTTESTS APIS" file_extensions = [".html", ".htm", ".js", ".xht", ".xhtml", ".svg"] description = "eventSender/testRunner/window.internals used; these are LayoutTests-specific APIs (WebKit/Blink)" class SpecialPowersRegexp(Regexp): pattern = b"SpecialPowers" error = "SPECIALPOWERS API" file_extensions = [".html", ".htm", ".js", ".xht", ".xhtml", ".svg"] description = "SpecialPowers used; this is gecko-specific and not supported in wpt" regexps = [item() for item 

regexps = [item() for item in
           [TrailingWhitespaceRegexp,
            TabsRegexp,
            CRRegexp,
            SetTimeoutRegexp,
            W3CTestOrgRegexp,
            WebPlatformTestRegexp,
            Webidl2Regexp,
            ConsoleRegexp,
            GenerateTestsRegexp,
            PrintRegexp,
            LayoutTestsRegexp,
            SpecialPowersRegexp]]


def check_regexp_line(repo_root, path, f):
    errors = []

    applicable_regexps = [regexp for regexp in regexps if regexp.applies(path)]

    for i, line in enumerate(f):
        for regexp in applicable_regexps:
            if regexp.search(line):
                errors.append((regexp.error, regexp.description, path, i+1))

    return errors


def check_parsed(repo_root, path, f):
    source_file = SourceFile(repo_root, path, "/", contents=f.read())

    errors = []

    if path.startswith("css/"):
        if (source_file.type == "support" and
            not source_file.name_is_non_test and
            not source_file.name_is_reference):
            return [("SUPPORT-WRONG-DIR",
                     "Support file not in support directory",
                     path,
                     None)]

        if (source_file.type != "support" and
            not source_file.name_is_reference and
            not source_file.spec_links):
            return [("MISSING-LINK",
                     "Testcase file must have a link to a spec",
                     path,
                     None)]

    if source_file.name_is_non_test or source_file.name_is_manual:
        return []

    if source_file.markup_type is None:
        return []

    if source_file.root is None:
        return [("PARSE-FAILED", "Unable to parse file", path, None)]

    if source_file.type == "manual" and not source_file.name_is_manual:
        return [("CONTENT-MANUAL",
                 "Manual test whose filename doesn't end in '-manual'",
                 path,
                 None)]

    if source_file.type == "visual" and not source_file.name_is_visual:
        return [("CONTENT-VISUAL",
                 "Visual test whose filename doesn't end in '-visual'",
                 path,
                 None)]

    for reftest_node in source_file.reftest_nodes:
        href = reftest_node.attrib.get("href", "").strip(space_chars)
        parts = urlsplit(href)
        if (parts.scheme or parts.netloc) and parts != urlsplit("about:blank"):
            errors.append(("ABSOLUTE-URL-REF",
                           "Reference test with a reference file specified via an absolute URL: '%s'" % href,
                           path,
                           None))
            continue

        ref_url = urljoin(source_file.url, href)
        ref_parts = urlsplit(ref_url)

        if source_file.url == ref_url:
            errors.append(("SAME-FILE-REF",
                           "Reference test which points at itself as a reference",
                           path,
                           None))
            continue

        assert ref_parts.path != ""

        reference_file = os.path.join(repo_root, ref_parts.path[1:])
        reference_rel = reftest_node.attrib.get("rel", "")

        if not os.path.isfile(reference_file):
            errors.append(("NON-EXISTENT-REF",
                           "Reference test with a non-existent '%s' relationship reference: '%s'" % (reference_rel, href),
                           path,
                           None))

    if len(source_file.timeout_nodes) > 1:
        errors.append(("MULTIPLE-TIMEOUT",
                       "More than one meta name='timeout'",
                       path,
                       None))

    for timeout_node in source_file.timeout_nodes:
        timeout_value = timeout_node.attrib.get("content", "").lower()
        if timeout_value != "long":
            errors.append(("INVALID-TIMEOUT",
                           "Invalid timeout value %s" % timeout_value,
                           path,
                           None))

    if source_file.testharness_nodes:
        if len(source_file.testharness_nodes) > 1:
            errors.append(("MULTIPLE-TESTHARNESS", "More than one