"""Helpers for the web-platform-tests lint tool: enumerating git-tracked
paths, parsing/applying the lint whitelist, and regexp-based line checks."""

from __future__ import print_function, unicode_literals

import abc
import argparse
import ast
import fnmatch
import json
import os
import re
import subprocess
import sys
from collections import defaultdict

from .. import localpaths
from manifest.sourcefile import SourceFile
from six import iteritems
from six.moves import range

# Absolute path of the directory containing this file.
here = os.path.abspath(os.path.split(__file__)[0])

# Template shown to the user after lint failures; filled in with
# (error type, file name, error type, file name) via %-formatting.
ERROR_MSG = """You must fix all errors; for details on how to fix them, see
https://github.com/w3c/web-platform-tests/blob/master/docs/lint-tool.md

However, instead of fixing a particular error, it's sometimes
OK to add a line to the lint.whitelist file in the root of the
web-platform-tests directory to make the lint tool ignore it.

For example, to make the lint tool ignore all '%s'
errors in the %s file,
you could add the following line to the lint.whitelist file.

%s:%s"""


def all_git_paths(repo_root):
    """Yield each path tracked by git under ``repo_root``.

    Runs ``git ls-tree -r --name-only HEAD`` and yields one item per
    output line.  NOTE(review): ``check_output`` returns bytes on
    Python 3, where splitting on a str would raise; this assumes the
    Python 2 str behaviour the surrounding six-based code targets —
    confirm before porting.  Also note the split yields a trailing
    empty string when the output ends with a newline.
    """
    command_line = ["git", "ls-tree", "-r", "--name-only", "HEAD"]
    output = subprocess.check_output(command_line, cwd=repo_root)
    for item in output.split("\n"):
        yield item


def check_path_length(repo_root, path):
    """Return a list of lint errors if ``path`` is too long, else [].

    The limit of 150 characters counts the leading "/" with which the
    path is reported, hence ``len(path) + 1``.
    """
    if len(path) + 1 > 150:
        # Use the same 4-tuple shape (error type, description, path,
        # line) as the other checks so filter_whitelist_errors can
        # unpack it; there is no meaningful line number here.
        return [("PATH LENGTH",
                 "/%s longer than maximum path length (%d > 150)" % (path, len(path) + 1),
                 path,
                 None)]
    return []


def parse_whitelist(f):
    """
    Parse the whitelist file given by `f`, and return the parsed structure.

    Each non-blank, non-comment line has the form
    ``ERROR TYPE:file-pattern`` or ``ERROR TYPE:file-pattern:line``.
    Returns a dict mapping file pattern -> error type -> set of line
    numbers (``None`` meaning "any line").
    """
    data = defaultdict(lambda:defaultdict(set))

    for line in f:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        parts = [item.strip() for item in line.split(":")]
        if len(parts) == 2:
            # No line number given: whitelist the error on any line.
            parts.append(None)
        else:
            parts[-1] = int(parts[-1])

        error_type, file_match, line_number = parts
        data[file_match][error_type].add(line_number)

    return data


def filter_whitelist_errors(data, path, errors):
    """
    Filter out those errors that are whitelisted in `data`.

    ``errors`` is a list of (error type, message, path, line) tuples;
    the returned list preserves order and contains only the tuples not
    matched by a whitelist entry for ``path``.
    """
    whitelisted = [False for item in range(len(errors))]

    for file_match, whitelist_errors in iteritems(data):
        if fnmatch.fnmatch(path, file_match):
            # NB: do not name the unpacked path "path" -- that would
            # shadow the parameter and corrupt later fnmatch checks.
            for i, (error_type, msg, error_path, line) in enumerate(errors):
                if "*" in whitelist_errors:
                    # "*" whitelists every error type for this pattern.
                    whitelisted[i] = True
                elif error_type in whitelist_errors:
                    allowed_lines = whitelist_errors[error_type]
                    if None in allowed_lines or line in allowed_lines:
                        whitelisted[i] = True

    return [item for i, item in enumerate(errors) if not whitelisted[i]]


class Regexp(object):
    """Base class for regexp-based line checks.

    Subclasses define ``pattern`` (a bytes regexp), ``error`` (the
    error code reported), ``description``, and optionally
    ``file_extensions`` to restrict which files the check applies to.
    """
    pattern = None
    file_extensions = None
    error = None
    _re = None

    def __init__(self):
        self._re = re.compile(self.pattern)

    def applies(self, path):
        # With no extension list, the check applies to every file.
        return (self.file_extensions is None or
                os.path.splitext(path)[1] in self.file_extensions)

    def search(self, line):
        return self._re.search(line)

class TrailingWhitespaceRegexp(Regexp):
    pattern = b"[ \t\f\v]$"
    error = "TRAILING WHITESPACE"
    description = "Whitespace at EOL"

class TabsRegexp(Regexp):
    pattern = b"^\t"
    error = "INDENT TABS"
    description = "Tabs used for indentation"

class CRRegexp(Regexp):
    pattern = b"\r$"
    error = "CR AT EOL"
    description = "CR character in line separator"

class W3CTestOrgRegexp(Regexp):
    pattern = b"w3c\-test\.org"
    error = "W3C-TEST.ORG"
    description = "External w3c-test.org domain used"

class Webidl2Regexp(Regexp):
    pattern = b"webidl2\.js"
    error = "WEBIDL2.JS"
    description = "Legacy webidl2.js script used"

class ConsoleRegexp(Regexp):
    pattern = b"console\.[a-zA-Z]+\s*\("
    error = "CONSOLE"
    file_extensions = [".html", ".htm", ".js", ".xht", ".xhtml", ".svg"]
    description = "Console logging API used"

class PrintRegexp(Regexp):
    pattern = b"print(?:\s|\s*\()"
    error = "PRINT STATEMENT"
    file_extensions = [".py"]
    description = "Print function used"

# Instantiated once so each pattern is compiled a single time.
regexps = [item() for item in
           [TrailingWhitespaceRegexp,
            TabsRegexp,
            CRRegexp,
            W3CTestOrgRegexp,
            Webidl2Regexp,
            ConsoleRegexp,
            PrintRegexp]]

def check_regexp_line(repo_root, path, f):
    errors = []

    applicable_regexps = [regexp for regexp in regexps if regexp.applies(path)]
for i, line in enumerate(f): for regexp in applicable_regexps: if regexp.search(line): errors.append((regexp.error, regexp.description, path, i+1)) return errors def check_parsed(repo_root, path, f): source_file = SourceFile(repo_root, path, "/", contents=f.read()) errors = [] if source_file.name_is_non_test or source_file.name_is_manual: return [] if source_file.markup_type is None: return [] if source_file.root is None: return [("PARSE-FAILED", "Unable to parse file", path, None)] if len(source_file.timeout_nodes) > 1: errors.append(("MULTIPLE-TIMEOUT", "More than one meta name='timeout'", path, None)) for timeout_node in source_file.timeout_nodes: timeout_value = timeout_node.attrib.get("content", "").lower() if timeout_value != "long": errors.append(("INVALID-TIMEOUT", "Invalid timeout value %s" % timeout_value, path, None)) if source_file.testharness_nodes: if len(source_file.testharness_nodes) > 1: errors.append(("MULTIPLE-TESTHARNESS", "More than one