tidy: Use more cargo-deny features (#34447)

Instead of parsing the `Cargo.lock` file directly in `tidy.py`. Use
`cargo-deny`, which we already use to detect unapproved licenses in the
dependency chain to detect duplicate and banned crates. In addition,
enable all other `cargo-deny` checks and add exceptions where necessary
for them. This depends on the latest release of `cargo-deny` which
depends on a recent verison of `rust`.

Fixes #34393.

Signed-off-by: Martin Robinson <mrobinson@igalia.com>
This commit is contained in:
Martin Robinson 2024-12-13 09:47:40 +01:00 committed by GitHub
parent 53612dab90
commit 682eba9f74
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 155 additions and 273 deletions

2
Cargo.lock generated
View file

@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo. # This file is automatically @generated by Cargo.
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 4
[[package]] [[package]]
name = "ab_glyph" name = "ab_glyph"

108
deny.toml
View file

@ -12,10 +12,11 @@ feature-depth = 1
# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html # https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html
[advisories] [advisories]
ignore = [ ignore = [
#"RUSTSEC-0000-0000", # This is for the usage of time@0.1.45 in WebRender, which should be removed soon.
#{ id = "RUSTSEC-0000-0000", reason = "you can specify a reason the advisory is ignored" }, "RUSTSEC-2020-0071",
#"a-crate-that-is-yanked@0.1.1", # you can also ignore yanked crate versions if you wish # This has been yanked, but upgrading to the next version breaks some WPT tests.
#{ crate = "a-crate-that-is-yanked@0.1.1", reason = "you can specify why you are ignoring the yanked crate" }, # It needs investigation.
"url@2.5.3",
] ]
# This section is considered when running `cargo deny check licenses` # This section is considered when running `cargo deny check licenses`
@ -50,10 +51,6 @@ confidence-threshold = 0.8
# Allow 1 or more licenses on a per-crate basis, so that particular licenses # Allow 1 or more licenses on a per-crate basis, so that particular licenses
# aren't accepted for every possible crate as with the normal allow list # aren't accepted for every possible crate as with the normal allow list
exceptions = [ exceptions = [
# Each entry is the crate and version constraint, and its specific allow
# list
{ allow = ["OFL-1.1", "LicenseRef-UFL-1.0"], crate = "epaint" },
{ allow = ["Unicode-DFS-2016"], crate = "unicode-ident" },
] ]
# Some crates don't have (easily) machine readable licensing information, # Some crates don't have (easily) machine readable licensing information,
@ -73,21 +70,88 @@ license-files = [
# More documentation about the 'bans' section can be found here: # More documentation about the 'bans' section can be found here:
# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html # https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html
[bans] [bans]
multiple-versions = "warn"
wildcards = "warn"
highlight = "all"
workspace-default-features = "allow"
external-default-features = "allow" external-default-features = "allow"
highlight = "all"
multiple-versions = "deny"
wildcards = "allow"
workspace-default-features = "allow"
# List of crates that are allowed. Use with care! # List of crates that are allowed. Use with care!
allow = [ allow = []
#"ansi_term@0.11.0",
#{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is allowed" }, # List of crates to deny:
]
# List of crates to deny
deny = [ deny = [
#"ansi_term@0.11.0", "num",
#{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is banned" }, { crate = "rand", wrappers = [
# Wrapper crates can optionally be specified to allow the crate when it "ipc-channel",
# is a direct dependency of the otherwise banned crate "phf_generator",
#{ crate = "ansi_term@0.11.0", wrappers = ["this-crate-directly-depends-on-ansi_term"] }, "quickcheck",
"servo_rand",
"tracing-perfetto",
"tungstenite",
] },
] ]
# List of crates to skip for the duplicate check:
skip = [
"cfg_aliases",
"bitflags",
"cookie",
"futures",
"hermit-abi",
"redox_syscall",
"time",
"wasi",
"wayland-sys",
# New versions of these dependencies is pulled in by GStreamer / GLib.
"itertools",
"toml",
# Duplicated by winit.
"windows-sys",
"windows-targets",
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
# wgpu has the latest and greatest.
"foreign-types",
"foreign-types-shared",
"metal",
"windows-core",
# wgpu-hal depends on 0.5.0.
"ndk-sys",
# Required until a new version of string-cache is released.
"phf_generator",
"phf_shared",
# icu (from mozjs) uses old version
# tracing-subscriber (tokio-rs/tracing#3033) uses old version
# regex -> regex-automata 0.4.7
# icu_list -> regex-automata 0.2.0
# tracing-subscriber -> matchers -> regex-automata 0.1.0
"regex-automata",
# tracing-subscriber (tokio-rs/tracing#3033) uses old version
# regex [-> regex-automata 0.4.7] -> regex-syntax 0.8.4
# tracing-subscriber -> matchers -> regex-automata 0.1.0 -> regex-syntax 0.6.29
"regex-syntax",
# gilrs is on 0.10.0, but Servo is still on 0.9.4
"core-foundation",
# some non-servo crates still use 0.14
"glow",
]
# github.com organizations to allow git sources for
[sources.allow-org]
github = ["pcwalton", "servo"]

View file

@ -79,13 +79,21 @@ class Base:
return True return True
def install_cargo_deny(self, force: bool) -> bool: def install_cargo_deny(self, force: bool) -> bool:
if not force and shutil.which("cargo-deny") is not None: def cargo_deny_installed():
if force or not shutil.which("cargo-deny"):
return False
# Tidy needs at least version 0.16.3 installed.
result = subprocess.run(["cargo-deny", "--version"],
encoding='utf-8', capture_output=True)
(major, minor, micro) = result.stdout.strip().split(" ")[1].split(".", 2)
return (int(major), int(minor), int(micro)) >= (0, 16, 3)
if cargo_deny_installed():
return False return False
print(" * Installing cargo-deny...") print(" * Installing cargo-deny...")
if subprocess.call(["cargo", "install", "cargo-deny", "--locked"]) != 0: if subprocess.call(["cargo", "install", "cargo-deny", "--locked"]) != 0:
raise EnvironmentError("Installation of cargo-deny failed.") raise EnvironmentError("Installation of cargo-deny failed.")
return True return True
def install_crown(self, force: bool) -> bool: def install_crown(self, force: bool) -> bool:

View file

@ -174,59 +174,6 @@ class CheckTidiness(unittest.TestCase):
self.assertEqual('Unordered key (found b before a)', next(errors)[2]) self.assertEqual('Unordered key (found b before a)', next(errors)[2])
self.assertNoMoreErrors(errors) self.assertNoMoreErrors(errors)
def test_lock(self):
errors = tidy.run_custom_cargo_lock_lints(test_file_path('duplicated_package.lock'), print_text=False)
msg = """duplicate versions for package `test`
\t\x1b[93mThe following packages depend on version 0.4.9 from 'crates.io':\x1b[0m
\t\ttest2 0.1.0
\t\x1b[93mThe following packages depend on version 0.5.1 from 'crates.io':\x1b[0m
\t\ttest3 0.5.1"""
self.assertEqual(msg, next(errors)[2])
msg2 = """duplicate versions for package `test3`
\t\x1b[93mThe following packages depend on version 0.5.1 from 'crates.io':\x1b[0m
\t\ttest4 0.1.0
\t\x1b[93mThe following packages depend on version 0.5.1 from 'https://github.com/user/test3':\x1b[0m
\t\ttest5 0.1.0"""
self.assertEqual(msg2, next(errors)[2])
self.assertNoMoreErrors(errors)
def test_lock_ignore_without_duplicates(self):
tidy.config["ignore"]["packages"] = ["test", "test2", "test3", "test5"]
errors = tidy.run_custom_cargo_lock_lints(test_file_path('duplicated_package.lock'), print_text=False)
msg = (
"duplicates for `test2` are allowed, but only single version found"
"\n\t\x1b[93mThe following packages depend on version 0.1.0 from 'https://github.com/user/test2':\x1b[0m"
)
self.assertEqual(msg, next(errors)[2])
msg2 = (
"duplicates for `test5` are allowed, but only single version found"
"\n\t\x1b[93mThe following packages depend on version 0.1.0 from 'https://github.com/':\x1b[0m"
)
self.assertEqual(msg2, next(errors)[2])
self.assertNoMoreErrors(errors)
def test_lock_exceptions(self):
tidy.config["blocked-packages"]["rand"] = ["test_exception", "test_unneeded_exception"]
errors = tidy.run_custom_cargo_lock_lints(test_file_path('blocked_package.lock'), print_text=False)
msg = (
"Package test_blocked 0.0.2 depends on blocked package rand."
)
msg2 = (
"Package test_unneeded_exception is not required to be an exception of blocked package rand."
)
self.assertEqual(msg, next(errors)[2])
self.assertEqual(msg2, next(errors)[2])
self.assertNoMoreErrors(errors)
# needed to not raise errors in other test cases
tidy.config["blocked-packages"]["rand"] = []
def test_file_list(self): def test_file_list(self):
file_path = os.path.join(BASE_PATH, 'test_ignored') file_path = os.path.join(BASE_PATH, 'test_ignored')
file_list = tidy.FileList(file_path, only_changed_files=False, exclude_dirs=[], progress=False) file_list = tidy.FileList(file_path, only_changed_files=False, exclude_dirs=[], progress=False)

View file

@ -17,7 +17,7 @@ import os
import re import re
import subprocess import subprocess
import sys import sys
from typing import List from typing import Any, Dict, List
import colorama import colorama
import toml import toml
@ -34,6 +34,7 @@ WPT_CONFIG_INI_PATH = os.path.join(WPT_PATH, "config.ini")
URL_REGEX = re.compile(br'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+') URL_REGEX = re.compile(br'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')
UTF8_URL_REGEX = re.compile(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+') UTF8_URL_REGEX = re.compile(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')
CARGO_LOCK_FILE = os.path.join(TOPDIR, "Cargo.lock") CARGO_LOCK_FILE = os.path.join(TOPDIR, "Cargo.lock")
CARGO_DENY_CONFIG_FILE = os.path.join(TOPDIR, "deny.toml")
ERROR_RAW_URL_IN_RUSTDOC = "Found raw link in rustdoc. Please escape it with angle brackets or use a markdown link." ERROR_RAW_URL_IN_RUSTDOC = "Found raw link in rustdoc. Please escape it with angle brackets or use a markdown link."
@ -363,125 +364,54 @@ def check_flake8(file_name, contents):
yield line_num, message.strip() yield line_num, message.strip()
def check_cargo_lock_file(only_changed_files: bool): def run_cargo_deny_lints():
if only_changed_files and not list(git_changes_since_last_merge("./Cargo.lock")): print("\r ➤ Running `cargo-deny` checks...")
print("\r ➤ Skipping `Cargo.lock` lint checks, because it is unchanged.") result = subprocess.run(["cargo-deny", "--format=json", "--all-features", "check"],
return encoding='utf-8',
yield from run_custom_cargo_lock_lints(CARGO_LOCK_FILE)
yield from validate_dependency_licenses()
def run_custom_cargo_lock_lints(cargo_lock_filename: str, print_text: bool = True):
if print_text:
print(f"\r ➤ Linting cargo lock ({cargo_lock_filename})...")
with open(cargo_lock_filename) as cargo_lock_file:
content = toml.load(cargo_lock_file)
def find_reverse_dependencies(name, content):
for package in itertools.chain([content.get("root", {})], content["package"]):
for dependency in package.get("dependencies", []):
parts = dependency.split()
dependency = (parts[0], parts[1] if len(parts) > 1 else None, parts[2] if len(parts) > 2 else None)
if dependency[0] == name:
yield package["name"], package["version"], dependency
# Package names to be neglected (as named by cargo)
exceptions = config["ignore"]["packages"]
packages_by_name = {}
for package in content.get("package", []):
if "replace" in package:
continue
source = package.get("source", "")
if source == r"registry+https://github.com/rust-lang/crates.io-index":
source = "crates.io"
packages_by_name.setdefault(package["name"], []).append((package["version"], source))
for name in exceptions:
if name not in packages_by_name:
yield (cargo_lock_filename, 1, "duplicates are allowed for `{}` but it is not a dependency".format(name))
for (name, packages) in packages_by_name.items():
has_duplicates = len(packages) > 1
duplicates_allowed = name in exceptions
if has_duplicates == duplicates_allowed:
continue
if duplicates_allowed:
message = 'duplicates for `{}` are allowed, but only single version found'.format(name)
else:
message = "duplicate versions for package `{}`".format(name)
packages.sort()
packages_dependencies = list(find_reverse_dependencies(name, content))
for version, source in packages:
short_source = source.split("#")[0].replace("git+", "")
message += "\n\t\033[93mThe following packages depend on version {} from '{}':\033[0m" \
.format(version, short_source)
for pname, package_version, dependency in packages_dependencies:
if (not dependency[1] or version in dependency[1]) and \
(not dependency[2] or short_source in dependency[2]):
message += "\n\t\t" + pname + " " + package_version
yield (cargo_lock_filename, 1, message)
# Check to see if we are transitively using any blocked packages
blocked_packages = config["blocked-packages"]
# Create map to keep track of visited exception packages
visited_whitelisted_packages = {package: {} for package in blocked_packages.keys()}
for package in content.get("package", []):
package_name = package.get("name")
package_version = package.get("version")
for dependency in package.get("dependencies", []):
dependency = dependency.split()
dependency_name = dependency[0]
whitelist = blocked_packages.get(dependency_name)
if whitelist is not None:
if package_name not in whitelist:
fmt = "Package {} {} depends on blocked package {}."
message = fmt.format(package_name, package_version, dependency_name)
yield (cargo_lock_filename, 1, message)
else:
visited_whitelisted_packages[dependency_name][package_name] = True
# Check if all the exceptions to blocked packages actually depend on the blocked package
for dependency_name, package_names in blocked_packages.items():
for package_name in package_names:
if not visited_whitelisted_packages[dependency_name].get(package_name):
fmt = "Package {} is not required to be an exception of blocked package {}."
message = fmt.format(package_name, dependency_name)
yield (cargo_lock_filename, 1, message)
def validate_dependency_licenses():
print("\r ➤ Checking licenses of Rust dependencies...")
result = subprocess.run(["cargo-deny", "--format=json", "check", "licenses"], encoding='utf-8',
capture_output=True) capture_output=True)
if result.returncode == 0:
return False
assert result.stderr is not None, "cargo deny should return error information via stderr when failing" assert result.stderr is not None, "cargo deny should return error information via stderr when failing"
error_info = [json.loads(json_struct) for json_struct in result.stderr.splitlines()] errors = []
error_messages = [] for line in result.stderr.splitlines():
num_license_errors = 'unknown' error_fields = json.loads(line)["fields"]
for error in error_info: error_code = error_fields.get("code", "unknown")
error_fields = error['fields'] error_severity = error_fields.get("severity", "unknown")
if error['type'] == 'summary': message = error_fields.get("message", "")
num_license_errors = error_fields['licenses']['errors'] labels = error_fields.get("labels", [])
elif 'graphs' in error_fields and error_fields['graphs']:
crate = error_fields['graphs'][0]['Krate']
license_name = error_fields['notes'][0]
message = f'Rejected license "{license_name}". Run `cargo deny` for more details'
error_messages.append(
f'Rust dependency {crate["name"]}@{crate["version"]}: {message}')
else:
error_messages.append(error_fields['message'])
print(f' `cargo deny` reported {num_license_errors} licenses errors') span = ""
for message in error_messages: line = 1
yield (CARGO_LOCK_FILE, 1, message) if len(labels) > 0:
span = labels[0]["span"]
line = labels[0]["line"]
# This is an effort to detect the license failures, so that we can print
# a more helpful message -- which normally does not include the license
# name.
if error_code == "rejected":
crate = CargoDenyKrate(error_fields["graphs"][0])
license_name = error_fields["notes"][0]
errors.append((
CARGO_LOCK_FILE,
1,
f"Rust dependency {crate}: Rejected license \"{license_name}\""
))
# This detects if a crate has been marked as banned in the configuration file.
elif error_code == "banned":
crate = CargoDenyKrate(error_fields["graphs"][0])
parents = ", ".join([str(parent) for parent in crate.parents])
errors.append((CARGO_LOCK_FILE, 1, f"{message}: used by ({parents})"))
# This detects when two version of a crate have been used, but are not skipped
# by the configuration file.
elif error_code == "duplicate":
errors.append((CARGO_LOCK_FILE, 1, f"{message}:\n {span}"))
# This detects any other problem, typically caused by an unnecessary exception
# in the deny.toml file.
elif error_severity in ["warning", "error"]:
errors.append((CARGO_DENY_CONFIG_FILE, line, f"{message}: {span}"))
for error in errors:
yield error
def check_toml(file_name, lines): def check_toml(file_name, lines):
@ -1082,9 +1012,7 @@ def scan(only_changed_files=False, progress=False):
check_rust, check_spec, check_modeline) check_rust, check_spec, check_modeline)
file_errors = collect_errors_for_files(files_to_check, checking_functions, line_checking_functions) file_errors = collect_errors_for_files(files_to_check, checking_functions, line_checking_functions)
# These checks are essentially checking a single file. cargo_lock_errors = run_cargo_deny_lints()
cargo_lock_errors = check_cargo_lock_file(only_changed_files)
wpt_errors = run_wpt_lints(only_changed_files) wpt_errors = run_wpt_lints(only_changed_files)
# chain all the iterators # chain all the iterators
@ -1100,3 +1028,14 @@ def scan(only_changed_files=False, progress=False):
+ f"{colorama.Fore.RED}{error[2]}{colorama.Style.RESET_ALL}") + f"{colorama.Fore.RED}{error[2]}{colorama.Style.RESET_ALL}")
return int(error is not None) return int(error is not None)
class CargoDenyKrate:
def __init__(self, data: Dict[Any, Any]):
crate = data['Krate']
self.name = crate['name']
self.version = crate['version']
self.parents = [CargoDenyKrate(parent) for parent in data.get('parents', [])]
def __str__(self):
return f"{self.name}@{self.version}"

View file

@ -4,83 +4,7 @@ skip-check-licenses = false
check-alphabetical-order = true check-alphabetical-order = true
check-ordered-json-keys = ["./resources/prefs.json"] check-ordered-json-keys = ["./resources/prefs.json"]
# Packages which we avoid using in Servo.
# For each blocked package, we can list the exceptions,
# which are packages allowed to use the blocked package.
[blocked-packages]
num = []
rand = [
"ipc-channel",
"phf_generator",
"quickcheck", # Only used in tests
"servo_rand",
"tungstenite",
"tracing-perfetto",
]
[ignore] [ignore]
# Ignored packages with duplicated versions
packages = [
"bitflags",
"cfg_aliases",
"cookie",
"futures",
"hermit-abi",
"redox_syscall",
"time",
"wasi",
"wayland-sys",
# New versions of these dependencies is pulled in by GStreamer / GLib.
"itertools",
"toml",
# Duplicated by winit.
"windows-sys",
"windows-targets",
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
# wgpu has the latest and greatest.
"foreign-types",
"foreign-types-shared",
"metal",
"windows-core",
# wgpu-hal depends on 0.5.0.
"ndk-sys",
# quickcheck (required by layout_2020 for tests) is
# stuck on 0.8.4 with no new releases.
"env_logger",
# Required until a new version of string-cache is released.
"phf_generator",
"phf_shared",
# icu (from mozjs) uses old version
# tracing-subscriber (tokio-rs/tracing#3033) uses old version
# regex -> regex-automata 0.4.7
# icu_list -> regex-automata 0.2.0
# tracing-subscriber -> matchers -> regex-automata 0.1.0
"regex-automata",
# tracing-subscriber (tokio-rs/tracing#3033) uses old version
# regex [-> regex-automata 0.4.7] -> regex-syntax 0.8.4
# tracing-subscriber -> matchers -> regex-automata 0.1.0 -> regex-syntax 0.6.29
"regex-syntax",
# gilrs is on 0.10.0, but Servo is still on 0.9.4
"core-foundation",
# some non-servo crates still use 0.14
"glow",
]
# Files that are ignored for all tidy and lint checks. # Files that are ignored for all tidy and lint checks.
files = [ files = [
"./components/net/tests/parsable_mime/text", "./components/net/tests/parsable_mime/text",