tidy: Use more cargo-deny features (#34447)

Instead of parsing the `Cargo.lock` file directly in `tidy.py`. Use
`cargo-deny`, which we already use to detect unapproved licenses in the
dependency chain to detect duplicate and banned crates. In addition,
enable all other `cargo-deny` checks and add exceptions where necessary
for them. This depends on the latest release of `cargo-deny` which
depends on a recent verison of `rust`.

Fixes #34393.

Signed-off-by: Martin Robinson <mrobinson@igalia.com>
This commit is contained in:
Martin Robinson 2024-12-13 09:47:40 +01:00 committed by GitHub
parent 53612dab90
commit 682eba9f74
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 155 additions and 273 deletions

2
Cargo.lock generated
View file

@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "ab_glyph"

108
deny.toml
View file

@ -12,10 +12,11 @@ feature-depth = 1
# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html
[advisories]
ignore = [
#"RUSTSEC-0000-0000",
#{ id = "RUSTSEC-0000-0000", reason = "you can specify a reason the advisory is ignored" },
#"a-crate-that-is-yanked@0.1.1", # you can also ignore yanked crate versions if you wish
#{ crate = "a-crate-that-is-yanked@0.1.1", reason = "you can specify why you are ignoring the yanked crate" },
# This is for the usage of time@0.1.45 in WebRender, which should be removed soon.
"RUSTSEC-2020-0071",
# This has been yanked, but upgrading to the next version breaks some WPT tests.
# It needs investigation.
"url@2.5.3",
]
# This section is considered when running `cargo deny check licenses`
@ -50,10 +51,6 @@ confidence-threshold = 0.8
# Allow 1 or more licenses on a per-crate basis, so that particular licenses
# aren't accepted for every possible crate as with the normal allow list
exceptions = [
# Each entry is the crate and version constraint, and its specific allow
# list
{ allow = ["OFL-1.1", "LicenseRef-UFL-1.0"], crate = "epaint" },
{ allow = ["Unicode-DFS-2016"], crate = "unicode-ident" },
]
# Some crates don't have (easily) machine readable licensing information,
@ -73,21 +70,88 @@ license-files = [
# More documentation about the 'bans' section can be found here:
# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html
[bans]
multiple-versions = "warn"
wildcards = "warn"
highlight = "all"
workspace-default-features = "allow"
external-default-features = "allow"
highlight = "all"
multiple-versions = "deny"
wildcards = "allow"
workspace-default-features = "allow"
# List of crates that are allowed. Use with care!
allow = [
#"ansi_term@0.11.0",
#{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is allowed" },
]
# List of crates to deny
allow = []
# List of crates to deny:
deny = [
#"ansi_term@0.11.0",
#{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is banned" },
# Wrapper crates can optionally be specified to allow the crate when it
# is a direct dependency of the otherwise banned crate
#{ crate = "ansi_term@0.11.0", wrappers = ["this-crate-directly-depends-on-ansi_term"] },
"num",
{ crate = "rand", wrappers = [
"ipc-channel",
"phf_generator",
"quickcheck",
"servo_rand",
"tracing-perfetto",
"tungstenite",
] },
]
# List of crates to skip for the duplicate check:
skip = [
"cfg_aliases",
"bitflags",
"cookie",
"futures",
"hermit-abi",
"redox_syscall",
"time",
"wasi",
"wayland-sys",
# New versions of these dependencies is pulled in by GStreamer / GLib.
"itertools",
"toml",
# Duplicated by winit.
"windows-sys",
"windows-targets",
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
# wgpu has the latest and greatest.
"foreign-types",
"foreign-types-shared",
"metal",
"windows-core",
# wgpu-hal depends on 0.5.0.
"ndk-sys",
# Required until a new version of string-cache is released.
"phf_generator",
"phf_shared",
# icu (from mozjs) uses old version
# tracing-subscriber (tokio-rs/tracing#3033) uses old version
# regex -> regex-automata 0.4.7
# icu_list -> regex-automata 0.2.0
# tracing-subscriber -> matchers -> regex-automata 0.1.0
"regex-automata",
# tracing-subscriber (tokio-rs/tracing#3033) uses old version
# regex [-> regex-automata 0.4.7] -> regex-syntax 0.8.4
# tracing-subscriber -> matchers -> regex-automata 0.1.0 -> regex-syntax 0.6.29
"regex-syntax",
# gilrs is on 0.10.0, but Servo is still on 0.9.4
"core-foundation",
# some non-servo crates still use 0.14
"glow",
]
# github.com organizations to allow git sources for
[sources.allow-org]
github = ["pcwalton", "servo"]

View file

@ -79,13 +79,21 @@ class Base:
return True
def install_cargo_deny(self, force: bool) -> bool:
if not force and shutil.which("cargo-deny") is not None:
def cargo_deny_installed():
if force or not shutil.which("cargo-deny"):
return False
# Tidy needs at least version 0.16.3 installed.
result = subprocess.run(["cargo-deny", "--version"],
encoding='utf-8', capture_output=True)
(major, minor, micro) = result.stdout.strip().split(" ")[1].split(".", 2)
return (int(major), int(minor), int(micro)) >= (0, 16, 3)
if cargo_deny_installed():
return False
print(" * Installing cargo-deny...")
if subprocess.call(["cargo", "install", "cargo-deny", "--locked"]) != 0:
raise EnvironmentError("Installation of cargo-deny failed.")
return True
def install_crown(self, force: bool) -> bool:

View file

@ -174,59 +174,6 @@ class CheckTidiness(unittest.TestCase):
self.assertEqual('Unordered key (found b before a)', next(errors)[2])
self.assertNoMoreErrors(errors)
def test_lock(self):
errors = tidy.run_custom_cargo_lock_lints(test_file_path('duplicated_package.lock'), print_text=False)
msg = """duplicate versions for package `test`
\t\x1b[93mThe following packages depend on version 0.4.9 from 'crates.io':\x1b[0m
\t\ttest2 0.1.0
\t\x1b[93mThe following packages depend on version 0.5.1 from 'crates.io':\x1b[0m
\t\ttest3 0.5.1"""
self.assertEqual(msg, next(errors)[2])
msg2 = """duplicate versions for package `test3`
\t\x1b[93mThe following packages depend on version 0.5.1 from 'crates.io':\x1b[0m
\t\ttest4 0.1.0
\t\x1b[93mThe following packages depend on version 0.5.1 from 'https://github.com/user/test3':\x1b[0m
\t\ttest5 0.1.0"""
self.assertEqual(msg2, next(errors)[2])
self.assertNoMoreErrors(errors)
def test_lock_ignore_without_duplicates(self):
tidy.config["ignore"]["packages"] = ["test", "test2", "test3", "test5"]
errors = tidy.run_custom_cargo_lock_lints(test_file_path('duplicated_package.lock'), print_text=False)
msg = (
"duplicates for `test2` are allowed, but only single version found"
"\n\t\x1b[93mThe following packages depend on version 0.1.0 from 'https://github.com/user/test2':\x1b[0m"
)
self.assertEqual(msg, next(errors)[2])
msg2 = (
"duplicates for `test5` are allowed, but only single version found"
"\n\t\x1b[93mThe following packages depend on version 0.1.0 from 'https://github.com/':\x1b[0m"
)
self.assertEqual(msg2, next(errors)[2])
self.assertNoMoreErrors(errors)
def test_lock_exceptions(self):
tidy.config["blocked-packages"]["rand"] = ["test_exception", "test_unneeded_exception"]
errors = tidy.run_custom_cargo_lock_lints(test_file_path('blocked_package.lock'), print_text=False)
msg = (
"Package test_blocked 0.0.2 depends on blocked package rand."
)
msg2 = (
"Package test_unneeded_exception is not required to be an exception of blocked package rand."
)
self.assertEqual(msg, next(errors)[2])
self.assertEqual(msg2, next(errors)[2])
self.assertNoMoreErrors(errors)
# needed to not raise errors in other test cases
tidy.config["blocked-packages"]["rand"] = []
def test_file_list(self):
file_path = os.path.join(BASE_PATH, 'test_ignored')
file_list = tidy.FileList(file_path, only_changed_files=False, exclude_dirs=[], progress=False)

View file

@ -17,7 +17,7 @@ import os
import re
import subprocess
import sys
from typing import List
from typing import Any, Dict, List
import colorama
import toml
@ -34,6 +34,7 @@ WPT_CONFIG_INI_PATH = os.path.join(WPT_PATH, "config.ini")
URL_REGEX = re.compile(br'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')
UTF8_URL_REGEX = re.compile(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')
CARGO_LOCK_FILE = os.path.join(TOPDIR, "Cargo.lock")
CARGO_DENY_CONFIG_FILE = os.path.join(TOPDIR, "deny.toml")
ERROR_RAW_URL_IN_RUSTDOC = "Found raw link in rustdoc. Please escape it with angle brackets or use a markdown link."
@ -363,125 +364,54 @@ def check_flake8(file_name, contents):
yield line_num, message.strip()
def check_cargo_lock_file(only_changed_files: bool):
if only_changed_files and not list(git_changes_since_last_merge("./Cargo.lock")):
print("\r ➤ Skipping `Cargo.lock` lint checks, because it is unchanged.")
return
yield from run_custom_cargo_lock_lints(CARGO_LOCK_FILE)
yield from validate_dependency_licenses()
def run_custom_cargo_lock_lints(cargo_lock_filename: str, print_text: bool = True):
if print_text:
print(f"\r ➤ Linting cargo lock ({cargo_lock_filename})...")
with open(cargo_lock_filename) as cargo_lock_file:
content = toml.load(cargo_lock_file)
def find_reverse_dependencies(name, content):
for package in itertools.chain([content.get("root", {})], content["package"]):
for dependency in package.get("dependencies", []):
parts = dependency.split()
dependency = (parts[0], parts[1] if len(parts) > 1 else None, parts[2] if len(parts) > 2 else None)
if dependency[0] == name:
yield package["name"], package["version"], dependency
# Package names to be neglected (as named by cargo)
exceptions = config["ignore"]["packages"]
packages_by_name = {}
for package in content.get("package", []):
if "replace" in package:
continue
source = package.get("source", "")
if source == r"registry+https://github.com/rust-lang/crates.io-index":
source = "crates.io"
packages_by_name.setdefault(package["name"], []).append((package["version"], source))
for name in exceptions:
if name not in packages_by_name:
yield (cargo_lock_filename, 1, "duplicates are allowed for `{}` but it is not a dependency".format(name))
for (name, packages) in packages_by_name.items():
has_duplicates = len(packages) > 1
duplicates_allowed = name in exceptions
if has_duplicates == duplicates_allowed:
continue
if duplicates_allowed:
message = 'duplicates for `{}` are allowed, but only single version found'.format(name)
else:
message = "duplicate versions for package `{}`".format(name)
packages.sort()
packages_dependencies = list(find_reverse_dependencies(name, content))
for version, source in packages:
short_source = source.split("#")[0].replace("git+", "")
message += "\n\t\033[93mThe following packages depend on version {} from '{}':\033[0m" \
.format(version, short_source)
for pname, package_version, dependency in packages_dependencies:
if (not dependency[1] or version in dependency[1]) and \
(not dependency[2] or short_source in dependency[2]):
message += "\n\t\t" + pname + " " + package_version
yield (cargo_lock_filename, 1, message)
# Check to see if we are transitively using any blocked packages
blocked_packages = config["blocked-packages"]
# Create map to keep track of visited exception packages
visited_whitelisted_packages = {package: {} for package in blocked_packages.keys()}
for package in content.get("package", []):
package_name = package.get("name")
package_version = package.get("version")
for dependency in package.get("dependencies", []):
dependency = dependency.split()
dependency_name = dependency[0]
whitelist = blocked_packages.get(dependency_name)
if whitelist is not None:
if package_name not in whitelist:
fmt = "Package {} {} depends on blocked package {}."
message = fmt.format(package_name, package_version, dependency_name)
yield (cargo_lock_filename, 1, message)
else:
visited_whitelisted_packages[dependency_name][package_name] = True
# Check if all the exceptions to blocked packages actually depend on the blocked package
for dependency_name, package_names in blocked_packages.items():
for package_name in package_names:
if not visited_whitelisted_packages[dependency_name].get(package_name):
fmt = "Package {} is not required to be an exception of blocked package {}."
message = fmt.format(package_name, dependency_name)
yield (cargo_lock_filename, 1, message)
def validate_dependency_licenses():
print("\r ➤ Checking licenses of Rust dependencies...")
result = subprocess.run(["cargo-deny", "--format=json", "check", "licenses"], encoding='utf-8',
def run_cargo_deny_lints():
print("\r ➤ Running `cargo-deny` checks...")
result = subprocess.run(["cargo-deny", "--format=json", "--all-features", "check"],
encoding='utf-8',
capture_output=True)
if result.returncode == 0:
return False
assert result.stderr is not None, "cargo deny should return error information via stderr when failing"
error_info = [json.loads(json_struct) for json_struct in result.stderr.splitlines()]
error_messages = []
num_license_errors = 'unknown'
for error in error_info:
error_fields = error['fields']
if error['type'] == 'summary':
num_license_errors = error_fields['licenses']['errors']
elif 'graphs' in error_fields and error_fields['graphs']:
crate = error_fields['graphs'][0]['Krate']
license_name = error_fields['notes'][0]
message = f'Rejected license "{license_name}". Run `cargo deny` for more details'
error_messages.append(
f'Rust dependency {crate["name"]}@{crate["version"]}: {message}')
else:
error_messages.append(error_fields['message'])
errors = []
for line in result.stderr.splitlines():
error_fields = json.loads(line)["fields"]
error_code = error_fields.get("code", "unknown")
error_severity = error_fields.get("severity", "unknown")
message = error_fields.get("message", "")
labels = error_fields.get("labels", [])
print(f' `cargo deny` reported {num_license_errors} licenses errors')
for message in error_messages:
yield (CARGO_LOCK_FILE, 1, message)
span = ""
line = 1
if len(labels) > 0:
span = labels[0]["span"]
line = labels[0]["line"]
# This is an effort to detect the license failures, so that we can print
# a more helpful message -- which normally does not include the license
# name.
if error_code == "rejected":
crate = CargoDenyKrate(error_fields["graphs"][0])
license_name = error_fields["notes"][0]
errors.append((
CARGO_LOCK_FILE,
1,
f"Rust dependency {crate}: Rejected license \"{license_name}\""
))
# This detects if a crate has been marked as banned in the configuration file.
elif error_code == "banned":
crate = CargoDenyKrate(error_fields["graphs"][0])
parents = ", ".join([str(parent) for parent in crate.parents])
errors.append((CARGO_LOCK_FILE, 1, f"{message}: used by ({parents})"))
# This detects when two version of a crate have been used, but are not skipped
# by the configuration file.
elif error_code == "duplicate":
errors.append((CARGO_LOCK_FILE, 1, f"{message}:\n {span}"))
# This detects any other problem, typically caused by an unnecessary exception
# in the deny.toml file.
elif error_severity in ["warning", "error"]:
errors.append((CARGO_DENY_CONFIG_FILE, line, f"{message}: {span}"))
for error in errors:
yield error
def check_toml(file_name, lines):
@ -1082,9 +1012,7 @@ def scan(only_changed_files=False, progress=False):
check_rust, check_spec, check_modeline)
file_errors = collect_errors_for_files(files_to_check, checking_functions, line_checking_functions)
# These checks are essentially checking a single file.
cargo_lock_errors = check_cargo_lock_file(only_changed_files)
cargo_lock_errors = run_cargo_deny_lints()
wpt_errors = run_wpt_lints(only_changed_files)
# chain all the iterators
@ -1100,3 +1028,14 @@ def scan(only_changed_files=False, progress=False):
+ f"{colorama.Fore.RED}{error[2]}{colorama.Style.RESET_ALL}")
return int(error is not None)
class CargoDenyKrate:
def __init__(self, data: Dict[Any, Any]):
crate = data['Krate']
self.name = crate['name']
self.version = crate['version']
self.parents = [CargoDenyKrate(parent) for parent in data.get('parents', [])]
def __str__(self):
return f"{self.name}@{self.version}"

View file

@ -4,83 +4,7 @@ skip-check-licenses = false
check-alphabetical-order = true
check-ordered-json-keys = ["./resources/prefs.json"]
# Packages which we avoid using in Servo.
# For each blocked package, we can list the exceptions,
# which are packages allowed to use the blocked package.
[blocked-packages]
num = []
rand = [
"ipc-channel",
"phf_generator",
"quickcheck", # Only used in tests
"servo_rand",
"tungstenite",
"tracing-perfetto",
]
[ignore]
# Ignored packages with duplicated versions
packages = [
"bitflags",
"cfg_aliases",
"cookie",
"futures",
"hermit-abi",
"redox_syscall",
"time",
"wasi",
"wayland-sys",
# New versions of these dependencies is pulled in by GStreamer / GLib.
"itertools",
"toml",
# Duplicated by winit.
"windows-sys",
"windows-targets",
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
# wgpu has the latest and greatest.
"foreign-types",
"foreign-types-shared",
"metal",
"windows-core",
# wgpu-hal depends on 0.5.0.
"ndk-sys",
# quickcheck (required by layout_2020 for tests) is
# stuck on 0.8.4 with no new releases.
"env_logger",
# Required until a new version of string-cache is released.
"phf_generator",
"phf_shared",
# icu (from mozjs) uses old version
# tracing-subscriber (tokio-rs/tracing#3033) uses old version
# regex -> regex-automata 0.4.7
# icu_list -> regex-automata 0.2.0
# tracing-subscriber -> matchers -> regex-automata 0.1.0
"regex-automata",
# tracing-subscriber (tokio-rs/tracing#3033) uses old version
# regex [-> regex-automata 0.4.7] -> regex-syntax 0.8.4
# tracing-subscriber -> matchers -> regex-automata 0.1.0 -> regex-syntax 0.6.29
"regex-syntax",
# gilrs is on 0.10.0, but Servo is still on 0.9.4
"core-foundation",
# some non-servo crates still use 0.14
"glow",
]
# Files that are ignored for all tidy and lint checks.
files = [
"./components/net/tests/parsable_mime/text",