Add cargo-deny to mach-tidy to check license compliance. (#32465)

* Use cargo-deny to check license compliance. All licenses should be MPL-2.0 or weaker. * Run cargo-deny check licenses in mach tidy * fmt * Fix inverted boolean * Move cargo deny to tidy.py * Add quotes around license in error message * Integrate `cargo-deny` into tidy fully * Fix script tests --------- Co-authored-by: Martin Robinson <mrobinson@igalia.com>
2025-09-21 20:30:10 +01:00 · 2024-06-12 11:54:45 +02:00 · 2024-06-12 11:54:45 +02:00 · fd472ebd0e
commit fd472ebd0e
parent 370fbf0331
6 changed files with 160 additions and 42 deletions
--- a/deny.toml
+++ b/deny.toml
@ -0,0 +1,91 @@
+[graph]
+all-features = false
+no-default-features = false
+#features = []
+
+# The output table provides options for how (and whether) diagnostics are output
+[output]
+feature-depth = 1
+
+# This section is considered when running `cargo deny check advisories`
+# More documentation for the advisories section can be found here:
+# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html
+[advisories]
+ignore = [
+    #"RUSTSEC-0000-0000",
+    #{ id = "RUSTSEC-0000-0000", reason = "you can specify a reason the advisory is ignored" },
+    #"a-crate-that-is-yanked@0.1.1", # you can also ignore yanked crate versions if you wish
+    #{ crate = "a-crate-that-is-yanked@0.1.1", reason = "you can specify why you are ignoring the yanked crate" },
+]
+
+# This section is considered when running `cargo deny check licenses`
+# More documentation for the licenses section can be found here:
+# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
+[licenses]
+# List of explicitly allowed licenses
+# See https://spdx.org/licenses/ for list of possible licenses
+# [possible values: any SPDX 3.11 short identifier (+ optional exception)].
+allow = [
+    "Apache-2.0 WITH LLVM-exception",
+    "Apache-2.0",
+    "BSD-2-Clause",
+    "BSD-3-Clause",
+    "BSL-1.0",
+    "CC0-1.0",
+    "ISC",
+    "MIT",
+    "MPL-2.0",
+    "OpenSSL",
+    "Unicode-3.0",
+    "Zlib",
+    "zlib-acknowledgement",
+]
+# The confidence threshold for detecting a license from license text.
+# The higher the value, the more closely the license text must match the
+# canonical license text of a valid SPDX license file.
+# [possible values: any between 0.0 and 1.0].
+confidence-threshold = 0.8
+# Allow 1 or more licenses on a per-crate basis, so that particular licenses
+# aren't accepted for every possible crate as with the normal allow list
+exceptions = [
+    # Each entry is the crate and version constraint, and its specific allow
+    # list
+    { allow = ["OFL-1.1", "LicenseRef-UFL-1.0"], crate = "epaint" },
+    { allow = ["Unicode-DFS-2016"], crate = "unicode-ident" },
+]
+
+# Some crates don't have (easily) machine readable licensing information,
+# adding a clarification entry for it allows you to manually specify the
+# licensing information
+[[licenses.clarify]]
+crate = "ring"
+# The SPDX expression for the license requirements of the crate
+expression = "MIT AND ISC AND OpenSSL"
+license-files = [
+    # Each entry is a crate relative path, and the (opaque) hash of its contents
+    { path = "LICENSE", hash = 0xbd0eed23 },
+]
+
+
+# This section is considered when running `cargo deny check bans`.
+# More documentation about the 'bans' section can be found here:
+# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html
+[bans]
+multiple-versions = "warn"
+wildcards = "warn"
+highlight = "all"
+workspace-default-features = "allow"
+external-default-features = "allow"
+# List of crates that are allowed. Use with care!
+allow = [
+    #"ansi_term@0.11.0",
+    #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is allowed" },
+]
+# List of crates to deny
+deny = [
+    #"ansi_term@0.11.0",
+    #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is banned" },
+    # Wrapper crates can optionally be specified to allow the crate when it
+    # is a direct dependency of the otherwise banned crate
+    #{ crate = "ansi_term@0.11.0", wrappers = ["this-crate-directly-depends-on-ansi_term"] },
+]
--- a/python/servo/platform/base.py
+++ b/python/servo/platform/base.py
@ -59,6 +59,7 @@ class Base:
        if not skip_platform:
            installed_something |= self._platform_bootstrap(force)
        installed_something |= self.install_taplo(force)
+        installed_something |= self.install_cargo_deny(force)
        installed_something |= self.install_crown(force)

        if not installed_something:
@ -74,6 +75,16 @@ class Base:

        return True

+    def install_cargo_deny(self, force: bool) -> bool:
+        """Install `cargo-deny` via `cargo install` when it is needed.
+
+        Returns False without installing anything when `force` is unset and a
+        `cargo-deny` executable is already found on the PATH, and True after a
+        successful installation. Raises EnvironmentError when `cargo install`
+        exits with a nonzero status.
+        """
+        if not force:
+            existing_binary = shutil.which("cargo-deny")
+            if existing_binary is not None:
+                return False
+
+        print(" * Installing cargo-deny...")
+        install_command = ["cargo", "install", "cargo-deny", "--locked"]
+        exit_status = subprocess.call(install_command)
+        if exit_status != 0:
+            raise EnvironmentError("Installation of cargo-deny failed.")
+
+        return True
+
    def install_crown(self, force: bool) -> bool:
        print(" * Installing crown (the Servo linter)...")
        if subprocess.call(["cargo", "install", "--path", "support/crown"]) != 0:
--- a/python/servo/testing_commands.py
+++ b/python/servo/testing_commands.py
@ -228,7 +228,7 @@ class MachCommands(CommandBase):
    def test_tidy(self, all_files, no_progress):
        tidy_failed = tidy.scan(not all_files, not no_progress)

-        print("\r ➤  Checking formatting of rust files...")
+        print("\r ➤  Checking formatting of Rust files...")
        rustfmt_failed = call(["cargo", "fmt", "--", *UNSTABLE_RUSTFMT_ARGUMENTS, "--check"])
        if rustfmt_failed:
            print("Run `./mach fmt` to fix the formatting")
--- a/python/tidy/test.py
+++ b/python/tidy/test.py
@ -174,7 +174,7 @@ class CheckTidiness(unittest.TestCase):
        self.assertNoMoreErrors(errors)

    def test_lock(self):
-        errors = tidy.check_cargo_lock_file(test_file_path('duplicated_package.lock'), print_text=False)
+        errors = tidy.run_custom_cargo_lock_lints(test_file_path('duplicated_package.lock'), print_text=False)
        msg = """duplicate versions for package `test`
 \t\x1b[93mThe following packages depend on version 0.4.9 from 'crates.io':\x1b[0m
 \t\ttest2 0.1.0
@ -191,7 +191,7 @@ class CheckTidiness(unittest.TestCase):

    def test_lock_ignore_without_duplicates(self):
        tidy.config["ignore"]["packages"] = ["test", "test2", "test3", "test5"]
-        errors = tidy.check_cargo_lock_file(test_file_path('duplicated_package.lock'), print_text=False)
+        errors = tidy.run_custom_cargo_lock_lints(test_file_path('duplicated_package.lock'), print_text=False)

        msg = (
            "duplicates for `test2` are allowed, but only single version found"
@ -209,7 +209,7 @@ class CheckTidiness(unittest.TestCase):

    def test_lock_exceptions(self):
        tidy.config["blocked-packages"]["rand"] = ["test_exception", "test_unneeded_exception"]
-        errors = tidy.check_cargo_lock_file(test_file_path('blocked_package.lock'), print_text=False)
+        errors = tidy.run_custom_cargo_lock_lints(test_file_path('blocked_package.lock'), print_text=False)

        msg = (
            "Package test_blocked 0.0.2 depends on blocked package rand."
--- a/python/tidy/tidy.py
+++ b/python/tidy/tidy.py
@ -24,7 +24,7 @@ import toml

 import wpt.manifestupdate

-from .licenseck import OLD_MPL, MPL, APACHE, COPYRIGHT, licenses_toml, licenses_dep_toml
+from .licenseck import OLD_MPL, MPL, APACHE, COPYRIGHT, licenses_toml

 TOPDIR = os.path.abspath(os.path.dirname(sys.argv[0]))
 WPT_PATH = os.path.join(".", "tests", "wpt")
@ -32,6 +32,7 @@ CONFIG_FILE_PATH = os.path.join(".", "servo-tidy.toml")
 WPT_CONFIG_INI_PATH = os.path.join(WPT_PATH, "config.ini")
 # regex source https://stackoverflow.com/questions/6883049/
 URL_REGEX = re.compile(br'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')
+CARGO_LOCK_FILE = os.path.join(TOPDIR, "Cargo.lock")

 sys.path.append(os.path.join(WPT_PATH, "tests"))
 sys.path.append(os.path.join(WPT_PATH, "tests", "tools", "wptrunner"))
@ -326,11 +327,19 @@ def check_flake8(file_name, contents):
        yield line_num, message.strip()


-def check_cargo_lock_file(filename, print_text=True):
-    if print_text:
-        print(f"\r ➤  Checking cargo lock ({filename})...")
+def check_cargo_lock_file(only_changed_files: bool):
+    """Run every `Cargo.lock`-based check and yield the errors they report.
+
+    When the `FileList` built for `./Cargo.lock` with `only_changed_files` is
+    empty, a "skipping" notice is printed and nothing is yielded. Otherwise the
+    errors of `run_custom_cargo_lock_lints(CARGO_LOCK_FILE)` are yielded first,
+    followed by those of `validate_dependency_licenses()`.
+    """
+    if not list(FileList("./Cargo.lock", only_changed_files=only_changed_files, progress=False)):
+        print("\r ➤  Skipping `Cargo.lock` lint checks, because it is unchanged.")
+        return
 
-    with open(filename) as cargo_lock_file:
+    yield from run_custom_cargo_lock_lints(CARGO_LOCK_FILE)
+    yield from validate_dependency_licenses()
+
+
+def run_custom_cargo_lock_lints(cargo_lock_filename: str, print_text: bool = True):
+    if print_text:
+        print(f"\r ➤  Linting cargo lock ({cargo_lock_filename})...")
+    with open(cargo_lock_filename) as cargo_lock_file:
        content = toml.load(cargo_lock_file)

    def find_reverse_dependencies(name, content):
@ -355,7 +364,7 @@ def check_cargo_lock_file(filename, print_text=True):

    for name in exceptions:
        if name not in packages_by_name:
-            yield (filename, 1, "duplicates are allowed for `{}` but it is not a dependency".format(name))
+            yield (cargo_lock_filename, 1, "duplicates are allowed for `{}` but it is not a dependency".format(name))

    for (name, packages) in packages_by_name.items():
        has_duplicates = len(packages) > 1
@ -379,7 +388,7 @@ def check_cargo_lock_file(filename, print_text=True):
                if (not dependency[1] or version in dependency[1]) and \
                   (not dependency[2] or short_source in dependency[2]):
                    message += "\n\t\t" + pname + " " + package_version
-        yield (filename, 1, message)
+        yield (cargo_lock_filename, 1, message)

    # Check to see if we are transitively using any blocked packages
    blocked_packages = config["blocked-packages"]
@ -397,7 +406,7 @@ def check_cargo_lock_file(filename, print_text=True):
                if package_name not in whitelist:
                    fmt = "Package {} {} depends on blocked package {}."
                    message = fmt.format(package_name, package_version, dependency_name)
-                    yield (filename, 1, message)
+                    yield (cargo_lock_filename, 1, message)
                else:
                    visited_whitelisted_packages[dependency_name][package_name] = True

@ -407,7 +416,36 @@ def check_cargo_lock_file(filename, print_text=True):
            if not visited_whitelisted_packages[dependency_name].get(package_name):
                fmt = "Package {} is not required to be an exception of blocked package {}."
                message = fmt.format(package_name, dependency_name)
-                yield (filename, 1, message)
+                yield (cargo_lock_filename, 1, message)
+
+
+def validate_dependency_licenses():
+    """Check the licenses of Rust dependencies by running `cargo-deny check licenses`.
+
+    This is a generator yielding `(filename, line_number, message)` tuples, all attributed
+    to line 1 of `CARGO_LOCK_FILE`. Nothing is yielded when `cargo-deny` exits with status 0.
+    A missing `cargo-deny` executable, or a failing run without any usable diagnostics, is
+    reported as an error as well, so that a failed check can never pass silently.
+    """
+    print("\r ➤  Checking licenses of Rust dependencies...")
+    try:
+        result = subprocess.run(["cargo-deny", "--format=json", "check", "licenses"], encoding='utf-8',
+                                capture_output=True)
+    except FileNotFoundError:
+        # A missing executable should surface as a tidy error rather than as a traceback.
+        yield (CARGO_LOCK_FILE, 1,
+               'Could not find `cargo-deny`. Install it with `cargo install cargo-deny --locked`')
+        return
+    if result.returncode == 0:
+        # A bare return: the return value of a generator is never seen by code iterating over it.
+        return
+
+    error_messages = []
+    num_license_errors = 'unknown'
+    for line in (result.stderr or '').splitlines():
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            error = json.loads(line)
+        except json.JSONDecodeError:
+            error = None
+        if not isinstance(error, dict):
+            # Pass output that is not a JSON object through verbatim, so that no failure
+            # information is lost.
+            error_messages.append(line)
+            continue
+
+        error_fields = error.get('fields') or {}
+        if error.get('type') == 'summary':
+            num_license_errors = error_fields.get('licenses', {}).get('errors', num_license_errors)
+        elif error_fields.get('graphs') and error_fields.get('notes'):
+            # Only index into `graphs` and `notes` when both are present and non-empty.
+            crate = error_fields['graphs'][0]['Krate']
+            license_name = error_fields['notes'][0]
+            message = f'Rejected license "{license_name}". Run `cargo deny` for more details'
+            error_messages.append(
+                f'Rust dependency {crate["name"]}@{crate["version"]}: {message}')
+        else:
+            error_messages.append(error_fields.get('message', line))
+
+    if not error_messages:
+        # The check failed, so make sure tidy fails too, even without any diagnostics.
+        error_messages.append(f'`cargo deny` failed with exit code {result.returncode}')
+
+    print(f'    `cargo deny` reported {num_license_errors} licenses errors')
+    for message in error_messages:
+        yield (CARGO_LOCK_FILE, 1, message)


 def check_toml(file_name, lines):
@ -974,7 +1012,7 @@ def collect_errors_for_files(files_to_check, checking_functions, line_checking_f
    if not has_element:
        return
    if print_text:
-        print("\r ➤  Checking files for tidiness")
+        print("\r ➤  Checking files for tidiness...")

    for filename in files_to_check:
        if not os.path.exists(filename):
@ -994,28 +1032,6 @@ def collect_errors_for_files(files_to_check, checking_functions, line_checking_f
                    yield (filename,) + error


-def get_dep_toml_files(only_changed_files=False):
-    if not only_changed_files:
-        print('\nRunning the dependency licensing lint...')
-        for root, directories, filenames in os.walk(".cargo"):
-            for filename in filenames:
-                if filename == "Cargo.toml":
-                    yield os.path.join(root, filename)
-
-
-def check_dep_license_errors(filenames, progress=True):
-    filenames = progress_wrapper(filenames) if progress else filenames
-    for filename in filenames:
-        with open(filename, "r") as f:
-            ok_licensed = False
-            lines = f.readlines()
-            for idx, line in enumerate(lines):
-                for license_line in licenses_dep_toml:
-                    ok_licensed |= (license_line in line)
-            if not ok_licensed:
-                yield (filename, 0, "dependency should contain a valid license.")
-
-
 def scan(only_changed_files=False, progress=False):
    # check config file for errors
    config_errors = check_config_file(CONFIG_FILE_PATH)
@ -1026,16 +1042,16 @@ def scan(only_changed_files=False, progress=False):
    checking_functions = (check_flake8, check_webidl_spec, check_json)
    line_checking_functions = (check_license, check_by_line, check_toml, check_shell,
                               check_rust, check_spec, check_modeline)
-    lock_errors = check_cargo_lock_file(os.path.join(TOPDIR, "Cargo.lock"))
    file_errors = collect_errors_for_files(files_to_check, checking_functions, line_checking_functions)
-    # check dependecy licenses
-    dep_license_errors = check_dep_license_errors(get_dep_toml_files(only_changed_files), progress)
+
+    # These checks are essentially checking a single file.
+    cargo_lock_errors = check_cargo_lock_file(only_changed_files)

    wpt_errors = run_wpt_lints(only_changed_files)

    # chain all the iterators
-    errors = itertools.chain(config_errors, directory_errors, lock_errors, file_errors,
-                             dep_license_errors, wpt_errors)
+    errors = itertools.chain(config_errors, directory_errors, file_errors,
+                             wpt_errors, cargo_lock_errors)

    colorama.init()
    error = None