tidy: Fix rustdoc warnings and add a tidy check for a common URL issue (#33366)

This change fixes all rustdoc errors and also adds a tidy check for a
very common rustdoc URL issue. Eventually rustdoc warnings should likely
cause the build to fail, but this catches those issues sooner in order
to not waste so much developer time.

Signed-off-by: Martin Robinson <mrobinson@igalia.com>
This commit is contained in:
Martin Robinson 2024-09-08 08:04:19 -07:00 committed by GitHub
parent f6ae050077
commit e70507ca40
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 107 additions and 36 deletions

View file

@ -32,8 +32,11 @@ CONFIG_FILE_PATH = os.path.join(".", "servo-tidy.toml")
WPT_CONFIG_INI_PATH = os.path.join(WPT_PATH, "config.ini")
# regex source https://stackoverflow.com/questions/6883049/
URL_REGEX = re.compile(br'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')
UTF8_URL_REGEX = re.compile(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')
CARGO_LOCK_FILE = os.path.join(TOPDIR, "Cargo.lock")
ERROR_RAW_URL_IN_RUSTDOC = "Found raw link in rustdoc. Please escape it with angle brackets or use a markdown link."
sys.path.append(os.path.join(WPT_PATH, "tests"))
sys.path.append(os.path.join(WPT_PATH, "tests", "tools", "wptrunner"))
@ -306,13 +309,39 @@ def check_whitespace(idx, line):
yield (idx + 1, "CR on line")
def check_by_line(file_name, lines):
def check_for_raw_urls_in_rustdoc(file_name: str, idx: int, line: bytes):
"""Check that rustdoc comments in Rust source code do not have raw URLs. These appear
as warnings when rustdoc is run. rustdoc warnings could be made fatal, but adding this
check as part of tidy catches this common problem without having to run rustdoc for all
of Servo."""
if not file_name.endswith(".rs"):
return
if b"///" not in line and b"//!" not in line:
return
# Types of URLS that are allowed:
# - A URL surrounded by angle or square brackets.
# - A markdown link.
# - A URL as part of a markdown definition identifer.
# [link text]: https://example.com
match = URL_REGEX.search(line)
if match and (
not line[match.start() - 1:].startswith(b"<")
and not line[match.start() - 1:].startswith(b"[")
and not line[match.start() - 2:].startswith(b"](")
and not line[match.start() - 3:].startswith(b"]: ")):
yield (idx + 1, ERROR_RAW_URL_IN_RUSTDOC)
def check_by_line(file_name: str, lines: list[bytes]):
for idx, line in enumerate(lines):
errors = itertools.chain(
check_length(file_name, idx, line),
check_whitespace(idx, line),
check_whatwg_specific_url(idx, line),
check_whatwg_single_page_url(idx, line),
check_for_raw_urls_in_rustdoc(file_name, idx, line),
)
for error in errors: