From c738bbc41cc6d12f28ca1e09a7cde0eac7762a8e Mon Sep 17 00:00:00 2001 From: Jerens Lensun <54782057+jerensl@users.noreply.github.com> Date: Tue, 29 Jul 2025 22:46:32 +0800 Subject: [PATCH] mach: Make test-tidy line length check Unicode-aware (#38335) Currently, our implementation of each line-checking function reads the file as bytes, so we need to properly decode each line from UTF-8 before evaluating it. This ensures that the line length is counted in characters (as a string) and not in bytes. Testing: I tested by changing a comment as described in the issue above, and it no longer gives an error. Fixes: #38237 Signed-off-by: Jerens Lensun --- python/tidy/tidy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tidy/tidy.py b/python/tidy/tidy.py index 3e3bf25e4df..18c43807179 100644 --- a/python/tidy/tidy.py +++ b/python/tidy/tidy.py @@ -319,7 +319,7 @@ def check_length(file_name: str, idx: int, line: bytes) -> Iterator[tuple[int, s # Prefer shorter lines when shell scripting. max_length = 80 if file_name.endswith(".sh") else 120 - if len(line.rstrip(b"\n")) > max_length and not is_unsplittable(file_name, line): + if len(line.decode("utf-8").rstrip("\n")) > max_length and not is_unsplittable(file_name, line): yield (idx + 1, "Line is longer than %d characters" % max_length)