From c738bbc41cc6d12f28ca1e09a7cde0eac7762a8e Mon Sep 17 00:00:00 2001
From: Jerens Lensun <54782057+jerensl@users.noreply.github.com>
Date: Tue, 29 Jul 2025 22:46:32 +0800
Subject: [PATCH] mach: Make test-tidy line length check Unicode-aware (#38335)

Currently, our implementation of each line-checking function reads the
file as bytes, so each line must be decoded from UTF-8 into a string
before its length is evaluated. This ensures the length is counted in
characters rather than in bytes.

Testing: I tested by changing a comment as described in the linked issue,
and it no longer gives an error.
Fixes: #38237

Signed-off-by: Jerens Lensun <jerensslensun@gmail.com>
---
 python/tidy/tidy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tidy/tidy.py b/python/tidy/tidy.py
index 3e3bf25e4df..18c43807179 100644
--- a/python/tidy/tidy.py
+++ b/python/tidy/tidy.py
@@ -319,7 +319,7 @@ def check_length(file_name: str, idx: int, line: bytes) -> Iterator[tuple[int, s
 
     # Prefer shorter lines when shell scripting.
     max_length = 80 if file_name.endswith(".sh") else 120
-    if len(line.rstrip(b"\n")) > max_length and not is_unsplittable(file_name, line):
+    if len(line.decode("utf-8").rstrip("\n")) > max_length and not is_unsplittable(file_name, line):
         yield (idx + 1, "Line is longer than %d characters" % max_length)