Auto merge of #29622 - mrobinson:upstream-wpt-non-utf8, r=jdm

Handle non-UTF8 diffs in the WPT upstream script

The output of `git diff` and `git show` can include non-UTF8 text or binary data, so the WPT upstream script should handle this properly.

Fixes #29620.

<!-- Please describe your changes on the following line: -->

---
<!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `___` with appropriate data: -->
- [x] `./mach build -d` does not report any errors
- [x] `./mach test-tidy` does not report any errors
- [x] These changes fix #29620
- [x] There are tests for these changes

<!-- Also, please make sure that "Allow edits from maintainers" checkbox is checked, so that we can help you if you get stuck somewhere along the way.-->

<!-- Pull requests that do not address these steps are welcome, but they will require additional verification as part of the review process. -->
This commit is contained in:
bors-servo 2023-04-12 15:09:50 +02:00 committed by GitHub
commit bdc3bc53fc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 42 additions and 6 deletions

View file

@ -229,7 +229,7 @@ class TestApplyCommitsToWPT(unittest.TestCase):
def run_test(self, pr_number: int, commit_data: dict): def run_test(self, pr_number: int, commit_data: dict):
def make_commit(data): def make_commit(data):
with open(os.path.join(TESTS_DIR, data[2]), encoding="utf-8") as file: with open(os.path.join(TESTS_DIR, data[2]), "rb") as file:
return {"author": data[0], "message": data[1], "diff": file.read()} return {"author": data[0], "message": data[1], "diff": file.read()}
commits = [make_commit(data) for data in commit_data] commits = [make_commit(data) for data in commit_data]
@ -272,6 +272,15 @@ class TestApplyCommitsToWPT(unittest.TestCase):
[ [
["test author <test@author>", "test commit message", "18746.diff"], ["test author <test@author>", "test commit message", "18746.diff"],
["another person <two@author>", "a different message", "wpt.diff"], ["another person <two@author>", "a different message", "wpt.diff"],
["another person <two@author>", "adding some non-utf8 chaos", "add-non-utf8-file.diff"],
],
)
def test_non_utf8_commit(self):
self.run_test(
100,
[
["test author <nonutf8@author>", "adding some non-utf8 chaos", "add-non-utf8-file.diff"],
], ],
) )
@ -455,6 +464,17 @@ class TestFullSyncRun(unittest.TestCase):
] ]
) )
def test_opened_upstreamable_pr_with_non_utf8_file_contents(self):
self.assertListEqual(
self.run_test("opened.json", ["add-non-utf8-file.diff"]),
[
"CreateOrUpdateBranchForPRStep:1:servo-wpt-sync/wpt/servo_export_18746",
"OpenPRStep:servo-wpt-sync/wpt/servo_export_18746→wpt/wpt#1",
"CommentStep:servo/servo#18746:🤖 Opened new upstream WPT pull request "
"(wpt/wpt#1) with upstreamable changes.",
],
)
def test_open_new_upstreamable_pr_with_preexisting_upstream_pr_not_apply_cleanly_to_upstream( def test_open_new_upstreamable_pr_with_preexisting_upstream_pr_not_apply_cleanly_to_upstream(
self, self,
): ):

View file

@ -0,0 +1,7 @@
diff --git a/tests/wpt/web-platform-tests/non-utf8-file.txt b/tests/wpt/web-platform-tests/non-utf8-file.txt
new file mode 100644
index 0000000..5a992e0
--- /dev/null
+++ b/tests/wpt/web-platform-tests/non-utf8-file.txt
@@ -0,0 +1 @@
+foo bér ÿ "

View file

@ -50,7 +50,7 @@ class LocalGitRepo:
self.path = path self.path = path
self.sync = sync self.sync = sync
def run(self, *args, env: dict = {}): def run_without_encoding(self, *args, env: dict = {}):
command_line = ["git"] + list(args) command_line = ["git"] + list(args)
logging.info(" → Execution (cwd='%s'): %s", logging.info(" → Execution (cwd='%s'): %s",
self.path, " ".join(command_line)) self.path, " ".join(command_line))
@ -63,12 +63,19 @@ class LocalGitRepo:
try: try:
return subprocess.check_output( return subprocess.check_output(
command_line, cwd=self.path, env=env, stderr=subprocess.STDOUT command_line, cwd=self.path, env=env, stderr=subprocess.STDOUT
).decode("utf-8") )
except subprocess.CalledProcessError as exception: except subprocess.CalledProcessError as exception:
logging.warning("Process execution failed with output:\n%s", logging.warning("Process execution failed with output:\n%s",
exception.output.decode("utf-8")) exception.output.decode("utf-8", errors="surrogateescape"))
raise exception raise exception
def run(self, *args, env: dict = {}):
return (
self
.run_without_encoding(*args, env=env)
.decode("utf-8", errors="surrogateescape")
)
@dataclasses.dataclass() @dataclasses.dataclass()
class SyncRun: class SyncRun:

View file

@ -114,7 +114,9 @@ class CreateOrUpdateBranchForPRStep(Step):
# TODO: If we could cleverly parse and manipulate the full commit diff # TODO: If we could cleverly parse and manipulate the full commit diff
# we could avoid cloning the servo repository altogether and only # we could avoid cloning the servo repository altogether and only
# have to fetch the commit diffs from GitHub. # have to fetch the commit diffs from GitHub.
diff = local_servo_repo.run( # NB: The output of git show might include binary files or non-UTF8 text,
# so store the content of the diff as a `bytes`.
diff = local_servo_repo.run_without_encoding(
"show", "--binary", "--format=%b", sha, "--", UPSTREAMABLE_PATH "show", "--binary", "--format=%b", sha, "--", UPSTREAMABLE_PATH
) )
@ -140,7 +142,7 @@ class CreateOrUpdateBranchForPRStep(Step):
strip_count = UPSTREAMABLE_PATH.count("/") + 1 strip_count = UPSTREAMABLE_PATH.count("/") + 1
try: try:
with open(patch_path, "w", encoding="utf-8") as file: with open(patch_path, "wb") as file:
file.write(commit["diff"]) file.write(commit["diff"])
run.sync.local_wpt_repo.run( run.sync.local_wpt_repo.run(
"apply", PATCH_FILE_NAME, "-p", str(strip_count) "apply", PATCH_FILE_NAME, "-p", str(strip_count)