Auto merge of #24841 - servo:jdm-patch-31, r=Manishearth

Useful scripts for interacting with WPT logs

These are scripts I used to generate the list in https://github.com/servo/servo/issues/24828, analyze test failures for #23290, and disable slow tests.
bors-servo 2019-11-22 21:06:18 -05:00 committed by GitHub
commit 0d549e8146
3 changed files with 209 additions and 0 deletions

46  etc/wpt-summarize.py  Normal file

@@ -0,0 +1,46 @@
#!/usr/bin/env python
# Copyright 2019 The Servo Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
# Usage: python wpt-summarize.py /path/to/wpt.log /wpt/test/url.html [--full]
#
# Extract all log lines for a particular test file from a WPT
# log, outputting individual JSON objects that can be manipulated
# with tools like jq. If a particular URL results in no output,
# the URL is likely used as a reference test's reference file,
# so passing `--full` will find any output from Servo process
# command lines that include the URL.

import sys
import json

# `--full` is an optional third argument, after the log path and test URL.
full_search = len(sys.argv) > 3 and sys.argv[3] == '--full'

with open(sys.argv[1]) as f:
    data = f.readlines()
    thread = None
    for entry in data:
        entry = json.loads(entry)
        if thread and "thread" in entry:
            # We are inside the requested test: echo every entry from its
            # thread until the test ends.
            if entry["thread"] == thread:
                print(json.dumps(entry))
            if "action" in entry and entry["action"] == "test_end":
                thread = None
        else:
            if ("action" in entry and
                    entry["action"] == "test_start" and
                    entry["test"] == sys.argv[2]):
                thread = entry["thread"]
                print(json.dumps(entry))
            elif (full_search and
                  "command" in entry and
                  sys.argv[2] in entry["command"]):
                thread = entry["thread"]
                print(json.dumps(entry))

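A quick way to sanity-check the script above is to feed it a tiny synthetic log in the same JSON-lines shape it parses (entries with "action", "thread", "test", and "time" fields). The sketch below is illustrative only: the entry contents, the /css/example.html URL, and the assumption that it is run from a Servo checkout (so etc/wpt-summarize.py exists) are all made up for the example.

import json
import subprocess
import tempfile

# Minimal mozlog-style raw entries, using only the fields the script reads.
entries = [
    {"action": "test_start", "thread": "TestRunner-1",
     "test": "/css/example.html", "time": 1000},
    {"action": "log", "thread": "TestRunner-1", "level": "INFO",
     "message": "output captured while the test runs"},
    {"action": "test_end", "thread": "TestRunner-1",
     "test": "/css/example.html", "status": "OK", "time": 1500},
]

with tempfile.NamedTemporaryFile("w", suffix=".log", delete=False) as log:
    log.write("\n".join(json.dumps(e) for e in entries) + "\n")

# Should print all three entries as individual JSON objects, which can then
# be piped into jq or similar tools.
subprocess.check_call(
    ["python", "etc/wpt-summarize.py", log.name, "/css/example.html"]
)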
95  etc/wpt-timing.py  Normal file

@@ -0,0 +1,95 @@
#!/usr/bin/env python
# Copyright 2019 The Servo Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
# Usage: python wpt-timing.py path/to/wpt.log [path/to/other.log ...]
#
# Given a series of WPT log files as arguments, this script
# extracts the status of each test file (OK, ERROR, TIMEOUT, etc.)
# and how long it took to run, then creates three CSV files, each
# sorted by runtime:
#
# - longest_ok.csv: all tests that passed
# - longest_err.csv: all tests that failed or had an error
# - timeouts.csv: all tests that timed out
#
# This information can be used to quickly determine the longest-running
# tests in the WPT testsuite in order to improve the overall testsuite
# runtime on CI.

import sys
import json
import collections
import csv


def process_log(data):
    tests = {}
    test_results = collections.defaultdict(list)
    for entry in data:
        entry = json.loads(entry)
        if "action" in entry:
            if entry["action"] == "test_start":
                tests[entry["test"]] = {
                    "start": int(entry["time"]),
                    "end": 0,
                }
            elif entry["action"] == "test_end":
                test = tests[entry["test"]]
                test["end"] = int(entry["time"])
                test_results[entry["status"]] += [
                    (entry["test"], test["end"] - test["start"])
                ]
    return test_results


test_results = {
    "SKIP": [],
    "OK": [],
    "PASS": [],
    "ERROR": [],
    "FAIL": [],
    "CRASH": [],
    "TIMEOUT": [],
}

for log_path in sys.argv[1:]:
    with open(log_path) as f:
        data = f.readlines()
        for k, v in process_log(data).items():
            test_results[k] += v

print("Skipped %d tests." % len(test_results["SKIP"]))
print("%d tests timed out." % len(test_results["TIMEOUT"]))

longest_crash = sorted(test_results["CRASH"], key=lambda x: x[1], reverse=True)
# Guard against log sets that contain no crashing tests.
if longest_crash:
    print("Longest CRASH test took %dms (%s)"
          % (longest_crash[0][1], longest_crash[0][0]))

longest_ok = sorted(
    test_results["PASS"] + test_results["OK"],
    key=lambda x: x[1], reverse=True
)
csv_data = [['Test path', 'Milliseconds']]
with open('longest_ok.csv', 'w') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerows(csv_data + longest_ok)

longest_fail = sorted(
    test_results["ERROR"] + test_results["FAIL"],
    key=lambda x: x[1], reverse=True
)
with open('longest_err.csv', 'w') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerows(csv_data + longest_fail)

longest_timeout = sorted(test_results["TIMEOUT"], key=lambda x: x[1], reverse=True)
with open('timeouts.csv', 'w') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerows(csv_data + longest_timeout)

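Since the three CSV files are written already sorted by runtime, consuming them afterwards is straightforward. A minimal sketch, assuming longest_ok.csv was produced by a previous run of wpt-timing.py in the current directory:

import csv

# Skip the "Test path,Milliseconds" header row; the remaining rows are
# already ordered longest-first.
with open("longest_ok.csv") as f:
    rows = list(csv.reader(f))[1:]

# Print the ten slowest passing tests.
for test_path, milliseconds in rows[:10]:
    print("%s: %sms" % (test_path, milliseconds))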
68  etc/wpt_result_analyzer.py  Normal file

@@ -0,0 +1,68 @@
#!/usr/bin/env python
# Copyright 2019 The Servo Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
# Usage: python etc/wpt_result_analyzer.py
#
# Analyze the state of WPT tests in Servo by walking all of the
# test directories, counting the number of tests present, and
# counting the number of ini files present in the corresponding
# test result directory. Prints out a list of directories that
# have non-zero failure counts, ordered by number of failing tests
# and percentage of tests that fail.

# Use true division so the percentages below are correct even when this
# script is run under Python 2.
from __future__ import division

import os

test_root = os.path.join('tests', 'wpt', 'web-platform-tests')
meta_root = os.path.join('tests', 'wpt', 'metadata')

test_counts = {}
meta_counts = {}

for base_dir, dir_names, files in os.walk(test_root):
    if base_dir == test_root:
        continue

    rel_base = os.path.relpath(base_dir, test_root)
    if not os.path.exists(os.path.join(meta_root, rel_base)):
        continue

    test_files = []
    exts = ['.html', '.htm', '.xht', '.xhtml', '.window.js', '.worker.js', '.any.js']
    for f in files:
        for ext in exts:
            if f.endswith(ext):
                test_files += [f]
    test_counts[rel_base] = len(test_files)

for base_dir, dir_names, files in os.walk(meta_root):
    if base_dir == meta_root:
        continue

    rel_base = os.path.relpath(base_dir, meta_root)
    # Don't count a directory-wide __dir__.ini as an individual failure.
    num_files = len(files)
    if '__dir__.ini' in files:
        num_files -= 1
    meta_counts[rel_base] = num_files

final_counts = []
for (test_dir, test_count) in test_counts.items():
    if not test_count:
        continue
    meta_count = meta_counts.get(test_dir, 0)
    final_counts += [(test_dir, test_count, meta_count)]

print('Test counts')
print('dir: % failed (num tests / num failures)')
s = sorted(final_counts, key=lambda x: x[2] / x[1])
for (test_dir, test_count, meta_count) in reversed(sorted(s, key=lambda x: x[2])):
    if not meta_count:
        continue
    print('%s: %.2f%% (%d / %d)'
          % (test_dir, meta_count / test_count * 100, test_count, meta_count))
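For reference, the per-directory calculation reduces to the following sketch for a single directory. The css/css-flexbox name is only an example, and it assumes the standard Servo checkout layout used above (tests under tests/wpt/web-platform-tests, expectations under tests/wpt/metadata):

import os

rel_base = 'css/css-flexbox'  # hypothetical directory, for illustration only
test_dir = os.path.join('tests', 'wpt', 'web-platform-tests', rel_base)
meta_dir = os.path.join('tests', 'wpt', 'metadata', rel_base)

exts = ('.html', '.htm', '.xht', '.xhtml', '.window.js', '.worker.js', '.any.js')
tests = [f for f in os.listdir(test_dir) if f.endswith(exts)]
# Each .ini file other than __dir__.ini marks one test with unexpected results.
failures = [f for f in os.listdir(meta_dir)
            if f.endswith('.ini') and f != '__dir__.ini']

if tests:
    print('%s: %.2f%% (%d / %d)'
          % (rel_base, 100.0 * len(failures) / len(tests), len(tests), len(failures)))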