From 7151f24ca537759a3191b999d304eed44fed29c7 Mon Sep 17 00:00:00 2001
From: Alan Jeffrey
Date: Tue, 21 Nov 2017 20:49:44 -0600
Subject: [PATCH] Download the buildbot statistics and save them as CSV.

---
 etc/ci/performance/.gitignore                |   1 +
 .../performance/download_buildbot_timings.py | 187 ++++++++++++++++++
 2 files changed, 188 insertions(+)
 create mode 100644 etc/ci/performance/download_buildbot_timings.py

diff --git a/etc/ci/performance/.gitignore b/etc/ci/performance/.gitignore
index 01adf45a40d..2f10274f854 100644
--- a/etc/ci/performance/.gitignore
+++ b/etc/ci/performance/.gitignore
@@ -1,6 +1,7 @@
 servo/*
 output.png
 output/*
+.cache/*
 page_load_test/tp5n/*
 page_load_test/tp5n.zip
 venv/*
diff --git a/etc/ci/performance/download_buildbot_timings.py b/etc/ci/performance/download_buildbot_timings.py
new file mode 100644
index 00000000000..1bc8fd46807
--- /dev/null
+++ b/etc/ci/performance/download_buildbot_timings.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import argparse
+import csv
+from datetime import datetime, date
+import httplib2
+import json
+from math import floor
+import os
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Download buildbot metadata"
+    )
+    parser.add_argument("--index-url",
+                        type=str,
+                        default='http://build.servo.org/json',
+                        help="the URL to get the JSON index data from. "
+                             "Default: http://build.servo.org/json")
+    parser.add_argument("--build-url",
+                        type=str,
+                        default='http://build.servo.org/json/builders/{}/builds/{}',
+                        help="the URL to get the JSON build data from. "
+                             "Default: http://build.servo.org/json/builders/{}/builds/{}")
+    parser.add_argument("--cache-dir",
+                        type=str,
+                        default='.cache',
+                        help="the directory to cache JSON files in. "
+                             "Default: .cache")
+    parser.add_argument("--cache-name",
+                        type=str,
+                        default='build-{}-{}.json',
+                        help="the filename to cache JSON data in. "
+                             "Default: build-{}-{}.json")
+    parser.add_argument("--output-dir",
+                        type=str,
+                        default='output',
+                        help="the directory to save the CSV data to. "
+                             "Default: output")
+    parser.add_argument("--output-name",
+                        type=str,
+                        default='builds-{}-{}.csv',
+                        help="the filename to save the CSV data to. "
" + "Default: builds-{}-{}.csv") + parser.add_argument("--verbose", "-v", + action='store_true', + help="print every HTTP request") + args = parser.parse_args() + + http = httplib2.Http() + + os.makedirs(args.cache_dir, exist_ok=True) + os.makedirs(args.output_dir, exist_ok=True) + + # Get the index to find out the list of builder names + # Note: this isn't cached + if args.verbose: + print("Downloading index {}.".format(args.index_url)) + (index_headers, index_data) = http.request(args.index_url, "GET", headers={'cache-control': 'no-cache'}) + if args.verbose: + print("Response {}.".format(index_headers)) + index = json.loads(index_data.decode('utf-8')) + + builds = [] + + for builder in index["builders"]: + # The most recent build is at offset -1 + # Fetch it to find out the build number + # Note: this isn't cached + recent_build_url = args.build_url.format(builder, -1) + if args.verbose: + print("Downloading recent build {}.".format(recent_build_url)) + (recent_build_headers, recent_build_data) = http.request( + recent_build_url, + "GET", + headers={'cache-control': 'no-cache'} + ) + if args.verbose: + print("Respose {}.".format(recent_build_headers)) + recent_build = json.loads(recent_build_data.decode('utf-8')) + recent_build_number = recent_build["number"] + + # Download each build, and convert to CSV + for build_number in range(0, recent_build_number): + + # Rather annoyingly, we can't just use the Python http cache, + # because it doesn't cache 404 responses. So we roll our own. + cache_json_name = args.cache_name.format(builder, build_number) + cache_json = os.path.join(args.cache_dir, cache_json_name) + if os.path.isfile(cache_json): + with open(cache_json) as f: + build = json.load(f) + + else: + # Get the build data + build_url = args.build_url.format(builder, build_number) + if args.verbose: + print("Downloading build {}.".format(build_url)) + (build_headers, build_data) = http.request( + build_url, + "GET", + headers={'cache-control': 'no=cache'} + ) + if args.verbose: + print("Response {}.".format(build_headers)) + + # Only parse the JSON if we got back a 200 response. + if build_headers.status == 200: + build = json.loads(build_data.decode('utf-8')) + # Don't cache current builds. + if build.get('currentStep'): + continue + + elif build_headers.status == 404: + build = {} + + else: + continue + + with open(cache_json, 'w+') as f: + json.dump(build, f) + + if 'times' in build: + builds.append(build) + + years = {} + for build in builds: + build_date = date.fromtimestamp(build['times'][0]) + years.setdefault(build_date.year, {}).setdefault(build_date.month, []).append(build) + + for year, months in years.items(): + for month, builds in months.items(): + + output_name = args.output_name.format(year, month) + output = os.path.join(args.output_dir, output_name) + + # Create the CSV file. 
+            if args.verbose:
+                print('Creating file {}.'.format(output))
+            with open(output, 'w+') as output_file:
+                output_csv = csv.writer(output_file)
+
+                # The CSV column names
+                output_csv.writerow([
+                    'builder',
+                    'buildNumber',
+                    'buildTimestamp',
+                    'stepName',
+                    'stepText',
+                    'stepNumber',
+                    'stepStart',
+                    'stepFinish'
+                ])
+
+                for build in builds:
+
+                    builder = build["builderName"]
+                    build_number = build["number"]
+                    build_timestamp = datetime.fromtimestamp(build["times"][0]).replace(microsecond=0)
+
+                    # Write out the timing data for each step
+                    for step in build["steps"]:
+                        if step["isFinished"]:
+                            step_name = step["name"]
+                            step_text = ' '.join(step["text"])
+                            step_number = step["step_number"]
+                            step_start = floor(step["times"][0])
+                            step_finish = floor(step["times"][1])
+                            output_csv.writerow([
+                                builder,
+                                build_number,
+                                build_timestamp,
+                                step_name,
+                                step_text,
+                                step_number,
+                                step_start,
+                                step_finish
+                            ])
+
+
+if __name__ == "__main__":
+    main()
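A minimal sketch of how the generated CSV could be consumed, assuming the script has been run and has produced a file such as output/builds-2017-11.csv (the year/month in the name is only an example); the column names are the ones written by the header row above:

#!/usr/bin/env python3
# Sum up how long each build step took, per step name, from one month's CSV.
import csv
from collections import defaultdict

totals = defaultdict(int)
with open('output/builds-2017-11.csv') as f:  # hypothetical example path
    for row in csv.DictReader(f):
        # stepStart and stepFinish are whole-second timestamps (floor()ed above).
        totals[row['stepName']] += int(row['stepFinish']) - int(row['stepStart'])

for step_name, seconds in sorted(totals.items(), key=lambda kv: kv[1], reverse=True):
    print('{}\t{}s'.format(step_name, seconds))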