#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
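#
# Usage (see main() below for the full argument list):
#   ./submit_to_perfherder.py <perf_json> <revision_json> [--engine servo|gecko]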
import argparse
import json
import operator
import os
import random
import string
import time
from functools import partial, reduce

from thclient import TreeherderClient, TreeherderJobCollection, TreeherderResultSetCollection

from runner import format_result_summary


def geometric_mean(iterable):
    # Exclude non-positive timings (e.g. timed-out runs) so they cannot
    # zero out or flip the product.
    filtered = list(filter(lambda x: x > 0, iterable))
    return (reduce(operator.mul, filtered)) ** (1.0 / len(filtered))
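
# Illustrative check (not part of the original script): with timings [2, 8]
# the geometric mean is (2 * 8) ** 0.5 == 4.0, and a -1 timeout entry in
# [-1, 2, 8] is filtered out first, giving the same 4.0.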


def format_testcase_name(name):
    # Strip the local test-server prefixes, keep only the first path
    # component (the site name), and cap the result at 80 characters.
    temp = name.replace("http://localhost:8000/page_load_test/", "")
    temp = temp.replace("http://localhost:8000/tp6/", "")
    temp = temp.split("/")[0]
    temp = temp[0:80]
    return temp
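
# Illustrative example (hypothetical test URL):
#   format_testcase_name("http://localhost:8000/page_load_test/163.com/www.163.com.html")
# returns "163.com".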


def format_perf_data(perf_json, engine="servo"):
    suites = []
    measurement = "domComplete"  # Change this to an array when we have more

    def get_time_from_nav_start(timings, measurement):
        return timings[measurement] - timings["navigationStart"]

    measurementFromNavStart = partial(get_time_from_nav_start, measurement=measurement)

    if engine == "gecko":
        name = "gecko.{}".format(measurement)
    else:
        name = measurement

    suite = {"name": name, "value": geometric_mean(map(measurementFromNavStart, perf_json)), "subtests": []}
    for testcase in perf_json:
        if measurementFromNavStart(testcase) < 0:
            # A negative timing usually means the test case timed out;
            # record it as -1 so it is visibly flagged downstream.
            value = -1
        else:
            value = measurementFromNavStart(testcase)
        suite["subtests"].append({"name": format_testcase_name(testcase["testcase"]), "value": value})
    suites.append(suite)
return {
"performance_data": {
# https://bugzilla.mozilla.org/show_bug.cgi?id=1271472
"framework": {"name": "servo-perf"},
"suites": suites,
}
}
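
# Illustrative shape of the runner output consumed above (hypothetical values):
#
#   perf_json = [
#       {"testcase": "http://localhost:8000/page_load_test/163.com/www.163.com.html",
#        "navigationStart": 100.0, "domComplete": 300.0},
#       {"testcase": "http://localhost:8000/page_load_test/yahoo.com/www.yahoo.com.html",
#        "navigationStart": 100.0, "domComplete": 900.0},
#   ]
#
# format_perf_data(perf_json) then reports a single "domComplete" suite whose
# value is the geometric mean of (300 - 100) and (900 - 100), i.e.
# (200 * 800) ** 0.5 == 400.0, with one subtest per page.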


def create_resultset_collection(dataset):
print("[DEBUG] ResultSet Collection:")
print(dataset)
trsc = TreeherderResultSetCollection()
for data in dataset:
trs = trsc.get_resultset()
trs.add_push_timestamp(data["push_timestamp"])
trs.add_revision(data["revision"])
trs.add_author(data["author"])
# TODO: figure out where type is used
# trs.add_type(data['type'])
revisions = []
for rev in data["revisions"]:
tr = trs.get_revision()
tr.add_revision(rev["revision"])
tr.add_author(rev["author"])
tr.add_comment(rev["comment"])
tr.add_repository(rev["repository"])
revisions.append(tr)
trs.add_revisions(revisions)
trsc.add(trs)
return trsc


def create_job_collection(dataset):
print("[DEBUG] Job Collection:")
print(dataset)
tjc = TreeherderJobCollection()
for data in dataset:
tj = tjc.get_job()
tj.add_revision(data["revision"])
tj.add_project(data["project"])
tj.add_coalesced_guid(data["job"]["coalesced"])
tj.add_job_guid(data["job"]["job_guid"])
tj.add_job_name(data["job"]["name"])
tj.add_job_symbol(data["job"]["job_symbol"])
tj.add_group_name(data["job"]["group_name"])
tj.add_group_symbol(data["job"]["group_symbol"])
tj.add_description(data["job"]["desc"])
tj.add_product_name(data["job"]["product_name"])
tj.add_state(data["job"]["state"])
tj.add_result(data["job"]["result"])
tj.add_reason(data["job"]["reason"])
tj.add_who(data["job"]["who"])
tj.add_tier(data["job"]["tier"])
tj.add_submit_timestamp(data["job"]["submit_timestamp"])
tj.add_start_timestamp(data["job"]["start_timestamp"])
tj.add_end_timestamp(data["job"]["end_timestamp"])
tj.add_machine(data["job"]["machine"])
tj.add_build_info(
data["job"]["build_platform"]["os_name"],
data["job"]["build_platform"]["platform"],
data["job"]["build_platform"]["architecture"],
)
tj.add_machine_info(
data["job"]["machine_platform"]["os_name"],
data["job"]["machine_platform"]["platform"],
data["job"]["machine_platform"]["architecture"],
)
tj.add_option_collection(data["job"]["option_collection"])
for artifact_data in data["job"]["artifacts"]:
tj.add_artifact(artifact_data["name"], artifact_data["type"], artifact_data["blob"])
tjc.add(tj)
return tjc
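
# The `dataset` shape expected by create_job_collection is constructed in
# submit() below; see the "job" dictionary there for the full field list.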


# TODO: refactor this big function to smaller chunks
def submit(perf_data, failures, revision, summary, engine):
print("[DEBUG] failures:")
print(list(map(lambda x: x["testcase"], failures)))
author = "{} <{}>".format(revision["author"]["name"], revision["author"]["email"])
dataset = [
{
# The top-most revision in the list of commits for a push.
"revision": revision["commit"],
"author": author,
"push_timestamp": int(revision["author"]["timestamp"]),
"type": "push",
# a list of revisions associated with the resultset. There should
# be at least one.
"revisions": [
{
"comment": revision["subject"],
"revision": revision["commit"],
"repository": "servo",
"author": author,
}
],
}
]
trsc = create_resultset_collection(dataset)
result = "success"
# TODO: verify a failed test won't affect Perfherder visualization
# if len(failures) > 0:
# result = "testfailed"
    hashlen = len(revision["commit"])
    # Fabricate a job guid: a random alphanumeric string the same length as
    # the commit hash (the runner does not generate one yet; see the TODO on
    # the artifacts below).
    job_guid = "".join(random.choice(string.ascii_letters + string.digits) for _ in range(hashlen))
    project = "servo"
    if engine == "gecko":
        job_symbol = "PLG"
        group_symbol = "SPG"
        group_name = "Servo Perf on Gecko"
    else:
        job_symbol = "PL"
        group_symbol = "SP"
        group_name = "Servo Perf"
dataset = [
{
"project": project,
"revision": revision["commit"],
"job": {
"job_guid": job_guid,
"product_name": project,
"reason": "scheduler",
# TODO: What is `who` for?
"who": "Servo",
"desc": "Servo Page Load Time Tests",
"name": "Servo Page Load Time",
# The symbol representing the job displayed in
# treeherder.allizom.org
"job_symbol": job_symbol,
# The symbol representing the job group in
# treeherder.allizom.org
"group_symbol": group_symbol,
"group_name": group_name,
# TODO: get the real timing from the test runner
"submit_timestamp": str(int(time.time())),
"start_timestamp": str(int(time.time())),
"end_timestamp": str(int(time.time())),
"state": "completed",
"result": result, # "success" or "testfailed"
"machine": "local-machine",
# TODO: read platform from test result
"build_platform": {"platform": "linux64", "os_name": "linux", "architecture": "x86_64"},
"machine_platform": {"platform": "linux64", "os_name": "linux", "architecture": "x86_64"},
"option_collection": {"opt": True},
# jobs can belong to different tiers
# setting the tier here will determine which tier the job
# belongs to. However, if a job is set as Tier of 1, but
# belongs to the Tier 2 profile on the server, it will still
# be saved as Tier 2.
"tier": 1,
# the ``name`` of the log can be the default of "buildbot_text"
# however, you can use a custom name. See below.
# TODO: point this to the log when we have them uploaded to S3
"log_references": [{"url": "TBD", "name": "test log"}],
# The artifact can contain any kind of structured data
# associated with a test.
"artifacts": [
{
"type": "json",
"name": "performance_data",
# TODO: include the job_guid when the runner actually
# generates one
# 'job_guid': job_guid,
"blob": perf_data,
},
{
"type": "json",
"name": "Job Info",
# 'job_guid': job_guid,
"blob": {
"job_details": [{"content_type": "raw_html", "title": "Result Summary", "value": summary}]
},
},
],
# List of job guids that were coalesced to this job
"coalesced": [],
},
}
]
tjc = create_job_collection(dataset)
# TODO: extract this read credential code out of this function.
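    # Both credentials must be present in the environment, e.g.
    # (hypothetical values):
    #   export TREEHERDER_CLIENT_ID=servo-perf
    #   export TREEHERDER_CLIENT_SECRET=0123456789abcdef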
cred = {"client_id": os.environ["TREEHERDER_CLIENT_ID"], "secret": os.environ["TREEHERDER_CLIENT_SECRET"]}
client = TreeherderClient(
server_url="https://treeherder.mozilla.org", client_id=cred["client_id"], secret=cred["secret"]
)
# data structure validation is automatically performed here, if validation
# fails a TreeherderClientError is raised
client.post_collection("servo", trsc)
client.post_collection("servo", tjc)


def main():
    parser = argparse.ArgumentParser(
        description=(
            "Submit Servo performance data to Perfherder. "
            "Remember to set your Treeherder credentials as the environment "
            "variables 'TREEHERDER_CLIENT_ID' and 'TREEHERDER_CLIENT_SECRET'."
        )
    )
parser.add_argument("perf_json", help="the output json from runner")
parser.add_argument("revision_json", help="the json containing the servo revision data")
parser.add_argument(
"--engine",
type=str,
default="servo",
help=("The engine to run the tests on. Currently only servo and gecko are supported."),
)
args = parser.parse_args()
with open(args.perf_json, "r") as f:
result_json = json.load(f)
with open(args.revision_json, "r") as f:
revision = json.load(f)
perf_data = format_perf_data(result_json, args.engine)
failures = list(filter(lambda x: x["domComplete"] == -1, result_json))
summary = format_result_summary(result_json).replace("\n", "<br/>")
submit(perf_data, failures, revision, summary, args.engine)
print("Done!")


if __name__ == "__main__":
    main()