# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from collections import defaultdict
from pathlib import Path
from mozperftest.metrics.exceptions import (
MetricsMissingResultsError,
MetricsMultipleTransformsError,
)
from mozperftest.metrics.notebook import PerftestETL
from mozperftest.metrics.utils import metric_fields, validate_intermediate_results
# Command-line argument definitions shared by every metrics layer.
COMMON_ARGS = {
    "metrics": {
        "type": metric_fields,
        "nargs": "*",
        "default": [],
        "help": "The metrics that should be retrieved from the data.",
    },
    "prefix": {"type": str, "default": "", "help": "Prefix used by the output files."},
    "split-by": {
        "type": str,
        "default": None,
        "help": "A metric name to use for splitting the data. For instance, "
        "using browserScripts.pageinfo.url will split the data by the unique "
        "URLs that are found.",
    },
    "simplify-names": {
        "action": "store_true",
        "default": False,
        "help": "If set, metric names will be simplified to a single word. The PerftestETL "
        "combines dictionary keys by `.`, and the final key contains that value of the data. "
        "That final key becomes the new name of the metric.",
    },
    "simplify-exclude": {
        "nargs": "*",
        "default": ["statistics"],
        "help": "When renaming/simplifying metric names, entries with these strings "
        "will be ignored and won't get simplified. These options are only used when "
        "--simplify-names is set.",
    },
    "transformer": {
        "type": str,
        "default": None,
        "help": "The path to the file containing the custom transformer, "
        "or the module to import along with the class name, "
        "e.g. mozperftest.test.xpcshell:XpcShellTransformer",
    },
}
class MetricsStorage(object):
    """Holds data that is commonly used across all metrics layers.

    An instance of this class represents data for a given output
    path and prefix.
    """

    def __init__(self, output_path, prefix, logger):
        """
        :param output_path str: directory where output files go; created
            (with parents) if it does not exist.
        :param prefix str: prefix used by the output files.
        :param logger: object providing a `warning(message)` method.
        """
        self.prefix = prefix
        self.output_path = output_path
        self.stddata = {}
        self.ptnb_config = {}
        self.results = []
        self.logger = logger
        p = Path(output_path)
        p.mkdir(parents=True, exist_ok=True)

    def _parse_results(self, results):
        """Flatten `results` into a list of dicts and/or JSON file paths.

        A dict is kept as-is (wrapped in a list), a directory path is
        searched recursively for `*.json` files, a file path is kept as a
        POSIX string, and a list is processed element-wise. A path that
        does not exist is logged as a warning and skipped.
        """
        if isinstance(results, dict):
            return [results]
        res = []
        # XXX we need to embrace pathlib everywhere.
        if isinstance(results, (str, Path)):
            # Expecting a single path or a directory
            p = Path(results)
            if not p.exists():
                self.logger.warning("Given path does not exist: {}".format(results))
            elif p.is_dir():
                files = [f for f in p.glob("**/*.json") if not f.is_dir()]
                res.extend(self._parse_results(files))
            else:
                res.append(p.as_posix())
        if isinstance(results, list):
            # Expecting a list of paths
            for path in results:
                res.extend(self._parse_results(path))
        return res

    def set_results(self, results):
        """Processes and sets results provided by the metadata.

        `results` can be a path to a file or a directory. Every
        file is scanned and we build a list. Alternatively, it
        can be a mapping containing the results, in that case
        we just use it directly, but keep it in a list.

        :param results list/dict/str: Path, or list of paths to the data
            (or the data itself in a dict) of the data to be processed.
        :raises MetricsMultipleTransformsError: when two entries sharing a
            name request different transformers.
        :raises MetricsMissingResultsError: when nothing was gathered.
        """
        # Parse the results into files (for now) and the settings
        self.results = defaultdict(lambda: defaultdict(list))
        self.settings = defaultdict(dict)
        for res in results:
            # Ensure that the results are valid before continuing
            validate_intermediate_results(res)

            name = res["name"]
            if isinstance(res["results"], dict):
                # XXX Implement subtest based parsing
                raise NotImplementedError(
                    "Subtest-based processing is not implemented yet"
                )

            # Merge all entries with the same name into one
            # result, if separation is needed use unique names
            self.results[name]["files"].extend(self._parse_results(res["results"]))

            suite_settings = self.settings[name]
            for key, val in res.items():
                if key == "results":
                    continue
                suite_settings[key] = val

            # Check the transform definitions; the first entry for a name
            # pins the transformer, later entries must agree with it.
            currtrfm = self.results[name]["transformer"]
            if not currtrfm:
                self.results[name]["transformer"] = res.get(
                    "transformer", "SingleJsonRetriever"
                )
            elif currtrfm != res.get("transformer", "SingleJsonRetriever"):
                raise MetricsMultipleTransformsError(
                    f"Only one transformer allowed per data name! Found multiple for {name}: "
                    f"{[currtrfm, res['transformer']]}"
                )

            # Get the transform options if available
            self.results[name]["options"] = res.get("transformer-options", {})

        if not self.results:
            self.return_code = 1
            raise MetricsMissingResultsError("Could not find any results to process.")

    def get_standardized_data(self, group_name="firefox", transformer=None):
        """Returns a parsed, standardized results data set.

        The dataset is computed once, then cached in `self.stddata`.
        The transformer dictates how the data will be parsed; by default
        the per-data-set transformer recorded by `set_results` is used.

        :param group_name str: The name for this results group.
            NOTE(review): currently unused here — kept for interface
            compatibility with callers.
        :param transformer str: The name of the transformer to use
            when parsing the data. Currently, only SingleJsonRetriever
            is available.
        :return dict: Standardized notebook data containing the
            requested metrics.
        """
        if self.stddata:
            return self.stddata

        for data_type, data_info in self.results.items():
            tfm = transformer if transformer is not None else data_info["transformer"]

            prefix = data_type
            if self.prefix:
                prefix = "{}-{}".format(self.prefix, data_type)

            # Primarily used to store the transformer used on the data
            # so that it can also be used for generating things
            # like summary values for suites, and subtests.
            self.ptnb_config[data_type] = {
                "output": self.output_path,
                "prefix": prefix,
                "custom_transformer": tfm,
                "file_groups": {data_type: data_info["files"]},
            }

            ptnb = PerftestETL(
                file_groups=self.ptnb_config[data_type]["file_groups"],
                config=self.ptnb_config[data_type],
                prefix=self.prefix,
                logger=self.logger,
                custom_transform=tfm,
            )
            r = ptnb.process(**data_info["options"])
            self.stddata[data_type] = r["data"]

        return self.stddata

    def filtered_metrics(
        self,
        group_name="firefox",
        transformer=None,
        metrics=None,
        exclude=None,
        split_by=None,
        simplify_names=False,
        simplify_exclude=("statistics",),
    ):
        """Filters the metrics to only those that were requested by `metrics`.

        If metrics is Falsey (None, empty list, etc.) then no metrics
        will be filtered. The entries in metrics are pattern matched with
        the subtests in the standardized data (not a regular expression).
        For example, if "firstPaint" is in metrics, then all subtests which
        contain this string in their name will be kept.

        :param metrics list: List of metrics to keep.
        :param exclude list: List of string matchers to exclude from the metrics
            gathered/reported.
        :param split_by str: The name of a metric to use to split up data by.
            If no subtest matches it, no splitting is performed.
        :param simplify_names bool: If True, metric names are reduced to the
            final `.`-separated component.
        :param simplify_exclude list: List of string matchers to exclude
            from the naming simplification process.
        :return dict: Standardized notebook data containing the
            requested metrics.
        """
        results = self.get_standardized_data(
            group_name=group_name, transformer=transformer
        )
        if not metrics:
            return results
        if not exclude:
            exclude = []
        if not simplify_exclude:
            simplify_exclude = []

        # Get the field to split the results by (if any). `split_map`
        # stays None when `split_by` matches no subtest, which disables
        # splitting below instead of crashing on a str.items() call.
        split_map = None
        if split_by is not None:
            splitting_entry = None
            for data_type, data_info in results.items():
                for res in data_info:
                    if split_by in res["subtest"]:
                        splitting_entry = res
                        break
            if splitting_entry is not None:
                # Map each distinct value to the iteration indices it
                # appeared at, so data can be regrouped per value.
                split_map = defaultdict(list)
                for c, entry in enumerate(splitting_entry["data"]):
                    split_map[entry["value"]].append(c)

        # Filter metrics
        filtered = {}
        for data_type, data_info in results.items():
            newresults = []
            for res in data_info:
                if any(met["name"] in res["subtest"] for met in metrics) and not any(
                    met in res["subtest"] for met in exclude
                ):
                    res["transformer"] = self.ptnb_config[data_type][
                        "custom_transformer"
                    ]
                    newresults.append(res)
            filtered[data_type] = newresults

        # Simplify the filtered metric names
        if simplify_names:

            def _simplify(name):
                # Entries matching an exclusion string keep their name.
                if any(met in name for met in simplify_exclude):
                    return None
                return name.split(".")[-1]

            self._alter_name(filtered, _simplify)

        # Split the filtered results
        if split_map is not None:
            newfilt = {}
            total_iterations = sum(len(inds) for inds in split_map.values())
            for data_type in filtered:
                if not filtered[data_type]:
                    # Ignore empty data types
                    continue

                newresults = []
                newfilt[data_type] = newresults
                for split, indices in split_map.items():
                    for res in filtered[data_type]:
                        if len(res["data"]) != total_iterations:
                            # Skip data that cannot be split
                            continue
                        splitres = {key: val for key, val in res.items()}
                        splitres["subtest"] += " " + split
                        splitres["data"] = [res["data"][i] for i in indices]
                        splitres["transformer"] = self.ptnb_config[data_type][
                            "custom_transformer"
                        ]
                        newresults.append(splitres)

            filtered = newfilt

        return filtered

    def _alter_name(self, filtered, simplify):
        """Rename subtests in `filtered` in place using `simplify`.

        `simplify` takes a subtest name and returns the new name, or None
        to leave the entry untouched. A rename that would collide with a
        previously produced name is skipped with a warning.
        """
        previous = []
        for data_type, data_info in filtered.items():
            for res in data_info:
                new = simplify(res["subtest"])
                if new is None:
                    continue
                if new in previous:
                    self.logger.warning(
                        f"Another metric which ends with `{new}` was already found. "
                        f"{res['subtest']} will not be simplified."
                    )
                    continue
                res["subtest"] = new
                previous.append(new)
# Cache of MetricsStorage instances keyed on (path, prefix) so results
# are parsed only once per output location.
_metrics = {}


def filtered_metrics(
    metadata,
    path,
    prefix,
    group_name="firefox",
    transformer=None,
    metrics=None,
    settings=False,
    exclude=None,
    split_by=None,
    simplify_names=False,
    simplify_exclude=("statistics",),
):
    """Returns standardized data extracted from the metadata instance.

    We're caching an instance of MetricsStorage per (path, prefix)
    combination and compute the data only once when this function is called.

    :param metadata: provides `get_results()`; it is also handed to
        MetricsStorage as its logger (it is expected to expose `warning`).
    :param path str: output directory for the storage instance.
    :param prefix str: prefix used by the output files.
    :param settings bool: when True, the per-suite settings are returned
        alongside the results as a 2-tuple.
    :return dict | (dict, dict): the filtered results, plus the settings
        when `settings` is True.

    Remaining keyword arguments are forwarded to
    MetricsStorage.filtered_metrics.
    """
    key = path, prefix
    if key not in _metrics:
        storage = _metrics[key] = MetricsStorage(path, prefix, metadata)
        storage.set_results(metadata.get_results())
    else:
        storage = _metrics[key]

    results = storage.filtered_metrics(
        group_name=group_name,
        transformer=transformer,
        metrics=metrics,
        exclude=exclude,
        split_by=split_by,
        simplify_names=simplify_names,
        simplify_exclude=simplify_exclude,
    )

    # XXX returning two different types is a problem
    # in case settings is false, we should return None for it
    # and always return a 2-tuple
    if settings:
        return results, storage.settings
    return results