def matcher(stat, data):
    """
    File one statistic value into the "stats" section of ``data`` at up to
    three granularities: per stage, per (host, stage) and, when the row
    carries a stage name and a non-None id, per (host, stage, worker).

    ``stat_value_key`` and ``data_key`` are not parameters of this function;
    they are resolved from the surrounding scope.

    NOTE(review): utils.populate_nested_dictionary is assumed to create any
    missing levels along the key list and return the innermost object --
    confirm against its definition.
    """
    host = stat["hostname"]
    job_phase_epoch = (stat["job_name"], stat["phase_name"], stat["epoch"])

    if "stage_name" in stat:
        # Only the part of the stage name before the first colon is kept
        stage = stat["stage_name"].partition(':')[0]
        worker = stat["id"]
    else:
        # Rows without a stage name are filed under their logger name and
        # never get a per-worker entry
        stage = stat["logger_name"]
        worker = None

    value = stat[stat_value_key]

    # Bucket keys ordered from coarsest to finest granularity
    bucket_keys = [(stage,), (host, stage)]

    if worker is not None:
        bucket_keys.append((host, stage, worker))

    for bucket_key in bucket_keys:
        container = utils.populate_nested_dictionary(
            data,
            key_list=["stats", job_phase_epoch, bucket_key, data_key],
            types=[dict, dict, dict, StatContainer])

        container.append(value)
    def matcher(stat, data):
        """
        Append one statistic value to the per-stage, per-host and (for rows
        that carry a stage name and a non-None id) per-worker containers kept
        under ``data["stats"]``.

        ``stat_value_key`` and ``data_key`` come from the enclosing scope
        rather than from the argument list.

        NOTE(review): utils.populate_nested_dictionary is assumed to build any
        missing levels of the key path and hand back the innermost object --
        confirm against its definition.
        """
        host = stat["hostname"]
        run_key = (stat["job_name"], stat["phase_name"], stat["epoch"])

        has_stage = "stage_name" in stat

        if has_stage:
            # Drop everything from the first colon onwards
            stage, _, _ = stat["stage_name"].partition(':')
            worker = stat["id"]
        else:
            # No stage name: fall back to the logger name, no worker entry
            stage = stat["logger_name"]
            worker = None

        value = stat[stat_value_key]

        # Global and per-host entries are always written; the per-worker
        # entry only when a worker id is available
        granularities = [(stage,), (host, stage)]

        if worker is not None:
            granularities.append((host, stage, worker))

        for granularity in granularities:
            key_path = ["stats", run_key, granularity, data_key]

            stat_container = utils.populate_nested_dictionary(
                data, key_list=key_path,
                types=[dict, dict, dict, StatContainer])

            stat_container.append(value)
示例#3
0
def gather_timestamped_points_matcher(query_number, match, data, data_key):
    """
    Record one (timestamp, value) point for plotting and keep the per-phase
    minimum and maximum timestamps in ``data`` up to date.

    Points are grouped under the key path
    ``"plot_points" -> query_number -> collection_stat_name -> data_key`` as
    two parallel lists, "x_values" (timestamps) and "y_values" (values).

    NOTE(review): assumes ``data`` and the object returned by
    metaprogram_utils.populate_nested_dictionary are plain dicts -- confirm.
    """
    when = float(match["timestamp"])
    phase_name = match["phase_name"]
    series_name = match["collection_stat_name"]
    sample = match["value"]

    # Stretch the per-phase [min, max] timestamp window to cover this point
    for bound_key, pick in (("min_timestamp", min), ("max_timestamp", max)):
        per_phase = data.setdefault(bound_key, {})
        # First point of a phase: the default makes pick() return `when`
        per_phase[phase_name] = pick(per_phase.get(phase_name, when), when)

    # One bucket per query, stat name and data key so that fine-grained
    # plots can be produced later
    points = metaprogram_utils.populate_nested_dictionary(
        data, ["plot_points", query_number, series_name, data_key])

    xs = points.setdefault("x_values", [])
    ys = points.setdefault("y_values", [])

    xs.append(when)
    ys.append(sample)
def set_stage_value_matcher(stat, data):
    """
    Store a stage-wide statistic, replacing any value recorded earlier.

    This relies on all workers of a stage sharing a single value for the
    statistic. The value is written under the key path
    ``"stage_info" -> (job, phase, epoch) -> stage name``, keyed by the
    statistic's name. The stage name is cut at its first colon, and the
    value comes from "str_value" when present, otherwise from "uint_value".
    """

    run_key = (stat["job_name"], stat["phase_name"], stat["epoch"])
    raw_stage_name = stat["stage_name"]
    name = stat["stat_name"]

    # Keep only the part of the stage name before the first colon
    stage = raw_stage_name.partition(':')[0]

    value = stat["str_value"] if "str_value" in stat else stat["uint_value"]

    stage_info = utils.populate_nested_dictionary(
        data, ["stage_info", run_key, stage])

    stage_info[name] = value
def gather_timestamped_points_matcher(query_number, match, data, data_key):
    """
    Add a timestamped sample to the plot data and widen the per-phase
    timestamp range stored in ``data`` so that it includes the sample.

    Samples live under the key path
    ``"plot_points" -> query_number -> collection_stat_name -> data_key``,
    split into the parallel lists "x_values" and "y_values".

    NOTE(review): assumes ``data`` and the object returned by
    metaprogram_utils.populate_nested_dictionary are plain dicts -- confirm.
    """
    point_time = float(match["timestamp"])
    phase_name = match["phase_name"]
    series = match["collection_stat_name"]
    point_value = match["value"]

    # Track the earliest and latest timestamp seen for each phase
    extremes = {"min_timestamp": min, "max_timestamp": max}

    for extreme_key, choose in extremes.items():
        by_phase = data.setdefault(extreme_key, {})

        if phase_name in by_phase:
            by_phase[phase_name] = choose(by_phase[phase_name], point_time)
        else:
            by_phase[phase_name] = point_time

    # Points are split by query, stat name and data key to allow
    # fine-grained plots
    bucket = metaprogram_utils.populate_nested_dictionary(
        data, ["plot_points", query_number, series, data_key]
    )

    x_values = bucket.setdefault("x_values", [])
    y_values = bucket.setdefault("y_values", [])

    x_values.append(point_time)
    y_values.append(point_value)
def set_stage_value_matcher(stat, data):
    """
    Log a statistic once per stage, overwriting whatever was logged before.

    The matcher assumes every worker of a stage reports the same value for
    the statistic. Entries are kept under
    ``"stage_info" -> (job, phase, epoch) -> stage name`` and keyed by the
    statistic's name; anything from the first colon of the stage name
    onwards is discarded. "str_value" takes precedence over "uint_value".
    """

    job_phase_epoch = (stat["job_name"], stat["phase_name"], stat["epoch"])
    full_stage_name = stat["stage_name"]
    key = stat["stat_name"]

    # Discard the colon and everything after it, if there is one
    short_stage_name, _, _ = full_stage_name.partition(':')

    if "str_value" not in stat:
        recorded = stat["uint_value"]
    else:
        recorded = stat["str_value"]

    per_stage = utils.populate_nested_dictionary(
        data, ["stage_info", job_phase_epoch, short_stage_name])

    per_stage[key] = recorded
def handleDiskCountMatch(match, data):
    """
    Store ``match["uint_value"]`` under the statistic's name in the entry
    reached through the key path ``(job, None, None) -> "disks" -> hostname``.
    """
    job_name = match["job_name"]
    counter_name = match["stat_name"]
    host = match["hostname"]

    # Only the job is filled in; the other two slots of the tuple are None
    job_key = (job_name, None, None)

    per_host = utils.populate_nested_dictionary(
        data, [job_key, "disks", host])

    per_host[counter_name] = match["uint_value"]
def handleDiskCountMatch(match, data):
    """
    Record a per-host value for a job: ``match["uint_value"]`` is saved under
    ``match["stat_name"]`` at ``(job, None, None) -> "disks" -> hostname``.
    """
    job_name = match["job_name"]
    name = match["stat_name"]
    host = match["hostname"]

    # The key tuple carries the job only; its last two slots stay None
    key_path = [(job_name, None, None), "disks", host]

    host_entry = utils.populate_nested_dictionary(data, key_path)
    host_entry[name] = match["uint_value"]
def handleTimestampQueryMatch(match, data):
    """
    Append the row's start and stop times to the containers kept at
    ``(job, phase, epoch) -> "stats" -> "timestamps" -> "start" / "stop"``.

    NOTE(review): utils.populate_nested_dictionary is assumed to create
    missing levels using the given types and return the innermost object --
    confirm against its definition.
    """
    run_key = (match["job_name"], match["phase_name"], match["epoch"])
    endpoints = (("start", match["start_time"]),
                 ("stop", match["stop_time"]))

    for label, moment in endpoints:
        container = utils.populate_nested_dictionary(
            data,
            key_list=[run_key, "stats", "timestamps", label],
            types=[dict, dict, dict, StatContainer])
        container.append(moment)
def handleTimestampQueryMatch(match, data):
    """
    Collect a row's "start_time" and "stop_time" into the "start" and "stop"
    containers found under
    ``(job, phase, epoch) -> "stats" -> "timestamps"``.

    NOTE(review): utils.populate_nested_dictionary is assumed to build the
    key path on demand with the given types and return its last element --
    confirm against its definition.
    """
    job_phase_epoch = (
        match["job_name"], match["phase_name"], match["epoch"])
    begin = match["start_time"]
    end = match["stop_time"]

    def timestamps_for(which):
        # Fetch (creating if needed) the container for "start" or "stop"
        return utils.populate_nested_dictionary(
            data,
            key_list=[job_phase_epoch, "stats", "timestamps", which],
            types=[dict, dict, dict, StatContainer])

    timestamps_for("start").append(begin)
    timestamps_for("stop").append(end)
def handleReaderInputMatch(match, data):
    """
    Note the row's hostname in the "hosts" set of its (job, phase, epoch)
    entry and add the row's "uint_value" to that entry's running
    "input_size" total (starting from 0).

    NOTE(review): assumes the "stats" object returned by
    utils.populate_nested_dictionary is a plain dict -- confirm.
    """
    run_key = (match["job_name"], match["phase_name"], match["epoch"])
    amount = match["uint_value"]
    host = match["hostname"]

    seen_hosts = utils.populate_nested_dictionary(
        data, key_list=[run_key, "hosts"], types=[dict, set])
    seen_hosts.add(host)

    totals = utils.populate_nested_dictionary(data, [run_key, "stats"])

    # Start the total at zero the first time this entry is seen
    totals.setdefault("input_size", 0)
    totals["input_size"] += amount
def handleWriterOutputMatch(match, data):
    """
    Add the row's "uint_value" to the running "output_size" total (starting
    from 0) kept under ``(job, phase, epoch) -> "stats"``.

    NOTE(review): assumes the object returned by
    utils.populate_nested_dictionary is a plain dict -- confirm.
    """
    run_key = (match["job_name"], match["phase_name"], match["epoch"])
    amount = match["uint_value"]

    totals = utils.populate_nested_dictionary(data, [run_key, "stats"])

    # Start the total at zero the first time this entry is seen
    totals.setdefault("output_size", 0)
    totals["output_size"] += amount
def handleReaderInputMatch(match, data):
    """
    Update the (job, phase, epoch) entry for one row: add the row's hostname
    to the entry's "hosts" set and add its "uint_value" to the "input_size"
    counter under "stats", which starts at 0.

    NOTE(review): assumes the "stats" object returned by
    utils.populate_nested_dictionary is a plain dict -- confirm.
    """
    job_phase_epoch = (
        match["job_name"], match["phase_name"], match["epoch"])
    increment = match["uint_value"]
    host = match["hostname"]

    hosts_key_path = [job_phase_epoch, "hosts"]
    hosts = utils.populate_nested_dictionary(
        data, key_list=hosts_key_path, types=[dict, set])
    hosts.add(host)

    stats = utils.populate_nested_dictionary(
        data, [job_phase_epoch, "stats"])

    # Create the counter on first use, then accumulate into it
    stats.setdefault("input_size", 0)
    stats["input_size"] += increment
def handleWriterOutputMatch(match, data):
    """
    Accumulate the row's "uint_value" into the "output_size" counter stored
    under ``(job, phase, epoch) -> "stats"``; the counter starts at 0.

    NOTE(review): assumes the object returned by
    utils.populate_nested_dictionary is a plain dict -- confirm.
    """
    job_phase_epoch = (
        match["job_name"], match["phase_name"], match["epoch"])
    increment = match["uint_value"]

    stats = utils.populate_nested_dictionary(
        data, [job_phase_epoch, "stats"])

    # Create the counter on first use, then accumulate into it
    stats.setdefault("output_size", 0)
    stats["output_size"] += increment
示例#15
0
def gather_histogram_points_matcher(query_number, match, data, data_key):
    """
    Append one histogram point (a bin and its count) to the plot data.

    Points are grouped under the key path
    ``"plot_points" -> query_number -> stat_name -> data_key``; each group
    holds two parallel lists, "bin" and "count".

    Only the "stat_name", "bin" and "count" fields of ``match`` are read.

    NOTE(review): assumes metaprogram_utils.populate_nested_dictionary
    returns a plain dict for the innermost level -- confirm against its
    definition.
    """
    stat_name = match["stat_name"]
    bin_value = match["bin"]
    count = match["count"]

    data_subdict = metaprogram_utils.populate_nested_dictionary(
        data, ["plot_points", query_number, stat_name, data_key])

    # Create both lists before appending so they always exist as a pair
    bins = data_subdict.setdefault("bin", [])
    counts = data_subdict.setdefault("count", [])

    bins.append(bin_value)
    counts.append(count)
示例#16
0
def gather_histogram_points_matcher(query_number, match, data, data_key):
    """
    Record a single histogram bin and its count for later plotting.

    The point is stored under the key path
    ``"plot_points" -> query_number -> stat_name -> data_key`` in two
    parallel lists, "bin" and "count".

    Only the "stat_name", "bin" and "count" fields of ``match`` are read.

    NOTE(review): assumes metaprogram_utils.populate_nested_dictionary
    returns a plain dict for the innermost level -- confirm against its
    definition.
    """
    stat_name = match["stat_name"]
    bin_value = match["bin"]
    count = match["count"]

    data_subdict = metaprogram_utils.populate_nested_dictionary(
        data, ["plot_points", query_number, stat_name, data_key]
    )

    # Make sure both lists exist before either one is extended
    bins = data_subdict.setdefault("bin", [])
    counts = data_subdict.setdefault("count", [])

    bins.append(bin_value)
    counts.append(count)