def matcher(stat, data):
    """
    Record the value of one statistic at several levels of aggregation.

    The value ``stat[stat_value_key]`` is appended to the container for the
    whole job, to the container for the reporting host and, when the
    statistic carries a stage name (and therefore a worker id), to the
    container for that individual worker.

    ``stat_value_key``, ``data_key``, ``utils`` and ``StatContainer`` are not
    parameters of this function; they are resolved from the surrounding
    scope.
    """
    hostname = stat["hostname"]
    job = stat["job_name"]
    phase = stat["phase_name"]
    epoch = stat["epoch"]

    if "stage_name" in stat:
        # Only the part of the stage name before the first ':' is kept
        stage_name = stat["stage_name"].partition(':')[0]
        worker_id = stat["id"]
    else:
        # No stage name: the logger name is used in its place and the
        # statistic is not attributed to any worker
        stage_name = stat["logger_name"]
        worker_id = None

    val = stat[stat_value_key]

    # Global job statistics first, then per-host statistics
    groupings = [(stage_name,), (hostname, stage_name)]

    # Per-worker statistics only when the statistic pertains to a worker
    if worker_id is not None:
        groupings.append((hostname, stage_name, worker_id))

    for grouping in groupings:
        # Presumably creates any missing level using `types` and returns the
        # innermost StatContainer -- confirm against utils
        stat_container = utils.populate_nested_dictionary(
            data,
            key_list=["stats", (job, phase, epoch), grouping, data_key],
            types=[dict, dict, dict, StatContainer])
        stat_container.append(val)
def matcher(stat, data):
    """
    Append one statistic's value to the global, per-host and (if the
    statistic belongs to a worker) per-worker containers stored in `data`.

    The key of the value inside `stat` (``stat_value_key``) and the final
    key under which the container lives (``data_key``) come from the
    surrounding scope rather than from this function's arguments.
    """
    hostname = stat["hostname"]
    job = stat["job_name"]
    phase = stat["phase_name"]
    epoch = stat["epoch"]

    has_stage = "stage_name" in stat

    if has_stage:
        # Discard everything from the first ':' onwards, if there is one
        stage_name = stat["stage_name"].split(':', 1)[0]
        worker_id = stat["id"]
    else:
        stage_name = stat["logger_name"]
        worker_id = None

    val = stat[stat_value_key]

    def append_value(grouping):
        # All three granularities share the same nesting layout; only the
        # third key (the grouping tuple) differs
        key_list = ["stats", (job, phase, epoch), grouping, data_key]
        target = utils.populate_nested_dictionary(
            data, key_list=key_list,
            types=[dict, dict, dict, StatContainer])
        target.append(val)

    # Add value to global job statistics
    append_value((stage_name,))

    # Add value to per-host statistics
    append_value((hostname, stage_name))

    # Statistics that pertain to a worker are also tracked per worker
    if worker_id is not None:
        append_value((hostname, stage_name, worker_id))
def gather_timestamped_points_matcher(query_number, match, data, data_key):
    """
    Record one timestamped sample as a plot point and keep track of the
    earliest and latest timestamps seen for the sample's phase.

    The timestamp becomes the x value and ``match["value"]`` the y value of
    the point, filed under
    ``data["plot_points"][query_number][stat_name][data_key]``.
    """
    timestamp = float(match["timestamp"])
    phase = match["phase_name"]
    stat_name = match["collection_stat_name"]
    stat_val = match["value"]

    # Update minimum and maximum timestamps, tracked separately per phase
    for bound_key, pick in (("min_timestamp", min), ("max_timestamp", max)):
        per_phase = data.setdefault(bound_key, {})

        if phase in per_phase:
            per_phase[phase] = pick(per_phase[phase], timestamp)
        else:
            # First timestamp for this phase is both its min and its max
            per_phase[phase] = timestamp

    # Points are subdivided by query, then stat name, then data key so that
    # fine-grained plots can be made if needed
    points = metaprogram_utils.populate_nested_dictionary(
        data, ["plot_points", query_number, stat_name, data_key])

    # Make sure both series exist before either one is extended
    points.setdefault("x_values", [])
    points.setdefault("y_values", [])

    points["x_values"].append(timestamp)
    points["y_values"].append(stat_val)
def set_stage_value_matcher(stat, data):
    """
    Store a stage-wide statistic, replacing any value logged earlier.

    This matcher assumes that every worker of a stage reports the same value
    for the statistic, so a single value is kept per (job, phase, epoch) and
    stage name under ``data["stage_info"]``. The value is taken from
    ``stat["str_value"]`` when present and from ``stat["uint_value"]``
    otherwise.
    """
    job = stat["job_name"]
    phase = stat["phase_name"]
    epoch = stat["epoch"]
    full_stage_name = stat["stage_name"]
    stat_name = stat["stat_name"]

    # Only the part of the stage name before the first ':' identifies the
    # stage
    stage_name = full_stage_name.partition(':')[0]

    stat_val = stat["str_value"] if "str_value" in stat else stat["uint_value"]

    stage_info = utils.populate_nested_dictionary(
        data, ["stage_info", (job, phase, epoch), stage_name])

    stage_info[stat_name] = stat_val
def gather_timestamped_points_matcher(query_number, match, data, data_key):
    """
    Add a (timestamp, value) plot point for the matched statistic and widen
    the recorded timestamp range of the match's phase to include it.

    Plot points live under
    ``data["plot_points"][query_number][stat_name][data_key]`` as two
    parallel lists, "x_values" (timestamps) and "y_values" (values).
    """
    timestamp = float(match["timestamp"])
    phase = match["phase_name"]
    stat_name = match["collection_stat_name"]
    stat_val = match["value"]

    def update_bound(bound_key, choose):
        # Create the per-phase table on first use, then either seed the
        # phase's bound or tighten it with the new timestamp
        if bound_key not in data:
            data[bound_key] = {}

        bounds = data[bound_key]
        bounds[phase] = (
            choose(bounds[phase], timestamp) if phase in bounds
            else timestamp)

    update_bound("min_timestamp", min)
    update_bound("max_timestamp", max)

    # Subdivide points by query, then by stat name and data key so that
    # fine-grained plots are possible
    plot_key_path = ["plot_points", query_number, stat_name, data_key]
    series = metaprogram_utils.populate_nested_dictionary(data, plot_key_path)

    for axis in ("x_values", "y_values"):
        series.setdefault(axis, [])

    series["x_values"].append(timestamp)
    series["y_values"].append(stat_val)
def handleDiskCountMatch(match, data):
    """
    Store a per-host disk statistic for a job.

    The value ``match["uint_value"]`` is written under
    ``data[(job, None, None)]["disks"][hostname][stat_name]``; the phase and
    epoch slots of the job key are left as None.
    """
    job = match["job_name"]
    stat_name = match["stat_name"]
    hostname = match["hostname"]

    key_path = [(job, None, None), "disks", hostname]
    host_disks = utils.populate_nested_dictionary(data, key_path)

    host_disks[stat_name] = match["uint_value"]
def handleTimestampQueryMatch(match, data):
    """
    Append the matched start and stop times to the timestamp containers of
    the match's (job, phase, epoch).

    The containers live under
    ``data[(job, phase, epoch)]["stats"]["timestamps"]`` with the keys
    "start" and "stop".
    """
    job = match["job_name"]
    phase = match["phase_name"]
    epoch = match["epoch"]
    start_time = match["start_time"]
    stop_time = match["stop_time"]

    # Both values are read before anything is stored, so a match missing
    # either one leaves `data` untouched
    for label, moment in (("start", start_time), ("stop", stop_time)):
        container = utils.populate_nested_dictionary(
            data,
            key_list=[(job, phase, epoch), "stats", "timestamps", label],
            types=[dict, dict, dict, StatContainer])
        container.append(moment)
def handleReaderInputMatch(match, data):
    """
    Account for input read by a reader on one host.

    The host is added to the set of hosts seen for the match's
    (job, phase, epoch), and ``match["uint_value"]`` is added to that
    phase's running "input_size" total.
    """
    job = match["job_name"]
    phase_name = match["phase_name"]
    epoch = match["epoch"]
    value = match["uint_value"]
    hostname = match["hostname"]

    phase_key = (job, phase_name, epoch)

    hosts = utils.populate_nested_dictionary(
        data, key_list=[phase_key, "hosts"], types=[dict, set])
    hosts.add(hostname)

    stats = utils.populate_nested_dictionary(data, [phase_key, "stats"])

    # The total starts at zero the first time this phase is seen
    stats.setdefault("input_size", 0)
    stats["input_size"] += value
def handleWriterOutputMatch(match, data):
    """
    Add ``match["uint_value"]`` to the running "output_size" total kept in
    the stats of the match's (job, phase, epoch).
    """
    job = match["job_name"]
    phase_name = match["phase_name"]
    epoch = match["epoch"]
    value = match["uint_value"]

    stats = utils.populate_nested_dictionary(
        data, [(job, phase_name, epoch), "stats"])

    # The total starts at zero the first time this phase is seen
    stats.setdefault("output_size", 0)
    stats["output_size"] += value
def handleReaderInputMatch(match, data):
    """
    Record that a host contributed reader input to a (job, phase, epoch)
    and grow that phase's "input_size" total by ``match["uint_value"]``.
    """
    job = match["job_name"]
    phase_name = match["phase_name"]
    epoch = match["epoch"]
    value = match["uint_value"]
    hostname = match["hostname"]

    # Remember which hosts reported input for this phase
    seen_hosts = utils.populate_nested_dictionary(
        data,
        key_list=[(job, phase_name, epoch), "hosts"],
        types=[dict, set])
    seen_hosts.add(hostname)

    # Accumulate the amount of input, starting from zero on first sight
    phase_stats = utils.populate_nested_dictionary(
        data, [(job, phase_name, epoch), "stats"])
    phase_stats.setdefault("input_size", 0)
    phase_stats["input_size"] += value
def gather_histogram_points_matcher(query_number, match, data, data_key):
    """
    Record one histogram bucket (its bin and its count) as a plot point.

    Points are stored under
    ``data["plot_points"][query_number][stat_name][data_key]`` as two
    parallel lists, "bin" and "count".
    """
    # "phase_name" is read but not used afterwards; the lookup is kept so
    # that a match without it still fails the same way
    match["phase_name"]
    stat_name = match["stat_name"]
    bin_value = match["bin"]
    count = match["count"]

    points = metaprogram_utils.populate_nested_dictionary(
        data, ["plot_points", query_number, stat_name, data_key])

    # Make sure both series exist before either one is extended
    points.setdefault("bin", [])
    points.setdefault("count", [])

    points["bin"].append(bin_value)
    points["count"].append(count)
def gather_histogram_points_matcher(query_number, match, data, data_key):
    """
    Append the matched histogram bin and its count to the plot points kept
    for this query, statistic name and data key.
    """
    # The phase name is looked up even though nothing below uses it, so a
    # match lacking "phase_name" is still rejected
    _phase = match["phase_name"]
    stat_name = match["stat_name"]
    bin_value = match["bin"]
    count = match["count"]

    plot_key_path = ["plot_points", query_number, stat_name, data_key]
    series = metaprogram_utils.populate_nested_dictionary(data, plot_key_path)

    for column in ("bin", "count"):
        series.setdefault(column, [])

    for column, entry in (("bin", bin_value), ("count", count)):
        series[column].append(entry)