def create_comparison_dict_hybrid_over_transitive() -> dict:
    """
    Creates a dictionary containing dataset names as keys and, as values,
    tuples of the technique definitions of the best transitive and best
    hybrid techniques for that dataset.

    Note: the previous docstring said "direct and hybrid", but the code has
    always looked up the best *transitive* technique (matching the function
    name, hybrid over transitive).

    :return: dict mapping dataset name -> (best transitive technique
        definition, best hybrid technique definition), usable by metric
        tables to calculate the gain between the two techniques
    """
    # Dict comprehension replaces the loop + dict.update idiom; iteration
    # order (and thus key order) over DATASET_COLUMN_ORDER is preserved.
    return {
        dataset: (
            get_best_transitive_technique(dataset),
            get_best_hybrid_technique(dataset),
        )
        for dataset in DATASET_COLUMN_ORDER
    }
from api.tracer import Tracer from experiments.evaluate_paths import change_paths_in_technique from utilities.constants import PATH_TO_EXPLORATORY from utilities.technique_extractors import ( get_best_direct_technique, get_best_hybrid_technique, get_best_transitive_technique, ) if __name__ == "__main__": dataset_name = "Drone" tracer = Tracer() direct_technique = get_best_direct_technique(dataset_name) transitive_technique = get_best_transitive_technique(dataset_name) hybrid_technique = get_best_hybrid_technique(dataset_name) new_path = ["0", "2", "1"] techniques = [direct_technique, transitive_technique, hybrid_technique] techniques = [change_paths_in_technique(t, new_path) for t in techniques] matrices = [ tracer.get_technique_data(dataset_name, t).similarity_matrix for t in techniques ] matrices = list(map(minmax_scale, matrices)) def get_group(percentile): if percentile < 1 / 3: return "low" elif percentile < 2 / 3: return "medium" else:
import numpy as np

from api.tracer import Tracer
from utilities.technique_extractors import (
    get_best_direct_technique,
    get_best_hybrid_technique,
    get_best_transitive_technique,
)

if __name__ == "__main__":
    tracer = Tracer()
    d_name = "EasyClinic"

    # Best technique definition of each family for the chosen dataset.
    direct_technique = get_best_direct_technique(d_name)
    transitive_technique = get_best_transitive_technique(d_name)
    hybrid_technique = get_best_hybrid_technique(d_name)

    """
    Direct
    """
    # Summary AP (index 0) vs. the mean of per-query APs — printed side by
    # side, presumably to verify they agree. TODO confirm intent.
    direct_score = tracer.get_metrics(d_name, direct_technique)[0].ap
    direct_individual_metrics = tracer.get_metrics(
        d_name, direct_technique, summary_metrics=False
    )
    direct_scores = [m.ap for m in direct_individual_metrics]
    print(f"Direct: {direct_score}:{np.mean(direct_scores)}")

    """
    Transitive
    """
    transitive_score = tracer.get_metrics(d_name, transitive_technique)[0].ap
    # FIX: was hardcoded "EasyClinic"; use d_name so the script stays
    # consistent if the dataset under inspection is changed above.
    transitive_individual_metrics = tracer.get_metrics(
        d_name, transitive_technique, summary_metrics=False
    )
def run(self) -> Table:
    """
    Returns a metric table containing all of the metrics calculated for
    each technique in df.

    Computes single-query (non-summary) metrics for the best direct,
    transitive, and hybrid techniques on a user-selected dataset, exports
    them as an intermediate file, then rebuilds the cross-dataset
    aggregates (melted and unmelted) from all intermediate files.

    :return: metric table with single query metrics for each technique
        applied to specified dataset in row
    """
    # Dataset is chosen interactively at run time, not passed in.
    dataset_name = prompt_for_dataset()

    """
    Find best techniques
    """
    direct_best_definition = get_best_direct_technique(dataset_name)
    transitive_best_definition = get_best_transitive_technique(dataset_name)
    combined_best_definition = get_best_hybrid_technique(dataset_name)

    """
    Calculate metrics for individual queries on dataset
    """
    tracer = Tracer()
    metric_table = MetricTable()

    # summary_metrics=False yields one Metrics entry per query rather than
    # one aggregated row; each batch is tagged with its technique type.
    direct_metrics: [Metrics] = tracer.get_metrics(
        dataset_name, direct_best_definition, summary_metrics=False
    )
    metric_table.add(
        direct_metrics, other={TECHNIQUE_TYPE_COLNAME: DIRECT_ID}, create_index=True
    )

    transitive_metrics: [Metrics] = tracer.get_metrics(
        dataset_name, transitive_best_definition, summary_metrics=False
    )
    metric_table.add(
        transitive_metrics,
        other={TECHNIQUE_TYPE_COLNAME: TRANSITIVE_ID},
        create_index=True,
    )

    combined_metrics: [Metrics] = tracer.get_metrics(
        dataset_name, combined_best_definition, summary_metrics=False
    )
    metric_table.add(
        combined_metrics,
        other={TECHNIQUE_TYPE_COLNAME: HYBRID_ID},
        create_index=True,
    )

    """
    Export individual run
    """
    # One intermediate CSV per dataset; these files feed the aggregates below.
    export_path = os.path.join(PATH_TO_INDIVIDUAL_QUERIES, dataset_name + ".csv")
    (metric_table.sort(DATASET_COLUMN_ORDER).save(export_path))
    self.export_paths.append(export_path)

    """
    Update aggregate
    """
    # Melted (long-format) aggregate across every intermediate file found,
    # exported for presentation (upper-cased metric names, title case).
    individual_queries_aggregate = (
        MetricTable(
            Table.aggregate_intermediate_files(PATH_TO_INDIVIDUAL_QUERIES).table
        )
        .create_lag_norm_inverted(drop_old=True)
        .melt_metrics(metric_value_col_name=METRIC_SCORE_COLNAME)
        .sort(DATASET_COLUMN_ORDER)
        .col_values_to_upper(METRIC_COLNAME)
        .to_title_case(exclude=METRIC_COLNAME)
        .save(PATH_TO_INDIVIDUAL_QUERIES_AGG)
    )
    # The unmelted aggregate is rebuilt from scratch rather than reusing the
    # chain above — presumably because the fluent transforms mutate the
    # underlying table (TODO confirm). This rebinding is the value returned.
    individual_queries_aggregate = (
        MetricTable(
            Table.aggregate_intermediate_files(PATH_TO_INDIVIDUAL_QUERIES).table
        )
        .create_lag_norm_inverted(drop_old=True)
        .sort(DATASET_COLUMN_ORDER)
        .save(PATH_TO_INDIVIDUAL_QUERIES_UNMELTED)
    )  # aggregate_table
    self.export_paths.append(PATH_TO_INDIVIDUAL_QUERIES_AGG)
    return individual_queries_aggregate
def run(self) -> Table:
    """
    For every intermediate-path permutation in POSSIBLE_PATHS, remaps the
    artifact paths of each dataset's best direct / transitive / hybrid
    techniques, accumulates their summary metrics and the direct-vs-hybrid
    gain, and exports both aggregate tables.

    :return: the aggregated gain table (pandas) across all paths and datasets
    """
    tracer = Tracer()

    def get_metrics(d_name, t_def: str):
        # Summary metrics (default) — one aggregated entry per technique.
        return tracer.get_metrics(d_name, t_def)

    def add_metrics(d_name, t_def: str, t_type: str, p_name: str):
        # NOTE: closes over `metric_table`, which is rebound at the top of
        # each path iteration; late binding makes this add rows to the
        # current path's table.
        t_metrics = get_metrics(d_name, t_def)
        metric_table.add(
            t_metrics,
            {
                DATASET_COLNAME: d_name,
                "path": p_name,
                "type": t_type,
                NAME_COLNAME: t_def,
            },
        )

    aggregate_gain = None
    aggregate_metric = None
    for path in POSSIBLE_PATHS:
        # Fresh table and comparison dict per path permutation.
        metric_table = MetricTable()
        comparison_dict = {}
        path_name = path_to_str(path)
        for dataset_name in DATASET_COLUMN_ORDER:
            # Path entries are stringified indices: source, intermediate, target.
            source_index = str(path[0])
            intermediate_index = str(path[1])
            target_index = str(path[2])
            new_path = [source_index, intermediate_index, target_index]
            # direct
            direct_technique_def = change_paths_in_technique(
                get_best_direct_technique(dataset_name), new_path)
            add_metrics(
                dataset_name,
                direct_technique_def,
                DIRECT_ID,
                path_name,
            )
            # transitive
            transitive_technique_def = change_paths_in_technique(
                get_best_transitive_technique(dataset_name), new_path)
            add_metrics(
                dataset_name,
                transitive_technique_def,
                TRANSITIVE_ID,
                path_name,
            )
            # HYBRID
            hybrid_technique_definition = change_paths_in_technique(
                get_best_hybrid_technique(dataset_name), new_path)
            add_metrics(
                dataset_name,
                hybrid_technique_definition,
                HYBRID_ID,
                path_name,
            )
            # Gain is computed between the direct and hybrid variants only.
            comparison_dict.update({
                dataset_name: (direct_technique_def, hybrid_technique_definition)
            })
        gain_table = metric_table.calculate_gain_between_techniques(
            comparison_dict)
        gain_table.table["path"] = path_name
        # Prepend this path's rows to the running aggregates (newest first).
        aggregate_gain = (gain_table.table if aggregate_gain is None else
                          pd.concat([gain_table.table, aggregate_gain]))
        aggregate_metric = (metric_table.table
                            if aggregate_metric is None else pd.concat(
                                [metric_table.table, aggregate_metric]))
    # Export melted variants of both aggregates and record their paths.
    MetricTable(aggregate_metric).create_lag_norm_inverted(
        drop_old=True).melt_metrics().save(METRIC_TABLE_EXPORT_PATH)
    self.export_paths.append(METRIC_TABLE_EXPORT_PATH)
    MetricTable(aggregate_gain).melt_metrics().save(
        GAIN_TABLE_EXPORT_PATH)
    self.export_paths.append(GAIN_TABLE_EXPORT_PATH)
    return aggregate_gain