def experiment_summary(pkl_file):
    """Summarise one pickled experiment as an ``Experiment`` record.

    The pickle is loaded with ``read_pickle`` and is expected to provide the
    keys ``config``, ``test_end_date``, ``test_labels`` and
    ``test_predictions``. ``train_start_date``, ``aggregation`` and
    ``eis_baseline`` are optional and replaced by placeholder values when
    absent.

    Args:
        pkl_file: Path of the pickled experiment. It is also passed to
            ``timestamp_from_path`` to recover the experiment timestamp.

    Returns:
        ``Experiment`` built from (timestamp, model_config, auc, data,
        fpr, tpr, fnr, tnr, recall, aggregation, eis_baseline), where the
        four rate lists hold one entry per threshold in the order
        0.10, 0.15, 0.20, 0.25, 0.30, 0.40, 0.50, 0.60, 0.70.

    Raises:
        KeyError: If one of the required keys is missing from the pickle.
    """
    data = read_pickle(pkl_file)

    # model_config is taken straight from data["config"] and updated in
    # place, so the derived fields are also reachable through `data`.
    model_config = data["config"]
    if "parameters" not in model_config:
        model_config["parameters"] = "?"

    if "train_start_date" not in data:
        model_config["train_start_date"] = "01Jan1970"
    else:
        model_config["train_start_date"] = data["train_start_date"].strftime(
            "%d%b%Y")
    model_config["test_end_date"] = data["test_end_date"].strftime("%d%b%Y")
    model_config["feature_summary"] = feature_summary(model_config["features"])

    labels = data["test_labels"]
    predictions = data["test_predictions"]
    auc_model = compute_AUC(labels, predictions)

    # One fpr_tpr result per threshold; each result is indexed with
    # [row, col] below (presumably a 2x2 rate matrix -- confirm against
    # fpr_tpr).
    thresholds = (0.10, 0.15, 0.20, 0.25, 0.30, 0.40, 0.50, 0.60, 0.70)
    matrices = [fpr_tpr(labels, predictions, level) for level in thresholds]
    fpr = [matrix[0, 1] for matrix in matrices]
    tpr = [matrix[1, 1] for matrix in matrices]
    fnr = [matrix[1, 0] for matrix in matrices]
    tnr = [matrix[0, 0] for matrix in matrices]

    recalls = [
        recall_at_x_percent(labels, predictions, x_percent=fraction)
        for fraction in (0.10, 0.15, 0.20)
    ]
    recall = "[{}, {}, {}]".format(*[value.round(2) for value in recalls])

    # Only a missing key is an expected condition here; any other error
    # should propagate instead of being silently replaced by a placeholder.
    try:
        aggregation = data["aggregation"]
    except KeyError:
        aggregation = "No aggregated data stored"

    try:
        eis_baseline = data["eis_baseline"]
    except KeyError:
        eis_baseline = "No baseline stored"

    return Experiment(
        dateutil.parser.parse(timestamp_from_path(pkl_file)),
        model_config,
        auc_model,
        data,
        fpr,
        tpr,
        fnr,
        tnr,
        recall,
        aggregation,
        eis_baseline,
    )
def experiment_summary(pkl_file):
    """Load a pickled experiment and condense it into an ``Experiment``.

    Reads ``config``, ``test_end_date``, ``test_labels`` and
    ``test_predictions`` from the pickle (plus ``train_start_date`` when
    present), enriches the config in place with formatted dates and a
    feature summary, and evaluates the predictions at the 10%, 15% and 20%
    thresholds.

    Args:
        pkl_file: Path of the pickled experiment; the experiment timestamp
            is derived from it via ``timestamp_from_path``.

    Returns:
        ``Experiment`` built from (timestamp, model_config, auc, data,
        fpr, tpr, fnr, recall).
    """
    date_format = "%d%b%Y"
    data = read_pickle(pkl_file)

    # The config taken from the pickle is updated in place.
    model_config = data["config"]
    if "parameters" not in model_config:
        model_config["parameters"] = "?"
    model_config["train_start_date"] = (
        data["train_start_date"].strftime(date_format)
        if "train_start_date" in data
        else "01Jan1970"
    )
    model_config["test_end_date"] = data["test_end_date"].strftime(date_format)
    model_config["feature_summary"] = feature_summary(model_config["features"])

    y_true = data["test_labels"]
    y_score = data["test_predictions"]

    # Precision at 1% and the unit count are computed but not passed on.
    prec_at = precision_at_x_percent(y_true, y_score, x_percent=0.01)
    auc_model = compute_AUC(y_true, y_score)
    num_units = len(y_true)

    cutoffs = (0.10, 0.15, 0.20)

    # One fpr_tpr result per cutoff, each indexed by [row, col].
    rate_tables = [fpr_tpr(y_true, y_score, cutoff) for cutoff in cutoffs]
    fpr = [table[0, 1] for table in rate_tables]
    tpr = [table[1, 1] for table in rate_tables]
    fnr = [table[1, 0] for table in rate_tables]

    rounded_recalls = []
    recall_values = [
        recall_at_x_percent(y_true, y_score, x_percent=cutoff)
        for cutoff in cutoffs
    ]
    for value in recall_values:
        rounded_recalls.append(value.round(2))
    recall = "[{}, {}, {}]".format(*rounded_recalls)

    timestamp = dateutil.parser.parse(timestamp_from_path(pkl_file))
    return Experiment(
        timestamp, model_config, auc_model, data, fpr, tpr, fnr, recall)
def experiment_summary(pkl_file):
    """Summarise one pickled experiment, using its stored baseline rates.

    The pickle is loaded with ``read_pickle`` and must provide ``config``,
    ``test_end_date``, ``test_labels``, ``test_predictions`` and
    ``eis_baseline``. ``eis_baseline`` maps each threshold level to an entry
    holding a ``"dsapp"`` matrix (indexed ``[row, col]``) and an ``"eis"``
    value. ``train_start_date`` and ``aggregation`` are optional.

    Args:
        pkl_file: Path of the pickled experiment. It is also passed to
            ``timestamp_from_path`` to recover the experiment timestamp.

    Returns:
        ``Experiment`` built from (timestamp, model_config, auc, data,
        fpr, tpr, fnr, tnr, recall, aggregation, eis_baseline,
        threshold_levels). ``fpr``/``tpr``/``fnr``/``tnr`` are dicts keyed
        by threshold level and ``threshold_levels`` is the sorted key list.

    Raises:
        KeyError: If a required key (including ``eis_baseline``) is missing.
    """
    data = read_pickle(pkl_file)

    # model_config is taken straight from data["config"] and updated in
    # place, so the derived fields are also reachable through `data`.
    model_config = data["config"]
    if "parameters" not in model_config:
        model_config["parameters"] = "?"

    if "train_start_date" not in data:
        model_config["train_start_date"] = "01Jan1970"
    else:
        model_config["train_start_date"] = data["train_start_date"].strftime(
            "%d%b%Y")
    model_config["test_end_date"] = data["test_end_date"].strftime("%d%b%Y")
    model_config["feature_summary"] = feature_summary(model_config["features"])

    labels = data["test_labels"]
    predictions = data["test_predictions"]
    auc_model = compute_AUC(labels, predictions)

    baseline = data["eis_baseline"]
    threshold_levels = sorted(baseline.keys())
    fpr, tpr, fnr, tnr = {}, {}, {}, {}

    # Pre-set so that an empty baseline mapping no longer leaves the name
    # unbound (previously an UnboundLocalError at the return statement).
    eis_baseline = "No baseline stored"
    for level in threshold_levels:
        entry = baseline[level]
        dsapp_matrix = entry["dsapp"]
        fpr[level] = dsapp_matrix[0, 1]
        tpr[level] = dsapp_matrix[1, 1]
        fnr[level] = dsapp_matrix[1, 0]
        tnr[level] = dsapp_matrix[0, 0]
        # Overwritten on every pass: the value reported is the "eis" entry
        # of the highest threshold level.
        eis_baseline = entry["eis"]

    recalls = [
        recall_at_x_percent(labels, predictions, x_percent=percent / 100.)
        for percent in (10., 15., 20.)
    ]
    recall = "[{}, {}, {}]".format(*[value.round(2) for value in recalls])

    # Only a missing key is an expected condition here; any other error
    # should propagate instead of being silently replaced by a placeholder.
    try:
        aggregation = data["aggregation"]
    except KeyError:
        aggregation = "No aggregated data stored"

    return Experiment(
        dateutil.parser.parse(timestamp_from_path(pkl_file)),
        model_config,
        auc_model,
        data,
        fpr,
        tpr,
        fnr,
        tnr,
        recall,
        aggregation,
        eis_baseline,
        threshold_levels,
    )