def execute(self): outfile = os.path.join(self.data_dir, "instance-metrics.csv") results = sorted(self.collect_results(self.data_dir)) mapper(self.execute_result, results) self.results.to_csv(outfile, index=False)
def execute(self):
    results = sorted(self.collect_results(self.data_dir))
    mapper(self.execute_result, results)
    # Persist the report to disk
    for directory, ratings in self.ratings.items():
        with open(os.path.join(directory, "predictions.json"),
                  "w") as results_json:
            json.dump(ratings, results_json)
def calculate_components(self):
    component_file = os.path.join(self.src_dir, "components.json")
    if os.path.exists(component_file):
        # Reuse cached results when they exist.
        with open(component_file, "r") as infile:
            self.component_results = json.load(infile)
    else:
        print("Calculating Components")
        mapper(self.execute_components, self.collect_results())
        with open(component_file, "w") as outfile:
            json.dump(self.component_results, outfile)
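# A minimal, self-contained sketch of the read-through JSON cache used in
# calculate_components above. The function and file names here are
# hypothetical; only the load-if-present / compute-and-persist shape is
# taken from the original.
import json
import os

def cached_json(path, compute):
    # Load the cached result when it exists; otherwise compute and persist it.
    if os.path.exists(path):
        with open(path, "r") as infile:
            return json.load(infile)
    result = compute()
    with open(path, "w") as outfile:
        json.dump(result, outfile)
    return result

# Usage: cached_json("components.json", lambda: {"count": 3})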
def execute(self) -> None: from segmenter.helpers.p_tqdm import p_map as mapper filename = self.args["config"] loader = SourceFileLoader("searchconfig", filename) spec = util.spec_from_loader("searchconfig", loader) searchconfig = util.module_from_spec(spec) # type: ignore spec.loader.exec_module(searchconfig) # type: ignore self.keys = [k for k in searchconfig.search_space.keys()] configs = [ l for l in itertools.product(*searchconfig.search_space.values()) ] mapper(self.execute_result, configs)
def find_best_thresholds(self):
    metrics_files = glob.glob("{}/**/metrics.csv".format(self.data_dir),
                              recursive=True)
    results = sorted(metrics_files)
    df = None
    for result in mapper(self.execute_metrics, results):
        if df is None:
            df = result
        else:
            df = df.append(result)
    # Average the f1-score over folds for each threshold...
    mean_results = df.groupby(
        ["label", "dataset", "aggregator", "threshold", "class"]).agg({
            "f1-score": "mean"
        }).reset_index()
    # ...then keep the best mean f1-score per group.
    best_results = mean_results.groupby(
        ["label", "dataset", "aggregator", "class"]).agg({
            "f1-score": "max"
        }).reset_index()
    # Join back onto the means to recover the threshold that achieved the max.
    best_results = pd.merge(best_results,
                            mean_results,
                            on=list(best_results.columns),
                            how='inner')
    return best_results
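# A minimal, self-contained demo of the mean -> max -> merge trick used in
# find_best_thresholds: the inner join on all of best_results' columns
# (including "f1-score") recovers which threshold produced each group's best
# mean score. The toy frame and its columns are illustrative only.
import pandas as pd

toy = pd.DataFrame({
    "class": ["a", "a", "a", "a"],
    "threshold": [0.25, 0.25, 0.50, 0.50],
    "f1-score": [0.60, 0.70, 0.80, 0.90],
})
mean_results = toy.groupby(["class", "threshold"]).agg({
    "f1-score": "mean"
}).reset_index()  # per-threshold means: 0.65 and 0.85
best_results = mean_results.groupby(["class"]).agg({
    "f1-score": "max"
}).reset_index()  # best mean per class: 0.85
best_results = pd.merge(best_results,
                        mean_results,
                        on=list(best_results.columns),
                        how='inner')
print(best_results)  # one row: class "a", f1-score 0.85, threshold 0.50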
def execute(self):
    outdir = os.path.join(self.data_dir, "results")
    os.makedirs(outdir, exist_ok=True)
    outfile = os.path.join(outdir, "variance.csv")
    if os.path.exists(outfile):
        return
    job_configs = sorted(self.collect_results(self.data_dir))
    self.populate_samples()
    for results_df in mapper(self.execute_result, job_configs):
        self.results = self.results.append(results_df, ignore_index=True)
    self.results.to_csv(outfile)
def execute(self): outfile = os.path.join(self.data_dir, "train_results.csv") # if os.path.exists(outfile): # print("Training results already collected in {}".format( # self.data_dir)) # return for result_df in mapper(self.execute_result, self.collect_results(self.data_dir)): self.results = self.results.append(result_df) if not self.results.empty: print("Writing {}".format(outfile)) self.results.to_csv(outfile, index=False)
def calculate(self):
    from segmenter.helpers.p_tqdm import p_map as mapper
    if os.environ.get("JOB_CONFIG") is None:
        configs = self.collect_results(self.output_dir)
    else:
        configs = [os.environ["JOB_CONFIG"]]
    complete = []
    incomplete = []
    for job_complete, job_incomplete in mapper(self._calculate, configs):
        complete += job_complete
        incomplete += job_incomplete
    complete = sorted(complete, key=lambda x: x[1] + x[2])
    incomplete = sorted(incomplete, key=lambda x: x[1] + x[2])
    return complete, incomplete
def execute(self):
    outfile = os.path.abspath(
        os.path.join(self.data_dir, "..", "variance.csv"))
    if os.path.exists(outfile):
        os.remove(outfile)
    results = glob.glob("{}/../**/variance.csv".format(self.data_dir),
                        recursive=True)
    results = [
        os.path.abspath(r) for r in results if os.path.abspath(r) != outfile
    ]
    for result_df in mapper(VarianceCollector.execute_result, results):
        # Append each frame; write the header only on the first write.
        result_df.to_csv(outfile,
                         mode='a',
                         index=False,
                         header=not os.path.exists(outfile))
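# A self-contained sketch of the append-with-header idiom above: passing
# header=not os.path.exists(outfile) writes the column names only on the
# first to_csv call, so the concatenated file has a single header row.
# The file name and frame contents are illustrative.
import os
import pandas as pd

outfile = "combined.csv"  # hypothetical output path
if os.path.exists(outfile):
    os.remove(outfile)
for frame in (pd.DataFrame({"v": [1]}), pd.DataFrame({"v": [2]})):
    frame.to_csv(outfile,
                 mode='a',
                 index=False,
                 header=not os.path.exists(outfile))
print(open(outfile).read())  # "v\n1\n2\n" -- one header, two rows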
def execute(self):
    self.labels = ["background"] + self.job_config["CLASSES"]
    confusions = {}
    print(self.data_dir)
    results = sorted(self.collect_results(self.data_dir))
    for result in mapper(self.execute_result, results):
        if result is None:
            continue
        clazz = result[0]
        aggregator_name = result[1]
        threshold = result[2]
        key = (clazz, aggregator_name, threshold)
        if key not in confusions:
            confusions[key] = np.zeros(
                (len(self.labels), len(self.labels)), dtype=np.uint64)
        # Sum each worker's confusion matrix into the running total.
        result_data = result[3]
        confusions[key] += result_data
    for key, result_data in confusions.items():
        aggregator_name = key[1]
        threshold = key[2]
        outfile = os.path.join(self.data_dir, aggregator_name, threshold,
                               'confusion.npy')
        np.save(outfile, result_data)
def execute(self):
    mapper(self.execute_result, self.collect_results())
def populate_samples(self):
    samples = glob.glob("{}/**/*.npz".format(self.data_dir), recursive=True)
    for key, prediction in mapper(self.populate_sample, samples):
        self.sample_map[key] = prediction
def execute(self):
    df = None
    results = sorted(self.collect_results(self.data_dir))
    for result in mapper(self.execute_result, results):
        if df is None:
            df = result
        else:
            df = df.append(result)
    for layer_type in df["layer_type"].unique():
        layer_type_results = df[df["layer_type"] == layer_type]
        layer_type_results = layer_type_results.drop("layer_type", axis=1)
        layer_type_results = layer_type_results.drop("fold", axis=1)
        layer_type_results = layer_type_results.drop("boost_fold", axis=1)
        # Sum the histogram counts across folds for each category.
        layer_type_results = layer_type_results.groupby("category").agg(
            'sum').reset_index()
        for category in layer_type_results["category"].unique():
            category_results = layer_type_results[
                layer_type_results["category"] == category]
            category_results = category_results.drop("category", axis=1)
            category_results = category_results.to_numpy()[0]
            weights = 100 * category_results / np.sum(category_results)
            bins = self.bins[:len(weights)]
            # Weighted mean and standard deviation of the output values.
            # Note: the standard deviation needs the square root; without
            # it this is the variance.
            mean = np.sum(np.multiply(category_results,
                                      bins)) / np.sum(category_results)
            std = np.sqrt(
                np.sum(np.multiply(category_results, (bins - mean)**2)) /
                np.sum(category_results))
            fig = plt.figure()
            plt.hist(bins, self.bins, weights=weights)
            percentile = np.percentile(weights, 99.9)
            plt.ylim([0, percentile])
            title = "Output Histogram for {} {} layers".format(
                category, layer_type)
            plt.ylabel("Frequency (%): Peak {:1.2f}% at {:1.2f}".format(
                np.max(weights), self.bins[np.argmax(weights)]))
            used_bins = weights > 0.01
            subtitle = "Frequency Concentration: {:1.2f}% in width {:1.2f}".format(
                np.sum(weights[used_bins]),
                max(bins[used_bins]) - min(bins[used_bins]))
            plt.xlabel("Output Value: Mean {:1.2f}, St. Dev. {:1.2f}".format(
                mean, std))
            plt.title('')
            fig.suptitle(title, y=1.00, fontsize=14)
            plt.figtext(.5, .91, subtitle, fontsize=12, ha='center')
            outdir = os.path.join(self.data_dir, "combined", "results")
            os.makedirs(outdir, exist_ok=True)
            outfile = os.path.join(
                outdir,
                "layer-output-{}-{}.png".format(category, layer_type))
            print(outfile)
            plt.savefig(outfile, dpi=150, bbox_inches='tight',
                        pad_inches=0.5)
            plt.close()
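# A small, self-contained check of the weighted mean / standard deviation
# used above (the original computed the variance but labelled it "St. Dev.";
# the square root restores the intended units). Toy counts and bin centers
# only.
import numpy as np

counts = np.array([1, 2, 1], dtype=np.float64)  # hypothetical bin counts
bins = np.array([0.0, 0.5, 1.0])                # hypothetical bin centers
mean = np.sum(counts * bins) / np.sum(counts)            # 0.5
var = np.sum(counts * (bins - mean)**2) / np.sum(counts)  # 0.125
std = np.sqrt(var)                                        # ~0.354
print(mean, std)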
def execute(self):
    df = None
    results = sorted(self.collect_results(self.data_dir))
    for result in mapper(self.execute_result, results):
        if df is None:
            df = result
        else:
            df = df.append(result)
    baseline_results = df[df["label"] == "Baseline"]
    other_results = df[df["label"] != "Baseline"]
    groups = other_results[[
        "display_aggregator", "label", "dataset", "class"
    ]].drop_duplicates()
    for _, group in groups.iterrows():
        clazz = group["class"]
        aggregator = group["display_aggregator"]
        dataset = group["dataset"]
        label = group["label"]
        group_baseline_results = baseline_results[
            (baseline_results["dataset"] == dataset)
            & (baseline_results["class"] == clazz)]
        group_baseline_results = group_baseline_results[[
            "recall", "false_discovery_rate"
        ]]
        # Keep the best (lowest) false-discovery rate at each recall level.
        group_baseline_results = group_baseline_results.groupby(
            "recall").agg({
                "false_discovery_rate": 'min'
            }).reset_index()
        group_baseline_results = group_baseline_results.sort_values("recall")
        group_results = other_results[
            (other_results["display_aggregator"] == aggregator)
            & (other_results["label"] == label)
            & (other_results["dataset"] == dataset)
            & (other_results["class"] == clazz)]
        group_results = group_results[["recall", "false_discovery_rate"]]
        group_results = group_results.groupby("recall").agg({
            "false_discovery_rate": 'min'
        }).reset_index()
        # Anchor the curve at (0, 0) and (1, 1) before integrating.
        group_results = group_results.append(
            {
                "recall": 0.,
                "false_discovery_rate": 0.
            }, ignore_index=True)
        group_results = group_results.append(
            {
                "recall": 1.,
                "false_discovery_rate": 1.
            }, ignore_index=True)
        group_results = group_results.sort_values("recall")
        recall = group_results["recall"]
        fdr = group_results["false_discovery_rate"]
        auc_fdr = np.trapz(recall, fdr)
        # Plot the false-discovery rate curve.
        fig, ax = self.visualize(recall, fdr)
        ax.set_xlabel('False Discovery Rate (1 - Precision)')
        ax.set_ylabel('True Positive Rate (Sensitivity)')
        ax.plot(group_baseline_results["false_discovery_rate"],
                group_baseline_results["recall"], "o")
        subtitle = "{} - Class {}, {} Aggregator".format(
            label, clazz, aggregator)
        plt.figtext(.5, .97, subtitle, fontsize=14, ha='center')
        plt.title(
            'True Positive Rate vs. False Discovery Rate (AUC {:1.2f})'.
            format(auc_fdr),
            y=1.17,
            fontsize=16)
        plt.legend([label, "Baseline"],
                   bbox_to_anchor=(0, 1, 1, 0.2),
                   loc="lower left",
                   ncol=2,
                   frameon=False)
        outdir = os.path.join(self.data_dir, "combined", "results", label,
                              clazz)
        os.makedirs(outdir, exist_ok=True)
        outfile = os.path.join(
            outdir, "{}-auc-false-discovery.png".format("_".join(
                aggregator.split())))
        print(outfile)
        plt.savefig(outfile, dpi=150, bbox_inches='tight', pad_inches=0.5)
        plt.close()
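# A minimal check of the AUC call above. Note the argument order:
# np.trapz(y, x) integrates y over x, so np.trapz(recall, fdr) is the area
# under the TPR-vs-FDR curve. The points below are illustrative.
import numpy as np

fdr = np.array([0.0, 0.5, 1.0])     # x: false discovery rate
recall = np.array([0.0, 0.8, 1.0])  # y: true positive rate
print(np.trapz(recall, fdr))        # 0.65 = trapezoid area under the curve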
def execute(self):
    print(self.data_dir)
    results = sorted(self.collect_results(self.data_dir))
    mapper(self.execute_result, results)
def execute(self):
    df = None
    results = sorted(self.collect_results(self.data_dir))
    for result in mapper(self.execute_result, results):
        if df is None:
            df = result
        else:
            df = df.append(result)
    df["display_aggregator"] = df["aggregator"].apply(
        lambda x: self.aggregator_map[x])
    df = df.drop("aggregator", axis=1)
    # Average dice across folds, then keep the best threshold per group.
    mean_results = df.groupby(
        ["label", "dataset", "display_aggregator", "threshold",
         "class"]).agg({
             "dice": "mean"
         }).reset_index()
    best_results = mean_results.groupby(
        ["label", "dataset", "display_aggregator", "class"]).agg({
            "dice": "max"
        }).reset_index()
    best_results = pd.merge(best_results,
                            mean_results,
                            on=list(best_results.columns),
                            how='inner')
    # Join back to the raw rows; "dice" is excluded from the join keys, so
    # the merge keeps the per-fold scores ("dice_y") alongside the group
    # means ("dice_x").
    join_columns = list(best_results.columns)
    join_columns.remove("dice")
    filtered_results = pd.merge(best_results, df, on=join_columns,
                                how='inner')
    filtered_results["dice"] = filtered_results["dice_y"]
    filtered_results = filtered_results.drop("dice_x", axis=1)
    filtered_results = filtered_results.drop("dice_y", axis=1)
    baseline_results = df[df["label"] == "Baseline"]
    sns.set(rc={'figure.figsize': (11, 2.5)})
    for aggregator in filtered_results["display_aggregator"].unique():
        if aggregator == "":
            continue
        aggregator_results = filtered_results[
            filtered_results["display_aggregator"] == aggregator]
        comparable_results = pd.concat(
            [aggregator_results, baseline_results])
        plot = sns.boxplot(x='class',
                           y='dice',
                           data=comparable_results,
                           hue='label')
        fig = plot.get_figure()
        plt.legend(bbox_to_anchor=(0, 1, 1, 0.2),
                   loc="lower left",
                   ncol=len(comparable_results["label"].unique()),
                   frameon=False)
        title = 'F1-Score by Model and Class, {} Aggregator'.format(
            aggregator)
        plt.title('')
        fig.suptitle(title, y=1.08, fontsize=14)
        plt.xlabel("Class")
        plt.ylabel("F1-Score")
        outdir = os.path.join(self.data_dir, "combined", "results")
        os.makedirs(outdir, exist_ok=True)
        outfile = os.path.join(
            outdir,
            "{}-f1-score.png".format(aggregator.replace(" ", "_").lower()))
        fig.savefig(outfile, dpi=300, bbox_inches='tight', pad_inches=0.5)
        plt.close()
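# A hedged, toy illustration of the suffix handling above: when two merged
# frames share a column ("dice") that is not a join key, pandas renames the
# copies to dice_x (left) and dice_y (right). The frames and values here are
# illustrative only.
import pandas as pd

left = pd.DataFrame({"class": ["a"], "dice": [0.85]})            # group mean
right = pd.DataFrame({"class": ["a", "a"], "dice": [0.8, 0.9]})  # per fold
merged = pd.merge(left, right, on=["class"], how="inner")
print(merged.columns.tolist())     # ['class', 'dice_x', 'dice_y']
merged["dice"] = merged["dice_y"]  # keep the per-fold scores
merged = merged.drop(["dice_x", "dice_y"], axis=1)
print(merged)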