def remove_abs(data):
    """Return a new list containing only the even-indexed elements of *data*.

    Note: despite the name, no absolute values are computed — the function
    simply drops every odd-indexed element (indices 1, 3, 5, ...).

    Parameters
    ----------
    data : sequence
        Any indexable sequence.

    Returns
    -------
    list
        Elements of *data* at indices 0, 2, 4, ...
    """
    printt("Removing Abs")
    # A step-2 slice keeps exactly the even-indexed elements, replacing the
    # original range(len(data)) loop with the `% 2 == 0` filter.
    return list(data[::2])
def loss(self, data, model, tt, name):
    """Report total and per-protein-average loss for the split *tt*.

    Prints both figures via printt and returns a (headers, values) pair:
    ["loss_<tt>", "ave_loss_<tt>"] alongside [total, average].
    """
    _, per_protein_losses = self.get_predictions_loss(data, model, tt)
    total_loss = np.sum(per_protein_losses)
    printt("{} total loss: {:0.3f}".format(name, total_loss))
    mean_loss = np.mean(per_protein_losses)
    printt("{} average loss per protein: {:0.3f}".format(name, mean_loss))
    return ["loss_" + tt, "ave_loss_" + tt], [total_loss, mean_loss]
def process_results_loss(self, exp_specs, data, model, name):
    """Run each configured loss metric and collect its headers and values.

    Each metric name is looked up as a method on self (called with
    (data, model, name)); the per-metric header/value lists are
    concatenated and returned as one (headers, results) pair.
    """
    printt("Results for {}".format(name))
    self.test_batch_size = exp_specs["test_batch_size"]
    # other metrics previously considered:
    # "roc_train", "roc_test", "auprc_train", "auprc_test"
    metrics = ["loss_train", "loss_test"]
    all_headers = []
    all_results = []
    for metric_name in metrics:
        headers, results = getattr(self, metric_name)(data, model, name)
        all_headers.extend(headers)
        all_results.extend(results)
    return all_headers, all_results
def fit_model_and_activate(self, exp_specs, data, model, layer_specs, rep, outdir):
    """Train *model*, log metrics every 10 epochs, then dump activations
    and weights for replication *rep*.

    Trains for exp_specs["num_epochs"] epochs. Every 10th epoch, metrics
    are computed and appended to outdir/path_<rep>.csv (the header row is
    written on the first logging pass, epoch 10). After training, a final
    metrics pass is run, then activations for the validation and test sets
    and the extracted weights are written to per-replication
    subdirectories of *outdir*.

    Returns the (headers, result) pair from the final metrics pass.
    """
    printt("Fitting Model")
    results_log = os.path.join(outdir, "path_" + str(rep) + ".csv")
    # train for specified number of epochs
    for epoch in range(1, exp_specs["num_epochs"] + 1):
        self.train_epoch(data["train"], model, exp_specs["minibatch_size"])
        if epoch % 10 == 0:
            # calculate train and test metrics
            headers, result = self.results_processor.process_results(
                exp_specs, data, model, "epoch_" + str(epoch))
            if epoch == 10:
                # first logging pass: create results log with a header row
                with open(results_log, 'w') as f:
                    f.write("{}\n".format(",".join(["epoch"] + headers)))
            # append this epoch's results
            with open(results_log, 'a') as f:
                f.write("{}, {}\n".format(epoch, ",".join([str(r) for r in result])))
            self.results_processor.reset()
    # final train and test metrics (reuses the last loop value of `epoch`)
    headers, result = self.results_processor.process_results(
        exp_specs, data, model, "epoch_" + str(epoch))
    printt("*" * 30)
    printt("Activations for validation set")
    dir_out = outdir + "/val_" + str(rep) + "/"
    # makedirs(exist_ok=True) replaces the racy exists()+mkdir() pattern
    os.makedirs(dir_out, exist_ok=True)
    self.activate_for_proteins(data["val"], model, layer_specs, dir_out)
    printt("*" * 30)
    printt("Activations for test set")
    dir_out = outdir + "/test_" + str(rep) + "/"
    os.makedirs(dir_out, exist_ok=True)
    self.activate_for_proteins(data["test"], model, layer_specs, dir_out)
    dir_out = outdir + "/weights_" + str(rep) + "/"
    os.makedirs(dir_out, exist_ok=True)
    self.extract_weights(data["test"], model, layer_specs, dir_out)
    # clean up
    self.results_processor.reset()
    model.close()
    return headers, result
def fit_model(self, exp_specs, data, model):
    """ trains model by iterating minibatches for specified number of epochs """
    printt("Fitting Model")
    num_epochs = exp_specs["num_epochs"]
    minibatch_size = exp_specs["minibatch_size"]
    for epoch in range(1, num_epochs + 1):
        # one full pass over the training data
        self.train_epoch(data["train"], model, minibatch_size)
        # train/test metrics for this epoch
        headers, result = self.results_processor.process_results(
            exp_specs, data, model, "epoch_" + str(epoch))
    # clean up
    self.results_processor.reset()
    model.close()
    return headers, result
def roc(self, data, model, tt, name):
    """Compute per-protein ROC AUC for split *tt*.

    For each protein, an ROC curve is fit between its predicted scores and
    the positive-class labels (column 1 of the protein's label matrix);
    the mean and median AUC across proteins are printed and returned as
    (["auc_prot_ave_<tt>", "auc_prot_med_<tt>"], [mean, median]).
    """
    scores = self.get_predictions_loss(data, model, tt)[0]
    # column 1 of each protein's label matrix is taken as the positive class
    labels = [prot["label"][:, 1] for prot in data[tt]]
    roc_aucs = []
    for s, l in zip(scores, labels):
        # fpr/tpr are only needed to compute the AUC; the original
        # accumulated them in unused lists, which has been removed.
        fpr, tpr, _ = roc_curve(l, s)
        roc_aucs.append(auc(fpr, tpr))
    auc_prot_med = np.median(roc_aucs)
    auc_prot_ave = np.mean(roc_aucs)
    printt("{} average protein auc: {:0.3f}".format(name, auc_prot_ave))
    printt("{} median protein auc: {:0.3f}".format(name, auc_prot_med))
    return ["auc_prot_ave_" + tt, "auc_prot_med_" + tt], [auc_prot_ave, auc_prot_med]
def auprc(self, data, model, tt, name):
    """Compute per-protein average precision (AUPRC) for split *tt*.

    Prints the mean and median AUPRC across proteins (after warning about
    proteins whose predicted scores are nearly constant, since ranking —
    and therefore AUPRC — is then arbitrary) and returns
    (["auprc_avg_<tt>", "auprc_med_<tt>"], [mean, median]).
    """
    scores = self.get_predictions_loss(data, model, tt)[0]
    labels = [prot["label"][:, 1] for prot in data[tt]]
    auprcs = []
    degenerate = 0
    for preds, lbls in zip(scores, labels):
        first_col = preds[:, 0]
        # near-constant score vectors make the ranking essentially random
        if np.allclose(first_col, np.full_like(first_col, np.mean(first_col))):
            degenerate += 1
        # NOTE(review): the full `preds` array (not just one column) is passed
        # here — assumes average_precision_score accepts its shape; confirm.
        auprcs.append(average_precision_score(lbls, preds))
    if degenerate > 0:
        printt("For {} proteins, all predicted scores are close to each other, auprc may be based on improper sorting".format(degenerate))
    med_auprc = np.median(auprcs)
    avg_auprc = np.mean(auprcs)
    printt("{} average auprc: {:0.3f}".format(name, avg_auprc))
    printt("{} median auprc: {:0.3f}".format(name, med_auprc))
    return ["auprc_avg_" + tt, "auprc_med_" + tt], [avg_auprc, med_auprc]
## Random Seeds
# each random seed represents an experimental replication.
# You can add or remove list elements to change the number
# of replications for an experiment.
seeds = [
    {"tf_seed": 649737, "np_seed": 29820},
    {"tf_seed": 395408, "np_seed": 185228},
    {"tf_seed": 252356, "np_seed": 703889},
    {"tf_seed": 343053, "np_seed": 999360},
    {"tf_seed": 743746, "np_seed": 67440},
]

# Load experiment spec
# NOTE(review): the original comment said "specified in system args", but
# exp_file is hard-coded here — confirm whether argv handling was intended.
exp_file = "base_network.yml"
printt("Running Experiment File: {}".format(exp_file))
f_name = exp_file.split(".")[0] if "." in exp_file else exp_file
# Context manager closes the spec file deterministically; the original
# leaked the handle to the garbage collector.
with open(os.path.join(experiment_directory, exp_file), 'r') as spec_f:
    # SECURITY NOTE(review): yaml.load without an explicit Loader can run
    # arbitrary constructors; switch to yaml.safe_load if this file can
    # ever come from an untrusted source.
    exp_specs = yaml.load(spec_f.read())

# setup output directory
outdir = os.path.join(output_directory, f_name)
if not os.path.exists(outdir):
    os.mkdir(outdir)
results_processor = ResultsProcessor()

# create results log (truncate any previous run's file)
results_log = os.path.join(outdir, "results.csv")
with open(results_log, 'w') as f:
    f.write("")
# write experiment specifications to file