Example #1
0
def remove_abs(data):
    """Return a new list containing only the even-indexed elements of *data*.

    Despite the name, nothing is made absolute here: the function simply
    drops every odd-indexed entry (indices 1, 3, 5, ...).

    Args:
        data: any sequence that supports slicing (e.g. a list).

    Returns:
        A new list with the elements of ``data`` at indices 0, 2, 4, ...
    """
    printt("Removing Abs")
    # data[::2] keeps indices 0, 2, 4, ... — same result as the original
    # index loop, done with a single slice.
    return list(data[::2])
Example #2
0
    def loss(self, data, model, tt, name):
        """Compute total and mean per-protein loss for one data split.

        Args:
            data: dataset container forwarded to get_predictions_loss.
            model: model forwarded to get_predictions_loss.
            tt: split tag (e.g. "train" or "test"); used for prediction
                and as the suffix of the returned header names.
            name: human-readable label used only in the printed report.

        Returns:
            (headers, values) where headers is
            ["loss_<tt>", "ave_loss_<tt>"] and values is
            [total_loss, average_loss].
        """
        # Predictions (labels) are not needed here — only the losses.
        _, losses = self.get_predictions_loss(data, model, tt)
        total_loss = np.sum(losses)
        printt("{} total loss: {:0.3f}".format(name, total_loss))
        ave_loss = np.mean(losses)
        printt("{} average loss per protein: {:0.3f}".format(name, ave_loss))

        return ["loss_" + tt, "ave_loss_" + tt], [total_loss, ave_loss]
Example #3
0
 def process_results_loss(self, exp_specs, data, model, name):
     """Run each configured loss metric and collect its headers and values.

     Looks up a bound attribute named after every entry in the metric list
     (e.g. ``self.loss_train``) and concatenates the headers and result
     values those callables return, if specified in exp_specs.
     """
     printt("Results for {}".format(name))
     self.test_batch_size = exp_specs["test_batch_size"]
     all_headers, all_values = [], []
     for metric_name in ["loss_train", "loss_test"]:
         metric_fn = getattr(self, metric_name)
         hdrs, vals = metric_fn(data, model, name)
         all_headers.extend(hdrs)
         all_values.extend(vals)
     return all_headers, all_values
Example #4
0
    def fit_model_and_activate(self, exp_specs, data, model, layer_specs, rep, outdir):
        """Train the model, log metrics every 10 epochs, then dump activations.

        Trains for exp_specs["num_epochs"] epochs, appending train/test
        metrics to a per-replication CSV every 10th epoch, then writes
        activations for the validation and test splits plus the model
        weights into per-replication subdirectories of *outdir*.

        Args:
            exp_specs: dict with at least "num_epochs" and "minibatch_size".
            data: dict of splits; "train", "val" and "test" are used here.
            model: model object; closed before returning.
            layer_specs: forwarded to activate_for_proteins/extract_weights.
            rep: replication index, used to name output files/directories.
            outdir: base output directory (must already exist).

        Returns:
            (headers, result) from the final metrics computation.
        """
        printt("Fitting Model")

        def _make_output_dir(prefix):
            # Reproduces the original path style — trailing slash kept in
            # case downstream helpers concatenate filenames onto it.
            path = outdir + "/" + prefix + "_" + str(rep) + "/"
            if not os.path.exists(path):
                os.mkdir(path)
            return path

        results_log = None  # per-replication CSV; created lazily on first log
        epoch = 0  # guards against num_epochs == 0 (epoch would be unbound)
        # train for specified number of epochs
        for epoch in range(1, exp_specs["num_epochs"] + 1):
            self.train_epoch(data["train"], model, exp_specs["minibatch_size"])

            if epoch % 10 == 0:
                # calculate train and test metrics
                headers, result = self.results_processor.process_results(
                    exp_specs, data, model, "epoch_" + str(epoch))
                if results_log is None:
                    # create results log with a header row on first write
                    # (original triggered on epoch == 10, which is the same
                    # first multiple of 10 — this form is just more robust)
                    results_log = os.path.join(outdir, "path_" + str(rep) + ".csv")
                    with open(results_log, 'w') as f:
                        f.write("{}\n".format(",".join(["epoch"] + headers)))

                # append this epoch's results to the CSV
                with open(results_log, 'a') as f:
                    f.write("{}, {}\n".format(epoch, ",".join([str(r) for r in result])))

                self.results_processor.reset()

        # final train and test metrics after the last epoch
        headers, result = self.results_processor.process_results(
            exp_specs, data, model, "epoch_" + str(epoch))

        printt("*"*30)
        printt("Activations for validation set")
        dir_out = _make_output_dir("val")
        self.activate_for_proteins(data["val"], model, layer_specs, dir_out)

        printt("*"*30)
        printt("Activations for test set")
        dir_out = _make_output_dir("test")
        self.activate_for_proteins(data["test"], model, layer_specs, dir_out)

        # weights are extracted using the test split
        dir_out = _make_output_dir("weights")
        self.extract_weights(data["test"], model, layer_specs, dir_out)

        # clean up
        self.results_processor.reset()
        model.close()
        return headers, result
Example #5
0
 def fit_model(self, exp_specs, data, model):
     """
     Train the model for the configured number of epochs, then compute
     final train/test metrics and clean up.

     Args:
         exp_specs: dict with at least "num_epochs" and "minibatch_size".
         data: dict of dataset splits; data["train"] feeds training.
         model: model object; closed before returning.

     Returns:
         (headers, result) from the results processor for the last epoch.
     """
     printt("Fitting Model")
     epoch = 0  # guards against num_epochs == 0 (epoch would be unbound below)
     # train for specified number of epochs
     for epoch in range(1, exp_specs["num_epochs"] + 1):
         self.train_epoch(data["train"], model, exp_specs["minibatch_size"])
     # calculate train and test metrics
     headers, result = self.results_processor.process_results(
         exp_specs, data, model, "epoch_" + str(epoch))
     # clean up
     self.results_processor.reset()
     model.close()
     return headers, result
Example #6
0
 def roc(self, data, model, tt, name):
     """Report per-protein ROC AUC statistics for one data split.

     Computes one ROC curve per protein and summarizes the per-protein
     AUCs by their mean and median. (The original also accumulated the
     fpr/tpr arrays into lists that were never read — dropped here.)

     Args:
         data: dict of splits; data[tt] is a list of protein dicts whose
             "label" entry is a 2-column array (column 1 used as target).
         model: forwarded to get_predictions_loss.
         tt: split tag, also suffixes the returned header names.
         name: label used only in the printed report.

     Returns:
         (["auc_prot_ave_<tt>", "auc_prot_med_<tt>"],
          [mean_auc, median_auc])
     """
     scores = self.get_predictions_loss(data, model, tt)[0]
     labels = [prot["label"][:, 1] for prot in data[tt]]
     roc_aucs = []
     for s, l in zip(scores, labels):
         fpr, tpr, _ = roc_curve(l, s)
         roc_aucs.append(auc(fpr, tpr))
     auc_prot_med = np.median(roc_aucs)
     auc_prot_ave = np.mean(roc_aucs)
     printt("{} average protein auc: {:0.3f}".format(name, auc_prot_ave))
     printt("{} median protein auc: {:0.3f}".format(name, auc_prot_med))
     return ["auc_prot_ave_" + tt, "auc_prot_med_" + tt], [auc_prot_ave, auc_prot_med]
Example #7
0
 def auprc(self, data, model, tt, name):
     """Report per-protein average-precision (AUPRC) statistics.

     Also warns when a protein's predicted scores are (nearly) constant,
     since ranking-based metrics are unreliable in that case.

     NOTE(review): predictions appear to be 2-D (column 0 is inspected for
     degeneracy) while the full array is handed to average_precision_score
     — confirm that is intended.
     """
     scores = self.get_predictions_loss(data, model, tt)[0]
     labels = [prot["label"][:, 1] for prot in data[tt]]
     degenerate_count = 0
     per_protein_auprcs = []
     for preds, lbls in zip(scores, labels):
         first_col = preds[:, 0]
         if np.allclose(first_col, np.zeros_like(first_col) + np.mean(first_col)):
             degenerate_count += 1
         per_protein_auprcs.append(average_precision_score(lbls, preds))
     if degenerate_count > 0:
         printt("For {} proteins, all predicted scores are close to each other, auprc may be based on improper sorting".format(degenerate_count))
     med_auprc = np.median(per_protein_auprcs)
     avg_auprc = np.mean(per_protein_auprcs)
     printt("{} average auprc: {:0.3f}".format(name, avg_auprc))
     printt("{} median auprc: {:0.3f}".format(name, med_auprc))
     return ["auprc_avg_" + tt, "auprc_med_" + tt], [avg_auprc, med_auprc]
Example #8
0
## Random Seeds
# each random seed represents an experimental replication.
# You can add or remove list elements to change the number
# of replications for an experiment.
seeds = [
    {"tf_seed": 649737, "np_seed": 29820},
    {"tf_seed": 395408, "np_seed": 185228},
    {"tf_seed": 252356, "np_seed": 703889},
    {"tf_seed": 343053, "np_seed": 999360},
    {"tf_seed": 743746, "np_seed": 67440}
]

# Load experiment specified in system args
exp_file = "base_network.yml"
printt("Running Experiment File: {}".format(exp_file))
# str.split always yields at least one element, so the old
# '"." in exp_file' guard was redundant.
f_name = exp_file.split(".")[0]
# Context manager closes the spec file (the original open(...).read()
# leaked the handle); safe_load avoids arbitrary object construction —
# yaml.load without an explicit Loader is also an error in PyYAML >= 6.
with open(os.path.join(experiment_directory, exp_file), 'r') as spec_file:
    exp_specs = yaml.safe_load(spec_file)

# setup output directory
outdir = os.path.join(output_directory, f_name)
if not os.path.exists(outdir):
    os.mkdir(outdir)
results_processor = ResultsProcessor()

# create results log (truncate any file left from a previous run)
results_log = os.path.join(outdir, "results.csv")
with open(results_log, 'w') as f:
    f.write("")

# write experiment specifications to file