import os
import sys
import datetime
from math import ceil
from time import clock

import numpy as np

# Project-local helpers (ds, pb, tr, st, Validator, progress, convert_seconds,
# save) are imported elsewhere in this module.


def run(model, exp, terms, save_freq=5, data=None):
    exp_dir = ds.get_path_to_dataset(pb.Experiment.Dataset.Name(exp.dataset))
    save_dir = os.path.join(exp_dir, exp.description)

    ##
    # Load data from .npz archive created by invoking
    # deep_learning/utils/archive.py
    ##
    if data:
        x_train, y_train, x_test, y_test = data
        x_train, x_test = tr.transform(x_train, x_test)
        h_file = None  # no HDF5 handle to close when data is passed in
    else:
        h_file, (x_train, y_train, x_test, y_test) = ds.load_dataset(
            pb.Experiment.Dataset.Name(exp.dataset),
            exp.coordinates + '/transformed')
    data = x_train, y_train, x_test, y_test

    exp_file_name = exp.description + '.exp'

    # Start training
    train_length = x_train.shape[0]
    # float() guards against Python 2 integer division truncating before ceil
    num_batches = int(ceil(train_length / float(exp.batch_size)))
    num_test_batches = int(ceil(x_test.shape[0] / float(exp.batch_size)))
    valid = Validator(exp, terms)
    eTimes = np.array([])
    valid._clock = clock()
    model.summary()

    while valid.check():
        t = clock()
        if valid._num_epochs:
            print("Epoch {}/{}".format(valid.epochs + 1, valid._num_epochs))
        else:
            print("Epoch {}".format(valid.epochs + 1))

        bETA = 0
        bTimes = np.array([])
        for b in xrange(num_batches):
            bt = clock()
            # Update progress bar
            progress(b, num_batches, exp.batch_size, bETA)
            # Train on a batch
            x_batch = x_train[b * exp.batch_size:(b + 1) * exp.batch_size, :]
            y_batch = y_train[b * exp.batch_size:(b + 1) * exp.batch_size, :]
            model.train_on_batch(x_batch, y_batch)
            bTimes = np.append(bTimes, clock() - bt)
            bETA = np.median(bTimes) * (num_batches - b - 1)
        # Finish progress bar
        progress(num_batches, num_batches, exp.batch_size, 0, end='\n',
                 time=clock() - t)

        # Calculate stats and add the epoch results to the experiment object
        epoch = exp.results.add()

        print("Evaluating Train")
        epoch.train_loss, epoch.train_accuracy = model.evaluate_generator(
            ((x_train[i * exp.batch_size:(i + 1) * exp.batch_size],
              y_train[i * exp.batch_size:(i + 1) * exp.batch_size])
             for i in xrange(num_batches)),
            num_batches,
            max_q_size=min((num_batches // 2, 10)))

        print("Evaluating Test")
        epoch.test_loss, epoch.test_accuracy = model.evaluate_generator(
            ((x_test[i * exp.batch_size:(i + 1) * exp.batch_size],
              y_test[i * exp.batch_size:(i + 1) * exp.batch_size])
             for i in xrange(num_test_batches)),
            num_test_batches,
            max_q_size=min((num_test_batches // 2, 10)))

        print("Calculating Sig")
        epoch.s_b = st.significance(model, data)

        # epoch.auc is printed below, so the AUC must actually be computed
        print("Calculating AUC")
        epoch.auc = st.AUC(model, data, experiment_epoch=epoch)

        for r in st.num_of_each_cell(model, data):
            epoch.matrix.add().columns.extend(r)

        print("Making CFM")
        matrix = st.confusion_matrix(model, data, offset='\t ')

        epoch.num_seconds = clock() - t

        print("Getting output")
        output = st.get_output_distro(model, data)
        epoch.output.background.extend(output["background"])
        epoch.output.signal.extend(output["signal"])

        # Print statistics
        print("\t Train Accuracy: {:.3f}\tTest Accuracy: {:.3f}".format(
            epoch.train_accuracy, epoch.test_accuracy))
        if valid.update_w():
            print("\t Slope: {:.5f} (test_accuracy / second)".format(
                valid.slope))
        print("\t Time this epoch: {:.2f}s".format(epoch.num_seconds), end='')
        if valid._num_epochs:
            eTimes = np.append(eTimes, epoch.num_seconds)
            print("\tFinal ETA: {}".format(
                convert_seconds(
                    np.median(eTimes) * (valid._num_epochs - valid.epochs))))
        else:
            print()
        print("\t Significance (S/sqrt(B)): {:.2f}".format(epoch.s_b))
        print("\t Area Under the Curve (efficiency): {:.3f}".format(epoch.auc))
        print(matrix)

        # Save the model periodically
        if (len(exp.results) % save_freq) == 0:
            save(model, exp, save_dir, exp_file_name)
            print("\t Saved the model\n")
        sys.stdout.flush()

    exp.end_date_time = str(datetime.datetime.now())
    exp.total_time = valid.time
    print("\n" + valid.failed)
    print("Total Time: {}".format(convert_seconds(valid.time)))
    save(model, exp, save_dir, exp_file_name)
    if h_file is not None:
        h_file.close()
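

# The two evaluate_generator calls above rebuild the same slicing logic
# inline. As a sketch, the pattern could be factored into a reusable
# generator (a hypothetical helper; the name _batches is not part of the
# original code):
def _batches(x, y, batch_size):
    """Yield successive (x, y) mini-batches; the last one may be short."""
    for i in xrange(int(ceil(x.shape[0] / float(batch_size)))):
        yield (x[i * batch_size:(i + 1) * batch_size],
               y[i * batch_size:(i + 1) * batch_size])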
# Excerpt from the statistics module referenced above as `st`; the original
# text begins mid-function. The AUC header below is reconstructed: the
# signature is inferred from the call sites (AUC(model, data) and
# AUC(model, data, experiment_epoch=epoch)) and the names used in the body,
# and the datapoints default is an assumption. np, plt, trapz, efficiencies,
# and MATRIX are defined earlier in that module.
def AUC(model, data, datapoints=100, save=None, experiment_epoch=None):
    """Area under the signal-efficiency vs. background-inefficiency curve."""
    e_b = np.zeros(datapoints)
    e_s = np.zeros(datapoints)
    for i in xrange(datapoints):
        # 1.0 forces float division under Python 2
        cutoff = 1 - i * (1.0 / datapoints)
        e_b[i], e_s[i] = efficiencies(model, data, cutoff)[:, 1]
        if experiment_epoch:
            point = experiment_epoch.curve.add()
            point.signal = e_s[i]
            point.background = e_b[i]
            point.cutoff = cutoff
    if save:
        plt.plot(e_b, e_s)
        plt.title("Efficiency Curve")
        plt.ylabel("Signal Efficiency")
        plt.xlabel("Background Inefficiency")
        plt.savefig(save, format="png")
    return trapz(e_s, e_b)


def confusion_matrix(model, data, offset='', **kwargs):
    eff = efficiencies(model, data, **kwargs)
    return MATRIX.format(offset, *(eff * 100).flatten())


if __name__ == "__main__":
    from deep_learning.trainNN import load_model
    model = load_model("ttHLep/U_Optimal")
    x_train, y_train, x_test, y_test = ds.load_dataset("ttHLep", "Unsorted")
    x_train, x_test = tr.transform(x_train, x_test)
    data = (x_train, y_train, x_test, y_test)
    print(significance(model, data))
    print(AUC(model, data))
    print(confusion_matrix(model, data))
    print(confusion_matrix(model, data, over_rows=False))
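
# Note on the AUC return value above: numpy's trapz(y, x) integrates y over x
# with the trapezoidal rule, so AUC is the area under the
# (background inefficiency, signal efficiency) curve. A tiny check with
# made-up values:
#
#     >>> from numpy import trapz
#     >>> trapz([0.0, 0.8, 1.0], [0.0, 0.5, 1.0])
#     0.65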