def multiclass_forecast_full(num_classes_train=TRAIN_LENGTH,
                             num_classes_predict=PREDICT_LENGTH,
                             model=None,
                             categories=False,
                             top_n=1):
    """Train (or load) a logistic-regression model and score it on the test split.

    Args:
        num_classes_train: Forwarded to ``util.prep_dataset_v3``.
        num_classes_predict: Forwarded to ``util.prep_dataset_v3``.
        model: Path to a pickled model. When falsy, a new model is trained on
            the training split and pickled to ``log_reg_full_train.pickle``
            (or ``log_reg_full_train_categories.pickle`` when ``categories``
            is true).
        categories: When true, labels are mapped through
            ``util.degrees_to_categories`` before training and evaluation.
        top_n: Forwarded to the ``util`` evaluation helpers.

    Returns:
        Tuple ``(macro_f1, reg)``: the macro-averaged F1 on the test split and
        the fitted/loaded model.
    """
    (X_train, X_val, X_test, y_train, y_val, y_test), vectorizer = util.prep_dataset_v3(
        num_classes_train=num_classes_train,
        num_classes_predict=num_classes_predict,
        vectorize=True)

    if categories:
        y_train = util.degrees_to_categories(y_train)
        y_val = util.degrees_to_categories(y_val)
        y_test = util.degrees_to_categories(y_test)

    if not model:
        train_score, reg = train_log_reg(X_train, y_train)
        print(train_score)
        if categories:
            dump_path = "log_reg_full_train_categories.pickle"
        else:
            dump_path = "log_reg_full_train.pickle"
        # Context manager so the file is flushed and closed deterministically.
        with open(dump_path, 'wb') as handle:
            pickle.dump(reg, handle)
    else:
        # NOTE: unpickling executes arbitrary code; only pass trusted paths.
        with open(model, 'rb') as handle:
            reg = pickle.load(handle)

    macro_f1 = util.evaluate_model(X_test,
                                   y_test,
                                   reg,
                                   output_dict=True,
                                   top_n=top_n)['macro avg']['f1-score']
    print(
        util.evaluate_model(X_test,
                            y_test,
                            reg,
                            output_dict=False,
                            top_n=top_n))

    # Forward `categories` so the bias evaluation uses the same label space
    # the model was trained on (previously hard-coded to False).
    # NOTE(review): num_classes_predict is pinned to PREDICT_LENGTH here rather
    # than the argument, matching the other callers — confirm that is intended.
    util.evaluate_model_bias(reg,
                             vectorizer,
                             evaluate_model_bias_single_df,
                             num_classes_predict=PREDICT_LENGTH,
                             categories=categories,
                             top_n=top_n,
                             test=True)
    return macro_f1, reg
def main(device=torch.device('cuda:0')):
    """Restore the U-Net checkpoint and print its accuracy/loss on the test split.

    The dataset size is selected with the ``--datasize`` / ``-d`` CLI flag.
    """
    # CLI arguments
    cli = arg.ArgumentParser(
        description='We all know what we are doing. Fighting!')
    cli.add_argument("--datasize",
                     "-d",
                     default="small",
                     type=str,
                     help="data size you want to use, small, medium, total")
    options = cli.parse_args()

    # Data loaders (only the test loader is consumed below)
    tr_loader, va_loader, te_loader = getTrainingValidationTestingData(
        options.datasize,
        "data/nyu.zip",
        batch_size=config("unet.batch_size"))

    net = Net()

    # Restore the latest checkpoint, if one exists
    print("Loading unet...")
    checkpoint_dir = util.config("unet.checkpoint")
    net, start_epoch, stats = util.restore_checkpoint(net, checkpoint_dir)

    acc, loss = util.evaluate_model(net, te_loader, device)
    print(f'Test Accuracy:{acc}')
    print(f'Test Loss:{loss}')
def main():
    """Build the configured model, then train / predict / evaluate as configured."""
    # Any unrecognised model name falls back to Muzip3.
    recognised = ('Baseline', "Muzip", "Muzip2", "Muzip3")
    class_name = config.model if config.model in recognised else "Muzip3"
    model = getattr(models, class_name)()

    def _latest_weights():
        # Most recent weights file inside the model save directory.
        return '/'.join((config.model_save_dir,
                         get_recent_weights_path(config.model_save_dir)))

    if config.train:
        # With config.restart set, training starts without preloaded weights;
        # otherwise the most recent weights are loaded first.
        if config.restart:
            weights_path = None
        else:
            weights_path = os.path.join(
                config.model_save_dir,
                get_recent_weights_path(config.model_save_dir))
        model.build(weights_path)
        train(model)
        if config.save_model:
            save_model(model,
                       config.model_save_path,
                       config.model_weights_save_path,
                       ext=".h5")

    if config.pred:
        # Prediction only: requires previously trained weights.
        model.build(_latest_weights())
        pred(model)

    if config.evaluate:
        model.build(_latest_weights())
        evaluate_model(model, "test/")
def run_baseline_classifier(num_classes_train=TRAIN_LENGTH,
                            num_classes_predict=PREDICT_LENGTH,
                            categories=False,
                            top_n=1):
    """Fit a stratified ``DummyClassifier`` baseline and score it on the test split.

    Returns:
        Tuple ``(macro_f1, dummy_clf)``: macro-averaged F1 on the test split
        and the fitted baseline classifier.
    """
    splits, vectorizer = util.prep_dataset_v3(
        num_classes_train=num_classes_train,
        num_classes_predict=num_classes_predict,
        vectorize=True)
    X_train, X_val, X_test, y_train, y_val, y_test = splits

    if categories:
        # Map every label set into the category label space.
        y_train, y_val, y_test = (util.degrees_to_categories(labels)
                                  for labels in (y_train, y_val, y_test))

    baseline = DummyClassifier(strategy='stratified').fit(X_train, y_train)

    report = util.evaluate_model(X_test,
                                 y_test,
                                 baseline,
                                 output_dict=True,
                                 top_n=top_n)
    macro_f1 = report['macro avg']['f1-score']

    text_report = util.evaluate_model(X_test,
                                      y_test,
                                      baseline,
                                      output_dict=False,
                                      top_n=top_n)
    print(text_report)

    util.evaluate_model_bias(
        baseline,
        vectorizer,
        logistic_regression_model.evaluate_model_bias_single_df,
        num_classes_predict=PREDICT_LENGTH,
        categories=categories,
        top_n=top_n,
        test=True)
    return macro_f1, baseline
def evaluate_model_bias_single_df(model,
                                  df,
                                  vectorizer,
                                  num_classes_predict=0,
                                  categories=False,
                                  top_n=1):
    """Evaluate ``model`` on a single dataframe using bag-of-courses features.

    The dataframe is tokenized, its ``course_history`` column is vectorized
    with the supplied ``vectorizer``, and the dict-form report from
    ``util.evaluate_model`` is returned.
    """
    tokens, labels = util.tokenize_df(df, num_classes_predict)
    histories = tokens.loc[:, 'course_history']
    _, features = util.vectorize_course_history(histories,
                                                vectorizer=vectorizer)
    labels = util.degrees_to_categories(labels) if categories else labels
    return util.evaluate_model(features,
                               labels,
                               model,
                               output_dict=True,
                               top_n=top_n)
def evaluate_model_bias_single_df(model,
                                  df,
                                  args,
                                  num_classes_predict=0,
                                  categories=False,
                                  top_n=1):
    """Evaluate ``model`` on a single dataframe using course2vec features.

    ``args`` is a ``(course2vec_model, vec_size)`` pair used to featurize the
    ``course_history`` column; the dict-form report from
    ``util.evaluate_model`` is returned.
    """
    embedding_model, embedding_dim = args
    tokens, labels = util.tokenize_df(df, num_classes_predict)
    features = featurize_student(tokens['course_history'], embedding_model,
                                 embedding_dim)
    labels = util.degrees_to_categories(labels) if categories else labels
    return util.evaluate_model(features,
                               labels,
                               model,
                               output_dict=True,
                               top_n=top_n)
def main(device=torch.device('cuda:0')):
    """Print test-set accuracy and loss for the restored U-Net checkpoint."""
    # Data loaders (only the test loader is consumed below)
    batch_size = util.config("unet.batch_size")
    tr_loader, va_loader, te_loader = getTrainingValidationTestingData(
        "data/nyu.zip", batch_size=batch_size)

    net = Net()

    # Restore the latest checkpoint, if one exists
    print("Loading unet...")
    checkpoint_dir = util.config("unet.checkpoint")
    net, start_epoch, stats = util.restore_checkpoint(net, checkpoint_dir)

    acc, loss = util.evaluate_model(net, te_loader, device)
    print(f'Test Accuracy:{acc}')
    print(f'Test Loss:{loss}')
def train_model(data_set_identifier, train_file, val_file, learning_rate, minibatch_size, name):
    """Train a model until a time/update budget or the early-stopping rule is hit.

    Every ``eval_interval`` minibatches the model is scored on the validation
    set, the best-by-validation and best-by-training checkpoints are written,
    sample structures are written as PDB files, and the accumulated run data
    is saved (and posted to the local dashboard unless ``hide_ui`` is set).

    Reads the module-level ``configs``, ``args`` and ``use_gpu``.

    Returns:
        Path of the checkpoint with the lowest validation loss, or ``None``
        if no evaluation ever improved on the initial sentinel loss.
    """
    set_experiment_id(data_set_identifier, learning_rate, minibatch_size, name)

    train_loader = contruct_dataloader_from_disk(train_file, minibatch_size, use_evolutionary=True)
    validation_loader = contruct_dataloader_from_disk(val_file, minibatch_size, use_evolutionary=True)
    validation_dataset_size = len(validation_loader.dataset)

    embedding_size = 21
    if configs.run_params["use_evolutionary"]:
        embedding_size = 42

    # Load an existing model if one was given as argument ...
    if args.model is not None:
        model_path = "output/models/" + args.model + ".model"
        model = load_model_from_disk(model_path, use_gpu)
    else:
        # ... otherwise construct a new model from the config file.
        model = construct_model(configs.model_params, embedding_size, use_gpu, minibatch_size)

    # Optimizer parameters
    betas = tuple(configs.run_params["betas"])
    weight_decay = configs.run_params["weight_decay"]
    angle_lr = configs.run_params["angles_lr"]

    if configs.model_params['architecture'] == 'cnn_angles':
        optimizer = optim.Adam(model.parameters(), betas=betas, lr=learning_rate,
                               weight_decay=weight_decay)
    else:
        # Separate learning rate for the soft-to-angle sub-module.
        optimizer = optim.Adam([
            {'params': model.model.parameters(), 'lr': learning_rate},
            {'params': model.soft_to_angle.parameters(), 'lr': angle_lr}],
            betas=betas, weight_decay=weight_decay)

    # Print number of trainable parameters
    print_number_of_parameters(model)

    # For creating a summary table of the model (does not work on ExampleModel!)
    if configs.run_params["print_model_summary"]:
        if configs.model_params["architecture"] != 'rnn':
            summary(model, configs.run_params["max_sequence_length"], 2)
        else:
            write_out("DETAILED MODEL SUMMARY IS NOT SUPPORTED FOR RNN MODELS")

    # NOTE(review): the PyTorch docs recommend moving a model to the GPU
    # before constructing its optimizer; left in the original order here.
    if use_gpu:
        model = model.cuda()

    # TODO: is soft_to_angle.parameters() included here?

    sample_num = list()
    train_loss_values = list()
    validation_loss_values = list()
    rmsd_avg_values = list()
    drmsd_avg_values = list()
    break_point_values = list()

    breakpoints = configs.run_params['breakpoints']
    best_model_loss = 1e20
    best_model_train_loss = 1e20
    best_model_minibatch_time = None
    best_model_path = None
    # Initialised so the log line below cannot hit an unbound name.
    best_model_train_path = None
    stopping_condition_met = False
    minibatches_proccesed = 0

    loss_atoms = configs.run_params["loss_atoms"]
    start_time = time.time()
    max_time = configs.run_params["max_time"]
    C_epochs = configs.run_params["c_epochs"]  # TODO: Change to parameter
    C_batch_updates = C_epochs

    while not stopping_condition_met:
        optimizer.zero_grad()
        model.zero_grad()
        loss_tracker = np.zeros(0)
        start_time_n_minibatches = time.time()
        for minibatch_id, training_minibatch in enumerate(train_loader, 0):
            minibatches_proccesed += 1
            training_minibatch = list(training_minibatch)
            primary_sequence, tertiary_positions, mask, p_id = training_minibatch[:-1]

            # Update C: ramps linearly from 0 to 1 over the first
            # C_batch_updates minibatches, then stays at 1.
            C = 1.0 if minibatches_proccesed >= C_batch_updates else float(minibatches_proccesed) / C_batch_updates

            # One-hot encode the amino-acid string and concatenate PSSM values.
            amino_acids, batch_sizes = one_hot_encode(primary_sequence, 21, use_gpu)

            if configs.run_params["use_evolutionary"]:
                evolutionary = training_minibatch[-1]
                evolutionary, batch_sizes = torch.nn.utils.rnn.pad_packed_sequence(
                    torch.nn.utils.rnn.pack_sequence(evolutionary))
                if use_gpu:
                    evolutionary = evolutionary.cuda()
                amino_acids = torch.cat(
                    (amino_acids, evolutionary.view(-1, len(batch_sizes), 21)), 2)

            start_compute_loss = time.time()

            if configs.run_params["only_angular_loss"]:
                loss = model.compute_angular_loss((amino_acids, batch_sizes), tertiary_positions, mask)
            else:
                loss = model.compute_loss((amino_acids, batch_sizes), tertiary_positions, mask,
                                          C=C, loss_atoms=loss_atoms)

            if C != 1:
                write_out("C:", C)
            write_out("Train loss:", float(loss))
            start_compute_grad = time.time()
            loss.backward()
            loss_tracker = np.append(loss_tracker, float(loss))
            end = time.time()
            write_out("Loss time:", start_compute_grad-start_compute_loss,
                      "Grad time:", end-start_compute_grad)
            optimizer.step()
            optimizer.zero_grad()
            model.zero_grad()

            # For every eval_interval samples, plot performance on the validation set
            if minibatches_proccesed % configs.run_params["eval_interval"] == 0:
                model.eval()
                write_out("Testing model on validation set...")

                train_loss = loss_tracker.mean()
                loss_tracker = np.zeros(0)
                validation_loss, data_total, rmsd_avg, drmsd_avg = evaluate_model(
                    validation_loader, model, use_gpu, loss_atoms,
                    configs.run_params["use_evolutionary"])

                # First validation sample is used for the structure/angle plots.
                prim = data_total[0][0]
                pos = data_total[0][1]
                pos_pred = data_total[0][3]
                mask = data_total[0][4]
                pos = apply_mask(pos, mask)
                angles_pred = data_total[0][2]

                angles_pred = apply_mask(angles_pred, mask, size=3)

                pos_pred = apply_mask(pos_pred, mask)
                prim = torch.masked_select(prim, mask)

                if use_gpu:
                    pos = pos.cuda()
                    pos_pred = pos_pred.cuda()

                angles = calculate_dihedral_angels(pos, use_gpu)
                # Predicted angles come straight from the model output rather
                # than being recomputed from pos_pred.

                write_to_pdb(get_structure_from_angles(prim, angles), "test")
                write_to_pdb(get_structure_from_angles(prim, angles_pred), "test_pred")
                if validation_loss < best_model_loss:
                    best_model_loss = validation_loss
                    best_model_minibatch_time = minibatches_proccesed
                    best_model_path = write_model_to_disk(model)

                if train_loss < best_model_train_loss:
                    best_model_train_loss = train_loss
                    best_model_train_path = write_model_to_disk(model, model_type="train")

                write_out("Validation loss:", validation_loss, "Train loss:", train_loss)
                write_out("Best model so far (validation loss): ", best_model_loss,
                          "at time", best_model_minibatch_time)
                # str() keeps logging alive if no checkpoint exists yet
                # (e.g. the loss was NaN on the first evaluation).
                write_out("Best model stored at " + str(best_model_path))
                write_out("Best model train stored at " + str(best_model_train_path))
                write_out("Minibatches processed:", minibatches_proccesed)

                end_time_n_minibatches = time.time()
                n_minibatches_time_used = end_time_n_minibatches - start_time_n_minibatches
                minibatches_left = configs.run_params["max_updates"] - minibatches_proccesed
                seconds_left = int(n_minibatches_time_used
                                   * (minibatches_left/configs.run_params["eval_interval"]))

                m, s = divmod(seconds_left, 60)
                h, m = divmod(m, 60)
                write_out("Estimated time until maximum number of updates:",
                          '{:d}:{:02d}:{:02d}'.format(h, m, s))
                sample_num.append(minibatches_proccesed)
                train_loss_values.append(train_loss)
                validation_loss_values.append(validation_loss)
                rmsd_avg_values.append(rmsd_avg)
                drmsd_avg_values.append(drmsd_avg)
                if breakpoints and minibatches_proccesed > breakpoints[0]:
                    break_point_values.append(drmsd_avg)
                    breakpoints = breakpoints[1:]

                data = {}
                # Context managers so the PDB files are closed after reading.
                with open("output/protein_test_pred.pdb", "r") as pdb_file:
                    data["pdb_data_pred"] = pdb_file.read()
                with open("output/protein_test.pdb", "r") as pdb_file:
                    data["pdb_data_true"] = pdb_file.read()
                data["validation_dataset_size"] = validation_dataset_size
                data["sample_num"] = sample_num
                data["train_loss_values"] = train_loss_values
                data["break_point_values"] = break_point_values
                data["validation_loss_values"] = validation_loss_values
                data["phi_actual"] = [math.degrees(float(v)) for v in angles[1:, 1]]
                data["psi_actual"] = [math.degrees(float(v)) for v in angles[:-1, 2]]
                data["phi_predicted"] = [math.degrees(float(v)) for v in angles_pred[1:, 1]]
                data["psi_predicted"] = [math.degrees(float(v)) for v in angles_pred[:-1, 2]]
                data["drmsd_avg"] = drmsd_avg_values
                data["rmsd_avg"] = rmsd_avg_values
                if not configs.run_params["hide_ui"]:
                    res = requests.post('http://localhost:5000/graph', json=data)
                    if res.ok:
                        print(res.json())

                # Save run data
                write_run_to_disk(data)

                # Check if maximum time is reached.
                start_time_n_minibatches = time.time()
                time_used = time.time() - start_time

                time_condition = (max_time is not None and time_used > max_time)
                max_update_condition = minibatches_proccesed >= configs.run_params["max_updates"]
                # Early stop once twice as many minibatches have passed as were
                # needed to reach the best model; skipped while no best exists.
                min_update_condition = (
                    best_model_minibatch_time is not None
                    and minibatches_proccesed > configs.run_params["min_updates"]
                    and minibatches_proccesed > best_model_minibatch_time * 2)

                model.train()

                # Checking for stop conditions
                if time_condition or max_update_condition or min_update_condition:
                    stopping_condition_met = True
                    break

    write_out("Best validation model found after", best_model_minibatch_time, "minibatches.")
    write_result_summary(best_model_loss)
    return best_model_path
def main(device=torch.device('cuda:0')):
    """Train the U-Net for a fixed number of epochs and plot the metric curves.

    The dataset size is selected with the ``--datasize`` / ``-d`` CLI flag.
    A checkpoint is saved after every epoch.
    """
    # CLI arguments
    cli = arg.ArgumentParser(
        description='We all know what we are doing. Fighting!')
    cli.add_argument("--datasize",
                     "-d",
                     default="small",
                     type=str,
                     help="data size you want to use, small, medium, total")
    options = cli.parse_args()

    # Data loaders
    tr_loader, va_loader, te_loader = getTrainingValidationTestingData(
        options.datasize,
        "data/nyu.zip",
        batch_size=config("unet.batch_size"))

    # Model, loss function and optimizer
    model = Net()
    learning_rate = util.config("unet.learning_rate")
    criterion = DepthLoss(0.1)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    number_of_epoches = 10

    # Restore the latest checkpoint, if one exists
    print("Loading unet...")
    model, start_epoch, stats = util.restore_checkpoint(
        model, util.config("unet.checkpoint"))

    history = {"tr_loss": [], "tr_acc": [], "va_loss": [], "va_acc": []}

    def _record_metrics():
        # Score the current model on train, then validation, and log both.
        tr_acc, tr_loss = util.evaluate_model(model, tr_loader, device)
        va_acc, va_loss = util.evaluate_model(model, va_loader, device)
        history["va_acc"].append(va_acc)
        history["va_loss"].append(va_loss)
        history["tr_acc"].append(tr_acc)
        history["tr_loss"].append(tr_loss)

    # Metrics of the restored (or freshly initialised) model
    _record_metrics()

    # Early stopping is disabled; the loop runs a fixed number of epochs.
    for epoch in range(start_epoch, number_of_epoches):
        util.train_epoch(tr_loader, model, criterion, optimizer, device)
        _record_metrics()

        # Save model parameters
        util.save_checkpoint(model, epoch + 1, util.config("unet.checkpoint"),
                             stats)

    print("Finished Training")
    util.make_plot(history["tr_loss"], history["tr_acc"], history["va_loss"],
                   history["va_acc"])
Author: Tim Sweeney
'''
import wandb
import util
import argparse

# W&B project name, the use-case id handed to the util helpers below, and the
# job type recorded on the run.
project = "model_registry_example"
model_use_case_id = "mnist"
job_type = "evaluator"

# First, we launch a run which registers this workload with W&B.
run = wandb.init(project=project, job_type=job_type)

# Then we fetch the latest evaluation set.
x_eval, y_eval, dataset = util.download_eval_dataset_from_wb(model_use_case_id)

# Next we fetch the new candidate models for this use case.
# The metric name is derived from the evaluation dataset's name.
metric = f"{dataset.name}-ce_loss"
candidates = util.get_new_model_candidates_from_wb(project, model_use_case_id, metric)

# Evaluate the models and save their metrics to W&B.
for model in candidates:
    score = util.evaluate_model(model, x_eval, y_eval)
    util.save_metric_to_model_in_wb(model, metric, score)

# Finally, promote the best model to production.
util.promote_best_model_in_wb(project, model_use_case_id, metric)
def train_model(data_set_identifier, train_file, val_file, learning_rate, minibatch_size):
    """Train an ``ExampleModel`` with early stopping on validation loss.

    Every ``args.eval_interval`` minibatches the model is evaluated on the
    validation set, the true and predicted structures are written as PDB
    files and loaded into the viewer via ``cmd``, and the best-so-far
    checkpoint is written. Training stops once more than
    ``args.minimum_updates`` minibatches have been processed and twice as
    many minibatches have passed as were needed to reach the best model.

    Reads the module-level ``args`` and ``use_gpu``.

    Returns:
        Path of the checkpoint with the lowest validation loss.
    """
    set_experiment_id(data_set_identifier, learning_rate, minibatch_size)

    train_loader = contruct_dataloader_from_disk(train_file, minibatch_size)
    validation_loader = contruct_dataloader_from_disk(val_file, minibatch_size)
    validation_dataset_size = len(validation_loader.dataset)

    model = ExampleModel(21, minibatch_size, use_gpu=use_gpu)  # embed size = 21

    # TODO: is soft_to_angle.parameters() included here?
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    sample_num = list()
    train_loss_values = list()
    validation_loss_values = list()
    rmsd_avg_values = list()
    drmsd_avg_values = list()

    # Start from +inf so the first evaluation always stores a checkpoint; the
    # previous sentinel of 1.1 left best_model_path as None whenever the
    # first validation loss was >= 1.1, crashing the logging below.
    best_model_loss = float("inf")
    best_model_minibatch_time = None
    best_model_path = None
    stopping_condition_met = False
    minibatches_proccesed = 0

    while not stopping_condition_met:
        optimizer.zero_grad()
        model.zero_grad()
        loss_tracker = np.zeros(0)
        for minibatch_id, training_minibatch in enumerate(train_loader, 0):
            minibatches_proccesed += 1
            primary_sequence, tertiary_positions, mask = training_minibatch
            start_compute_loss = time.time()
            loss = model.compute_loss(primary_sequence, tertiary_positions)
            write_out("Train loss:", float(loss))
            start_compute_grad = time.time()
            loss.backward()
            loss_tracker = np.append(loss_tracker, float(loss))
            end = time.time()
            write_out("Loss time:", start_compute_grad - start_compute_loss,
                      "Grad time:", end - start_compute_grad)
            optimizer.step()
            optimizer.zero_grad()
            model.zero_grad()

            # For every eval_interval samples, plot performance on the validation set
            if minibatches_proccesed % args.eval_interval == 0:
                train_loss = loss_tracker.mean()
                loss_tracker = np.zeros(0)
                validation_loss, data_total, rmsd_avg, drmsd_avg = evaluate_model(
                    validation_loader, model)

                # First validation sample is used for the structure/angle plots.
                prim = data_total[0][0]
                pos = data_total[0][1]
                (aa_list, phi_list, psi_list,
                 omega_list) = calculate_dihedral_angels(prim, pos)
                write_to_pdb(
                    get_structure_from_angles(aa_list, phi_list[1:],
                                              psi_list[:-1], omega_list[:-1]),
                    "test")
                cmd.load("output/protein_test.pdb")
                write_to_pdb(data_total[0][3], "test_pred")
                cmd.load("output/protein_test_pred.pdb")
                cmd.forward()
                cmd.orient()

                if validation_loss < best_model_loss:
                    best_model_loss = validation_loss
                    best_model_minibatch_time = minibatches_proccesed
                    best_model_path = write_model_to_disk(model)

                write_out("Validation loss:", validation_loss, "Train loss:",
                          train_loss)
                # Report the best loss, not the current evaluation's loss.
                write_out("Best model so far (label loss): ", best_model_loss,
                          "at time", best_model_minibatch_time)
                # str() keeps logging alive if no checkpoint exists yet
                # (e.g. the loss was NaN on the first evaluation).
                write_out("Best model stored at " + str(best_model_path))
                write_out("Minibatches processed:", minibatches_proccesed)
                sample_num.append(minibatches_proccesed)
                train_loss_values.append(train_loss)
                validation_loss_values.append(validation_loss)
                rmsd_avg_values.append(rmsd_avg)
                drmsd_avg_values.append(drmsd_avg)

                if args.live_plot:
                    predicted_angles = data_total[0][2].detach().transpose(0, 1)
                    data = {}
                    data["validation_dataset_size"] = validation_dataset_size
                    data["sample_num"] = sample_num
                    data["train_loss_values"] = train_loss_values
                    data["validation_loss_values"] = validation_loss_values
                    data["phi_actual"] = [
                        math.degrees(float(v)) for v in phi_list[1:]
                    ]
                    data["psi_actual"] = [
                        math.degrees(float(v)) for v in psi_list[:-1]
                    ]
                    data["phi_predicted"] = [
                        math.degrees(float(v)) for v in predicted_angles[0][1:]
                    ]
                    data["psi_predicted"] = [
                        math.degrees(float(v)) for v in predicted_angles[1][:-1]
                    ]
                    data["drmsd_avg"] = drmsd_avg_values
                    data["rmsd_avg"] = rmsd_avg_values
                    res = requests.post('http://localhost:5000/graph', json=data)
                    if res.ok:
                        print(res.json())

                # Early stop; skipped while no best model has been recorded.
                if (best_model_minibatch_time is not None
                        and minibatches_proccesed > args.minimum_updates
                        and minibatches_proccesed > best_model_minibatch_time * 2):
                    stopping_condition_met = True
                    break

    write_result_summary(best_model_loss)
    return best_model_path
def main(device=torch.device('cuda:0')):
    """Train the U-Net on the small NYU archive and show the training plots.

    Runs a fixed number of epochs, saving a checkpoint after each one.
    """
    # Data loaders (the full archive would be "data/nyu_depth.zip")
    batch_size = util.config("unet.batch_size")
    tr_loader, va_loader, te_loader = getTrainingValidationTestingData(
        "data/nyu_small.zip", batch_size=batch_size)

    # Model, loss function and optimizer
    model = Net()
    learning_rate = util.config("unet.learning_rate")
    criterion = DepthLoss(0.1)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    number_of_epoches = 10

    # Restore the latest checkpoint, if one exists
    print("Loading unet...")
    model, start_epoch, stats = util.restore_checkpoint(
        model, util.config("unet.checkpoint"))

    curves = {"tr_loss": [], "tr_acc": [], "va_loss": [], "va_acc": []}

    def _log_metrics():
        # Score the current model on train, then validation, and log both.
        tr_acc, tr_loss = util.evaluate_model(model, tr_loader, device)
        va_acc, va_loss = util.evaluate_model(model, va_loader, device)
        curves["va_acc"].append(va_acc)
        curves["va_loss"].append(va_loss)
        curves["tr_acc"].append(tr_acc)
        curves["tr_loss"].append(tr_loss)

    # Metrics of the restored (or freshly initialised) model
    _log_metrics()

    # Early stopping is disabled; the loop runs a fixed number of epochs.
    for epoch in range(start_epoch, number_of_epoches):
        util.train_epoch(tr_loader, model, criterion, optimizer)
        _log_metrics()

        # Save model parameters
        util.save_checkpoint(model, epoch + 1, util.config("unet.checkpoint"),
                             stats)

    print("Finished Training")
    util.make_plot(curves["tr_loss"], curves["tr_acc"], curves["va_loss"],
                   curves["va_acc"])
def train_model(data_set_identifier, train_file, val_file, learning_rate, minibatch_size):
    """Train an ``ExampleModel`` with early stopping on validation loss.

    Every ``args.eval_interval`` minibatches the model is evaluated on the
    validation set, the true and predicted structures are written as PDB
    files, the best-so-far checkpoint is saved, and (unless ``args.hide_ui``
    is set) the run data is posted to the local dashboard. Training stops
    once more than ``args.minimum_updates`` minibatches have been processed
    and twice as many minibatches have passed as were needed to reach the
    best model.

    Reads the module-level ``args`` and ``use_gpu``.

    Returns:
        Path of the checkpoint with the lowest validation loss, or ``None``
        if no evaluation ever improved on the initial sentinel loss.
    """
    set_experiment_id(data_set_identifier, learning_rate, minibatch_size)

    train_loader = contruct_dataloader_from_disk(train_file, minibatch_size)
    validation_loader = contruct_dataloader_from_disk(val_file, minibatch_size)
    validation_dataset_size = len(validation_loader.dataset)

    model = ExampleModel(21, minibatch_size, use_gpu=use_gpu)  # embed size = 21
    # Move to the GPU before the optimizer is built.
    if use_gpu:
        model = model.cuda()

    # TODO: is soft_to_angle.parameters() included here?
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    sample_num = list()
    train_loss_values = list()
    validation_loss_values = list()
    rmsd_avg_values = list()
    drmsd_avg_values = list()

    best_model_loss = 1e20
    best_model_minibatch_time = None
    best_model_path = None
    stopping_condition_met = False
    minibatches_proccesed = 0

    while not stopping_condition_met:
        optimizer.zero_grad()
        model.zero_grad()
        loss_tracker = np.zeros(0)
        for minibatch_id, training_minibatch in enumerate(train_loader, 0):
            minibatches_proccesed += 1
            primary_sequence, tertiary_positions, mask = training_minibatch
            start_compute_loss = time.time()
            loss = model.compute_loss(primary_sequence, tertiary_positions)
            write_out("Train loss:", float(loss))
            start_compute_grad = time.time()
            loss.backward()
            loss_tracker = np.append(loss_tracker, float(loss))
            end = time.time()
            write_out("Loss time:", start_compute_grad - start_compute_loss,
                      "Grad time:", end - start_compute_grad)
            optimizer.step()
            optimizer.zero_grad()
            model.zero_grad()

            # For every eval_interval samples, plot performance on the validation set
            if minibatches_proccesed % args.eval_interval == 0:
                write_out("Testing model on validation set...")

                train_loss = loss_tracker.mean()
                loss_tracker = np.zeros(0)
                validation_loss, data_total, rmsd_avg, drmsd_avg = evaluate_model(
                    validation_loader, model)

                # First validation sample is used for the structure/angle plots.
                prim = data_total[0][0]
                pos = data_total[0][1]
                pos_pred = data_total[0][3]
                if use_gpu:
                    pos = pos.cuda()
                    pos_pred = pos_pred.cuda()
                angles = calculate_dihedral_angels(pos, use_gpu)
                angles_pred = calculate_dihedral_angels(pos_pred, use_gpu)
                write_to_pdb(get_structure_from_angles(prim, angles), "test")
                write_to_pdb(get_structure_from_angles(prim, angles_pred),
                             "test_pred")

                if validation_loss < best_model_loss:
                    best_model_loss = validation_loss
                    best_model_minibatch_time = minibatches_proccesed
                    # NOTE(review): none of encoder_net, decoder_net,
                    # encoder_optimizer, decoder_optimizer, tot_eval_acc or e
                    # is defined in this function — presumably module-level
                    # names or a paste from another training loop; confirm
                    # this call is what is meant to save `model`.
                    best_model_path = saveModel(encoder_net, decoder_net,
                                                encoder_optimizer,
                                                decoder_optimizer, loss.item(),
                                                tot_eval_acc, e)

                write_out("Validation loss:", validation_loss, "Train loss:",
                          train_loss)
                # Report the best loss, not the current evaluation's loss.
                write_out("Best model so far (validation loss): ",
                          best_model_loss, "at time", best_model_minibatch_time)
                # str() keeps logging alive if no checkpoint exists yet
                # (e.g. the loss was NaN on the first evaluation).
                write_out("Best model stored at " + str(best_model_path))
                write_out("Minibatches processed:", minibatches_proccesed)
                sample_num.append(minibatches_proccesed)
                train_loss_values.append(train_loss)
                validation_loss_values.append(validation_loss)
                rmsd_avg_values.append(rmsd_avg)
                drmsd_avg_values.append(drmsd_avg)

                if not args.hide_ui:
                    data = {}
                    # Context managers so the PDB files are closed after reading.
                    with open("output/protein_test_pred.pdb", "r") as pdb_file:
                        data["pdb_data_pred"] = pdb_file.read()
                    with open("output/protein_test.pdb", "r") as pdb_file:
                        data["pdb_data_true"] = pdb_file.read()
                    data["validation_dataset_size"] = validation_dataset_size
                    data["sample_num"] = sample_num
                    data["train_loss_values"] = train_loss_values
                    data["validation_loss_values"] = validation_loss_values
                    data["phi_actual"] = [
                        math.degrees(float(v)) for v in angles[1:, 1]
                    ]
                    data["psi_actual"] = [
                        math.degrees(float(v)) for v in angles[:-1, 2]
                    ]
                    data["phi_predicted"] = [
                        math.degrees(float(v)) for v in angles_pred[1:, 1]
                    ]
                    data["psi_predicted"] = [
                        math.degrees(float(v)) for v in angles_pred[:-1, 2]
                    ]
                    data["drmsd_avg"] = drmsd_avg_values
                    data["rmsd_avg"] = rmsd_avg_values
                    res = requests.post('http://localhost:5000/graph', json=data)
                    if res.ok:
                        print(res.json())

                # Early stop; skipped while no best model has been recorded.
                if (best_model_minibatch_time is not None
                        and minibatches_proccesed > args.minimum_updates
                        and minibatches_proccesed > best_model_minibatch_time * 2):
                    stopping_condition_met = True
                    break

    write_result_summary(best_model_loss)
    return best_model_path
def log_reg_course2vec(training_set=None,
                       vec_size=150,
                       win_size=10,
                       min_count=2,
                       epochs=10,
                       num_classes_val=-1,
                       categories=False,
                       top_n=1):
    """Train logistic regression on course2vec student features and evaluate it.

    Args:
        training_set: When truthy, a course2vec model is trained on it and
            stored at the path derived from the hyperparameters; otherwise
            the model is loaded from that path.
        vec_size: Course2vec hyperparameter; also passed to
            ``featurize_student``.
        win_size: Course2vec hyperparameter.
        min_count: Course2vec hyperparameter.
        epochs: Training epochs used when ``training_set`` is given.
        num_classes_val: Only echoed in the banner printed at the start.
        categories: When true, labels are mapped through
            ``util.degrees_to_categories``.
        top_n: Forwarded to the ``util`` evaluation helpers.

    Returns:
        Tuple ``(macro_f1, log_reg_model)``: macro-averaged F1 on the test
        split and the fitted classifier.
    """
    print(
        f"\nRunning course2vec with logreg with vec_size={vec_size}, win_size={win_size}, min_count={min_count}, epochs={epochs}, num_classes_val={num_classes_val}"
    )

    # Set up hyperparams, load (or train) the embedding model
    course2vec_model_path = get_course2vec_model_path(vec_size, win_size,
                                                      min_count)
    if training_set:
        course2vec_model = train_course2vec(training_set,
                                            course2vec_model_path,
                                            vec_size,
                                            win_size,
                                            min_count,
                                            epochs=epochs)
    else:
        course2vec_model = Word2Vec.load(course2vec_model_path)

    # Prep datasets. A single call yields every split; the previous second
    # call with identical arguments only repeated the work (and would mix
    # splits if the split were ever randomised).
    X_train, X_val, X_test, y_train, y_val, y_test = util.prep_dataset_v3(
        num_classes_train=TRAIN_LENGTH,
        num_classes_predict=PREDICT_LENGTH,
        augmented=False)

    X_train = featurize_student(X_train['course_history'], course2vec_model,
                                vec_size)
    X_val = featurize_student(X_val['course_history'], course2vec_model,
                              vec_size)
    X_test = featurize_student(X_test['course_history'], course2vec_model,
                               vec_size)

    if categories:
        y_train = util.degrees_to_categories(y_train)
        y_val = util.degrees_to_categories(y_val)
        y_test = util.degrees_to_categories(y_test)

    # Train and predict using the logistic regression model
    train_score, log_reg_model = train_log_reg(list(X_train), y_train)
    print(f"train_score: {train_score}")

    macro_f1 = util.evaluate_model(X_test,
                                   y_test,
                                   log_reg_model,
                                   output_dict=True,
                                   top_n=top_n)['macro avg']['f1-score']
    print(
        util.evaluate_model(X_test,
                            y_test,
                            log_reg_model,
                            output_dict=False,
                            top_n=top_n))

    util.evaluate_model_bias(log_reg_model, (course2vec_model, vec_size),
                             evaluate_model_bias_single_df,
                             num_classes_predict=PREDICT_LENGTH,
                             categories=categories,
                             top_n=top_n,
                             test=True)
    return macro_f1, log_reg_model