def cross_validate(x, y, peak_names, output_file_path, n_splits=10,
                   random_state=None):
    """Train and evaluate a fresh ConvNet on each fold of a shuffled K-fold split.

    For every fold a new ``aitac.ConvNet`` is built, trained through
    ``aitac.train_model`` with ``aitac.pearson_loss`` and an Adam optimizer,
    and then used to predict the held-out fold with ``aitac.test_model``.
    ``plot_utils.plot_cors`` is called once per fold on the held-out labels
    and predictions.

    The function reads the module-level settings ``batch_size``,
    ``num_classes``, ``num_filters``, ``device``, ``learning_rate`` and
    ``num_epochs``.

    NOTE(review): the held-out fold is handed to ``aitac.train_model`` as its
    second loader and is also the data predicted on afterwards. If
    ``train_model`` uses that loader for model selection, the reported
    correlations are optimistic -- confirm against ``aitac.train_model``.

    Args:
        x: NumPy array of inputs, indexed as ``x[i, :, :]`` (samples on the
            first axis).
        y: NumPy array of labels, indexed as ``y[i, :]``, aligned with ``x``.
        peak_names: NumPy array of per-sample names, aligned with ``x``.
        output_file_path: Forwarded unchanged to ``aitac.train_model`` and
            ``plot_utils.plot_cors``.
        n_splits: Number of folds. Defaults to 10, the value that used to be
            hard-coded.
        random_state: Optional seed for the fold shuffle. ``None`` (default)
            keeps the previous unseeded behaviour; pass an int to make the
            fold assignment reproducible.

    Returns:
        A tuple ``(pred_all, corr_all, peak_order)``: the per-fold predictions
        stacked with ``np.vstack``, the per-fold correlations concatenated
        with ``np.hstack``, and the held-out peak names concatenated with
        ``np.hstack``, all in fold order.
    """
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    fold_predictions = []
    fold_correlations = []
    fold_peak_names = []

    for train_index, test_index in kf.split(x):
        train_data, eval_data = x[train_index, :, :], x[test_index, :, :]
        train_labels, eval_labels = y[train_index, :], y[test_index, :]

        # Data loaders (sample order is kept so that predictions line up with
        # the peak names recorded for this fold)
        train_dataset = torch.utils.data.TensorDataset(
            torch.from_numpy(train_data), torch.from_numpy(train_labels))
        train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=False)

        eval_dataset = torch.utils.data.TensorDataset(
            torch.from_numpy(eval_data), torch.from_numpy(eval_labels))
        eval_loader = torch.utils.data.DataLoader(dataset=eval_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False)

        # A new model per fold, so no weights leak between folds
        model = aitac.ConvNet(num_classes, num_filters).to(device)

        # Loss and optimizer
        criterion = aitac.pearson_loss
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        # Train; the best-loss value returned alongside the model is not
        # needed here
        model, _ = aitac.train_model(train_loader, eval_loader, model, device,
                                     criterion, optimizer, num_epochs,
                                     output_file_path)

        # Predict on the held-out fold; the activation outputs are not needed
        predictions, _, _ = aitac.test_model(eval_loader, model, device)

        # Plot the correlations histogram for this fold
        correlations = plot_utils.plot_cors(eval_labels, predictions,
                                            output_file_path)

        fold_predictions.append(predictions)
        fold_correlations.append(correlations)
        fold_peak_names.append(peak_names[test_index])

    pred_all = np.vstack(fold_predictions)
    corr_all = np.hstack(fold_correlations)
    peak_order = np.hstack(fold_peak_names)

    return pred_all, corr_all, peak_order
model.parameters()), lr=learning_rate) # train model model, best_loss_valid = aitac.train_model(train_loader, valid_loader, model, device, criterion, optimizer, num_epochs, output_file_path) # save the model checkpoint torch.save(model.state_dict(), '../models/model' + model_name + '.ckpt') #save the whole model torch.save(model, '../models/model' + model_name + '.pth') # Predict on test set predictions, max_activations, max_act_index = aitac.test_model( eval_loader, model, device) #-------------------------------------------# # Create Plots # #-------------------------------------------# # plot the correlations histogram # returns correlation measurement for every prediction-label pair print("Creating plots...") #plot_utils.plot_training_loss(training_loss, output_file_path) correlations = plot_utils.plot_cors(eval_labels, predictions, output_file_path) plot_utils.plot_corr_variance(eval_labels, correlations, output_file_path)
torch.from_numpy(y)) data_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False) # load trained model model = aitac.ConvNet(num_classes, num_filters).to(device) checkpoint = torch.load('../models/' + model_name + '.ckpt') model.load_state_dict(checkpoint) #copy trained model weights to motif extraction model motif_model = aitac.motifCNN(model).to(device) motif_model.load_state_dict(model.state_dict()) # run predictions with full model on all data pred_full_model, max_activations, activation_idx = aitac.test_model( data_loader, model, device) correlations = plot_utils.plot_cors(y, pred_full_model, output_file_path) # find well predicted OCRs idx = np.argwhere(np.asarray(correlations) > 0.75).squeeze() #get data subset for well predicted OCRs to run further test x2 = x[idx, :, :] y2 = y[idx, :] dataset = torch.utils.data.TensorDataset(torch.from_numpy(x2), torch.from_numpy(y2)) data_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False)
model_name = sys.argv[5]

# Data loader
dataset = torch.utils.data.TensorDataset(torch.from_numpy(x),
                                         torch.from_numpy(y))
data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# load trained model
model = aitac.ConvNet(num_classes, num_filters).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved on a GPU still loads when this script runs without that device
checkpoint = torch.load('../models/' + model_name + '.ckpt',
                        map_location=device)
model.load_state_dict(checkpoint)

# run predictions with full model on all data
mouse_predictions, max_activations, act_index = aitac.test_model(
    data_loader, model, device)

# convert predictions from mouse cell types to human cell types
# NOTE(review): `map` shadows the builtin; the name is left unchanged because
# later parts of the script may refer to it
map = np.genfromtxt("../human_data/mouse_human_celltypes.txt", dtype='str')
mouse_cell_types = np.genfromtxt("../data/cell_type_names.txt", dtype='str')
predictions, cell_names = plot_utils.mouse2human(mouse_predictions,
                                                 mouse_cell_types, map)
print(cell_names)

#-------------------------------------------#
#               Create Plots                #
#-------------------------------------------#

# plot the correlations histogram
# returns correlation measurement for every prediction-label pair
print("Creating plots...")