def cross_validate(x, y, peak_names, output_file_path):
    kf = KFold(n_splits=10, shuffle=True)

    pred_all = []
    corr_all = []
    peak_order = []
    for train_index, test_index in kf.split(x):
        train_data, eval_data = x[train_index, :, :], x[test_index, :, :]
        train_labels, eval_labels = y[train_index, :], y[test_index, :]
        train_names, eval_names = peak_names[train_index], peak_names[test_index]

        # Data loaders
        train_dataset = torch.utils.data.TensorDataset(
            torch.from_numpy(train_data), torch.from_numpy(train_labels))
        train_loader = torch.utils.data.DataLoader(
            dataset=train_dataset, batch_size=batch_size, shuffle=False)

        eval_dataset = torch.utils.data.TensorDataset(
            torch.from_numpy(eval_data), torch.from_numpy(eval_labels))
        eval_loader = torch.utils.data.DataLoader(
            dataset=eval_dataset, batch_size=batch_size, shuffle=False)

        # Create model
        model = aitac.ConvNet(num_classes, num_filters).to(device)

        # Loss and optimizer
        criterion = aitac.pearson_loss
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        # Train model
        model, best_loss = aitac.train_model(
            train_loader, eval_loader, model, device, criterion, optimizer,
            num_epochs, output_file_path)

        # Predict on the held-out fold
        predictions, max_activations, max_act_index = aitac.test_model(
            eval_loader, model, device)

        # Plot the correlation histogram for this fold
        correlations = plot_utils.plot_cors(eval_labels, predictions,
                                            output_file_path)

        pred_all.append(predictions)
        corr_all.append(correlations)
        peak_order.append(eval_names)

    pred_all = np.vstack(pred_all)
    corr_all = np.hstack(corr_all)
    peak_order = np.hstack(peak_order)

    return pred_all, corr_all, peak_order
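# A minimal usage sketch (illustrative, not from the original source):
# cross_validate assumes module-level globals batch_size, num_classes,
# num_filters, device, learning_rate, and num_epochs, plus one-hot
# sequences x of shape (num_peaks, 4, seq_len) and per-cell-type
# accessibility y of shape (num_peaks, num_classes). The file paths
# below are hypothetical.
#
#   x = np.load("one_hot_seqs.npy").astype(np.float32)
#   y = np.load("cell_type_array.npy").astype(np.float32)
#   peak_names = np.load("peak_names.npy")
#   pred_all, corr_all, peak_order = cross_validate(
#       x, y, peak_names, "outputs/cv/")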
plt.legend(['train', 'validation'])
plt.savefig(output_directory + "training_valid_metric_r2_score_2.svg")
plt.close()

#-------------------------------------------#
# Step 3. Evaluate
#-------------------------------------------#

# Using the model with the latest weights
predicted_labels_1, predicted_labels_2 = model.predict([
    test_features,
    test_labels_mask[:, :num_classes_1],
    test_labels_mask[:, -num_classes_2:]
])

title = "basset_cor_hist_latest_class_1.svg"
correlations_1 = plot_utils.plot_cors(
    test_labels[test_tags == 1, :][:, :num_classes_1],
    predicted_labels_1[test_tags == 1, :],
    output_directory, title)

title = "basset_cor_hist_latest_class_2.svg"
correlations_2 = plot_utils.plot_cors(
    test_labels[test_tags == 2, :][:, -num_classes_2:],
    predicted_labels_2[test_tags == 2, :],
    output_directory, title)

# Using the best weights
model = create_model(input_size, num_classes_1, num_classes_2, batch_size,
                     combining_weight, weight_flag, ratio=1.0,
# Predict on test set
predictions, max_activations, max_act_index = aitac.test_model(
    eval_loader, model, device)

#-------------------------------------------#
#               Create Plots                #
#-------------------------------------------#

# Plot the correlation histogram;
# returns a correlation measurement for every prediction-label pair
print("Creating plots...")
#plot_utils.plot_training_loss(training_loss, output_file_path)
correlations = plot_utils.plot_cors(eval_labels, predictions, output_file_path)
plot_utils.plot_corr_variance(eval_labels, correlations, output_file_path)
quantile_indx = plot_utils.plot_piechart(correlations, eval_labels,
                                         output_file_path)

#-------------------------------------------#
#                Save Files                 #
#-------------------------------------------#

# Save predictions
np.save(output_file_path + "predictions.npy", predictions)

# Save correlations
np.save(output_file_path + "correlations.npy", correlations)
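# Hedged sketch of the correlation measure assumed above: plot_cors is
# treated here as returning one Pearson correlation per peak between its
# predicted and observed accessibility profiles; the repo's actual
# implementation may differ. A minimal stand-in consistent with that usage:
def per_peak_pearson(labels, predictions):
    # labels, predictions: arrays of shape (num_peaks, num_classes)
    return np.array([np.corrcoef(obs, pred)[0, 1]
                     for obs, pred in zip(labels, predictions)])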
                                          batch_size=batch_size,
                                          shuffle=False)

# Load trained model
model = aitac.ConvNet(num_classes, num_filters).to(device)
checkpoint = torch.load('../models/' + model_name + '.ckpt')
model.load_state_dict(checkpoint)

# Copy trained model weights to the motif extraction model
motif_model = aitac.motifCNN(model).to(device)
motif_model.load_state_dict(model.state_dict())

# Run predictions with the full model on all data
pred_full_model, max_activations, activation_idx = aitac.test_model(
    data_loader, model, device)
correlations = plot_utils.plot_cors(y, pred_full_model, output_file_path)

# Find well-predicted OCRs
idx = np.argwhere(np.asarray(correlations) > 0.75).squeeze()

# Get the data subset for well-predicted OCRs to run further tests
x2 = x[idx, :, :]
y2 = y[idx, :]
dataset = torch.utils.data.TensorDataset(torch.from_numpy(x2),
                                         torch.from_numpy(y2))
data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Non-modified results for well-predicted OCRs only
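# Note (assumption): if the checkpoint above was saved on GPU but is being
# loaded on a CPU-only machine, torch.load accepts a map_location argument;
# a defensive variant of the load above would be:
#
#   checkpoint = torch.load('../models/' + model_name + '.ckpt',
#                           map_location=device)
#   model.load_state_dict(checkpoint)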
plt.plot(history.history['mean_squared_error'])
plt.plot(history.history['val_mean_squared_error'])
plt.title('model mean squared error')
plt.ylabel('mse')
plt.xlabel('epoch')
plt.legend(['train', 'validation'])
plt.savefig(output_directory + "training_valid_metric.svg")
plt.close()

#-------------------------------------------#
# Step 3. Evaluate
#-------------------------------------------#

# Using the model with the latest weights
predicted_labels = model.predict(np.stack(test_features))
title = "basset_cor_hist_latest.svg"
correlations = plot_utils.plot_cors(test_labels, predicted_labels,
                                    output_directory, title)

# Using the best weights
model = create_model(input_size, num_classes, learning_rate,
                     combined_loss_weight)
model.load_weights(checkpoint_path_weights)
model.save(output_directory + 'whole_model_best.h5')

predicted_labels = model.predict(np.stack(test_features))
title = "basset_cor_hist_best.svg"
correlations = plot_utils.plot_cors(test_labels, predicted_labels,
                                    output_directory, title)
plot_utils.plot_corr_variance(test_labels, correlations, output_directory)
quantile_indx = plot_utils.plot_cors_piechart(correlations, test_labels,
                                              output_directory)
plot_utils.plot_random_predictions(test_labels, predicted_labels, correlations,
# Convert predictions from mouse cell types to human cell types
# (renamed from `map` to avoid shadowing the Python builtin)
name_map = np.genfromtxt("../human_data/mouse_human_celltypes.txt", dtype='str')
mouse_cell_types = np.genfromtxt("../data/cell_type_names.txt", dtype='str')
predictions, cell_names = plot_utils.mouse2human(mouse_predictions,
                                                 mouse_cell_types, name_map)
print(cell_names)

#-------------------------------------------#
#               Create Plots                #
#-------------------------------------------#

# Plot the correlation histogram;
# returns a correlation measurement for every prediction-label pair
print("Creating plots...")
correlations = plot_utils.plot_cors(y, predictions, output_file_path)
plot_utils.plot_corr_variance(y, correlations, output_file_path)
quantile_indx = plot_utils.plot_piechart(correlations, y, output_file_path)

#-------------------------------------------#
#                Save Files                 #
#-------------------------------------------#

# Save predictions
np.save(output_file_path + "predictions.npy", predictions)

# Save correlations
np.save(output_file_path + "correlations.npy", correlations)
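# Hedged sketch (assumption, not the repo's implementation): mouse2human is
# used above as collapsing mouse cell-type prediction columns onto human
# cell types via the two-column name map. One plausible reading, averaging
# all mouse columns mapped to the same human type (the map file's column
# order is assumed to be mouse, human):
def mouse2human_sketch(mouse_predictions, mouse_cell_types, name_map):
    human_types = np.unique(name_map[:, 1])
    out = np.zeros((mouse_predictions.shape[0], len(human_types)))
    for j, human in enumerate(human_types):
        mouse_names = set(name_map[name_map[:, 1] == human, 0])
        cols = [i for i, m in enumerate(mouse_cell_types) if m in mouse_names]
        out[:, j] = mouse_predictions[:, cols].mean(axis=1)
    return out, human_types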
np.save(output_directory + 'OCR_matrix.npy', OCR_matrix)

# Filter influence computation
filter_number_layer_1 = 300
window_size = x_subset.shape[1]
target_layer_name = 'max_pooling1d_3'

# Start from the full-model predictions, repeated once per filter; each
# row is then overwritten with the predictions obtained after masking
# out that filter.
# predictions_by_filter = np.zeros((len(x_subset), filter_number_layer_1, num_classes))
predictions_by_filter = np.expand_dims(predicted_labels_subset, 1)
predictions_by_filter = np.repeat(predictions_by_filter,
                                  filter_number_layer_1, axis=1)

for filter_number in range(filter_number_layer_1):
    print('The current filter is: ' + str(filter_number))
    filter_mask = np.ones((len(x_subset), window_size, filter_number_layer_1))
    # Zero out the current filter. Comment out the next line to verify
    # that an all-ones mask reproduces the original predictions
    # (change == 0), i.e. that the weights were copied correctly.
    filter_mask[:, :, filter_number] = 0
    filter_model = utils_influence.create_filter_model(model, window_size,
                                                       target_layer_name)
    # filter_model.summary()
    predictions_by_filter[:, filter_number, :] = filter_model.predict(
        [x_subset, filter_mask])

correlations = plot_utils.plot_cors(y_subset, predicted_labels_subset,
                                    output_directory)
filt_corr, corr_change, corr_change_mean, corr_change_mean_act = \
    plot_utils.plot_filt_corr_change(predictions_by_filter, y_subset,
                                     correlations, output_directory)
infl, infl_signed_mean, infl_signed_mean_act, infl_absolute_mean, \
    infl_absolute_mean_act = plot_utils.plot_filt_infl(
        predicted_labels_subset, predictions_by_filter,
        output_directory, class_names)

np.save(output_directory + "selected_correlations.npy", correlations)
np.save(output_directory + "selected_correlations_by_removing_filter.npy",
        filt_corr)
np.save(output_directory + 'filter_influence_per_ocr.npy', corr_change)
np.save(output_directory + 'filter_influence_mean.npy', corr_change_mean)
np.save(output_directory + 'filter_influence_mean_activated.npy',
        corr_change_mean_act)
np.save(output_directory + 'influence_celltype_original_per_ocr.npy', infl)
np.save(output_directory + 'filter_cellwise_influence_signed_mean.npy',
        infl_signed_mean)
np.save(output_directory + 'filter_cellwise_influence_signed_mean_activated.npy',
        infl_signed_mean_act)
np.save(output_directory + 'filter_cellwise_influence_absolute_mean.npy',
        infl_absolute_mean)
np.save(output_directory + 'filter_cellwise_influence_absolute_mean_activated.npy',
        infl_absolute_mean_act)
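# Hedged note (assumption): in terms of the arrays above, the per-filter
# influence on peak i can be read as the drop in correlation when filter f
# is masked, e.g.
#
#   corr_change[i, f] = correlations[i] - pearson(y_subset[i],
#                                                 predictions_by_filter[i, f, :])
#
# with the "activated" variants averaging only over peaks where the filter
# actually fires; the exact definitions live in
# plot_utils.plot_filt_corr_change and plot_utils.plot_filt_infl.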