Example No. 1
import numpy as np
import torch
from sklearn.model_selection import KFold

import aitac
import plot_utils


# Assumes batch_size, num_classes, num_filters, learning_rate, num_epochs,
# and device are defined at module level.
def cross_validate(x, y, peak_names, output_file_path):
    kf = KFold(n_splits=10, shuffle=True)

    pred_all = []
    corr_all = []
    peak_order = []
    for train_index, test_index in kf.split(x):
        train_data, eval_data = x[train_index, :, :], x[test_index, :, :]
        train_labels, eval_labels = y[train_index, :], y[test_index, :]
        train_names, eval_names = (peak_names[train_index],
                                   peak_names[test_index])

        # Data loader
        train_dataset = torch.utils.data.TensorDataset(
            torch.from_numpy(train_data), torch.from_numpy(train_labels))
        train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=False)

        eval_dataset = torch.utils.data.TensorDataset(
            torch.from_numpy(eval_data), torch.from_numpy(eval_labels))
        eval_loader = torch.utils.data.DataLoader(dataset=eval_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False)

        # create model
        model = aitac.ConvNet(num_classes, num_filters).to(device)

        # Loss and optimizer
        criterion = aitac.pearson_loss
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        # train model
        model, best_loss = aitac.train_model(train_loader, eval_loader, model,
                                             device, criterion, optimizer,
                                             num_epochs, output_file_path)

        # Predict on test set
        predictions, max_activations, max_act_index = aitac.test_model(
            eval_loader, model, device)

        # plot the correlations histogram
        correlations = plot_utils.plot_cors(eval_labels, predictions,
                                            output_file_path)

        pred_all.append(predictions)
        corr_all.append(correlations)
        peak_order.append(eval_names)

    pred_all = np.vstack(pred_all)
    corr_all = np.hstack(corr_all)
    peak_order = np.hstack(peak_order)

    return pred_all, corr_all, peak_order
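
Example 1 trains with aitac.pearson_loss as its criterion, but the loss itself is not shown in the snippet. A minimal PyTorch sketch of a Pearson-correlation-based loss, written here as an assumption about what such a criterion computes rather than the library's actual implementation:

import torch

def pearson_loss_sketch(pred, target, eps=1e-8):
    # Center each sample (row) around its own mean.
    pred_c = pred - pred.mean(dim=1, keepdim=True)
    target_c = target - target.mean(dim=1, keepdim=True)
    # Per-sample Pearson correlation between prediction and label vectors.
    cov = (pred_c * target_c).sum(dim=1)
    denom = pred_c.norm(dim=1) * target_c.norm(dim=1) + eps
    corr = cov / denom
    # Minimizing 1 - r pushes each prediction toward perfect correlation
    # with its label vector.
    return (1.0 - corr).mean()

A correlation-style loss lines up with how these examples evaluate models, namely by the per-peak correlations returned from plot_utils.plot_cors.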
Example No. 2
plt.legend(['train', 'validation'])
plt.savefig(output_directory + "training_valid_metric_r2_score_2.svg")
plt.close()

#-------------------------------------------#
# Step 3. Evaluate
#-------------------------------------------#
# Using the model with the latest results
[predicted_labels_1, predicted_labels_2] = model.predict([
    test_features, test_labels_mask[:, :num_classes_1],
    test_labels_mask[:, -num_classes_2:]
])

title = "basset_cor_hist_latest_class_1.svg"
correlations_1 = plot_utils.plot_cors(
    test_labels[test_tags == 1, :][:, :num_classes_1],
    predicted_labels_1[test_tags == 1, :], output_directory, title)

title = "basset_cor_hist_latest_class_2.svg"
correlations_2 = plot_utils.plot_cors(
    test_labels[test_tags == 2, :][:, -num_classes_2:],
    predicted_labels_2[test_tags == 2, :], output_directory, title)

# Using the Best weights
model = create_model(input_size,
                     num_classes_1,
                     num_classes_2,
                     batch_size,
                     combining_weight,
                     weight_flag,
                     ratio=1.0,
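
Example 2 selects rows with a boolean mask and then slices columns in a second step (test_labels[test_tags == 1, :][:, :num_classes_1]). A small self-contained NumPy illustration of that two-step indexing pattern, with made-up array names and sizes:

import numpy as np

labels = np.arange(24).reshape(4, 6)            # 4 samples x 6 outputs
tags = np.array([1, 2, 1, 2])                   # which head each sample belongs to
num_classes_1 = 4

# Step 1: the boolean mask keeps only the rows tagged for head 1.
rows_head_1 = labels[tags == 1, :]              # shape (2, 6)
# Step 2: the column slice keeps only that head's outputs.
head_1_labels = rows_head_1[:, :num_classes_1]  # shape (2, 4)
print(head_1_labels.shape)                      # (2, 4)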
Example No. 3
# Predict on test set
predictions, max_activations, max_act_index = aitac.test_model(
    eval_loader, model, device)

#-------------------------------------------#
#               Create Plots                #
#-------------------------------------------#

# plot the correlations histogram
# returns correlation measurement for every prediction-label pair
print("Creating plots...")

#plot_utils.plot_training_loss(training_loss, output_file_path)

correlations = plot_utils.plot_cors(eval_labels, predictions, output_file_path)

plot_utils.plot_corr_variance(eval_labels, correlations, output_file_path)

quantile_indx = plot_utils.plot_piechart(correlations, eval_labels,
                                         output_file_path)

#-------------------------------------------#
#                 Save Files                #
#-------------------------------------------#

#save predictions
np.save(output_file_path + "predictions.npy", predictions)

#save correlations
np.save(output_file_path + "correlations.npy", correlations)
Example No. 4
                                          batch_size=batch_size,
                                          shuffle=False)

# load trained model
model = aitac.ConvNet(num_classes, num_filters).to(device)
checkpoint = torch.load('../models/' + model_name + '.ckpt')
model.load_state_dict(checkpoint)

#copy trained model weights to motif extraction model
motif_model = aitac.motifCNN(model).to(device)
motif_model.load_state_dict(model.state_dict())

# run predictions with full model on all data
pred_full_model, max_activations, activation_idx = aitac.test_model(
    data_loader, model, device)
correlations = plot_utils.plot_cors(y, pred_full_model, output_file_path)

# find well predicted OCRs
idx = np.argwhere(np.asarray(correlations) > 0.75).squeeze()

#get data subset for well predicted OCRs to run further test
x2 = x[idx, :, :]
y2 = y[idx, :]

dataset = torch.utils.data.TensorDataset(torch.from_numpy(x2),
                                         torch.from_numpy(y2))
data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# non-modified results for well-predicted OCRs only
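
Example 4 restores the checkpoint with torch.load('../models/' + model_name + '.ckpt'), which assumes the saved tensors can be deserialized onto the current device. If the model was trained on a GPU and the checkpoint is later loaded where CUDA is unavailable, a map_location argument avoids a load failure; a drop-in variant of those two lines under that assumption:

# Remap stored tensors onto whatever device this run is using.
checkpoint = torch.load('../models/' + model_name + '.ckpt',
                        map_location=device)
model.load_state_dict(checkpoint)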
Example No. 5
plt.plot(history.history['mean_squared_error'])
plt.plot(history.history['val_mean_squared_error'])
plt.title('model mean squared error')
plt.ylabel('mse')
plt.xlabel('epoch')
plt.legend(['train', 'validation'])
plt.savefig(output_directory + "training_valid_metric.svg")
plt.close()
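
The history object plotted above is produced by Keras model.fit. A minimal sketch of the kind of compile/fit call that yields the 'mean_squared_error' and 'val_mean_squared_error' keys used here; the variable names and settings are placeholders, not the original training code:

# Hypothetical training call; the real architecture and data are not shown.
model.compile(optimizer='adam', loss='mse', metrics=['mean_squared_error'])
history = model.fit(train_features, train_labels,
                    validation_data=(valid_features, valid_labels),
                    epochs=num_epochs, batch_size=batch_size)
# history.history now holds per-epoch lists under 'loss', 'val_loss',
# 'mean_squared_error', and 'val_mean_squared_error'.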

#-------------------------------------------#
# Step 3. Evaluate
#-------------------------------------------#
# Using the model with the latest results
predicted_labels = model.predict(np.stack(test_features))
title = "basset_cor_hist_latest.svg"
correlations = plot_utils.plot_cors(test_labels, predicted_labels,
                                    output_directory, title)

# Using the Best weights
model = create_model(input_size, num_classes, learning_rate,
                     combined_loss_weight)
model.load_weights(checkpoint_path_weights)
model.save(output_directory + 'whole_model_best.h5')
predicted_labels = model.predict(np.stack(test_features))

title = "basset_cor_hist_best.svg"
correlations = plot_utils.plot_cors(test_labels, predicted_labels,
                                    output_directory, title)
plot_utils.plot_corr_variance(test_labels, correlations, output_directory)
quantile_indx = plot_utils.plot_cors_piechart(correlations, test_labels,
                                              output_directory)
plot_utils.plot_random_predictions(test_labels, predicted_labels, correlations,
Example No. 6
# convert predictions from mouse cell types to human cell types
celltype_map = np.genfromtxt("../human_data/mouse_human_celltypes.txt",
                             dtype='str')
mouse_cell_types = np.genfromtxt("../data/cell_type_names.txt", dtype='str')
predictions, cell_names = plot_utils.mouse2human(mouse_predictions,
                                                 mouse_cell_types,
                                                 celltype_map)
print(cell_names)

#-------------------------------------------#
#               Create Plots                #
#-------------------------------------------#

# plot the correlations histogram
# returns correlation measurement for every prediction-label pair
print("Creating plots...")

correlations = plot_utils.plot_cors(y, predictions, output_file_path)

plot_utils.plot_corr_variance(y, correlations, output_file_path)

quantile_indx = plot_utils.plot_piechart(correlations, y, output_file_path)

#-------------------------------------------#
#                 Save Files                #
#-------------------------------------------#

#save predictions
np.save(output_file_path + "predictions.npy", predictions)

#save correlations
np.save(output_file_path + "correlations.npy", correlations)
np.save(output_directory + 'OCR_matrix.npy', OCR_matrix)

# Filter influence computation
filter_number_layer_1 = 300
window_size = x_subset.shape[1]
target_layer_name = 'max_pooling1d_3'
# predictions_by_filter = np.zeros((len(x_subset), filter_number_layer_1, num_classes))
predictions_by_filter = np.expand_dims(predicted_labels_subset, 1)
predictions_by_filter = np.repeat(predictions_by_filter, filter_number_layer_1, axis=1)
for filter_number in range(filter_number_layer_1):
    print('The current filter is: ' + str(filter_number))
    filter_mask = np.ones((len(x_subset), window_size, filter_number_layer_1))
    # Comment out the next line to sanity-check that nothing changes when no
    # filter is masked (i.e. the weights were copied from the original model).
    filter_mask[:, :, filter_number] = 0
    filter_model = utils_influence.create_filter_model(model, window_size, target_layer_name)
    # filter_model.summary()
    predictions_by_filter[:, filter_number, :] = filter_model.predict([x_subset, filter_mask])

correlations = plot_utils.plot_cors(y_subset, predicted_labels_subset,
                                    output_directory)
(filt_corr, corr_change, corr_change_mean,
 corr_change_mean_act) = plot_utils.plot_filt_corr_change(
     predictions_by_filter, y_subset, correlations, output_directory)
(infl, infl_signed_mean, infl_signed_mean_act, infl_absolute_mean,
 infl_absolute_mean_act) = plot_utils.plot_filt_infl(
     predicted_labels_subset, predictions_by_filter, output_directory,
     class_names)
np.save(output_directory + "selected_correlations.npy", correlations)
np.save(output_directory + "selected_correlations_by_removing_filter.npy", filt_corr)
np.save(output_directory + 'filter_influence_per_ocr.npy', corr_change)
np.save(output_directory + 'filter_influence_mean.npy', corr_change_mean)
np.save(output_directory + 'filter_influence_mean_activated.npy', corr_change_mean_act)
np.save(output_directory + 'influence_celltype_original_per_ocr.npy', infl)
np.save(output_directory + 'filter_cellwise_influence_signed_mean.npy', infl_signed_mean)
np.save(output_directory + 'filter_cellwise_influence_signed_mean_activated.npy', infl_signed_mean_act)
np.save(output_directory + 'filter_cellwise_influence_absolute_mean.npy', infl_absolute_mean)
np.save(output_directory + 'filter_cellwise_influence_absolute_mean_activated.npy', infl_absolute_mean_act)
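
utils_influence.create_filter_model is not shown in these snippets. One way such a masking model could be assembled with the Keras functional API, sketched under the assumptions that the base model is a plain single-input layer stack and that the mask input matches the masked layer's output shape; this is a hypothetical reconstruction, not the project's code:

from tensorflow.keras import layers, Model

def create_filter_model_sketch(base_model, target_layer_name):
    # Multiply the target layer's activations by a 0/1 mask so individual
    # filters can be silenced at prediction time.
    target_layer = base_model.get_layer(target_layer_name)
    mask_input = layers.Input(shape=target_layer.output_shape[1:],
                              name='filter_mask')
    x = layers.Multiply()([target_layer.output, mask_input])
    # Re-apply every layer that follows the target one (assumes a linear
    # stack of single-input layers).
    layer_names = [l.name for l in base_model.layers]
    for layer in base_model.layers[layer_names.index(target_layer_name) + 1:]:
        x = layer(x)
    return Model(inputs=[base_model.input, mask_input], outputs=x)

The loop above would then feed [x_subset, filter_mask] to such a model, zeroing one filter's mask column per iteration.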