def embed_motifs(self, bg_seqs, pwms):
    """Embed one sampled motif instance at a random location in each background sequence."""
    print("embedding motifs...")
    embedded_seqs = []
    pwm = pwms[0]  # only the first PWM is used
    locs = []
    for seq in bg_seqs:
        mf_instance = mf.sample_motif(pwm, random_rev_comp=False)
        loc = np.random.randint(0, 500)  # random insertion position
        locs.append(loc)
        embedded_seqs.append(mf.replace_at(seq, mf_instance, loc))
    utils.plot_hist(locs)  # sanity-check the distribution of insertion positions
    return embedded_seqs
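# utils.plot_hist is not shown in this fragment. For the 1-D usage above (a quick look at
# where the motifs were embedded), a minimal sketch of such a helper could be the following;
# the signature and behaviour are assumptions, not the project's actual implementation.
import matplotlib.pyplot as plt
import numpy as np


def plot_hist(values, bins=50, title=None):
    """Plot a simple histogram of a 1-D sequence of values."""
    plt.figure()
    plt.hist(np.asarray(values), bins=bins)
    if title is not None:
        plt.title(title)
    plt.xlabel("value")
    plt.ylabel("count")
    plt.show()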
def _calc_ranges(self, _ev=None):
    values = self.map_image.get_bands(self.map_window.channels_histogram)
    self.x_range, self.y_range = ([values[i].min(), values[i].max()] for i in range(2))
    for i in range(2):
        # mask values at (or numerically indistinguishable from) the band minimum
        values[i][values[i] < values[i].min() + 1e-8] = np.nan
    self.graphs = [plot_hist(values[i]) for i in range(2)]
            cnt[img_arr[i, j]] += 1
    return cnt[:-1], intensity


def intensity_hist(img_arr, n_bins=256):
    bins = np.arange(n_bins + 1)
    cnt, intensity = poy_histogram(img_arr, bins=bins)
    return cnt, intensity[:-1]


L = 256
sample2_cnt, sample2_intensity = intensity_hist(sample2_arr, n_bins=L)
result3_cnt, result3_intensity = intensity_hist(result3_arr, n_bins=L)
result4_cnt, result4_intensity = intensity_hist(result4_arr, n_bins=L)
utils.plot_hist("prob2c",
                [sample2_intensity, sample2_cnt],
                [result3_intensity, result3_cnt],
                [result4_intensity, result4_cnt])


# prob (d)
def poy_cumsum(arr):
    res = []
    cumsum = 0
    for a in arr:
        cumsum += a
        res.append(cumsum)
    res = np.array(res)
    return res


def global_hist_equal(img_arr):
    '''
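# The body of global_hist_equal is truncated above. As an illustration only (not the
# original implementation), a standard global histogram equalization can be assembled from
# the helpers already defined: compute the intensity histogram, accumulate it into a CDF
# with poy_cumsum, rescale the CDF to [0, L-1], and remap each pixel through the resulting
# lookup table. The function name below is hypothetical.
def global_hist_equal_sketch(img_arr, n_bins=256):
    cnt, _ = intensity_hist(img_arr, n_bins=n_bins)   # per-level pixel counts
    cdf = poy_cumsum(cnt)                             # cumulative counts
    cdf = (n_bins - 1) * cdf / cdf[-1]                # rescale CDF to [0, L-1]
    lut = np.round(cdf).astype(np.uint8)              # old intensity -> equalized intensity
    return lut[img_arr]                               # remap every pixel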
    epoch_train_loss.append(np.mean(train_losses))
    epoch_val_loss.append(np.mean(val_losses))
    epoch_train_dsc.append(np.mean(train_dsc))
    epoch_val_dsc.append(np.mean(val_dsc))

    early_stopping(np.average(val_losses), model)
    if early_stopping.early_stop:
        print("Early stopping at epoch: ", epoch)
        break

print('=' * 30)
print('Average DSC score =', np.array(val_dsc).mean())
utils.plot_hist(epoch, np.array(epoch_train_loss), np.array(epoch_val_loss), "Loss")
utils.plot_hist(epoch, np.array(epoch_train_dsc), np.array(epoch_val_dsc), "DSC Score")

# check model outputs on validation data
model.eval()
idx = np.random.randint(0, batch_size)
val_dsc = []
with torch.no_grad():
    for x_val, y_val in val_loader:
        x_val, y_val = x_val.to(device), y_val.to(device)
        preds = model(x_val)
        dsc = losses.dice_score(preds, y_val)
        val_dsc.append(dsc / x_val.shape[0])
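# Note that utils.plot_hist is called here as (last_epoch, train_values, val_values, label),
# i.e. it draws train/validation curves rather than a histogram. The helper itself is not in
# this fragment; a minimal sketch matching that call pattern (an assumption, not the
# project's code) could be:
import matplotlib.pyplot as plt
import numpy as np


def plot_hist(last_epoch, train_values, val_values, label):
    """Plot training vs. validation metric curves for the completed epochs."""
    xs = np.arange(1, len(train_values) + 1)
    plt.figure()
    plt.plot(xs, train_values, label="train")
    plt.plot(xs, val_values, label="val")
    plt.xlabel("epoch")
    plt.ylabel(label)
    plt.title("%s (stopped after epoch %d)" % (label, last_epoch + 1))
    plt.legend()
    plt.show()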
                            batch_size=args.batch_size, shuffle=False,
                            pin_memory=True, num_workers=8)

# random search
start = time.time()
best_acc = 0.0
acc_list = list()
best_choice = list()
for epoch in range(args.random_search):
    choice = utils.random_choice(args.num_choices, args.layers)
    top1_acc = validate(args, epoch, val_loader, device, model, criterion,
                        super=True, choice=choice)
    acc_list.append(top1_acc)
    if best_acc < top1_acc:
        best_acc = top1_acc
        best_choice = choice

print('acc_list:')
for i in acc_list:
    print(i)
print('best_acc:{} \nbest_choice:{}'.format(best_acc, best_choice))
utils.plot_hist(acc_list, name=args.exp_name)
utils.time_record(start)
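# utils.random_choice is not defined in this fragment. For single-path supernet evaluation
# like the loop above, it presumably samples one candidate operation index per layer; a
# minimal sketch under that assumption (a guess at the behaviour, not the original code):
import numpy as np


def random_choice(num_choices, layers):
    """Sample one operation index per layer, i.e. a random path through the supernet."""
    return list(np.random.randint(low=0, high=num_choices, size=layers))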
          '[%d]/[%d] Train Loss:%.4f\t Train Acc:%.4f\t Val Loss:%.4f\t Val Acc: %.4f' %
          (epoch + 1, epochs, np.mean(train_loss), np.mean(train_acc),
           np.mean(val_loss), np.mean(val_acc)))

    epoch_train_loss.append(np.mean(train_loss))
    epoch_val_loss.append(np.mean(val_loss))
    epoch_train_acc.append(np.mean(train_acc))
    epoch_val_acc.append(np.mean(val_acc))

    early_stopping(np.average(val_loss), model)
    if early_stopping.early_stop:
        print("Early stopping at epoch: ", epoch)
        break

utils.plot_hist(epoch, np.array(epoch_train_loss), np.array(epoch_val_loss), "Loss")
plt.title('Lowest loss = %.4f' % epoch_val_loss[-1])
utils.plot_hist(epoch, np.array(epoch_train_acc), np.array(epoch_val_acc), "Acc")
plt.title('Best accuracy = %.4f' % epoch_val_acc[-1])

# Load up the best model and view preds
model.load_state_dict(torch.load('checkpoint.pt'))
outputs = []


def hook(module, input, output):
    outputs.append(output)
from keras.models import Sequential
from keras.layers import Dense

import sys
sys.path.append("../")
from utils import load_mnist_1D
from utils import save_model_viz, save_weights, save_hist, plot_hist

RUN_ID = 'mlp'

(x_train, y_train), (x_test, y_test) = load_mnist_1D()

model = Sequential()
model.add(Dense(64, input_shape=(784, ), activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

save_model_viz(RUN_ID, model)

hist = model.fit(x_train, y_train, epochs=5, batch_size=32, verbose=1,
                 validation_data=(x_test, y_test))

save_weights(RUN_ID, model)
save_hist(RUN_ID, hist)
plot_hist(RUN_ID)
y_aug = np.array(y_new)

# ====== print info ====== #
print("Train set:", x_train.shape, y_train.shape)
print("Valid set:", x_valid.shape, y_valid.shape)
print("Test set:", x_test.shape, y_test.shape)
print("Augmented training set:", x_aug.shape, y_aug.shape)

# ====== checking distribution of train, valid, test matching ====== #
train_dist = itemfreq(y_train)
valid_dist = itemfreq(y_valid)
test_dist = itemfreq(y_test)

plt.figure()
ax = plt.subplot(3, 1, 1)
plot_hist(y_train, ax, "Training distribution")
ax = plt.subplot(3, 1, 2)
plot_hist(y_valid, ax, "Validating distribution")
ax = plt.subplot(3, 1, 3)
plot_hist(y_test, ax, "Testing distribution")
plt.tight_layout()

# ====== convert labels to one_hot for training ====== #
labels = ["Number: %d" % i for i in y_train[:16]]
y_train = one_hot(y_train, nb_classes=10)
y_aug = one_hot(y_aug, nb_classes=10)
y_test = one_hot(y_test, nb_classes=10)
y_valid = one_hot(y_valid, nb_classes=10)

plt.figure()
plt.imshow(y_train[:16], cmap=plt.cm.Greys_r)
plt.xticks(np.arange(10))
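# Here plot_hist takes (labels, ax, title) and draws a class-distribution histogram onto an
# existing subplot axis. The helper is not part of this fragment; a minimal sketch with that
# signature (an assumption, not the original implementation) could be:
import numpy as np


def plot_hist(labels, ax, title):
    """Draw a histogram of integer class labels onto a given matplotlib axis."""
    labels = np.asarray(labels)
    classes = np.unique(labels)
    ax.hist(labels, bins=np.arange(classes.min(), classes.max() + 2) - 0.5, rwidth=0.8)
    ax.set_xticks(classes)
    ax.set_title(title)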
def compute_deeplift_scores(TARGET_DIRECTORY, dataset, X, Y, keras_model_file,
                            reference_label, non_reference_label, base_neuron_label,
                            mask, gpu_id, dropout, threshold, percentage_cutoff,
                            cluster_mask, flags):
    """
    Wrapper function for model reduction, called by main.py
    Uses DeepLIFT to compute saliency scores for feature selection, with the average data used as reference
    See https://github.com/kundajelab/deeplift for DeepLIFT implementation

    Inputs:
    - TARGET_DIRECTORY: general directory path to write files to (str)
    - dataset: choice of dataset, along with seed and fold number (str)
    - X: Numpy array containing data matrices
    - Y: Numpy array containing data labels
    - keras_model_file: name of existing model file (str)
    - reference_label: how the reference class is represented in Y, usually 0 (int)
    - non_reference_label: how the other class(es) is (are) represented in Y, usually 1 (int)
    - base_neuron_label: label to be used as the base, usually 0 (int)
    - mask: Numpy array, usually initialised as all 1s unless neurons are repeatedly removed
    - gpu_id: ID of GPU to use (int)
    - dropout: fraction of neurons to turn off (float)
    - threshold: usually set as 1.0, represents the previous percentage_cutoff when repeatedly removing neurons (float)
    - percentage_cutoff: usually set as 0.95 to keep 5% of the most significant features (float)
    - cluster_mask: Numpy array containing mask obtained from CLIP
    - flags: used to vary model settings, see main.py (dict)

    Returns:
    - new_model_file: directory path to the new model (str)
    - mask_2D_flattened: Numpy array of 1s and 0s, with 1 representing a selected feature
    """
    keras_model = keras.models.load_model(keras_model_file)
    print(keras_model.summary())  # original model

    deeplift_model = kc.convert_model_from_saved_files(
        keras_model_file,
        nonlinear_mxts_mode=deeplift.layers.NonlinearMxtsMode.DeepLIFT_GenomicsDefault)
    print(deeplift_model.get_layers())

    mode = 'average'
    X_masked, mapping = get_masked_data(X, mask)
    Y = np.argmax(Y, axis=1)
    reference = get_reference(mode, reference_label, X_masked, Y)

    print('+++++++++++++ Computing DeepLIFT scores ++++++++++++++')
    print('previous threshold', threshold, 'new threshold', percentage_cutoff)

    find_scores_layer_idx = 0
    input_scores = np.zeros(X_masked[Y == non_reference_label].shape)
    layer_scores = []
    task_id = base_neuron_label
    for layer_idx, layer in enumerate(deeplift_model.get_layers()):
        if type(layer).__name__ == 'Dense' or type(layer).__name__ == 'Input':
            deeplift_contribs_func = deeplift_model.get_target_contribs_func(
                find_scores_layer_idx=layer_idx, target_layer_idx=-2)
            scores = np.array(deeplift_contribs_func(
                task_idx=task_id,
                input_references_list=reference,
                input_data_list=[X_masked[Y == non_reference_label]],
                batch_size=10,
                progress_update=50))
            sum_scores = np.zeros(scores.shape[1])
            for score in scores:
                sum_scores += score
            sum_scores = np.absolute(sum_scores)
            if sum_scores.shape[0] > 2:
                plot_hist(TARGET_DIRECTORY, sum_scores,
                          dataset + '_t_' + str(threshold) + '_layer_' + str(layer_idx))
            print('layer', layer_idx, 'type is: ', type(layer),
                  'scores dimensions are: ', scores.shape, 'sum_scores', sum_scores.shape)
            layer_scores.append(sum_scores)
            if layer_idx == 0:
                input_scores = np.square(scores)
                layer_scores.append([])
        elif type(layer).__name__ == 'NoOp' or type(layer).__name__ == 'Softmax':
            layer_scores.append([])
            print('layer', layer_idx, 'type is: ', type(layer).__name__)

    alpha = (1 - (percentage_cutoff / threshold))
    new_model, mask_2D = compute_new_reduced_model(keras_model, dropout, layer_scores, 2, alpha, mapping,
                                                   cluster_mask, flags)

    input_sum_scores = np.zeros(X_masked.shape[1])
    for input_score in input_scores:
        input_sum_scores += input_score
    padded_sum_scores = get_padded_data(input_sum_scores, mapping)
    full_matrix = create_matrix(padded_sum_scores)

    mkdir(TARGET_DIRECTORY + './important_features/')
    np.savetxt(TARGET_DIRECTORY + './important_features/' + dataset +
               '_scores_deeplift_reduced_r_' + str(threshold) + '_t_' + str(percentage_cutoff) + '.csv',
               np.transpose(np.array(input_scores)), delimiter=',')
    np.savetxt(TARGET_DIRECTORY + './important_features/' + dataset +
               '_scores_reshaped_reduced_r_' + str(threshold) + '_t_' + str(percentage_cutoff) + '.csv',
               full_matrix, delimiter=",")
    np.savetxt(TARGET_DIRECTORY + './important_features/' + dataset +
               '_deeplift_features_nodes_r_' + str(threshold) + '_t_' + str(percentage_cutoff) + '.csv',
               mask_2D)

    mkdir(TARGET_DIRECTORY + './reduced_models/')
    new_model_file = (TARGET_DIRECTORY + './reduced_models/' + dataset +
                      '_from_' + str(threshold) + '_to_' + str(percentage_cutoff) + '.h5')
    new_model.save(new_model_file)
    os.remove(keras_model_file)

    mask_2D_flattened = corr_mx_flatten_single(mask_2D)
    return new_model_file, mask_2D_flattened
def save_and_plot_stats_environment(self, run_num):
    """Saves all environment-relevant statistics for later analysis."""
    run_subfolder = "game_" + str(run_num) + "/"
    os.makedirs(self.plot_directory + run_subfolder + "turtle_dynamics/")
    self.exp_details()

    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'agent_act_list.csv',
               self.agent_act_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'human_act_list.csv',
               self.human_act_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'action_timesteps.csv',
               self.action_timesteps, delimiter=',', fmt='%f')

    human_act_list = np.genfromtxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'human_act_list.csv',
                                   delimiter=',')
    agent_act_list = np.genfromtxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'agent_act_list.csv',
                                   delimiter=',')

    plot_hist(agent_act_list,
              self.plot_directory + run_subfolder + "turtle_dynamics/" + 'agent_act_hist_game_' + str(run_num),
              'Agent Action Histogram')
    plot_hist(human_act_list,
              self.plot_directory + run_subfolder + "turtle_dynamics/" + 'human_act_hist_game_' + str(run_num),
              'Human Action Histogram')

    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'turtle_pos_x.csv',
               self.turtle_pos_x, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'turtle_pos_y.csv',
               self.turtle_pos_y, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'time_turtle_pos.csv',
               self.time_turtle_pos, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'turtle_vel_x.csv',
               self.turtle_vel_x, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'turtle_vel_y.csv',
               self.turtle_vel_y, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'time_turtle_vel.csv',
               self.time_turtle_vel, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'turtle_accel_x.csv',
               self.turtle_acc_x, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'turtle_accel_y.csv',
               self.turtle_acc_y, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'time_turtle_acc.csv',
               self.time_turtle_acc, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'real_act_list.csv',
               self.real_act_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'time_real_act_list.csv',
               self.time_real_act_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'used_human_act_list.csv',
               self.used_human_act_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + run_subfolder + "turtle_dynamics/" + 'used_human_act_time_list.csv',
               self.used_human_act_time_list, delimiter=',', fmt='%f')

    subplot(self.plot_directory + run_subfolder + "turtle_dynamics/",
            self.turtle_pos_x, self.turtle_vel_x, self.turtle_acc_x,
            self.time_turtle_pos, self.time_turtle_vel, self.time_turtle_acc,
            human_act_list, self.action_timesteps, "x", control_mode)
    subplot(self.plot_directory + run_subfolder + "turtle_dynamics/",
            self.turtle_pos_y, self.turtle_vel_y, self.turtle_acc_y,
            self.time_turtle_pos, self.time_turtle_vel, self.time_turtle_acc,
            agent_act_list, self.action_timesteps, "y", control_mode)
plt.figure("Real_Human_Actions_Comparison", figsize=(25, 10)) plt.grid() plt.ylabel('Human Actions') plt.xlabel('Msg Timestamp(seconds)') # plt.xticks(plt.xticks(np.arange(min(min(time_real_act_list),min(self.used_human_act_time_list)), max(max(time_real_act_list),max(used_human_act_time_list)), 1))) plt.scatter(self.time_real_act_list, self.real_act_list) plt.scatter(self.used_human_act_time_list, self.used_human_act_list) plt.savefig(self.plot_directory + run_subfolder + "human_real_action_comparison", dpi=150) self.reset_lists()
def save_and_plot_stats_rl(self):
    """Saves & plots all the RL-relevant statistics for later analysis."""
    np.savetxt(self.plot_directory + 'rl_dynamics/' + 'alpha_values.csv', self.alpha_values, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'rl_dynamics/' + 'policy_loss.csv', self.policy_loss_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'rl_dynamics/' + 'value_loss.csv', self.value_loss_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'rl_dynamics/' + 'q_loss.csv', self.q_loss_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'rl_dynamics/' + 'rewards_list.csv', self.rewards_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'rl_dynamics/' + 'turn_list.csv', self.turn_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'rl_dynamics/' + 'means.csv', self.mean_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'rl_dynamics/' + 'stdev.csv', self.stdev_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'rl_dynamics/' + 'critics_lr_list.csv', self.critics_lr_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'rl_dynamics/' + 'value_critic_lr_list.csv', self.value_critic_lr_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'rl_dynamics/' + 'actor_lr_list.csv', self.actor_lr_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'rl_dynamics/' + 'trials_list.csv', self.trials_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'agent_act_list_total.csv', self.agent_act_list_total, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'human_act_list_total.csv', self.human_act_list_total, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'human_action_delay_list.csv', self.human_action_delay_list, delimiter=',', fmt='%f')
    np.savetxt(self.plot_directory + 'fps_list.csv', self.fps_list, delimiter=',', fmt='%f')
    plot_hist(self.fps_list, self.plot_directory, 'fps_list_' + str(self.game.fps))
    np.savetxt(self.plot_directory + 'exec_time_list.csv', self.exec_time_list, delimiter=',', fmt='%f')

    plot(range(len(self.alpha_values)), self.alpha_values, "alpha_values", 'Alpha Value',
         'Number of Gradient Updates', self.plot_directory, save=True)
    plot(range(len(self.policy_loss_list)), self.policy_loss_list, "policy_loss", 'Policy loss',
         'Number of Gradient Updates', self.plot_directory, save=True)
    plot(range(len(self.value_loss_list)), self.value_loss_list, "value_loss_list", 'Value loss',
         'Number of Gradient Updates', self.plot_directory, save=True)
    plot(range(len(self.rewards_list)), self.rewards_list, "Rewards_per_game", 'Total Rewards per Game',
         'Number of Games', self.plot_directory, save=True)
    plot(range(len(self.turn_list)), self.turn_list, "Steps_per_game", 'Turns per Game',
         'Number of Games', self.plot_directory, save=True)
    plot(range(len(self.critics_lr_list)), self.critics_lr_list, "critics_lr_list", 'Critic lr',
         'Number of Gradient Updates', self.plot_directory, save=True)
    plot(range(len(self.value_critic_lr_list)), self.value_critic_lr_list, "value_critic_lr_list", 'Value lr',
         'Number of Gradient Updates', self.plot_directory, save=True)
    plot(range(len(self.actor_lr_list)), self.actor_lr_list, "actor_lr_list", 'Actor lr',
         'Number of Gradient Updates', self.plot_directory, save=True)

    try:
        plot(range(UPDATE_INTERVAL, MAX_STEPS + UPDATE_INTERVAL, UPDATE_INTERVAL), self.mean_list,
             "trials", 'Tests Score', 'Number of Interactions', self.plot_directory,
             save=True, variance=True, stdev=self.stdev_list)
    except:
ploted") plot_hist(self.agent_act_list_total, self.plot_directory + 'agent_act_hist_total', 'Agent Action Histogram') plot_hist(self.human_act_list_total, self.plot_directory + 'human_act_hist_total', 'Human Action Histogram') # human_action_delay_list_new = [elem for elem in self.human_action_delay_list if elem <0.8] # remove outliers caused by saving and plotting functions plot_hist(self.human_action_delay_list, self.plot_directory + 'human_action_delay_list', 'Human Action Delay Histogram')