예제 #1
0
 def embed_motifs(self, bg_seqs, pwms):
     """Embed one freshly sampled motif instance into every background sequence.

     Only the first entry of ``pwms`` is used. For each sequence a motif
     instance is sampled (without random reverse-complementing) and written
     into the sequence at a position drawn from ``np.random.randint(0, 500)``.
     The drawn positions are passed to ``utils.plot_hist`` before returning.

     NOTE(review): the upper bound 500 is hard-coded and independent of the
     sequence length -- confirm every background sequence is long enough.

     Args:
         bg_seqs: iterable of background sequences.
         pwms: indexable collection of PWMs; ``pwms[0]`` is the one embedded.

     Returns:
         list with one ``mf.replace_at`` result per background sequence,
         in input order.
     """
     print("embedding motifs...")
     result = []
     motif_pwm = pwms[0]
     positions = []
     for background in bg_seqs:
         # Sample the motif first, then the position, so the random draws
         # happen in the same order for every sequence.
         instance = mf.sample_motif(motif_pwm, random_rev_comp=False)
         position = np.random.randint(0, 500)
         positions.append(position)
         result.append(mf.replace_at(background, instance, position))
     utils.plot_hist(positions)
     return result
예제 #2
0
    def _calc_ranges(self, _ev=None):
        """Refresh the axis ranges and histogram graphs for the two bands.

        Reads the bands selected by ``self.map_window.channels_histogram``
        from ``self.map_image``, stores ``[min, max]`` of band 0 / band 1 in
        ``self.x_range`` / ``self.y_range``, replaces every entry that lies
        within 1e-8 of its band's minimum with NaN, and finally stores one
        ``plot_hist`` result per band in ``self.graphs``.

        Args:
            _ev: unused; presumably an event object supplied when this is
                used as a callback -- TODO confirm against the caller.
        """
        bands = self.map_image.get_bands(self.map_window.channels_histogram)

        # Ranges are taken before the minimum values are blanked out below.
        x_limits = [bands[0].min(), bands[0].max()]
        y_limits = [bands[1].min(), bands[1].max()]
        self.x_range, self.y_range = x_limits, y_limits

        tolerance = 0.00000001
        for band_idx in (0, 1):
            at_minimum = bands[band_idx] < bands[band_idx].min() + tolerance
            bands[band_idx][at_minimum] = np.nan

        self.graphs = [plot_hist(bands[0]), plot_hist(bands[1])]
예제 #3
0
            cnt[ img_arr[i, j] ] += 1
    return cnt[:-1], intensity

def intensity_hist(img_arr, n_bins=256):
    """Histogram ``img_arr`` over the integer bin edges ``0 .. n_bins``.

    Delegates the counting to ``poy_histogram`` and drops the last element
    of the second value it returns, so (presumably) counts and intensities
    end up with the same length -- verify against ``poy_histogram``.

    Args:
        img_arr: image data forwarded unchanged to ``poy_histogram``.
        n_bins: number of bins; ``n_bins + 1`` edges are generated.

    Returns:
        ``(counts, intensities)`` as described above.
    """
    bin_edges = np.arange(n_bins + 1)
    counts, levels = poy_histogram(img_arr, bins=bin_edges)
    trimmed_levels = levels[:-1]
    return counts, trimmed_levels

# Number of histogram bins passed to intensity_hist for every image below.
L = 256
# One (counts, intensities) pair per image; the *_arr inputs are defined
# earlier in the script (not visible in this excerpt).
sample2_cnt, sample2_intensity = intensity_hist(sample2_arr, n_bins=L)
result3_cnt, result3_intensity = intensity_hist(result3_arr, n_bins=L)
result4_cnt, result4_intensity = intensity_hist(result4_arr, n_bins=L)

# Pass the three [intensity, count] pairs to the plotting helper.
# "prob2c" is presumably the figure/output name -- confirm in utils.plot_hist.
utils.plot_hist("prob2c", 
                [sample2_intensity, sample2_cnt],
                [result3_intensity, result3_cnt],
                [result4_intensity, result4_cnt]
                )

# prob (d)
def poy_cumsum(arr):
    """Return the running (cumulative) sum of ``arr`` as a NumPy array.

    Element ``k`` of the result is ``arr[0] + ... + arr[k]``. Iteration is
    over the first axis, so a 2-D input yields row-wise running totals.

    Args:
        arr: iterable of numbers (or of equally shaped arrays).

    Returns:
        ``np.ndarray`` with one running total per element of ``arr``;
        an empty array when ``arr`` is empty.
    """
    running_totals = []
    total = 0
    for item in arr:
        # Rebind instead of using ``+=``: once ``total`` is an ndarray,
        # ``+=`` would mutate it in place and every entry already stored in
        # ``running_totals`` would alias the same, final, array.
        total = total + item
        running_totals.append(total)
    return np.array(running_totals)

def global_hist_equal(img_arr):
    '''
예제 #4
0
    epoch_train_loss.append(np.mean(train_losses))
    epoch_val_loss.append(np.mean(val_losses))
    epoch_train_dsc.append(np.mean(train_dsc))
    epoch_val_dsc.append(np.mean(val_dsc))
    
    early_stopping(np.average(val_losses), model)
    
    if early_stopping.early_stop:
        print("Early stopping at epoch: ", epoch)
        break

# Summary printed after the training loop has finished or stopped early.
print('='*30)
# Mean of whatever val_dsc currently holds (it is filled by code above this
# excerpt -- presumably the last epoch's validation scores; verify).
print('Average DSC score =', np.array(val_dsc).mean())


# Plot train vs. validation curves. `epoch` is the loop variable left over
# from the training loop, i.e. the index of the last epoch that ran.
utils.plot_hist(epoch, np.array(epoch_train_loss), np.array(epoch_val_loss), "Loss")
utils.plot_hist(epoch, np.array(epoch_train_dsc), np.array(epoch_val_dsc), "DSC Score")



# check model outputs on validation data
model.eval()
# Random index in [0, batch_size); not used in the lines visible here.
idx = np.random.randint(0,batch_size)

# Re-collect one score per validation batch, with gradients disabled.
val_dsc = []
with torch.no_grad():
    for x_val, y_val in val_loader:
        x_val, y_val = x_val.to(device), y_val.to(device)
        preds = model(x_val)
        dsc = losses.dice_score(preds, y_val)
        # Divide by the size of the first dimension of the batch tensor
        # (presumably the batch size, giving a per-sample score -- confirm).
        val_dsc.append(dsc/x_val.shape[0])
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=8)

    # random search
    start = time.time()
    best_acc = 0.0
    acc_list = list()
    best_choice = list()
    for epoch in range(args.random_search):
        choice = utils.random_choice(args.num_choices, args.layers)
        top1_acc = validate(args,
                            epoch,
                            val_loader,
                            device,
                            model,
                            criterion,
                            super=True,
                            choice=choice)
        acc_list.append(top1_acc)
        if best_acc < top1_acc:
            best_acc = top1_acc
            best_choice = choice
    print('acc_list:')
    for i in acc_list:
        print(i)
    print('best_acc:{} \nbest_choice:{}'.format(best_acc, best_choice))
    utils.plot_hist(acc_list, name=args.exp_name)
    utils.time_record(start)
예제 #6
0
        '[%d]/[%d] Train Loss:%.4f\t Train Acc:%.4f\t Val Loss:%.4f\t Val Acc: %.4f'
        % (epoch + 1, epochs, np.mean(train_loss), np.mean(train_acc),
           np.mean(val_loss), np.mean(val_acc)))

    epoch_train_loss.append(np.mean(train_loss))
    epoch_val_loss.append(np.mean(val_loss))
    epoch_train_acc.append(np.mean(train_acc))
    epoch_val_acc.append(np.mean(val_acc))

    early_stopping(np.average(val_loss), model)

    if early_stopping.early_stop:
        print("Early stopping at epoch: ", epoch)
        break

# Plot the train/validation loss history gathered during training.
utils.plot_hist(epoch, np.array(epoch_train_loss), np.array(epoch_val_loss),
                "Loss")
# The title announces the *lowest* loss, so report the minimum over all
# epochs. The last entry is generally not the minimum when training ended
# through early stopping.
plt.title('Lowest loss = %.4f' % np.min(epoch_val_loss))

# Plot the train/validation accuracy history.
utils.plot_hist(epoch, np.array(epoch_train_acc), np.array(epoch_val_acc),
                "Acc")
# Same reasoning: "best" accuracy is the maximum, not the final value.
plt.title('Best accuracy = %.4f' % np.max(epoch_val_acc))

# Load up the best model and view preds
# ('checkpoint.pt' is presumably written by the early-stopping helper.)
model.load_state_dict(torch.load('checkpoint.pt'))

# Module-level collector filled by ``hook``; starts out empty.
outputs = []


def hook(module, input, output):
    """Record ``output`` in the module-level ``outputs`` list.

    The ``(module, input, output)`` signature matches a PyTorch forward
    hook; the registration itself is not shown in this excerpt. ``module``
    and ``input`` are accepted but ignored.
    """
    captured = output
    outputs.append(captured)
예제 #7
0
from keras.layers import Dense

import sys
sys.path.append("../")
from utils import load_mnist_1D
from utils import save_model_viz, save_weights, save_hist, plot_hist

# Identifier handed to every save_*/plot_* helper below (presumably used to
# name the output files -- see utils).
RUN_ID = 'mlp'

# Train/test splits as returned by the project helper.
(x_train, y_train), (x_test, y_test) = load_mnist_1D()

# Two-layer perceptron: 784 inputs -> 64 ReLU units -> 10-way softmax.
model = Sequential()
model.add(Dense(64, input_shape=(784, ), activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

save_model_viz(RUN_ID, model)

# Train for 5 epochs with batches of 32.
# NOTE: the test split is also used as validation data here.
hist = model.fit(x_train,
                 y_train,
                 epochs=5,
                 batch_size=32,
                 verbose=1,
                 validation_data=(x_test, y_test))

# Persist the weights and the training history, then plot for this run.
save_weights(RUN_ID, model)
save_hist(RUN_ID, hist)
plot_hist(RUN_ID)
예제 #8
0
# Convert the augmented labels collected above (y_new) to a NumPy array.
y_aug = np.array(y_new)

# ====== print info ====== #
print("Train set:", x_train.shape, y_train.shape)
print("Valid set:", x_valid.shape, y_valid.shape)
print("Test set:", x_test.shape, y_test.shape)
print("Augmented training set:", x_aug.shape, y_aug.shape)

# ====== checking distribution of train, valid, test matching ====== #
# NOTE(review): these three results are not used in the lines visible here.
train_dist = itemfreq(y_train)
valid_dist = itemfreq(y_valid)
test_dist = itemfreq(y_test)

# One figure with three stacked subplots, one label histogram per split.
plt.figure()
ax = plt.subplot(3, 1, 1)
plot_hist(y_train, ax, "Training distribution")
ax = plt.subplot(3, 1, 2)
plot_hist(y_valid, ax, "Validating distribution")
ax = plt.subplot(3, 1, 3)
plot_hist(y_test, ax, "Testing distribution")
plt.tight_layout()

# ====== convert labels to one_hot for training ====== #
# Build the text labels first, while y_train still holds integer classes.
labels = ["Number: %d" % i for i in y_train[:16]]
y_train = one_hot(y_train, nb_classes=10)
y_aug = one_hot(y_aug, nb_classes=10)
y_test = one_hot(y_test, nb_classes=10)
y_valid = one_hot(y_valid, nb_classes=10)
# Show the first 16 one-hot rows as a grey-scale image, one tick per class.
plt.figure()
plt.imshow(y_train[:16], cmap=plt.cm.Greys_r)
plt.xticks(np.arange(10))
예제 #9
0
def compute_deeplift_scores(
    TARGET_DIRECTORY, dataset, X, Y, 
    keras_model_file, reference_label, non_reference_label, base_neuron_label, 
    mask, gpu_id, dropout, threshold, percentage_cutoff, cluster_mask,
    flags):
    """
    Wrapper function for model reduction, called by main.py
    Uses DeepLIFT to compute saliency scores for feature selection, with the average data used as reference
    See https://github.com/kundajelab/deeplift for DeepLIFT implementation

    Steps performed, in order:
    1. Load the Keras model and convert it to a DeepLIFT model.
    2. Mask X, collapse Y with argmax over axis 1 and build the reference input.
    3. For every Dense/Input layer, compute contribution scores for the samples
       whose label equals non_reference_label, sum them over samples and take
       the absolute value.
    4. Build the reduced model from the per-layer scores.
    5. Write three CSV files and the reduced model, then delete the original
       model file.

    Inputs:
    - TARGET_DIRECTORY: general directory path to write files to (str);
      it is concatenated directly with './important_features/' and
      './reduced_models/', so it presumably has to end with a path separator
    - dataset: choice of dataset, along with seed and fold number (str) 
    - X: Numpy array containing data matrices
    - Y: Numpy array containing data labels; reduced with np.argmax(Y, axis=1),
      so a 2-D (e.g. one-hot) array is expected
    - keras_model_file: name of existing model file (str); THIS FILE IS DELETED
      at the end of the function
    - reference_label: how the reference class is represented in Y, usually 0 (int)
    - non_reference_label: how the other class(es) is (are) represented in Y, usually 1 (int)
    - base_neuron_label: label to be used as the base, usually 0 (int)
    - mask: Numpy array, usually initialised as all 1s unless neurons are repeatedly removed
    - gpu_id: ID of GPU to use (int); not referenced in the body of this function
    - dropout: fraction of neurons to turn off (float)
    - threshold: usually set as 1.0, represents the previous percentage_cutoff when repeatedly removing neurons (float)
    - percentage_cutoff: usually set as 0.95 to keep 5% of the most significant features (float)
    - cluster_mask: Numpy array containing mask obtained from CLIP
    - flags: used to vary model settings, see main.py (dict)

    Returns:
    - new_model_file: directory path to the new model (str)
    - mask_2D_flattened: Numpy array of 1s and 0s, with 1 representing a selected feature

    Side effects:
    - prints the model summary, the DeepLIFT layers and progress information
    - may call plot_hist once per scored layer
    - writes CSV files under 'important_features/' and the reduced model under
      'reduced_models/'
    - removes keras_model_file from disk
    """

    keras_model = keras.models.load_model(keras_model_file)
    print(keras_model.summary()) # original model

    deeplift_model = kc.convert_model_from_saved_files(
        keras_model_file, 
        nonlinear_mxts_mode=deeplift.layers.NonlinearMxtsMode.DeepLIFT_GenomicsDefault)
    print(deeplift_model.get_layers())

    # The reference mode is fixed to 'average' here.
    mode = 'average'
    X_masked, mapping = get_masked_data(X, mask)
    # Collapse the per-class axis into one class index per sample.
    Y = np.argmax(Y, axis=1)
    reference = get_reference(mode, reference_label, X_masked, Y)
    
    print('+++++++++++++ Computing DeepLIFT scores ++++++++++++++')
    print('previous threshold', threshold, 'new threshold', percentage_cutoff)

    # NOTE(review): this local is never read; the keyword argument of the
    # same name below receives layer_idx instead.
    find_scores_layer_idx = 0
    # Fallback value, replaced below if layer 0 is a Dense/Input layer.
    input_scores = np.zeros(X_masked[Y == non_reference_label].shape)
    layer_scores = []
    task_id = base_neuron_label

    # Layers that are neither Dense/Input nor NoOp/Softmax add nothing to
    # layer_scores.
    for layer_idx, layer in enumerate(deeplift_model.get_layers()):
        if type(layer).__name__ == 'Dense' or type(layer).__name__ == 'Input':
            # Contributions of this layer towards the layer at index -2.
            deeplift_contribs_func = deeplift_model.get_target_contribs_func(find_scores_layer_idx=layer_idx, target_layer_idx=-2)
            # Only the samples labelled non_reference_label are scored.
            scores = np.array(deeplift_contribs_func(task_idx=task_id,
                                                     input_references_list=reference,
                                                     input_data_list=[X_masked[Y == non_reference_label]],
                                                     batch_size=10,
                                                     progress_update=50))
                                                     
            # Accumulate over the first axis of scores (one entry per scored
            # sample, presumably), then take the magnitude of the total.
            sum_scores = np.zeros(scores.shape[1])
            
            for score in scores:
                sum_scores += score
            
            sum_scores = np.absolute(sum_scores)

            # Histogram is only drawn when there are more than two entries.
            if sum_scores.shape[0] > 2:
                plot_hist(TARGET_DIRECTORY, sum_scores, dataset + '_t_' + str(threshold) + '_layer_' + str(layer_idx)) 
            
            print('layer', layer_idx, 'type is: ', type(layer), 'scores dimensions are: ', scores.shape, 'sum_scores', sum_scores.shape)
            layer_scores.append(sum_scores)
            
            if layer_idx == 0:
                # Keep the squared per-sample scores of the first layer for
                # the CSV export below, and add an extra empty placeholder.
                input_scores = np.square(scores)
                layer_scores.append([])

        elif type(layer).__name__ == 'NoOp' or type(layer).__name__ == 'Softmax':
            # Placeholder entry for layers that are not scored.
            layer_scores.append([])
            print('layer', layer_idx, 'type is: ', type(layer).__name__)

    # Relative reduction from the previous cutoff to the new one.
    alpha = (1 - (percentage_cutoff/threshold))
    new_model, mask_2D = compute_new_reduced_model(keras_model, dropout, layer_scores, 2, alpha, mapping, cluster_mask, flags)

    # Sum the (squared) input scores over samples.
    input_sum_scores = np.zeros(X_masked.shape[1])

    for input_score in input_scores:
        input_sum_scores += input_score

    padded_sum_scores = get_padded_data(input_sum_scores, mapping)
    full_matrix = create_matrix(padded_sum_scores)

    # Three exports: per-sample input scores (transposed), the reshaped
    # summed scores, and the selected-feature mask.
    mkdir(TARGET_DIRECTORY + './important_features/')
    np.savetxt(
        TARGET_DIRECTORY + './important_features/' + dataset + '_scores_deeplift_reduced_r_' + str(threshold) + '_t_' + str(percentage_cutoff) + '.csv', 
        np.transpose(np.array(input_scores)), delimiter= ',')
    np.savetxt(
        TARGET_DIRECTORY + './important_features/' + dataset + '_scores_reshaped_reduced_r_' + str(threshold) + '_t_' + str(percentage_cutoff) + '.csv', 
        full_matrix, delimiter=",")
    np.savetxt(
        TARGET_DIRECTORY + './important_features/' + dataset + '_deeplift_features_nodes_r_' + str(threshold) + '_t_' + str(percentage_cutoff) + '.csv', 
        mask_2D)
    
    mkdir(TARGET_DIRECTORY + './reduced_models/')
    new_model_file = TARGET_DIRECTORY + './reduced_models/' + dataset + '_from_' + str(threshold) + '_to_' + str(percentage_cutoff) + '.h5'
    new_model.save(new_model_file)

    # Destructive: the input model file is deleted once the reduced model
    # has been saved.
    os.remove(keras_model_file)

    mask_2D_flattened = corr_mx_flatten_single(mask_2D)
    
    return new_model_file, mask_2D_flattened
    def save_and_plot_stats_environment(self, run_num):
        """Write the per-game environment statistics to CSV and plot them.

        Everything goes below ``<plot_directory>game_<run_num>/``. The CSV
        files and action histograms are written into its
        ``turtle_dynamics/`` sub-folder, which is created first
        (``os.makedirs`` raises if it already exists). The recorded human
        and agent actions are read back from their CSV files and used for
        the histograms and the x/y ``subplot`` calls. A scatter plot of real
        versus used human actions is saved next to the sub-folder. Finally
        ``self.reset_lists()`` is called.

        Args:
            run_num: game number, used in the folder name and in the
                histogram file names.
        """
        run_subfolder = "game_" + str(run_num) + "/"
        os.makedirs(self.plot_directory + run_subfolder + "turtle_dynamics/")

        self.exp_details()

        dynamics_dir = self.plot_directory + run_subfolder + "turtle_dynamics/"

        def dump_csv(file_name, values):
            # Shared CSV settings for every file written by this method.
            np.savetxt(dynamics_dir + file_name,
                       values,
                       delimiter=',',
                       fmt='%f')

        def dump_attributes(table):
            # Attributes are looked up one at a time, right before writing.
            for file_name, attr_name in table:
                dump_csv(file_name, getattr(self, attr_name))

        dump_attributes((
            ('agent_act_list.csv', 'agent_act_list'),
            ('human_act_list.csv', 'human_act_list'),
            ('action_timesteps.csv', 'action_timesteps'),
        ))

        # Read the action lists back from the files that were just written.
        human_act_list = np.genfromtxt(dynamics_dir + 'human_act_list.csv',
                                       delimiter=',')
        agent_act_list = np.genfromtxt(dynamics_dir + 'agent_act_list.csv',
                                       delimiter=',')

        game_tag = "game_" + str(run_num)
        plot_hist(agent_act_list,
                  dynamics_dir + 'agent_act_hist_' + game_tag,
                  'Agent Action Histogram')
        plot_hist(human_act_list,
                  dynamics_dir + 'human_act_hist_' + game_tag,
                  'Human Action Histogram')

        dump_attributes((
            ('turtle_pos_x.csv', 'turtle_pos_x'),
            ('turtle_pos_y.csv', 'turtle_pos_y'),
            ('time_turtle_pos.csv', 'time_turtle_pos'),
            ('turtle_vel_x.csv', 'turtle_vel_x'),
            ('turtle_vel_y.csv', 'turtle_vel_y'),
            ('time_turtle_vel.csv', 'time_turtle_vel'),
            ('turtle_accel_x.csv', 'turtle_acc_x'),
            ('turtle_accel_y.csv', 'turtle_acc_y'),
            ('time_turtle_acc.csv', 'time_turtle_acc'),
            ('real_act_list.csv', 'real_act_list'),
            ('time_real_act_list.csv', 'time_real_act_list'),
            ('used_human_act_list.csv', 'used_human_act_list'),
            ('used_human_act_time_list.csv', 'used_human_act_time_list'),
        ))

        # The x axis is paired with the human actions, the y axis with the
        # agent actions.
        for axis, actions in (("x", human_act_list), ("y", agent_act_list)):
            subplot(dynamics_dir,
                    getattr(self, "turtle_pos_" + axis),
                    getattr(self, "turtle_vel_" + axis),
                    getattr(self, "turtle_acc_" + axis),
                    self.time_turtle_pos, self.time_turtle_vel,
                    self.time_turtle_acc, actions, self.action_timesteps,
                    axis, control_mode)

        plt.figure("Real_Human_Actions_Comparison", figsize=(25, 10))
        plt.grid()

        plt.ylabel('Human Actions')
        plt.xlabel('Msg Timestamp(seconds)')
        plt.scatter(self.time_real_act_list, self.real_act_list)
        plt.scatter(self.used_human_act_time_list, self.used_human_act_list)
        comparison_path = (self.plot_directory + run_subfolder +
                           "human_real_action_comparison")
        plt.savefig(comparison_path, dpi=150)

        self.reset_lists()
    def save_and_plot_stats_rl(self):
        """Save and plot all the RL-relevant statistics for later analysis.

        Writes one CSV per statistic (training statistics go to
        ``<plot_directory>rl_dynamics/``, action/delay/fps/exec-time lists go
        directly to ``<plot_directory>``), then draws line plots through
        ``plot`` and histograms through ``plot_hist``.

        The "trials" plot is best-effort: if it fails, a message is printed
        and the remaining histograms are still produced. Only ``Exception``
        subclasses are tolerated there, so ``KeyboardInterrupt`` and
        ``SystemExit`` are no longer swallowed.
        """

        def dump_csv(path, values):
            # Shared CSV settings for every file written by this method.
            np.savetxt(path, values, delimiter=',', fmt='%f')

        rl_dir = self.plot_directory + 'rl_dynamics/'
        for file_name, attr_name in (
                ('alpha_values.csv', 'alpha_values'),
                ('policy_loss.csv', 'policy_loss_list'),
                ('value_loss.csv', 'value_loss_list'),
                ('q_loss.csv', 'q_loss_list'),
                ('rewards_list.csv', 'rewards_list'),
                ('turn_list.csv', 'turn_list'),
                ('means.csv', 'mean_list'),
                ('stdev.csv', 'stdev_list'),
                ('critics_lr_list.csv', 'critics_lr_list'),
                ('value_critic_lr_list.csv', 'value_critic_lr_list'),
                ('actor_lr_list.csv', 'actor_lr_list'),
                ('trials_list.csv', 'trials_list'),
        ):
            dump_csv(rl_dir + file_name, getattr(self, attr_name))

        for file_name, attr_name in (
                ('agent_act_list_total.csv', 'agent_act_list_total'),
                ('human_act_list_total.csv', 'human_act_list_total'),
                ('human_action_delay_list.csv', 'human_action_delay_list'),
                ('fps_list.csv', 'fps_list'),
        ):
            dump_csv(self.plot_directory + file_name,
                     getattr(self, attr_name))

        # NOTE(review): the other plot_hist calls in this method pass a
        # file-path prefix second and a title third; here the bare directory
        # is passed second -- confirm this is intended.
        plot_hist(self.fps_list, self.plot_directory,
                  'fps_list_' + str(self.game.fps))

        dump_csv(self.plot_directory + 'exec_time_list.csv',
                 self.exec_time_list)

        # (attribute, output name, two label strings). The label order is
        # whatever plot() expects; plot() is defined elsewhere.
        for attr_name, plot_name, first_label, second_label in (
                ('alpha_values', "alpha_values", 'Alpha Value',
                 'Number of Gradient Updates'),
                ('policy_loss_list', "policy_loss", 'Policy loss',
                 'Number of Gradient Updates'),
                ('value_loss_list', "value_loss_list", 'Value loss',
                 'Number of Gradient Updates'),
                ('rewards_list', "Rewards_per_game",
                 'Total Rewards per Game', 'Number of Games'),
                ('turn_list', "Steps_per_game", 'Turns per Game',
                 'Number of Games'),
                ('critics_lr_list', "critics_lr_list", 'Critic lr',
                 'Number of Gradient Updates'),
                ('value_critic_lr_list', "value_critic_lr_list", 'Value lr',
                 'Number of Gradient Updates'),
                ('actor_lr_list', "actor_lr_list", 'Actor lr',
                 'Number of Gradient Updates'),
        ):
            series = getattr(self, attr_name)
            plot(range(len(series)),
                 series,
                 plot_name,
                 first_label,
                 second_label,
                 self.plot_directory,
                 save=True)

        try:
            plot(range(UPDATE_INTERVAL, MAX_STEPS + UPDATE_INTERVAL,
                       UPDATE_INTERVAL),
                 self.mean_list,
                 "trials",
                 'Tests Score',
                 'Number of Interactions',
                 self.plot_directory,
                 save=True,
                 variance=True,
                 stdev=self.stdev_list)
        except Exception:
            # Deliberate best-effort: a failed trials plot must not prevent
            # the histograms below from being written.
            print("Trials did not ploted")

        plot_hist(self.agent_act_list_total,
                  self.plot_directory + 'agent_act_hist_total',
                  'Agent Action Histogram')
        plot_hist(self.human_act_list_total,
                  self.plot_directory + 'human_act_hist_total',
                  'Human Action Histogram')
        plot_hist(self.human_action_delay_list,
                  self.plot_directory + 'human_action_delay_list',
                  'Human Action Delay Histogram')