def create_dfs(data_folder, checkpointfolder):
    if os.path.isfile(checkpointfolder + "train_df.pkl"):
        assert os.path.isfile(checkpointfolder + "test_df.pkl")
        print("dfs already exist")
        return
    else:
        print("Creating dfs")
        # Data extraction and preparation
        X_train_path = data_folder + "X_train.npy"
        X_train = np.array(utils.load_npy(X_train_path))
        X_test_path = data_folder + "X_test.npy"
        X_test = np.array(utils.load_npy(X_test_path))
        y_train_path = data_folder + "y_train.npy"
        y_train = utils.load_npy(y_train_path)
        y_train = np.array([utils.collapse_num_labels(y) for y in y_train])
        y_test_path = data_folder + "y_test.npy"
        y_test = utils.load_npy(y_test_path)
        y_test = np.array([utils.collapse_num_labels(y) for y in y_test])
        # Save memory: pickle each dataframe and free its source arrays
        # as soon as it has been built
        X_train, X_test = normalize_data(X_train, X_test, data_folder)
        train_df = create_phon_df(X_train, y_train)
        del X_train, y_train
        train_df.to_pickle(checkpointfolder + "train_df.pkl")
        test_df = create_phon_df(X_test, y_test)
        del X_test, y_test
        test_df.to_pickle(checkpointfolder + "test_df.pkl")
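# `normalize_data` and `create_phon_df` above are helpers defined elsewhere in
# this repo. As a hedged illustration only, the sketch below shows one plausible
# implementation of the normalization step: per-feature standardization with
# train-set statistics, mirroring the mean/std handling that appears commented
# out in the RBM training script further down. The function name and behavior
# are assumptions, not this repo's actual API; it assumes the `numpy as np`
# import used throughout these scripts.
def _normalize_data_sketch(X_train, X_test, data_folder):
    # Stack the per-utterance (frames x feats) arrays into one frame matrix
    frames = np.concatenate(list(X_train), axis=0)
    mean = frames.mean(axis=0)
    std = frames.std(axis=0) + 1e-8  # guard against zero variance
    # Standardize both splits with the *train* statistics only
    X_train = np.array([(x - mean) / std for x in X_train], dtype=object)
    X_test = np.array([(x - mean) / std for x in X_test], dtype=object)
    return X_train, X_test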
n_categories = 40 + 1  # 40 phone classes + 1 blank label for CTC

# Params
dropout = 0.5
n_feats = 600
n_hidden = 128

# Hyper-params
lr = 0.001
epochs = 80
batchsize = 8
use_ctc = True

print("Load data")
X_test = np.array(
    utils.load_npy(checkpointfolder_specific + "X_test_0.npy"))
y_test = np.array(
    utils.load_npy(checkpointfolder_specific + "y_test_0.npy"))
# X_train, y_train = utils.get_mini_dataset(X_train, y_train, 7000)
X_test, y_test = utils.get_mini_dataset(X_test, y_test, 3000)

# LSTM: create and feed, restoring previously trained weights
rnn = BLSTM(n_feats=n_feats, n_hidden=n_hidden, n_classes=n_categories,
            logsdir=logdir)
rnn.load_weights(logdir)

if ngram == 3:
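# `utils.get_mini_dataset` belongs to this repo's utils module. A minimal
# sketch of the behavior its call sites suggest (draw a random subset of n
# utterances, keeping features and labels aligned); hypothetical, not the
# actual code:
def _get_mini_dataset_sketch(X, y, n, seed=0):
    rng = np.random.RandomState(seed)
    idx = rng.choice(len(X), size=min(n, len(X)), replace=False)
    return X[idx], y[idx]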
            delta_err = p_err_mean - epoch_loss_train_mean
            p_err_mean = epoch_loss_train_mean
            self.learning_rate = self.lr_scheduler(learning_rate, e, delta_err)
        if self.logsdir is not None:
            self.save_weights()
        return


if __name__ == "__main__":
    epochs = 10
    lr = 0.01
    rnn = RNN_CTC_base(n_feats=600, n_classes=41, logsdir="./RNN_test/")
    X_train = utils.load_npy('../../data/RBM_hann_v2/X_train.npy')
    X_test = utils.load_npy('../../data/RBM_hann_v2/X_test.npy')
    y_train = utils.load_npy('../../data/RBM_hann_v2/y_train.npy')
    y_test = utils.load_npy('../../data/RBM_hann_v2/y_test.npy')
    # Fold the phone labels and convert them to the CTC target format
    y_train = [utils.collapse_num_labels(y) for y in y_train]
    y_test = [utils.collapse_num_labels(y) for y in y_test]
    y_train = utils.to_ctc(y_train)
    y_test = utils.to_ctc(y_test)
    rnn.set_data(X_train, X_test, y_train, y_test)
    del X_train, y_train, X_test, y_test
    rnn.fit(n_epochs=epochs,
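# `self.lr_scheduler` above is defined on the model class. A hedged sketch of
# one common policy consistent with that call (decay the rate once the
# epoch-to-epoch improvement `delta_err` stalls); the threshold and decay
# factor are illustrative assumptions:
def _lr_scheduler_sketch(learning_rate, epoch, delta_err,
                         min_delta=1e-3, decay=0.5):
    # delta_err = previous mean train loss - current mean train loss, so a
    # small or negative value means the loss has stopped improving
    if epoch > 0 and delta_err < min_delta:
        return learning_rate * decay
    return learning_rate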
# l1 = 0.00
# dropout = 0.05
# logdir_feats = root_dir + w + "/v2/400_600/v1/600_300/300_200/relurelu_v1/200_100/relurelu_v1/"
i = 4
weights_dir = logdir_feats + model + "_" + str(n_visibles) + "_" + str(n_hidden) + "/"
out_feats_dir = weights_dir + model + "_v" + str(v) + "/"
lr = 0.001  # 0.0005087695972171545
##
""" TRAIN """
if i == 1:
    from utils import plot_weights, plot_input_bias, plot_hidden_bias
    n_epochs = int(200 // dropout)
    print("train for: ", str(n_epochs))
    X_train = utils.load_npy(logdir_feats + "X_train.npy")
    # Flatten the per-utterance feature arrays into one frame-level matrix
    train_matrix = []
    for x in X_train:
        train_matrix.extend(x)
    train_matrix = np.array(train_matrix)
    del X_train
    if normalize:
        train_matrix = normalize_fn(train_matrix, logdir_feats)
    # try:
    #     mean = utils.open_pickle(logdir_feats + "mean.pkl")
    # except:
    #     mean = np.array(train_matrix).mean(0)
    #     utils.save_pickle(logdir_feats + "mean.pkl", mean)
    # try:
    #     std = utils.open_pickle(logdir_feats + "std.pkl")
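# `normalize_fn` is defined elsewhere; the commented-out block above suggests
# it standardizes the frame matrix with mean/std pickled alongside the features
# so later runs reuse identical statistics. A hedged sketch under that
# assumption, reusing the `utils.open_pickle`/`utils.save_pickle` helpers
# referenced above:
def _normalize_fn_sketch(train_matrix, logdir_feats):
    try:
        mean = utils.open_pickle(logdir_feats + "mean.pkl")
        std = utils.open_pickle(logdir_feats + "std.pkl")
    except Exception:
        mean = train_matrix.mean(0)
        std = train_matrix.std(0) + 1e-8  # guard against zero variance
        utils.save_pickle(logdir_feats + "mean.pkl", mean)
        utils.save_pickle(logdir_feats + "std.pkl", std)
    return (train_matrix - mean) / std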
# folder_name = "./PreprocessedData/MFCCs_delta1/hann/"
# logdir = "./BLSTM/MFCC/Hann/"
folder_name = "./PreprocessedData/RBMs_longtrain/hann/v2/400_50/"
logdir = "./BLSTM/rbm_50_deltas_argmax/"

epochs = 8
lr = 0.001
batchsize = 8
dropout = 0.85
n_feats = 102
n_hidden = 128
n_classes = 39 + 1  # 39 folded phones + 1 blank label for CTC

X_train_path = folder_name + "X_train.npy"
X_train = np.array(utils.load_npy(X_train_path))
X_test_path = folder_name + "X_test.npy"
X_test = np.array(utils.load_npy(X_test_path))
y_train_path = folder_name + "y_train.npy"
y_train = np.array(utils.load_npy(y_train_path))
y_test_path = folder_name + "y_test.npy"
y_test = np.array(utils.load_npy(y_test_path))
# X_train, y_train = utils.get_mini_dataset(X_train, y_train, 500)
# X_test, y_test = utils.get_mini_dataset(X_test, y_test, 100)

X_train, X_test = normalize_data(X_train, X_test, folder_name)
y_train = utils.remove_q(
    utils.to_ctc(np.array([utils.collapse_num_labels(y)
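# `utils.remove_q` is part of this repo's utils. In the standard TIMIT 61 -> 39
# phone folding the glottal stop /q/ is discarded rather than mapped, which
# fits n_classes = 39 + 1 above. A hedged sketch assuming the labels are
# integer sequences and that Q_INDEX marks /q/ in this repo's phone table
# (both the name and the index are placeholders, not the actual code):
Q_INDEX = 0  # placeholder; the real value depends on this repo's label mapping
def _remove_q_sketch(label_seqs):
    # Drop every /q/ frame label from each sequence
    return [np.array([p for p in seq if p != Q_INDEX]) for seq in label_seqs]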