def create_dfs(data_folder, checkpointfolder):
    if os.path.isfile(checkpointfolder + "train_df.pkl"):
        assert os.path.isfile(checkpointfolder + "test_df.pkl")
        print("dfs already exist")

        return
    else:
        print("Creating dfs")
        # Data extraction and preparation
        X_train_path = data_folder + "X_train.npy"
        X_train = np.array(utils.load_npy(X_train_path))

        X_test_path = data_folder + "X_test.npy"
        X_test = np.array(utils.load_npy(X_test_path))

        y_train_path = data_folder + "y_train.npy"
        y_train = utils.load_npy(y_train_path)
        y_train = np.array([utils.collapse_num_labels(y) for y in y_train])

        y_test_path = data_folder + "y_test.npy"
        y_test = utils.load_npy(y_test_path)
        y_test = np.array([utils.collapse_num_labels(y) for y in y_test])

        # Normalize first, then free each raw array as soon as its DataFrame is pickled (saves memory)
        X_train, X_test = normalize_data(X_train, X_test, data_folder)

        train_df = create_phon_df(X_train, y_train)
        del X_train, y_train
        train_df.to_pickle(checkpointfolder + "train_df.pkl")

        test_df = create_phon_df(X_test, y_test)
        del X_test, y_test
        test_df.to_pickle(checkpointfolder + "test_df.pkl")
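
`normalize_data` is a project helper that is not shown in this example. A minimal sketch of what it plausibly does, assuming per-feature z-scoring with train-set statistics cached to disk (the commented-out mean/std pickling in Example #4 points at this pattern); the function body and the `norm_stats.pkl` cache name are assumptions, not the project's actual code:

import os
import pickle

import numpy as np

def normalize_data(X_train, X_test, data_folder):
    # Hypothetical cache file; the original project pickles "mean.pkl" / "std.pkl" separately.
    stats_path = data_folder + "norm_stats.pkl"
    if os.path.isfile(stats_path):
        with open(stats_path, "rb") as f:
            mean, std = pickle.load(f)
    else:
        # Per-feature statistics over every frame of every training utterance
        frames = np.concatenate([np.asarray(x) for x in X_train])
        mean, std = frames.mean(0), frames.std(0) + 1e-8
        with open(stats_path, "wb") as f:
            pickle.dump((mean, std), f)

    def norm(X):
        return np.array([(np.asarray(x) - mean) / std for x in X], dtype=object)

    return norm(X_train), norm(X_test)
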
Example #2
    n_categories = 40 + 1  # 40 phone classes + 1 CTC blank

    # Model parameters
    dropout = 0.5
    n_feats = 600
    n_hidden = 128

    # Training hyper-parameters
    lr = 0.001
    epochs = 80
    batchsize = 8
    use_ctc = True

    print("Load data")
    X_test = np.array(
        utils.load_npy(checkpointfolder_specific + "X_test_0.npy"))
    y_test = np.array(
        utils.load_npy(checkpointfolder_specific + "y_test_0.npy"))

    #    X_train, y_train = utils.get_mini_dataset(X_train, y_train, 7000)
    X_test, y_test = utils.get_mini_dataset(X_test, y_test, 3000)

    # Build the BLSTM and restore its trained weights
    rnn = BLSTM(n_feats=n_feats,
                n_hidden=n_hidden,
                n_classes=n_categories,
                logsdir=logdir)

    rnn.load_weights(logdir)

    if ngram == 3:
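
Since this snippet evaluates a CTC-trained network (`use_ctc = True`), a minimal greedy CTC decode sketch may help; it is not the project's decoder, and it assumes the blank is the last class (index 40, matching the `40 + 1` layout above):

import numpy as np

def greedy_ctc_decode(logits, blank=40):
    best = np.argmax(logits, axis=1)  # most likely class per frame, shape (T,)
    # Merge consecutive repeats, then drop the blank label
    collapsed = [p for i, p in enumerate(best) if i == 0 or p != best[i - 1]]
    return [p for p in collapsed if p != blank]
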
Example #3
            # Adapt the learning rate from the epoch-to-epoch change in mean training loss
            delta_err = p_err_mean - epoch_loss_train_mean
            p_err_mean = epoch_loss_train_mean
            self.learning_rate = self.lr_scheduler(learning_rate, e, delta_err)

            if self.logsdir is not None:
                self.save_weights()

        return

if __name__ == "__main__":
    epochs = 10
    lr = 0.01

    rnn = RNN_CTC_base(n_feats=600, n_classes=41, logsdir="./RNN_test/")

    X_train = utils.load_npy('../../data/RBM_hann_v2/X_train.npy')
    X_test = utils.load_npy('../../data/RBM_hann_v2/X_test.npy')
    y_train = utils.load_npy('../../data/RBM_hann_v2/y_train.npy')
    y_test = utils.load_npy('../../data/RBM_hann_v2/y_test.npy')

    y_train = [utils.collapse_num_labels(y) for y in y_train]
    y_test = [utils.collapse_num_labels(y) for y in y_test]

    y_train = utils.to_ctc(y_train)
    y_test = utils.to_ctc(y_test)

    rnn.set_data(X_train, X_test, y_train, y_test)

    del X_train, y_train, X_test, y_test

    rnn.fit(n_epochs=epochs,
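
`utils.to_ctc` is not shown; one common preparation it might perform is packing the variable-length label sequences into the sparse (indices, values, dense_shape) triple that TensorFlow's `tf.nn.ctc_loss` consumes. The stand-in below is an assumption about the helper, not its actual implementation:

import numpy as np

def to_ctc(label_seqs):
    # Sparse representation: one (sequence, timestep) index per label
    indices, values = [], []
    for n, seq in enumerate(label_seqs):
        for t, label in enumerate(seq):
            indices.append((n, t))
            values.append(label)
    dense_shape = (len(label_seqs), max(len(s) for s in label_seqs))
    return (np.array(indices, dtype=np.int64),
            np.array(values, dtype=np.int32),
            np.array(dense_shape, dtype=np.int64))
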
Example #4
#    l1 = 0.00
#    dropout = 0.05
#    logdir_feats = root_dir + w + "/v2/400_600/v1/600_300/300_200/relurelu_v1/200_100/relurelu_v1/"

    i = 4  # stage selector; the i == 1 branch below runs the training stage
    weights_dir = logdir_feats + model + "_" + str(n_visibles) + "_" + str(n_hidden) + "/"
    out_feats_dir = weights_dir + model + "_v" + str(v) + "/" 

    lr = 0.001 #0.0005087695972171545
    # """ TRAIN """
    if i == 1:
        from utils import plot_weights, plot_input_bias, plot_hidden_bias
        n_epochs = int(200 // dropout)
        print("train for:", n_epochs)

        X_train = utils.load_npy(logdir_feats + "X_train.npy")
        # Flatten per-utterance feature sequences into one (n_frames, n_feats) matrix
        train_matrix = []
        for x in X_train:
            train_matrix.extend(x)
        train_matrix = np.array(train_matrix)
        del X_train

        if normalize:
            train_matrix = normalize_fn(train_matrix, logdir_feats)
#            try:
#                mean = utils.open_pickle(logdir_feats + "mean.pkl")
#            except:
#                mean = np.array(train_matrix).mean(0)
#                utils.save_pickle(logdir_feats + "mean.pkl", mean)
#            try:
#                std = utils.open_pickle(logdir_feats + "std.pkl")
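
This script wires up directories for RBM-based feature extraction (`weights_dir`, `out_feats_dir`). The core transform it orchestrates is the standard RBM mean-field encoding of frames into hidden-unit activations; a minimal sketch, with `W` and `b_hidden` standing in for the weights loaded from `weights_dir`:

import numpy as np

def rbm_encode(frames, W, b_hidden):
    # frames: (T, n_visibles), W: (n_visibles, n_hidden), b_hidden: (n_hidden,)
    # Sigmoid of the pre-activation gives the hidden units' mean activations.
    return 1.0 / (1.0 + np.exp(-(frames @ W + b_hidden)))
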
Example #5
    #    folder_name = "./PreprocessedData/MFCCs_delta1/hann/"
    #    logdir = "./BLSTM/MFCC/Hann/"
    folder_name = "./PreprocessedData/RBMs_longtrain/hann/v2/400_50/"
    logdir = "./BLSTM/rbm_50_deltas_argmax/"

    epochs = 8
    lr = 0.001
    batchsize = 8
    dropout = 0.85

    n_feats = 102
    n_hidden = 128
    n_classes = 39 + 1  # 39 folded phone classes + 1 CTC blank

    X_train_path = folder_name + "X_train.npy"
    X_train = np.array(utils.load_npy(X_train_path))
    X_test_path = folder_name + "X_test.npy"
    X_test = np.array(utils.load_npy(X_test_path))

    y_train_path = folder_name + "y_train.npy"
    y_train = np.array(utils.load_npy(y_train_path))
    y_test_path = folder_name + "y_test.npy"
    y_test = np.array(utils.load_npy(y_test_path))

    #    X_train,y_train = utils.get_mini_dataset(X_train,y_train,500)
    #    X_test, y_test = utils.get_mini_dataset(X_test, y_test,100)

    X_train, X_test = normalize_data(X_train, X_test, folder_name)

    y_train = utils.remove_q(
        utils.to_ctc(np.array([utils.collapse_num_labels(y)