Example #1
# imports and module-level settings needed by train()/recognize() below.
# The module paths for Sequential, mat_2d_to_3d, Adam and prec_recall_fvalue
# are assumed from the hat library layout; adjust to the installed version.
import csv
import cPickle
import numpy as np

from hat.models import Sequential
from hat.layers.core import InputLayer, Flatten, Dense, Dropout
from hat.callbacks import Validation, SaveModel
from hat.preprocessing import mat_2d_to_3d
from hat.optimizers import Adam
from hat.metrics import prec_recall_fvalue
import config as cfg
import prepare_dev_data as pp_dev_data

# hyper-params (module level, same values as in Example #3)
agg_num = 11  # number of frames concatenated per segment
hop = 5       # hop (step) between segments, in frames
act = 'relu'
n_hid = 500
fold = 1
n_out = len(cfg.labels)
fe_fd = cfg.dev_fe_mel_fd  # feature folder; assumed, as in Example #3


def train():
    # prepare data
    tr_X, tr_y, _, te_X, te_y, te_na_list = pp_dev_data.GetAllData(
        fe_fd, agg_num, hop, fold)
    [batch_num, n_time, n_freq] = tr_X.shape

    print tr_X.shape, tr_y.shape
    print te_X.shape, te_y.shape

    # build model
    seq = Sequential()
    seq.add(InputLayer((n_time, n_freq)))
    seq.add(Flatten())  # flatten each 2-D input (n_time, n_freq) to a 1-D vector of length n_time*n_freq
    seq.add(Dropout(0.1))
    seq.add(Dense(n_hid, act=act))
    seq.add(Dropout(0.1))
    seq.add(Dense(n_hid, act=act))
    seq.add(Dropout(0.1))
    seq.add(Dense(n_hid, act=act))
    seq.add(Dropout(0.1))
    seq.add(Dense(n_out, act='sigmoid'))
    md = seq.compile()
    md.summary()

    # optimizer
    optimizer = Adam(1e-4)

    # validation callback
    # tr_err, te_err are frame-based; to get event-based error, run recognize.py
    validation = Validation(tr_x=tr_X,
                            tr_y=tr_y,
                            va_x=None,
                            va_y=None,
                            te_x=te_X,
                            te_y=te_y,
                            batch_size=2000,
                            metrics=['binary_crossentropy'],
                            call_freq=1,
                            dump_path=None)

    # save model
    pp_dev_data.CreateFolder(cfg.dev_md_fd)
    save_model = SaveModel(dump_fd=cfg.dev_md_fd, call_freq=10)

    # callbacks
    callbacks = [validation, save_model]

    # fit model
    md.fit(x=tr_X,
           y=tr_y,
           batch_size=2000,
           n_epochs=100,
           loss_func='binary_crossentropy',
           optimizer=optimizer,
           callbacks=callbacks,
           verbose=1)
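

# ---------------------------------------------------------------------------
# Shape sketch (illustration only, not part of the original pipeline): how one
# batch flows through the MLP defined in train(). All sizes except n_hid = 500
# are hypothetical; dropout is omitted for brevity.
# ---------------------------------------------------------------------------
def _shape_sketch():
    import numpy as np
    batch, n_time, n_freq, n_hid, n_out = 4, 11, 40, 500, 7   # hypothetical sizes
    x = np.random.rand(batch, n_time, n_freq)                 # a batch of input segments
    h = x.reshape(batch, n_time * n_freq)                     # Flatten: (batch, n_time*n_freq)
    for _ in range(3):                                        # three hidden Dense layers, relu
        h = np.maximum(0., h.dot(np.random.randn(h.shape[1], n_hid) * 0.01))
    p = 1. / (1. + np.exp(-h.dot(np.random.randn(n_hid, n_out) * 0.01)))  # sigmoid output
    print(p.shape)                                            # (batch, n_out) tag probabilities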
def recognize():
    # NOTE: md used below is the trained model; load it from cfg.dev_md_fd
    # (written by SaveModel during train()) before running recognition.
    # prepare data
    _, _, _, te_X, te_y, _ = pp_dev_data.GetAllData(fe_fd, agg_num, hop, fold)

    # do recognition and evaluation
    thres = 0.4  # decision threshold; tune until precision roughly equals recall
    n_labels = len(cfg.labels)

    gt_roll = []
    pred_roll = []
    with open(cfg.dev_cv_csv_path, 'rb') as f:
        reader = csv.reader(f)
        lis = list(reader)

        # iterate over annotation rows
        for li in lis:
            na = li[1]
            curr_fold = int(li[2])

            if fold == curr_fold:
                # get features, tags
                fe_path = fe_fd + '/' + na + '.f'
                info_path = cfg.dev_wav_fd + '/' + na + '.csv'
                tags = pp_dev_data.GetTags(info_path)
                y = pp_dev_data.TagsToCategory(tags)
                X = cPickle.load(open(fe_path, 'rb'))

                # aggregate 2-D features into 3-D segments: (n_segs, agg_num, n_freq)
                X3d = mat_2d_to_3d(X, agg_num, hop)

                p_y_pred = md.predict(X3d)
                p_y_pred = np.mean(p_y_pred, axis=0)  # average over segments -> clip-level scores, shape: (n_labels,)
                pred = np.zeros(n_labels)
                pred[np.where(p_y_pred > thres)] = 1
                pred_roll.append(pred)
                gt_roll.append(y)

    pred_roll = np.array(pred_roll)
    gt_roll = np.array(gt_roll)

    # calculate prec, recall, fvalue
    prec, recall, fvalue = prec_recall_fvalue(pred_roll, gt_roll, thres)
    print prec, recall, fvalue
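
prec_recall_fvalue comes from the project's own helper code. A minimal sketch of what it is assumed to compute, namely micro-averaged precision, recall and F-score over the binary prediction and ground-truth rolls, is given below; the threshold argument is omitted here because pred_roll is already binarized, and the project's implementation may differ in its averaging details.

import numpy as np

def micro_prec_recall_f(pred_roll, gt_roll, eps=1e-8):
    # Micro-averaged precision/recall/F over binary rolls of shape (n_clips, n_labels).
    tp = np.sum((pred_roll == 1) & (gt_roll == 1))
    fp = np.sum((pred_roll == 1) & (gt_roll == 0))
    fn = np.sum((pred_roll == 0) & (gt_roll == 1))
    prec = tp / float(tp + fp + eps)
    recall = tp / float(tp + fn + eps)
    fvalue = 2 * prec * recall / (prec + recall + eps)
    return prec, recall, fvalue

# tiny hypothetical example
pred = np.array([[1, 0, 1], [0, 1, 0]])
gt = np.array([[1, 0, 0], [0, 1, 0]])
print(micro_prec_recall_f(pred, gt))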
Example #3
from hat.models import Sequential  # module path assumed; Sequential is used below
from hat.layers.core import InputLayer, Flatten, Dense, Dropout
from hat.optimizers import SGD, Rmsprop
import hat.backend as K
import config as cfg
import prepare_dev_data as pp_dev_data

# hyper-params
agg_num = 11  # number of frames concatenated per segment
hop = 5       # hop (step) between segments, in frames
act = 'relu'
n_hid = 500
fold = 1
n_out = len(cfg.labels)

# prepare data
tr_X, tr_y, _ = pp_dev_data.GetAllData(cfg.dev_fe_mel_fd,
                                       agg_num,
                                       hop,
                                       fold=None)
[batch_num, n_time, n_freq] = tr_X.shape
print tr_X.shape, tr_y.shape

# build model
seq = Sequential()
seq.add(InputLayer((n_time, n_freq)))
seq.add(Flatten())  # flatten each 2-D input (n_time, n_freq) to a 1-D vector of length n_time*n_freq
seq.add(Dropout(0.1))
seq.add(Dense(n_hid, act=act))
seq.add(Dropout(0.1))
seq.add(Dense(n_hid, act=act))
seq.add(Dropout(0.1))
seq.add(Dense(n_hid, act=act))
seq.add(Dropout(0.1))
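
Both GetAllData here and mat_2d_to_3d in Example #1 rely on the agg_num/hop aggregation: a 2-D feature matrix of shape (n_frames, n_freq) is cut into overlapping segments of agg_num frames taken every hop frames. A minimal sketch of that segmentation is given below, assuming short inputs are zero-padded to one full segment; hat's own implementation may pad differently.

import numpy as np

def segment_2d_to_3d(x, agg_num, hop):
    # Cut a (n_frames, n_freq) matrix into overlapping (agg_num, n_freq) segments with step hop.
    n_frames, n_freq = x.shape
    if n_frames < agg_num:                      # padding scheme is an assumption
        x = np.concatenate([x, np.zeros((agg_num - n_frames, n_freq))], axis=0)
        n_frames = agg_num
    segs = [x[i:i + agg_num] for i in range(0, n_frames - agg_num + 1, hop)]
    return np.array(segs)                       # shape: (n_segs, agg_num, n_freq)

x = np.random.rand(100, 40)                     # hypothetical: 100 frames of 40 mel bins
print(segment_2d_to_3d(x, agg_num=11, hop=5).shape)   # (18, 11, 40)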
Example #4
from hat.models import Sequential  # module path assumed; Sequential is used below
from hat.layers.core import InputLayer, Dense, Dropout, Lambda, Flatten
from hat.layers.cnn import Convolution2D, Convolution1D
from hat.layers.pool import Pool2D, GlobalMaxPool
from hat.callbacks import Validation, SaveModel
from hat.preprocessing import pad_trunc_seqs, sparse_to_categorical, reshape_3d_to_4d
from hat.optimizers import Rmsprop
import prepare_dev_data as pp_dev_data
import config as cfg

# hyper-params
tr_fe_fd = cfg.dev_tr_fe_mel_fd
max_len = 100
n_out = len(cfg.labels)

# prepare data
tr_X, tr_y = pp_dev_data.GetAllData(tr_fe_fd, max_len)
tr_y = sparse_to_categorical(tr_y, n_out)

print tr_X.shape, tr_y.shape
(_, n_time, n_freq) = tr_X.shape

# build model
seq = Sequential()
seq.add(InputLayer((n_time, n_freq)))
seq.add(Flatten())
seq.add(Dropout(0.1))
seq.add(Dense(500, 'relu'))
seq.add(Dropout(0.1))
seq.add(Dense(500, 'relu'))
seq.add(Dropout(0.1))
seq.add(Dense(500, 'relu'))
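
This example imports pad_trunc_seqs and sparse_to_categorical from hat.preprocessing. A minimal sketch of what these helpers are assumed to do: pad or truncate each feature sequence to max_len frames, and convert integer class labels to one-hot vectors. The hat versions may differ in signature and padding value.

import numpy as np

def pad_trunc(x, max_len):
    # Pad with zeros or truncate a (n_frames, n_freq) matrix to exactly max_len frames.
    n_frames, n_freq = x.shape
    if n_frames >= max_len:
        return x[:max_len]
    return np.concatenate([x, np.zeros((max_len - n_frames, n_freq))], axis=0)

def sparse_to_onehot(y, n_out):
    # Integer class labels -> one-hot matrix of shape (n_samples, n_out).
    out = np.zeros((len(y), n_out))
    out[np.arange(len(y)), y] = 1
    return out

print(pad_trunc(np.random.rand(80, 40), max_len=100).shape)   # (100, 40)
print(sparse_to_onehot(np.array([0, 2, 1]), n_out=3))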