# Imports reconstructed for this snippet; `const`, `networks`, `utils`, `voc`,
# `chromatemplate` (GetConvnetTargetFromPianoroll), and `evaluate` are
# project-local modules assumed to be importable from the surrounding repo.
import chainer
import numpy as np
import cupy as cp
from chainer import optimizers
from librosa.util import find_files


def TrainDNNExtractor(trainidx, epoch=20, saveas="dnn.model"):
    cqtfilelist = np.array(find_files(const.PATH_MIDIHCQT,
                                      ext="npz"))[trainidx]
    #midifilelist = find_files(const.PATH_MIDI,ext="mid")[:filecnt]
    filecnt = cqtfilelist.size
    chainer.config.train = True
    chainer.config.enable_backprop = True
    dnn = networks.FeatureDNN()
    model = networks.DNNModel(dnn)
    model.to_gpu(0)
    opt = optimizers.MomentumSGD()
    opt.setup(model)
    # Split points are only reported for information; the data below is
    # concatenated wholesale rather than processed split by split.
    spl = np.arange(0, filecnt, 2000)
    if spl[-1] < filecnt:
        spl = np.append(spl, filecnt)

    print("split count:%d" % (spl.size - 1))
    print(spl)
    print("start epochs...")
    S = []
    T = []

    # Load each HCQT file, preprocess the spectrogram, and derive matching
    # frame-level targets from the piano roll.
    for cqtfile in cqtfilelist:
        dat = np.load(cqtfile)
        spec = utils.PreprocessSpec(dat["spec"][:, :])
        targ = GetConvnetTargetFromPianoroll(dat["target"]).astype(np.int32)
        assert spec.shape[0] == targ.shape[0]
        S.append(spec)
        T.append(targ)
    S = np.concatenate(S, axis=0)
    T = np.concatenate(T, axis=0)

    for ep in range(epoch):
        sum_loss = 0.0

        assert S.shape[0] == T.shape[0]
        randidx = np.random.permutation(S.shape[0])
        for i in range(0, randidx.size, const.CONV_TRAIN_BATCH):
            x_batch = S[randidx[i:i + const.CONV_TRAIN_BATCH], :]
            t_batch = T[randidx[i:i + const.CONV_TRAIN_BATCH], :]
            x_in = cp.asarray(x_batch)
            t_in = cp.asarray(t_batch)
            model.cleargrads()
            loss = model(x_in, t_in)
            loss.backward()
            opt.update()
            # Weight by the actual batch size; the last batch may be smaller.
            sum_loss += float(loss.data) * x_in.shape[0]

        # Checkpoint after every epoch and report the mean per-frame loss
        # (the original divided by the batch size, which is not an average).
        dnn.save(saveas)
        print("epoch: %d/%d  loss:%.04f" %
              (ep + 1, epoch, sum_loss / randidx.size))
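
# Usage sketch (illustrative, not from the source): train on a random 90%
# split of the npz dataset. The split ratio is an assumption.
n_files = len(find_files(const.PATH_MIDIHCQT, ext="npz"))
perm = np.random.permutation(n_files)
trainidx = perm[:int(n_files * 0.9)]
TrainDNNExtractor(trainidx, epoch=20, saveas="dnn.model")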
Example #2
def EvaluateConvnet(modelfile, cqtfilelist):
    predicted_chroma = []
    target_chroma = []
    predicted_bass = []
    target_bass = []
    predicted_top = []
    target_top = []
    #dnn = networks.ConvnetFeatExtractor()
    dnn = networks.FeatureDNN()
    #dnn = networks.FullCNNFeatExtractor()
    dnn.load(modelfile)
    dnn.to_gpu(0)
    # Inference mode: disable training behavior and gradient bookkeeping.
    chainer.config.train = False
    chainer.config.enable_backprop = False

    for cqtfile in cqtfilelist:
        dat = np.load(cqtfile)
        cqt = utils.PreprocessSpec(dat["spec"][:, :])
        t = chromatemplate.GetConvnetTargetFromPianoroll(dat["target"])
        out = cp.asnumpy(dnn.GetFeature(cp.asarray(cqt)).data)
        out_len = out.shape[0]
        # Output layout: columns 0-11 bass, 12-23 mid chroma, 24-35 top note.
        # The chroma is thresholded; bass and top take a one-hot argmax.
        pred_chroma = np.zeros((out_len, 12), dtype="int32")
        pred_bass = np.zeros((out_len, 12), dtype="int32")
        pred_top = np.zeros((out_len, 12), dtype="int32")
        pred_chroma[out[:, 12:24] > 0.5] = 1
        pred_bass[np.arange(out_len), np.argmax(out[:, :12], axis=1)] = 1
        pred_top[np.arange(out_len), np.argmax(out[:, 24:36], axis=1)] = 1
        # np.bool is a removed NumPy alias; use the builtin bool instead.
        predicted_chroma.append(pred_chroma.astype(bool))
        target_chroma.append(t[:out_len, 12:24].astype(bool))
        predicted_bass.append(pred_bass.astype(bool))
        target_bass.append(t[:out_len, :12].astype(bool))
        predicted_top.append(pred_top.astype(bool))
        target_top.append(t[:out_len, 24:36].astype(bool))

    eval_chroma = evaluate(predicted_chroma, target_chroma)
    eval_bass = evaluate(predicted_bass, target_bass)
    eval_top = evaluate(predicted_top, target_top)
    return eval_chroma, eval_bass, eval_top
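
# Usage sketch (illustrative): score the saved model on the held-out files,
# reusing the `perm`/`n_files` split sketched above.
test_files = np.array(find_files(const.PATH_MIDIHCQT, ext="npz"))
test_files = test_files[perm[int(n_files * 0.9):]]
eval_chroma, eval_bass, eval_top = EvaluateConvnet("dnn.model", test_files)
print("chroma:", eval_chroma, "bass:", eval_bass, "top:", eval_top)
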
def TrainTranscribeDNNChord(idx, epoch=20, saveas="dnn_deepchroma.model"):
    cqtfilelist = np.array(find_files(const.PATH_HCQT, ext="npy"))[idx]
    chordlablist = np.array(
        find_files(const.PATH_CHORDLAB, ext=["lab", "chords"]))[idx]

    featurelist = []
    targetlist = []
    chainer.config.train = True
    chainer.config.enable_backprop = True

    for cqtfile, labfile in zip(cqtfilelist, chordlablist):
        cqt = np.load(cqtfile)[0, :, :]
        chroma = voc.LoadChromaTarget(labfile)
        # Trim both streams to a common length before pairing them up.
        min_sz = min(cqt.shape[0], chroma.shape[0])
        # utils.Embed presumably stacks +/-7 neighboring frames as context.
        cqt = utils.Embed(utils.PreprocessSpec(cqt[:min_sz]), size=7)
        chroma = chroma[:min_sz]
        featurelist.append(cqt)
        targetlist.append(chroma.astype(np.int32))
    featurelist = np.concatenate(featurelist)
    targetlist = np.concatenate(targetlist)
    itemcnt = targetlist.shape[0]
    print("DNN Training begin...")
    dnn = networks.FeatureDNN()
    dnn.train = True  # legacy pre-v2 Chainer flag; chainer.config.train is already set
    model = networks.DNNModel(predictor=dnn)
    model.to_gpu()
    opt = optimizers.AdaDelta()
    opt.setup(model)
    for ep in range(epoch):
        randidx = np.random.permutation(itemcnt)
        sumloss = 0.0
        for i in range(0, itemcnt, const.DNN_TRAIN_BATCH):
            X = cp.asarray(featurelist[randidx[i:i + const.DNN_TRAIN_BATCH]])
            T = cp.asarray(targetlist[randidx[i:i + const.DNN_TRAIN_BATCH]])
            opt.update(model, X, T)
            # Weight by the actual batch size; the last batch may be smaller.
            sumloss += float(model.loss.data) * X.shape[0]
        print("epoch %d/%d  loss=%.3f" % (ep + 1, epoch, sumloss / itemcnt))

    dnn.save(saveas)
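
# Usage sketch (illustrative): train the deep-chroma DNN on every labeled
# track; the full-range index is an assumption.
n_tracks = len(find_files(const.PATH_CHORDLAB, ext=["lab", "chords"]))
TrainTranscribeDNNChord(np.arange(n_tracks), epoch=20,
                        saveas="dnn_deepchroma.model")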
Example #4
# (The head of this example is missing from the source; only the tail of a
# CQT computation survives, showing a 144-bin, 24-bins-per-octave transform
# being transposed, cast to float32, preprocessed, and embedded.)
#             n_bins=144,
#             bins_per_octave=24,
#             filter_scale=2,
#             tuning=None)).T.astype(np.float32)),
#                    size=1)

#dat = np.load("/media/wuyiming/TOSHIBA EXT/midihcqt_12/000005.npy")
#dat_24 = np.load("/media/wuyiming/TOSHIBA EXT/midihcqt_24/000005.npz")
#spec_dnn = U.Embed(U.PreprocessSpec(dat_24["spec"]),size=7)

# `wav`, `spec`, and `spec_dnn` are defined in the truncated head above;
# `U` and `C` alias the project's utils and const modules.
from librosa.feature import chroma_cqt
import matplotlib.pyplot as plt

# Keep a common 250-frame window for plotting.
spec = spec[:, :250, :]
spec_dnn = spec_dnn[:250, :]
cnn = networks.FullCNNFeatExtractor()
cnn.load("fullcnn_crossentropy_6000.model")

deepchroma = networks.FeatureDNN()
deepchroma.load(
    "/home/wuyiming/Projects/TranscriptionChordRecognition/dnn3500.model")

# Take the mid-chroma block (columns 12:24) and transpose to (pitch, time).
chroma_cnn = cnn.GetFeature(spec).data[:, 12:24].T
chroma_dnn = deepchroma.GetFeature(spec_dnn).data[:, 12:24].T
chroma = np.log(
    1 + chroma_cqt(wav, sr=C.SR, hop_length=C.H, bins_per_octave=24)[:, :250])

target = chromatemplate.GetConvnetTargetFromPianoroll(
    U.GetPianoroll(
        "/media/wuyiming/TOSHIBA EXT/AIST.RWC-MDB-P-2001.SMF_SYNC/RM-P051.SMF_SYNC.MID"
    ))
# Matching 250-frame window of the mid-chroma target, offset by 10 frames.
target = target[10:260, 12:24].T

plt.subplot(4, 1, 1)