Example #1
def train(lmWithRNN, gateModel, args, trainData, validData):
    if args.gpu >= 0:
        lmWithRNN.lmNet.to_gpu()
        gateModel.gateModel.to_gpu()
    opt = O.Adam(alpha=0.001)
    opt.setup(gateModel.gateModel)
    opt.add_hook(chainer.optimizer.GradientClipping(args.maxGrad))
    if args.weightDecay > 0:
        opt.add_hook(chainer.optimizer.WeightDecay(args.weightDecay))
    bestperp = np.inf
    for epoch in range(args.epoch):
        epochStart = time.time()
        totalloss = 0
        finishnum = 0
        lr_decay = np.sqrt(epoch + 1)
        opt.alpha = 0.001 / lr_decay
        print('Learning rate: %.6f' % opt.alpha)
        if lmWithRNN.modelType == 'RHN':
            prevHidden = [
                chainer.Variable(
                    xp.zeros((args.batch, lmWithRNN.dim)).astype(np.float32))
                for _ in range(lmWithRNN.layerNum)
            ]
        else:
            prevHidden = None
        for current_words, next_words in make_batch(trainData, args.batch,
                                                    args.step):
            lmWithRNN.lmNet.cleargrads()
            gateModel.gateModel.cleargrads()
            loss, prevHidden = train_with_batch(current_words, next_words,
                                                lmWithRNN, gateModel, args,
                                                prevHidden)
            loss.backward()
            loss.unchain_backward()
            opt.update()
            totalloss += float(F.sum(loss).data) * current_words.shape[0]
            finishnum += current_words.shape[0] * current_words.shape[1]
            sys.stderr.write('\r Finished %s' % finishnum)
        sys.stderr.write('\n')
        epochEnd = time.time()
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            validloss, validperp = valid_with_batch(validData, lmWithRNN,
                                                    gateModel)
        sys.stderr.write('Train time is %s\tValid time is %s\n' %
                         (epochEnd - epochStart, time.time() - epochEnd))
        sys.stdout.write(
            'Epoch: %s\tTrain loss: %.6f\tValid loss: %.6f\tValid perplexity: %.6f\n'
            % (epoch, totalloss / finishnum, validloss, validperp))
        sys.stdout.flush()
        if validperp < bestperp:
            gateOutputFile = args.output + '.bin'
            S.save_npz(gateOutputFile,
                       copy.deepcopy(gateModel.gateModel).to_cpu())
            bestperp = validperp
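The snippets on this page all rely on the same module-level setup, which is not shown here. A minimal sketch of the assumed preamble follows; the helpers make_batch and train_with_batch and the lmWithRNN/gateModel wrappers are defined elsewhere in the same project, and xp is the usual Chainer cupy/numpy switch. All of this is an assumption, not code lifted from the project.

import copy
import sys
import time

import numpy as np
import chainer
import chainer.functions as F
import chainer.optimizers as O
import chainer.serializers as S

# xp is the array module used to initialise hidden states:
# cupy when running on GPU, numpy otherwise (assumed to be set once
# after parsing args.gpu).
xp = np  # e.g. xp = chainer.cuda.cupy if args.gpu >= 0 else np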
Example #2
def train(lmWithRNN, args, trainData, validData):
    if args.gpu >= 0:
        lmWithRNN.lmNet.to_gpu()
    if args.WT:
        lmWithRNN.lmNet.Output.W.data = lmWithRNN.lmNet.Embed.W.data
        # weight tying: share the same array between the output and embedding layers
    opt = O.SGD(args.lr)
    opt.setup(lmWithRNN.lmNet)
    opt.add_hook(chainer.optimizer.GradientClipping(args.maxGrad))
    opt.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    prevvalidperp = np.inf
    prevModel = None
    for epoch in range(args.epoch):
        epochStart = time.time()
        lr_decay = args.decay**max(epoch + 1 - args.decayEpoch, 0.0)
        opt.lr = args.lr * lr_decay
        sys.stdout.write('Learning rate: %.6f\n' % (opt.lr))
        totalloss = 0
        finishnum = 0
        prevHiddenList = [
            chainer.Variable(
                xp.zeros((args.batch, args.dim)).astype(np.float32))
            for _ in range(lmWithRNN.layerNum)
        ]
        for current_words, next_words in make_batch(trainData, args.batch,
                                                    args.step):
            lmWithRNN.lmNet.cleargrads()
            loss, prevHiddenList = train_with_batch(current_words, next_words,
                                                    lmWithRNN, args,
                                                    prevHiddenList)
            loss.backward()
            loss.unchain_backward()
            opt.update()
            totalloss += float(F.sum(loss).data) * current_words.shape[0]
            finishnum += current_words.shape[0] * current_words.shape[1]
            sys.stderr.write('\r Finished %s' % finishnum)
        sys.stderr.write('\n')
        epochEnd = time.time()
        validloss, validperp = valid_with_batch(validData, lmWithRNN)
        sys.stdout.write('Train time is %s\tValid time is %s\n' %
                         (epochEnd - epochStart, time.time() - epochEnd))
        sys.stdout.write(
            'Epoch: %s\tTrain loss: %.6f\tValid loss: %.6f\tValid perplexity: %.6f\n'
            % (epoch, totalloss / finishnum, validloss, validperp))
        sys.stdout.flush()
        if prevvalidperp < validperp:
            lmOutputFile = args.output + '.epoch%s' % (epoch) + '.bin'
            S.save_npz(lmOutputFile, prevModel)
        prevModel = copy.deepcopy(lmWithRNN.lmNet).to_cpu()
        prevvalidperp = validperp
    lmOutputFile = args.output + '.epoch%s_fin' % (epoch + 1) + '.bin'
    S.save_npz(lmOutputFile, prevModel)
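make_batch is not listed on this page. A minimal reconstruction consistent with how the loops above consume it (integer arrays current_words and next_words of shape (batch, step), shifted by one token for next-word prediction) might look like the sketch below; it is an assumption under those constraints, not the project's implementation.

import numpy as np

def make_batch(data, batchsize, step):
    # Split the flat token-id sequence into `batchsize` parallel streams and
    # yield (current_words, next_words) windows of at most `step` tokens
    # for truncated BPTT.
    data = np.asarray(data, dtype=np.int32)
    seqlen = len(data) // batchsize
    streams = data[:seqlen * batchsize].reshape(batchsize, seqlen)
    for start in range(0, seqlen - 1, step):
        end = min(start + step, seqlen - 1)
        yield streams[:, start:end], streams[:, start + 1:end + 1]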
Example #3
def valid_with_batch(validData, lmWithRNN):
    batchsize = 50
    prevHiddenList = [
        chainer.Variable(
            xp.zeros((batchsize, lmWithRNN.dim)).astype(np.float32))
        for _ in range(lmWithRNN.layerNum)
    ]
    loss = 0
    totalins = len(validData) - 1
    for current_words, next_words in make_batch(validData, batchsize, 1000000):
        for index in range(current_words.shape[1]):
            wordIndex = current_words[:, index]
            y, prevHiddenList = lmWithRNN.compute_forward(wordIndex, prevHiddenList)
            loss += F.softmax_cross_entropy(y, next_words[:, index]) * batchsize
    loss = float(F.sum(loss).data) / totalins
    perp = np.exp(loss)
    return loss, perp
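train_with_batch, called from Examples #1 and #2, is also not shown. A plausible sketch for the Example #2 signature, mirroring the token-by-token loop of valid_with_batch above, is given below; the body is an assumption based on that loop, not the project's actual code.

import chainer.functions as F

def train_with_batch(current_words, next_words, lmWithRNN, args, prevHiddenList):
    # Assumed behaviour: unroll the RNN over the step dimension, summing the
    # per-token softmax cross-entropy, and hand the final hidden states back
    # to the caller so the next batch continues from them.
    loss = 0
    for index in range(current_words.shape[1]):
        wordIndex = current_words[:, index]
        y, prevHiddenList = lmWithRNN.compute_forward(wordIndex, prevHiddenList)
        loss += F.softmax_cross_entropy(y, next_words[:, index])
    return loss, prevHiddenList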
Example #4
def valid_with_batch(validData, lmWithRNN, gateModel):
    batchsize = 64
    totalins = len(validData) - 1
    loss = 0
    if lmWithRNN.modelType == 'RHN':
        prevHidden = [
            chainer.Variable(
                xp.zeros((batchsize, lmWithRNN.dim)).astype(np.float32))
            for _ in range(lmWithRNN.layerNum)
        ]
    else:
        prevHidden = None
    for current_words, next_words in make_batch(validData, batchsize, 100000):
        for index in range(current_words.shape[1]):
            wordIndex = current_words[:, index]
            rnn_out, prevHidden = lmWithRNN.compute_forward(
                wordIndex, prevHidden)
            gate = gateModel.compute_gate(wordIndex)
            y = gate * rnn_out
            loss += F.softmax_cross_entropy(y, next_words[:, index]) * batchsize
    loss = float(F.sum(loss).data) / totalins
    perp = np.exp(loss)
    return loss, perp
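None of the examples show how args is constructed. A hedged argparse sketch for Example #2 is given below; the flag names come from the attributes the training loop reads (args.gpu, args.batch, args.step, args.epoch, args.dim, args.lr, args.decay, args.decayEpoch, args.maxGrad, args.weight_decay, args.WT, args.output), while the defaults and help strings are assumptions.

import argparse

def parse_args():
    p = argparse.ArgumentParser()
    p.add_argument('--gpu', type=int, default=-1, help='GPU id, -1 for CPU')
    p.add_argument('--batch', type=int, default=20, help='sequences per mini-batch')
    p.add_argument('--step', type=int, default=35, help='truncated-BPTT length')
    p.add_argument('--epoch', type=int, default=40, help='number of training epochs')
    p.add_argument('--dim', type=int, default=650, help='hidden-state size')
    p.add_argument('--lr', type=float, default=1.0, help='initial SGD learning rate')
    p.add_argument('--decay', type=float, default=0.85, help='per-epoch learning-rate decay factor')
    p.add_argument('--decayEpoch', type=int, default=6, help='epoch at which decay starts')
    p.add_argument('--maxGrad', type=float, default=5.0, help='gradient-clipping threshold')
    p.add_argument('--weight_decay', type=float, default=1e-7, help='L2 regularisation strength')
    p.add_argument('--WT', action='store_true', help='tie embedding and output weights')
    p.add_argument('--output', default='model', help='prefix for saved .bin files')
    return p.parse_args()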