def save_model(self):
        trace('saving model ...')
        prefix = self.model
        self.trg_vocab.save("model/" + prefix + '.trgvocab')
        self.encdec.save_spec("model/" + prefix + '.spec')
        serializers.save_hdf5("model/" + prefix + '.weights', self.encdec)

        trace('finished.')
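For reference, the matching load path (the same three calls the test() examples below make) can be wrapped in a helper. This is a sketch only: load_model is a hypothetical name, and Vocabulary and EncoderDecoder are the project's own classes, assumed importable.

from chainer import serializers

def load_model(prefix):
    # Inverse of save_model: restore vocabulary, network spec, and weights.
    trg_vocab = Vocabulary.load("model/" + prefix + '.trgvocab')
    encdec = EncoderDecoder.load_spec("model/" + prefix + '.spec')
    serializers.load_hdf5("model/" + prefix + '.weights', encdec)
    return trg_vocab, encdec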
Example #2
    def train(self, epoch):
        trace('making vocabularies ...')
        self.trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace('making model ...')

        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(self.encdec)
        opt.add_hook(optimizer.GradientClipping(5))
        gen1 = gens.word_list(self.target)
        gen = gens.batch(gen1, self.minibatch)

        for trg_batch in gen:
            self.batch_size = len(trg_batch)
            self.trg_batch = fill_batch(trg_batch)
            if len(trg_batch) != self.minibatch:
                break
            self.encdec.clear(self.batch_size)
            self.__forward_img()
            self.encdec.reset(self.batch_size)
            loss, hyp_batch = self.__forward_word(self.trg_batch, self.encdec, True, 0)
            opt.zero_grads()  # reset gradients accumulated by the previous minibatch
            loss.backward()
            opt.update()
            K = len(self.trg_batch) - 2
            self.print_out(K, hyp_batch, epoch)
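This and the next train() example follow the same bare Chainer v1 update cycle, sketched below for orientation. model and x_batch are placeholders rather than names from the examples; zero_grads() is the call that keeps gradients from accumulating across minibatches.

from chainer import optimizers, optimizer

opt = optimizers.AdaGrad(lr=0.01)
opt.setup(model)                             # model: any chainer.Chain (placeholder)
opt.add_hook(optimizer.GradientClipping(5))  # clip the gradient norm at 5

opt.zero_grads()       # reset gradients from the previous step
loss = model(x_batch)  # placeholder forward pass returning a loss Variable
loss.backward()        # backprop through the computation graph
opt.update()           # apply the clipped AdaGrad update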
Example #3
    def train(self):
        trace('making vocabularies ...')
        trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace('making model ...')

        for epoch in range(self.epoch):
            trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
            trained = 0
            opt = optimizers.AdaGrad(lr=0.01)
            opt.setup(self.encdec)
            opt.add_hook(optimizer.GradientClipping(5))
            gen1 = gens.word_list(self.target)
            gen = gens.batch(gen1, self.minibatch)

            random_number = random.randint(0, self.minibatch - 1)
            for trg_batch in gen:
                self.trg_batch = fill_batch(trg_batch)
                if len(self.trg_batch) != self.minibatch:
                    break
                hyp_batch, loss = self.forward(trg_vocab, self.use_gpu,
                                               self.gpu_id)
                opt.zero_grads()  # reset gradients accumulated by the previous minibatch
                loss.backward()
                opt.update()
                K = len(self.trg_batch)

                if trained == 0:
                    self.print_out(random_number, epoch, trained, hyp_batch)

                trained += K

        trace('saving model ...')
        prefix = self.model
        trg_vocab.save(prefix + '.trgvocab')
        self.encdec.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', self.encdec)

        trace('finished.')
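gens.word_list, gens.batch, and fill_batch are the project's utility helpers. Minimal implementations consistent with how the examples use them might look like the sketch below; this is inferred behavior, not the repository's actual code.

def word_list(filename):
    # Yield one whitespace-tokenized sentence per line of the corpus.
    with open(filename) as fp:
        for line in fp:
            yield line.split()

def batch(generator, batch_size):
    # Group a sentence generator into lists of at most batch_size items.
    chunk = []
    for item in generator:
        chunk.append(item)
        if len(chunk) == batch_size:
            yield chunk
            chunk = []
    if chunk:
        yield chunk

def fill_batch(batch, token='</s>'):
    # Pad every sentence with '</s>' so all rows share one length and
    # each row ends with at least one end-of-sentence marker.
    max_len = max(len(x) for x in batch)
    return [x + [token] * (max_len - len(x) + 1) for x in batch]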
Example #4
    def print_out(self, K, i_epoch, trained, hyp_batch):
        # trace('epoch %3d/%3d, sample %8d' % (i_epoch + 1, self.epoch, trained + K + 1))
        trace('epoch %3d/%3d, sample %8d' %
              (i_epoch + 1, self.epoch, trained + 1))
        trace('  trg = ' +
              ' '.join([x if x != '</s>' else '*' for x in self.trg_batch[K]]))
        trace('  hyp = ' +
              ' '.join([x if x != '</s>' else '*' for x in hyp_batch[K]]))
    def print_out(self, K, hyp_batch, epoch):
        # Clamp K so it remains a valid index into both batches.
        if len(self.trg_batch) - 2 < K:
            K = len(self.trg_batch) - 2
        if len(hyp_batch) - 2 < K:
            K = len(hyp_batch) - 2

        trace('epoch %3d/%3d, sample %8d' % (epoch + 1, self.epoch, K + 1))
        trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in self.trg_batch[K]]))
        trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[K]]))
Example #5
    def test(self):
        trace('loading model ...')
        trg_vocab = Vocabulary.load(self.model + '.trgvocab')
        self.encdec = EncoderDecoderAttention.load_spec(self.model + '.spec')
        serializers.load_hdf5(self.model + '.weights', self.encdec)

        trace('generating translation ...')
        generated = 0

        trace('sample %8d ...' % (generated + 1))
        hyp_batch = self.forward(trg_vocab, False, self.generation_limit)

        source_count = 0
        with open(self.target, 'w') as fp:
            for hyp in hyp_batch:
                hyp.append('</s>')
                hyp = hyp[: hyp.index('</s>')]
                print('hyp : ' + ''.join(hyp))
                fp.write(' '.join(hyp) + '\n')  # one hypothesis per line
                source_count += 1

        trace('finished.')
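The append-then-slice idiom in the output loop is worth noting: appending '</s>' before calling index() guarantees the sentinel is found, so the slice never raises ValueError, and everything after the first end-of-sentence marker is dropped. A quick illustration:

hyp = ['a', 'cat', '</s>', 'sat']  # decoder output with a stray tail
hyp.append('</s>')                 # make sure the sentinel exists
hyp = hyp[:hyp.index('</s>')]      # truncate at the first end marker
print(hyp)                         # ['a', 'cat']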
Example #6
    def test(self):
        trace('loading model ...')
        self.trg_vocab = Vocabulary.load("model/" + self.model + '.trgvocab')
        self.batch_size = len(self.trg_batch)
        encdec = EncoderDecoder.load_spec("model/" + self.model + '.spec')
        serializers.load_hdf5("model/" + self.model + '.weights', encdec)

        trace('generating translation ...')
        generated = 0

        with open(self.target, 'w') as fp:
            self.__forward_img()
            trace('sample %8d ...' % (generated + 1))
            hyp_batch = self.__forward_word(self.trg_batch, encdec, False, self.generation_limit)

            for hyp in hyp_batch:
                hyp.append('</s>')
                hyp = hyp[:hyp.index('</s>')]
                print('hyp : ' + ''.join(hyp))
                print(' '.join(hyp), file=fp)

        trace('finished.')
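Putting the pieces together, a driver for a class exposing these methods might look like the sketch below. EncoderDecoderModel and the args fields are assumed names used only for illustration, not taken from any of the repositories above.

model = EncoderDecoderModel(args)  # hypothetical constructor
if args.mode == 'train':
    model.train()                  # or once per epoch: model.train(epoch)
    model.save_model()
else:
    model.test()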