Exemplo n.º 1
0
    def __init__(self, source_with_start=False, source_with_end=False,
                 source_with_unk=False, target_with_start=False,
                 target_with_end=False, target_with_unk=False,
                 same_length=False):
        """Store the configuration flags and create the two side corpora.

        Each side (source, target) has three flags that are both kept on the
        instance under the same names and forwarded, positionally and in the
        order start/end/unk, to a fresh ``SequenceCorpus``.

        Args:
            source_with_start: First flag for the source corpus.
            source_with_end: Second flag for the source corpus.
            source_with_unk: Third flag for the source corpus.
            target_with_start: First flag for the target corpus.
            target_with_end: Second flag for the target corpus.
            target_with_unk: Third flag for the target corpus.
            same_length: Stored as ``self.same_length``; not used further
                inside this constructor.

        NOTE(review): the flag names suggest they toggle start/end/unknown
        marker tokens -- confirm against ``SequenceCorpus``.
        """
        source_flags = (source_with_start, source_with_end, source_with_unk)
        target_flags = (target_with_start, target_with_end, target_with_unk)

        # Keep each flag on the instance as well as handing it to the corpus.
        (self.source_with_start,
         self.source_with_end,
         self.source_with_unk) = source_flags
        (self.target_with_start,
         self.target_with_end,
         self.target_with_unk) = target_flags

        self.source_corpus = SequenceCorpus(*source_flags)
        self.target_corpus = SequenceCorpus(*target_flags)
        self.same_length = same_length

        # Starts out empty; nothing is added to it in this constructor.
        self.corpus = []
Exemplo n.º 2
0
from neural_machine.tasks.language.common.corpus.segmentor import *
from neural_machine.tasks.language.common.corpus.sequence_corpus import SequenceCorpus
from neural_machine.tasks.language.common.data_reader.bucket_iter import *

import sys

import logging

if __name__ == '__main__':

    head = '%(asctime)-15s %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=head)

    segmenter = SpaceSegmenter()
    corpus = SequenceCorpus()

    corpus.build(open(sys.argv[1], 'r'), segmenter)
    cell_num = corpus.cell_num()

    problem = LanguageModelProblem(corpus)

    batch_size = 32

    data_train = BucketIter(problem, batch_size)

    val_corpus = corpus.make(open(sys.argv[2], 'r'), segmenter)
    val_problem = LanguageModelProblem(val_corpus)
    data_val = BucketIter(val_problem, batch_size)

    arch_param = LanguageModelArchParam(num_hidden=200,