Exemplo n.º 1
0
    def __init__(self, name: str, in_memory: bool = True, **kwargs) -> None:
        """Load the train, test and dev splits as lists of ``DataPair`` objects.

        Every instance yielded by ``self._load_corpus(split)`` is read through
        the keys ``"sentence"``, ``"context"``, ``"person"``,
        ``"sentence_indices"`` and ``"context_indices"``. The sentence and the
        context are each wrapped in a ``Sentence``, annotated with ``person``
        and ``indices`` attributes, and combined into one ``DataPair``.

        After construction the instance exposes ``train``, ``dev`` and
        ``test`` (one list of pairs per split) and ``data_points`` (all pairs
        concatenated in train, test, dev order).

        Args:
            name: Identifier stored on ``self.name``.
            in_memory: Accepted for interface compatibility; not read in this
                constructor.
            **kwargs: Accepted for interface compatibility; not read in this
                constructor.
        """
        super(FaktotumDataset, self).__init__()
        self.name = name
        self.train = []
        self.dev = []
        self.test = []

        # One loop handles all three splits so the pair-building logic lives
        # in a single place. The tuple order fixes the load order:
        # train, then test, then dev.
        for split, bucket in (
            ("train", self.train),
            ("test", self.test),
            ("dev", self.dev),
        ):
            for instance in tqdm.tqdm(self._load_corpus(split)):
                # NOTE(review): use_tokenizer=False presumably means the text
                # is already tokenized -- confirm against the Sentence class.
                sentence = Sentence(instance["sentence"], use_tokenizer=False)
                context = Sentence(instance["context"], use_tokenizer=False)
                # Both halves share the same person but keep their own
                # index list.
                sentence.person = instance["person"]
                sentence.indices = instance["sentence_indices"]
                context.person = instance["person"]
                context.indices = instance["context_indices"]
                bucket.append(DataPair(sentence, context))

        # Flat view over every pair; concatenation creates a new list, so
        # later appends to a split list are not reflected here.
        self.data_points = self.train + self.test + self.dev