Example #1
0
 def get_sentence_examples(self, questions):
     """Yield one InputExample per question for inference.

     Args:
         questions: iterable of raw question texts (any str()-able values).

     Yields:
         InputExample with guid 'test-<i>', the unicode-converted text as
         text_a, no text_b, and a placeholder label.
     """
     for idx, question in enumerate(questions):
         # The label value is irrelevant at prediction time; reuse the
         # first label collected during training as a placeholder.
         yield InputExample(
             guid='test-%d' % idx,
             text_a=tokenization.convert_to_unicode(str(question)),
             text_b=None,
             label=self.labels[0],
         )
Example #2
0
    def get_test_examples(self, data_dir):
        """Read cnews.test.txt and return a list of test InputExamples.

        The test set is deliberately NOT shuffled so predictions can be
        compared line-by-line against the source file.

        Args:
            data_dir: directory containing 'cnews.test.txt', one
                "<label>TAB<text>" pair per line.

        Returns:
            list of InputExample with guids 'test-<line index>'.
        """
        file_path = os.path.join(data_dir, 'cnews.test.txt')
        with open(file_path, 'r', encoding="utf-8") as f:
            reader = f.readlines()

        examples = []
        for index, line in enumerate(reader):
            split_line = line.strip().split("\t")
            # Guard: a blank or tab-less line (e.g. a trailing empty line)
            # would otherwise raise IndexError on split_line[1].
            if len(split_line) < 2:
                continue
            guid = 'test-%d' % index
            text_a = tokenization.convert_to_unicode(split_line[1])
            text_b = None
            label = split_line[0]
            examples.append(InputExample(guid=guid, text_a=text_a,
                                         text_b=text_b, label=label))
        return examples
    def get_dev_examples(self, data_dir):
        """Read sentiment_valid.txt and return shuffled dev InputExamples.

        Args:
            data_dir: directory containing 'sentiment_valid.txt', one
                "<label>TAB<text>" pair per line.

        Returns:
            list of InputExample with guids 'dev-<shuffled line index>'.
        """
        file_path = os.path.join(data_dir, 'sentiment_valid.txt')
        with open(file_path, 'r', encoding="utf-8") as f:
            reader = f.readlines()
        # Seed before shuffling so dev evaluation is reproducible across
        # runs (mirrors the seeding done in get_train_examples).
        random.seed(0)
        random.shuffle(reader)

        examples = []
        for index, line in enumerate(reader):
            split_line = line.strip().split('\t')
            # Guard: a blank or tab-less line would otherwise raise
            # IndexError on split_line[1].
            if len(split_line) < 2:
                continue
            guid = 'dev-%d' % index
            text_a = tokenization.convert_to_unicode(split_line[1])
            text_b = None
            label = split_line[0]
            examples.append(InputExample(guid=guid, text_a=text_a,
                                         text_b=text_b, label=label))
        return examples
Example #4
0
    def get_train_examples(self, data_dir):
        """Read train.txt, shuffle it reproducibly, and build train examples.

        Side effect: resets self.labels and appends the label of EVERY
        example (in shuffled order, duplicates included) — other methods
        read from it (e.g. self.labels[0] as a placeholder label).

        Args:
            data_dir: directory containing 'train.txt', one
                "<label>TAB<text>" pair per line.

        Returns:
            list of InputExample with guids 'train-<shuffled line index>'.
        """
        file_path = os.path.join(data_dir, 'train.txt')
        with open(file_path, 'r', encoding="utf-8") as f:
            reader = f.readlines()
        # Fixed seed so every run sees the same training order.
        random.seed(0)
        random.shuffle(reader)  # NOTE: training data must be shuffled

        examples, self.labels = [], []
        for index, line in enumerate(reader):
            split_line = line.strip().split("\t")
            # Guard: a blank or tab-less line would otherwise raise
            # IndexError on split_line[1].
            if len(split_line) < 2:
                continue
            guid = 'train-%d' % index
            text_a = tokenization.convert_to_unicode(split_line[1])
            text_b = None
            label = split_line[0]
            examples.append(InputExample(guid=guid, text_a=text_a,
                                         text_b=text_b, label=label))
            self.labels.append(label)
        return examples