def get_sentence_examples(self, questions):
    for index, data in enumerate(questions):
        guid = 'test-%d' % index
        text_a = tokenization.convert_to_unicode(str(data))
        text_b = None
        # label = str(0)
        label = self.labels[0]  # placeholder label taken from the collected training labels
        yield InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)
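# Note on get_sentence_examples(): it reuses self.labels[0] as a dummy label, so
# get_train_examples() (which populates self.labels) must have been called first.
# A hypothetical prediction-time call, assuming `processor` is an instance of this class:
#
#     examples = list(processor.get_sentence_examples(["text to classify"]))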
def get_test_examples(self, data_dir):
    file_path = os.path.join(data_dir, 'cnews.test.txt')
    with open(file_path, 'r', encoding="utf-8") as f:
        reader = f.readlines()
    # random.shuffle(reader)  # do not shuffle the test set, so results stay comparable
    examples = []
    for index, line in enumerate(reader):
        guid = 'test-%d' % index
        split_line = line.strip().split("\t")
        text_a = tokenization.convert_to_unicode(split_line[1])
        text_b = None
        label = split_line[0]
        examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
    return examples
def get_dev_examples(self, data_dir):
    file_path = os.path.join(data_dir, 'sentiment_valid.txt')
    with open(file_path, 'r', encoding="utf-8") as f:
        reader = f.readlines()
    random.shuffle(reader)
    examples = []
    for index, line in enumerate(reader):
        guid = 'dev-%d' % index
        split_line = line.strip().split('\t')
        text_a = tokenization.convert_to_unicode(split_line[1])
        text_b = None
        label = split_line[0]
        examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
    return examples
def get_train_examples(self, data_dir):
    file_path = os.path.join(data_dir, 'train.txt')
    with open(file_path, 'r', encoding="utf-8") as f:
        reader = f.readlines()
    random.seed(0)
    random.shuffle(reader)  # note: the training set must be shuffled
    examples, self.labels = [], []
    for index, line in enumerate(reader):
        guid = 'train-%d' % index
        split_line = line.strip().split("\t")
        text_a = tokenization.convert_to_unicode(split_line[1])
        text_b = None
        label = split_line[0]
        examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        self.labels.append(label)
    return examples
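# A BERT run_classifier-style DataProcessor also exposes a get_labels() method.
# A minimal sketch, assuming self.labels has been filled by get_train_examples()
# above; the real class may define the label list differently (e.g. hard-coded
# category names).
def get_labels(self):
    """Return the distinct labels collected from the training file."""
    return sorted(set(self.labels))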