def get_sentence_examples(self, questions, prefix):
    """Turn tab-separated lines into a list of InputExample objects.

    Every entry of ``questions`` is stripped and split on tab characters.
    Field 0 and field 1 are passed through
    ``tokenization.convert_to_unicode`` and become ``text_a`` / ``text_b``;
    field 2 becomes the label (as a string). The guid of each example is
    ``"<prefix>-<index>"`` where ``index`` is the zero-based position of the
    line in ``questions``.

    ``self`` is not read by this function.

    :param questions: iterable of strings, each with at least three
        tab-separated fields (fewer fields raise ``IndexError``)
    :param prefix: value placed in front of the index in each guid
    :return: list of InputExample, in the same order as ``questions``
    """

    def _build(position, raw_line):
        # Split one "text_a<TAB>text_b<TAB>label" row into its fields.
        fields = raw_line.strip().split('\t')
        return InputExample(
            guid='%s-%d' % (prefix, position),
            text_a=tokenization.convert_to_unicode(str(fields[0])),
            text_b=tokenization.convert_to_unicode(str(fields[1])),
            label=str(fields[2]),
        )

    return [_build(position, raw_line)
            for position, raw_line in enumerate(questions)]
def _to_example(sentences): import re """ sentences to InputExample :param sentences: list of strings :return: list of InputExample """ unique_id = 0 for ss in sentences: line = tokenization.convert_to_unicode(ss) if not line: continue line = line.strip() text_a = None text_b = None m = re.match(r"^(.*) \|\|\| (.*)$", line) if m is None: text_a = line else: text_a = m.group(1) text_b = m.group(2) yield InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b) unique_id += 1