Exemplo n.º 1
0
 def _create_examples(self, lines, set_type):
     """Creates examples for the training and dev sets."""
     examples = []
     print(
         "UDC dataset is too big, loading data spent a long time, please wait patiently.................."
     )
     for (i, line) in enumerate(lines):
         if len(line) < 3:
             print("data format error: %s" % "\t".join(line))
             print(
                 "data row contains at least three parts: label\tconv1\t.....\tresponse"
             )
             continue
         guid = "%s-%d" % (set_type, i)
         text_a = "\t".join(line[1:-1])
         text_a = tokenization.convert_to_unicode(text_a)
         text_a = text_a.split('\t')
         text_b = line[-1]
         text_b = tokenization.convert_to_unicode(text_b)
         label = tokenization.convert_to_unicode(line[0])
         examples.append(
             InputExample(guid=guid,
                          text_a=text_a,
                          text_b=text_b,
                          label=label))
     return examples
Exemplo n.º 2
0
 def _create_examples(self, lines, set_type): 
     """Creates examples for the training and dev sets."""
     examples = []
     for (i, line) in enumerate(lines): 
         if len(line) != 2: 
             print("data format error: %s" % "\t".join(line))
             print("data row contains two parts: label \t conversation_content")
             continue
         guid = "%s-%d" % (set_type, i)
         text_a = line[1]
         text_a = tokenization.convert_to_unicode(text_a)
         label = tokenization.convert_to_unicode(line[0])
         examples.append(
             InputExample(
                 guid=guid, text_a=text_a, label=label))
     return examples