示例#1
0
    def _create_examples(self, lines, LABEL_SPLITTER="__label__"):
        re_pattern = u"({}{})".format(LABEL_SPLITTER, "\d+")

        examples = []
        for (i, line) in enumerate(lines):
            try:
                guid = i
                element_list = re.split(re_pattern, line)
                text_a = clean("".join(element_list[-1].split()[1:]))
                input_labels = clean(element_list[1]).split(LABEL_SPLITTER)[-1]

                text_a = tokenization.convert_to_unicode(text_a)
                input_labels = [
                    label.strip() for label in input_labels
                    if label.strip() in list(self.label2id.keys())
                ]

                examples.append(
                    data_feature_classifier.InputExample(guid=guid,
                                                         text_a=text_a,
                                                         text_b=None,
                                                         label=input_labels))
            except:
                print(line, i)
        return examples
示例#2
0
	def _create_test_examples(self, data, lang="zh"):
		examples = []
		for index in range(data.shape[0]):
			content = data[index]
			guid = int(content["id"])
			text_a = content["sentence1"]
			text_b = content["sentence2"]
			if isinstance(text_a, str) and isinstance(text_b, str):
				examples.append(data_feature_classifier.InputExample(
						guid=guid,
						text_a=clean(text_a),
						text_b=clean(text_b),
						label=["0"]
				))
		return examples
示例#3
0
	def _create_examples(self, data, lang="zh"):
		examples = []
		for index in range(len(data)):
			content = data[index]
			guid = int(content["ID"])
			text_a = content["sentence1"]
			text_b = content["sentence2"]
			label = content["gold_label"]
			if isinstance(text_a,str) and isinstance(text_b,str):
				examples.append(data_feature_classifier.InputExample(
						guid=guid,
						text_a=clean(text_a),
						text_b=clean(text_b),
						label=[label]
				))
		return examples
示例#4
0
	def _create_examples(self, lines,
									LABEL_SPLITTER="__label__"):
		examples = []
		for (i, line) in enumerate(lines):
			guid = i
			element_list = line.split(LABEL_SPLITTER)
			text_a = tokenization.convert_to_unicode(element_list[0].strip())
			text_a = clean(text_a)
			input_labels = element_list[1:]
			input_labels = [label.strip() for label in input_labels if label.strip() in list(self.label2id.keys())]
			
			examples.append(data_feature_classifier.InputExample(
					guid=guid,
					text_a=text_a,
					text_b=None,
					label=input_labels
				))
		return examples
示例#5
0
 def _create_test_examples(self, df, lang="zh"):
     examples = []
     for index in range(df.shape[0]):
         content = df.loc[index]
         guid = int(content["id"])
         if lang == "zh":
             text_a = content["title1_zh"]
             text_b = content["title2_zh"]
         elif lang == "en":
             text_a = content["title1_en"]
             text_b = content["title2_en"]
         if isinstance(text_a, str) and isinstance(text_b, str):
             examples.append(
                 data_feature_classifier.InputExample(guid=guid,
                                                      text_a=clean(text_a),
                                                      text_b=clean(text_b),
                                                      label=["unrelated"]))
     return examples
示例#6
0
    def _create_examples(self, frequent_phrases):

        examples = []
        for (i, line) in enumerate(frequent_phrases):
            guid = i
            text_a = clean(line[0])
            input_labels = ["0"]

            text_a = tokenization.convert_to_unicode(text_a)
            input_labels = [
                label.strip() for label in input_labels
                if label.strip() in list(self.label2id.keys())
            ]

            examples.append(
                data_feature_classifier.InputExample(guid=guid,
                                                     text_a=text_a,
                                                     text_b=None,
                                                     label=input_labels))
        return examples