Пример #1
0
 def _read_classification_record(self, line, label=0):
     """
     data format:
         label.txt
         <source words> [tab] <target words>
     :param line:
     :return:
     """
     parts = line.strip().split("\t")
     assert len(parts) == 2, "wrong format for classification\n" + \
                             "the format is: <source words> [tab] <target words>"
     source = sent2ids(parts[0], self.source_dic)
     target = sent2ids(parts[1], self.target_dic)
     if not self.is_infer:  # train or test
         return source, target, label
     return source, target
Пример #2
0
    def _read_rank_record(self, line):
        '''
        data format:
            <source words> [TAB] <left_target words> [TAB] <right_target words> [TAB] <label>
        '''
        fs = line.strip().split('\t')
        assert len(fs) == 4, "wrong format for rank\n" + \
            "the format should be " +\
            "<source words> [TAB] <left_target words> [TAB] <right_target words> [TAB] <label>"

        source = sent2ids(fs[0], self.source_dic)
        left_target = sent2ids(fs[1], self.target_dic)
        right_target = sent2ids(fs[2], self.target_dic)
        if not self.is_infer:
            label = int(fs[3])
            return (source, left_target, right_target, label)
        return source, left_target, right_target
Пример #3
0
 def _read_regression_record(self, line):
     '''
     data format:
         <source words> [TAB] <target words> [TAB] <label>
     @line: str
         a string line which represent a record.
     '''
     fs = line.strip().split('\t')
     assert len(fs) == 3, "wrong format for regression\n" + \
         "the format shoud be " +\
         "<source words> [TAB] <target words> [TAB] <label>'"
     source = sent2ids(fs[0], self.source_dic)
     target = sent2ids(fs[1], self.target_dic)
     if not self.is_infer:
         label = float(fs[2])
         return (
             source,
             target,
             [label], )
     return source, target