示例#1
0
 def raw_to_proto(self, raw):
     """Wrap one raw value as a single-element int64 ``Feature``.

     When ``self.vocab`` is ``None`` the value is converted with ``int()``;
     otherwise it is looked up as ``self.vocab[raw]``.
     """
     token_id = int(raw) if self.vocab is None else self.vocab[raw]
     int64_list = feature_pb2.Int64List(value=[token_id])
     return feature_pb2.Feature(int64_list=int64_list)
示例#2
0
 def raw_to_proto(self, raw):
     """Tokenize ``raw`` and pack the resulting ids into an int64 ``Feature``.

     Tokens produced by ``self.tokenizer`` that are already ``int`` are kept
     unchanged; every other token is mapped through ``self.vocab.get`` with
     ``self.unk_id`` as the fallback.
     """
     ids = []
     for token in self.tokenizer(raw):
         if not isinstance(token, int):
             token = self.vocab.get(token, self.unk_id)
         ids.append(token)
     int64_list = feature_pb2.Int64List(value=ids)
     return feature_pb2.Feature(int64_list=int64_list)
示例#3
0
def build_example(slots):
    """Assemble a ``SequenceExample`` holding ``'txt'`` and ``'segs'`` lists.

    Args:
        slots: A pair ``(txt, seginfo)``. Each member is iterated, and every
            item is passed as the ``value`` of an ``Int64List``.

    Returns:
        An ``example_pb2.SequenceExample`` whose feature lists are keyed
        ``'txt'`` and ``'segs'``.

    Raises:
        AssertionError: If the two feature lists differ in length.
    """
    txt, seginfo = slots

    def _as_feature_list(rows):
        # One int64 Feature per row, in the order the rows are produced.
        wrapped = []
        for row in rows:
            wrapped.append(
                feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=row)))
        return feature_pb2.FeatureList(feature=wrapped)

    txt_list = _as_feature_list(txt)
    segs_list = _as_feature_list(seginfo)

    n_txt = len(txt_list.feature)
    n_segs = len(segs_list.feature)
    assert n_txt == n_segs, 'txt[%d] and seginfo[%d] size not match' % (
        n_txt, n_segs)

    feature_lists = feature_pb2.FeatureLists(feature_list={
        'txt': txt_list,
        'segs': segs_list,
    })
    return example_pb2.SequenceExample(feature_lists=feature_lists)
示例#4
0
 def raw_to_proto(self, raw):
     """Parse whitespace-separated integers in ``raw`` into an int64 ``Feature``.

     Args:
         raw: The serialized ids, e.g. ``b'3 14 15'``.

     Returns:
         A ``feature_pb2.Feature`` whose ``int64_list`` holds the parsed ids
         in order. Empty or whitespace-only input yields an empty list.

     Raises:
         ValueError: If a token is not a valid integer literal.
     """
     # split() with no separator collapses runs of whitespace and ignores
     # leading/trailing whitespace. Splitting on a single b' ' produced empty
     # tokens for such input (and for empty input), and int(b'') raises.
     ids = [int(token) for token in raw.split()]
     fe = feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=ids))
     return fe
示例#5
0
 def raw_to_proto(self, raw):
     """Tokenize ``raw`` and encode the tokens as an int64 ``Feature``.

     Each token from ``self.tokenizer`` is mapped through ``self.vocab.get``,
     falling back to ``self.unk_id`` when the lookup misses.
     """
     ids = []
     for token in self.tokenizer(raw):
         ids.append(self.vocab.get(token, self.unk_id))
     int64_list = feature_pb2.Int64List(value=ids)
     return feature_pb2.Feature(int64_list=int64_list)