def __call__(self, tweet):
    """Tag `tweet['text']` with CRF-predicted entity sequences.

    Tokenizes the tweet text, featurizes the tokens, runs the CRF, and
    appends one entry per contiguous non-null label run to
    `tweet['sequences']` (created if absent), each entry a dict with keys
    'text' (the label), 'start', and 'end' (character boundaries from
    zip_boundaries).

    Args:
        tweet: dict with at least a 'text' key; mutated in place.

    Returns:
        The same `tweet` dict, mutated.
    """
    tokens = token_re.findall(tweet['text'])
    tokens_features = featurize(tokens, self.feature_functions)
    null_label = 'None'
    labels = self.crf.predict([tokens_features])[0]
    tweet.setdefault('sequences', [])
    # Group consecutive identical labels into spans.
    for sequence_label, entries in itertools.groupby(
            zip_boundaries(labels), lambda tup: tup[0]):
        if sequence_label != null_label:
            # Unpack into fresh names: the original rebound `labels` here,
            # shadowing the prediction list that the zip_boundaries
            # generator being iterated was built from.
            _, starts, ends = zip(*entries)
            tweet['sequences'].append({
                'text': sequence_label,
                'start': starts[0],
                'end': ends[-1],
            })
    return tweet
def spotlight(document, confidence=0.1, support=10):
    """Annotate `document` via DBpedia Spotlight; yield labels per token.

    Joins the token sequence with single spaces, POSTs it to the Spotlight
    annotate endpoint, then for each token (with character boundaries from
    zip_boundaries) yields a list of labels: the entity URI followed by its
    types, for every returned resource whose character span overlaps the
    token's span. Tokens overlapping no entity yield an empty list.

    Args:
        document: sequence of token strings.
        confidence: Spotlight disambiguation confidence threshold.
        support: Spotlight minimum support (inlink count) threshold.

    Yields:
        list[str] of labels for each token, in token order.
    """
    document_string = u' '.join(document)
    r = requests.post(spotlight_annotate_url,
                      headers=dict(Accept='application/json'),
                      data=dict(text=document_string,
                                confidence=confidence,
                                support=support))
    resources = r.json().get('Resources', [])
    for token, token_start, token_end in zip_boundaries(document):
        labels = []
        for resource in resources:
            entity_start = int(resource['@offset'])
            entity_end = entity_start + len(resource['@surfaceForm'])
            # Interval-overlap test. The original only checked whether a
            # token *endpoint* fell inside the entity span, which missed
            # tokens that fully contain an entity. This form keeps the
            # original's inclusive bounds (so every previously-matched
            # case still matches) while also catching containment.
            # NOTE(review): inclusive-vs-exclusive end offsets depend on
            # zip_boundaries' convention -- confirm against its definition.
            if token_start <= entity_end and entity_start <= token_end:
                entity_uri = resource['@URI']
                entity_types = resource['@types'].split(',')
                labels += [entity_uri] + entity_types
        yield labels