Python DocuscopeTagger示例

编程语言: Python

命名空间/包名称: Ity.Taggers

类/类型: DocuscopeTagger

hotexamples.com的示例: 3

Python DocuscopeTagger - 共找到3个示例。以下是从开源项目中提取的、评价最高的Ity.Taggers.DocuscopeTagger真实Python示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

DocuscopeTagger(2)

tag(2)

示例#1

显示文件

文件： UpdateWebassets.py 项目： uwgraphics/Ubiqu-Ity

def build_webassets(output_dir):
    """Render the bundled Henry IV sample as a paginated Docuscope HTML page.

    The sample text '1_KING_HENRY_IV_rev.txt' is opened by relative path,
    tokenized with RegexTokenizer, tagged with DocuscopeTagger (included tags
    are requested), and formatted through HTMLFormatter.format_paginated.
    The resulting HTML is written into ``output_dir``.

    Args:
        output_dir: Directory that receives the generated example HTML file.
    """
    sample_name = '1_KING_HENRY_IV_rev.txt'

    with open(sample_name, 'r') as sample_file:
        raw_text = sample_file.read()
        token_list = RegexTokenizer().tokenize(raw_text)
        tag_data = DocuscopeTagger(return_included_tags=True).tag(token_list)

        html_formatter = HTMLFormatter()
        # NOTE(review): private formatter hook; presumably it (re)generates the
        # static web assets the page depends on -- confirm in HTMLFormatter.
        html_formatter._build_webassets()
        html = html_formatter.format_paginated(
            tags=tag_data,
            tokens=token_list,
            text_name=sample_name,
            text_relative_path="",
            processing_id="",
        )

    # The output is opened only after the input file has been closed.
    output_path = os.path.join(
        output_dir,
        'Ubiqu+Ity_1_KING_HENRY_IV_Docuscope_Example_Output.html',
    )
    with open(output_path, 'w') as output_file:
        output_file.write(html)

示例#2

显示文件

文件： TestResults.py 项目： gharp/Ubiqu-Ity

def format_ds(input_file):
    """Tag the text stored at ``input_file`` and format it with LATFormatter.

    The file is read, tokenized with RegexTokenizer and tagged with
    DocuscopeTagger (included tags are requested). Before formatting, the
    first rule of every entry in ``tags[1]`` has its name reduced to the
    part after the last '.'.

    Args:
        input_file: Path of the text file to process.

    Returns:
        Whatever ``LATFormatter.format`` returns for the tagged text
        (the original documentation describes it as a string).
    """
    with open(input_file, 'r') as handle:
        raw_text = handle.read()
        token_list = RegexTokenizer().tokenize(raw_text)
        tag_data = DocuscopeTagger(return_included_tags=True).tag(token_list)

        # Ugly hack to fix LAT names: keep only the last dotted component of
        # the first rule's name.
        for entry in tag_data[1]:
            rules = list(entry['rules'])
            head_rule = list(rules[0])
            head_rule[0] = head_rule[0].rsplit('.', 1)[-1]
            # The patched first rule is stored as a list, the container as a
            # tuple, exactly as before.
            rules[0] = head_rule
            entry['rules'] = tuple(rules)

        lat_formatter = LATFormatter.LATFormatter()
        return lat_formatter.format(
            tags=tag_data,
            tokens=token_list,
            s=raw_text,
            input_file=input_file,
        )

示例#3

显示文件

文件： TestResults.py 项目： uwgraphics/Ubiqu-Ity

def format_ds(input_file):
    """Return LATFormatter output for the Docuscope tagging of a text file.

    Steps: read ``input_file``, tokenize it with RegexTokenizer, tag the
    tokens with DocuscopeTagger (``return_included_tags=True``), shorten the
    name of each tag's first rule to its last dot-separated component, and
    pass everything to ``LATFormatter.LATFormatter().format``.

    Args:
        input_file: Path of the text file to read.

    Returns:
        The value produced by ``LATFormatter.format`` (described by the
        original documentation as the results in string form).
    """
    with open(input_file, 'r') as source:
        contents = source.read()

        tokens = RegexTokenizer().tokenize(contents)
        tags = DocuscopeTagger(return_included_tags=True).tag(tokens)

        # Ugly hack to fix LAT names.
        for tag in tags[1]:
            patched_rules = list(tag['rules'])
            first_rule = list(patched_rules[0])
            # Drop everything up to and including the last '.' in the name.
            first_rule[0] = first_rule[0].rsplit('.', 1)[-1]
            patched_rules[0] = first_rule
            tag['rules'] = tuple(patched_rules)

        format_kwargs = {
            'tags': tags,
            'tokens': tokens,
            's': contents,
            'input_file': input_file,
        }
        return LATFormatter.LATFormatter().format(**format_kwargs)