示例#1
0
def parse(text, sentence_id):
    """Parse a raw-text sentence into its CoNLL-U annotation via UDPipe.

    Parameters: text - the sentence to be parsed (treated as
                       pre-segmented, i.e. exactly one sentence)
                sentence_id - the ID written into the '# sent_id' comment

    Output: the CoNLL-U serialization of the parsed sentence (a string)

    Raises: RuntimeError if UDPipe fails to tokenize the input.
    """
    # NOTE(review): the model is reloaded from disk on every call; callers
    # parsing many sentences may want to hoist/cache this load.
    model = Model.load('./models/udpipe/english-ewt-ud-2.3-181115.udpipe')

    # Pre-segmented tokenizer: the whole input is treated as one sentence.
    tokenizer = model.newTokenizer(model.TOKENIZER_PRESEGMENTED)

    conlluOutput = OutputFormat.newOutputFormat("conllu")

    sentence = Sentence()
    error = ProcessingError()

    tokenizer.setText(text)
    tokenizer.nextSentence(sentence, error)
    if error.occurred():
        # Fail loudly instead of tagging/parsing an empty sentence.
        raise RuntimeError('UDPipe tokenization failed: ' + error.message)

    model.tag(sentence, model.DEFAULT)
    model.parse(sentence, model.DEFAULT)

    # UDPipe numbers the (single) sentence 1; stamp the caller's ID instead.
    return conlluOutput.writeSentence(sentence).replace(
        '# sent_id = 1', '# sent_id = ' + sentence_id)
示例#2
0
    def write(self, sentences: List[Sentence], out_format: str) -> str:
        """Serialize the given sentences into the requested format.

        sentences: Input ufal.udpipe.Sentence-s.
        out_format: 'conllu'|'horizontal'|'vertical'.
        RETURNS: Sentences formatted in the out_format.
        """
        writer = OutputFormat.newOutputFormat(out_format)
        pieces = [writer.writeSentence(sent) for sent in sentences]
        # finishDocument() flushes any format-specific trailer.
        pieces.append(writer.finishDocument())
        return "".join(pieces)
示例#3
0
    def write(self, sentences, out_format):
        """Render the given sentences in the requested output format.

        sentences (list): Input ufal.udpipe.Sentence-s.
        out_format (unicode): One of conllu|horizontal|vertical.
        RETURNS (unicode): Sentences in the desired format.
        """
        formatter = OutputFormat.newOutputFormat(out_format)
        parts = []
        for sentence in sentences:
            parts.append(formatter.writeSentence(sentence))
        # finishDocument() emits any trailing output the format requires.
        parts.append(formatter.finishDocument())
        return ''.join(parts)
示例#4
0
        def preproc_item(text):
            """Tokenize and POS-tag *text*, returning its CoNLL-U serialization.

            Missing values (per pandas ``isna``) are treated as the empty
            string. Uses the enclosing scope's ``tokenizer`` and
            ``udpipe_model``.

            Raises: TypeError if UDPipe rejects the input text.
            """
            if pd.isna(text):
                text = ''
            tokenizer.resetDocument()
            try:
                tokenizer.setText(text)
            except TypeError as err:
                # Surface the offending value in the exception instead of the
                # original debug print + deliberate 1/0 crash (which also
                # referenced an undefined free variable `row`).
                raise TypeError('udpipe could not ingest text: %r' % (text,)) from err

            sentence = Sentence()
            error = ProcessingError()

            # Hoist the formatter out of the loop; the original rebuilt it
            # for every sentence.
            conllu = OutputFormat.newConlluOutputFormat()
            parts = []
            while tokenizer.nextSentence(sentence, error):
                udpipe_model.tag(sentence, Pipeline.DEFAULT, error)
                # udpipe_model.parse(sentence, Pipeline.DEFAULT, error)
                parts.append(conllu.writeSentence(sentence))

            # join avoids the quadratic += string accumulation.
            return ''.join(parts)