Python Document примеры использования

Язык программирования: Python

Пространство имен/Пакет: stanfordnlp.protobuf

Класс/Тип: Document

Примеров на hotexamples.com: 8

Python Document — найдено 8 примеров. Это лучшие примеры Python-кода для stanfordnlp.protobuf.Document, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

Document (8)

Основные методы

Document (8)

Пример #1

Показать файл

        def do_POST(self):
            """
            Handle an annotate request.

            Only ``/annotate`` (with or without a trailing slash) is served; any
            other path is answered with 400 Bad Request. The request body is
            parsed into a ``Document`` with ``parseFromDelimitedString``, handed
            to ``self.annotator.annotate``, serialized again with
            ``writeToDelimitedString`` and returned as ``application/x-protobuf``.

            A request without a Content-Length header is answered with
            411 Length Required, and a malformed or negative value with
            400 Bad Request, instead of raising while converting the header.
            """
            if not self.path.endswith("/"):
                self.path += "/"
            if self.path != "/annotate/":
                self.send_response(HTTPStatus.BAD_REQUEST)
                self.end_headers()
                return

            # Validate the declared body size before reading from the connection.
            raw_length = self.headers.get('content-length')
            if raw_length is None:
                self.send_response(HTTPStatus.LENGTH_REQUIRED)
                self.end_headers()
                return
            try:
                length = int(raw_length)
            except ValueError:
                length = -1
            if length < 0:
                # A negative size must never reach rfile.read(): a buffered
                # reader treats it as "read until end of stream".
                self.send_response(HTTPStatus.BAD_REQUEST)
                self.end_headers()
                return

            # Read message
            msg = self.rfile.read(length)

            # Do the annotation
            doc = Document()
            parseFromDelimitedString(doc, msg)
            self.annotator.annotate(doc)

            with io.BytesIO() as stream:
                writeToDelimitedString(doc, stream)
                msg = stream.getvalue()

            # Write message
            self.send_response(HTTPStatus.OK)
            self.send_header("Content-Type", "application/x-protobuf")
            self.send_header("Content-Length", str(len(msg)))
            self.end_headers()
            self.wfile.write(msg)

Пример #2

Показать файл

Файл: corenlp.py Проект: UKPLab/cdcr-beyond-corpus-tailored

    def parse_sentence(self, sentence: str, properties: Optional[Dict] = None):
        """
        Annotate a single sentence with CoreNLP, reusing cached results.

        :param sentence: a single sentence
        :param properties: additional properties for CoreNLP
        :return: parsing result; on a cache hit it is a ``Document`` rebuilt from the cached bytes
        """
        # Identical text may be annotated differently under different CoreNLP properties,
        # so the cache key covers both the sentence and the properties.
        cache_key = get_dict_hash({"sentence": sentence, "properties": properties}, shorten=False)

        if cache_key in self.cache:
            # Cache hit: the cache holds the serialized bytes, so reassemble the protobuf object.
            cached_bytes = self.cache[cache_key]
            doc = Document()
            parseFromDelimitedString(doc, cached_bytes)
            return doc

        # Cache miss. Kludge: the protobuf-based result is the convenient format to work with,
        # but it is not pickle-able for the cache, so its serialized bytes are cached instead.
        req_properties = {"outputFormat": "serialized"}
        if properties is not None:
            req_properties.update(properties)
        doc = self.client.annotate(sentence, properties=req_properties)

        stream = writeToDelimitedString(doc)
        serialized = stream.getvalue()
        stream.close()
        self.cache[cache_key] = serialized
        return doc

Пример #3

Показать файл

def test_write_protobuf(doc_pb):
    """Check that writing ``doc_pb`` and parsing the bytes back yields an equal document."""
    # Serialize, keeping only the raw bytes once the stream has been closed.
    out_stream = writeToDelimitedString(doc_pb)
    serialized = out_stream.getvalue()
    out_stream.close()

    # Parse the bytes into a fresh Document and compare with the input.
    restored = Document()
    parseFromDelimitedString(restored, serialized)
    assert doc_pb == restored

Пример #4

Показать файл

def doc_pb():
    """Load ``data/test.dat`` (located next to this file) and parse it into a ``Document``."""
    here = os.path.dirname(os.path.abspath(__file__))
    data_path = os.path.join(here, 'data', 'test.dat')
    with open(data_path, 'rb') as handle:
        raw_bytes = handle.read()

    parsed = Document()
    parseFromDelimitedString(parsed, raw_bytes)
    return parsed

Пример #5

Показать файл

Файл: client.py Проект: zmskye/stanfordnlp

    def annotate(self,
                 text,
                 annotators=None,
                 output_format=None,
                 properties=None):
        """Send a request to the CoreNLP server.

        The request properties are built on a private copy, so neither
        ``self.default_properties`` nor the caller's ``properties`` dict is
        modified by this call.

        :param (str | unicode) text: raw text for the CoreNLPServer to parse
        :param (list | string) annotators: list of annotators to use, or an
            already comma-separated string (passed through unchanged)
        :param (str) output_format: output type from server: serialized, json, text, conll, conllu, or xml
        :param (dict) properties: properties that the server expects
        :return: request result: a ``Document`` for "serialized", decoded JSON
            for "json", the response text for "text"/"conllu"/"conll"/"xml",
            and the raw response object otherwise (including when no
            ``outputFormat`` is set at all)
        """
        # set properties for server call
        if properties is None:
            # Copy: writing into the shared defaults would leak per-request
            # settings into every later call.
            properties = dict(self.default_properties)
            properties.update({
                'inputFormat': 'text',
                'outputFormat': self.default_output_format,
                'serializer':
                'edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer',
            })
            needs_annotators = True
        else:
            # Copy: the caller's dict must not be changed behind their back.
            properties = dict(properties)
            needs_annotators = "annotators" not in properties
        if needs_annotators:
            chosen = annotators or self.default_annotators
            # Joining a str would split it into single characters.
            properties['annotators'] = (
                chosen if isinstance(chosen, str) else ','.join(chosen))
        # if an output_format is specified, use that to override
        if output_format is not None:
            properties["outputFormat"] = output_format
        # make the request
        r = self._request(text.encode('utf-8'), properties)
        # customize what is returned based on outputFormat; .get() so that
        # caller properties without the key fall through to the raw response
        fmt = properties.get("outputFormat")
        if fmt == "serialized":
            doc = Document()
            parseFromDelimitedString(doc, r.content)
            return doc
        if fmt == "json":
            return r.json()
        if fmt in ("text", "conllu", "conll", "xml"):
            return r.text
        return r

Пример #6

Показать файл

Файл: client.py Проект: zmskye/stanfordnlp

    def update(self, doc, annotators=None, properties=None):
        """Send an existing document to the server and return the result as a new ``Document``.

        ``doc`` is serialized with ``writeToDelimitedString``, posted through
        ``self._request`` and the response content is parsed into a fresh
        ``Document``.

        :param doc: the document to send
        :param annotators: annotators to request when ``properties`` is not
            given; a list is comma-joined, a ready-made string is passed
            through unchanged; defaults to ``self.default_annotators``
        :param (dict) properties: request properties; when given they are used
            as-is and ``annotators`` is not applied
        :return: a new ``Document`` parsed from the response content
        """
        if properties is None:
            # Copy: updating the shared defaults in place would leave
            # inputFormat/outputFormat "serialized" behind for later calls.
            properties = dict(self.default_properties)
            chosen = annotators or self.default_annotators
            properties.update({
                # Joining a str would split it into single characters.
                'annotators':
                chosen if isinstance(chosen, str) else ','.join(chosen),
                'inputFormat':
                'serialized',
                'outputFormat':
                'serialized',
                'serializer':
                'edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer'
            })
        with io.BytesIO() as stream:
            writeToDelimitedString(doc, stream)
            msg = stream.getvalue()

        r = self._request(msg, properties)
        doc = Document()
        parseFromDelimitedString(doc, r.content)
        return doc

Пример #7

Показать файл

    def update(self, doc, annotators=None, properties=None):
        """Send an existing document to the server and return the result as a new ``Document``.

        :param doc: the document to send; serialized with ``writeToDelimitedString``
        :param annotators: optional annotators; a list is comma-joined, any
            other truthy value is stored as given
        :param (dict) properties: request properties; when omitted, a
            serialized-in / serialized-out property set is used
        :return: a new ``Document`` parsed from the response content
        """
        if properties is None:
            properties = {
                'inputFormat': 'serialized',
                'outputFormat': 'serialized',
                'serializer':
                'edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer',
            }
        if annotators:
            if isinstance(annotators, list):
                properties['annotators'] = ",".join(annotators)
            else:
                properties['annotators'] = annotators

        # Serialize the outgoing document into an in-memory buffer.
        with io.BytesIO() as buffer:
            writeToDelimitedString(doc, buffer)
            payload = buffer.getvalue()

        response = self._request(payload, properties)
        updated = Document()
        parseFromDelimitedString(updated, response.content)
        return updated

Пример #8

Показать файл

    def annotate(self,
                 text,
                 annotators=None,
                 output_format=None,
                 properties_key=None,
                 properties=None,
                 **kwargs):
        """
        Send a request to the CoreNLP server.

        :param (str | unicode) text: raw text for the CoreNLPServer to parse
        :param (list | string) annotators: list of annotators to use
        :param (str) output_format: output type from server: serialized, json, text, conll, conllu, or xml
        :param (str) properties_key: key into properties cache for the client
        :param (dict) properties: additional request properties (written on top of defaults)
        :param kwargs: extra keyword arguments forwarded to ``self._request``

        The properties for a request are written in this order:

        1. Server default properties (server side)
        2. Properties from client's properties_cache corresponding to properties_key (client side)
           If the properties_key is the name of a Stanford CoreNLP supported language:
           [Arabic, Chinese, English, French, German, Spanish], the Stanford CoreNLP defaults will be used (server side)
        3. Additional properties corresponding to properties (client side)
        4. Special case specific properties: annotators, output_format (client side)

        The cached property set is copied before request-specific values are
        written on top, so entries in ``self.properties_cache`` are never
        modified by a request.

        :return: request result: decoded JSON for "json", a ``Document`` for
            "serialized", the response text for "text"/"conllu"/"conll"/"xml",
            and the raw response object otherwise
        """
        # set properties for server call
        # first look for a cached default properties set
        # if a Stanford CoreNLP supported language is specified, just pass {pipelineLanguage="french"}
        if properties_key is not None:
            if properties_key.lower() in CoreNLPClient.PIPELINE_LANGUAGES:
                request_properties = {
                    'pipelineLanguage': properties_key.lower()
                }
            else:
                # Copy: the updates below would otherwise be written into the
                # cached dict and stick to every later request using this key.
                request_properties = dict(
                    self.properties_cache.get(properties_key, {}))
        else:
            request_properties = {}
        # add on custom properties for this request
        if properties is None:
            properties = {}
        request_properties.update(properties)
        # if annotators list is specified, override with that
        if annotators is not None:
            request_properties['annotators'] = ",".join(
                annotators) if isinstance(annotators, list) else annotators
        # always send an output format with request
        # in some scenarios the server's default output format is unknown, so default to serialized
        if output_format is not None:
            request_properties['outputFormat'] = output_format
        if request_properties.get('outputFormat') is None:
            server_format = self.server_start_info.get('props', {}).get(
                'outputFormat')
            request_properties['outputFormat'] = (
                server_format or CoreNLPClient.DEFAULT_OUTPUT_FORMAT)
        # make the request
        r = self._request(text.encode('utf-8'), request_properties, **kwargs)
        chosen_format = request_properties["outputFormat"]
        if chosen_format == "json":
            return r.json()
        if chosen_format == "serialized":
            doc = Document()
            parseFromDelimitedString(doc, r.content)
            return doc
        if chosen_format in ("text", "conllu", "conll", "xml"):
            return r.text
        return r