示例#1
0
文件: common.py 项目: fossabot/kodexa
    def process(self, document):
        """Load the document's source as JSON and attach it as a node tree.

        The handle obtained from ``get_source(document)`` is parsed with
        ``json.load``. A new ``'container'`` node is created on the document,
        handed to ``self.parse_dict`` together with the parsed JSON, and then
        installed as ``document.content_node``.

        Returns the same ``document`` that was passed in.
        """
        with get_source(document) as source:
            parsed = json.load(source)
            container = document.create_node(type='container')
            # parse_dict receives the parsed JSON and the container node;
            # the node is attached to the document only after it returns.
            self.parse_dict(parsed, container)
            document.content_node = container

        return document
示例#2
0
文件: common.py 项目: fossabot/kodexa
    def process(self, document):
        """Read the document's source and store it as a single ``'text'`` node.

        The data read from ``get_source(document)`` is decoded with
        ``self.encoding`` when possible. Data that has no ``decode`` method
        (presumably already text) or that cannot be decoded with that encoding
        is stored unchanged.

        NOTE(review): ``self.decode`` is not consulted here; decoding is always
        attempted. Confirm whether that flag was meant to gate decoding.

        Returns the same ``document`` that was passed in.
        """
        with get_source(document) as fh:
            data = fh.read()

            try:
                data = data.decode(self.encoding)
            except (UnicodeDecodeError, AttributeError):
                # Deliberate best effort: keep the data exactly as read when
                # it is not decodable bytes.
                pass

            document.content_node = document.create_node(type='text', content=data)

        return document
示例#3
0
文件: kodexa.py 项目: fossabot/kodexa
    def execute_service(self, document, options, attach_source):
        """Post the document, or its source, to the session's execute endpoint.

        Args:
            document: The document to send.
            options: Options serialised with ``json.dumps`` and sent as the
                ``options`` form field.
            attach_source: When truthy, the handle from
                ``get_source(document)`` is uploaded as ``file``; otherwise
                ``document.to_msgpack()`` is uploaded as ``document``.

        Returns:
            The response body parsed as JSON, with JSON objects built through
            ``AttrDict``.
        """
        # Function-scope import so the block does not depend on the file's
        # top-level imports.
        from contextlib import ExitStack

        with ExitStack() as stack:
            files = {}
            if attach_source:
                # Enter the source's context so the handle is released once
                # the request has been sent, even if the request raises.
                files["file"] = stack.enter_context(get_source(document))
            else:
                files["document"] = document.to_msgpack()

            data = {"options": json.dumps(options)}

            r = requests.post(
                f"{self.cloud_url}/api/sessions/{self.cloud_session.id}/execute",
                params={self.session_type: self.slug},
                data=data,
                headers={"x-access-token": self.access_token},
                files=files)

        execution = json.loads(r.text, object_hook=AttrDict)
        print(execution)
        return execution
示例#4
0
    def process(self, document):
        """Fill the document's content with text node(s) read from its source.

        When ``self.lines_as_child_nodes`` is truthy, an empty ``'text'`` node
        becomes ``document.content_node`` and every line from the source is
        added to it as a child ``'text'`` node holding the stripped result of
        ``self.decode_text``. Otherwise the whole source is read at once and
        stored, via ``self.decode_text``, in a single ``'text'`` node.

        In both cases the ``'text'`` mixin is added to the document, and the
        same ``document`` is returned.
        """
        with get_source(document) as source:

            if not self.lines_as_child_nodes:
                # Whole source becomes one text node.
                raw = source.read()
                document.content_node = document.create_node(
                    node_type='text', content=self.decode_text(raw))
            else:
                # One child text node per source line under an empty parent.
                raw_lines = source.readlines()
                document.content_node = document.create_node(node_type='text')

                for raw_line in raw_lines:
                    stripped = self.decode_text(raw_line).strip()
                    child = document.create_node(
                        node_type='text', content=stripped)
                    document.content_node.add_child(child)

            document.add_mixin('text')

        return document
示例#5
0
def test_get_source():
    """Smoke test: open the source of a URL-backed document and print it."""
    url_document = Document.from_url('https://www.google.com')

    with get_source(url_document) as source:
        contents = source.read()
        print(contents)