def process(self, document):
    """Parse the document's source as JSON and attach the result as content.

    The source obtained from ``get_source(document)`` is loaded with
    ``json.load``. A new node of type ``'container'`` is created on the
    document, populated through ``self.parse_dict`` and installed as the
    document's ``content_node``.

    Args:
        document: The document whose source is read and whose content node
            is replaced.

    Returns:
        The same ``document`` instance, with ``content_node`` set.
    """
    # Only the read needs the handle; the JSON is fully loaded once
    # ``json.load`` returns.
    with get_source(document) as source:
        parsed = json.load(source)

    container = document.create_node(type='container')
    self.parse_dict(parsed, container)
    document.content_node = container
    return document
def process(self, document):
    """Read the document's source and store it as a single text node.

    The whole source is read, then decoded with ``self.encoding`` on a
    best-effort basis. If decoding fails, or the data has no ``decode``
    method, the data is used as it was read.

    Args:
        document: The document whose source is read and whose content node
            is replaced.

    Returns:
        The same ``document`` instance, with ``content_node`` set to a new
        node of type ``'text'`` holding the data.
    """
    with get_source(document) as fh:
        data = fh.read()

    try:
        data = data.decode(self.encoding)
    except (UnicodeDecodeError, AttributeError):
        # Deliberate best-effort: keep the data exactly as read when it
        # cannot be decoded or exposes no ``decode`` method.
        pass

    # NOTE(review): this used to be ``data if self.decode else data``, whose
    # two branches were identical, so ``self.decode`` never influenced the
    # result. The no-op conditional is removed; if ``self.decode`` was meant
    # to switch decoding off, that needs to be implemented deliberately.
    document.content_node = document.create_node(type='text', content=data)
    return document
def execute_service(self, document, options, attach_source):
    """POST an execution request to the cloud session and return the reply.

    When ``attach_source`` is truthy the document's source (from
    ``get_source``) is uploaded under the ``"file"`` form field; otherwise
    the document itself is uploaded under ``"document"`` as the result of
    ``document.to_msgpack()``. ``options`` is JSON-encoded and sent as the
    ``"options"`` form value.

    Args:
        document: The document to send (or whose source is sent).
        options: Value serialised with ``json.dumps`` into the request.
        attach_source: Whether to upload the document's source instead of
            the serialised document.

    Returns:
        The response body parsed as JSON, with JSON objects built through
        ``AttrDict``.
    """
    from contextlib import ExitStack

    # The source handle was previously opened and never closed. ExitStack
    # lets it be opened only when needed and always released once the
    # request has completed (or failed).
    with ExitStack() as stack:
        if attach_source:
            files = {"file": stack.enter_context(get_source(document))}
        else:
            files = {"document": document.to_msgpack()}

        data = {"options": json.dumps(options)}
        r = requests.post(
            f"{self.cloud_url}/api/sessions/{self.cloud_session.id}/execute",
            params={self.session_type: self.slug},
            data=data,
            headers={"x-access-token": self.access_token},
            files=files)

    execution = json.loads(r.text, object_hook=AttrDict)
    print(execution)
    return execution
def process(self, document):
    """Load the document's source as text nodes.

    When ``self.lines_as_child_nodes`` is truthy, an empty ``'text'`` node
    becomes the document's ``content_node`` and every line of the source is
    added to it as a child ``'text'`` node whose content is the line passed
    through ``self.decode_text`` and stripped of surrounding whitespace.
    Otherwise the whole source is read and stored, after
    ``self.decode_text``, in a single ``'text'`` content node.

    Args:
        document: The document whose source is read and whose content node
            is replaced.

    Returns:
        The same ``document`` instance.
    """
    with get_source(document) as source:
        if not self.lines_as_child_nodes:
            # Single-node mode: the entire source becomes one text node.
            raw = source.read()
            document.content_node = document.create_node(
                node_type='text', content=self.decode_text(raw))
        else:
            # Per-line mode: an empty parent node with one child per line.
            raw_lines = source.readlines()
            document.content_node = document.create_node(node_type='text')
            for raw_line in raw_lines:
                line_text = self.decode_text(raw_line).strip()
                line_node = document.create_node(
                    node_type='text', content=line_text)
                document.content_node.add_child(line_node)

    # NOTE(review): the original layout was lost; the mixin is assumed to be
    # added in both modes rather than only in single-node mode -- confirm.
    document.add_mixin('text')
    return document
def test_get_source():
    """Check that ``get_source`` yields readable content for a URL document.

    NOTE(review): this test fetches a live URL, so it needs network access.
    """
    document = Document.from_url('https://www.google.com')
    with get_source(document) as fh:
        data = fh.read()

    # The test used to print the payload and assert nothing, so it passed
    # even when nothing was read; fail explicitly on an empty result.
    assert data, "expected get_source to return non-empty content"