def export_entities(export_id, result):
    """Write the entities of a search result into a zip archive.

    Builds ``query-export.zip`` inside a scratch directory. The archive
    receives whatever ``write_document`` adds for each entity plus an
    ``Export.xlsx`` workbook with one row per entity, and is then handed to
    ``complete_export``. Any exception is logged and the export is marked as
    failed; the scratch directory is removed in every case.

    Args:
        export_id: identifier of the ``Export`` record being processed.
        result: mapping whose ``"results"`` entry holds the entity dicts.
    """
    # Kept as a function-level import, as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    from aleph.logic import resolver

    export_dir = ensure_path(mkdtemp(prefix="aleph.export."))
    try:
        entities = []
        # The resolver is handed an object exposing ``.result``.
        stub = types.SimpleNamespace(result=result)
        for entity in result["results"]:
            resolver.queue(stub, Collection, entity.get("collection_id"))
            entities.append(model.get_proxy(entity))
        resolver.resolve(stub)

        file_path = export_dir.joinpath("query-export.zip")
        exporter = ExcelExporter(None, extra=EXTRA_HEADERS)
        # The context manager closes the archive even if writing fails,
        # so the file handle is never leaked.
        with zipfile.ZipFile(file_path, "w") as zf:
            for entity in entities:
                collection_id = entity.context.get("collection_id")
                collection = resolver.get(stub, Collection, collection_id)
                if collection is None:
                    # Skip entities whose collection did not resolve rather
                    # than failing the whole export on ``None.get``.
                    continue
                extra = [entity_url(entity.id), collection.get("label")]
                exporter.write(entity, extra=extra)
                write_document(export_dir, zf, collection, entity)
            content = exporter.get_bytesio().getvalue()
            zf.writestr("Export.xlsx", content)
        # Only hand over the archive once it has been closed.
        complete_export(export_id, file_path)
    except Exception:
        log.exception("Failed to process export [%s]", export_id)
        export = Export.by_id(export_id)
        # Guard against a missing record so the handler itself cannot raise.
        if export is not None:
            export.set_status(status=Export.STATUS_FAILED)
            db.session.commit()
    finally:
        shutil.rmtree(export_dir)
def export_entities(request, result):
    """Yield the chunks of a zip archive built from a search result.

    Every entity in ``result.results`` is written as a row of an Excel
    workbook and passed to ``write_document`` together with its collection;
    the workbook is added to the archive as ``Export.xlsx``. ``request`` is
    accepted but not used in the body.
    """
    proxies = []
    for data in result.results:
        resolver.queue(result, Collection, data.get('collection_id'))
        proxies.append(model.get_proxy(data))
    resolver.resolve(result)

    archive = zipstream.ZipFile()
    workbook = ExcelExporter(None, extra=EXTRA_HEADERS)
    for proxy in proxies:
        collection = resolver.get(
            result, Collection, proxy.context.get('collection_id')
        )
        workbook.write(
            proxy, extra=[entity_url(proxy.id), collection.get('label')]
        )
        write_document(archive, collection, proxy)

    archive.write_iter('Export.xlsx', workbook.get_bytesio())
    yield from archive
def export_entities(export_id):
    """Run the entity export identified by ``export_id``.

    Iterates the proxies matching the query stored in ``export.meta``,
    writes each one to an Excel workbook and to ``query-export.zip`` in a
    scratch directory, and passes the archive to ``complete_export``. The
    loop stops early once the archive on disk reaches
    ``Export.MAX_FILE_SIZE``. Any exception is logged and the export is
    marked as failed; the scratch directory is removed in every case.

    Args:
        export_id: identifier of the ``Export`` record to process.
    """
    export = Export.by_id(export_id)
    log.info("Export entities [%r]...", export)
    export_dir = ensure_path(mkdtemp(prefix="aleph.export."))
    # Per-run cache so each collection is looked up only once.
    collections = {}
    try:
        # Without a stored query, fall back to one that matches nothing.
        filters = [export.meta.get("query", {"match_none": {}})]
        file_path = export_dir.joinpath("query-export.zip")
        with ZipFile(file_path, mode="w") as zf:
            excel_path = export_dir.joinpath(EXCEL_FILE)
            exporter = ExcelExporter(excel_path, extra=EXTRA_HEADERS)
            for entity in iter_proxies(filters=filters):
                collection_id = entity.context.get("collection_id")
                if collection_id not in collections:
                    collections[collection_id] = get_collection(collection_id)
                collection = collections[collection_id]
                if collection is None:
                    continue
                extra = [entity_url(entity.id), collection.get("label")]
                exporter.write(entity, extra=extra)
                write_document(export_dir, zf, collection, entity)
                if file_path.stat().st_size >= Export.MAX_FILE_SIZE:
                    # ``warn`` is a deprecated alias of ``warning``.
                    log.warning("Export too large: %r", export)
                    break

            exporter.finalize()
            zf.write(excel_path, arcname=EXCEL_FILE)
        complete_export(export_id, file_path)
    except Exception:
        log.exception("Failed to process export [%s]", export_id)
        export = Export.by_id(export_id)
        # Guard against a missing record so the handler itself cannot raise.
        if export is not None:
            export.set_status(status=Status.FAILED)
            db.session.commit()
    finally:
        shutil.rmtree(export_dir)
def test_excel_export(self):
    """Write one entity to an Excel file and check the resulting sheet."""
    proxy = model.get_proxy(ENTITY)
    exporter = ExcelExporter(self.temp, extra=["source"])
    exporter.write(proxy, extra=["test"])
    exporter.finalize()

    book = load_workbook(self.temp)
    self.assertListEqual(book.sheetnames, ["People"])

    rows = list(book["People"])
    exportable = exporter.exportable_properties(proxy.schema)

    # Header row: the ID column, the extra column, then one per property.
    header = [cell.value for cell in rows[0]]
    expected_header = ["ID", "source"] + [prop.label for prop in exportable]
    self.assertListEqual(header, expected_header)

    # Only the first three cells of the data row are compared.
    leading = [cell.value for cell in rows[1][:3]]
    self.assertListEqual(leading, ["person", "test", "Ralph Tester"])
def test_excel_export(self):
    """Write one entity to an Excel file and check the resulting sheet."""
    proxy = model.get_proxy(ENTITY)
    exporter = ExcelExporter(self.temp, extra=['source'])
    exporter.write(proxy, extra=['test'])
    exporter.finalize()

    book = load_workbook(self.temp)
    self.assertListEqual(book.sheetnames, ['People'])

    rows = list(book["People"])

    # Header row: the ID column, the extra column, then one per property.
    header = [cell.value for cell in rows[0]]
    property_labels = [prop.label for prop in proxy.schema.sorted_properties]
    self.assertListEqual(header, ['ID', 'source'] + property_labels)

    # Only the first three cells of the data row are compared.
    leading = [cell.value for cell in rows[1][:3]]
    self.assertListEqual(leading, ['person', 'test', 'Ralph Tester'])
def export_entities(export_id):
    """Run the entity export identified by ``export_id``.

    Iterates the proxies matching the query stored in ``export.meta``,
    writes each one to an Excel workbook and to ``export.zip`` in a scratch
    directory, and passes the archive and a download file name derived from
    ``export.label`` to ``complete_export``. The loop stops early, and an
    ``EXPORT_TOO_LARGE.txt`` note is added to the archive, when either the
    archive on disk reaches ``settings.EXPORT_MAX_SIZE`` or
    ``settings.EXPORT_MAX_RESULTS`` entities have been processed and more
    remain. Any exception is logged and the export is marked as failed; the
    scratch directory is removed in every case.

    Args:
        export_id: identifier of the ``Export`` record to process.
    """
    export = Export.by_id(export_id)
    log.info("Export entities [%r]...", export)
    export_dir = ensure_path(mkdtemp(prefix="aleph.export."))
    # Per-run cache so each collection is looked up only once.
    collections = {}
    try:
        # Without a stored query, fall back to one that matches nothing.
        filters = [export.meta.get("query", {"match_none": {}})]
        file_path = export_dir.joinpath("export.zip")
        with ZipFile(file_path, mode="w") as zf:
            excel_name = safe_filename(export.label, extension="xlsx")
            excel_path = export_dir.joinpath(excel_name)
            exporter = ExcelExporter(excel_path, extra=EXTRA_HEADERS)
            for idx, entity in enumerate(iter_proxies(filters=filters)):
                # Check the cap before writing: ``idx`` entities have been
                # handled at this point, so stopping here keeps the export
                # at EXPORT_MAX_RESULTS instead of one past it, and the
                # note is only added when a further entity really exists.
                if idx >= settings.EXPORT_MAX_RESULTS:
                    concern = "number of"
                    zf.writestr("EXPORT_TOO_LARGE.txt", WARNING % concern)
                    break
                collection_id = entity.context.get("collection_id")
                if collection_id not in collections:
                    collections[collection_id] = get_collection(collection_id)
                collection = collections[collection_id]
                if collection is None:
                    continue
                extra = [entity_url(entity.id), collection.get("label")]
                exporter.write(entity, extra=extra)
                write_document(export_dir, zf, collection, entity)
                if file_path.stat().st_size >= settings.EXPORT_MAX_SIZE:
                    concern = "total size of the"
                    zf.writestr("EXPORT_TOO_LARGE.txt", WARNING % concern)
                    break

            exporter.finalize()
            zf.write(excel_path, arcname=excel_name)
        file_name = "Export: %s" % export.label
        file_name = safe_filename(file_name, extension="zip")
        complete_export(export_id, file_path, file_name)
    except Exception:
        log.exception("Failed to process export [%s]", export_id)
        export = Export.by_id(export_id)
        # Guard against a missing record so the handler itself cannot raise.
        if export is not None:
            export.set_status(status=Status.FAILED)
            db.session.commit()
    finally:
        shutil.rmtree(export_dir)
def export_excel(infile, outfile):
    """Run ``export_stream`` on ``infile`` with an ``ExcelExporter``.

    The exporter is constructed with ``outfile`` as its only argument.
    """
    export_stream(ExcelExporter(outfile), infile)
def test_excel_bytesio(self):
    """Check the value of the exporter's buffer after writing one entity.

    The value returned by ``get_bytesio().getvalue()`` must be longer
    than 100.
    """
    proxy = model.get_proxy(ENTITY)
    exporter = ExcelExporter(self.temp, extra=['source'])
    exporter.write(proxy, extra=['test'])
    payload = exporter.get_bytesio().getvalue()
    assert len(payload) > 100
def export_excel(infile: Path, outfile: Path) -> None:
    """Run ``export_stream`` on ``infile`` with an ``ExcelExporter``.

    The exporter is constructed with ``outfile`` as its only argument.
    """
    export_stream(ExcelExporter(outfile), infile)