Exemplo n.º 1
0
def export_entities(export_id):
    """Build a zip archive with the entities matched by a stored export.

    The ``Export`` record is loaded by ``export_id`` and its ``meta["query"]``
    is used as the only filter (a ``match_none`` query is used when the key
    is absent). Every matching entity is written to an Excel workbook and
    handed to ``write_document`` together with the open zip file. Entities
    whose collection resolves to ``None`` are skipped.

    Iteration stops early once the zip file on disk reaches
    ``Export.MAX_FILE_SIZE``. On success ``complete_export`` is called with
    the archive path; on any exception the export is marked ``FAILED`` and
    the session is committed. The temporary directory is removed in all cases.
    """
    export = Export.by_id(export_id)
    log.info("Export entities [%r]...", export)
    export_dir = ensure_path(mkdtemp(prefix="aleph.export."))
    # Per-run cache so each collection is looked up only once.
    collections = {}
    try:
        filters = [export.meta.get("query", {"match_none": {}})]
        file_path = export_dir.joinpath("query-export.zip")
        with ZipFile(file_path, mode="w") as zf:
            excel_path = export_dir.joinpath(EXCEL_FILE)
            exporter = ExcelExporter(excel_path, extra=EXTRA_HEADERS)
            for entity in iter_proxies(filters=filters):
                collection_id = entity.context.get("collection_id")
                if collection_id not in collections:
                    collections[collection_id] = get_collection(collection_id)
                collection = collections[collection_id]
                if collection is None:
                    continue
                extra = [entity_url(entity.id), collection.get("label")]
                exporter.write(entity, extra=extra)
                write_document(export_dir, zf, collection, entity)
                # The size is checked after each entity, so the archive can
                # exceed the limit by whatever the last entity added.
                if file_path.stat().st_size >= Export.MAX_FILE_SIZE:
                    # ``warn`` is a deprecated alias; ``warning`` is the
                    # supported spelling.
                    log.warning("Export too large: %r", export)
                    break

            # The workbook is added last, after all entity rows were written.
            exporter.finalize()
            zf.write(excel_path, arcname=EXCEL_FILE)
        complete_export(export_id, file_path)
    except Exception:
        log.exception("Failed to process export [%s]", export_id)
        # Re-load the record before flagging it as failed.
        export = Export.by_id(export_id)
        export.set_status(status=Status.FAILED)
        db.session.commit()
    finally:
        shutil.rmtree(export_dir)
Exemplo n.º 2
0
 def test_excel_export(self):
     """Write one entity to an Excel file and verify sheet, header and row.

     The exporter is given one extra header ("source") and one extra value
     ("test"); the header row is expected to be the ID column, the extra
     header, then the labels of the exporter's exportable properties.
     """
     proxy = model.get_proxy(ENTITY)
     exporter = ExcelExporter(self.temp, extra=["source"])
     exporter.write(proxy, extra=["test"])
     exporter.finalize()

     # Read the produced file back and inspect its only sheet.
     book = load_workbook(self.temp)
     self.assertListEqual(book.sheetnames, ["People"])
     sheet_rows = list(book["People"])

     expected_header = ["ID", "source"]
     expected_header.extend(
         prop.label for prop in exporter.exportable_properties(proxy.schema)
     )
     actual_header = [cell.value for cell in sheet_rows[0]]
     self.assertListEqual(actual_header, expected_header)

     # Only the first three cells of the data row are pinned down.
     leading_values = [cell.value for cell in sheet_rows[1][:3]]
     self.assertListEqual(leading_values, ["person", "test", "Ralph Tester"])
Exemplo n.º 3
0
 def test_excel_export(self):
     """Write one entity to an Excel file and verify sheet, header and row.

     The header row is expected to be the ID column, the extra "source"
     header, then the labels of the schema's sorted properties.
     """
     proxy = model.get_proxy(ENTITY)
     exporter = ExcelExporter(self.temp, extra=['source'])
     exporter.write(proxy, extra=['test'])
     exporter.finalize()

     # Read the produced file back and inspect its only sheet.
     book = load_workbook(self.temp)
     self.assertListEqual(book.sheetnames, ['People'])
     sheet_rows = list(book["People"])

     actual_header = [cell.value for cell in sheet_rows[0]]
     expected_header = ['ID', 'source']
     expected_header.extend(
         prop.label for prop in proxy.schema.sorted_properties
     )
     self.assertListEqual(actual_header, expected_header)

     # Only the first three cells of the data row are pinned down.
     leading_values = [cell.value for cell in sheet_rows[1][:3]]
     self.assertListEqual(leading_values, ['person', 'test', 'Ralph Tester'])
Exemplo n.º 4
0
Arquivo: export.py Projeto: sunu/aleph
def export_entities(export_id):
    """Produce the zip archive for the export identified by ``export_id``.

    The export's ``meta["query"]`` (or a ``match_none`` query when missing)
    selects the entities. Each one is appended to an Excel workbook named
    after the export label and passed to ``write_document`` with the open
    archive. Entities whose collection resolves to ``None`` are skipped.

    The loop ends early, and an ``EXPORT_TOO_LARGE.txt`` note is added to
    the archive, when the zip size reaches ``settings.EXPORT_MAX_SIZE`` or
    the loop index reaches ``settings.EXPORT_MAX_RESULTS``. Any exception
    marks the export as ``FAILED``; the temporary directory is always
    removed.
    """
    export = Export.by_id(export_id)
    log.info("Export entities [%r]...", export)
    workdir = ensure_path(mkdtemp(prefix="aleph.export."))
    # Collections already resolved during this run, keyed by their id.
    known_collections = {}
    try:
        archive_path = workdir.joinpath("export.zip")
        query = export.meta.get("query", {"match_none": {}})
        with ZipFile(archive_path, mode="w") as archive:
            sheet_name = safe_filename(export.label, extension="xlsx")
            sheet_path = workdir.joinpath(sheet_name)
            exporter = ExcelExporter(sheet_path, extra=EXTRA_HEADERS)
            for position, entity in enumerate(iter_proxies(filters=[query])):
                cid = entity.context.get("collection_id")
                if cid not in known_collections:
                    known_collections[cid] = get_collection(cid)
                collection = known_collections[cid]
                if collection is None:
                    continue

                exporter.write(
                    entity,
                    extra=[entity_url(entity.id), collection.get("label")],
                )
                write_document(workdir, archive, collection, entity)

                # Both limits are evaluated after the entity was written;
                # the size limit takes precedence over the count limit.
                # NOTE: ``position`` is zero-based, so the count limit
                # triggers on the entity at index EXPORT_MAX_RESULTS.
                concern = None
                if archive_path.stat().st_size >= settings.EXPORT_MAX_SIZE:
                    concern = "total size of the"
                elif position >= settings.EXPORT_MAX_RESULTS:
                    concern = "number of"
                if concern is not None:
                    archive.writestr("EXPORT_TOO_LARGE.txt", WARNING % concern)
                    break

            # The workbook goes into the archive once all rows are in.
            exporter.finalize()
            archive.write(sheet_path, arcname=sheet_name)

        download_name = safe_filename(
            "Export: %s" % export.label, extension="zip"
        )
        complete_export(export_id, archive_path, download_name)
    except Exception:
        log.exception("Failed to process export [%s]", export_id)
        # Re-load the record before flagging it as failed.
        failed = Export.by_id(export_id)
        failed.set_status(status=Status.FAILED)
        db.session.commit()
    finally:
        shutil.rmtree(workdir)