示例#1
0
def export_entities(export_id, result):
    """Bundle the entities of a search result into a zip file and complete the export.

    Each item of ``result["results"]`` is converted to a proxy, written as a
    row to an Excel workbook, and passed to ``write_document`` together with
    the archive. The workbook is stored in the archive as ``Export.xlsx`` and
    the finished zip is handed to ``complete_export``.

    Args:
        export_id: Identifier of the ``Export`` record being processed.
        result: Mapping with a ``"results"`` key holding the entity dicts
            to export.

    Any exception is logged and the export is marked as failed rather than
    re-raised. The temporary working directory is always removed.
    """
    # NOTE(review): function-scope import, presumably to avoid a circular
    # import with aleph.logic -- confirm before moving it to module level.
    from aleph.logic import resolver

    export_dir = ensure_path(mkdtemp(prefix="aleph.export."))
    try:
        entities = []
        # The resolver is given an object exposing ``.result``; a bare
        # namespace stands in for that here.
        stub = types.SimpleNamespace(result=result)
        for entity in result["results"]:
            resolver.queue(stub, Collection, entity.get("collection_id"))
            entities.append(model.get_proxy(entity))
        resolver.resolve(stub)

        file_path = export_dir.joinpath("query-export.zip")
        # The context manager closes the archive even if a write fails
        # part-way, so no open handle is left behind when the ``finally``
        # clause deletes the directory.
        with zipfile.ZipFile(file_path, "w") as zf:
            exporter = ExcelExporter(None, extra=EXTRA_HEADERS)
            for entity in entities:
                collection_id = entity.context.get("collection_id")
                collection = resolver.get(stub, Collection, collection_id)
                extra = [entity_url(entity.id), collection.get("label")]
                exporter.write(entity, extra=extra)
                write_document(export_dir, zf, collection, entity)
            content = exporter.get_bytesio().getvalue()
            zf.writestr("Export.xlsx", content)
        # The archive must be closed (central directory written) before the
        # file is handed over.
        complete_export(export_id, file_path)
    except Exception:
        log.exception("Failed to process export [%s]", export_id)
        export = Export.by_id(export_id)
        export.set_status(status=Export.STATUS_FAILED)
        db.session.commit()
    finally:
        shutil.rmtree(export_dir)
示例#2
0
def export_entities(request, result):
    """Stream a zip archive containing the entities of ``result``.

    Every item in ``result.results`` is turned into a proxy, written as a row
    to an Excel workbook, and passed to ``write_document`` along with the
    archive. The workbook is added to the archive as ``Export.xlsx``, after
    which the archive's chunks are yielded one by one.

    Args:
        request: Not referenced by this function body.
        result: Object whose ``results`` attribute holds the entity dicts;
            it is also the object the resolver calls are keyed on.

    Yields:
        The chunks produced by iterating the ``zipstream.ZipFile``.
    """
    proxies = []
    for raw in result.results:
        resolver.queue(result, Collection, raw.get('collection_id'))
        proxies.append(model.get_proxy(raw))
    # Resolve all queued collections in one go before they are looked up.
    resolver.resolve(result)

    archive = zipstream.ZipFile()
    workbook = ExcelExporter(None, extra=EXTRA_HEADERS)
    for proxy in proxies:
        collection = resolver.get(
            result, Collection, proxy.context.get('collection_id')
        )
        workbook.write(
            proxy,
            extra=[entity_url(proxy.id), collection.get('label')],
        )
        write_document(archive, collection, proxy)

    archive.write_iter('Export.xlsx', workbook.get_bytesio())
    yield from archive
示例#3
0
def export_entities(export_id):
    """Run the stored query of an export and pack the matches into a zip file.

    The export's ``meta["query"]`` (or a match-none query when absent) is
    used as the filter for ``iter_proxies``. Each entity is written as a row
    to an Excel workbook and passed to ``write_document`` together with the
    archive. Entities whose collection lookup returns ``None`` are skipped.
    The loop stops early once the archive on disk reaches
    ``Export.MAX_FILE_SIZE``.

    Args:
        export_id: Identifier of the ``Export`` record to process.

    Any exception is logged and the export is marked as failed rather than
    re-raised. The temporary working directory is always removed.
    """
    export = Export.by_id(export_id)
    log.info("Export entities [%r]...", export)
    export_dir = ensure_path(mkdtemp(prefix="aleph.export."))
    # Cache of collection_id -> collection so each one is fetched only once.
    collections = {}
    try:
        filters = [export.meta.get("query", {"match_none": {}})]
        file_path = export_dir.joinpath("query-export.zip")
        with ZipFile(file_path, mode="w") as zf:
            excel_path = export_dir.joinpath(EXCEL_FILE)
            exporter = ExcelExporter(excel_path, extra=EXTRA_HEADERS)
            for entity in iter_proxies(filters=filters):
                collection_id = entity.context.get("collection_id")
                if collection_id not in collections:
                    collections[collection_id] = get_collection(collection_id)
                collection = collections[collection_id]
                if collection is None:
                    continue
                extra = [entity_url(entity.id), collection.get("label")]
                exporter.write(entity, extra=extra)
                write_document(export_dir, zf, collection, entity)
                if file_path.stat().st_size >= Export.MAX_FILE_SIZE:
                    # ``warn`` is a deprecated alias; ``warning`` is the
                    # supported spelling.
                    log.warning("Export too large: %r", export)
                    break

            # The workbook is written to disk first, then added to the
            # archive under its bare file name.
            exporter.finalize()
            zf.write(excel_path, arcname=EXCEL_FILE)
        complete_export(export_id, file_path)
    except Exception:
        log.exception("Failed to process export [%s]", export_id)
        export = Export.by_id(export_id)
        export.set_status(status=Status.FAILED)
        db.session.commit()
    finally:
        shutil.rmtree(export_dir)
示例#4
0
 def test_excel_export(self):
     """Export one entity to Excel and check the sheet name, header and first row."""
     proxy = model.get_proxy(ENTITY)
     exporter = ExcelExporter(self.temp, extra=["source"])
     exporter.write(proxy, extra=["test"])
     exporter.finalize()

     workbook = load_workbook(self.temp)
     self.assertListEqual(workbook.sheetnames, ["People"])

     rows = list(workbook["People"])
     props = exporter.exportable_properties(proxy.schema)

     # Header: ID column, the extra column, then one column per property.
     header = [cell.value for cell in rows[0]]
     expected_header = ["ID", "source"] + [prop.label for prop in props]
     self.assertListEqual(header, expected_header)

     # Only the first three cells of the data row are checked.
     leading_cells = [cell.value for cell in rows[1][:3]]
     self.assertListEqual(leading_cells, ["person", "test", "Ralph Tester"])
示例#5
0
 def test_excel_export(self):
     """Export one entity to Excel and check the sheet name, header and first row."""
     proxy = model.get_proxy(ENTITY)
     exporter = ExcelExporter(self.temp, extra=['source'])
     exporter.write(proxy, extra=['test'])
     exporter.finalize()

     workbook = load_workbook(self.temp)
     self.assertListEqual(workbook.sheetnames, ['People'])

     rows = list(workbook["People"])

     # Header: ID column, the extra column, then the schema's sorted
     # properties by label.
     header = [cell.value for cell in rows[0]]
     expected_header = ['ID', 'source'] + [
         prop.label for prop in proxy.schema.sorted_properties
     ]
     self.assertListEqual(header, expected_header)

     # Only the first three cells of the data row are checked.
     leading_cells = [cell.value for cell in rows[1][:3]]
     self.assertListEqual(leading_cells, ['person', 'test', 'Ralph Tester'])
示例#6
0
文件: export.py 项目: sunu/aleph
def export_entities(export_id):
    """Run the stored query of an export and pack the matches into a zip file.

    The export's ``meta["query"]`` (or a match-none query when absent) is
    used as the filter for ``iter_proxies``. Each entity is written as a row
    to an Excel workbook named after the export label and passed to
    ``write_document`` together with the archive. Entities whose collection
    lookup returns ``None`` are skipped. The loop stops, after adding an
    ``EXPORT_TOO_LARGE.txt`` note to the archive, once the archive size
    reaches ``settings.EXPORT_MAX_SIZE`` or the loop index reaches
    ``settings.EXPORT_MAX_RESULTS``.

    Args:
        export_id: Identifier of the ``Export`` record to process.

    Any exception is logged and the export is marked as failed rather than
    re-raised. The temporary working directory is always removed.
    """
    export = Export.by_id(export_id)
    log.info("Export entities [%r]...", export)
    export_dir = ensure_path(mkdtemp(prefix="aleph.export."))
    # collection_id -> collection, so each collection is fetched only once.
    collection_cache = {}
    try:
        filters = [export.meta.get("query", {"match_none": {}})]
        file_path = export_dir.joinpath("export.zip")
        with ZipFile(file_path, mode="w") as archive:
            excel_name = safe_filename(export.label, extension="xlsx")
            excel_path = export_dir.joinpath(excel_name)
            exporter = ExcelExporter(excel_path, extra=EXTRA_HEADERS)
            for position, proxy in enumerate(iter_proxies(filters=filters)):
                collection_id = proxy.context.get("collection_id")
                if collection_id in collection_cache:
                    collection = collection_cache[collection_id]
                else:
                    collection = get_collection(collection_id)
                    collection_cache[collection_id] = collection
                if collection is None:
                    continue

                exporter.write(
                    proxy,
                    extra=[entity_url(proxy.id), collection.get("label")],
                )
                write_document(export_dir, archive, collection, proxy)

                # Decide whether a limit was hit; the size check comes first.
                if file_path.stat().st_size >= settings.EXPORT_MAX_SIZE:
                    concern = "total size of the"
                elif position >= settings.EXPORT_MAX_RESULTS:
                    concern = "number of"
                else:
                    continue
                archive.writestr("EXPORT_TOO_LARGE.txt", WARNING % concern)
                break

            # The workbook is written to disk first, then added to the
            # archive under its bare file name.
            exporter.finalize()
            archive.write(excel_path, arcname=excel_name)

        file_name = safe_filename("Export: %s" % export.label, extension="zip")
        complete_export(export_id, file_path, file_name)
    except Exception:
        log.exception("Failed to process export [%s]", export_id)
        failed_export = Export.by_id(export_id)
        failed_export.set_status(status=Status.FAILED)
        db.session.commit()
    finally:
        shutil.rmtree(export_dir)
示例#7
0
def export_excel(infile, outfile):
    """Create an ``ExcelExporter`` for ``outfile`` and pass it, with ``infile``, to ``export_stream``."""
    export_stream(ExcelExporter(outfile), infile)
示例#8
0
 def test_excel_bytesio(self):
     """Check that the exporter's in-memory buffer holds more than 100 bytes after one write."""
     proxy = model.get_proxy(ENTITY)
     exporter = ExcelExporter(self.temp, extra=['source'])
     exporter.write(proxy, extra=['test'])
     payload = exporter.get_bytesio().getvalue()
     assert len(payload) > 100
示例#9
0
def export_excel(infile: Path, outfile: Path) -> None:
    """Create an ``ExcelExporter`` for ``outfile`` and pass it, with ``infile``, to ``export_stream``."""
    export_stream(ExcelExporter(outfile), infile)