def start_download(self, file_number, request_id):
    """Fetch the document manifest for a file and queue every document for download.

    This is a generator: each ``yield`` hands the result of a VBMS client or
    download-database call back to whatever drives the generator, and the
    manifest is read from the value sent back in.
    NOTE(review): presumably driven by a Twisted ``inlineCallbacks``-style
    decorator that is not visible in this block -- confirm.

    :param file_number: passed to ``vbms_client.list_documents`` and bound
        onto the logger.
    :param request_id: identifies the download in the download database and
        is handed to ``Document.from_json`` for every manifest entry.
    """
    log = self.logger.bind(file_number=file_number, request_id=request_id)
    log.emit("list_documents.start")

    try:
        manifest = yield self.vbms_client.list_documents(log, file_number)
    except VBMSError as err:
        # Attach the error's process details to the error event only; the
        # database call below still receives the plain request-scoped logger.
        failure_log = log.bind(
            stdout=err.stdout,
            stderr=err.stderr,
            exit_code=err.exit_code,
        )
        failure_log.emit("list_documents.error")
        yield self.download_database.mark_download_errored(log, request_id)
        return

    log.emit("list_documents.success")

    documents = []
    for raw_document in manifest:
        documents.append(Document.from_json(request_id, raw_document))
    yield self.download_database.create_documents(log, documents)

    # Each queued job is a zero-argument callable that starts one file download.
    for document in documents:
        job = functools.partial(self.start_file_download, log, document)
        self.queue.put(job)

    # The manifest is only marked as downloaded after every document is queued.
    yield self.download_database.mark_download_manifest_downloaded(
        log, request_id)
def start_download(self, file_number, request_id):
    """List a file's documents, record them, and enqueue one download job each.

    Written as a generator: the values yielded come from the VBMS client and
    the download database, and the document listing is whatever is sent back
    in for the first ``yield``.
    NOTE(review): looks like this is meant to run under an
    ``inlineCallbacks``-style driver applied outside this block -- confirm.

    On ``VBMSError`` the download is marked errored and nothing is queued.
    Otherwise the documents are stored, queued, and the download is marked
    as having its manifest downloaded.

    :param file_number: forwarded to ``vbms_client.list_documents``.
    :param request_id: key used for every download-database call.
    """
    bound_logger = self.logger.bind(
        file_number=file_number,
        request_id=request_id,
    )
    bound_logger.emit("list_documents.start")

    try:
        listing = yield self.vbms_client.list_documents(
            bound_logger, file_number
        )
    except VBMSError as vbms_error:
        error_context = {
            "stdout": vbms_error.stdout,
            "stderr": vbms_error.stderr,
            "exit_code": vbms_error.exit_code,
        }
        bound_logger.bind(**error_context).emit("list_documents.error")
        yield self.download_database.mark_download_errored(
            bound_logger, request_id
        )
        return

    bound_logger.emit("list_documents.success")
    documents = [
        Document.from_json(request_id, entry) for entry in listing
    ]
    yield self.download_database.create_documents(bound_logger, documents)

    for queued_document in documents:
        # Defer the actual download: the queue receives a callable, not a result.
        self.queue.put(functools.partial(
            self.start_file_download, bound_logger, queued_document
        ))

    yield self.download_database.mark_download_manifest_downloaded(
        bound_logger, request_id
    )
def test_get_pending_work_documents(self, db):
    """A stored document is reported as pending work until its content location is set."""
    log = Logger(FakeMemoryLog())
    request_id = "test-request-id"

    # Set up a download whose manifest has already been fetched.
    success_result_of(db.create_download(log, request_id, "123456789"))
    success_result_of(db.mark_download_manifest_downloaded(log, request_id))

    stored_document = Document(
        id="test-document-id",
        download_id=request_id,
        document_id="{ABCD}",
        doc_type="00356",
        filename="file.pdf",
        received_at=datetime.datetime.utcnow(),
        source="CUI",
        content_location=None,
        errored=False,
    )
    success_result_of(db.create_documents(log, [stored_document]))

    # Pending work is a (downloads, documents) pair; only the document is pending.
    pending_downloads, pending_documents = success_result_of(
        db.get_pending_work(log)
    )
    assert pending_downloads == []
    (only_pending,) = pending_documents
    assert only_pending.id == "test-document-id"

    success_result_of(
        db.set_document_content_location(
            log, stored_document, "/path/to/content"
        )
    )

    # With the content location recorded, nothing is left to do.
    remaining_work = success_result_of(db.get_pending_work(log))
    assert remaining_work == ([], [])
def test_set_document_content_location(self, db):
    """Setting the only document's content location completes the download."""
    log = Logger(FakeMemoryLog())
    request_id = "test-request-id"
    content_path = "/path/to/content"

    success_result_of(db.create_download(log, request_id, "123456789"))

    sole_document = Document(
        id="test-document-id",
        download_id=request_id,
        document_id="{ABCD}",
        doc_type="00356",
        filename="file.pdf",
        received_at=datetime.datetime.utcnow(),
        source="CUI",
        content_location=None,
        errored=False,
    )
    success_result_of(db.create_documents(log, [sole_document]))
    success_result_of(
        db.set_document_content_location(log, sole_document, content_path)
    )

    # Read the download back and check both the roll-up and the document itself.
    fetched = success_result_of(db.get_download(log, request_id))
    assert fetched.completed
    assert fetched.percent_completed == 100
    first_document = fetched.documents[0]
    assert first_document.content_location == content_path
def test_from_json(self):
    """``Document.from_json`` leaves a ``None`` ``received_at`` as ``None``."""
    payload = {
        "document_id": "123456789",
        "doc_type": "123",
        "filename": "abc.pdf",
        "received_at": None,
        "source": "The moon",
    }

    # NOTE(review): start_download passes a request id as the first argument;
    # the literal here reads like a document id -- confirm which is intended.
    parsed = Document.from_json("test-document-id", payload)

    assert parsed.received_at is None
def __init__(self):
    """Fill ``self._data`` with one canned ``DownloadStatus`` per scenario key."""

    def make_document(filename, content_location=None, errored=False):
        # All canned documents share the same placeholder fields; only the
        # filename, content location and error flag differ between them.
        # NOTE(review): download_id is "manifest-downloaded" for every
        # document, including those filed under other request ids -- confirm
        # that this is intentional.
        return Document(
            id="",
            download_id="manifest-downloaded",
            document_id="",
            doc_type="",
            filename=filename,
            received_at=None,
            source="",
            content_location=content_location,
            errored=errored,
        )

    def make_status(request_id, state, documents):
        # Every scenario uses the same file number.
        return DownloadStatus(
            request_id=request_id,
            file_number="123456789",
            state=state,
            documents=documents,
        )

    self._data = {
        # Download created, manifest not fetched yet.
        "started": make_status("started", "STARTED", []),
        # Manifest fetched, the single document has not been downloaded.
        "manifest-downloaded": make_status(
            "manifest-downloaded",
            "MANIFEST_DOWNLOADED",
            [make_document("abc.pdf")],
        ),
        # One document untouched, one errored, one with content stored.
        "download-in-progress": make_status(
            "download-in-progress",
            "MANIFEST_DOWNLOADED",
            [
                make_document("abc.pdf"),
                make_document("def.pdf", errored=True),
                make_document("ghe.pdf", content_location="/not-real/"),
            ],
        ),
        # The download itself is in the ERRORED state and has no documents.
        "manifest-download-error": make_status(
            "manifest-download-error", "ERRORED", []
        ),
        # Every document has a content location.
        "completed": make_status(
            "completed",
            "MANIFEST_DOWNLOADED",
            [
                make_document("abc.pdf", content_location="/not-real/"),
                make_document("def.pdf", content_location="/not-real/"),
                make_document("ghe.pdf", content_location="/not-real/"),
            ],
        ),
    }