def test_put_assets_and_pdfs_in_object_store_reads_each_asset_from_xml(
    self, mk_put_object_in_object_store
):
    """Every asset id listed in the XML data is read from the ZIP file and
    sent to the object store together with the document identifiers."""
    zip_mock = MagicMock()

    put_assets_and_pdfs_in_object_store(zip_mock, self.xml_data)

    for asset in self.xml_data["assets"]:
        with self.subTest(asset=asset):
            zip_mock.read.assert_any_call(asset["asset_id"])
            mk_put_object_in_object_store.assert_any_call(
                zip_mock.read.return_value,
                self.xml_data["issn"],
                self.xml_data["scielo_id"],
                asset["asset_id"],
            )
def test_put_assets_and_pdfs_in_object_store_reads_each_pdf_from_xml(
    self, mk_put_object_in_object_store
):
    """Every PDF filename listed in the XML data is read from the ZIP file
    and sent to the object store together with the document identifiers."""
    zip_mock = MagicMock()
    zip_mock.read.return_value = b""

    put_assets_and_pdfs_in_object_store(zip_mock, self.xml_data)

    for rendition in self.xml_data["pdfs"]:
        with self.subTest(pdf=rendition):
            zip_mock.read.assert_any_call(rendition["filename"])
            mk_put_object_in_object_store.assert_any_call(
                zip_mock.read.return_value,
                self.xml_data["issn"],
                self.xml_data["scielo_id"],
                rendition["filename"],
            )
def test_put_assets_and_pdfs_in_object_store_return_data_pdf(
    self, mk_put_object_in_object_store
):
    """The returned PDF entries carry the filename, the Minio URL and the
    size in bytes of each rendition read from the ZIP file."""
    expected = copy.deepcopy(self.xml_data)
    sizes = []
    for rendition in expected["pdfs"]:
        rendition["data_url"] = "http://minio/documentstore/{}".format(
            rendition["filename"]
        )
        rendition["size_bytes"] = random.randint(80000, 100000)
        sizes.append(rendition["size_bytes"])

    # The fake file object reports one pre-computed size per len() call.
    file_mock = MagicMock(return_value=b"")
    file_mock.__len__.side_effect = sizes
    zip_mock = Mock()
    zip_mock.read.return_value = file_mock

    # NOTE(review): the surrounding Nones presumably match the non-PDF
    # uploads done by the function for this fixture — same order as before.
    mk_put_object_in_object_store.side_effect = (
        [None, None]
        + [rendition["data_url"] for rendition in expected["pdfs"]]
        + [None]
    )

    result = put_assets_and_pdfs_in_object_store(zip_mock, self.xml_data)

    for expected_pdf, result_pdf in zip(expected["pdfs"], result["pdfs"]):
        self.assertEqual(expected_pdf["filename"], result_pdf["filename"])
        self.assertEqual(expected_pdf["data_url"], result_pdf["data_url"])
        self.assertEqual(expected_pdf["size_bytes"], result_pdf["size_bytes"])
def test_put_assets_and_pdfs_in_object_store_returns_only_read_assets_and_pdfs(
    self, mk_put_object_in_object_store, MockLogger
):
    """Files missing from the ZIP archive are skipped: only successfully
    read assets and PDFs appear in the returned data."""
    zip_mock = MagicMock()
    # Second asset and first PDF are missing from the archive.
    zip_mock.read.side_effect = [
        b"",
        KeyError("File not found in the archive"),
        KeyError("File not found in the archive"),
        b"",
    ]
    expected = {
        "assets": self.xml_data["assets"][:1],
        "pdfs": self.xml_data["pdfs"][1:],
    }
    minio_urls = [
        "http://minio/documentstore/{}".format(expected["assets"][0]["asset_id"]),
        "http://minio/documentstore/{}".format(expected["pdfs"][0]["filename"]),
    ]
    mk_put_object_in_object_store.side_effect = minio_urls
    expected["assets"][0]["asset_url"] = minio_urls[0]
    expected["pdfs"][0]["data_url"] = minio_urls[1]
    expected["pdfs"][0]["size_bytes"] = 0

    result = put_assets_and_pdfs_in_object_store(zip_mock, self.xml_data)

    self.assertEqual(result, expected)
def test_put_assets_and_pdfs_in_object_store_logs_error_if_file_not_found_in_zip(
    self, mk_put_object_in_object_store, MockLogger
):
    """A KeyError raised while reading an asset or a PDF from the ZIP file
    is logged with the name of the missing file."""
    zip_mock = MagicMock()
    zip_mock.read.side_effect = [
        b"",
        KeyError("File not found in the archive"),
        KeyError("File not found in the archive"),
        b"",
    ]

    put_assets_and_pdfs_in_object_store(zip_mock, self.xml_data)

    MockLogger.info.assert_any_call(
        'Could not read asset "%s" from zipfile "%s": %s',
        self.xml_data["assets"][1]["asset_id"],
        zip_mock,
        "'File not found in the archive'",
    )
    MockLogger.info.assert_any_call(
        'Could not read PDF "%s" from zipfile "%s": %s',
        self.xml_data["pdfs"][0]["filename"],
        zip_mock,
        "'File not found in the archive'",
    )
def register_update_documents(sps_package, xmls_to_preserve):
    """Register or update in Minio and in Kernel the given documents and
    their digital assets and renditions.

    :param sps_package: path of the SPS ZIP package containing the XMLs
    :param xmls_to_preserve: list of XML filenames to keep in Kernel
        (register or update)
    :return: list with the metadata of the successfully synchronized
        documents
    """
    Logger.debug("register_update_documents IN")
    with ZipFile(sps_package) as zipfile:
        synchronized_docs_metadata = []
        total = len(xmls_to_preserve)
        # Start at 1 so the progress log reads "[1/N]".."[N/N]" instead of
        # the off-by-one "[0/N]".."[N-1/N]".
        for i, xml_filename in enumerate(xmls_to_preserve, 1):
            Logger.info(
                'Reading XML file "%s" from ZIP file "%s" [%s/%s]',
                xml_filename,
                sps_package,
                i,
                total,
            )
            try:
                xml_data = put_xml_into_object_store(zipfile, xml_filename)
            except PutXMLInObjectStoreException as exc:
                Logger.info(
                    'Could not put document "%s" in object store: %s',
                    xml_filename,
                    str(exc),
                )
            else:
                assets_and_pdfs_data = put_assets_and_pdfs_in_object_store(
                    zipfile, xml_data)
                # Kernel payload: document metadata plus asset/PDF URLs.
                _document_metadata = deepcopy(xml_data)
                _document_metadata.update(assets_and_pdfs_data)
                try:
                    register_update_doc_into_kernel(_document_metadata)
                except RegisterUpdateDocIntoKernelException as exc:
                    Logger.info(
                        'Could not register or update document "%s" in Kernel: %s',
                        xml_filename,
                        str(exc),
                    )
                else:
                    synchronized_docs_metadata.append(xml_data)
    Logger.debug("register_update_documents OUT")
    return synchronized_docs_metadata
def test_put_assets_and_pdfs_in_object_store_return_data_asset(
    self, mk_put_object_in_object_store
):
    """The returned asset entries carry the asset id and the Minio URL of
    each asset read from the ZIP file."""
    expected = copy.deepcopy(self.xml_data)
    for asset in expected["assets"]:
        asset["asset_url"] = "http://minio/documentstore/{}".format(
            asset["asset_id"]
        )

    zip_mock = MagicMock()
    zip_mock.read.return_value = b""
    # NOTE(review): the trailing Nones presumably cover the PDF uploads for
    # this fixture — same order as before.
    mk_put_object_in_object_store.side_effect = [
        asset["asset_url"] for asset in expected["assets"]
    ] + [None, None, None]

    result = put_assets_and_pdfs_in_object_store(zip_mock, self.xml_data)

    for expected_asset, result_asset in zip(expected["assets"], result["assets"]):
        self.assertEqual(expected_asset["asset_id"], result_asset["asset_id"])
        self.assertEqual(expected_asset["asset_url"], result_asset["asset_url"])
def register_update_documents(sps_package, xmls_to_preserve):
    """Register or update in Minio and in Kernel the given documents and
    their digital assets and renditions.

    Documents without a ``scielo-id`` must not be registered.

    :param sps_package: path of the SPS ZIP package containing the XMLs
    :param xmls_to_preserve: list of XML filenames to keep in Kernel
        (register or update)
    :return: tuple with the metadata of the successfully synchronized
        documents and the per-file execution report
    """
    executions = []
    Logger.debug("register_update_documents IN")
    with ZipFile(sps_package) as zipfile:
        synchronized_docs_metadata = []
        total = len(xmls_to_preserve)
        # Start at 1 so the progress log reads "[1/N]".."[N/N]" instead of
        # the off-by-one "[0/N]".."[N-1/N]".
        for i, xml_filename in enumerate(xmls_to_preserve, 1):
            Logger.info(
                'Reading XML file "%s" from ZIP file "%s" [%s/%s]',
                xml_filename,
                sps_package,
                i,
                total,
            )
            # One execution record per XML, failed or not.
            execution = {"file_name": xml_filename}
            try:
                xml_data = put_xml_into_object_store(zipfile, xml_filename)
            except (PutXMLInObjectStoreException, Pidv3Exception) as exc:
                Logger.error(
                    'Could not put document "%s" in object store: %s',
                    xml_filename,
                    str(exc),
                )
                execution.update({"failed": True, "error": str(exc)})
            else:
                assets_and_pdfs_data = put_assets_and_pdfs_in_object_store(
                    zipfile, xml_data)
                # Kernel payload: document metadata plus asset/PDF URLs.
                _document_metadata = deepcopy(xml_data)
                _document_metadata.update(assets_and_pdfs_data)
                try:
                    register_update_doc_into_kernel(_document_metadata)
                except RegisterUpdateDocIntoKernelException as exc:
                    Logger.error(
                        'Could not register or update document "%s" in Kernel: %s',
                        xml_filename,
                        str(exc),
                    )
                    execution.update(
                        {
                            "pid": xml_data.get("scielo_id"),
                            "failed": True,
                            "error": str(exc),
                        }
                    )
                else:
                    synchronized_docs_metadata.append(xml_data)
                    execution.update(
                        {
                            "pid": xml_data.get("scielo_id"),
                            "payload": _document_metadata,
                        }
                    )
            executions.append(execution)
    Logger.debug("register_update_documents OUT")
    return (synchronized_docs_metadata, executions)