def test_write_manifest_file_wrong_algorithm(self, mock_session, tmpdir):
    """Expect WASAPIManifestError when asking for an algorithm we lack."""
    mock_session.return_value.get.return_value = MockResponse200()
    target_dir = tmpdir.mkdir('downloads')
    manifest_source = wc.Downloads(
        WASAPI_URL,
        download=False,
        destination=str(target_dir),
    )
    # 'sha2' stands in for an algorithm with no checksums available.
    with pytest.raises(wc.WASAPIManifestError):
        manifest_source.write_manifest_file('sha2')
def test_populate_downloads_urls(self, mock_session):
    """Check `urls` holds the first location of each file in the response."""
    mock_session.return_value.get.return_value = MockResponse200()
    expected_urls = [
        'https://warcs.example.com/webdatafile/AIT-JOB256123-00000.warc.gz',
        'https://warcs.example.com/webdatafile/AIT-JOB256118-00000.warc.gz',
    ]
    result = wc.Downloads(WASAPI_URL, download=False)
    # The test expects exactly two URLs from the mocked response.
    assert len(result.urls) == 2
    for expected in expected_urls:
        assert expected in result.urls
def test_populate_downloads_generate_manifest(self, mock_session, tmpdir):
    """Check that one checksum manifest is written per algorithm."""
    mock_session.return_value.get.return_value = MockResponse200()
    target_dir = tmpdir.mkdir('downloads')
    manifest_source = wc.Downloads(
        WASAPI_URL,
        download=False,
        destination=str(target_dir),
    )
    manifest_source.generate_manifests()
    written = target_dir.listdir()
    # Only the md5 and sha1 manifests should exist in the destination.
    assert len(written) == 2
    for manifest_name in ['manifest-md5.txt', 'manifest-sha1.txt']:
        assert target_dir.join(manifest_name) in written
def test_populate_downloads(self, mock_session):
    """Check the download queue is filled with the expected DataFile items."""
    mock_session.return_value.get.return_value = MockResponse200()
    downloads = wc.Downloads(WASAPI_URL, download=True)
    pending = downloads.get_q
    assert pending.qsize() == 2
    # Drain the JoinableQueue to avoid BrokenPipeError.
    # There could be a better way to handle this...
    while pending.qsize() != 0:
        entry = pending.get()
        assert isinstance(entry, wc.DataFile)
        pending.task_done()
def test_write_manifest_file(self, mock_session, tmpdir):
    """Check the manifest written for one algorithm has the expected lines."""
    mock_session.return_value.get.return_value = MockResponse200()
    target_dir = tmpdir.mkdir('downloads')
    manifest_source = wc.Downloads(
        WASAPI_URL,
        download=False,
        destination=str(target_dir),
    )
    manifest_source.write_manifest_file('sha1')
    # Only the sha1 manifest should have been written.
    assert len(target_dir.listdir()) == 1
    # Each line pairs a checksum with the file's path inside the destination.
    template = (
        'edef6bca652d75d0587ef411d5f028335341b074 {p}{s}AIT-JOB256123-00000.warc.gz\n'
        '54a466421471ef7d8cb4d6bbfb85afd76022a378 {p}{s}ARCHIVEIT-JOB256118-00000.warc.gz\n'
    )
    expected = template.format(p=target_dir, s=os.sep)
    assert target_dir.join('manifest-sha1.txt').read() == expected
def test_populate_downloads_manifest(self, mock_session):
    """Check the `checksums` dict has md5 and sha1 entries for each file."""
    mock_session.return_value.get.return_value = MockResponse200()
    expected_md5 = [
        ('61f818912d1f39bc9dd15d4b87461110', 'AIT-JOB256123-00000.warc.gz'),
        ('748120fd9672b22df5942bb44e9cde81', 'ARCHIVEIT-JOB256118-00000.warc.gz'),
    ]
    expected_sha1 = [
        ('edef6bca652d75d0587ef411d5f028335341b074', 'AIT-JOB256123-00000.warc.gz'),
        ('54a466421471ef7d8cb4d6bbfb85afd76022a378', 'ARCHIVEIT-JOB256118-00000.warc.gz'),
    ]
    result = wc.Downloads(WASAPI_URL, download=False)
    # The dict must be non-empty before checking individual algorithms.
    assert len(result.checksums) > 0
    assert result.checksums['md5'] == expected_md5
    assert result.checksums['sha1'] == expected_sha1
def test_populate_downloads_multi_page(self, mock_session):
    """Check the queue collects items across two pages of results."""
    # Point the first page at a follow-up page; the second keeps "next" null.
    first_page = WASAPI_TEXT.replace('"next":null',
                                     '"next":"http://test?page=2"')
    mock_session.return_value.get.side_effect = [
        MockResponse200(first_page),
        MockResponse200(),
    ]
    downloads = wc.Downloads(WASAPI_URL, download=True)
    pending = downloads.get_q
    # The test expects four queued items from the two pages combined.
    assert pending.qsize() == 4
    # Drain the JoinableQueue to avoid BrokenPipeError.
    while pending.qsize() != 0:
        entry = pending.get()
        assert isinstance(entry, wc.DataFile)
        pending.task_done()
def test_populate_downloads_no_get_q(self, mock_session):
    """Check that `get_q` is not set when download=False."""
    mock_session.return_value.get.return_value = MockResponse200()
    result = wc.Downloads(WASAPI_URL, download=False)
    queue_attr = 'get_q'
    # Accessing the missing attribute must raise AttributeError.
    with pytest.raises(AttributeError):
        getattr(result, queue_attr)