def test_materialize_non_bag(self):
    """Materializing a plain (non-bag) directory should not yield a bag."""
    logger.info(self.getTestHeader('test materialize non-bag'))
    saved_cwd = os.getcwd()
    os.chdir(self.tmpdir)
    try:
        result_path = bdb.materialize(self.test_data_dir)
        self.assertFalse(bdb.is_bag(result_path))
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
    finally:
        # Always restore the working directory for subsequent tests.
        os.chdir(saved_cwd)
def test_validate_incomplete_bag_full(self):
    """Full (non-fast) validation of an incomplete bag must raise BagValidationError."""
    logger.info(self.getTestHeader('test full validation incomplete bag'))
    expected_pattern = ("^Bag validation failed:.*"
                        "(test-fetch-identifier[.]txt:|test-fetch-http[.]txt)")
    try:
        self.assertRaisesRegex(bdbagit.BagValidationError,
                               expected_pattern,
                               bdb.validate_bag,
                               self.test_bag_incomplete_dir,
                               fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_update_bag_mixed_checksums_allowed(self):
    """An update driven by a manifest mixing checksum algorithms per file should succeed."""
    logger.info(self.getTestHeader('allow update bag with non-uniform checksum(s) per file'))
    manifest_path = ospj(self.test_config_dir, 'test-fetch-manifest-mixed-checksums.json')
    try:
        bdb.make_bag(self.test_data_dir)
        bdb.make_bag(self.test_data_dir, update=True, remote_file_manifest=manifest_path)
        bdb.validate_bag(self.test_bag_dir, fast=True)
    except Exception as e:
        self.fail(get_typed_exception(e))
def test_extract_bag_archive_tar(self):
    """Extract a tar-serialized bag into a temp location and verify it is a bag."""
    logger.info(self.getTestHeader('extract bag tar format'))
    try:
        extracted = bdb.extract_bag(ospj(self.test_archive_dir, 'test-bag.tar'), temp=True)
        self.assertTrue(ospe(extracted))
        self.assertTrue(bdb.is_bag(extracted))
        bdb.cleanup_bag(os.path.dirname(extracted))
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_update_bag_change_file(self):
    """Appending to an existing payload file and updating should re-manifest it."""
    logger.info(self.getTestHeader('update bag change file'))
    try:
        with open(ospj(self.test_bag_dir, 'data', 'README.txt'), 'a') as f:
            # write(), not writelines(): writelines() expects an iterable of
            # lines — handing it a str iterates character-by-character.
            f.write('Additional data added via unit test.')
        bag = bdb.make_bag(self.test_bag_dir, update=True)
        output = self.stream.getvalue()
        self.assertIsInstance(bag, bdbagit.BDBag)
        self.assertExpectedMessages(['README.txt'], output)
    except Exception as e:
        self.fail(get_typed_exception(e))
def test_materialize_from_file(self):
    """Materialize a bag directly from a local archive file."""
    logger.info(self.getTestHeader('test materialize from file'))
    saved_cwd = os.getcwd()
    os.chdir(self.tmpdir)
    try:
        archive_path = ospj(self.test_archive_dir, 'test-bag-fetch-http.zip')
        bdb.materialize(archive_path)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
    finally:
        os.chdir(saved_cwd)
def test_resolve_fetch_http_cookie_auth(self):
    """Resolve fetch.txt over HTTP using a cookie-auth keychain, then validate."""
    logger.info(self.getTestHeader('test resolve fetch http cookie auth'))
    keychain_path = ospj(self.test_config_dir, 'test-keychain-4.json')
    try:
        bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                          keychain_file=keychain_path,
                          cookie_scan=False)
        for fast_mode in (True, False):
            bdb.validate_bag(self.test_bag_fetch_http_dir, fast=fast_mode)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_fetch_single(self):
    """fetch_single_file should download one remote file to the given path."""
    logger.info(self.getTestHeader('test fetch single file'))
    source_url = ("https://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http/"
                  "test-fetch-http.txt")
    try:
        target_path = ospj(self.test_bag_fetch_http_dir, "test-fetch-http.txt")
        fetcher.fetch_single_file(source_url, target_path)
        self.assertTrue(os.path.exists(target_path))
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_resolve_fetch_incomplete(self):
    """Resolving an incomplete bag's fetch.txt should complete it; validate both modes."""
    logger.info(self.getTestHeader('test resolve fetch incomplete'))
    try:
        bdb.resolve_fetch(self.test_bag_incomplete_fetch_dir,
                          force=False,
                          cookie_scan=False,
                          quiet=False)
        for fast_mode in (True, False):
            bdb.validate_bag(self.test_bag_incomplete_fetch_dir, fast=fast_mode)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_validate_profile_serialization(self):
    """A zip-serialized bag should satisfy the bdbag profile's serialization rules."""
    logger.info(self.getTestHeader('validate profile serialization'))
    profile_url = ('https://raw.githubusercontent.com/fair-research/bdbag/master/profiles/'
                   'bdbag-profile.json')
    try:
        archive_path = ospj(self.test_archive_dir, 'test-bag.zip')
        bdb.validate_bag_serialization(archive_path, bag_profile_path=profile_url)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_materialize_from_dir(self):
    """Materializing an existing bag directory should yield a valid bag path."""
    logger.info(self.getTestHeader('test materialize from dir'))
    saved_cwd = os.getcwd()
    os.chdir(self.tmpdir)
    try:
        result_path = bdb.materialize(self.test_bag_fetch_http_dir)
        self.assertTrue(bdb.is_bag(result_path))
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
    finally:
        os.chdir(saved_cwd)
def test_ensure_bag_path_exists(self):
    """ensure_bag_path_exists keeps the bag dir and preserves any saved copy."""
    logger.info(self.getTestHeader('ensure bag path exists, save existing'))
    try:
        saved_path = bdb.ensure_bag_path_exists(self.test_bag_dir)
        self.assertTrue(ospe(self.test_bag_dir), "Bag directory does not exist")
        self.assertTrue(ospe(saved_path), "Saved bag path does not exist")
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_validate_remote_bag_from_rfm(self):
    """Create a bag from a remote file manifest, resolve its fetch, then validate."""
    logger.info(self.getTestHeader('create, resolve, and validate bag from remote file manifest'))
    try:
        self._test_bag_with_remote_file_manifest()
        bdb.resolve_fetch(self.test_data_dir)
        for fast_mode in (True, False):
            bdb.validate_bag(self.test_data_dir, fast=fast_mode)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_validate_invalid_bag_state_manifest_fetch(self):
    """Structure validation must fail for a bag whose manifest refs lack fetch.txt entries."""
    logger.info(self.getTestHeader('test bag state validation invalid bag manifest with missing fetch.txt'))
    try:
        self.assertRaises(bdbagit.BagValidationError,
                          bdb.validate_bag_structure,
                          self.test_bag_invalid_state_manifest_fetch_dir,
                          skip_remote=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_update_bag_add_file(self):
    """Adding a new payload file and updating should manifest the new file."""
    logger.info(self.getTestHeader('update bag add file'))
    try:
        new_file_path = ospj(self.test_bag_dir, 'data', 'NEWFILE.txt')
        with open(new_file_path, 'w') as nf:
            nf.write('Additional file added via unit test.')
        bag = bdb.make_bag(self.test_bag_dir, update=True)
        self.assertIsInstance(bag, bdbagit.BDBag)
        self.assertExpectedMessages(['NEWFILE.txt'], self.stream.getvalue())
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_materialize_from_url(self):
    """Materialize a bag from a remote archive URL."""
    logger.info(self.getTestHeader('test materialize from URL'))
    saved_cwd = os.getcwd()
    os.chdir(self.tmpdir)
    try:
        bdb.materialize("https://github.com/fair-research/bdbag/raw/master/test/test-data/test-archives/"
                        "test-bag.zip")
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
    finally:
        os.chdir(saved_cwd)
def authenticate(url, auth_config):
    """Find an auth entry matching *url* and return its Globus token info.

    Scans *auth_config* for entries whose ``uri`` attribute is a
    case-insensitive substring of *url*.  The first entry that passes
    ``validate_auth_config`` and has ``auth_type == 'token'`` yields
    ``(transfer_token, local_endpoint)``; otherwise ``(None, None)``.
    """
    # Iterate the generator directly -- no need to materialize it in a list.
    for auth in (entry for entry in auth_config
                 if hasattr(entry, 'uri') and entry.uri.lower() in url.lower()):
        try:
            if not validate_auth_config(auth):
                continue
            if auth.auth_type == 'token':
                return auth.auth_params.transfer_token, auth.auth_params.local_endpoint
        except Exception as e:
            # logger.warning(): logger.warn is a deprecated alias.
            logger.warning("Unhandled exception getting Globus token: %s" %
                           get_typed_exception(e))
    return None, None
def test_cookie_load_and_merge_failure(self):
    """Merging cookie jars should warn (not fail) when one jar file is unreadable."""
    logger.info(self.getTestHeader('test cookie load and merge'))
    try:
        jar_paths = [ospj(self.test_config_dir, "test-cookies-bad.txt"),
                     ospj(self.test_config_dir, "test-cookies-2.txt")]
        cookies.load_and_merge_cookie_jars(jar_paths)
        self.assertExpectedMessages(["Unable to load cookie file"], self.stream.getvalue())
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_validate_invalid_bag_state_fetch_filesize(self):
    """Structure validation must fail when a local file's size mismatches its fetch.txt ref."""
    logger.info(self.getTestHeader('test bag state validation invalid local file size of fetch.txt file ref'))
    try:
        self.assertRaises(bdbagit.BagValidationError,
                          bdb.validate_bag_structure,
                          self.test_bag_invalid_state_fetch_filesize_dir,
                          skip_remote=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def _test_resolve_fetch_http_with_filter(self, expr, files=()):
    """Resolve fetch.txt entries matching *expr*, then check that *files* exist.

    Args:
        expr: filter expression forwarded to ``bdb.resolve_fetch``.
        files: iterable of bag-relative paths expected to exist after resolution.
            Default changed from ``list(frozenset())`` -- a mutable ``[]``
            shared across calls -- to an immutable empty tuple; the parameter
            is only iterated, so callers are unaffected.
    """
    logger.info(self.getTestHeader('test resolve fetch http with filter expression "%s"' % expr))
    try:
        bdb.resolve_fetch(self.test_bag_fetch_http_dir, filter_expr=expr)
        for test_file in files:
            self.assertTrue(ospif(ospj(self.test_bag_fetch_http_dir, test_file)))
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_resolve_fetch_http_encoded_filename(self):
    """Resolve fetch entries whose filenames are URL-encoded, then validate."""
    logger.info(self.getTestHeader('test resolve fetch http with encoded filename'))
    try:
        bdb.resolve_fetch(self.test_bag_fetch_http_encoded_filename_dir, cookie_scan=False)
        for fast_mode in (True, False):
            bdb.validate_bag(self.test_bag_fetch_http_encoded_filename_dir, fast=fast_mode)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def excepthook(etype, value, tb):
    """Top-level exception hook: log to stderr; pop a Qt dialog for unexpected errors.

    BagError and RuntimeError are considered expected/handled and produce no
    dialog; anything else shows a critical message box with the full traceback.
    """
    sys.stderr.write(get_typed_exception(value))
    if isinstance(value, (BagError, RuntimeError)):
        return
    traceback.print_tb(tb)
    dialog = QMessageBox()
    dialog.setText(str(value))
    dialog.setStandardButtons(QMessageBox.Close)
    dialog.setWindowTitle("Unhandled Exception: %s" % etype.__name__)
    dialog.setIcon(QMessageBox.Critical)
    dialog.setDetailedText('\n'.join(traceback.format_exception(etype, value, tb)))
    dialog.exec_()
def test_update_bag_prune(self):
    """Updating with only md5 plus prune_manifests should drop all other manifests."""
    logger.info(self.getTestHeader('update bag prune manifests'))
    try:
        bag = bdb.make_bag(self.test_bag_dir, algs=['md5'], update=True, prune_manifests=True)
        self.assertIsInstance(bag, bdbagit.BDBag)
        # Same files the original asserted on, in the same order.
        for prefix in ('manifest', 'tagmanifest'):
            for alg in ('sha1', 'sha256', 'sha512'):
                self.assertFalse(ospif(ospj(self.test_bag_dir, '%s-%s.txt' % (prefix, alg))))
    except Exception as e:
        self.fail(get_typed_exception(e))
def test_update_bag_change_metadata_nested_dict(self):
    """Nested dict content in a metadata file is reported as unsupported for bag-info.txt."""
    logger.info(self.getTestHeader('update bag change metadata with nested dict'))
    metadata_path = ospj(self.test_config_dir, 'test-ro-metadata.json')
    try:
        bag = bdb.make_bag(self.test_bag_dir,
                           update=True,
                           save_manifests=False,
                           metadata_file=metadata_path)
        output = self.stream.getvalue()
        self.assertIsInstance(bag, bdbagit.BDBag)
        self.assertExpectedMessages(['Reading bag metadata from file', 'test-ro-metadata.json'],
                                    output)
        self.assertExpectedMessages(
            ["Nested dictionary content not supported in tag file: [bag-info.txt]"], output)
    except Exception as e:
        self.fail(get_typed_exception(e))
def get_file(url, output_path, auth_config, headers=None, session=None):
    """Download *url* to *output_path* over HTTP(S).

    Creates the output directory if needed, retries once with a freshly
    authenticated session on a 401 response, and streams the body to disk in
    CHUNK_SIZE pieces.

    Returns:
        True on a successful transfer, False on any failure (HTTP error or
        request exception).
    """
    try:
        if not session:
            session = get_session(url, auth_config)
        output_dir = os.path.dirname(os.path.abspath(output_path))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        if not headers:
            headers = HEADERS
        else:
            headers.update(HEADERS)
        logger.info("Attempting GET from URL: %s" % url)
        r = session.get(url, headers=headers, stream=True, verify=certifi.where())
        if r.status_code == 401:
            # Credentials may have expired: rebuild the session and retry once.
            session = get_session(url, auth_config)
            r = session.get(url, headers=headers, stream=True, verify=certifi.where())
        if r.status_code != 200:
            logger.error('HTTP GET Failed for URL: %s' % url)
            logger.error("Host %s responded:\n\n%s" % (urlsplit(url).netloc, r.text))
            logger.warning('File transfer failed: [%s]' % output_path)
        else:
            total = 0
            start = datetime.datetime.now()
            logger.debug("Transferring file %s to %s" % (url, output_path))
            with open(output_path, 'wb') as data_file:
                for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                    data_file.write(chunk)
                    total += len(chunk)
            elapsed = datetime.datetime.now() - start
            total_secs = elapsed.total_seconds()
            total_mbs = float(total) / float(1024 * 1024)
            throughput = "%.3f MB/second" % (total_mbs / total_secs if total_secs > 0 else 0.001)
            logger.info('File [%s] transfer successful. %.3f MB transferred at %s. Elapsed time: %s. ' %
                        (output_path, total_mbs, throughput, elapsed))
            return True
    except requests.exceptions.RequestException as e:
        logger.error('HTTP Request Exception: %s' % (get_typed_exception(e)))
    # Explicit False for both the HTTP-error and exception paths; previously
    # the non-200 path could fall through and yield an implicit None.
    return False
def test_resolve_fetch_ark2(self):
    """Resolve an ark: identifier via a mocked resolver response, then fetch and validate."""
    logger.info(self.getTestHeader('test resolve fetch ark2'))
    try:
        # Canned identifier-resolver JSON payload returned by the mocked GET below.
        mock_response = {
            "admins": [
                "urn:globus:auth:identity:7b315147-d8f6-4a80-853d-78b65826d734",
                "urn:globus:groups:id:23acce4c-733f-11e8-a40d-0e847f194132",
                "urn:globus:auth:identity:b2541312-d274-11e5-9131-bbb9500ff459",
                "urn:globus:auth:identity:88204dba-e812-432a-abcd-ec631583a98c",
                "urn:globus:auth:identity:58b31676-ef95-11e5-8ff7-5783aaa8fce7"
            ],
            "checksums": [{
                "function": "sha256",
                "value": "59e6e0b91b51d49a5fb0e1068980d2e7d2b2001a6d11c59c64156d32e197a626"
            }],
            "identifier": "ark:/57799/b91FmdtR3Pf4Ct7",
            "landing_page": "https://identifiers.globus.org/ark:/57799/b91FmdtR3Pf4Ct7/landingpage",
            "location": [
                "https://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http/test-fetch-identifier.txt",
                "http://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http/test-fetch-identifier.txt"
            ],
            "metadata": {
                "title": "BDBag identifier unit test file"
            },
            "visible_to": ["public"]
        }
        # Forward declaration: assigned after the def below; the nested
        # function captures it by closure, so assignment order is safe.
        patched_resolve_ark_get = None

        def mocked_request_resolver_ark_get_success(*args, **kwargs):
            # args[0] is the Session instance. Clear its auth and stop the
            # patcher from inside the first call so ONLY the resolver GET is
            # mocked; the subsequent file download uses the real transport.
            args[0].auth = None
            patched_resolve_ark_get.stop()
            return BaseTest.MockResponse(mock_response, 200)

        patched_resolve_ark_get = mock.patch.multiple(
            "bdbag.fetch.resolvers.base_resolver.requests.Session",
            get=mocked_request_resolver_ark_get_success,
            auth=None,
            create=True)
        patched_resolve_ark_get.start()
        bdb.resolve_fetch(self.test_bag_fetch_ark2_dir, cookie_scan=False)
        bdb.validate_bag(self.test_bag_fetch_ark2_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_ark2_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_resolve_fetch_http_basic_auth_get_bad_key(self):
    """A keychain with a malformed basic-auth entry should log the missing params."""
    logger.info(self.getTestHeader('test resolve fetch http basic auth GET with bad key'))
    bad_keychain = ospj(self.test_config_dir, 'test-keychain-bad-1.json')
    try:
        bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                          keychain_file=bad_keychain,
                          cookie_scan=False)
        self.assertExpectedMessages(["Missing required parameters [username, password]"],
                                    self.stream.getvalue())
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_validate_invalid_profile_serialization(self):
    """An unserialized bag directory must fail the profile's serialization check."""
    logger.info(self.getTestHeader('validate invalid profile serialization'))
    try:
        # Previously `ospj(self.test_bag_dir)` -- a single-argument
        # os.path.join is a no-op, so use the path directly.
        bag_path = self.test_bag_dir
        self.assertRaises(
            bdbagit_profile.ProfileValidationError,
            bdb.validate_bag_serialization,
            bag_path,
            bag_profile_path='https://raw.githubusercontent.com/fair-research/bdbag/master/profiles/'
                             'bdbag-profile.json')
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_read_with_create_default_keychain_location(self):
    """read_keychain with a patched default location should work without error."""
    logger.info(self.getTestHeader('test read keychain with create default location'))
    try:
        keychain_dir = ospj(self.test_config_dir, ".bdbag")
        keychain_path = ospj(keychain_dir, 'keychain.json')
        # Point the module-level default at our test location for the call.
        patcher = mock.patch.multiple("bdbag.fetch.auth.keychain",
                                      DEFAULT_KEYCHAIN_FILE=keychain_path)
        patcher.start()
        keychain.read_keychain(keychain_file=keychain_path)
        patcher.stop()
    except Exception as e:
        self.fail(get_typed_exception(e))
def test_extract_bag_archive_zip_with_relocate_existing(self):
    """Extracting twice to the same non-temp location should relocate the existing dir."""
    logger.info(self.getTestHeader('extract bag zip format, relocate existing'))
    archive_path = ospj(self.test_archive_dir, 'test-bag.zip')
    try:
        # Two extractions: the second must move the directory left by the first.
        for _ in range(2):
            bag_path = bdb.extract_bag(archive_path, temp=False)
            self.assertTrue(ospe(bag_path))
            self.assertTrue(bdb.is_bag(bag_path))
        bdb.cleanup_bag(os.path.dirname(bag_path))
        self.assertExpectedMessages(["moving existing directory"], self.stream.getvalue())
    except Exception as e:
        self.fail(get_typed_exception(e))