def test_resolve_fetch_http_basic_auth_get(self):
    logger.info(self.getTestHeader('test resolve fetch http basic auth GET'))
    try:
        patched_requests_get = None

        def mocked_request_auth_get_success(*args, **kwargs):
            args[0].auth = None
            patched_requests_get.stop()
            return BaseTest.MockResponse({}, 200)

        patched_requests_get = mock.patch.multiple(
            "bdbag.fetch.transports.fetch_http.requests.Session",
            get=mocked_request_auth_get_success,
            auth=None,
            create=True)
        patched_requests_get.start()
        bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                          keychain_file=ospj(self.test_config_dir, 'test-keychain-1.json'),
                          cookie_scan=False)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_resolve_fetch_http_auth_token_get_with_disallowed_redirects(self):
    logger.info(self.getTestHeader('test resolve fetch http token auth with disallowed redirect'))
    try:
        patched_requests_get_auth = None

        def mocked_request_auth_token_get_success(*args, **kwargs):
            headers = args[0].headers or {}
            headers.update({"Location": args[1]})
            args[0].auth = None
            args[0].headers = {}
            patched_requests_get_auth.stop()
            return BaseTest.MockResponse({}, 302, headers=headers)

        patched_requests_get_auth = mock.patch.multiple(
            "bdbag.fetch.transports.fetch_http.requests.Session",
            get=mocked_request_auth_token_get_success,
            auth=None,
            create=True)
        patched_requests_get_auth.start()
        bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                          keychain_file=ospj(self.test_config_dir, 'test-keychain-7.json'),
                          cookie_scan=False)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=False)
        output = self.stream.getvalue()
        self.assertExpectedMessages(
            ["Authorization bearer token propagation on redirect is disabled"], output)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_resolve_fetch_http_auth_token_get(self):
    logger.info(self.getTestHeader('test resolve fetch http token auth'))
    try:
        patched_requests_get_auth = None

        def mocked_request_auth_token_get_success(*args, **kwargs):
            args[0].auth = None
            args[0].headers = {}
            patched_requests_get_auth.stop()
            # the patch is stopped above, so this call re-dispatches to the
            # real (unpatched) Session.get
            return args[0].get(args[1], **kwargs)

        patched_requests_get_auth = mock.patch.multiple(
            "bdbag.fetch.transports.fetch_http.requests.Session",
            get=mocked_request_auth_token_get_success,
            auth=None,
            create=True)
        patched_requests_get_auth.start()
        bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                          keychain_file=ospj(self.test_config_dir, 'test-keychain-6.json'),
                          cookie_scan=False)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_resolve_fetch_ark(self):
    logger.info(self.getTestHeader('test resolve fetch ark'))
    try:
        bdb.resolve_fetch(self.test_bag_fetch_ark_dir)
        bdb.validate_bag(self.test_bag_fetch_ark_dir, fast=False)
        output = self.stream.getvalue()
    except Exception as e:
        self.fail(bdbag.get_named_exception(e))
def test_resolve_fetch_minid(self):
    logger.info(self.getTestHeader('test resolve fetch minid'))
    try:
        bdb.resolve_fetch(self.test_bag_fetch_minid_dir, cookie_scan=False)
        bdb.validate_bag(self.test_bag_fetch_minid_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_minid_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_validate_remote_bag_from_rfm(self):
    logger.info(self.getTestHeader('create, resolve, and validate bag from remote file manifest'))
    self._test_bag_with_remote_file_manifest()
    bdb.resolve_fetch(self.test_data_dir)
    bdb.validate_bag(self.test_data_dir, fast=True)
    bdb.validate_bag(self.test_data_dir, fast=False)
def test_create_bag_mixed_checksums_allowed(self):
    logger.info(self.getTestHeader('allow create bag with non-uniform checksum(s) per file'))
    try:
        bdb.make_bag(self.test_data_dir,
                     remote_file_manifest=ospj(self.test_config_dir,
                                               'test-fetch-manifest-mixed-checksums.json'))
        bdb.validate_bag(self.test_bag_dir, fast=True)
    except Exception as e:
        self.fail(get_typed_exception(e))
def test_resolve_fetch_dataguid(self):
    logger.info(self.getTestHeader('test resolve fetch dataguid'))
    try:
        mock_response = {
            "data_object": {
                "checksums": [{
                    "checksum": "59e6e0b91b51d49a5fb0e1068980d2e7d2b2001a6d11c59c64156d32e197a626",
                    "type": "sha256"
                }],
                "created": "2018-09-20T17:00:21.428857",
                "description": "BDBag identifier unit test file",
                "id": "dg.4503/a5d79375-1ba8-418f-9dda-eb981375e599",  # fake DataGUID
                "mime_type": "text/plain",
                "name": "test-fetch-identifier.txt",
                "size": 223,
                "updated": "2018-09-20T17:00:21.428866",
                "urls": [{
                    "url": "https://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http/test-fetch-identifier.txt"
                }, {
                    "url": "http://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http/test-fetch-identifier.txt"
                }],
                "version": "0d318219"
            }
        }
        patched_resolve_dataguid_get = None

        def mocked_request_resolver_dataguid_get_success(*args, **kwargs):
            args[0].auth = None
            patched_resolve_dataguid_get.stop()
            return BaseTest.MockResponse(mock_response, 200)

        patched_resolve_dataguid_get = mock.patch.multiple(
            "bdbag.fetch.resolvers.base_resolver.requests.Session",
            get=mocked_request_resolver_dataguid_get_success,
            auth=None,
            create=True)
        patched_resolve_dataguid_get.start()
        bdb.resolve_fetch(self.test_bag_fetch_dataguid_dir, cookie_scan=False)
        bdb.validate_bag(self.test_bag_fetch_dataguid_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_dataguid_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_resolve_fetch_incomplete(self):
    logger.info(self.getTestHeader('test resolve fetch incomplete'))
    try:
        bdb.resolve_fetch(self.test_bag_incomplete_fetch_dir,
                          force=False,
                          cookie_scan=False,
                          quiet=False)
        bdb.validate_bag(self.test_bag_incomplete_fetch_dir, fast=True)
        bdb.validate_bag(self.test_bag_incomplete_fetch_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_validate_remote_bag_from_rfm(self):
    logger.info(self.getTestHeader('create, resolve, and validate bag from remote file manifest'))
    try:
        self._test_bag_with_remote_file_manifest()
        bdb.resolve_fetch(self.test_data_dir)
        bdb.validate_bag(self.test_data_dir, fast=True)
        bdb.validate_bag(self.test_data_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_resolve_fetch_http_cookie_auth(self):
    logger.info(self.getTestHeader('test resolve fetch http cookie auth'))
    try:
        bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                          keychain_file=ospj(self.test_config_dir, 'test-keychain-4.json'))
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=False)
        output = self.stream.getvalue()
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def bdbag_validate(cls, content_path):
    """Perform BDBag validation of unpacked bag contents."""
    try:
        logger.debug('Validating unpacked bag at "%s"' % (content_path,))
        bdbag_api.validate_bag(content_path)
        logger.info('Bag valid at %s' % content_path)
    except (BagError, BagValidationError) as e:
        logger.error('Validation failed for bag "%s" with error "%s"' % (content_path, e))
        raise exception.InvalidDatapackage(e)
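# A minimal sketch of how a validation wrapper like the one above might be
# driven end-to-end. Wrapping extract/cleanup around validate_bag, and the
# function name, are assumptions for illustration, not taken from the source.
import os
from bdbag import bdbag_api

def validate_unpacked_archive(archive_path):
    # extract_bag(..., temp=True) unpacks the archive into a temporary
    # directory and returns the path of the unpacked bag
    content_path = bdbag_api.extract_bag(archive_path, temp=True)
    try:
        # raises BagValidationError (or BagError) on checksum/structure failure
        bdbag_api.validate_bag(content_path)
    finally:
        # remove the temporary extraction directory when done
        bdbag_api.cleanup_bag(os.path.dirname(content_path))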
def test_resolve_fetch_http_encoded_filename(self):
    logger.info(self.getTestHeader('test resolve fetch http with encoded filename'))
    try:
        bdb.resolve_fetch(self.test_bag_fetch_http_encoded_filename_dir, cookie_scan=False)
        bdb.validate_bag(self.test_bag_fetch_http_encoded_filename_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_http_encoded_filename_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def main(argv):
    parser = argparse.ArgumentParser(
        description='Program to create a BDBag containing a set of Minids for remote content')
    parser.add_argument('-m', '--minids', metavar='<minid file>',
                        help='File listing Minids for new bag', required=True)
    parser.add_argument('-b', '--bagname', metavar='<bag name>',
                        help='Name of directory for new bag.', required=True)
    parser.add_argument('-v', '--verify', action='store_true',
                        help='Validate bag after building it.', required=False)
    parser.add_argument('-q', '--quiet', action="store_true", help="Suppress logging output.")
    parser.add_argument('-d', '--debug', action="store_true", help="Enable debug logging output.")
    parser.add_argument('-n', '--author-name', metavar="<person or entity name>",
                        help="Optional name of the person or entity responsible for the creation "
                             "of this bag, for inclusion in the bag metadata.")
    parser.add_argument('-o', '--author-orcid', metavar="<orcid>",
                        help="Optional ORCID identifier of the bag creator, for inclusion in the "
                             "bag metadata.")
    args = parser.parse_args()

    bdb.configure_logging(level=logging.ERROR if args.quiet
                          else (logging.DEBUG if args.debug else logging.INFO))

    # Create the directory that will hold the new BDBag
    bdb.ensure_bag_path_exists(args.bagname)

    # For each supplied minid, fetch sub-bag to determine its properties
    minid_fields = extract_fields(args.minids)

    # Create 'README' file in the newly created bag directory. (moved to 'data' when bag is created)
    write_readme(args.bagname, minid_fields)

    # Create remote_file_manifest_file, to be used by make_bag
    working_dir = temp_path = tempfile.mkdtemp(prefix='encode2bag_')
    remote_file_manifest_file = osp.abspath(osp.join(working_dir, 'remote-file-manifest.json'))
    generate_remote_manifest_file(minid_fields, remote_file_manifest_file)

    # Create the new bag based on the supplied remote manifest file
    bag = bdb.make_bag(args.bagname, algs=['md5', 'sha256'],
                       remote_file_manifest=remote_file_manifest_file)

    # Create metadata/manifest.json file with Research Object JSON object
    ro_manifest = ro.init_ro_manifest(
        author_name=args.author_name,
        author_orcid=args.author_orcid,
        creator_name='bagofbags using BDBag version: %s (Bagit version: %s)' % (VERSION, BAGIT_VERSION),
        creator_uri='https://github.com/fair-research/bdbag/examples/bagofbags/')
    add_remote_file_manifest_to_ro(ro_manifest, minid_fields)
    ro.write_bag_ro_metadata(ro_manifest, args.bagname, 'manifest.json')

    # Run make_bag again to include manifest.json in the checksums etc.
    bdb.make_bag(args.bagname, update=True)

    if args.verify:
        bdb.resolve_fetch(args.bagname, force=True)
        bdb.validate_bag(args.bagname, fast=False, callback=None)
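# Example invocation of the program above (the script and minid-list file names
# are assumptions based on the examples/bagofbags/ URI in the RO metadata):
#
#   python bagofbags.py -m minids.txt -b my-bag-of-bags -v -n "Jane Doe"
#
# With -v, the remote payloads are first downloaded (resolve_fetch with
# force=True) and the finished bag is then fully re-validated (fast=False).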
def test_resolve_fetch_ark2(self):
    logger.info(self.getTestHeader('test resolve fetch ark2'))
    try:
        mock_response = {
            "admins": [
                "urn:globus:auth:identity:7b315147-d8f6-4a80-853d-78b65826d734",
                "urn:globus:groups:id:23acce4c-733f-11e8-a40d-0e847f194132",
                "urn:globus:auth:identity:b2541312-d274-11e5-9131-bbb9500ff459",
                "urn:globus:auth:identity:88204dba-e812-432a-abcd-ec631583a98c",
                "urn:globus:auth:identity:58b31676-ef95-11e5-8ff7-5783aaa8fce7"
            ],
            "checksums": [{
                "function": "sha256",
                "value": "59e6e0b91b51d49a5fb0e1068980d2e7d2b2001a6d11c59c64156d32e197a626"
            }],
            "identifier": "ark:/57799/b91FmdtR3Pf4Ct7",
            "landing_page": "https://identifiers.globus.org/ark:/57799/b91FmdtR3Pf4Ct7/landingpage",
            "location": [
                "https://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http/test-fetch-identifier.txt",
                "http://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http/test-fetch-identifier.txt"
            ],
            "metadata": {
                "title": "BDBag identifier unit test file"
            },
            "visible_to": ["public"]
        }
        patched_resolve_ark_get = None

        def mocked_request_resolver_ark_get_success(*args, **kwargs):
            args[0].auth = None
            patched_resolve_ark_get.stop()
            return BaseTest.MockResponse(mock_response, 200)

        patched_resolve_ark_get = mock.patch.multiple(
            "bdbag.fetch.resolvers.base_resolver.requests.Session",
            get=mocked_request_resolver_ark_get_success,
            auth=None,
            create=True)
        patched_resolve_ark_get.start()
        bdb.resolve_fetch(self.test_bag_fetch_ark2_dir, cookie_scan=False)
        bdb.validate_bag(self.test_bag_fetch_ark2_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_ark2_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def _test_resolve_fetch_http_auth_post(self, keychain_file):
    try:
        global PATCHED_REQUESTS_POST
        PATCHED_REQUESTS_POST = mock.patch.multiple(
            "bdbag.fetch.transports.fetch_http.requests.Session",
            post=mocked_request_auth_post_success,
            auth=None,
            create=True)
        PATCHED_REQUESTS_POST.start()
        bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                          keychain_file=ospj(self.test_config_dir, keychain_file))
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=False)
        output = self.stream.getvalue()
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def create_file(self) -> Tuple[str, Optional[str]]:
    with TemporaryDirectory() as temp_path:
        bag_path = os.path.join(temp_path, 'manifest')
        os.makedirs(bag_path)
        bdbag_api.make_bag(bag_path)
        with open(os.path.join(bag_path, 'data', 'participants.tsv'), 'w') as samples_tsv:
            self._samples_tsv(samples_tsv)
        bag = bdbag_api.make_bag(bag_path, update=True)  # update TSV checksums
        assert bdbag_api.is_bag(bag_path)
        bdbag_api.validate_bag(bag_path)
        assert bdbag_api.check_payload_consistency(bag)
        temp, temp_path = mkstemp()
        os.close(temp)
        archive_path = bdbag_api.archive_bag(bag_path, 'zip')
        # Moves the bdbag archive out of the temporary directory. This prevents
        # the archive from being deleted when the temporary directory self-destructs.
        os.rename(archive_path, temp_path)
        return temp_path, None
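# A sketch of how the method above might be consumed; the caller name is an
# assumption based on the Tuple[str, Optional[str]] return shape:
#
#   path, content_type = generator.create_file()
#   try:
#       ...  # serve or upload the zipped bag at `path`
#   finally:
#       os.remove(path)  # the caller owns the mkstemp()-backed file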
def test_resolve_fetch_ftp_no_auth(self):
    logger.info(self.getTestHeader('test resolve fetch ftp'))
    try:
        patched_urlretrieve = None

        def mocked_urlretrieve_success(*args, **kwargs):
            patched_urlretrieve.stop()
            return

        patched_urlretrieve = mock.patch.multiple(
            "bdbag.fetch.transports.fetch_ftp",
            urlretrieve=mocked_urlretrieve_success)
        patched_urlretrieve.start()
        bdb.resolve_fetch(self.test_bag_fetch_ftp_dir, force=True)
        bdb.validate_bag(self.test_bag_fetch_ftp_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_ftp_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_resolve_fetch_http_basic_auth_get(self):
    logger.info(self.getTestHeader('test resolve fetch http basic auth GET'))
    try:
        global PATCHED_REQUESTS_GET
        PATCHED_REQUESTS_GET = mock.patch.multiple(
            "bdbag.fetch.transports.fetch_http.requests.Session",
            get=mocked_request_auth_get_success,
            auth=None,
            create=True)
        PATCHED_REQUESTS_GET.start()
        bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                          keychain_file=ospj(self.test_config_dir, 'test-keychain-1.json'))
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=False)
        output = self.stream.getvalue()
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def _test_resolve_fetch_http_auth_post(self, keychain_file):
    try:
        def mocked_request_auth_post_success(*args, **kwargs):
            args[0].auth = None
            patched_requests_post.stop()
            return BaseTest.MockResponse({}, 201)

        patched_requests_post = mock.patch.multiple(
            "bdbag.fetch.transports.fetch_http.requests.Session",
            post=mocked_request_auth_post_success,
            auth=None,
            create=True)
        patched_requests_post.start()
        bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                          keychain_file=ospj(self.test_config_dir, keychain_file),
                          cookie_scan=False)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_http_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_resolve_fetch_doi(self):
    logger.info(self.getTestHeader('test resolve fetch doi'))
    try:
        mock_response = {
            "@context": "http://schema.org",
            "@type": "Dataset",
            "@id": "https://doi.org/10.23725/9999-9999",  # fake DOI
            "identifier": [
                {
                    "@type": "PropertyValue",
                    "propertyID": "doi",
                    "value": "https://doi.org/10.23725/9999-9999"  # fake DOI
                },
                {
                    "@type": "PropertyValue",
                    "propertyID": "minid",
                    "value": "ark:/57799/b91FmdtR3Pf4Ct7"
                },
                {
                    "@type": "PropertyValue",
                    "propertyID": "sha256",
                    "value": "59e6e0b91b51d49a5fb0e1068980d2e7d2b2001a6d11c59c64156d32e197a626"
                }
            ],
            "url": "https://ors.datacite.org/doi:/10.23725/9999-9999",  # fake DOI
            "additionalType": "BDBAG Test file",
            "name": "test-fetch-identifier.txt",
            "author": {
                "name": "BDBag"
            },
            "description": "BDBag identifier unit test file",
            "keywords": "bdbag, unit test",
            "datePublished": "2018-09-20",
            "contentUrl": [
                "https://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http/test-fetch-identifier.txt",
                "http://raw.githubusercontent.com/fair-research/bdbag/master/test/test-data/test-http/test-fetch-identifier.txt"
            ],
            "schemaVersion": "http://datacite.org/schema/kernel-4",
            "publisher": {
                "@type": "Organization",
                "name": "fair-research.org"
            },
            "fileFormat": ["text/plain "]
        }
        patched_resolve_doi_get = None

        def mocked_request_resolver_doi_get_success(*args, **kwargs):
            args[0].auth = None
            patched_resolve_doi_get.stop()
            return BaseTest.MockResponse(mock_response, 200)

        patched_resolve_doi_get = mock.patch.multiple(
            "bdbag.fetch.resolvers.base_resolver.requests.Session",
            get=mocked_request_resolver_doi_get_success,
            auth=None,
            create=True)
        patched_resolve_doi_get.start()
        bdb.resolve_fetch(self.test_bag_fetch_doi_dir, cookie_scan=False)
        bdb.validate_bag(self.test_bag_fetch_doi_dir, fast=True)
        bdb.validate_bag(self.test_bag_fetch_doi_dir, fast=False)
    except Exception as e:
        self.fail(bdbag.get_typed_exception(e))
def test_validate_complete_bag_fast(self):
    logger.info(self.getTestHeader('test fast validation complete bag'))
    try:
        bdb.validate_bag(self.test_bag_dir, fast=True)
    except Exception as e:
        self.fail(bdbag.get_named_exception(e))
def main():
    args, is_bag, is_file = parse_cli()
    path = os.path.abspath(args.path)
    archive = None
    temp_path = None
    error = None
    result = 0

    if not args.quiet:
        sys.stderr.write('\n')

    try:
        if not is_file:
            # do not try to create or update the bag if the user just wants to
            # validate or complete an existing bag
            if not ((args.validate or args.validate_profile or args.resolve_fetch)
                    and not (args.update and bdb.is_bag(path))):
                if args.checksum and 'all' in args.checksum:
                    args.checksum = ['md5', 'sha1', 'sha256', 'sha512']
                # create or update the bag depending on the input arguments
                bdb.make_bag(path,
                             algs=args.checksum,
                             update=args.update,
                             save_manifests=not args.skip_manifests,
                             prune_manifests=args.prune_manifests,
                             metadata=BAG_METADATA if BAG_METADATA else None,
                             metadata_file=args.metadata_file,
                             remote_file_manifest=args.remote_file_manifest,
                             config_file=args.config_file,
                             ro_metadata_file=args.ro_metadata_file)
            # otherwise just extract the bag if it is an archive and no other
            # conflicting options specified
            elif not (args.validate or args.validate_profile or args.resolve_fetch):
                bdb.extract_bag(path)
                if not args.quiet:
                    sys.stderr.write('\n')
                return result

        if args.ro_manifest_generate:
            bdb.generate_ro_manifest(path,
                                     True if args.ro_manifest_generate == "overwrite" else False,
                                     config_file=args.config_file)

        if args.resolve_fetch:
            if args.validate == 'full':
                sys.stderr.write(ASYNC_TRANSFER_VALIDATION_WARNING)
            bdb.resolve_fetch(path,
                              force=True if args.resolve_fetch == 'all' else False,
                              keychain_file=args.keychain_file,
                              config_file=args.config_file,
                              filter_expr=args.fetch_filter)

        if args.validate:
            if is_file:
                temp_path = bdb.extract_bag(path, temp=True)
            if args.validate == 'structure':
                bdb.validate_bag_structure(temp_path if temp_path else path)
            else:
                bdb.validate_bag(temp_path if temp_path else path,
                                 fast=True if args.validate == 'fast' else False,
                                 config_file=args.config_file)

        if args.archiver:
            archive = bdb.archive_bag(path, args.archiver)

        if archive is None and is_file:
            archive = path

        if args.validate_profile:
            if is_file:
                if not temp_path:
                    temp_path = bdb.extract_bag(path, temp=True)
            profile = bdb.validate_bag_profile(temp_path if temp_path else path)
            bdb.validate_bag_serialization(archive if archive else path, profile)

        if args.revert:
            bdb.revert_bag(path)

    except Exception as e:
        result = 1
        error = "Error: %s" % get_typed_exception(e)

    finally:
        if temp_path:
            bdb.cleanup_bag(os.path.dirname(temp_path))
        if result != 0:
            sys.stderr.write("\n%s" % error)
        if not args.quiet:
            sys.stderr.write('\n')

    return result
def test_validate_complete_bag_full(self):
    logger.info(self.getTestHeader('test full validation complete bag'))
    try:
        bdb.validate_bag(self.test_bag_dir, fast=False)
    except Exception as e:
        self.fail(get_typed_exception(e))
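# Note on the two modes exercised by the tests above: with fast=True, validation
# only compares payload file counts and byte counts against the bag's
# Payload-Oxum metadata; with fast=False, every payload file is re-hashed and
# checked against the manifests. A minimal standalone sketch (the bag path is
# an assumption):
#
#   from bdbag import bdbag_api as bdb
#   bdb.validate_bag('/path/to/bag', fast=True)   # quick Payload-Oxum check
#   bdb.validate_bag('/path/to/bag', fast=False)  # full checksum validation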
def main():
    sys.stderr.write('\n')
    args, is_bag, is_file = parse_cli()
    path = os.path.abspath(args.bag_path)
    archive = None
    temp_path = None
    error = None
    result = 0

    try:
        if not is_file:
            # do not try to create or update the bag if the user just wants to
            # validate or complete an existing bag
            if not ((args.validate or args.validate_profile or args.resolve_fetch)
                    and not (args.update and bdb.is_bag(path))):
                if args.checksum and 'all' in args.checksum:
                    args.checksum = ['md5', 'sha1', 'sha256', 'sha512']
                # create or update the bag depending on the input arguments
                bdb.make_bag(path, args.checksum, args.update, args.skip_manifests,
                             args.prune_manifests, BAG_METADATA if BAG_METADATA else None,
                             args.metadata_file, args.remote_file_manifest, args.config_file)
            # otherwise just extract the bag if it is an archive and no other
            # conflicting options specified
            elif not (args.validate or args.validate_profile or args.resolve_fetch):
                bdb.extract_bag(path)
                sys.stderr.write('\n')
                return result

        if args.resolve_fetch:
            if args.validate == 'full':
                sys.stderr.write(ASYNC_TRANSFER_VALIDATION_WARNING)
            bdb.resolve_fetch(path, True if args.resolve_fetch == 'all' else False)

        if args.validate:
            if is_file:
                temp_path = bdb.extract_bag(path, temp=True)
            bdb.validate_bag(temp_path if temp_path else path,
                             True if args.validate == 'fast' else False,
                             args.config_file)

        if args.archiver:
            archive = bdb.archive_bag(path, args.archiver)

        if archive is None and is_file:
            archive = path

        if args.validate_profile:
            if is_file:
                if not temp_path:
                    temp_path = bdb.extract_bag(path, temp=True)
            profile = bdb.validate_bag_profile(temp_path if temp_path else path)
            bdb.validate_bag_serialization(archive if archive else path, profile)

    except Exception as e:
        result = 1
        error = "Error: %s" % bdbag.get_named_exception(e)

    finally:
        if temp_path:
            bdb.cleanup_bag(os.path.dirname(temp_path))
        if result != 0:
            sys.stderr.write("\n%s" % error)
        sys.stderr.write('\n')

    return result