def test(self):
    entries_data = {
        'b/a': bytes(1),
        'a/b': bytes(2),
        'c': bytes(3),
    }
    pkg = Package()
    for lk, data in entries_data.items():
        pkg.set(lk, data)

    workflow_validator = self.get_workflow_validator()
    assert workflow_validator.get_pkg_entries_for_validation(pkg) == [
        {
            'logical_key': 'a/b',
            'size': 2,
        },
        {
            'logical_key': 'b/a',
            'size': 1,
        },
        {
            'logical_key': 'c',
            'size': 3,
        },
    ]
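# Inferred from the expected value above: get_pkg_entries_for_validation()
# yields one {'logical_key', 'size'} dict per package entry, ordered by
# logical key rather than by insertion order ('b/a' was set first but sorts
# after 'a/b').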
def test_brackets(self):
    pkg = Package()
    pkg.set('asdf/jkl', LOCAL_MANIFEST)
    pkg.set('asdf/qwer', LOCAL_MANIFEST)
    pkg.set('qwer/asdf', LOCAL_MANIFEST)
    assert set(pkg.keys()) == {'asdf', 'qwer'}

    pkg2 = pkg['asdf']
    assert set(pkg2.keys()) == {'jkl', 'qwer'}

    assert pkg['asdf']['qwer'].get() == LOCAL_MANIFEST.as_uri()

    assert pkg['asdf']['qwer'] == pkg['asdf/qwer'] == pkg[('asdf', 'qwer')]
    assert pkg[[]] == pkg

    pkg = (
        Package()
        .set('foo', DATA_DIR / 'foo.txt', {'foo': 'blah'})
    )
    pkg['foo'].meta['target'] = 'unicode'
    pkg.build("Quilt/Test")
    assert pkg['foo'].deserialize() == '123\n'
    assert pkg['foo']() == '123\n'

    with pytest.raises(KeyError):
        pkg['baz']

    with pytest.raises(TypeError):
        pkg[b'asdf']

    with pytest.raises(TypeError):
        pkg[0]
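# Inferred from the assertions above: __getitem__ accepts a '/'-delimited
# string, a tuple, or a list of key parts; an empty list returns the package
# itself, and key types other than str/tuple/list raise TypeError.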
def test_set_package_entry_as_object(self):
    pkg = Package()
    nasty_string = 'a,"\tb'
    num_col = [11, 22, 33]
    str_col = ['a', 'b', nasty_string]
    df = pd.DataFrame({'col_num': num_col, 'col_str': str_col})

    # Test with serialization_dir set
    pkg.set(
        "mydataframe1.parquet",
        df,
        meta={'user_meta': 'blah'},
        serialization_location=SERIALIZATION_DIR / "df1.parquet",
    )
    pkg.set(
        "mydataframe2.csv",
        df,
        meta={'user_meta': 'blah2'},
        serialization_location=SERIALIZATION_DIR / "df2.csv",
    )
    pkg.set(
        "mydataframe3.tsv",
        df,
        meta={'user_meta': 'blah3'},
        serialization_location=SERIALIZATION_DIR / "df3.tsv",
    )

    # Test without serialization_dir set
    pkg.set("mydataframe4.parquet", df, meta={'user_meta': 'blah4'})
    pkg.set("mydataframe5.csv", df, meta={'user_meta': 'blah5'})
    pkg.set("mydataframe6.tsv", df, meta={'user_meta': 'blah6'})

    for lk, entry in pkg.walk():
        file_path = parse_file_url(urlparse(entry.get()))
        assert pathlib.Path(file_path).exists(), "The serialization files should exist"
        # Make sure files get deleted even if the test fails
        self.file_sweeper_path_list.append(file_path)

    pkg._fix_sha256()
    for lk, entry in pkg.walk():
        assert df.equals(entry.deserialize()), (
            "The deserialized PackageEntry should be equal to the object that "
            "was serialized"
        )

    # Test that push cleans up the temporary files, if and only if the
    # serialization_location was not set
    with patch('botocore.client.BaseClient._make_api_call', new=mock_make_api_call), \
         patch('quilt3.Package._materialize') as materialize_mock, \
         patch('quilt3.Package.build') as build_mock:
        materialize_mock.return_value = pkg
        pkg.push('Quilt/test_pkg_name', 's3://test-bucket')

        for lk in ["mydataframe1.parquet", "mydataframe2.csv", "mydataframe3.tsv"]:
            file_path = parse_file_url(urlparse(pkg.get(lk)))
            assert pathlib.Path(file_path).exists(), \
                "These files should not have been deleted during push()"

        for lk in ["mydataframe4.parquet", "mydataframe5.csv", "mydataframe6.tsv"]:
            file_path = parse_file_url(urlparse(pkg.get(lk)))
            assert not pathlib.Path(file_path).exists(), \
                "These temp files should have been deleted during push()"
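# The test above patches botocore with a module-level mock_make_api_call
# helper that this snippet does not define. Below is a minimal sketch of such
# a helper; the real test module ships its own version, so the operations
# handled here and their fake responses are assumptions, not quilt3's actual
# fixture.
def mock_make_api_call(self, operation_name, api_params):
    # Short-circuit botocore at the API-call layer so push() does no network I/O.
    if operation_name == 'PutObject':
        return {'VersionId': 'v1'}
    if operation_name == 'HeadObject':
        return {'ContentLength': 0, 'ETag': '"d41d8cd98f00b204e9800998ecf8427e"'}
    raise NotImplementedError(f"mock_make_api_call: unhandled operation {operation_name}")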
def test_default_registry(self):
    new_pkg = Package()

    # Create a dummy file to add to the package.
    test_file_name = 'bar'
    with open(test_file_name, "w") as fd:
        fd.write('test_file_content_string')
        test_file = Path(fd.name)

    # Build a new package into the local registry.
    new_pkg = new_pkg.set('foo', test_file_name)
    top_hash = new_pkg.build("Quilt/Test").top_hash

    # Verify manifest is registered by hash.
    out_path = LOCAL_REGISTRY / ".quilt/packages" / top_hash
    with open(out_path) as fd:
        pkg = Package.load(fd)
        assert test_file.resolve().as_uri() == pkg['foo'].physical_keys[0]

    # Verify latest points to the new location.
    named_pointer_path = LOCAL_REGISTRY / ".quilt/named_packages/Quilt/Test/latest"
    with open(named_pointer_path) as fd:
        assert fd.read().replace('\n', '') == top_hash

    # Test unnamed packages.
    new_pkg = Package()
    new_pkg = new_pkg.set('bar', test_file_name)
    top_hash = new_pkg.build("Quilt/Test").top_hash
    out_path = LOCAL_REGISTRY / ".quilt/packages" / top_hash
    with open(out_path) as fd:
        pkg = Package.load(fd)
        assert test_file.resolve().as_uri() == pkg['bar'].physical_keys[0]
def test_map(self):
    pkg = Package()
    pkg.set('as/df', LOCAL_MANIFEST)
    pkg.set('as/qw', LOCAL_MANIFEST)
    assert set(pkg.map(lambda lk, entry: lk)) == {'as/df', 'as/qw'}

    pkg['as'].set_meta({'foo': 'bar'})
    assert set(pkg.map(lambda lk, entry: lk, include_directories=True)) == \
        {'as/df', 'as/qw', 'as/'}
def test_manifest(self):
    pkg = Package()
    pkg.set('as/df', LOCAL_MANIFEST)
    pkg.set('as/qw', LOCAL_MANIFEST)
    top_hash = pkg.build('foo/bar').top_hash
    manifest = list(pkg.manifest)

    pkg2 = Package.browse('foo/bar', top_hash=top_hash)
    assert list(pkg.manifest) == list(pkg2.manifest)
def test_iter(self):
    pkg = Package()
    assert not pkg
    pkg.set('asdf', LOCAL_MANIFEST)
    assert list(pkg) == ['asdf']
    pkg.set('jkl;', REMOTE_MANIFEST)
    assert set(pkg) == {'asdf', 'jkl;'}
def test_keys(self):
    pkg = Package()
    assert not pkg.keys()
    pkg.set('asdf', LOCAL_MANIFEST)
    assert set(pkg.keys()) == {'asdf'}
    pkg.set('jkl;', REMOTE_MANIFEST)
    assert set(pkg.keys()) == {'asdf', 'jkl;'}
    pkg.delete('asdf')
    assert set(pkg.keys()) == {'jkl;'}
def test_set_package_entry_as_object(self):
    pkg = Package()
    nasty_string = 'a,"\tb'
    num_col = [11, 22, 33]
    str_col = ['a', 'b', nasty_string]
    df = pd.DataFrame({'col_num': num_col, 'col_str': str_col})

    # Test with serialization_dir set
    pkg.set(
        "mydataframe1.parquet",
        df,
        meta={'user_meta': 'blah'},
        serialization_location=SERIALIZATION_DIR / "df1.parquet",
    )
    pkg.set(
        "mydataframe2.csv",
        df,
        meta={'user_meta': 'blah2'},
        serialization_location=SERIALIZATION_DIR / "df2.csv",
    )
    pkg.set(
        "mydataframe3.tsv",
        df,
        meta={'user_meta': 'blah3'},
        serialization_location=SERIALIZATION_DIR / "df3.tsv",
    )

    # Test without serialization_dir set
    pkg.set("mydataframe4.parquet", df, meta={'user_meta': 'blah4'})
    pkg.set("mydataframe5.csv", df, meta={'user_meta': 'blah5'})
    pkg.set("mydataframe6.tsv", df, meta={'user_meta': 'blah6'})

    for lk, entry in pkg.walk():
        file_path = parse_file_url(urlparse(entry.physical_keys[0]))
        assert pathlib.Path(file_path).exists(), "The serialization files should exist"
        self.file_sweeper_path_list.append(file_path)

    pkg._fix_sha256()
    for lk, entry in pkg.walk():
        assert df.equals(entry.deserialize()), (
            "The deserialized PackageEntry should be equal to the object that "
            "was serialized"
        )

    # Confirm that delete of temporary files is trivial
    Package.delete_local_file(pkg.get("mydataframe1.parquet"))
    Package.delete_local_file(pkg.get("mydataframe2.csv"))
    Package.delete_local_file(pkg.get("mydataframe3.tsv"))
    Package.delete_local_file(pkg.get("mydataframe4.parquet"))
    Package.delete_local_file(pkg.get("mydataframe5.csv"))
    Package.delete_local_file(pkg.get("mydataframe6.tsv"))

    for lk, entry in pkg.walk():
        file_path = parse_file_url(urlparse(entry.physical_keys[0]))
        assert not pathlib.Path(file_path).exists(), \
            "The serialization files should have been deleted"
        self.file_sweeper_path_list.append(file_path)
def test_load_into_quilt(self):
    """ Verify loading local manifest and data into S3. """
    top_hash = '5333a204bbc6e21607c2bc842f4a77d2e21aa6147cf2bf493dbf6282188d01ca'

    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v1'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': 'Quilt/package/foo',
            'Metadata': {'helium': '{}'},
        },
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v2'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/packages/' + top_hash,
            'Metadata': {'helium': 'null'},
        },
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v3'},
        expected_params={
            'Body': top_hash.encode(),
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/named_packages/Quilt/package/1234567890',
            'Metadata': {'helium': 'null'},
        },
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v4'},
        expected_params={
            'Body': top_hash.encode(),
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/named_packages/Quilt/package/latest',
            'Metadata': {'helium': 'null'},
        },
    )

    new_pkg = Package()

    # Create a dummy file to add to the package.
    contents = 'blah'
    test_file = Path('bar')
    test_file.write_text(contents)
    new_pkg = new_pkg.set('foo', test_file)

    with patch('time.time', return_value=1234567890):
        new_pkg.push('Quilt/package', 's3://my_test_bucket/')
def test_verify(self):
    pkg = Package()
    pkg.set('foo', b'Hello, World!')
    pkg.build('quilt/test')
    Package.install('quilt/test', LOCAL_REGISTRY, dest='test')
    assert pkg.verify('test')

    Path('test/blah').write_text('123')
    assert not pkg.verify('test')
    assert pkg.verify('test', extra_files_ok=True)

    Path('test/foo').write_text('123')
    assert not pkg.verify('test')
    assert not pkg.verify('test', extra_files_ok=True)

    Path('test/foo').write_text('Hello, World!')
    Path('test/blah').unlink()
    assert pkg.verify('test')
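# Inferred from the assertions above: verify() checks the files under the
# given directory against the package manifest. extra_files_ok=True only
# tolerates files the package does not track; a tracked file with modified
# contents always fails verification.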
def test_tophash_changes(self):
    test_file = Path('test.txt')
    test_file.write_text('asdf', 'utf-8')

    pkg = Package()
    th1 = pkg.top_hash

    pkg.set('asdf', test_file)
    pkg.build('foo/bar')
    th2 = pkg.top_hash
    assert th1 != th2

    test_file.write_text('jkl', 'utf-8')
    pkg.set('jkl', test_file)
    pkg.build('foo/bar')
    th3 = pkg.top_hash
    assert th1 != th3
    assert th2 != th3

    pkg.delete('jkl')
    th4 = pkg.top_hash
    assert th2 == th4
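# Inferred from the final assertion above: top_hash is derived from the
# manifest contents alone, so deleting 'jkl' returns the package to exactly
# the state it had at th2 and the two hashes match again.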
def test_dir_meta(self):
    test_meta = {'test': 'meta'}
    pkg = Package()
    pkg.set('asdf/jkl', LOCAL_MANIFEST)
    pkg.set('asdf/qwer', LOCAL_MANIFEST)
    pkg.set('qwer/asdf', LOCAL_MANIFEST)
    pkg.set('qwer/as/df', LOCAL_MANIFEST)
    pkg.build('Quilt/Test')

    assert pkg['asdf'].meta == {}
    assert pkg.meta == {}
    assert pkg['qwer']['as'].meta == {}

    pkg['asdf'].set_meta(test_meta)
    assert pkg['asdf'].meta == test_meta
    pkg['qwer']['as'].set_meta(test_meta)
    assert pkg['qwer']['as'].meta == test_meta
    pkg.set_meta(test_meta)
    assert pkg.meta == test_meta

    dump_path = 'test_meta'
    with open(dump_path, 'w') as f:
        pkg.dump(f)
    with open(dump_path) as f:
        pkg2 = Package.load(f)
    assert pkg2['asdf'].meta == test_meta
    assert pkg2['qwer']['as'].meta == test_meta
    assert pkg2.meta == test_meta
def test_rollback(self):
    p = Package()
    p.set('foo', DATA_DIR / 'foo.txt')
    p.build('quilt/tmp')

    good_hash = p.top_hash
    assert 'foo' in Package.browse('quilt/tmp')

    p.delete('foo')
    p.build('quilt/tmp')
    assert 'foo' not in Package.browse('quilt/tmp')

    Package.rollback('quilt/tmp', LOCAL_REGISTRY, good_hash)
    assert 'foo' in Package.browse('quilt/tmp')

    with self.assertRaises(QuiltException):
        Package.rollback('quilt/tmp', LOCAL_REGISTRY, '12345678' * 8)

    with self.assertRaises(QuiltException):
        Package.rollback('quilt/blah', LOCAL_REGISTRY, good_hash)
def test_diff(self):
    new_pkg = Package()

    # Create a dummy file to add to the package.
    test_file_name = 'bar'
    with open(test_file_name, "w") as fd:
        fd.write('test_file_content_string')
        test_file = Path(fd.name)

    # Build a new package into the local registry.
    new_pkg = new_pkg.set('foo', test_file_name)
    top_hash = new_pkg.build("Quilt/Test")

    p1 = Package.browse('Quilt/Test')
    p2 = Package.browse('Quilt/Test')
    assert p1.diff(p2) == ([], [], [])
def test_remote_repr(self):
    with patch('quilt3.packages.get_size_and_meta', return_value=(0, dict(), '0')):
        TEST_REPR = (
            "(remote Package)\n"
            " └─asdf\n"
        )
        pkg = Package()
        pkg.set('asdf', 's3://my-bucket/asdf')
        assert repr(pkg) == TEST_REPR

        TEST_REPR = (
            "(remote Package)\n"
            " └─asdf\n"
            " └─qwer\n"
        )
        pkg = Package()
        pkg.set('asdf', 's3://my-bucket/asdf')
        pkg.set('qwer', LOCAL_MANIFEST)
        assert repr(pkg) == TEST_REPR
def test_local_repr(self):
    # Nested indentation in the expected repr restored; the flattened source
    # had collapsed every tree line to a single leading space.
    TEST_REPR = (
        "(local Package)\n"
        " └─asdf\n"
        " └─path1/\n"
        "   └─asdf\n"
        "   └─qwer\n"
        " └─path2/\n"
        "   └─first/\n"
        "     └─asdf\n"
        "   └─second/\n"
        "     └─asdf\n"
        " └─qwer\n"
    )
    pkg = Package()
    pkg.set('asdf', LOCAL_MANIFEST)
    pkg.set('qwer', LOCAL_MANIFEST)
    pkg.set('path1/asdf', LOCAL_MANIFEST)
    pkg.set('path1/qwer', LOCAL_MANIFEST)
    pkg.set('path2/first/asdf', LOCAL_MANIFEST)
    pkg.set('path2/second/asdf', LOCAL_MANIFEST)
    assert repr(pkg) == TEST_REPR
def test_filter(self):
    pkg = Package()
    pkg.set('a/df', LOCAL_MANIFEST)
    pkg.set('a/qw', LOCAL_MANIFEST)
    p_copy = pkg.filter(lambda lk, entry: lk == 'a/df')
    assert list(p_copy) == ['a'] and list(p_copy['a']) == ['df']

    pkg = Package()
    pkg.set('a/df', LOCAL_MANIFEST)
    pkg.set('a/qw', LOCAL_MANIFEST)
    pkg.set('b/df', LOCAL_MANIFEST)
    pkg['a'].set_meta({'foo': 'bar'})
    pkg['b'].set_meta({'foo': 'bar'})

    p_copy = pkg.filter(lambda lk, entry: lk == 'a/', include_directories=True)
    assert list(p_copy) == []

    p_copy = pkg.filter(
        lambda lk, entry: lk == 'a/' or lk == 'a/df',
        include_directories=True,
    )
    assert list(p_copy) == ['a'] and list(p_copy['a']) == ['df']
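# Inferred from the assertions above: with include_directories=True the
# predicate also sees directory keys (trailing '/'), but a directory that
# passes on its own keeps nothing -- at least one file entry beneath it must
# also pass for the subtree to survive the filter.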
def test_siblings_succeed(self):
    pkg = Package()
    pkg.set('as/df', LOCAL_MANIFEST)
    pkg.set('as/qw', LOCAL_MANIFEST)
def test_overwrite_entry_fails(self):
    with pytest.raises(QuiltException):
        pkg = Package()
        pkg.set('asdf', LOCAL_MANIFEST)
        pkg.set('asdf/jkl', LOCAL_MANIFEST)
def test_load_into_quilt(self):
    """ Verify loading local manifest and data into S3. """
    top_hash1 = 'abbf5f171cf20bfb2313ecd8684546958cd72ac4f3ec635e4510d9c771168226'
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v1'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': 'Quilt/package/foo1',
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v1'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': 'Quilt/package/foo2',
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v2'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/packages/' + top_hash1,
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v3'},
        expected_params={
            'Body': top_hash1.encode(),
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/named_packages/Quilt/package/1234567890',
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v4'},
        expected_params={
            'Body': top_hash1.encode(),
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/named_packages/Quilt/package/latest',
        }
    )

    new_pkg = Package()

    # Create two dummy files to add to the package.
    test_file1 = Path('bar1')
    test_file1.write_text('blah')
    new_pkg.set('foo1', test_file1)
    test_file2 = Path('bar2')
    test_file2.write_text('omg')
    new_pkg.set('foo2', test_file2)  # was test_file1 in the original, leaving test_file2 unused

    with patch('time.time', return_value=1234567890), \
         patch('quilt3.data_transfer.s3_transfer_config.max_request_concurrency', 1):
        remote_pkg = new_pkg.push('Quilt/package', 's3://my_test_bucket/')

    # Modify one file, and check that only that file gets uploaded.
    top_hash2 = 'd4efbb1734a53726d97086824d153e6cb5e9d8bc31d15ead0dbc019022cfe539'
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v2'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': 'Quilt/package/foo2',
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v2'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/packages/' + top_hash2,
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v3'},
        expected_params={
            'Body': top_hash2.encode(),
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/named_packages/Quilt/package/1234567891',
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v4'},
        expected_params={
            'Body': top_hash2.encode(),
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/named_packages/Quilt/package/latest',
        }
    )

    test_file3 = Path('bar3')
    test_file3.write_text('!!!')
    remote_pkg.set('foo2', test_file3)

    with patch('time.time', return_value=1234567891), \
         patch('quilt3.data_transfer.s3_transfer_config.max_request_concurrency', 1):
        remote_pkg.push('Quilt/package', 's3://my_test_bucket/')
def run_benchmarks(args: Args):
    # Results are stored as they are returned
    all_results = {}

    # Try running the benchmarks
    try:
        # Get benchmark resources dir (resolved relative to this script;
        # the source had a garbled bare Path() call here)
        resources_dir = (
            Path(__file__).parent.parent / "aicsimageio" / "tests" / "resources"
        )

        # Store machine config
        _ = {
            "platform": platform.system(),
            "platform_version": platform.version(),
            "architecture": platform.machine(),
            "cpu_total_count": psutil.cpu_count(),
            "cpu_current_utilization": psutil.cpu_percent(),
            "memory_total_gb": psutil.virtual_memory().total / 10e8,
            "memory_available_gb": psutil.virtual_memory().available / 10e8,
        }

        # Store python config
        pyversion = sys.version_info
        _ = {
            "python_version": f"{pyversion.major}.{pyversion.minor}.{pyversion.micro}",
            "aicsimageio": aicsimageio.__version__,
            "czifile": czifile.__version__,
            "imageio": imageio.__version__,
            "tifffile": tifffile.__version__,
        }

        # Run tests
        #######################################################################
        log.info("Running tests: no cluster...")
        log.info("=" * 80)
        all_results["no-cluster"] = _run_benchmark_suite(resources_dir=resources_dir)

        #######################################################################
        for cluster_config in CLUSTER_CONFIGS:
            total_cores = cluster_config["per_worker_cores"] * cluster_config["workers"]
            log.info(
                f"Running tests: {cluster_config['name']} "
                f"(Total cores: {total_cores}) ..."
            )
            log.info("=" * 80)

            # Create or get log dir
            # Do not include ms
            log_dir_name = datetime.now().isoformat().split(".")[0]
            log_dir = Path(f".dask_logs/{log_dir_name}").expanduser()
            # Log dir settings
            log_dir.mkdir(parents=True, exist_ok=True)

            # Calc per_worker_memory
            per_worker_memory = cluster_config["per_worker_cores"] * 2
            per_worker_memory = f"{per_worker_memory}GB"

            # Create cluster
            cluster = SLURMCluster(
                cores=cluster_config["per_worker_cores"],
                memory=per_worker_memory,
                queue="aics_cpu_general",
                walltime="10:00:00",
                local_directory=str(log_dir),
                log_directory=str(log_dir),
            )

            # Scale cluster
            cluster.scale(cluster_config["workers"])

            # Create client connection
            client = Client(cluster)

            # Wait for a minute for the cluster to fully spin up
            time.sleep(60)

            # Run benchmark
            all_results[cluster_config["name"]] = _run_benchmark_suite(
                resources_dir=resources_dir
            )

            client.shutdown()
            cluster.close()

            # Wait for a minute for the cluster to fully shut down
            time.sleep(60)

        #######################################################################
        log.info("Completed all tests")
        log.info("=" * 80)

        # Ensure save dir exists and save results
        args.save_path.parent.mkdir(parents=True, exist_ok=True)
        with open(args.save_path, "w") as write_out:
            json.dump(all_results, write_out)

        # Construct and push package
        if args.upload:
            p = Package()
            p.set("results.json", args.save_path)
            p.push(
                "aicsimageio/benchmarks",
                "s3://aics-modeling-packages-test-resources",
                message=f"aicsimageio version: {aicsimageio.__version__}",
            )

    # Catch any exception
    except Exception as e:
        log.error("=============================================")
        if args.debug:
            log.error("\n\n" + traceback.format_exc())
            log.error("=============================================")
        log.error("\n\n" + str(e) + "\n")
        log.error("=============================================")
        sys.exit(1)
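# run_benchmarks() relies on an Args container, CLUSTER_CONFIGS, and a
# _run_benchmark_suite() helper that this snippet does not show. Below is a
# minimal sketch of the Args side and a CLI entry point: the field names
# (save_path, upload, debug) are taken from how run_benchmarks() uses them,
# while the flag names and defaults are assumptions, not the script's actual
# interface.
import argparse
from dataclasses import dataclass
from pathlib import Path


@dataclass
class Args:
    save_path: Path  # where the collected JSON results are written
    upload: bool     # when True, push the results as a Quilt package
    debug: bool      # when True, log full tracebacks on failure


def main():
    parser = argparse.ArgumentParser(description="Run aicsimageio benchmarks.")
    parser.add_argument("--save-path", type=Path, default=Path("benchmark_results.json"))
    parser.add_argument("--upload", action="store_true")
    parser.add_argument("--debug", action="store_true")
    ns = parser.parse_args()
    run_benchmarks(Args(save_path=ns.save_path, upload=ns.upload, debug=ns.debug))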
def test_invalid_key(self):
    pkg = Package()
    with pytest.raises(QuiltException):
        pkg.set('', LOCAL_MANIFEST)
    with pytest.raises(QuiltException):
        pkg.set('foo/', LOCAL_MANIFEST)
    with pytest.raises(QuiltException):
        pkg.set('foo', './')
    with pytest.raises(QuiltException):
        pkg.set('foo', os.path.dirname(__file__))

    # we do not allow '.' or '..' files or filename separators
    with pytest.raises(QuiltException):
        pkg.set('.', LOCAL_MANIFEST)
    with pytest.raises(QuiltException):
        pkg.set('..', LOCAL_MANIFEST)
    with pytest.raises(QuiltException):
        pkg.set('./foo', LOCAL_MANIFEST)
    with pytest.raises(QuiltException):
        pkg.set('../foo', LOCAL_MANIFEST)
    with pytest.raises(QuiltException):
        pkg.set('foo/.', LOCAL_MANIFEST)
    with pytest.raises(QuiltException):
        pkg.set('foo/..', LOCAL_MANIFEST)
    with pytest.raises(QuiltException):
        pkg.set('foo/./bar', LOCAL_MANIFEST)
    with pytest.raises(QuiltException):
        pkg.set('foo/../bar', LOCAL_MANIFEST)
    with pytest.raises(QuiltException):
        pkg.set('s3://foo/.', LOCAL_MANIFEST)
    with pytest.raises(QuiltException):
        pkg.set('s3://foo/..', LOCAL_MANIFEST)
def test_invalid_set_key(self):
    """Verify an exception when setting a key with a Package object."""
    pkg = Package()
    with pytest.raises(TypeError):
        pkg.set('asdf/jkl', Package())
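# A minimal sketch of the module-level fixtures the tests above assume; the
# real test module defines its own, so these exact values are assumptions --
# only the names and types are taken from how the tests use them.
import pathlib

DATA_DIR = pathlib.Path(__file__).parent / 'data'       # small data files such as foo.txt
LOCAL_MANIFEST = DATA_DIR / 'local_manifest.jsonl'      # local file used as generic entry data
REMOTE_MANIFEST = DATA_DIR / 'quilt_manifest.jsonl'     # stand-in for a remote manifest
SERIALIZATION_DIR = pathlib.Path('serialization_dir')   # target for serialization_location=...
LOCAL_REGISTRY = pathlib.Path('local_registry')         # base dir of the local .quilt registry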