def test_create_empty_repository(managed_tmpdir):
    """Initialize a repository in a temporary directory and check its defaults.

    A new repository is expected to report ``master`` as its only branch and
    to keep its data in a ``__hangar`` directory below the given path.
    """
    from hangar import Repository

    fresh_repo = Repository(path=managed_tmpdir)
    fresh_repo.init(user_name='Test User', user_email='*****@*****.**')

    branch_names = fresh_repo.list_branch_names()
    assert branch_names == ['master']

    expected_location = os.path.join(managed_tmpdir, '__hangar')
    assert fresh_repo._repo_path == expected_location
class MakeCommit(object):
    """Benchmark suite timing ``commit`` on a checkout with staged samples.

    The class attributes below parametrize the run over ``num_samples``
    (attribute names look like airspeed-velocity settings -- confirm against
    the benchmark runner in use).
    """

    params = (5_000, 20_000, 50_000)
    param_names = ['num_samples']
    processes = 2
    repeat = (2, 4, 20)
    number = 1
    warmup_time = 0

    def setup(self, num_samples):
        """Create a repository in a temp dir and stage ``num_samples`` arrays."""
        self.tmpdir = mkdtemp()
        self.repo = Repository(path=self.tmpdir, exists=False)
        self.repo.init('tester', '*****@*****.**', remove_old=True)
        self.co = self.repo.checkout(write=True)

        sample = np.array([0,], dtype=np.uint8)
        # Three call signatures are tried in turn; which one succeeds depends
        # on the API exposed by the installed library.
        try:
            column = self.co.arraysets.init_arrayset(
                'aset', prototype=sample, backend_opts='10')
        except TypeError:
            column = self.co.arraysets.init_arrayset(
                'aset', prototype=sample, backend='10')
        except AttributeError:
            column = self.co.add_ndarray_column(
                'aset', prototype=sample, backend='10')

        with column as writer:
            for index in range(num_samples):
                sample[:] = index % 255
                writer[index] = sample

    def teardown(self, num_samples):
        """Close the checkout and environments, then delete the temp dir."""
        self.co.close()
        self.repo._env._close_environments()
        rmtree(self.tmpdir)

    def time_commit(self, num_samples):
        """Timed section: commit everything staged by ``setup``."""
        self.co.commit('hello')
def init_repo(name=None, email=None, overwrite=False):
    """Initialize a hangar repo, create the stock file and update ``.gitignore``.

    Everything happens relative to the current working directory.

    Parameters
    ----------
    name, email
        User details passed to ``Repository.init``. Both are required when a
        repository has to be (re)initialized.
    overwrite
        Passed to ``Repository.init`` as ``remove_old``. When falsy and the
        repository is already initialized, the existing repository is reused.

    Raises
    ------
    ValueError
        If a repository must be initialized and ``name`` or ``email`` is None.

    Warns
    -----
    UserWarning
        If the working directory has no ``.git`` entry, or if ``head.stock``
        already exists (in which case the stock file is left untouched).
    """
    # NOTE(review): block nesting below was reconstructed from a collapsed
    # one-line source -- confirm indentation against the original file.
    if not Path.cwd().joinpath('.git').exists():
        warnings.warn(
            "initializing stock repository in a directory which is not a "
            "git repository. Some features won't work", UserWarning)
    repo = Repository(Path.cwd(), exists=False)
    if not overwrite and repo.initialized:
        # Reuse the existing repository: record the 'head' entry of its log.
        commit_hash = repo.log(return_contents=True)['head']
        print(f'Hangar Repo already exists at {repo.path}. '
              f'Initializing it as stock repository')
    else:
        if name is None or email is None:
            raise ValueError("Both ``name`` and ``email`` cannot be None")
        # A freshly initialized repository starts with an empty stock file.
        commit_hash = ''
        repo.init(user_name=name, user_email=email, remove_old=overwrite)
    # Close the environments to avoid issues on Windows.
    repo._env._close_environments()
    stock_file = Path.cwd() / 'head.stock'
    if stock_file.exists():
        warnings.warn(
            "Trying to initialize an already initialized stock repository. "
            "No action taken", UserWarning)
    else:
        with open(str(stock_file), 'w+') as f:
            f.write(commit_hash)
        print("Stock file created")
    gitignore = Path.cwd() / '.gitignore'
    # 'a+' creates the file when missing; rewind so the existing content can
    # be inspected before appending the ignore entry.
    with open(str(gitignore), 'a+') as f:
        f.seek(0)
        if '.hangar' not in f.read():
            f.write('\n# hangar artifacts\n.hangar\n')
def test_push_fetch_records(server_instance, backend):
    """Push ``master`` and ``testbranch`` to a remote through the CLI.

    Ten samples are committed on ``master`` and ten more on ``testbranch``;
    each ``push`` invocation is then expected to exit with code 0. The
    repository environments are closed even if an assertion fails.
    """
    cli_runner = CliRunner()
    with cli_runner.isolated_filesystem():
        repo = Repository(getcwd(), exists=False)
        try:
            repo.init('foo', 'bar')
            payload = np.arange(50)

            # First commit on master: samples keyed '0' .. '9'.
            master_co = repo.checkout(write=True, branch='master')
            master_co.add_ndarray_column(
                name='dummy', prototype=payload, backend=backend)
            for idx in range(10):
                payload[:] = idx
                master_co.columns['dummy'][str(idx)] = payload
            master_co.commit('first commit adding dummy data')
            master_co.close()

            # Second commit on a new branch: samples keyed '10' .. '19'.
            repo.create_branch('testbranch')
            branch_co = repo.checkout(write=True, branch='testbranch')
            for idx in range(10, 20):
                payload[:] = idx
                branch_co.columns['dummy'][str(idx)] = payload
            branch_co.commit(
                'first commit on test branch adding non-conflict data')
            branch_co.close()

            repo.remote.add('origin', server_instance)
            for branch_name in ('master', 'testbranch'):
                result = cli_runner.invoke(
                    cli.push, ['origin', branch_name], obj=repo)
                assert result.exit_code == 0
        finally:
            repo._env._close_environments()
def test_export_images(backend, in_commands, expected_fnames, generate_3_images):
    """Import images through the CLI, commit, export, and check the files.

    ``generate_3_images`` yields three ``(path, array)``-like pairs; only the
    first one is used here, for its directory and as an array prototype.
    """
    first_image, _second_image, _third_image = generate_3_images
    image_dir = os.path.dirname(first_image[0])

    cli_runner = CliRunner()
    with cli_runner.isolated_filesystem():
        workdir = getcwd()
        repo = Repository(workdir, exists=False)
        repo.init('foo', 'bar')

        prototype = np.ones_like(first_image[1])
        seed_co = repo.checkout(write=True, branch='master')
        seed_co.arraysets.init_arrayset(
            name='dummy', prototype=prototype, named_samples=True,
            backend=backend)
        seed_co.arraysets['dummy']['lol.jpg'] = prototype
        seed_co.close()

        result = cli_runner.invoke(
            cli.import_data, ['dummy', image_dir], obj=repo)
        assert result.exit_code == 0

        commit_co = repo.checkout(write=True)
        commit_co.commit('hi')

        result = cli_runner.invoke(cli.export_data, in_commands, obj=repo)
        assert result.exit_code == 0
        for file_name in expected_fnames:
            exported_path = os.path.join(workdir, file_name)
            assert os.path.isfile(exported_path)
        commit_co.close()
def classrepo(tmp_path_factory) -> Repository:
    """Yield an initialized repository while backend size limits are shrunk.

    Several module-level backend constants and LMDB ``map_size`` settings are
    overridden while the repository is in use and restored afterwards. The
    transaction-register singleton cache is reset before the repository is
    created.

    Parameters
    ----------
    tmp_path_factory
        Object providing ``mktemp(name)``; used to create the repository dir.

    Yields
    ------
    Repository
        A repository initialized in a fresh ``classrepo`` directory.
    """
    hdf5_00 = hangar.backends.hdf5_00
    hdf5_01 = hangar.backends.hdf5_01
    numpy_10 = hangar.backends.numpy_10
    lmdb_30 = hangar.backends.lmdb_30

    old00_count = hdf5_00.COLLECTION_COUNT
    old00_size = hdf5_00.COLLECTION_SIZE
    old01_count = hdf5_01.COLLECTION_COUNT
    old01_size = hdf5_01.COLLECTION_SIZE
    old10_size = numpy_10.COLLECTION_SIZE
    # Snapshot by value: the settings mapping is mutated in place below, so a
    # plain reference would alias the modified object and restore nothing.
    old30_lmdb_settings = dict(lmdb_30.LMDB_SETTINGS)
    old_map_size = hangar.constants.LMDB_SETTINGS['map_size']

    repo_obj = None
    try:
        hdf5_00.COLLECTION_COUNT = 5
        hdf5_00.COLLECTION_SIZE = 20
        hdf5_01.COLLECTION_COUNT = 5
        hdf5_01.COLLECTION_SIZE = 20
        numpy_10.COLLECTION_SIZE = 50
        lmdb_30.LMDB_SETTINGS['map_size'] = 1_000_000
        hangar.constants.LMDB_SETTINGS['map_size'] = 2_000_000
        hangar.txnctx.TxnRegisterSingleton._instances = {}

        pth = tmp_path_factory.mktemp('classrepo')
        repo_obj = Repository(path=str(pth), exists=False)
        repo_obj.init(
            user_name='tester', user_email='*****@*****.**', remove_old=True)
        yield repo_obj
    finally:
        # Runs on normal teardown and also when setup itself fails, so the
        # patched module state never leaks.
        hangar.constants.LMDB_SETTINGS['map_size'] = old_map_size
        hdf5_00.COLLECTION_COUNT = old00_count
        hdf5_00.COLLECTION_SIZE = old00_size
        hdf5_01.COLLECTION_COUNT = old01_count
        hdf5_01.COLLECTION_SIZE = old01_size
        numpy_10.COLLECTION_SIZE = old10_size
        # Restore in place so the module keeps the same mapping object.
        lmdb_30.LMDB_SETTINGS.clear()
        lmdb_30.LMDB_SETTINGS.update(old30_lmdb_settings)
        if repo_obj is not None:
            repo_obj._env._close_environments()
def test_view_images(monkeypatch, backend, generate_3_images):
    """Import images through the CLI, commit, and view one with ``show`` stubbed.

    ``hangar.cli.io.show`` is replaced by a stub so the ``view_data`` command
    can run without the real ``show`` being called. The second write checkout
    is always closed, including when an assertion fails.
    """
    im1, im2, im3 = generate_3_images
    imgDir = os.path.split(im1[0])[0]
    runner = CliRunner()
    with runner.isolated_filesystem():
        P = getcwd()
        repo = Repository(P, exists=False)
        repo.init('foo', 'bar')
        dummyData = np.ones_like(im1[1])

        co1 = repo.checkout(write=True, branch='master')
        co1.arraysets.init_arrayset(
            name='dummy', prototype=dummyData, named_samples=True,
            backend=backend)
        co1.arraysets['dummy']['lol.jpg'] = dummyData
        co1.close()

        res = runner.invoke(cli.import_data, ['dummy', imgDir], obj=repo)
        assert res.exit_code == 0

        co1b = repo.checkout(write=True)
        try:
            co1b.commit('hi')

            from hangar.cli import io

            def mock_show(*args, **kwargs):
                return True

            monkeypatch.setattr(io, 'show', mock_show)
            res = runner.invoke(
                cli.view_data, ['master', 'dummy', 'arr1.jpg'], obj=repo)
            assert res.exit_code == 0
        finally:
            # Previously this checkout was never closed.
            co1b.close()
def repo(managed_tmpdir) -> Repository:
    """Yield a repository initialized in ``managed_tmpdir``.

    After the consumer is done, the repository environments are closed.
    """
    init_kwargs = dict(
        user_name='tester', user_email='*****@*****.**', remove_old=True)
    fresh_repo = Repository(path=managed_tmpdir, exists=False)
    fresh_repo.init(**init_kwargs)
    yield fresh_repo
    fresh_repo._env._close_environments()
def init(repo: Repository, name, email, overwrite):
    """Initialize an empty repository at the current path.
    """
    # (Re)initialize when nothing exists yet or when overwriting was
    # requested; otherwise only report where the existing repo lives.
    if not repo.initialized or overwrite:
        repo.init(user_name=name, user_email=email, remove_old=overwrite)
        return
    click.echo(f'Repo already exists at: {repo.path}')
def test_push_fetch_records(server_instance, backend):
    """Push two branches carrying array samples and metadata via the CLI.

    ``master`` receives ten samples and two metadata entries; ``testbranch``
    receives ten more samples and one metadata entry. Each ``push`` command is
    expected to exit with code 0.
    """
    cli_runner = CliRunner()
    with cli_runner.isolated_filesystem():
        repo = Repository(getcwd(), exists=False)
        repo.init('foo', 'bar')
        payload = np.arange(50)

        # First commit on master: samples '0' .. '9' plus metadata.
        master_co = repo.checkout(write=True, branch='master')
        master_co.arraysets.init_arrayset(
            name='dummy', prototype=payload, named_samples=True,
            backend_opts=backend)
        for idx in range(10):
            payload[:] = idx
            master_co.arraysets['dummy'][str(idx)] = payload
        master_co.metadata['hello'] = 'world'
        master_co.metadata['somemetadatakey'] = 'somemetadatavalue'
        master_co.commit('first commit adding dummy data and hello meta')
        master_co.close()

        # Second commit on a new branch: samples '10' .. '19' plus metadata.
        repo.create_branch('testbranch')
        branch_co = repo.checkout(write=True, branch='testbranch')
        for idx in range(10, 20):
            payload[:] = idx
            branch_co.arraysets['dummy'][str(idx)] = payload
        branch_co.metadata['foo'] = 'bar'
        branch_co.commit(
            'first commit on test branch adding non-conflict data and meta')
        branch_co.close()

        repo.remote.add('origin', server_instance)
        for branch_name in ('master', 'testbranch'):
            result = cli_runner.invoke(
                cli.push, ['origin', branch_name], obj=repo)
            assert result.exit_code == 0
def test_empty_commit(managed_tmpdir, caplog):
    """Committing on a write checkout with nothing staged raises RuntimeError."""
    fresh_repo = Repository(path=managed_tmpdir, exists=False)
    fresh_repo.init(
        user_name='tester', user_email='*****@*****.**', remove_old=True)

    writer = fresh_repo.checkout(write=True)
    with pytest.raises(RuntimeError):
        writer.commit('this is a merge message')

    writer.close()
    fresh_repo._env._close_environments()
def test_repo_summary_does_not_error_before_any_commit_made(
        capfd, managed_tmpdir):
    """``summary()`` on a commit-less repo returns None and prints a notice."""
    fresh_repo = Repository(path=managed_tmpdir, exists=False)
    fresh_repo.init(
        user_name='tester', user_email='*****@*****.**', remove_old=True)

    summary_result = fresh_repo.summary()
    assert summary_result is None

    captured_out, _captured_err = capfd.readouterr()
    assert 'No commits have been made in the repository' in captured_out
    fresh_repo._env._close_environments()
def setup_cache(self):
    """Build a repository in the working directory with one column per backend.

    For every backend code a column named after the backend is created (the
    call signature is chosen by trial, depending on which API the installed
    library exposes), filled with ``num_samples`` float32 arrays of shape
    ``sample_shape``, and everything is committed before the checkout and the
    environments are closed.
    """
    backend_code = {
        'numpy_10': '10',
        'hdf5_00': '00',
        'hdf5_01': '01',
    }
    sample_shape = (50, 50, 10)
    num_samples = 3_000

    repo = Repository(path=os.getcwd(), exists=False)
    repo.init('tester', '*****@*****.**', remove_old=True)
    co = repo.checkout(write=True)

    # One hamming window per axis, each reshaped so it varies along its own
    # axis only, e.g. (50, 1, 1), (1, 50, 1), (1, 1, 10).
    component_arrays = []
    ndims = len(sample_shape)
    for idx, shape in enumerate(sample_shape):
        layout = [1] * ndims
        layout[idx] = shape
        component = np.hamming(shape).reshape(*layout) * 100
        component_arrays.append(component.astype(np.float32))

    # Multiply the components with broadcasting. ``np.prod`` on this list
    # would first need to build an array from differently-shaped members,
    # which newer NumPy versions reject.
    arr = component_arrays[0]
    for component in component_arrays[1:]:
        arr = arr * component
    arr = arr.astype(np.float32)

    for backend, code in backend_code.items():
        try:
            co.arraysets.init_arrayset(backend, prototype=arr, backend_opts=code)
        except TypeError:
            try:
                co.arraysets.init_arrayset(backend, prototype=arr, backend=code)
            except ValueError:
                # best-effort: a column that cannot be created is skipped
                pass
        except ValueError:
            pass
        except AttributeError:
            co.add_ndarray_column(backend, prototype=arr, backend=code)

    try:
        col = co.columns
    except AttributeError:
        col = co.arraysets

    with col as asets_cm:
        for aset in asets_cm.values():
            changer = 0
            for i in range(num_samples):
                # Bump one element so consecutive samples differ.
                arr[changer, changer, changer] += 1
                aset[i] = arr
            # NOTE(review): nesting reconstructed from a collapsed source.
            # Incrementing inside the inner loop would index past the size-10
            # axis, so the increment is assumed to sit here -- confirm.
            changer += 1
    co.commit('first commit')
    co.close()
    repo._env._close_environments()
def init(ctx, name, email, overwrite):
    """Initialize an empty repository at the current path
    """
    # An OSError raised while initializing is echoed instead of propagating.
    working_dir = os.getcwd()
    new_repo = Repository(path=working_dir, exists=False)
    try:
        new_repo.init(user_name=name, user_email=email, remove_old=overwrite)
        click.echo(f'Hangar repository initialized at {working_dir}')
    except OSError as err:
        click.echo(err)
def test_get_ecosystem_details(managed_tmpdir):
    """``_ecosystem_details`` returns a dict with host and package versions."""
    fresh_repo = Repository(path=managed_tmpdir, exists=False)
    fresh_repo.init(
        user_name='tester', user_email='*****@*****.**', remove_old=True)

    details = fresh_repo._ecosystem_details()
    assert isinstance(details, dict)
    assert 'host' in details
    assert 'packages' in details
    # Each entry unpacks into a (name, version) pair; every version is set.
    for _package_name, package_version in details['packages']:
        assert package_version is not None
    fresh_repo._env._close_environments()
class CheckoutCommit(object):
    """Benchmark suite timing checkouts of a repository holding one commit.

    The class attributes below parametrize the run over ``num_samples`` and
    ``num_metadata`` (attribute names look like airspeed-velocity settings --
    confirm against the benchmark runner in use).
    """

    params = [(5_000, 20_000), (5_000, 20_000)]
    param_names = ['num_samples', 'num_metadata']
    processes = 2
    number = 1
    repeat = (2, 4, 20)
    warmup_time = 0

    def setup(self, num_samples, num_metadata):
        """Create a repo, commit the samples and metadata, drop the checkout."""
        self.tmpdir = mkdtemp()
        self.repo = Repository(path=self.tmpdir, exists=False)
        self.repo.init('tester', '*****@*****.**', remove_old=True)
        self.co = self.repo.checkout(write=True)

        sample = np.array([0, ], dtype=np.uint8)
        # Three call signatures are tried in turn; which one succeeds depends
        # on the API exposed by the installed library.
        try:
            column = self.co.arraysets.init_arrayset(
                'aset', prototype=sample, backend_opts='10')
        except TypeError:
            column = self.co.arraysets.init_arrayset(
                'aset', prototype=sample, backend='10')
        except AttributeError:
            column = self.co.add_ndarray_column(
                'aset', prototype=sample, backend='10')

        with column as sample_writer:
            for index in range(num_samples):
                sample[:] = index % 255
                sample_writer[index] = sample

        with self.co.metadata as meta_writer:
            for index in range(num_metadata):
                meta_writer[index] = f'{index % 500} data'

        self.co.commit('first')
        self.co.close()
        # The timed methods assign a new checkout to ``self.co``.
        self.co = None

    def teardown(self, num_samples, num_metadata):
        """Close whatever checkout is left, then remove the temp dir.

        A ``PermissionError`` raised by ``close`` is ignored.
        """
        try:
            self.co.close()
        except PermissionError:
            pass
        self.repo._env._close_environments()
        rmtree(self.tmpdir)

    def time_checkout_read_only(self, num_samples, num_metadata):
        """Timed section: open a read-only checkout."""
        self.co = self.repo.checkout(write=False)

    def time_checkout_write_enabled(self, num_samples, num_metadata):
        """Timed section: open a write-enabled checkout and close it again."""
        self.co = self.repo.checkout(write=True)
        self.co.close()
def test_starting_up_repo_warns_should_exist_manual_args(managed_tmpdir):
    """Constructing with ``exists=True`` on an empty directory warns.

    After the ``UserWarning``, the repository can still be initialized: it has
    a ``master`` branch, lives in ``.hangar`` and starts out ``CLEAN``.
    """
    with pytest.warns(UserWarning):
        warned_repo = Repository(path=managed_tmpdir, exists=True)
    warned_repo.init(
        user_name='tester', user_email='*****@*****.**', remove_old=True)

    assert warned_repo.list_branches() == ['master']
    assert os.path.isdir(warned_repo._repo_path)
    expected_location = os.path.join(managed_tmpdir, '.hangar')
    assert warned_repo._repo_path == expected_location

    writer = warned_repo.checkout(write=True)
    assert writer.diff.status() == 'CLEAN'
    writer.close()
    warned_repo._env._close_environments()
def test_check_repository_software_version_startup(managed_tmpdir):
    """A reopened repository reports the base version of the installed hangar."""
    from hangar import Repository, __version__
    from pkg_resources import parse_version

    # Create the repository and release its environments.
    created = Repository(managed_tmpdir, exists=False)
    created.init('test user', '*****@*****.**', remove_old=True)
    created._env._close_environments()

    # Open the same location again and inspect what was recorded.
    reopened = Repository(managed_tmpdir, exists=True)
    assert reopened.initialized is True
    expected_version = parse_version(__version__).base_version
    assert reopened.version == expected_version
    reopened._env._close_environments()
def test_initial_arrayset(managed_tmpdir, randomsizedarray):
    """A new write checkout has no columns until one is added."""
    fresh_repo = Repository(path=managed_tmpdir, exists=False)
    fresh_repo.init(
        user_name='tester', user_email='*****@*****.**', remove_old=True)
    writer = fresh_repo.checkout(write=True)

    assert len(writer.columns) == 0
    with pytest.raises(KeyError):
        writer.columns['aset']

    new_column = writer.add_ndarray_column('aset', prototype=randomsizedarray)
    assert new_column.column == 'aset'

    writer.close()
    fresh_repo._env._close_environments()
def test_summary_before_commit_made(managed_tmpdir):
    """The ``summary`` CLI command succeeds on a repository with no commits.

    The command must exit with code 0 and report that no commits have been
    made. The repository environments are closed afterwards regardless of
    the outcome.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        P = getcwd()
        new_repo = Repository(P, exists=False)
        new_repo.init('Test User', '*****@*****.**')
        try:
            res = runner.invoke(cli.summary, obj=new_repo)
            assert res.exit_code == 0
            assert 'No commits have been made in the repository' in res.stdout
        finally:
            # The call parentheses were missing, so cleanup never ran.
            new_repo._env._close_environments()
def test_force_release_writer_lock_works(managed_tmpdir):
    """Force-releasing the writer lock while a checkout holds it warns."""
    fresh_repo = Repository(path=managed_tmpdir, exists=False)
    fresh_repo.init(
        user_name='tester', user_email='*****@*****.**', remove_old=True)
    writer = fresh_repo.checkout(write=True)

    # The release is issued from the repository, not the checkout holding
    # the lock, and must emit a ResourceWarning.
    with pytest.warns(ResourceWarning):
        fresh_repo.force_release_writer_lock()

    # NOTE(review): the comparison result is discarded, so nothing is
    # verified here. Possibly an ``assert`` was intended -- confirm which
    # value ``_writer_lock`` should hold before adding one.
    writer._writer_lock == 'LOCK_AVAILABLE'
    writer.close()
    fresh_repo._env._close_environments()
def test_check_repository_software_version_fails_on_older_repo(managed_tmpdir):
    """Reopening a repository stamped with version ``0.2.0`` raises."""
    from hangar import Repository
    from hangar.records.vcompat import set_repository_software_version

    stale_repo = Repository(managed_tmpdir, exists=False)
    stale_repo.init('test user', '*****@*****.**', remove_old=True)

    # Overwrite the recorded software version so the next open must reject it.
    set_repository_software_version(
        stale_repo._env.branchenv, '0.2.0', overwrite=True)
    stale_repo._env._close_environments()

    with pytest.raises(RuntimeError):
        Repository(managed_tmpdir, exists=True)
def init(uname, email, overwrite):
    # Initialize a repository in the current working directory. A user name
    # given as several words (list/tuple) is joined with single spaces.
    working_dir = os.getcwd()
    if isinstance(uname, (list, tuple)):
        uname = ' '.join(uname)
    repo = Repository(path=working_dir)

    if not overwrite:
        # Without overwrite, an OSError from init is echoed, not raised.
        try:
            repo_dir = repo.init(
                user_name=uname, user_email=email, remove_old=False)
        except OSError as err:
            click.echo(err)
    else:
        repo_dir = repo.init(
            user_name=uname, user_email=email, remove_old=True)
def test_check_repository_software_version_works_on_newer_hangar_version(
        managed_tmpdir, monkeypatch, futureVersion):
    """A repo still opens after ``hangar.__version__`` is patched forward.

    The version recorded in the repository must be unchanged when it is
    reopened under the patched ``futureVersion``.
    """
    from hangar import Repository
    import hangar

    created = Repository(managed_tmpdir, exists=False)
    created.init('test user', '*****@*****.**', remove_old=True)
    recorded_version = created.version
    created._env._close_environments()

    # Pretend a different hangar release is installed, then reopen.
    monkeypatch.setattr(hangar, '__version__', futureVersion)
    reopened = Repository(managed_tmpdir, exists=True)
    assert hangar.__version__ == futureVersion
    assert reopened.version == recorded_version
    reopened._env._close_environments()
def test_check_repository_software_version_fails_hangar_version(
        monkeypatch, managed_tmpdir, repo_v, hangar_v):
    """Opening a repo stamped ``repo_v`` under hangar ``hangar_v`` raises."""
    import hangar

    # Patch the version before the imports below, as the original did.
    for version_target in ("hangar.__version__", "hangar.context.__version__"):
        monkeypatch.setattr(version_target, hangar_v)

    from hangar import Repository
    from hangar.records.vcompat import set_repository_software_version

    stamped_repo = Repository(managed_tmpdir, exists=False)
    stamped_repo.init('test user', '*****@*****.**', remove_old=True)

    # Overwrite the recorded software version; the next open should fail.
    set_repository_software_version(
        stamped_repo._env.branchenv, repo_v, overwrite=True)
    try:
        assert stamped_repo.version == repo_v
    finally:
        stamped_repo._env._close_environments()

    assert hangar.__version__ == hangar_v
    with pytest.raises(RuntimeError):
        Repository(managed_tmpdir, exists=True)
def test_import_images(backend, plug, generate_3_images):
    """Import images via the CLI, without and then with ``--overwrite``.

    A zero-filled sample is stored under ``arr1.jpg`` first. A plain import
    must leave it untouched while adding the other two images; an import with
    ``--overwrite`` must replace it with the first image.
    """
    first_image, second_image, third_image = generate_3_images
    image_dir = os.path.dirname(first_image[0])

    cli_runner = CliRunner()
    with cli_runner.isolated_filesystem():
        workdir = getcwd()
        repo = Repository(workdir, exists=False)
        repo.init('foo', 'bar')

        zeros = np.ones_like(first_image[1])
        zeros[:] = 0
        seed_co = repo.checkout(write=True, branch='master')
        seed_co.arraysets.init_arrayset(
            name='dummy', prototype=zeros, named_samples=True, backend=backend)
        seed_co.arraysets['dummy']['arr1.jpg'] = zeros
        seed_co.close()

        # Plain import: the pre-existing 'arr1.jpg' sample is kept.
        result = cli_runner.invoke(
            cli.import_data, ['--plugin', plug, 'dummy', image_dir], obj=repo)
        assert result.exit_code == 0
        check_co = repo.checkout(write=True)
        expected_after_import = (
            ('arr1.jpg', zeros),
            ('arr2.jpg', second_image[1]),
            ('arr3.jpg', third_image[1]),
        )
        for sample_name, expected in expected_after_import:
            assert np.allclose(check_co.arraysets['dummy'][sample_name], expected)
        check_co.close()

        # Import with --overwrite: 'arr1.jpg' now holds the first image.
        result = cli_runner.invoke(
            cli.import_data,
            ['--plugin', plug, '--overwrite', 'dummy', image_dir],
            obj=repo)
        assert result.exit_code == 0
        check_co = repo.checkout(write=True)
        expected_after_overwrite = (
            ('arr1.jpg', first_image[1]),
            ('arr2.jpg', second_image[1]),
            ('arr3.jpg', third_image[1]),
        )
        for sample_name, expected in expected_after_overwrite:
            assert np.allclose(check_co.arraysets['dummy'][sample_name], expected)
        check_co.close()
def test_force_release_writer_lock(managed_tmpdir, monkeypatch):
    """Closing a write checkout whose ``_writer_lock`` was replaced raises.

    The checkout's lock value is swapped for a bogus string, and ``close`` is
    expected to raise ``RuntimeError`` while ``_verify_alive`` and the
    columns' ``_destruct`` are stubbed out. The original lock value is then
    put back so the checkout can be closed for real.
    """
    # NOTE(review): the extent of the ``with`` block was reconstructed from a
    # collapsed one-line source -- confirm against the original file.
    repo = Repository(path=managed_tmpdir, exists=False)
    repo.init(user_name='tester', user_email='*****@*****.**', remove_old=True)
    co = repo.checkout(write=True)
    # Keep a copy of the genuine lock value so it can be restored later.
    orig_lock = str(co._writer_lock)

    def mock_true(*args, **kwargs):
        return True

    # try to release the writer lock with a process which has different uid
    co._writer_lock = 'lololol'
    with pytest.raises(RuntimeError):
        monkeypatch.setattr(co, '_verify_alive', mock_true)
        monkeypatch.setattr(co._columns, '_destruct', mock_true)
        co.close()
    # replace, but rest of object is closed
    monkeypatch.setattr(co, '_writer_lock', orig_lock)
    monkeypatch.delattr(co._columns, '_destruct')
    co.close()
    repo._env._close_environments()
def test_force_release_writer_lock(managed_tmpdir, monkeypatch):
    """Closing a write checkout whose ``_writer_lock`` was replaced raises.

    A metadata entry is staged, the checkout's lock value is swapped for a
    bogus string, and ``close`` is expected to raise ``RuntimeError`` while
    the name-mangled ``__acquire_writer_lock`` is stubbed out. The original
    lock value is then put back so the checkout can be closed for real.
    """
    # NOTE(review): the extent of the ``with`` block was reconstructed from a
    # collapsed one-line source -- confirm against the original file.
    # NOTE(review): ``heads`` is imported but not used in this test.
    from hangar.records import heads
    repo = Repository(path=managed_tmpdir, exists=False)
    repo.init(user_name='tester', user_email='*****@*****.**', remove_old=True)
    co = repo.checkout(write=True)
    # Keep a copy of the genuine lock value so it can be restored later.
    orig_lock = str(co._writer_lock)

    def mock_true(*args, **kwargs):
        return True

    co.metadata['hello'] = 'world'
    # try to release the writer lock with a process which has different uid
    co._writer_lock = 'lololol'
    with pytest.raises(RuntimeError):
        monkeypatch.setattr(co, '_WriterCheckout__acquire_writer_lock', mock_true)
        co.close()
    # replace, but rest of object is closed
    monkeypatch.setattr(co, '_writer_lock', orig_lock)
    co.close()
    repo._env._close_environments()
def test_initial_read_checkout(managed_tmpdir):
    """A default checkout of a repository without commits raises ValueError."""
    fresh_repo = Repository(path=managed_tmpdir, exists=False)
    fresh_repo.init(
        user_name='tester', user_email='*****@*****.**', remove_old=True)

    with pytest.raises(ValueError):
        fresh_repo.checkout()

    fresh_repo._env._close_environments()
class _WriterSuite:
    """Shared benchmark body for reading/writing 100x100 arrays in a column.

    ``setup`` reads ``self.method``, ``self.backend``, ``self.dtype`` and
    ``self.num_samples``; none of them is defined here, so they must be
    supplied from outside (presumably by subclasses -- confirm).
    """

    processes = 2
    repeat = 2
    number = 1
    warmup_time = 0

    def setup(self):
        """Create the repository and prepare it for the selected method.

        For ``method == 'read'`` the samples are written and committed up
        front and a read-only checkout is kept; otherwise the write checkout
        and the prototype array are kept for ``write``.
        """
        # lookup tables for the externally supplied backend / dtype names
        self.backend_code = {'numpy_10': '10', 'hdf5_00': '00'}
        self.type_code = {
            'float32': np.float32,
            'uint16': np.uint16,
        }

        self.tmpdir = mkdtemp()
        self.repo = Repository(path=self.tmpdir, exists=False)
        self.repo.init('tester', '*****@*****.**', remove_old=True)
        writer_co = self.repo.checkout(write=True)

        # Prototype: product of a column and a row hamming window, scaled,
        # rounded and cast to the requested dtype.
        column_window = np.hamming(100).reshape(100, 1)
        row_window = np.hamming(100).reshape(1, 100)
        scaled = np.round(column_window * row_window * 1000)
        typed = scaled.astype(self.type_code[self.dtype])
        arr = np.zeros((100, 100), dtype=typed.dtype)
        arr[:, :] = typed

        code = self.backend_code[self.backend]
        try:
            aset = writer_co.arraysets.init_arrayset(
                'aset', prototype=arr, backend_opts=code)
        except TypeError:
            aset = writer_co.arraysets.init_arrayset(
                'aset', prototype=arr, backend=code)

        if self.method != 'read':
            self.arr = arr
            self.co = writer_co
            return

        with aset as sample_writer:
            for index in range(self.num_samples):
                arr += 1
                sample_writer[index] = arr
        writer_co.commit('first commit')
        writer_co.close()
        self.co = self.repo.checkout(write=False)

    def teardown(self):
        """Close the checkout and environments, then delete the temp dir."""
        self.co.close()
        self.repo._env._close_environments()
        rmtree(self.tmpdir)

    def read(self):
        """Fetch every sample of column ``'aset'`` once."""
        aset = self.co.arraysets['aset']
        sample_keys = list(aset.keys())
        with aset as sample_reader:
            for key in sample_keys:
                arr = sample_reader[key]

    def write(self):
        """Store ``num_samples`` arrays, incrementing the array before each."""
        arr = self.arr
        aset = self.co.arraysets['aset']
        with aset as sample_writer:
            for index in range(self.num_samples):
                arr += 1
                sample_writer[index] = arr

    def size(self):
        """Return ``folder_size`` of the repository path, recursively."""
        return folder_size(self.repo._env.repo_path, recurse=True)