def diff(repo: Repository, dev, master):
    """Display diff of DEV commit/branch to MASTER commit/branch.

    If no MASTER is specified, then the staging area branch HEAD will be
    used as the commit digest for MASTER. This operation will return a diff
    which could be interpreted as if you were merging the changes in DEV into
    MASTER.

    TODO: VERIFY ORDER OF OUTPUT IS CORRECT.
    """
    # NOTE(review): this looks like a click command, so the docstring above is
    # presumably shown as CLI help text -- keep it user facing.
    from hangar.records.commiting import (
        expand_short_commit_digest,
        get_staging_branch_head,
    )
    from hangar.records.summarize import status

    def _resolve(ref):
        # A known branch name is used unchanged; any other value is handed to
        # the short-digest expansion helper.
        if ref in repo.list_branches():
            return ref
        return expand_short_commit_digest(repo._env.refenv, ref)

    dev = _resolve(dev)
    if master is None:
        # No MASTER given: fall back to the staging branch head.
        master = get_staging_branch_head(repo._env.branchenv)
    else:
        master = _resolve(master)

    comparison = repo.diff(master, dev)
    report = status(
        hashenv=repo._env.hashenv,
        branch_name=dev,
        diff=comparison.diff,
    )
    click.echo(report.getvalue())
def test_delete_samples(repo_20_filled_samples2):
    """Deleting samples 5..9 from "dummy" is summarized as 5 deleted samples."""
    from hangar.records.summarize import status

    repo = repo_20_filled_samples2
    expected = (
        '============ \n'
        '| Branch: master \n'
        ' \n'
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 5 \n'
        '| - "dummy": 5 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        ' \n'
    )

    writer = repo.checkout(write=True)
    for sample_key in range(5, 10):
        del writer.columns['dummy'][sample_key]
    staged = writer.diff.staged()
    writer.close()

    rendered = status(repo._env.hashenv, 'master', staged.diff).getvalue()
    assert rendered == expected
def test_add_new_aset_schema_and_sample_and_delete_old_aset(dummy_repo):
    """Adding "new_aset" with 5 samples and deleting "dummy" shows in the summary.

    The expected text lists one added schema with five samples, and one
    deleted schema ("dummy") with ten samples; nothing is mutated.
    """
    from hangar.records.summarize import status

    expected = (
        '============ \n'
        '| Branch: master \n'
        ' \n'
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 1 \n'
        '| - "new_aset": \n'
        '| named: True \n'
        '| dtype: float32 \n'
        '| (max) shape: (10, 10) \n'
        '| variable shape: False \n'
        '| backend: 00 \n'
        "| backend opts: {'shuffle': None, 'complib': 'blosc:zstd', 'complevel': 3} \n"
        '|---------- \n'
        '| Samples: 5 \n'
        '| - "new_aset": 5 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 1 \n'
        '| - "dummy": \n'
        '| named: True \n'
        '| dtype: int64 \n'
        '| (max) shape: (50,) \n'
        '| variable shape: False \n'
        '| backend: 10 \n'
        '| backend opts: {} \n'
        '|---------- \n'
        '| Samples: 10 \n'
        '| - "dummy": 10 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
    )

    co2 = dummy_repo.checkout(write=True)
    # The returned arrayset handle is not needed; samples are written through
    # ``co2.arraysets['new_aset']`` below.
    co2.arraysets.init_arrayset('new_aset', shape=(10, 10), dtype=np.float32)
    for idx in range(5):
        dummyData = np.random.randn(10, 10).astype(np.float32)
        co2.arraysets['new_aset'][idx] = dummyData
    del co2.arraysets['dummy']
    df = co2.diff.staged()
    co2.close()

    assert status('master', df.diff).getvalue() == expected
def status(repo: Repository):
    """Display changes made in the staging area compared to its base commit.
    """
    # Aliased so the summary helper does not shadow this function's own name.
    from hangar.records.summarize import status as render_status

    checkout = repo.checkout(write=True)
    try:
        staged = checkout.diff.staged()
        report = render_status(
            checkout._hashenv, checkout.branch_name, staged.diff)
        # The rendered text is echoed as-is, without an extra trailing newline.
        click.echo(report.getvalue(), nl=False)
    finally:
        # Always close the write-enabled checkout, even if rendering fails.
        checkout.close()
def test_add_new_column_schema_and_samples(repo_20_filled_samples2):
    """A new ndarray column with 5 samples appears under ADDED in the summary."""
    from hangar.records.summarize import status

    repo = repo_20_filled_samples2
    expected_lines = (
        '============ \n',
        '| Branch: master \n',
        ' \n',
        '============ \n',
        '| ADDED \n',
        '|---------- \n',
        '| Schema: 1 \n',
        '| - "new_aset": \n',
        '| digest="1=555a833b66ab" \n',
        '| column_layout: flat \n',
        '| column_type: ndarray \n',
        '| schema_hasher_tcode: 1 \n',
        '| data_hasher_tcode: 0 \n',
        '| schema_type: fixed_shape \n',
        '| shape: (10, 10) \n',
        '| dtype: float32 \n',
        '| backend: 01 \n',
        '| backend_options: {\'complib\': \'blosc:lz4hc\', \'complevel\': 5, \'shuffle\': \'byte\'} \n',
        '|---------- \n',
        '| Samples: 5 \n',
        '| - "new_aset": 5 \n',
        ' \n',
        '============ \n',
        '| DELETED \n',
        '|---------- \n',
        '| Schema: 0 \n',
        '|---------- \n',
        '| Samples: 0 \n',
        ' \n',
        '============ \n',
        '| MUTATED \n',
        '|---------- \n',
        '| Schema: 0 \n',
        '|---------- \n',
        '| Samples: 0 \n',
        ' \n',
    )
    expected = ''.join(expected_lines)

    writer = repo.checkout(write=True)
    writer.add_ndarray_column('new_aset', shape=(10, 10), dtype=np.float32)
    for sample_key in range(5):
        sample = np.random.randn(10, 10).astype(np.float32)
        writer.columns['new_aset'][sample_key] = sample
    staged = writer.diff.staged()
    writer.close()

    assert status(repo._env.hashenv, 'master', staged.diff).getvalue() == expected
def test_status(repo_20_filled_samples2):
    """CLI ``status`` output equals the summary rendered from the staged diff."""
    from hangar.records.summarize import status

    repo = repo_20_filled_samples2
    payload = np.arange(50).astype(np.int64)

    writer = repo.checkout(write=True)
    for key in range(10, 20):
        payload[:] = key
        # Each value is stored twice: under the str key and the int key.
        writer.columns['dummy'][str(key)] = payload
        writer.columns['dummy'][key] = payload
    staged = writer.diff.staged()
    writer.close()

    expected = status(repo._env.hashenv, 'master', staged.diff).getvalue()

    result = CliRunner().invoke(cli.status, obj=repo)
    assert result.exit_code == 0
    assert result.stdout == expected
def test_status(dummy_repo):
    """CLI ``status`` output equals the summary rendered from the staged diff.

    Uses the ``arraysets`` / ``metadata`` checkout attributes.
    """
    from hangar.records.summarize import status

    payload = np.arange(50).astype(np.int64)

    writer = dummy_repo.checkout(write=True)
    for key in range(10, 20):
        payload[:] = key
        # Each value is stored twice: under the str key and the int key.
        writer.arraysets['dummy'][str(key)] = payload
        writer.arraysets['dummy'][key] = payload
    writer.metadata['foo'] = 'bar'
    staged = writer.diff.staged()
    writer.close()

    expected = status('master', staged.diff).getvalue()

    result = CliRunner().invoke(cli.status, obj=dummy_repo)
    assert result.exit_code == 0
    assert result.stdout == expected
def test_add_metadata_and_samples_to_existing_aset(dummy_repo):
    """Twenty new "dummy" samples plus one metadata key show up under ADDED."""
    from hangar.records.summarize import status

    expected = (
        '============ \n'
        '| Branch: master \n'
        ' \n'
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 20 \n'
        '| - "dummy": 20 \n'
        '|---------- \n'
        '| Metadata: 1 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
    )

    payload = np.arange(50).astype(np.int64)
    writer = dummy_repo.checkout(write=True)
    for key in range(10, 20):
        payload[:] = key
        # Ten iterations, two keys (str and int) each: twenty added samples.
        writer.arraysets['dummy'][str(key)] = payload
        writer.arraysets['dummy'][key] = payload
    writer.metadata['foo'] = 'bar'
    staged = writer.diff.staged()
    writer.close()

    rendered = status('master', staged.diff).getvalue()
    assert rendered == expected
def test_delete_metadata_and_samples(dummy_repo):
    """Removing 5 "dummy" samples and one metadata key shows under DELETED."""
    from hangar.records.summarize import status

    expected = (
        '============ \n'
        '| Branch: master \n'
        ' \n'
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 5 \n'
        '| - "dummy": 5 \n'
        '|---------- \n'
        '| Metadata: 1 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        '|---------- \n'
        '| Metadata: 0 \n'
        ' \n'
    )

    writer = dummy_repo.checkout(write=True)
    for sample_key in range(5, 10):
        del writer.arraysets['dummy'][sample_key]
    del writer.metadata['hello']
    staged = writer.diff.staged()
    writer.close()

    rendered = status('master', staged.diff).getvalue()
    assert rendered == expected
def commit(repo: Repository, message):
    """Commits outstanding changes.

    Commit changes to the given files into the repository. You will need to
    'push' to push up your changes to other repositories.
    """
    # NOTE(review): this looks like a click command, so the docstring above is
    # presumably shown as CLI help text; parameter notes are kept in comments.
    #
    # repo:    repository whose write-enabled checkout is committed.
    # message: when falsy, an editor is opened to collect the message;
    #          otherwise its items are joined with newlines (presumably a
    #          tuple of strings from a repeatable option -- confirm at caller).
    #
    # Raises click.ClickException if ``co.commit`` raises RuntimeError.
    from hangar.records.summarize import status

    co = repo.checkout(write=True)
    try:
        if not message:
            diff = co.diff.staged()
            status_txt = status(co._hashenv, co.branch_name, diff.diff)
            status_txt.seek(0)
            marker = '# Changes To Be committed: \n'
            hint = ['\n', '\n', marker, '# \n']
            # Append the staged-change summary as '#'-prefixed lines below the
            # marker so it can be cut off again after editing.
            hint.extend(f'# {line}' for line in status_txt.readlines())
            # open default system editor
            message = click.edit(''.join(hint))
            if message is None:
                click.echo('Aborted!')
                return
            # Only the text the user wrote above the marker is the message.
            msg = message.split(marker)[0].rstrip()
            if not msg:
                click.echo('Aborted! Empty commit message')
                return
        else:
            msg = '\n'.join(message)

        click.echo('Commit message:\n' + msg)
        try:
            digest = co.commit(msg)
            click.echo(f'Commit Successful. Digest: {digest}')
        except RuntimeError as e:
            # ClickException takes a message string; chain the original error
            # so the underlying traceback is not lost.
            raise click.ClickException(str(e)) from e
    finally:
        # Always close the write-enabled checkout, including on early return.
        co.close()
def test_add_samples_to_existing_column(repo_20_filled_samples2):
    """Twenty new samples written to "dummy" are reported under ADDED only."""
    from hangar.records.summarize import status

    repo = repo_20_filled_samples2
    expected = (
        '============ \n'
        '| Branch: master \n'
        ' \n'
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 20 \n'
        '| - "dummy": 20 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        ' \n'
    )

    payload = np.arange(50).astype(np.int64)
    writer = repo.checkout(write=True)
    for key in range(10, 20):
        payload[:] = key
        # Ten iterations, two keys (str and int) each: twenty added samples.
        writer.columns['dummy'][str(key)] = payload
        writer.columns['dummy'][key] = payload
    staged = writer.diff.staged()
    writer.close()

    rendered = status(repo._env.hashenv, 'master', staged.diff).getvalue()
    assert rendered == expected