def test_diff_refs_modified_file(self):
    repo_type = DATASETS
    mdpath = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
    entity = 'dataset-ex'
    specpath = os.path.join('vision-computer', 'images', entity)
    config_test = deepcopy(config)
    config_test['mlgit_path'] = '.ml-git'
    m = Metadata(entity, mdpath, config_test, repo_type)
    m.init()
    ensure_path_exists(os.path.join(mdpath, specpath, entity))
    manifestpath = os.path.join(mdpath, specpath, 'MANIFEST.yaml')
    shutil.copy('hdata/dataset-ex.spec', os.path.join(mdpath, specpath, '{}.spec'.format(entity)))
    yaml_save(files_mock, manifestpath)
    sha1 = m.commit(manifestpath, 'test')
    files_mock_copy = deepcopy(files_mock)
    del files_mock_copy['zdj7WZzR8Tw87Dx3dm76W5aehnT23GSbXbQ9qo73JgtwREGwB']
    files_mock_copy['NewHash'] = {'7.jpg'}
    yaml_save(files_mock_copy, manifestpath)
    sha2 = m.commit(manifestpath, 'test')
    added_files, deleted_files, modified_files = m.diff_refs_with_modified_files(entity, sha1, sha2)
    self.assertEqual(len(added_files), 0)
    self.assertEqual(len(deleted_files), 0)
    self.assertEqual(len(modified_files), 1)

def store_del(store_type, bucket, global_conf=False):
    if not valid_store_type(store_type):
        return
    try:
        config_path = get_config_path(global_conf)
        conf = yaml_load(config_path)
    except Exception as e:
        log.error(e, class_name=ADMIN_CLASS_NAME)
        return
    store_exists = 'store' in conf and store_type in conf['store'] and bucket in conf['store'][store_type]
    if not store_exists:
        log.warn('Store [%s://%s] not found in configuration file.' % (store_type, bucket),
                 class_name=ADMIN_CLASS_NAME)
        return
    del conf['store'][store_type][bucket]
    log.info('Removed store [%s://%s] from configuration file.' % (store_type, bucket),
             class_name=ADMIN_CLASS_NAME)
    yaml_save(conf, config_path)

def test_remote_fsck(self):
    testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets')
    hfspath = os.path.join(self.tmp_dir, 'objectsfs')
    ohfs = MultihashFS(hfspath)
    ohfs.put(HDATA_IMG_1)
    s3 = boto3.resource(
        's3',
        region_name='us-east-1',
        aws_access_key_id='fake_access_key',
        aws_secret_access_key='fake_secret_key',
    )
    s3.Object(testbucketname, 'zdj7WWsMkELZSGQGgpm5VieCWV8NxY5n5XEP73H4E7eeDMA3A').delete()
    self.assertRaises(botocore.exceptions.ClientError, lambda: self.check_delete(s3, testbucketname))
    mdpath = os.path.join(self.tmp_dir, 'metadata-test')
    dataset_spec = get_sample_spec(testbucketname)
    specpath = os.path.join(mdpath, 'vision-computing', 'images', 'dataset-ex')
    ensure_path_exists(specpath)
    yaml_save(dataset_spec, os.path.join(specpath, 'dataset-ex.spec'))
    manifestpath = os.path.join(specpath, 'MANIFEST.yaml')
    yaml_save({'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh': {'imghires.jpg'}}, manifestpath)
    fullspecpath = os.path.join(specpath, 'dataset-ex.spec')
    spec = 'vision-computing__images__dataset-ex__5'
    c = yaml_load('hdata/config.yaml')
    r = LocalRepository(c, hfspath)
    ret = r.remote_fsck(mdpath, spec, fullspecpath, 2, True, True)
    self.assertTrue(ret)
    self.assertEqual(None, s3.Object(testbucketname, 'zdj7WWsMkELZSGQGgpm5VieCWV8NxY5n5XEP73H4E7eeDMA3A').load())

def test_add_metrics_file(self):
    hashfs_path = os.path.join(self.tmp_dir, 'objectsfs')
    test_config = yaml_load('hdata/config.yaml')
    local_repo = LocalRepository(test_config, hashfs_path, repo_type=MODELS)
    spec_path = os.path.join(self.tmp_dir, 'model-ex.spec')
    shutil.copy('hdata/dataset-ex.spec', spec_path)
    # rewrite the copied dataset spec as a model spec
    spec_file = yaml_load(spec_path)
    model = spec_file[DATASET_SPEC_KEY].copy()
    del spec_file[DATASET_SPEC_KEY]
    spec_file[MODEL_SPEC_KEY] = model
    yaml_save(spec_file, spec_path)
    metrics_file_path = os.path.join(self.tmp_dir, 'metrics.csv')
    self.create_csv_file(metrics_file_path, {'metric_a': 10, 'metric_b': 9})
    local_repo.add_metrics(spec_path, (), metrics_file_path)
    test_spec_file = yaml_load(spec_path)
    self.assertEqual(test_spec_file[MODEL_SPEC_KEY]['metrics'].get('metric_a', ''), 10.0)
    self.assertEqual(test_spec_file[MODEL_SPEC_KEY]['metrics'].get('metric_b', ''), 9.0)

def test_get_metrics(self):
    repo_type = MODELS
    mdpath = os.path.join(self.test_dir, 'mdata', repo_type, 'metadata')
    specpath = os.path.join('vision-computer', 'images')
    entity = 'model-ex'
    m = Metadata(entity, self.test_dir, config, repo_type)
    m.init()
    ensure_path_exists(os.path.join(mdpath, specpath, entity))
    spec_metadata_path = os.path.join(mdpath, specpath, entity, 'model-ex.spec')
    shutil.copy('hdata/dataset-ex.spec', spec_metadata_path)
    spec_file = yaml_load(spec_metadata_path)
    spec_file[MODEL_SPEC_KEY] = deepcopy(spec_file[DATASET_SPEC_KEY])
    del spec_file[DATASET_SPEC_KEY]
    spec_file[MODEL_SPEC_KEY]['metrics'] = {'metric_1': 0, 'metric_2': 1}
    yaml_save(spec_file, spec_metadata_path)
    tag = 'vision-computer__images__model-ex__1'
    sha = m.commit(spec_metadata_path, specpath)
    m.tag_add(tag)
    metrics = m._get_metrics(entity, sha)
    test_table = PrettyTable()
    test_table.field_names = ['Name', 'Value']
    test_table.align['Name'] = 'l'
    test_table.align['Value'] = 'l'
    test_table.add_row(['metric_1', 0])
    test_table.add_row(['metric_2', 1])
    test_metrics = '\nmetrics:\n{}'.format(test_table.get_string())
    self.assertEqual(metrics, test_metrics)

def storage_del(storage_type, bucket, global_conf=False):
    if not valid_storage_type(storage_type):
        return
    try:
        config_path = get_config_path(global_conf)
        conf = yaml_load(config_path)
    except Exception as e:
        log.error(e, class_name=ADMIN_CLASS_NAME)
        return
    storage_exists = STORAGE_CONFIG_KEY in conf and storage_type in conf[STORAGE_CONFIG_KEY] \
        and bucket in conf[STORAGE_CONFIG_KEY][storage_type]
    if not storage_exists:
        log.warn(output_messages['WARN_STORAGE_NOT_IN_CONFIG'] % (storage_type, bucket),
                 class_name=ADMIN_CLASS_NAME)
        return
    del conf[STORAGE_CONFIG_KEY][storage_type][bucket]
    log.info(output_messages['INFO_REMOVED_STORAGE'] % (storage_type, bucket), class_name=ADMIN_CLASS_NAME)
    yaml_save(conf, config_path)

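# A minimal usage sketch for storage_del above (bucket and type names are
# hypothetical; assumes the config file already has an 's3h' entry for
# 'my-bucket' and that 's3h' passes valid_storage_type):
#
#   storage_del('s3h', 'my-bucket')        # entry removed, config rewritten
#   storage_del('s3h', 'other-bucket')     # not in config: warns, file untouched
#   storage_del('unknown', 'my-bucket')    # invalid storage type: returns immediately
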
def test_diff_refs_add_file(self):
    repo_type = DATASETS
    mdpath = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
    entity = 'dataset-ex'
    specpath = os.path.join('vision-computer', 'images', entity)
    config_test = deepcopy(config)
    config_test['mlgit_path'] = '.ml-git'
    m = Metadata(entity, mdpath, config_test, repo_type)
    m.init()
    ensure_path_exists(os.path.join(mdpath, specpath, entity))
    manifestpath = os.path.join(mdpath, specpath, 'MANIFEST.yaml')
    shutil.copy('hdata/dataset-ex.spec', os.path.join(mdpath, specpath, '{}.spec'.format(entity)))
    yaml_save(files_mock, manifestpath)
    sha1 = m.commit(manifestpath, 'test')
    files_mock_copy = deepcopy(files_mock)
    files_mock_copy['zPaksM5tNewHashQ2VABPvvfC3VW6wFRTWKvFhUW5QaDx6JMoma'] = {'11.jpg'}
    yaml_save(files_mock_copy, manifestpath)
    sha2 = m.commit(manifestpath, 'test')
    added_files, deleted_files, modified_files = m.diff_refs_with_modified_files(entity, sha1, sha2)
    self.assertEqual(len(added_files), 1)
    self.assertEqual(len(deleted_files), 0)
    self.assertEqual(len(modified_files), 0)

def test_fetch(self):
    mdpath = os.path.join(self.tmp_dir, 'metadata-test')
    testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets')
    config_spec = get_sample_config_spec(testbucketname, testprofile, testregion)
    dataset_spec = get_sample_spec(testbucketname)
    specpath = os.path.join(mdpath, 'vision-computing', 'images', 'dataset-ex')
    ensure_path_exists(specpath)
    yaml_save(dataset_spec, os.path.join(specpath, 'dataset-ex.spec'))
    manifestpath = os.path.join(specpath, 'MANIFEST.yaml')
    yaml_save({'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh': {'imghires.jpg'}}, manifestpath)
    objectpath = os.path.join(self.tmp_dir, 'objects-test')
    spec = 'vision-computing__images__dataset-ex__5'
    r = LocalRepository(config_spec, objectpath)
    r.fetch(mdpath, spec, None)
    # collect every object file that fetch placed in the local object store
    fs = set()
    for root, dirs, files in os.walk(objectpath):
        for file in files:
            fs.add(file)
    # 'hs' is the module-level fixture holding the expected chunk hashes
    self.assertEqual(len(hs), len(fs))
    self.assertEqual(len(hs.difference(fs)), 0)

def remote_add(repotype, ml_git_remote, global_conf=False):
    file = get_config_path(global_conf)
    conf = yaml_load(file)
    if repotype in conf:
        if conf[repotype]['git'] is None or not len(conf[repotype]['git']) > 0:
            log.info(output_messages['INFO_ADD_REMOTE'] % (ml_git_remote, repotype), class_name=ADMIN_CLASS_NAME)
        else:
            log.warn(output_messages['WARN_HAS_CONFIGURED_REMOTE'], class_name=ADMIN_CLASS_NAME)
            log.info(output_messages['INFO_CHANGING_REMOTE'] % (conf[repotype]['git'], ml_git_remote, repotype),
                     class_name=ADMIN_CLASS_NAME)
    else:
        log.info(output_messages['INFO_ADD_REMOTE'] % (ml_git_remote, repotype), class_name=ADMIN_CLASS_NAME)
    try:
        conf[repotype]['git'] = ml_git_remote
    except Exception:
        conf[repotype] = {}
        conf[repotype]['git'] = ml_git_remote
    yaml_save(conf, file)

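# Sketch of the config change remote_add above performs (URL and entity key
# are hypothetical). Starting from conf == {'dataset': {'git': ''}}, the call
#
#   remote_add('dataset', 'git@github.com:user/mlgit-datasets.git')
#
# logs the "add remote" message and saves the config as
#
#   {'dataset': {'git': 'git@github.com:user/mlgit-datasets.git'}}
#
# Calling it again with a different URL logs the "changing remote" warning
# first, then overwrites the same 'git' field.
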
def test_yaml_save(self):
    with tempfile.TemporaryDirectory() as tmpdir:
        # take the last path component (Windows-style separator)
        arr = tmpdir.split('\\')
        temp_var = arr.pop()
        yaml_path = os.path.join(tmpdir, 'data.yaml')
        shutil.copy('udata/data.yaml', yaml_path)
        yal = yaml_load(yaml_path)
        temp_arr = yal[DATASETS]['git'].split('.')
        temp_arr.pop()
        temp_arr.pop()
        temp_arr.append(temp_var)
        temp_arr.append('git')
        # create new git variable
        new_git_var = '.'.join(temp_arr)
        self.assertFalse(yal[DATASETS]['git'] == new_git_var)
        yal[DATASETS]['git'] = new_git_var
        yaml_save(yal, yaml_path)
        # reload from disk so the assertion checks what was actually saved
        self.assertTrue(yaml_load(yaml_path)[DATASETS]['git'] == new_git_var)

def test_increment_version_in_dataset_spec(self):
    dataset = 'test_dataset'
    dir1 = get_spec_file_dir(dataset)
    dir2 = os.path.join('.ml-git', DATASETS, 'index', 'metadata', dataset)  # linked path to the original
    os.makedirs(os.path.join(self.tmp_dir, dir1))
    os.makedirs(os.path.join(self.tmp_dir, dir2))
    file1 = os.path.join(self.tmp_dir, dir1, '%s.spec' % dataset)
    file2 = os.path.join(self.tmp_dir, dir2, '%s.spec' % dataset)
    self.assertFalse(increment_version_in_spec(None))
    self.assertFalse(increment_version_in_spec(os.path.join(get_root_path(), dataset)))
    spec = yaml_load(os.path.join(testdir, 'invalid2.spec'))
    yaml_save(spec, file1)
    self.assertFalse(increment_version_in_spec(os.path.join(get_root_path(), dataset)))
    spec = yaml_load(os.path.join(testdir, 'valid.spec'))
    yaml_save(spec, file1)
    os.link(file1, file2)
    self.assertTrue(increment_version_in_spec(
        os.path.join(get_root_path(), self.tmp_dir, DATASETS, dataset, dataset + '.spec')))

def setUp(self):
    from ml_git import api
    self.manager = api.init_entity_manager('github_token', 'https://api.github.com')
    self.config_path = os.path.join(self.tmp_dir, 'config.yaml')
    yaml_save(dummy_config, self.config_path)
    self.setUp_mock(EntityType.DATASETS.value)
    self.setUp_mock(EntityType.LABELS.value)
    self.setUp_mock(EntityType.MODELS.value)

def test_set_version_in_spec(self):
    tmpfile = os.path.join(self.tmp_dir, 'sample.spec')
    file = os.path.join(testdir, 'sample.spec')
    spec_hash = yaml_load(file)
    yaml_save(spec_hash, tmpfile)
    set_version_in_spec(3, tmpfile, 'dataset')
    spec_hash = yaml_load(tmpfile)
    self.assertEqual(spec_hash['dataset']['version'], 3)

def test_set_version_in_spec(self):
    tmpfile = os.path.join(self.tmp_dir, 'sample.spec')
    file = os.path.join(testdir, 'sample.spec')
    spec_hash = yaml_load(file)
    yaml_save(spec_hash, tmpfile)
    set_version_in_spec(3, tmpfile, DATASETS)
    spec_hash = yaml_load(tmpfile)
    self.assertEqual(spec_hash[DATASET_SPEC_KEY]['version'], 3)

def test_add_manifest(self):
    manifestfile = os.path.join(self.tmp_dir, 'MANIFEST.yaml')
    yaml_save(singlefile['manifest'], manifestfile)
    idx = MultihashIndex('dataset-spec', self.tmp_dir, self.tmp_dir)
    idx.add('data', manifestfile)
    self.assertFalse(os.path.exists(os.path.join(self.tmp_dir, 'files', 'dataset-spec', 'MANIFEST.yaml')))

def __commit_spec(self, full_metadata_path, metadata):
    spec_file = self._spec + SPEC_EXTENSION
    # saves yaml metadata specification
    dst_spec_file = os.path.join(full_metadata_path, spec_file)
    yaml_save(metadata, dst_spec_file)
    return True

def incr_version(file, repotype='dataset'):
    spec_hash = utils.yaml_load(file)
    if is_valid_version(spec_hash, repotype):
        spec_hash[repotype]['version'] += 1
        utils.yaml_save(spec_hash, file)
        log.debug('Version incremented to %s.' % spec_hash[repotype]['version'],
                  class_name=ML_GIT_PROJECT_NAME)
        return spec_hash[repotype]['version']
    else:
        log.error('Invalid version, could not increment. File:\n %s' % file,
                  class_name=ML_GIT_PROJECT_NAME)
        return -1

def incr_version(file, repo_type=DATASETS):
    spec_hash = utils.yaml_load(file)
    entity_spec_key = get_spec_key(repo_type)
    if is_valid_version(spec_hash, entity_spec_key):
        spec_hash[entity_spec_key]['version'] += 1
        utils.yaml_save(spec_hash, file)
        log.debug(output_messages['DEBUG_VERSION_INCREMENTED_TO'] % spec_hash[entity_spec_key]['version'],
                  class_name=ML_GIT_PROJECT_NAME)
        return spec_hash[entity_spec_key]['version']
    else:
        log.error(output_messages['ERROR_INVALID_VERSION_INCREMENT'] % file, class_name=ML_GIT_PROJECT_NAME)
        return -1

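# A hedged usage sketch for incr_version above (path and spec contents are
# hypothetical; assumes DATASET_SPEC_KEY == 'dataset' and that is_valid_version
# only requires an integer 'version' under the entity key):
#
#   utils.yaml_save({'dataset': {'name': 'dataset-ex', 'version': 4}}, 'dataset-ex.spec')
#   incr_version('dataset-ex.spec')   # returns 5 and rewrites the file
#   incr_version('missing.spec')      # invalid spec: logs an error, returns -1
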
def storage_add(storage_type, bucket, credentials_profile, global_conf=False, endpoint_url=None, sftp_configs=None):
    if not valid_storage_type(storage_type):
        return
    try:
        region = get_bucket_region(bucket, credentials_profile)
    except Exception:
        region = None
    if storage_type not in (StorageType.S3H.value, StorageType.S3.value) or credentials_profile is None:
        log.info(output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (storage_type, bucket),
                 class_name=ADMIN_CLASS_NAME)
    else:
        log.info(output_messages['INFO_ADD_STORAGE'] % (storage_type, bucket, credentials_profile),
                 class_name=ADMIN_CLASS_NAME)
    try:
        file = get_config_path(global_conf)
        conf = yaml_load(file)
    except Exception as e:
        log.error(e, class_name=ADMIN_CLASS_NAME)
        return
    if STORAGE_CONFIG_KEY not in conf:
        conf[STORAGE_CONFIG_KEY] = {}
    if storage_type not in conf[STORAGE_CONFIG_KEY]:
        conf[STORAGE_CONFIG_KEY][storage_type] = {}
    conf[STORAGE_CONFIG_KEY][storage_type][bucket] = {}
    bucket_conf = conf[STORAGE_CONFIG_KEY][storage_type][bucket]
    if storage_type in [StorageType.S3.value, StorageType.S3H.value]:
        bucket_conf['aws-credentials'] = {'profile': credentials_profile}
        bucket_conf['region'] = region
        bucket_conf['endpoint-url'] = endpoint_url
    elif storage_type in [StorageType.GDRIVEH.value]:
        bucket_conf['credentials-path'] = credentials_profile
    elif storage_type in [StorageType.SFTPH.value]:
        bucket_conf['endpoint-url'] = endpoint_url
        bucket_conf['username'] = sftp_configs['username']
        bucket_conf['private-key'] = sftp_configs['private_key']
        bucket_conf['port'] = sftp_configs['port']
    yaml_save(conf, file)

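# Sketch of the entry storage_add above writes for an s3h bucket (bucket and
# profile names are hypothetical; assumes STORAGE_CONFIG_KEY names the storage
# section of config.yaml). After
#
#   storage_add('s3h', 'my-bucket', 'my-profile')
#
# the loaded config would contain roughly:
#
#   <STORAGE_CONFIG_KEY>:
#     s3h:
#       my-bucket:
#         aws-credentials:
#           profile: my-profile
#         region: us-east-1      # or null when the region lookup fails
#         endpoint-url: null
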
def test_incr_version(self):
    tmpfile = os.path.join(self.tmp_dir, 'sample.spec')
    file = os.path.join(testdir, 'sample.spec')
    spec_hash = yaml_load(file)
    yaml_save(spec_hash, tmpfile)
    version = spec_hash['dataset']['version']
    incr_version(tmpfile)
    incremented_hash = yaml_load(tmpfile)
    self.assertEqual(incremented_hash['dataset']['version'], version + 1)
    # an unreadable spec cannot be incremented; incr_version signals this with -1
    self.assertEqual(incr_version('non-existent-file'), -1)

def update_store_spec(repotype, artefact_name, store_type, bucket):
    path = None
    try:
        path = get_root_path()
    except Exception as e:
        log.error(e, CLASS_NAME=ML_GIT_PROJECT_NAME)
    spec_path = os.path.join(path, repotype, artefact_name, artefact_name + '.spec')
    spec_hash = utils.yaml_load(spec_path)
    spec_hash[repotype]['manifest']['store'] = store_type + '://' + bucket
    utils.yaml_save(spec_hash, spec_path)
    return

def update_storage_spec(repo_type, artifact_name, storage_type, bucket, entity_dir=''):
    path = None
    try:
        path = get_root_path()
    except Exception as e:
        log.error(e, CLASS_NAME=ML_GIT_PROJECT_NAME)
    spec_path = os.path.join(path, repo_type, entity_dir, artifact_name, artifact_name + SPEC_EXTENSION)
    spec_hash = utils.yaml_load(spec_path)
    entity_spec_key = get_spec_key(repo_type)
    spec_hash[entity_spec_key]['manifest'][STORAGE_SPEC_KEY] = storage_type + '://' + bucket
    utils.yaml_save(spec_hash, spec_path)
    return

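# Sketch of the spec change update_storage_spec above makes (entity and bucket
# names are hypothetical; assumes STORAGE_SPEC_KEY == 'storage'). For a spec
# whose manifest reads
#
#   manifest:
#     storage: s3h://old-bucket
#
# a call like update_storage_spec('datasets', 'dataset-ex', 's3h', 'new-bucket')
# rewrites the file in place so the manifest reads
#
#   manifest:
#     storage: s3h://new-bucket
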
def create_workspace_tree_structure(repo_type, artifact_name, categories, storage_type, bucket_name,
                                    version, imported_dir, mutability, entity_dir=''):
    # get root path to create directories and files
    repo_type_dir = os.path.join(get_root_path(), repo_type)
    artifact_path = os.path.join(repo_type_dir, entity_dir, artifact_name)
    if not path_is_parent(repo_type_dir, artifact_path):
        raise Exception(output_messages['ERROR_INVALID_ENTITY_DIR'].format(entity_dir))
    if os.path.exists(artifact_path):
        raise PermissionError(output_messages['INFO_ENTITY_NAME_EXISTS'])
    data_path = os.path.join(artifact_path, 'data')
    # import files from the directory passed
    if imported_dir is not None:
        import_dir(imported_dir, data_path)
    else:
        os.makedirs(data_path)
    spec_path = os.path.join(artifact_path, artifact_name + SPEC_EXTENSION)
    readme_path = os.path.join(artifact_path, 'README.md')
    file_exists = os.path.isfile(spec_path)
    storage = '%s://%s' % (storage_type, FAKE_STORAGE if bucket_name is None else bucket_name)
    entity_spec_key = get_spec_key(repo_type)
    spec_structure = {
        entity_spec_key: {
            'categories': categories,
            'manifest': {STORAGE_SPEC_KEY: storage},
            'name': artifact_name,
            'mutability': mutability,
            'version': version
        }
    }
    # write the spec file only if it does not exist yet, along with an empty README
    if not file_exists:
        yaml_save(spec_structure, spec_path)
        with open(readme_path, 'w'):
            pass
        return True
    else:
        return False

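# Sketch of the workspace create_workspace_tree_structure above produces (all
# argument values hypothetical; assumes get_spec_key('datasets') == 'dataset'
# and STORAGE_SPEC_KEY == 'storage'):
#
#   create_workspace_tree_structure('datasets', 'dataset-ex', ['images'],
#                                   's3h', 'my-bucket', 1, None, 'strict')
#
# creates datasets/dataset-ex/{data/, README.md, dataset-ex.spec}, with the
# spec file containing roughly:
#
#   dataset:
#     categories:
#     - images
#     manifest:
#       storage: s3h://my-bucket
#     name: dataset-ex
#     mutability: strict
#     version: 1
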
def remote_del(repo_type, global_conf=False):
    file = get_config_path(global_conf)
    conf = yaml_load(file)
    if repo_type in conf:
        git_url = conf[repo_type]['git']
        if git_url is None or not len(git_url) > 0:
            log.error(output_messages['ERROR_REMOTE_UNCONFIGURED'] % repo_type, class_name=ADMIN_CLASS_NAME)
        else:
            log.info(output_messages['INFO_REMOVE_REMOTE'] % (git_url, repo_type), class_name=ADMIN_CLASS_NAME)
            conf[repo_type]['git'] = ''
            yaml_save(conf, file)
    else:
        log.error(output_messages['ERROR_ENTITY_NOT_FOUND'] % repo_type, class_name=ADMIN_CLASS_NAME)

def test_add_full_index(self):
    manifestfile = os.path.join(self.tmp_dir, 'MANIFEST.yaml')
    yaml_save(singlefile['manifest'], manifestfile)
    idx = MultihashIndex('dataset-spec', self.tmp_dir, self.tmp_dir)
    idx.add('data', manifestfile)
    f_idx = yaml_load(os.path.join(self.tmp_dir, 'metadata', 'dataset-spec', 'INDEX.yaml'))
    self.assertTrue(len(f_idx) > 0)
    for k, v in f_idx.items():
        self.assertEqual(k, 'think-hires.jpg')
        self.assertEqual(v['hash'], 'zdj7WgHSKJkoJST5GWGgS53ARqV7oqMGYVvWzEWku3MBfnQ9u')
        self.assertEqual(v['status'], 'a')
    self.assertFalse(os.path.exists(os.path.join(self.tmp_dir, 'dataset-spec', 'INDEX.yaml')))

def test_update(self):
    mlgit_dir = os.path.join(self.tmp_dir, '.ml-git')
    objectpath = os.path.join(mlgit_dir, 'objects-test')
    manifest = os.path.join(self.tmp_dir, 'manifest.yaml')
    yaml_save({'zdj7WgHSKJkoJST5GWGgS53ARqV7oqMGYVvWzEWku3MBfnQ9u': {'think-hires.jpg'}}, manifest)
    data = os.path.join(self.test_dir, 'data')
    c = Cache(objectpath, data, manifest)
    c.update()
    set_write_read(os.path.join(self.test_dir, data, 'think-hires.jpg'))
    st = os.stat(os.path.join(self.test_dir, data, 'think-hires.jpg'))
    self.assertTrue(st.st_nlink > 1)
    self.assertTrue(c.exists('zdj7WgHSKJkoJST5GWGgS53ARqV7oqMGYVvWzEWku3MBfnQ9u'))

def test_add_metrics_with_none_metrics_options(self):
    hashfs_path = os.path.join(self.tmp_dir, 'objectsfs')
    test_config = yaml_load('hdata/config.yaml')
    local_repo = LocalRepository(test_config, hashfs_path, repo_type=MODELS)
    spec_path = os.path.join(self.tmp_dir, 'model-ex.spec')
    shutil.copy('hdata/dataset-ex.spec', spec_path)
    spec_file = yaml_load(spec_path)
    model = spec_file[DATASET_SPEC_KEY].copy()
    del spec_file[DATASET_SPEC_KEY]
    spec_file[MODEL_SPEC_KEY] = model
    yaml_save(spec_file, spec_path)
    local_repo.add_metrics(spec_path, (), None)
    test_spec_file = yaml_load(spec_path)
    self.assertFalse('metrics' in test_spec_file[MODEL_SPEC_KEY])

def test_get_tag(self):
    mdpath = os.path.join(self.test_dir, 'metadata')
    specpath = 'dataset-ex'
    ensure_path_exists(os.path.join(mdpath, specpath))
    shutil.copy('hdata/dataset-ex.spec', os.path.join(mdpath, specpath, 'dataset-ex.spec'))
    manifestpath = os.path.join(mdpath, specpath, 'MANIFEST.yaml')
    yaml_save(files_mock, manifestpath)
    config['mlgit_path'] = self.test_dir
    m = Metadata(specpath, mdpath, config, DATASETS)
    r = Repository(config, DATASETS)
    r.init()
    tag_list = ['computer__images__dataset-ex__1']
    with mock.patch('ml_git.metadata.Metadata.list_tags', return_value=tag_list):
        target_tag = m.get_tag(specpath, -1)
    self.assertEqual(target_tag, tag_list[0])
    clear(m.path)

def test_tag_exist(self):
    mdpath = os.path.join(self.test_dir, 'metadata')
    specpath = 'dataset-ex'
    ensure_path_exists(os.path.join(mdpath, specpath))
    shutil.copy('hdata/dataset-ex.spec', os.path.join(mdpath, specpath, 'dataset-ex.spec'))
    manifestpath = os.path.join(mdpath, specpath, 'MANIFEST.yaml')
    yaml_save(files_mock, manifestpath)
    config['mlgit_path'] = self.test_dir
    m = Metadata(specpath, mdpath, config, repotype)
    r = Repository(config, repotype)
    r.init()
    fullmetadatapath, categories_subpath, metadata = m.tag_exists(self.test_dir)
    self.assertIsNotNone(metadata)

def test_push(self):
    mlgit_dir = os.path.join(self.tmp_dir, '.ml-git')
    indexpath = os.path.join(mlgit_dir, 'index-test')
    mdpath = os.path.join(mlgit_dir, 'metadata-test')
    objectpath = os.path.join(mlgit_dir, 'objects-test')
    specpath = os.path.join(mdpath, 'vision-computing/images/dataset-ex')
    ensure_path_exists(specpath)
    ensure_path_exists(indexpath)
    shutil.copy('hdata/dataset-ex.spec', specpath + '/dataset-ex.spec')
    shutil.copy('hdata/config.yaml', mlgit_dir + '/config.yaml')
    manifestpath = os.path.join(specpath, 'MANIFEST.yaml')
    yaml_save({'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh': {'imghires.jpg'}}, manifestpath)
    # adds chunks to ml-git index
    idx = MultihashIndex(specpath, indexpath, objectpath)
    idx.add('data-test-push/', manifestpath)
    fi = yaml_load(os.path.join(specpath, 'INDEX.yaml'))
    self.assertTrue(len(fi) > 0)
    self.assertTrue(os.path.exists(indexpath))
    o = Objects(specpath, objectpath)
    o.commit_index(indexpath, self.tmp_dir)
    self.assertTrue(os.path.exists(objectpath))
    c = yaml_load('hdata/config.yaml')
    r = LocalRepository(c, objectpath)
    r.push(objectpath, specpath + '/dataset-ex.spec')
    s3 = boto3.resource(
        's3',
        region_name='eu-west-1',
        aws_access_key_id='fake_access_key',
        aws_secret_access_key='fake_secret_key',
    )
    # 'testbucketname' is the module-level fixture naming the test bucket
    for key in idx.get_index():
        self.assertIsNotNone(s3.Object(testbucketname, key))