Exemplo n.º 1
0
    def test_diff_refs_modified_file(self):
        """Replacing a tracked hash between commits must count as one modification only."""
        repo_type = DATASETS
        metadata_dir = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
        entity = 'dataset-ex'
        entity_subpath = os.path.join('vision-computer', 'images', entity)
        test_config = deepcopy(config)
        test_config['mlgit_path'] = '.ml-git'
        metadata = Metadata(entity, metadata_dir, test_config, repo_type)
        metadata.init()
        ensure_path_exists(os.path.join(metadata_dir, entity_subpath, entity))
        manifest_file = os.path.join(metadata_dir, entity_subpath, 'MANIFEST.yaml')
        shutil.copy('hdata/dataset-ex.spec',
                    os.path.join(metadata_dir, entity_subpath, '{}.spec'.format(entity)))
        yaml_save(files_mock, manifest_file)
        first_sha = metadata.commit(manifest_file, 'test')

        # Swap one tracked hash for a new one: same file count, one modification.
        changed_files = deepcopy(files_mock)
        del changed_files['zdj7WZzR8Tw87Dx3dm76W5aehnT23GSbXbQ9qo73JgtwREGwB']
        changed_files['NewHash'] = {'7.jpg'}
        yaml_save(changed_files, manifest_file)
        second_sha = metadata.commit(manifest_file, 'test')

        added, deleted, modified = metadata.diff_refs_with_modified_files(
            entity, first_sha, second_sha)
        self.assertEqual(0, len(added))
        self.assertEqual(0, len(deleted))
        self.assertEqual(1, len(modified))
Exemplo n.º 2
0
def store_del(store_type, bucket, global_conf=False):
    """Remove *bucket* from the 'store' section of the configuration file.

    Returns silently when the store type is invalid, the configuration
    cannot be loaded, or the bucket is not configured.
    """
    if not valid_store_type(store_type):
        return

    try:
        config_path = get_config_path(global_conf)
        conf = yaml_load(config_path)
    except Exception as error:
        log.error(error, class_name=ADMIN_CLASS_NAME)
        return

    # Guard clause: bail out unless the store/type/bucket chain is present.
    if 'store' not in conf or store_type not in conf['store'] \
            or bucket not in conf['store'][store_type]:
        log.warn('Store [%s://%s] not found in configuration file.' %
                 (store_type, bucket),
                 class_name=ADMIN_CLASS_NAME)
        return

    del conf['store'][store_type][bucket]
    log.info('Removed store [%s://%s] from configuration file.' %
             (store_type, bucket),
             class_name=ADMIN_CLASS_NAME)
    yaml_save(conf, config_path)
Exemplo n.º 3
0
    def test_remote_fsck(self):
        """Exercise remote_fsck end-to-end against an S3 bucket.

        Deletes one chunk from the bucket, then expects remote_fsck to repair
        the remote so the object can be loaded again afterwards.
        """
        testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets')
        hfspath = os.path.join(self.tmp_dir, 'objectsfs')
        ohfs = MultihashFS(hfspath)
        ohfs.put(HDATA_IMG_1)

        # Fake credentials — presumably a mocked S3 backend (e.g. moto) is
        # active for this test; confirm against the test-class fixtures.
        s3 = boto3.resource(
            's3',
            region_name='us-east-1',
            aws_access_key_id='fake_access_key',
            aws_secret_access_key='fake_secret_key',
        )

        # Remove one chunk so the remote is missing data; accessing it must now fail.
        s3.Object(testbucketname, 'zdj7WWsMkELZSGQGgpm5VieCWV8NxY5n5XEP73H4E7eeDMA3A').delete()
        self.assertRaises(botocore.exceptions.ClientError, lambda: self.check_delete(s3, testbucketname))
        mdpath = os.path.join(self.tmp_dir, 'metadata-test')

        dataset_spec = get_sample_spec(testbucketname)
        specpath = os.path.join(mdpath, 'vision-computing', 'images', 'dataset-ex')
        ensure_path_exists(specpath)

        yaml_save(dataset_spec, os.path.join(specpath, 'dataset-ex.spec'))
        manifestpath = os.path.join(specpath, 'MANIFEST.yaml')

        yaml_save({'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh': {'imghires.jpg'}}, manifestpath)
        # NOTE(review): specpath is absolute, so the nested os.path.join
        # collapses to the inner result — the outer join is redundant.
        fullspecpath = os.path.join(specpath, os.path.join(specpath, 'dataset-ex.spec'))
        spec = 'vision-computing__images__dataset-ex__5'
        c = yaml_load('hdata/config.yaml')
        r = LocalRepository(c, hfspath)
        ret = r.remote_fsck(mdpath, spec, fullspecpath, 2, True, True)
        self.assertTrue(ret)

        # Object.load() returns None on success and raises if the key is still missing.
        self.assertEqual(None, s3.Object(testbucketname, 'zdj7WWsMkELZSGQGgpm5VieCWV8NxY5n5XEP73H4E7eeDMA3A').load())
Exemplo n.º 4
0
    def test_add_metrics_file(self):
        """add_metrics with a CSV file must merge its values into the model spec as floats."""
        hashfs_path = os.path.join(self.tmp_dir, 'objectsfs')
        repo = LocalRepository(yaml_load('hdata/config.yaml'),
                               hashfs_path,
                               repo_type=MODELS)
        spec_path = os.path.join(self.tmp_dir, 'model-ex.spec')
        shutil.copy('hdata/dataset-ex.spec', spec_path)
        # Re-key the sample dataset spec as a model spec.
        spec_file = yaml_load(spec_path)
        spec_file[MODEL_SPEC_KEY] = spec_file.pop(DATASET_SPEC_KEY)
        yaml_save(spec_file, spec_path)

        metrics_file_path = os.path.join(self.tmp_dir, 'metrics.csv')
        self.create_csv_file(metrics_file_path, {
            'metric_a': 10,
            'metric_b': 9
        })
        repo.add_metrics(spec_path, (), metrics_file_path)

        saved_metrics = yaml_load(spec_path)[MODEL_SPEC_KEY]['metrics']
        self.assertEqual(10.0, saved_metrics.get('metric_a', ''))
        self.assertEqual(9.0, saved_metrics.get('metric_b', ''))
Exemplo n.º 5
0
    def test_get_metrics(self):
        """_get_metrics must render the spec metrics as a pretty-printed table."""
        repo_type = MODELS
        mdpath = os.path.join(self.test_dir, 'mdata', repo_type, 'metadata')
        category_path = os.path.join('vision-computer', 'images')
        entity = 'model-ex'
        metadata = Metadata(entity, self.test_dir, config, repo_type)
        metadata.init()
        ensure_path_exists(os.path.join(mdpath, category_path, entity))
        spec_metadata_path = os.path.join(mdpath, category_path, entity, 'model-ex.spec')
        shutil.copy('hdata/dataset-ex.spec', spec_metadata_path)

        # Convert the sample dataset spec into a model spec carrying two metrics.
        spec_file = yaml_load(spec_metadata_path)
        spec_file[MODEL_SPEC_KEY] = deepcopy(spec_file[DATASET_SPEC_KEY])
        del spec_file[DATASET_SPEC_KEY]
        spec_file[MODEL_SPEC_KEY]['metrics'] = {'metric_1': 0, 'metric_2': 1}
        yaml_save(spec_file, spec_metadata_path)

        sha = metadata.commit(spec_metadata_path, category_path)
        metadata.tag_add('vision-computer__images__model-ex__1')

        metrics = metadata._get_metrics(entity, sha)

        # Build the expected rendering with the same table settings.
        expected_table = PrettyTable()
        expected_table.field_names = ['Name', 'Value']
        expected_table.align['Name'] = 'l'
        expected_table.align['Value'] = 'l'
        expected_table.add_row(['metric_1', 0])
        expected_table.add_row(['metric_2', 1])
        expected = '\nmetrics:\n{}'.format(expected_table.get_string())

        self.assertEqual(expected, metrics)
Exemplo n.º 6
0
def storage_del(storage_type, bucket, global_conf=False):
    """Remove *bucket* from the configured storages of *storage_type*.

    Returns silently when the storage type is invalid, the configuration
    cannot be loaded, or the bucket is not present in it.
    """
    if not valid_storage_type(storage_type):
        return

    try:
        config_path = get_config_path(global_conf)
        conf = yaml_load(config_path)
    except Exception as error:
        log.error(error, class_name=ADMIN_CLASS_NAME)
        return

    # Guard clause: bail out unless the storage/type/bucket chain exists.
    if STORAGE_CONFIG_KEY not in conf or storage_type not in conf[STORAGE_CONFIG_KEY] \
            or bucket not in conf[STORAGE_CONFIG_KEY][storage_type]:
        log.warn(output_messages['WARN_STORAGE_NOT_IN_CONFIG'] %
                 (storage_type, bucket),
                 class_name=ADMIN_CLASS_NAME)
        return

    del conf[STORAGE_CONFIG_KEY][storage_type][bucket]
    log.info(output_messages['INFO_REMOVED_STORAGE'] % (storage_type, bucket),
             class_name=ADMIN_CLASS_NAME)
    yaml_save(conf, config_path)
Exemplo n.º 7
0
    def test_diff_refs_add_file(self):
        """Adding one new hash to the manifest must be reported as a single addition."""
        repo_type = DATASETS
        metadata_dir = os.path.join(self.test_dir, '.ml-git', repo_type, 'metadata')
        entity = 'dataset-ex'
        entity_subpath = os.path.join('vision-computer', 'images', entity)
        test_config = deepcopy(config)
        test_config['mlgit_path'] = '.ml-git'
        metadata = Metadata(entity, metadata_dir, test_config, repo_type)
        metadata.init()
        ensure_path_exists(os.path.join(metadata_dir, entity_subpath, entity))
        manifest_file = os.path.join(metadata_dir, entity_subpath, 'MANIFEST.yaml')
        shutil.copy('hdata/dataset-ex.spec',
                    os.path.join(metadata_dir, entity_subpath, '{}.spec'.format(entity)))
        yaml_save(files_mock, manifest_file)
        first_sha = metadata.commit(manifest_file, 'test')

        # Second commit introduces exactly one brand-new hash.
        extended_files = deepcopy(files_mock)
        extended_files['zPaksM5tNewHashQ2VABPvvfC3VW6wFRTWKvFhUW5QaDx6JMoma'] = {'11.jpg'}
        yaml_save(extended_files, manifest_file)
        second_sha = metadata.commit(manifest_file, 'test')

        added, deleted, modified = metadata.diff_refs_with_modified_files(
            entity, first_sha, second_sha)
        self.assertEqual(1, len(added))
        self.assertEqual(0, len(deleted))
        self.assertEqual(0, len(modified))
Exemplo n.º 8
0
    def test_fetch(self):
        """fetch must download every chunk named in the manifest into the local object store."""
        mdpath = os.path.join(self.tmp_dir, 'metadata-test')
        testbucketname = os.getenv('MLGIT_TEST_BUCKET', 'ml-git-datasets')
        config_spec = get_sample_config_spec(testbucketname, testprofile,
                                             testregion)
        dataset_spec = get_sample_spec(testbucketname)

        specpath = os.path.join(mdpath, 'vision-computing', 'images',
                                'dataset-ex')
        ensure_path_exists(specpath)
        yaml_save(dataset_spec, os.path.join(specpath, 'dataset-ex.spec'))

        # Manifest maps one chunk hash to the file it belongs to.
        manifestpath = os.path.join(specpath, 'MANIFEST.yaml')
        yaml_save(
            {
                'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh':
                {'imghires.jpg'}
            }, manifestpath)

        objectpath = os.path.join(self.tmp_dir, 'objects-test')
        spec = 'vision-computing__images__dataset-ex__5'

        r = LocalRepository(config_spec, objectpath)
        r.fetch(mdpath, spec, None)

        # Collect every file name that fetch placed in the object store.
        fs = set()
        for root, dirs, files in os.walk(objectpath):
            for file in files:
                fs.add(file)

        # 'hs' is defined outside this block (module level) — presumably the
        # set of expected chunk hashes; verify against the module preamble.
        self.assertEqual(len(hs), len(fs))
        self.assertTrue(len(hs.difference(fs)) == 0)
Exemplo n.º 9
0
def remote_add(repotype, ml_git_remote, global_conf=False):
    """Set (or replace) the git remote URL for *repotype* in the config file.

    Logs whether the remote is being added for the first time or replaces a
    previously configured one, then persists the updated configuration.
    """
    file = get_config_path(global_conf)
    conf = yaml_load(file)

    # .get('git') also covers an entity entry with no 'git' key at all, which
    # previously raised an uncaught KeyError; '' and None both mean "not set".
    if repotype in conf and conf[repotype].get('git'):
        log.warn(output_messages['WARN_HAS_CONFIGURED_REMOTE'],
                 class_name=ADMIN_CLASS_NAME)
        log.info(output_messages['INFO_CHANGING_REMOTE'] %
                 (conf[repotype]['git'], ml_git_remote, repotype),
                 class_name=ADMIN_CLASS_NAME)
    else:
        log.info(output_messages['INFO_ADD_REMOTE'] %
                 (ml_git_remote, repotype),
                 class_name=ADMIN_CLASS_NAME)

    # setdefault replaces the original broad try/except-Exception that was
    # used as control flow to create the entity section on first use.
    conf.setdefault(repotype, {})['git'] = ml_git_remote
    yaml_save(conf, file)
Exemplo n.º 10
0
    def test_yaml_save(self):
        """yaml_save must persist an edited mapping to disk."""

        with tempfile.TemporaryDirectory() as tmpdir:
            # NOTE(review): splitting on '\\' assumes Windows-style temp paths;
            # on POSIX the whole path stays as one element — confirm intent.
            arr = tmpdir.split('\\')
            temp_var = arr.pop()

            yaml_path = os.path.join(tmpdir, 'data.yaml')

            shutil.copy('udata/data.yaml', yaml_path)

            yal = yaml_load(yaml_path)

            # Build a git URL guaranteed to differ from the stored one by
            # swapping its last two dotted components.
            temp_arr = yal[DATASETS]['git'].split('.')
            temp_arr.pop()
            temp_arr.pop()
            temp_arr.append(temp_var)
            temp_arr.append('git')
            # create new git variable
            new_git_var = '.'.join(temp_arr)

            self.assertFalse(yal[DATASETS]['git'] == new_git_var)

            yal[DATASETS]['git'] = new_git_var

            yaml_save(yal, yaml_path)
            # NOTE(review): this asserts on the in-memory dict, not on a reload
            # of yaml_path, so it cannot fail once the assignment above ran —
            # consider yaml_load(yaml_path) here to actually verify the save.
            self.assertTrue(yal[DATASETS]['git'] == new_git_var)
Exemplo n.º 11
0
    def test_increment_version_in_dataset_spec(self):
        """increment_version_in_spec must reject None/invalid specs and bump a valid, hard-linked one."""
        dataset = 'test_dataset'
        dir1 = get_spec_file_dir(dataset)
        dir2 = os.path.join('.ml-git', DATASETS, 'index', 'metadata',
                            dataset)  # Linked path to the original
        os.makedirs(os.path.join(self.tmp_dir, dir1))
        os.makedirs(os.path.join(self.tmp_dir, dir2))
        file1 = os.path.join(self.tmp_dir, dir1, '%s.spec' % dataset)
        file2 = os.path.join(self.tmp_dir, dir2, '%s.spec' % dataset)

        # None input is rejected outright.
        self.assertFalse(increment_version_in_spec(None))

        # Directories exist, but no spec file has been written yet.
        self.assertFalse(
            increment_version_in_spec(os.path.join(get_root_path(), dataset)))

        # An invalid spec must not be incremented.
        spec = yaml_load(os.path.join(testdir, 'invalid2.spec'))
        yaml_save(spec, file1)
        self.assertFalse(
            increment_version_in_spec(os.path.join(get_root_path(), dataset)))

        # A valid spec, hard-linked into the index metadata dir, increments fine.
        spec = yaml_load(os.path.join(testdir, 'valid.spec'))
        yaml_save(spec, file1)
        os.link(file1, file2)
        self.assertTrue(
            increment_version_in_spec(
                os.path.join(get_root_path(), self.tmp_dir, DATASETS, dataset,
                             dataset + '.spec')))
Exemplo n.º 12
0
 def setUp(self):
     """Create an entity manager against the GitHub API and a dummy config,
     then install the mocks for each entity type."""
     from ml_git import api
     self.manager = api.init_entity_manager('github_token', 'https://api.github.com')
     self.config_path = os.path.join(self.tmp_dir, 'config.yaml')
     yaml_save(dummy_config, self.config_path)
     self.setUp_mock(EntityType.DATASETS.value)
     self.setUp_mock(EntityType.LABELS.value)
     self.setUp_mock(EntityType.MODELS.value)
Exemplo n.º 13
0
 def test_set_version_in_spec(self):
     """set_version_in_spec must persist the requested version into the spec file."""
     target_spec = os.path.join(self.tmp_dir, 'sample.spec')
     yaml_save(yaml_load(os.path.join(testdir, 'sample.spec')), target_spec)
     set_version_in_spec(3, target_spec, 'dataset')
     self.assertEqual(3, yaml_load(target_spec)['dataset']['version'])
Exemplo n.º 14
0
 def test_set_version_in_spec(self):
     """set_version_in_spec must persist the requested version into the spec file."""
     target_spec = os.path.join(self.tmp_dir, 'sample.spec')
     yaml_save(yaml_load(os.path.join(testdir, 'sample.spec')), target_spec)
     set_version_in_spec(3, target_spec, DATASETS)
     self.assertEqual(3, yaml_load(target_spec)[DATASET_SPEC_KEY]['version'])
Exemplo n.º 15
0
    def test_add_manifest(self):
        """Adding with an explicit manifest must not create MANIFEST.yaml under files/."""
        manifest_file = os.path.join(self.tmp_dir, 'MANIFEST.yaml')
        yaml_save(singlefile['manifest'], manifest_file)

        index = MultihashIndex('dataset-spec', self.tmp_dir, self.tmp_dir)
        index.add('data', manifest_file)

        unexpected = os.path.join(self.tmp_dir, 'files', 'dataset-spec', 'MANIFEST.yaml')
        self.assertFalse(os.path.exists(unexpected))
Exemplo n.º 16
0
    def __commit_spec(self, full_metadata_path, metadata):
        """Persist the entity spec *metadata* as YAML under *full_metadata_path*.

        Always returns True.
        """
        destination = os.path.join(full_metadata_path,
                                   self._spec + SPEC_EXTENSION)
        yaml_save(metadata, destination)
        return True
Exemplo n.º 17
0
def incr_version(file, repotype='dataset'):
    """Increment the 'version' field of a spec file in place.

    Returns the new version, or -1 when the spec has no valid version.
    """
    spec_hash = utils.yaml_load(file)
    if not is_valid_version(spec_hash, repotype):
        log.error('Invalid version, could not increment.  File:\n     %s' % file, class_name=ML_GIT_PROJECT_NAME)
        return -1
    new_version = spec_hash[repotype]['version'] + 1
    spec_hash[repotype]['version'] = new_version
    utils.yaml_save(spec_hash, file)
    log.debug('Version incremented to %s.' % new_version, class_name=ML_GIT_PROJECT_NAME)
    return new_version
Exemplo n.º 18
0
def incr_version(file, repo_type=DATASETS):
    """Increment the 'version' field of a spec file in place.

    Returns the new version, or -1 when the spec has no valid version.
    """
    spec_hash = utils.yaml_load(file)
    entity_spec_key = get_spec_key(repo_type)
    if not is_valid_version(spec_hash, entity_spec_key):
        log.error(output_messages['ERROR_INVALID_VERSION_INCREMENT'] % file, class_name=ML_GIT_PROJECT_NAME)
        return -1
    new_version = spec_hash[entity_spec_key]['version'] + 1
    spec_hash[entity_spec_key]['version'] = new_version
    utils.yaml_save(spec_hash, file)
    log.debug(output_messages['DEBUG_VERSION_INCREMENTED_TO'] % new_version, class_name=ML_GIT_PROJECT_NAME)
    return new_version
Exemplo n.º 19
0
def storage_add(storage_type,
                bucket,
                credentials_profile,
                global_conf=False,
                endpoint_url=None,
                sftp_configs=None):
    """Register *bucket* under *storage_type* in the configuration file.

    The per-bucket entry layout depends on the storage type: S3-like storages
    record a credentials profile, region and endpoint; gdrive records a
    credentials path; sftp records endpoint, username, private key and port.
    """
    if not valid_storage_type(storage_type):
        return

    # Region lookup is best-effort; any failure simply leaves region unset.
    try:
        region = get_bucket_region(bucket, credentials_profile)
    except Exception:
        region = None

    is_s3 = storage_type in (StorageType.S3H.value, StorageType.S3.value)
    if is_s3 and credentials_profile is not None:
        log.info(output_messages['INFO_ADD_STORAGE'] %
                 (storage_type, bucket, credentials_profile),
                 class_name=ADMIN_CLASS_NAME)
    else:
        log.info(output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] %
                 (storage_type, bucket),
                 class_name=ADMIN_CLASS_NAME)

    try:
        file = get_config_path(global_conf)
        conf = yaml_load(file)
    except Exception as error:
        log.error(error, class_name=ADMIN_CLASS_NAME)
        return

    # Build the bucket entry once, then hang it on the config tree.
    entry = {}
    if is_s3:
        entry['aws-credentials'] = {'profile': credentials_profile}
        entry['region'] = region
        entry['endpoint-url'] = endpoint_url
    elif storage_type == StorageType.GDRIVEH.value:
        entry['credentials-path'] = credentials_profile
    elif storage_type == StorageType.SFTPH.value:
        entry['endpoint-url'] = endpoint_url
        entry['username'] = sftp_configs['username']
        entry['private-key'] = sftp_configs['private_key']
        entry['port'] = sftp_configs['port']

    conf.setdefault(STORAGE_CONFIG_KEY, {}).setdefault(storage_type, {})[bucket] = entry
    yaml_save(conf, file)
Exemplo n.º 20
0
    def test_incr_version(self):
        """incr_version must bump the stored version by one and tolerate a missing file."""
        tmpfile = os.path.join(self.tmp_dir, 'sample.spec')
        original = yaml_load(os.path.join(testdir, 'sample.spec'))
        yaml_save(original, tmpfile)
        previous_version = original['dataset']['version']
        incr_version(tmpfile)
        self.assertEqual(previous_version + 1,
                         yaml_load(tmpfile)['dataset']['version'])

        # Must not raise for a path that does not exist.
        incr_version('non-existent-file')
Exemplo n.º 21
0
def update_store_spec(repotype, artefact_name, store_type, bucket):
    """Point the artefact spec's 'store' entry at store_type://bucket.

    Aborts after logging when the project root cannot be resolved, instead of
    continuing with path=None and crashing later in os.path.join with a
    confusing TypeError (the original behavior).
    """
    try:
        path = get_root_path()
    except Exception as e:
        log.error(e, CLASS_NAME=ML_GIT_PROJECT_NAME)
        return

    spec_path = os.path.join(path, repotype, artefact_name, artefact_name + '.spec')
    spec_hash = utils.yaml_load(spec_path)
    spec_hash[repotype]['manifest']['store'] = store_type + '://' + bucket
    utils.yaml_save(spec_hash, spec_path)
    return
Exemplo n.º 22
0
def update_storage_spec(repo_type, artifact_name, storage_type, bucket, entity_dir=''):
    """Point the artifact spec's storage entry at storage_type://bucket.

    Aborts after logging when the project root cannot be resolved, instead of
    continuing with path=None and crashing later in os.path.join with a
    confusing TypeError (the original behavior).
    """
    try:
        path = get_root_path()
    except Exception as e:
        log.error(e, CLASS_NAME=ML_GIT_PROJECT_NAME)
        return

    spec_path = os.path.join(path, repo_type, entity_dir, artifact_name, artifact_name + SPEC_EXTENSION)
    spec_hash = utils.yaml_load(spec_path)

    entity_spec_key = get_spec_key(repo_type)
    spec_hash[entity_spec_key]['manifest'][STORAGE_SPEC_KEY] = storage_type + '://' + bucket
    utils.yaml_save(spec_hash, spec_path)
    return
Exemplo n.º 23
0
def create_workspace_tree_structure(repo_type,
                                    artifact_name,
                                    categories,
                                    storage_type,
                                    bucket_name,
                                    version,
                                    imported_dir,
                                    mutability,
                                    entity_dir=''):
    """Create the workspace directory layout and spec/README files for a new entity.

    Returns True when a fresh spec file was written, False when one already
    existed. Raises when the entity dir escapes the repo tree or the artifact
    path already exists.
    """
    repo_type_dir = os.path.join(get_root_path(), repo_type)
    artifact_path = os.path.join(repo_type_dir, entity_dir, artifact_name)
    if not path_is_parent(repo_type_dir, artifact_path):
        raise Exception(
            output_messages['ERROR_INVALID_ENTITY_DIR'].format(entity_dir))
    if os.path.exists(artifact_path):
        raise PermissionError(output_messages['INFO_ENTITY_NAME_EXISTS'])

    data_path = os.path.join(artifact_path, 'data')
    if imported_dir is not None:
        # Seed the data directory with the user-provided files.
        import_dir(imported_dir, data_path)
    else:
        os.makedirs(data_path)

    spec_path = os.path.join(artifact_path, artifact_name + SPEC_EXTENSION)
    readme_path = os.path.join(artifact_path, 'README.md')
    if os.path.isfile(spec_path):
        return False

    bucket = FAKE_STORAGE if bucket_name is None else bucket_name
    spec_structure = {
        get_spec_key(repo_type): {
            'categories': categories,
            'manifest': {
                STORAGE_SPEC_KEY: '%s://%s' % (storage_type, bucket)
            },
            'name': artifact_name,
            'mutability': mutability,
            'version': version
        }
    }

    yaml_save(spec_structure, spec_path)
    # An empty README is created alongside the spec.
    with open(readme_path, 'w'):
        pass
    return True
Exemplo n.º 24
0
def remote_del(repo_type, global_conf=False):
    """Clear the configured git remote for *repo_type* in the config file."""
    config_file = get_config_path(global_conf)
    conf = yaml_load(config_file)

    if repo_type not in conf:
        log.error(output_messages['ERROR_ENTITY_NOT_FOUND'] % repo_type, class_name=ADMIN_CLASS_NAME)
        return

    git_url = conf[repo_type]['git']
    if not git_url:
        # Entity exists but no remote is configured for it.
        log.error(output_messages['ERROR_REMOTE_UNCONFIGURED'] % repo_type, class_name=ADMIN_CLASS_NAME)
        return

    log.info(output_messages['INFO_REMOVE_REMOTE'] % (git_url, repo_type), class_name=ADMIN_CLASS_NAME)
    conf[repo_type]['git'] = ''
    yaml_save(conf, config_file)
Exemplo n.º 25
0
    def test_add_full_index(self):
        """Indexing must record each file's hash/status under metadata/, not the workspace root."""
        manifest_file = os.path.join(self.tmp_dir, 'MANIFEST.yaml')
        yaml_save(singlefile['manifest'], manifest_file)

        index = MultihashIndex('dataset-spec', self.tmp_dir, self.tmp_dir)
        index.add('data', manifest_file)

        f_idx = yaml_load(os.path.join(self.tmp_dir, 'metadata', 'dataset-spec', 'INDEX.yaml'))
        self.assertTrue(len(f_idx) > 0)
        for name, entry in f_idx.items():
            self.assertEqual('think-hires.jpg', name)
            self.assertEqual('zdj7WgHSKJkoJST5GWGgS53ARqV7oqMGYVvWzEWku3MBfnQ9u', entry['hash'])
            self.assertEqual('a', entry['status'])

        self.assertFalse(os.path.exists(os.path.join(self.tmp_dir, 'dataset-spec', 'INDEX.yaml')))
Exemplo n.º 26
0
 def test_update(self):
     """Cache.update must hard-link manifest files into the cache and keep them reachable by hash."""
     mlgit_dir = os.path.join(self.tmp_dir, '.ml-git')
     objectpath = os.path.join(mlgit_dir, 'objects-test')
     manifest = os.path.join(self.tmp_dir, 'manifest.yaml')
     yaml_save(
         {
             'zdj7WgHSKJkoJST5GWGgS53ARqV7oqMGYVvWzEWku3MBfnQ9u':
             {'think-hires.jpg'}
         }, manifest)
     data = os.path.join(self.test_dir, 'data')
     c = Cache(objectpath, data, manifest)
     c.update()
     set_write_read(os.path.join(self.test_dir, data, 'think-hires.jpg'))
     # st_nlink > 1 proves the cache entry is a hard link, not a copy.
     st = os.stat(os.path.join(self.test_dir, data, 'think-hires.jpg'))
     self.assertTrue(st.st_nlink > 1)
     self.assertTrue(
         c.exists('zdj7WgHSKJkoJST5GWGgS53ARqV7oqMGYVvWzEWku3MBfnQ9u'))
Exemplo n.º 27
0
    def test_add_metrics_with_none_metrics_options(self):
        """add_metrics with neither options nor a metrics file must leave the spec untouched."""
        hashfs_path = os.path.join(self.tmp_dir, 'objectsfs')
        repo = LocalRepository(yaml_load('hdata/config.yaml'),
                               hashfs_path,
                               repo_type=MODELS)
        spec_path = os.path.join(self.tmp_dir, 'model-ex.spec')
        shutil.copy('hdata/dataset-ex.spec', spec_path)
        # Re-key the sample dataset spec as a model spec.
        spec_file = yaml_load(spec_path)
        spec_file[MODEL_SPEC_KEY] = spec_file.pop(DATASET_SPEC_KEY)
        yaml_save(spec_file, spec_path)

        repo.add_metrics(spec_path, (), None)

        self.assertNotIn('metrics', yaml_load(spec_path)[MODEL_SPEC_KEY])
Exemplo n.º 28
0
    def test_get_tag(self):
        """get_tag with index -1 must resolve to the last tag reported by list_tags."""
        mdpath = os.path.join(self.test_dir, 'metadata')
        specpath = 'dataset-ex'
        ensure_path_exists(os.path.join(mdpath, specpath))
        shutil.copy('hdata/dataset-ex.spec', os.path.join(mdpath, specpath) + '/dataset-ex.spec')
        yaml_save(files_mock, os.path.join(mdpath, specpath, 'MANIFEST.yaml'))

        config['mlgit_path'] = self.test_dir
        metadata = Metadata(specpath, mdpath, config, DATASETS)
        repository = Repository(config, DATASETS)
        repository.init()

        tag_list = ['computer__images__dataset-ex__1']
        with mock.patch('ml_git.metadata.Metadata.list_tags', return_value=tag_list):
            target_tag = metadata.get_tag(specpath, -1)
        self.assertEqual(tag_list[0], target_tag)
        clear(metadata.path)
Exemplo n.º 29
0
    def test_tag_exist(self):
        """tag_exists on the test directory must return non-None metadata."""
        mdpath = os.path.join(self.test_dir, 'metadata')
        specpath = 'dataset-ex'
        ensure_path_exists(os.path.join(mdpath, specpath))
        shutil.copy('hdata/dataset-ex.spec',
                    os.path.join(mdpath, specpath) + '/dataset-ex.spec')
        yaml_save(files_mock, os.path.join(mdpath, specpath, 'MANIFEST.yaml'))

        config['mlgit_path'] = self.test_dir
        meta = Metadata(specpath, mdpath, config, repotype)
        repository = Repository(config, repotype)
        repository.init()

        fullmetadatapath, categories_subpath, found_metadata = meta.tag_exists(
            self.test_dir)
        self.assertIsNotNone(found_metadata)
Exemplo n.º 30
0
    def test_push(self):
        """End-to-end: index a workspace, commit its objects, push them, and
        check each indexed key exists in the (fake-credential) S3 bucket."""

        mlgit_dir = os.path.join(self.tmp_dir, '.ml-git')

        indexpath = os.path.join(mlgit_dir, 'index-test')
        mdpath = os.path.join(mlgit_dir, 'metadata-test')
        objectpath = os.path.join(mlgit_dir, 'objects-test')
        specpath = os.path.join(mdpath, 'vision-computing/images/dataset-ex')
        ensure_path_exists(specpath)
        ensure_path_exists(indexpath)
        shutil.copy('hdata/dataset-ex.spec', specpath + '/dataset-ex.spec')
        shutil.copy('hdata/config.yaml', mlgit_dir + '/config.yaml')
        manifestpath = os.path.join(specpath, 'MANIFEST.yaml')
        yaml_save(
            {
                'zdj7WjdojNAZN53Wf29rPssZamfbC6MVerzcGwd9tNciMpsQh':
                {'imghires.jpg'}
            }, manifestpath)

        # adds chunks to ml-git Index
        idx = MultihashIndex(specpath, indexpath, objectpath)
        idx.add('data-test-push/', manifestpath)

        fi = yaml_load(os.path.join(specpath, 'INDEX.yaml'))
        self.assertTrue(len(fi) > 0)
        self.assertTrue(os.path.exists(indexpath))

        # Move the indexed chunks from the index into the object store.
        o = Objects(specpath, objectpath)
        o.commit_index(indexpath, self.tmp_dir)

        self.assertTrue(os.path.exists(objectpath))
        c = yaml_load('hdata/config.yaml')
        r = LocalRepository(c, objectpath)
        r.push(objectpath, specpath + '/dataset-ex.spec')
        # Fake credentials — presumably a mocked S3 backend is active; confirm
        # against the test-class fixtures.
        s3 = boto3.resource(
            's3',
            region_name='eu-west-1',
            aws_access_key_id='fake_access_key',
            aws_secret_access_key='fake_secret_key',
        )
        # Every key recorded in the index must now have a corresponding S3 object.
        for key in idx.get_index():
            self.assertIsNotNone(s3.Object(testbucketname, key))