Пример #1
0
    def test_05_checkout_bare_in_older_tag(self):
        """Push a new version on top of tag ``__1`` and check out tag ``__3``.

        Steps: create a strict dataset, check out its first tag, add
        ``file3`` and set the spec version to 2, push, wipe the workspace,
        then check out tag ``__3`` with ``bare=False`` and assert that both
        ``data/file1`` and ``data/file3`` exist.

        NOTE(review): the spec is written with version 2 but the tag checked
        out afterwards ends in ``__3`` -- presumably ``_push_files`` bumps
        the version; confirm against that helper.
        """
        entity_type = 'dataset'
        entity_name = entity_type + '-ex'
        tag_prefix = 'computer-vision__images__' + entity_name + '__'

        self._create_entity_with_mutability(entity_type, 'strict')
        data_path = os.path.join(self.tmp_dir, entity_type, 'computer-vision', 'images', entity_name)
        data_dir = os.path.join(data_path, 'data')

        self._clear_path()
        self._checkout_entity(entity_type, tag=tag_prefix + '1')

        # Add one more file to the workspace restored from the first tag.
        os.mkdir(data_dir)
        create_file(data_path, 'file3', '1')

        spec_path = os.path.join(data_path, entity_name + '.spec')
        with open(spec_path, 'r') as spec_in:
            spec = yaml_processor.load(spec_in)

        with open(spec_path, 'w') as spec_out:
            spec[entity_type]['version'] = 2
            yaml_processor.dump(spec, spec_out)

        self._push_files(entity_type)
        self._clear_path()
        self._checkout_entity(entity_type, tag=tag_prefix + '3', bare=False)

        for expected_file in ('file1', 'file3'):
            self.assertTrue(os.path.exists(os.path.join(data_dir, expected_file)))
Пример #2
0
    def _clean_up_local_config(self):
        """Blank the ``git`` entry of every entity section in the local config.

        Loads ``.ml-git/config.yaml`` under ``self.tmp_dir``, sets the
        ``git`` value of the ``datasets``, ``labels`` and ``models`` sections
        to an empty string and writes the file back.
        """
        config_path = os.path.join(self.tmp_dir, '.ml-git/config.yaml')

        with open(config_path) as config_in:
            config = yaml_processor.load(config_in)

        for section in ('datasets', 'labels', 'models'):
            config[section]['git'] = ''

        with open(config_path, 'w') as config_out:
            yaml_processor.dump(config, config_out)
Пример #3
0
 def set_up_global(self, entity_type=DATASETS):
     """Prepare a global ml-git configuration for ``entity_type``.

     Runs the init, global remote-add and global storage-add commands
     (asserting that none of them prints ``ERROR_MESSAGE``), sets the
     ``endpoint-url`` of the ``mlgit`` S3H storage in the global config file
     to ``MINIO_ENDPOINT_URL`` and finally clears ``ML_GIT_DIR`` under
     ``self.tmp_dir``.
     """
     init_output = check_output(MLGIT_INIT)
     self.assertNotIn(ERROR_MESSAGE, init_output)
     disable_wizard_in_config(self.tmp_dir)

     remote_output = check_output(
         MLGIT_REMOTE_ADD_GLOBAL % (entity_type, 'local_git_server.git'))
     self.assertNotIn(ERROR_MESSAGE, remote_output)

     storage_output = check_output(
         MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE + ' --global'))
     self.assertNotIn(ERROR_MESSAGE, storage_output)

     global_config_path = os.path.join(self.tmp_dir, GLOBAL_ML_GIT_CONFIG)
     with open(global_config_path, 'r') as config_in:
         config = yaml_processor.load(config_in)
         s3h_storages = config[STORAGE_CONFIG_KEY][StorageType.S3H.value]
         s3h_storages['mlgit']['endpoint-url'] = MINIO_ENDPOINT_URL

     with open(global_config_path, 'w') as config_out:
         yaml_processor.dump(config, config_out)

     clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
Пример #4
0
    def set_up_test(self):
        """Publish a ``dataset-ex`` dataset and reset the local repository.

        Writes a strict spec (version 9) plus four small files into the
        workspace, then adds (with ``bumpversion=True``), commits and pushes
        it through the API. After asserting that the metadata path exists,
        the ml-git directory and the workspace are cleared and the
        repository is initialised again.
        """
        entity = 'dataset'
        name = 'dataset-ex'

        init_repository(entity, self)

        workspace = os.path.join(self.tmp_dir, entity, name)
        os.makedirs(workspace, exist_ok=True)

        manifest = {'files': 'MANIFEST.yaml', 'store': 's3h://mlgit'}
        spec = {
            entity: {
                'categories': ['computer-vision', 'images'],
                'manifest': manifest,
                'mutability': Mutability.STRICT.value,
                'name': name,
                'version': 9,
            },
        }

        spec_path = os.path.join(workspace, 'dataset-ex.spec')
        with open(spec_path, 'w') as spec_file:
            yaml_processor.dump(spec, spec_file)

        os.makedirs(os.path.join(workspace, 'data'), exist_ok=True)

        fixtures = (('file1', '0'), ('file2', '1'), ('file3', 'a'), ('file4', 'b'))
        for file_name, content in fixtures:
            self.create_file(workspace, file_name, content)

        api.add(entity, name, bumpversion=True)
        api.commit(entity, name)
        api.push(entity, name)

        metadata_path = os.path.join(self.tmp_dir, self.metadata)
        self.assertTrue(os.path.exists(metadata_path))

        # Drop the local state so that the test starts from a clean repository.
        clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
        clear(workspace)
        init_repository(entity, self)
Пример #5
0
    def set_up_test(self):
        """Publish the ``DATASET_NAME`` dataset and reset the local repository.

        Writes a strict spec (version 10) plus four small files into the
        workspace, then adds, commits and pushes it through the API. After
        asserting that the metadata path exists, the ml-git directory and
        the workspace are cleared and the repository is initialised again.
        """
        init_repository(DATASETS, self)

        workspace = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
        os.makedirs(workspace, exist_ok=True)

        manifest = {
            'files': 'MANIFEST.yaml',
            STORAGE_SPEC_KEY: '%s://mlgit' % S3H,
        }
        entity_spec = {
            'categories': ['computer-vision', 'images'],
            'manifest': manifest,
            'mutability': STRICT,
            'name': DATASET_NAME,
            'version': 10,
        }

        spec_path = os.path.join(workspace, 'datasets-ex.spec')
        with open(spec_path, 'w') as spec_file:
            yaml_processor.dump({DATASET_SPEC_KEY: entity_spec}, spec_file)

        os.makedirs(os.path.join(workspace, 'data'), exist_ok=True)

        for file_name, content in zip(('file1', 'file2', 'file3', 'file4'),
                                      ('0', '1', 'a', 'b')):
            self.create_file(workspace, file_name, content)

        # Publish the entity: stage, commit, then push.
        for publish_step in (api.add, api.commit, api.push):
            publish_step(DATASETS, DATASET_NAME)

        metadata_path = os.path.join(self.tmp_dir, self.metadata)
        self.assertTrue(os.path.exists(metadata_path))

        # Drop the local state so that the test starts from a clean repository.
        clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
        clear(workspace)
        init_repository(DATASETS, self)
Пример #6
0
 def _change_mutability(self, entity_type, mutability_type,
                        spec_with_categories, ws_spec):
     """Rewrite a spec file with a new mutability and version 2.

     ``ws_spec`` is modified in place: its ``entity_type`` section gets
     ``mutability`` set to ``mutability_type`` and ``version`` set to 2.
     The whole mapping is then dumped to the file at
     ``spec_with_categories``, replacing its previous contents.
     """
     with open(spec_with_categories, 'w') as spec_file:
         entity_section = ws_spec[entity_type]
         entity_section['mutability'] = mutability_type
         entity_section['version'] = 2
         yaml_processor.dump(ws_spec, spec_file)
    def test_01_change_metadata(self):
        """Check status/add/commit/push when only metadata files change.

        The spec and a README are staged, both are modified again (spec
        version becomes 17), then everything is committed and pushed. The
        local repository is wiped and re-initialised, tag ``__17`` is checked
        out, and the restored spec version and README content are verified.
        """
        init_repository(DATASETS, self)

        status_cmd = MLGIT_STATUS % (DATASETS, DATASET_NAME)
        add_cmd = MLGIT_ADD % (DATASETS, DATASET_NAME, '')
        adding_path_msg = output_messages['INFO_ADDING_PATH'] % DATASETS

        # Fresh repository: the spec is the only untracked file.
        self.assertRegex(
            check_output(status_cmd),
            r'Changes to be committed:\n\nUntracked files:\n\tdatasets-ex.spec\n\nCorrupted files')
        self.assertIn(adding_path_msg, check_output(add_cmd))

        readme = os.path.join(DATASETS, DATASET_NAME, 'README.md')
        with open(readme, 'w') as readme_file:
            readme_file.write('NEW')

        # The spec is staged now and the new README shows up as untracked.
        self.assertRegex(
            check_output(status_cmd),
            r'Changes to be committed:\n\tNew file: datasets-ex.spec\n\nUntracked files:\n\tREADME.md\n\nCorrupted files')
        self.assertIn(adding_path_msg, check_output(add_cmd))

        status = check_output(status_cmd)
        for staged_entry in ('New file: datasets-ex.spec', 'New file: README.md'):
            self.assertIn(staged_entry, status)

        # Modify both files again after they have been staged.
        with open(readme, 'w') as readme_file:
            readme_file.write('NEW2')

        spec = {
            DATASET_SPEC_KEY: {
                'categories': ['computer-vision', 'images'],
                'manifest': {
                    'files': 'MANIFEST.yaml',
                    STORAGE_SPEC_KEY: '%s://mlgit' % S3H,
                },
                'mutability': STRICT,
                'name': 'datasets-ex',
                'version': 16,
            },
        }
        workspace_spec = os.path.join(DATASETS, DATASET_NAME, 'datasets-ex.spec')
        with open(workspace_spec, 'w') as spec_out:
            spec[DATASET_SPEC_KEY]['version'] = 17
            yaml_processor.dump(spec, spec_out)

        status = check_output(status_cmd)
        # NOTE(review): the negative checks use lowercase 'new file:' while the
        # positive checks above match 'New file:' -- confirm the intended casing.
        for stale_entry, file_name in (('new file: README.md', 'README.md'),
                                       ('new file: datasets-ex.spec', 'datasets-ex.spec')):
            self.assertNotIn(stale_entry, status)
            self.assertIn(file_name, status)

        self.assertIn(adding_path_msg, check_output(add_cmd))

        metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS, 'metadata')
        commit_msg = output_messages['INFO_COMMIT_REPO'] % (metadata_dir, DATASET_NAME)
        self.assertIn(commit_msg, check_output(MLGIT_COMMIT % (DATASETS, DATASET_NAME, '')))

        self.assertIn('No blobs', check_output(MLGIT_PUSH % (DATASETS, DATASET_NAME)))

        self.assertRegex(
            check_output(status_cmd),
            r'Changes to be committed:\n\nUntracked files:\n\nCorrupted files')

        # Wipe the local state and rebuild the repository from the remote.
        clear(ML_GIT_DIR)
        clear(DATASETS)

        init_msg = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
        self.assertIn(init_msg, check_output(MLGIT_INIT))

        remote_msg = output_messages['INFO_ADD_REMOTE'] % (GIT_PATH, DATASETS)
        self.assertIn(remote_msg, check_output(MLGIT_REMOTE_ADD % (DATASETS, GIT_PATH)))

        storage_msg = output_messages['INFO_ADD_STORAGE'] % (STORAGE_TYPE, BUCKET_NAME, PROFILE)
        self.assertIn(storage_msg,
                      check_output(MLGIT_STORAGE_ADD_WITH_TYPE % (BUCKET_NAME, PROFILE, STORAGE_TYPE)))

        metadata_init_msg = output_messages['INFO_METADATA_INIT'] % (GIT_PATH, metadata_dir)
        self.assertIn(metadata_init_msg, check_output(MLGIT_ENTITY_INIT % DATASETS))

        check_output(MLGIT_CHECKOUT % (DATASETS, 'computer-vision__images__datasets-ex__17'))

        checkout_dir = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
        spec_file = os.path.join(checkout_dir, 'datasets-ex.spec')
        readme = os.path.join(checkout_dir, 'README.md')

        with open(spec_file, 'r') as spec_in:
            spec = yaml_processor.load(spec_in)
        self.assertEqual(spec[DATASET_SPEC_KEY]['version'], 17)

        with open(readme, 'r') as readme_in:
            readme_text = readme_in.read()
        self.assertEqual(readme_text, 'NEW2')
Пример #8
0
    def test_01_change_metadata(self):
        """Check status/add/commit/push when only metadata files change.

        The spec and a README are staged, both are modified again (spec
        version becomes 17), then everything is committed and pushed. The
        local repository is wiped and re-initialised, tag ``__17`` is checked
        out, and the restored spec version and README content are verified
        under the ``computer-vision/images`` directory.
        """
        entity = 'dataset'
        name = 'dataset-ex'

        init_repository(entity, self)

        status_cmd = MLGIT_STATUS % (entity, name)
        add_cmd = MLGIT_ADD % (entity, name, '')
        adding_path_msg = messages[13] % 'dataset'

        # Fresh repository: the spec is the only untracked file.
        self.assertRegex(
            check_output(status_cmd),
            r'Changes to be committed:\n\nUntracked files:\n\tdataset-ex.spec\n\nCorrupted files')
        self.assertIn(adding_path_msg, check_output(add_cmd))

        readme = os.path.join(entity, name, 'README.md')
        with open(readme, 'w') as readme_file:
            readme_file.write('NEW')

        # The spec is staged now and the new README shows up as untracked.
        self.assertRegex(
            check_output(status_cmd),
            r'Changes to be committed:\n\tNew file: dataset-ex.spec\n\nUntracked files:\n\tREADME.md\n\nCorrupted files')
        self.assertIn(adding_path_msg, check_output(add_cmd))

        status = check_output(status_cmd)
        for staged_entry in ('New file: dataset-ex.spec', 'New file: README.md'):
            self.assertIn(staged_entry, status)

        # Modify both files again after they have been staged.
        with open(readme, 'w') as readme_file:
            readme_file.write('NEW2')

        spec = {
            'dataset': {
                'categories': ['computer-vision', 'images'],
                'manifest': {
                    'files': 'MANIFEST.yaml',
                    'store': 's3h://mlgit',
                },
                'mutability': 'strict',
                'name': 'dataset-ex',
                'version': 16,
            },
        }
        workspace_spec = os.path.join(entity, name, 'dataset-ex.spec')
        with open(workspace_spec, 'w') as spec_out:
            spec['dataset']['version'] = 17
            yaml_processor.dump(spec, spec_out)

        status = check_output(status_cmd)
        # NOTE(review): the negative checks use lowercase 'new file:' while the
        # positive checks above match 'New file:' -- confirm the intended casing.
        for stale_entry, file_name in (('new file: README.md', 'README.md'),
                                       ('new file: dataset-ex.spec', 'dataset-ex.spec')):
            self.assertNotIn(stale_entry, status)
            self.assertIn(file_name, status)

        self.assertIn(adding_path_msg, check_output(add_cmd))

        metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset', 'metadata')
        categories_path = os.path.join('computer-vision', 'images', 'dataset-ex')
        commit_msg = messages[17] % (metadata_dir, categories_path)
        self.assertIn(commit_msg, check_output(MLGIT_COMMIT % (entity, name, '')))

        self.assertIn('No blobs', check_output(MLGIT_PUSH % (entity, name)))

        self.assertRegex(
            check_output(status_cmd),
            r'Changes to be committed:\n\nUntracked files:\n\nCorrupted files')

        # Wipe the local state and rebuild the repository from the remote.
        clear(ML_GIT_DIR)
        clear('dataset')

        self.assertIn(messages[0], check_output(MLGIT_INIT))

        remote_msg = messages[2] % (GIT_PATH, 'dataset')
        self.assertIn(remote_msg, check_output(MLGIT_REMOTE_ADD % (entity, GIT_PATH)))

        store_msg = messages[7] % (STORE_TYPE, BUCKET_NAME, PROFILE)
        self.assertIn(
            store_msg,
            check_output(MLGIT_STORE_ADD_WITH_TYPE % (BUCKET_NAME, PROFILE, STORE_TYPE)))

        metadata_init_msg = messages[8] % (GIT_PATH, metadata_dir)
        self.assertIn(metadata_init_msg, check_output(MLGIT_ENTITY_INIT % 'dataset'))

        check_output(MLGIT_CHECKOUT % (entity, 'computer-vision__images__dataset-ex__17'))

        checkout_dir = os.path.join(self.tmp_dir, 'dataset', 'computer-vision',
                                    'images', 'dataset-ex')
        spec_file = os.path.join(checkout_dir, 'dataset-ex.spec')
        readme = os.path.join(checkout_dir, 'README.md')

        with open(spec_file, 'r') as spec_in:
            spec = yaml_processor.load(spec_in)
        self.assertEqual(spec['dataset']['version'], 17)

        with open(readme, 'r') as readme_in:
            readme_text = readme_in.read()
        self.assertEqual(readme_text, 'NEW2')