def test_09_add_command_with_metric_for_wrong_entity(self):
    """Check that ``--metric`` options passed to ``add`` on a dataset entity leave the spec without metrics.

    The add command is still expected to report the adding-path message and
    to index ``data/file1``; afterwards the ``metrics`` section read from the
    entity spec file must be empty.
    """
    repo_type = DATASETS
    self.set_up_add()
    create_spec(self, repo_type, self.tmp_dir)
    workspace = os.path.join(self.tmp_dir, repo_type, DATASET_NAME)
    os.makedirs(os.path.join(workspace, 'data'))
    create_file(workspace, 'file1', '0')

    metrics_options = '--metric Accuracy 1 --metric Recall 2'
    self.assertIn(
        output_messages['INFO_ADDING_PATH'] % repo_type,
        check_output(MLGIT_ADD % (repo_type, DATASET_NAME, metrics_options)))

    index = os.path.join(ML_GIT_DIR, repo_type, 'index', 'metadata', DATASET_NAME, 'INDEX.yaml')
    self._check_index(index, ['data/file1'], [])

    with open(os.path.join(workspace, DATASET_NAME + '.spec')) as spec:
        spec_file = yaml_processor.load(spec)
        spec_key = get_spec_key(repo_type)
        metrics = spec_file[spec_key].get('metrics', {})
        # assertEqual reports the unexpected metrics content on failure,
        # unlike assertTrue(metrics == {}) which only reports "False is not true".
        self.assertEqual(metrics, {})
def set_up_push(self):
    """Build a committed dataset entity so that a push can be exercised.

    Creates the workspace and spec, initializes the project, registers the
    git remote and the store, initializes the dataset metadata, adds files,
    commits them and finally checks that the entity HEAD ref file exists.
    """
    os.makedirs(self.workspace)
    create_spec(
        self, self.repo_type, self.tmp_dir,
        version=1, mutability='strict', store_type=self.store_type)

    expected_init = messages[0]
    self.assertIn(expected_init, check_output(MLGIT_INIT))

    expected_remote = messages[2] % (GIT_PATH, self.repo_type)
    remote_command = MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)
    self.assertIn(expected_remote, check_output(remote_command))

    expected_store = messages[87] % (self.store_type, self.bucket)
    store_command = 'ml-git repository store add %s --type=%s' % (self.bucket, self.store_type)
    self.assertIn(expected_store, check_output(store_command))

    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ENTITY_INIT % 'dataset'))

    add_file(self, self.repo_type, '', 'new')

    metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset', 'metadata')
    entity_path = os.path.join('computer-vision', 'images', 'dataset-ex')
    expected_commit = messages[17] % (metadata_dir, entity_path)
    commit_command = MLGIT_COMMIT % (self.repo_type, 'dataset-ex', '')
    self.assertIn(expected_commit, check_output(commit_command))

    head_ref = os.path.join(ML_GIT_DIR, 'dataset', 'refs', 'dataset-ex', 'HEAD')
    self.assertTrue(os.path.exists(head_ref))
def test_04_list_tags_without_similar_tags(self):
    """Check that listing tags of one entity does not show tags of a similarly named entity.

    After the base entity is set up through ``_list_tag_entity``, a second
    entity named ``dataset-ex2`` is created, committed and pushed. Its name
    must be absent from the tag list of ``dataset-ex`` and present in its own.
    """
    self._list_tag_entity('dataset')
    entity_type = 'dataset'
    similar_entity = 'dataset-ex2'

    os.makedirs(os.path.join('dataset', similar_entity), exist_ok=True)
    create_spec(self, 'dataset', self.tmp_dir, artifact_name=similar_entity)
    add_file(self, 'dataset', '--bumpversion', 'new', artifact_name=similar_entity)

    metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset', 'metadata')
    entity_path = os.path.join('computer-vision', 'images', similar_entity)
    expected_commit = messages[17] % (metadata_dir, entity_path)
    self.assertIn(
        expected_commit,
        check_output(MLGIT_COMMIT % ('dataset', similar_entity, '')))

    check_output(MLGIT_PUSH % ('dataset', similar_entity))

    base_entity_tags = check_output(MLGIT_TAG_LIST % (entity_type, entity_type + '-ex'))
    self.assertNotIn(similar_entity, base_entity_tags)

    similar_entity_tags = check_output(MLGIT_TAG_LIST % (entity_type, similar_entity))
    self.assertIn(similar_entity, similar_entity_tags)
def test_07_add_command_with_multiple_files(self):
    """Add files in three steps (single file, directory, whole entity) and check the index each time."""
    self.set_up_add()
    create_spec(self, DATASETS, self.tmp_dir)
    workspace = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
    os.makedirs(os.path.join(workspace, 'data'))
    for file_name, content in (('file1', '0'), ('file2', '1'), ('file3', '1')):
        create_file(workspace, file_name, content)

    index = os.path.join(ML_GIT_DIR, DATASETS, 'index', 'metadata', DATASET_NAME, 'INDEX.yaml')

    # Step 1: add only data/file1; the other two files must stay out of the index.
    single_file = os.path.join('data', 'file1')
    self.assertIn(
        output_messages['INFO_ADDING_PATH'] % DATASETS,
        check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, single_file)))
    self._check_index(index, ['data/file1'], ['data/file2', 'data/file3'])

    # Step 2: add the whole data directory.
    self.assertIn(
        output_messages['INFO_ADDING_PATH'] % DATASETS,
        check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, 'data')))
    self._check_index(index, ['data/file1', 'data/file2', 'data/file3'], [])

    # Step 3: create a new file and add without specifying any path.
    create_file(workspace, 'file4', '0')
    self.assertIn(
        output_messages['INFO_ADDING_PATH'] % DATASETS,
        check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, '')))
    self._check_index(
        index,
        ['data/file1', 'data/file2', 'data/file3', 'data/file4'],
        [])
def test_10_add_command_with_metric_file(self):
    """Check that ``add --metrics-file`` on a model entity stores the CSV metrics in the spec.

    A CSV file with ``Accuracy`` and ``Recall`` is created through
    ``create_csv_file`` and passed to the add command; the spec file is then
    loaded and its ``metrics`` section compared against those values.
    """
    repo_type = MODELS
    entity_name = '{}-ex'.format(repo_type)
    self.set_up_add(repo_type)
    create_spec(self, repo_type, self.tmp_dir)
    workspace = os.path.join(self.tmp_dir, repo_type, entity_name)
    os.makedirs(os.path.join(workspace, 'data'))
    create_file(workspace, 'file1', '0')

    csv_file = os.path.join(self.tmp_dir, 'metrics.csv')
    self.create_csv_file(csv_file, {'Accuracy': 1, 'Recall': 2})
    metrics_options = '--metrics-file="{}"'.format(csv_file)

    self.assertIn(
        output_messages['INFO_ADDING_PATH'] % repo_type,
        check_output(MLGIT_ADD % (repo_type, entity_name, metrics_options)))

    index = os.path.join(ML_GIT_DIR, repo_type, 'index', 'metadata', entity_name, 'INDEX.yaml')
    self._check_index(index, ['data/file1'], [])

    with open(os.path.join(workspace, entity_name + '.spec')) as spec:
        spec_file = yaml_processor.load(spec)
        spec_key = get_spec_key(repo_type)
        metrics = spec_file[spec_key].get('metrics', {})
        # Dedicated comparison assertions show the offending values when they
        # fail, which assertTrue/assertFalse on a boolean expression cannot do.
        self.assertNotEqual(metrics, {})
        self.assertEqual(metrics['Accuracy'], 1)
        self.assertEqual(metrics['Recall'], 2)
def test_05_add_command_without_file_added(self):
    """Run ``add`` twice with no new files; the second run (``--bumpversion``) must print ``messages[27]``."""
    self.set_up_add()
    create_spec(self, 'dataset', self.tmp_dir)

    first_add_output = check_output(MLGIT_ADD % ('dataset', 'dataset-ex', ''))
    self.assertNotIn(ERROR_MESSAGE, first_add_output)

    expected_message = messages[27]
    second_add_output = check_output(MLGIT_ADD % ('dataset', 'dataset-ex', '--bumpversion'))
    self.assertIn(expected_message, second_add_output)
def test_04_commit_command_with_version(self):
    """Exercise ``commit --version`` with invalid values (``-10``, ``test``) and a valid one (``2``)."""
    init_repository(DATASETS, self)
    create_spec(self, DATASETS, self.tmp_dir)
    workspace = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
    os.makedirs(os.path.join(workspace, 'data'))

    # First version: add one file and commit without an explicit version.
    create_file(workspace, 'file1', '0')
    self.assertIn(
        output_messages['INFO_ADDING_PATH'] % DATASETS,
        check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, "")))
    metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS, 'metadata')
    self.assertIn(
        output_messages['INFO_COMMIT_REPO'] % (metadata_dir, DATASET_NAME),
        check_output(MLGIT_COMMIT % (DATASETS, DATASET_NAME, '')))

    # Stage a second file so there is something new to commit.
    create_file(workspace, 'file2', '1')
    self.assertIn(
        output_messages['INFO_ADDING_PATH'] % DATASETS,
        check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, "")))

    # Invalid version values must be rejected.
    for bad_value, commit_option in (('-10', ' --version=-10'), ('test', '--version=test')):
        self.assertIn(
            output_messages['ERROR_INVALID_VALUE_FOR'] % ('--version', bad_value),
            check_output(MLGIT_COMMIT % (DATASETS, DATASET_NAME, commit_option)))

    # A valid explicit version commits normally.
    self.assertIn(
        output_messages['INFO_COMMIT_REPO'] % (
            os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS, 'metadata'), DATASET_NAME),
        check_output(MLGIT_COMMIT % (DATASETS, DATASET_NAME, '--version=2')))
def set_up_test(self, repo_type=MODELS):
    """Create two committed versions of an entity, each carrying different metric values.

    ``self._git_commit_time()`` is invoked after each commit, and
    ``self.TAG_TIMES`` is reset to an empty list beforehand.

    :param repo_type: entity type to set up; the entity is named ``<repo_type>-ex``.
    """
    self.TAG_TIMES = []
    entity_name = '{}-ex'.format(repo_type)
    init_repository(repo_type, self)
    create_spec(self, repo_type, self.tmp_dir)

    def add_and_commit(metric_args, commit_args):
        # Both commands are only required not to print the error marker.
        add_output = check_output(MLGIT_ADD % (repo_type, entity_name, metric_args))
        self.assertNotIn(ERROR_MESSAGE, add_output)
        commit_output = check_output(MLGIT_COMMIT % (repo_type, entity_name, commit_args))
        self.assertNotIn(ERROR_MESSAGE, commit_output)

    add_and_commit('--metric Accuracy 10 --metric Recall 10', '')
    self._git_commit_time()

    # A new file is needed before the second version can be added.
    workspace = os.path.join(self.tmp_dir, repo_type, entity_name)
    os.makedirs(os.path.join(workspace, 'data'))
    create_file(workspace, 'file1', '0')

    add_and_commit('--metric Accuracy 20 --metric Recall 20', ' --version=2')
    self._git_commit_time()
def set_up_push(self):
    """Build a committed dataset entity so that a push can be exercised.

    Creates the workspace and spec, initializes the project, registers the
    git remote and the storage, initializes the datasets metadata, adds
    files, commits them and checks that the entity HEAD ref file exists.
    """
    os.makedirs(self.workspace)
    create_spec(
        self, self.repo_type, self.tmp_dir,
        version=1, mutability=STRICT, storage_type=self.storage_type)

    expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(expected_init, check_output(MLGIT_INIT))

    expected_remote = output_messages['INFO_ADD_REMOTE'] % (GIT_PATH, self.repo_type)
    remote_command = MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)
    self.assertIn(expected_remote, check_output(remote_command))

    expected_storage = output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (
        self.storage_type, self.bucket)
    storage_command = 'ml-git repository storage add %s --type=%s' % (
        self.bucket, self.storage_type)
    self.assertIn(expected_storage, check_output(storage_command))

    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ENTITY_INIT % DATASETS))

    add_file(self, self.repo_type, '', 'new')

    metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS, 'metadata')
    expected_commit = output_messages['INFO_COMMIT_REPO'] % (metadata_dir, DATASET_NAME)
    commit_command = MLGIT_COMMIT % (self.repo_type, DATASET_NAME, '')
    self.assertIn(expected_commit, check_output(commit_command))

    head_ref = os.path.join(ML_GIT_DIR, DATASETS, 'refs', DATASET_NAME, 'HEAD')
    self.assertTrue(os.path.exists(head_ref))
def _create_entity_with_mutability(self, entity_type, mutability_type):
    """Create, commit and push an entity with the given mutability, then clear the local copies.

    :param entity_type: entity type; the entity is named ``<entity_type>-ex``.
    :param mutability_type: mutability value forwarded to ``create_spec``.
    """
    entity_name = entity_type + '-ex'
    init_repository(entity_type, self)
    workspace = os.path.join(self.tmp_dir, entity_type, entity_name)
    create_spec(self, entity_type, self.tmp_dir, 1, mutability_type)
    os.makedirs(os.path.join(workspace, 'data'))
    create_file(workspace, 'file1', '0')

    add_output = check_output(MLGIT_ADD % (entity_type, entity_name, ''))
    self.assertNotIn(ERROR_MESSAGE, add_output)

    metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type, 'metadata')
    entity_path = os.path.join('computer-vision', 'images', entity_name)
    expected_commit = messages[17] % (metadata_dir, entity_path)
    self.assertIn(
        expected_commit,
        check_output(MLGIT_COMMIT % (entity_type, entity_name, '')))

    push_output = check_output(MLGIT_PUSH % (entity_type, entity_name))
    self.assertNotIn(ERROR_MESSAGE, push_output)

    # Remove the local state; order matches the original cleanup sequence.
    for path in (
            os.path.join(self.tmp_dir, ML_GIT_DIR),
            workspace,
            os.path.join(self.tmp_dir, entity_type)):
        clear(path)
def _create_entity_with_mutability(self, entity_type, mutability_type):
    """Create an entity with the given mutability, push it via ``_push_files`` and clear paths.

    :param entity_type: entity type; the workspace is ``<tmp_dir>/<entity_type>/<entity_type>-ex``.
    :param mutability_type: mutability value forwarded to ``create_spec``.
    """
    init_repository(entity_type, self)
    entity_workspace = os.path.join(self.tmp_dir, entity_type, entity_type + '-ex')
    data_dir = os.path.join(entity_workspace, 'data')

    create_spec(self, entity_type, self.tmp_dir, 1, mutability_type)
    os.makedirs(data_dir)
    create_file(entity_workspace, 'file1', '0')

    self._push_files(entity_type, '')
    self._clear_path()
def test_06_commit_with_large_version_number(self):
    """Check that ``commit --version=9999999999`` is rejected for datasets, models and labels."""
    init_repository(DATASETS, self)
    create_spec(self, DATASETS, self.tmp_dir)

    targets = (
        (DATASETS, DATASET_NAME),
        (MODELS, MODELS + '-ex'),
        (LABELS, LABELS + '-ex'),
    )
    for entity_type, entity_name in targets:
        expected_error = output_messages['ERROR_INVALID_VALUE_FOR'] % ('--version', '9999999999')
        commit_output = check_output(
            MLGIT_COMMIT % (entity_type, entity_name, ' --version=9999999999'))
        self.assertIn(expected_error, commit_output)
def test_03_checkout(self):
    """Push a dataset, wipe the local copy, check it out again and verify the files are restored.

    The ``AZURE_STORAGE_CONNECTION_STRING`` environment variable is compared
    with ``self.dev_store_account_`` before both the push and the checkout.
    """
    os.makedirs(self.workspace)
    create_spec(
        self, self.repo_type, self.tmp_dir,
        version=1, mutability='strict', store_type=self.store_type)

    self.assertIn(messages[0], check_output(MLGIT_INIT))

    expected_remote = messages[2] % (GIT_PATH, self.repo_type)
    self.assertIn(
        expected_remote,
        check_output(MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)))

    expected_store = messages[87] % (self.store_type, self.bucket)
    store_command = 'ml-git repository store add %s --type=%s' % (self.bucket, self.store_type)
    self.assertIn(expected_store, check_output(store_command))

    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ENTITY_INIT % 'dataset'))

    add_file(self, self.repo_type, '', 'new')

    metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset', 'metadata')
    entity_path = os.path.join('computer-vision', 'images', 'dataset-ex')
    self.assertIn(
        messages[17] % (metadata_dir, entity_path),
        check_output(MLGIT_COMMIT % (self.repo_type, 'dataset-ex', '')))

    head_ref = os.path.join(ML_GIT_DIR, 'dataset', 'refs', 'dataset-ex', 'HEAD')
    self.assertTrue(os.path.exists(head_ref))

    self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'), self.dev_store_account_)
    push_output = check_output(MLGIT_PUSH % (self.repo_type, 'dataset-ex'))
    self.assertNotIn(ERROR_MESSAGE, push_output)

    # Drop the workspace and the entity metadata, then re-initialize the entity.
    clear(self.workspace)
    clear(os.path.join(ML_GIT_DIR, 'dataset'))
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ENTITY_INIT % self.repo_type))

    self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'), self.dev_store_account_)
    checkout_command = MLGIT_CHECKOUT % (
        self.repo_type, 'computer-vision__images__dataset-ex__1')
    self.assertNotIn(ERROR_MESSAGE, check_output(checkout_command))

    ws_path = os.path.join(self.tmp_dir, 'dataset', 'computer-vision', 'images', 'dataset-ex')
    for restored in ('newfile0', 'newfile1', 'newfile2', 'newfile3', 'newfile4'):
        self.assertTrue(os.path.isfile(os.path.join(ws_path, restored)))
def setUp_test(self):
    """Initialize a dataset repository, add the entity and commit it with ``self.COMMIT_MESSAGE``."""
    init_repository('dataset', self)
    create_spec(self, 'dataset', self.tmp_dir)

    expected_add = messages[13] % 'dataset'
    add_output = check_output(MLGIT_ADD % ('dataset', 'dataset-ex', ''))
    self.assertIn(expected_add, add_output)

    metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset', 'metadata')
    entity_path = os.path.join('computer-vision', 'images', 'dataset-ex')
    expected_commit = messages[17] % (metadata_dir, entity_path)
    commit_options = '-m ' + self.COMMIT_MESSAGE
    commit_output = check_output(MLGIT_COMMIT % ('dataset', 'dataset-ex', commit_options))
    self.assertIn(expected_commit, commit_output)
def test_05_commit_command_with_deprecated_version_number(self):
    """Check that ``commit --version-number=2`` is answered with the no-such-option error."""
    init_repository(DATASETS, self)
    create_spec(self, DATASETS, self.tmp_dir)

    workspace = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
    os.makedirs(os.path.join(workspace, 'data'))
    create_file(workspace, 'file1', '0')

    expected_add = output_messages['INFO_ADDING_PATH'] % DATASETS
    add_output = check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, ""))
    self.assertIn(expected_add, add_output)

    commit_output = check_output(
        MLGIT_COMMIT % (DATASETS, DATASET_NAME, '--version-number=2'))
    expected_error = output_messages['ERROR_NO_SUCH_OPTION'] % '--version-number'
    self.assertIn(expected_error, commit_output)
def test_03_checkout(self):
    """Push a dataset, wipe the local copy, check out ``DATASET_TAG`` and verify the files are restored.

    The ``AZURE_STORAGE_CONNECTION_STRING`` environment variable is compared
    with ``self.dev_store_account_`` before both the push and the checkout.
    """
    os.makedirs(self.workspace)
    create_spec(
        self, self.repo_type, self.tmp_dir,
        version=1, mutability=STRICT, storage_type=self.storage_type)

    expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(expected_init, check_output(MLGIT_INIT))

    expected_remote = output_messages['INFO_ADD_REMOTE'] % (GIT_PATH, self.repo_type)
    self.assertIn(
        expected_remote,
        check_output(MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)))

    expected_storage = output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (
        self.storage_type, self.bucket)
    storage_command = 'ml-git repository storage add %s --type=%s' % (
        self.bucket, self.storage_type)
    self.assertIn(expected_storage, check_output(storage_command))

    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ENTITY_INIT % DATASETS))

    add_file(self, self.repo_type, '', 'new')

    metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS, 'metadata')
    self.assertIn(
        output_messages['INFO_COMMIT_REPO'] % (metadata_dir, DATASET_NAME),
        check_output(MLGIT_COMMIT % (self.repo_type, DATASET_NAME, '')))

    head_ref = os.path.join(ML_GIT_DIR, DATASETS, 'refs', DATASET_NAME, 'HEAD')
    self.assertTrue(os.path.exists(head_ref))

    self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'), self.dev_store_account_)
    push_output = check_output(MLGIT_PUSH % (self.repo_type, DATASET_NAME))
    self.assertNotIn(ERROR_MESSAGE, push_output)

    # Drop the workspace and the entity metadata, then re-initialize the entity.
    clear(self.workspace)
    clear(os.path.join(ML_GIT_DIR, DATASETS))
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ENTITY_INIT % self.repo_type))

    self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'), self.dev_store_account_)
    checkout_output = check_output(MLGIT_CHECKOUT % (self.repo_type, DATASET_TAG))
    self.assertNotIn(ERROR_MESSAGE, checkout_output)

    ws_path = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
    for restored in ('newfile0', 'newfile1', 'newfile2', 'newfile3', 'newfile4'):
        self.assertTrue(os.path.isfile(os.path.join(ws_path, restored)))
def test_05_add_command_without_file_added(self):
    """Run ``add`` twice with no new files; the second run must report ``INFO_NO_NEW_DATA_TO_ADD``."""
    self.set_up_add()
    create_spec(self, DATASETS, self.tmp_dir)

    first_add_output = check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, ''))
    self.assertNotIn(ERROR_MESSAGE, first_add_output)

    expected_message = output_messages['INFO_NO_NEW_DATA_TO_ADD']
    second_add_output = check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, '--bumpversion'))
    self.assertIn(expected_message, second_add_output)
def test_16_add_command_with_metric_file_empty(self):
    """Check that ``add --metrics-file=`` (no value) on a model entity reports ``ERROR_EMPTY_VALUE``."""
    repo_type = MODELS
    entity_name = '{}-ex'.format(repo_type)
    self.set_up_add(repo_type)
    create_spec(self, repo_type, self.tmp_dir)

    entity_workspace = os.path.join(self.tmp_dir, repo_type, entity_name)
    os.makedirs(os.path.join(entity_workspace, 'data'))
    create_file(entity_workspace, 'file1', '0')

    expected_error = output_messages['ERROR_EMPTY_VALUE']
    add_output = check_output(MLGIT_ADD % (repo_type, entity_name, '--metrics-file='))
    self.assertIn(expected_error, add_output)
def set_up_test(self, repo_type=DATASETS, with_metrics=False):
    """Initialize a repository, add the entity (optionally with metrics) and commit it.

    :param repo_type: entity type; the entity is named ``<repo_type>-ex``.
    :param with_metrics: when true, ``--metric Accuracy 1 --metric Recall 2`` is passed to add.
    """
    entity = '{}-ex'.format(repo_type)
    init_repository(repo_type, self)
    create_spec(self, repo_type, self.tmp_dir)

    metrics_options = '--metric Accuracy 1 --metric Recall 2' if with_metrics else ''

    expected_add = output_messages['INFO_ADDING_PATH'] % repo_type
    add_output = check_output(MLGIT_ADD % (repo_type, entity, metrics_options))
    self.assertIn(expected_add, add_output)

    metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, repo_type, 'metadata')
    expected_commit = output_messages['INFO_COMMIT_REPO'] % (metadata_dir, entity)
    commit_options = '-m ' + self.COMMIT_MESSAGE
    commit_output = check_output(MLGIT_COMMIT % (repo_type, entity, commit_options))
    self.assertIn(expected_commit, commit_output)
def test_17_add_command_with_empty_metric_file(self):
    """Check that ``add --metrics-file`` pointing at an empty CSV reports ``ERROR_INVALID_METRICS_FILE``."""
    repo_type = MODELS
    entity_name = '{}-ex'.format(repo_type)
    self.set_up_add(repo_type)
    create_spec(self, repo_type, self.tmp_dir)

    entity_workspace = os.path.join(self.tmp_dir, repo_type, entity_name)
    os.makedirs(os.path.join(entity_workspace, 'data'))
    create_file(entity_workspace, 'file1', '0')

    # Create a metrics file with no content at all.
    csv_file = os.path.join(self.tmp_dir, 'metrics.csv')
    with open(csv_file, 'wt') as empty_csv:
        empty_csv.write('')

    metrics_options = '--metrics-file="{}"'.format(csv_file)
    expected_error = output_messages['ERROR_INVALID_METRICS_FILE']
    add_output = check_output(MLGIT_ADD % (repo_type, entity_name, metrics_options))
    self.assertIn(expected_error, add_output)
def test_06_commit_with_large_version_number(self):
    """Check that ``commit --version=9999999999`` prints ``messages[96]`` for dataset, model and labels."""
    init_repository('dataset', self)
    create_spec(self, 'dataset', self.tmp_dir)

    for entity_type in ('dataset', 'model', 'labels'):
        expected_error = messages[96] % '9999999999'
        commit_output = check_output(
            MLGIT_COMMIT % (entity_type, entity_type + '-ex', ' --version=9999999999'))
        self.assertIn(expected_error, commit_output)
def set_up_test(self, entity):
    """Create an entity workspace with four files, then add and commit it through the API.

    :param entity: entity type; the entity is named ``<entity>-ex``.
    """
    entity_name = entity + '-ex'
    init_repository(entity, self)

    workspace = os.path.join(self.tmp_dir, entity, entity_name)
    os.makedirs(workspace, exist_ok=True)
    create_spec(self, entity, self.tmp_dir, 20, STRICT)
    os.makedirs(os.path.join(workspace, 'data'), exist_ok=True)

    fixtures = (('file1', '0'), ('file2', '1'), ('file3', 'a'), ('file4', 'b'))
    for file_name, content in fixtures:
        self.create_file(workspace, file_name, content)

    api.add(entity, entity_name, bumpversion=True, fsck=False, file_path=['file'])
    api.commit(entity, entity_name)
def set_up_unlock(self, entity_type, mutability_type):
    """Create an entity with the given mutability, write a 2048-byte file and add it with ``--bumpversion``.

    :param entity_type: entity type; the entity is named ``<entity_type>-ex``.
    :param mutability_type: mutability value forwarded to ``create_spec``.
    """
    entity_name = entity_type + '-ex'
    init_repository(entity_type, self)

    # The workspace path is relative, as in the original set-up.
    workspace = os.path.join(entity_type, entity_name)
    create_spec(self, entity_type, self.tmp_dir, 1, mutability=mutability_type)
    os.makedirs(os.path.join(workspace, 'data'))

    target_path = os.path.join(workspace, self.file)
    with open(target_path, 'w') as handle:
        handle.write('0' * 2048)

    add_output = check_output(MLGIT_ADD % (entity_type, entity_name, '--bumpversion'))
    self.assertNotIn(ERROR_MESSAGE, add_output)
def test_05_commit_command_with_deprecated_version_number(self):
    """Commit with ``--version-number=2`` and expect both ``messages[106]`` and the commit message in the output."""
    entity_name = 'dataset-ex'
    init_repository('dataset', self)
    create_spec(self, 'dataset', self.tmp_dir)

    workspace = os.path.join(self.tmp_dir, 'dataset', entity_name)
    os.makedirs(os.path.join(workspace, 'data'))
    create_file(workspace, 'file1', '0')

    expected_add = messages[13] % 'dataset'
    add_output = check_output(MLGIT_ADD % ('dataset', entity_name, ""))
    self.assertIn(expected_add, add_output)

    result = check_output(MLGIT_COMMIT % ('dataset', entity_name, '--version-number=2'))

    expected_option_notice = messages[106] % ('--version-number', '--version')
    self.assertIn(expected_option_notice, result)

    metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset', 'metadata')
    entity_path = os.path.join('computer-vision', 'images', entity_name)
    self.assertIn(messages[17] % (metadata_dir, entity_path), result)
def test_07_add_command_with_multiple_files(self):
    """Add files in three steps (single file, directory, whole entity) and check the index each time."""
    self.set_up_add()
    create_spec(self, 'dataset', self.tmp_dir)
    workspace = os.path.join(self.tmp_dir, 'dataset', 'dataset-ex')
    os.makedirs(os.path.join(workspace, 'data'))
    for file_name, content in (('file1', '0'), ('file2', '1'), ('file3', '1')):
        create_file(workspace, file_name, content)

    index = os.path.join(ML_GIT_DIR, 'dataset', 'index', 'metadata', 'dataset-ex', 'INDEX.yaml')

    # Step 1: add only data/file1; the other two files must stay out of the index.
    single_file = os.path.join('data', 'file1')
    self.assertIn(
        messages[13] % 'dataset',
        check_output(MLGIT_ADD % ('dataset', 'dataset-ex', single_file)))
    self._check_index(index, ['data/file1'], ['data/file2', 'data/file3'])

    # Step 2: add the whole data directory.
    self.assertIn(
        messages[13] % 'dataset',
        check_output(MLGIT_ADD % ('dataset', 'dataset-ex', 'data')))
    self._check_index(index, ['data/file1', 'data/file2', 'data/file3'], [])

    # Step 3: create a new file and add without specifying any path.
    create_file(workspace, 'file4', '0')
    self.assertIn(
        messages[13] % 'dataset',
        check_output(MLGIT_ADD % ('dataset', 'dataset-ex', '')))
    self._check_index(
        index,
        ['data/file1', 'data/file2', 'data/file3', 'data/file4'],
        [])
def set_up_push(self, create_know_file=False):
    """Build a committed entity backed by a storage configured with SSH-style connection options.

    :param create_know_file: when true, a file named ``file`` containing
        10011 ``'0'`` characters is written into the entity workspace before
        the regular files are added.
    """
    os.makedirs(self.workspace)
    create_spec(
        self, self.repo_type, self.tmp_dir,
        version=1, mutability=STRICT, storage_type=self.storage_type)

    expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(expected_init, check_output(MLGIT_INIT))

    expected_remote = output_messages['INFO_ADD_REMOTE'] % (GIT_PATH, self.repo_type)
    self.assertIn(
        expected_remote,
        check_output(MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)))

    expected_storage = output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (
        self.storage_type, self.bucket)
    # The bucket name and its connection options are passed as one argument string.
    bucket_with_options = (
        'mlgit --username=mlgit_user '
        '--endpoint-url=127.0.0.1 --port=9922 --private-key=' + FAKE_SSH_KEY_PATH)
    storage_command = 'ml-git repository storage add %s --type=%s' % (
        bucket_with_options, self.storage_type)
    self.assertIn(expected_storage, check_output(storage_command))

    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ENTITY_INIT % DATASETS))

    if create_know_file:
        known_file_path = os.path.join(self.repo_type, DATASET_NAME, 'file')
        with open(known_file_path, 'wt') as known_file:
            known_file.write('0' * 10011)

    add_file(self, self.repo_type, '', 'new')

    metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, self.repo_type, 'metadata')
    expected_commit = output_messages['INFO_COMMIT_REPO'] % (metadata_dir, DATASET_NAME)
    self.assertIn(
        expected_commit,
        check_output(MLGIT_COMMIT % (self.repo_type, DATASET_NAME, '')))

    head_ref = os.path.join(ML_GIT_DIR, self.repo_type, 'refs', DATASET_NAME, 'HEAD')
    self.assertTrue(os.path.exists(head_ref))
def test_04_list_tags_without_similar_tags(self):
    """Check that listing tags of one entity does not show tags of a similarly named entity.

    After the base entity is set up through ``_list_tag_entity``, a second
    entity named ``datasets-ex2`` is created, committed and pushed. Its name
    must be absent from the tag list of ``<DATASETS>-ex`` and present in its own.
    """
    self._list_tag_entity(DATASETS)
    entity_type = DATASETS
    similar_entity = 'datasets-ex2'

    os.makedirs(os.path.join(DATASETS, similar_entity), exist_ok=True)
    create_spec(self, DATASETS, self.tmp_dir, artifact_name=similar_entity)
    add_file(self, DATASETS, '--bumpversion', 'new', artifact_name=similar_entity)

    metadata_dir = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS, 'metadata')
    expected_commit = output_messages['INFO_COMMIT_REPO'] % (metadata_dir, similar_entity)
    self.assertIn(
        expected_commit,
        check_output(MLGIT_COMMIT % (DATASETS, similar_entity, '')))

    check_output(MLGIT_PUSH % (DATASETS, similar_entity))

    base_entity_tags = check_output(MLGIT_TAG_LIST % (entity_type, entity_type + '-ex'))
    self.assertNotIn(similar_entity, base_entity_tags)

    similar_entity_tags = check_output(MLGIT_TAG_LIST % (entity_type, similar_entity))
    self.assertIn(similar_entity, similar_entity_tags)
def test_04_commit_command_with_version(self):
    """Exercise ``commit --version`` with invalid values (``-10``, ``test``) and a valid one (``2``)."""
    entity_name = 'dataset-ex'
    init_repository('dataset', self)
    create_spec(self, 'dataset', self.tmp_dir)
    workspace = os.path.join(self.tmp_dir, 'dataset', entity_name)
    os.makedirs(os.path.join(workspace, 'data'))

    # First version: add one file and commit without an explicit version.
    create_file(workspace, 'file1', '0')
    self.assertIn(
        messages[13] % 'dataset',
        check_output(MLGIT_ADD % ('dataset', entity_name, "")))
    self.assertIn(
        messages[17] % (
            os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset', 'metadata'),
            os.path.join('computer-vision', 'images', entity_name)),
        check_output(MLGIT_COMMIT % ('dataset', entity_name, '')))

    # Stage a second file so there is something new to commit.
    create_file(workspace, 'file2', '1')
    self.assertIn(
        messages[13] % 'dataset',
        check_output(MLGIT_ADD % ('dataset', entity_name, "")))

    # Invalid version values must be rejected.
    for bad_value, commit_option in (('-10', ' --version=-10'), ('test', '--version=test')):
        self.assertIn(
            messages[96] % bad_value,
            check_output(MLGIT_COMMIT % ('dataset', entity_name, commit_option)))

    # A valid explicit version commits normally.
    self.assertIn(
        messages[17] % (
            os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset', 'metadata'),
            os.path.join('computer-vision', 'images', entity_name)),
        check_output(MLGIT_COMMIT % ('dataset', entity_name, '--version=2')))
def test_20_model_related(self):
    """Check out a model together with its related dataset and labels.

    A model, a dataset and a labels entity are created; the dataset and the
    labels are pushed, then the model is committed with ``--dataset`` and
    ``--labels`` references and pushed. After the local workspaces and
    metadata are cleared, the model is checked out with ``-d -l`` and all
    three entity directories are expected to exist again.
    """
    git_server = os.path.join(self.tmp_dir, GIT_PATH)
    ml_git_dir = os.path.join(self.tmp_dir, '.ml-git')

    def metadata_dir(entity_type):
        return os.path.join(self.tmp_dir, '.ml-git', entity_type, 'metadata')

    def prepare_entity(entity_type):
        # Register remote and storage, init the metadata, then create the
        # workspace with a spec and a single 1024-byte file.
        self.assertIn(
            output_messages['INFO_ADD_REMOTE'] % (git_server, entity_type),
            check_output(MLGIT_REMOTE_ADD % (entity_type, git_server)))
        self.assertIn(
            output_messages['INFO_ADD_STORAGE'] % (STORAGE_TYPE, BUCKET_NAME, PROFILE),
            check_output(MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE)))
        self.assertIn(
            output_messages['INFO_METADATA_INIT'] % (git_server, metadata_dir(entity_type)),
            check_output(MLGIT_ENTITY_INIT % entity_type))
        edit_config_yaml(ml_git_dir)
        entity_workspace = os.path.join(entity_type, entity_type + '-ex')
        os.makedirs(entity_workspace)
        create_spec(self, entity_type, self.tmp_dir, 1)
        with open(os.path.join(self.tmp_dir, entity_workspace, 'file1'), 'wb') as data_file:
            data_file.write(b'0' * 1024)
        return entity_workspace

    self.assertIn(
        output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir,
        check_output(MLGIT_INIT))

    workspace_model = prepare_entity(MODELS)
    workspace_dataset = prepare_entity(DATASETS)

    expected_push_result = '2.00/2.00'

    # Dataset: add, commit, push.
    self.assertIn(
        output_messages['INFO_ADDING_PATH'] % DATASETS,
        check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, '--bumpversion')))
    self.assertIn(
        output_messages['INFO_COMMIT_REPO'] % (metadata_dir(DATASETS), DATASET_NAME),
        check_output(MLGIT_COMMIT % (DATASETS, DATASET_NAME, '')))
    self.assertIn(
        expected_push_result,
        check_output(MLGIT_PUSH % (DATASETS, DATASET_NAME)))

    # Labels: prepare, add, commit, push.
    workspace_labels = prepare_entity(LABELS)
    labels_name = LABELS + '-ex'
    self.assertIn(
        output_messages['INFO_ADDING_PATH'] % LABELS,
        check_output(MLGIT_ADD % (LABELS, labels_name, '--bumpversion')))
    self.assertIn(
        output_messages['INFO_COMMIT_REPO'] % (metadata_dir(LABELS), labels_name),
        check_output(MLGIT_COMMIT % (LABELS, labels_name, '')))
    self.assertIn(
        expected_push_result,
        check_output(MLGIT_PUSH % (LABELS, labels_name)))

    # Model: add, commit with references to the dataset and labels, push.
    model_name = MODELS + '-ex'
    self.assertIn(
        output_messages['INFO_ADDING_PATH'] % MODELS,
        check_output(MLGIT_ADD % (MODELS, model_name, '--bumpversion')))
    model_commit_command = (
        MLGIT_COMMIT % (MODELS, model_name, '--dataset=datasets-ex') + ' --labels=labels-ex')
    self.assertIn(
        output_messages['INFO_COMMIT_REPO'] % (metadata_dir(MODELS), model_name),
        check_output(model_commit_command))
    self.assertIn(
        expected_push_result,
        check_output(MLGIT_PUSH % (MODELS, model_name)))

    # Adjust file permissions before clearing the local state.
    for entity_workspace in (workspace_model, workspace_dataset, workspace_labels):
        set_write_read(os.path.join(self.tmp_dir, entity_workspace, 'file1'))
    if not sys.platform.startswith('linux'):
        recursive_write_read(os.path.join(self.tmp_dir, '.ml-git'))

    for entity_type in (MODELS, DATASETS, LABELS):
        clear(os.path.join(self.tmp_dir, entity_type))
    for entity_type in (MODELS, DATASETS, LABELS):
        clear(os.path.join(self.tmp_dir, '.ml-git', entity_type))

    self.assertIn(
        output_messages['INFO_METADATA_INIT'] % (git_server, metadata_dir(MODELS)),
        check_output(MLGIT_ENTITY_INIT % MODELS))
    checkout_command = (
        MLGIT_CHECKOUT % (MODELS, 'computer-vision__images__models-ex__2') + ' -d -l')
    self.assertNotIn(ERROR_MESSAGE, check_output(checkout_command))

    for entity_type in (MODELS, DATASETS, LABELS):
        self.assertTrue(os.path.exists(os.path.join(self.tmp_dir, entity_type)))
def test_20_model_related(self):
    """Check out a model together with its related dataset and labels.

    A model, a dataset and a labels entity are created; the dataset and the
    labels are pushed, then the model is committed with ``--dataset`` and
    ``--labels`` references and pushed. After the local workspaces and
    metadata are cleared, the model is checked out with ``-d -l`` and all
    three entity directories are expected to exist again.
    """
    model = 'model'
    dataset = 'dataset'
    labels = 'labels'
    git_server = os.path.join(self.tmp_dir, GIT_PATH)

    self.assertIn(messages[0], check_output(MLGIT_INIT))

    # Model: remote, store, metadata init, workspace with one 1024-byte file.
    self.assertIn(messages[2] % (git_server, model),
                  check_output(MLGIT_REMOTE_ADD % (model, git_server)))
    self.assertIn(messages[7] % (STORE_TYPE, BUCKET_NAME, PROFILE),
                  check_output(MLGIT_STORE_ADD % (BUCKET_NAME, PROFILE)))
    self.assertIn(messages[8] % (git_server, os.path.join(self.tmp_dir, '.ml-git', model, 'metadata')),
                  check_output(MLGIT_ENTITY_INIT % 'model'))
    edit_config_yaml(os.path.join(self.tmp_dir, '.ml-git'))
    workspace_model = os.path.join(model, model + '-ex')
    os.makedirs(workspace_model)
    version = 1
    create_spec(self, model, self.tmp_dir, version)
    with open(os.path.join(self.tmp_dir, workspace_model, 'file1'), 'wb') as z:
        z.write(b'0' * 1024)

    # Dataset: same preparation, then add, commit and push.
    self.assertIn(messages[2] % (git_server, dataset),
                  check_output(MLGIT_REMOTE_ADD % (dataset, git_server)))
    self.assertIn(messages[7] % (STORE_TYPE, BUCKET_NAME, PROFILE),
                  check_output(MLGIT_STORE_ADD % (BUCKET_NAME, PROFILE)))
    self.assertIn(messages[8] % (git_server, os.path.join(self.tmp_dir, '.ml-git', dataset, 'metadata')),
                  check_output(MLGIT_ENTITY_INIT % 'dataset'))
    edit_config_yaml(os.path.join(self.tmp_dir, '.ml-git'))
    workspace_dataset = os.path.join(dataset, dataset + '-ex')
    os.makedirs(workspace_dataset)
    version = 1
    create_spec(self, dataset, self.tmp_dir, version)
    with open(os.path.join(self.tmp_dir, workspace_dataset, 'file1'), 'wb') as z:
        z.write(b'0' * 1024)
    self.assertIn(messages[13] % 'dataset',
                  check_output(MLGIT_ADD % ('dataset', 'dataset-ex', '--bumpversion')))
    self.assertIn(messages[17] % (os.path.join(self.tmp_dir, '.ml-git', 'dataset', 'metadata'),
                                  os.path.join('computer-vision', 'images', 'dataset-ex')),
                  check_output(MLGIT_COMMIT % ('dataset', 'dataset-ex', '')))
    self.assertIn(messages[47], check_output(MLGIT_PUSH % ('dataset', 'dataset-ex')))

    # Labels: same preparation, then add, commit and push.
    self.assertIn(messages[2] % (git_server, labels),
                  check_output(MLGIT_REMOTE_ADD % (labels, git_server)))
    self.assertIn(messages[7] % (STORE_TYPE, BUCKET_NAME, PROFILE),
                  check_output(MLGIT_STORE_ADD % (BUCKET_NAME, PROFILE)))
    self.assertIn(messages[8] % (git_server, os.path.join(self.tmp_dir, '.ml-git', labels, 'metadata')),
                  check_output(MLGIT_ENTITY_INIT % labels))
    edit_config_yaml(os.path.join(self.tmp_dir, '.ml-git'))
    workspace_labels = os.path.join(labels, labels + '-ex')
    os.makedirs(workspace_labels)
    version = 1
    create_spec(self, labels, self.tmp_dir, version)
    with open(os.path.join(self.tmp_dir, workspace_labels, 'file1'), 'wb') as z:
        z.write(b'0' * 1024)
    self.assertIn(messages[15],
                  check_output(MLGIT_ADD % ('labels', 'labels-ex', '--bumpversion')))
    self.assertIn(messages[17] % (os.path.join(self.tmp_dir, '.ml-git', 'labels', 'metadata'),
                                  os.path.join('computer-vision', 'images', 'labels-ex')),
                  check_output(MLGIT_COMMIT % ('labels', 'labels-ex', '')))
    self.assertIn(messages[47], check_output(MLGIT_PUSH % ('labels', 'labels-ex')))

    # Model: add, commit with references to the dataset and labels, push.
    self.assertIn(messages[14],
                  check_output(MLGIT_ADD % ('model', 'model-ex', '--bumpversion')))
    self.assertIn(messages[17] % (os.path.join(self.tmp_dir, '.ml-git', 'model', 'metadata'),
                                  os.path.join('computer-vision', 'images', 'model-ex')),
                  check_output(MLGIT_COMMIT % ('model', 'model-ex', '--dataset=dataset-ex')
                               + ' --labels=labels-ex'))
    self.assertIn(messages[47], check_output(MLGIT_PUSH % ('model', 'model-ex')))

    # Adjust file permissions before clearing the local state.
    set_write_read(os.path.join(self.tmp_dir, workspace_model, 'file1'))
    set_write_read(os.path.join(self.tmp_dir, workspace_dataset, 'file1'))
    set_write_read(os.path.join(self.tmp_dir, workspace_labels, 'file1'))
    if not sys.platform.startswith('linux'):
        recursive_write_read(os.path.join(self.tmp_dir, '.ml-git'))
    clear(os.path.join(self.tmp_dir, model))
    clear(os.path.join(self.tmp_dir, dataset))
    clear(os.path.join(self.tmp_dir, labels))
    clear(os.path.join(self.tmp_dir, '.ml-git', model))
    clear(os.path.join(self.tmp_dir, '.ml-git', dataset))
    clear(os.path.join(self.tmp_dir, '.ml-git', labels))

    self.assertIn(messages[8] % (git_server, os.path.join(self.tmp_dir, '.ml-git', model, 'metadata')),
                  check_output(MLGIT_ENTITY_INIT % model))
    # The previous assertIn('', output) could never fail, because the empty
    # string is contained in every string. Require an error-free checkout.
    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_CHECKOUT % ('model', 'computer-vision__images__model-ex__2')
                                  + ' -d -l'))
    self.assertTrue(os.path.exists(os.path.join(self.tmp_dir, model)))
    self.assertTrue(os.path.exists(os.path.join(self.tmp_dir, dataset)))
    self.assertTrue(os.path.exists(os.path.join(self.tmp_dir, labels)))