def test_28_checkout_entity_with_ignore_file(self):
    # Publishes a dataset whose workspace contains an ignore file, wipes the
    # local metadata and workspace, checks the entity out again and asserts
    # that the ignore file and data/file1 are present while data/image.png
    # is absent.
    entity = DATASETS
    init_repository(entity, self)
    workspace = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
    data_dir = os.path.join(workspace, 'data')
    os.mkdir(data_dir)
    for file_name in ('image.png', 'file1'):
        create_file(workspace, file_name, '0')
    create_ignore_file(workspace)

    # add -> commit -> push, none of which may report an error.
    publish_commands = (
        MLGIT_ADD % (DATASETS, DATASET_NAME, '--bumpversion'),
        MLGIT_COMMIT % (DATASETS, DATASET_NAME, ''),
        MLGIT_PUSH % (DATASETS, DATASET_NAME),
    )
    for command in publish_commands:
        self.assertNotIn(ERROR_MESSAGE, check_output(command))

    # Drop the local state so the checkout has to restore everything.
    clear(os.path.join(self.tmp_dir, ML_GIT_DIR, entity))
    clear(workspace)

    restore_commands = (
        MLGIT_ENTITY_INIT % entity,
        MLGIT_CHECKOUT % (entity, DATASET_NAME),
    )
    for command in restore_commands:
        self.assertNotIn(ERROR_MESSAGE, check_output(command))

    ignore_file = os.path.join(workspace, MLGIT_IGNORE_FILE_NAME)
    self.assertTrue(os.path.exists(ignore_file))
    self.assertTrue(os.path.exists(os.path.join(data_dir, 'file1')))
    self.assertFalse(os.path.exists(os.path.join(data_dir, 'image.png')))
def _push_entity(self, entity_type):
    """Add, commit and push ``<entity_type>-ex`` and verify the outcome.

    Verifies that the commit output contains the expected message, that the
    entity's HEAD ref file exists, that the push reports no error, that the
    expected object exists under ``MINIO_BUCKET_PATH`` and that
    ``git describe --tags`` in the metadata repository reports version 2.

    The working directory is switched to the metadata repository only for
    the final checks and is always restored afterwards, so a failing
    assertion cannot leave later code running inside the metadata directory.

    :param entity_type: entity type used for the ml-git commands and paths.
    """
    entity_name = entity_type + '-ex'
    bucket_object = os.path.join(
        MINIO_BUCKET_PATH, 'zdj7WWjGAAJ8gdky5FKcVLfd63aiRUGb8fkc8We2bvsp9WW12')

    clear(bucket_object)
    init_repository(entity_type, self)
    add_file(self, entity_type, '--bumpversion', 'new', file_content='0')

    metadata_path = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type, 'metadata')
    self.assertIn(
        messages[17] % (metadata_path,
                        os.path.join('computer-vision', 'images', entity_name)),
        check_output(MLGIT_COMMIT % (entity_type, entity_name, '')))

    head_file = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type,
                             'refs', entity_name, 'HEAD')
    self.assertTrue(os.path.exists(head_file))
    self.assertNotIn(
        ERROR_MESSAGE,
        check_output(MLGIT_PUSH % (entity_type, entity_name)))

    # `git describe` has to run inside the metadata repository; restore the
    # previous working directory even when an assertion below fails.
    previous_cwd = os.getcwd()
    os.chdir(metadata_path)
    try:
        self.assertTrue(os.path.exists(bucket_object))
        self.assertIn('computer-vision__images__' + entity_type + '-ex__2',
                      check_output('git describe --tags'))
    finally:
        os.chdir(previous_cwd)
def _clear_workspace(self, entity):
    """Clear the entity's local metadata and workspace, then re-init it.

    Asserts that the entity-init output contains the ``INFO_METADATA_INIT``
    message built from the git path and the entity's metadata path.

    :param entity: entity type whose local state is cleared.
    """
    entity_root = os.path.join(self.tmp_dir, ML_GIT_DIR, entity)
    clear(entity_root)
    clear(os.path.join(self.tmp_dir, entity))

    expected_message = output_messages['INFO_METADATA_INIT'] % (
        os.path.join(self.tmp_dir, GIT_PATH),
        os.path.join(entity_root, 'metadata'))
    self.assertIn(expected_message, check_output(MLGIT_ENTITY_INIT % entity))
def set_up_add_test(self, entity=DATASETS):
    """Reset the local state and create two workspace files for add tests.

    Clears the ml-git directory and the entity workspace, initialises the
    repository and creates ``file`` (content ``'0'``) and ``file2``
    (content ``'1'``) through ``self.create_file_in_ws``.

    :param entity: entity type to prepare; defaults to ``DATASETS``.
    """
    for stale_dir in (ML_GIT_DIR, entity):
        clear(os.path.join(self.tmp_dir, stale_dir))
    init_repository(entity, self)
    for file_name, content in (('file', '0'), ('file2', '1')):
        self.create_file_in_ws(entity, file_name, content)
def test_07_gc_basic_flow(self):
    # Runs the repository gc, checks the reported result, then verifies
    # that a new file can still be added, committed as version 3, pushed
    # and restored by a fresh checkout.
    entity = 'dataset'
    entity_name = entity + '-ex'
    self.set_up_gc(entity)
    original_size, number_of_files = self._get_metadata_info()

    gc_output = check_output(MLGIT_REPOSITORY_GC)
    self._check_result(gc_output, entity, original_size, number_of_files,
                       expected_removed_files=3,
                       expected_reclaimed_space='2.1 kB')

    # 1 KiB file written after the gc run.
    new_file = os.path.join(self.tmp_dir, 'dataset', 'computer-vision',
                            'images', 'dataset-ex', 'file-after-gc')
    with open(new_file, 'wb') as handle:
        handle.write(b'1' * 1024)

    publish_commands = (
        MLGIT_ADD % (entity, entity_name, ''),
        MLGIT_COMMIT % (entity, entity_name, '--version=3'),
        MLGIT_PUSH % (entity, entity_name),
    )
    for command in publish_commands:
        self.assertNotIn(ERROR_MESSAGE, check_output(command))

    clear(os.path.join(self.tmp_dir, ML_GIT_DIR, entity))
    clear(os.path.join(self.tmp_dir, entity))

    restore_commands = (
        MLGIT_ENTITY_INIT % entity,
        MLGIT_CHECKOUT % (entity, entity + '-ex --version=3'),
    )
    for command in restore_commands:
        self.assertNotIn(ERROR_MESSAGE, check_output(command))

    self.assertTrue(os.path.exists(new_file))
def set_up_checkout(self, entity):
    """Publish an entity with two data files and reset the local state.

    Creates ``file`` and ``file2`` under the workspace's ``dataset-ex/data``
    directory, runs add/commit/push with assertions on their output, clears
    the entity's local metadata and workspace, and finally re-runs the
    entity init.

    NOTE(review): the status set-up and the add step are pinned to
    ``'dataset'`` / ``'dataset-ex'`` while commit and push follow ``entity``;
    presumably callers only pass ``'dataset'`` - confirm.

    :param entity: entity type used for commit, push, clearing and init.
    """
    entity_name = entity + '-ex'
    entity_root = os.path.join(self.tmp_dir, ML_GIT_DIR, entity)
    metadata_path = os.path.join(entity_root, 'metadata')
    workspace = os.path.join(self.tmp_dir, entity)

    self.set_up_status('dataset')
    data_path = os.path.join(workspace, 'dataset-ex', 'data')
    os.makedirs(data_path, exist_ok=True)
    for file_name in ('file', 'file2'):
        create_file(data_path, file_name, '0', '')

    self.assertIn(messages[13] % 'dataset',
                  check_output(MLGIT_ADD % ('dataset', 'dataset-ex', '')))

    expected_commit = messages[17] % (
        metadata_path,
        os.path.join('computer-vision', 'images', entity_name))
    self.assertIn(expected_commit,
                  check_output(MLGIT_COMMIT % (entity, entity_name, '')))

    head_file = os.path.join(entity_root, 'refs', entity_name, 'HEAD')
    self.assertTrue(os.path.exists(head_file))
    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_PUSH % (entity, entity_name)))

    clear(entity_root)
    clear(workspace)

    expected_init = messages[8] % (
        os.path.join(self.tmp_dir, GIT_PATH),
        os.path.join(entity_root, 'metadata'))
    self.assertIn(expected_init, check_output(MLGIT_ENTITY_INIT % entity))
def test_08_push_after_remote_del(self):
    # Commits a dataset, removes the remote through self._remote_del and
    # asserts that the push reports ERROR_REMOTE_NOT_FOUND and that the
    # expected object is not present under MINIO_BUCKET_PATH.
    bucket_object = os.path.join(
        MINIO_BUCKET_PATH, 'zdj7WWjGAAJ8gdky5FKcVLfd63aiRUGb8fkc8We2bvsp9WW12')
    clear(bucket_object)

    entity_type = DATASETS
    entity_name = entity_type + '-ex'
    init_repository(entity_type, self)
    add_file(self, entity_type, '--bumpversion', 'new', file_content='0')

    entity_root = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type)
    metadata_path = os.path.join(entity_root, 'metadata')
    expected_commit = output_messages['INFO_COMMIT_REPO'] % (metadata_path, entity_name)
    self.assertIn(expected_commit,
                  check_output(MLGIT_COMMIT % (entity_type, entity_name, '')))

    head_file = os.path.join(entity_root, 'refs', entity_name, 'HEAD')
    self.assertTrue(os.path.exists(head_file))

    self._remote_del(entity_type)
    self.assertIn(output_messages['ERROR_REMOTE_NOT_FOUND'],
                  check_output(MLGIT_PUSH % (entity_type, entity_name)))
    self.assertFalse(os.path.exists(bucket_object))
def set_up_checkout(self, entity):
    """Publish an entity with two data files and reset the local state.

    Creates ``file`` and ``file2`` under ``<workspace>/DATASET_NAME/data``,
    runs add/commit/push with assertions on their output, clears the
    entity's local metadata and workspace, and re-runs the entity init.

    NOTE(review): the status set-up and the add step use ``DATASETS`` /
    ``DATASET_NAME`` while commit and push follow ``entity``; presumably
    callers only pass ``DATASETS`` - confirm.

    :param entity: entity type used for commit, push, clearing and init.
    """
    entity_name = entity + '-ex'
    entity_root = os.path.join(self.tmp_dir, ML_GIT_DIR, entity)
    metadata_path = os.path.join(entity_root, 'metadata')
    workspace = os.path.join(self.tmp_dir, entity)

    self.set_up_status(DATASETS)
    data_path = os.path.join(workspace, DATASET_NAME, 'data')
    os.makedirs(data_path, exist_ok=True)
    for file_name in ('file', 'file2'):
        create_file(data_path, file_name, '0', '')

    self.assertIn(output_messages['INFO_ADDING_PATH'] % DATASETS,
                  check_output(MLGIT_ADD % (DATASETS, DATASET_NAME, '')))

    expected_commit = output_messages['INFO_COMMIT_REPO'] % (metadata_path, entity_name)
    self.assertIn(expected_commit,
                  check_output(MLGIT_COMMIT % (entity, entity_name, '')))

    head_file = os.path.join(entity_root, 'refs', entity_name, 'HEAD')
    self.assertTrue(os.path.exists(head_file))
    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_PUSH % (entity, entity_name)))

    clear(entity_root)
    clear(workspace)

    expected_init = output_messages['INFO_METADATA_INIT'] % (
        os.path.join(self.tmp_dir, GIT_PATH),
        os.path.join(entity_root, 'metadata'))
    self.assertIn(expected_init, check_output(MLGIT_ENTITY_INIT % entity))
def test_05_gc_deleted_entity(self):
    # Prepares datasets and labels for gc, clears the labels workspace,
    # runs the repository gc and checks both the INFO_STARTING_GC message
    # for labels and the reported result.
    for entity in (DATASETS, LABELS):
        self.set_up_gc(entity)
    original_size, number_of_files = self._get_metadata_info()

    labels_workspace = os.path.join(self.tmp_dir, LABELS)
    clear(labels_workspace)

    gc_output = check_output(MLGIT_REPOSITORY_GC)
    self.assertIn(output_messages['INFO_STARTING_GC'] % LABELS, gc_output)
    self._check_result(gc_output, DATASETS, original_size, number_of_files,
                       expected_removed_files=21,
                       expected_reclaimed_space='33.7 kB')
def test_03_checkout(self):
    # Configures the project and store by hand, publishes dataset-ex, wipes
    # the local state and asserts that checking out tag version 1 restores
    # newfile0 .. newfile4 in the workspace.
    os.makedirs(self.workspace)
    create_spec(self, self.repo_type, self.tmp_dir, version=1,
                mutability='strict', store_type=self.store_type)

    self.assertIn(messages[0], check_output(MLGIT_INIT))

    expected_remote = messages[2] % (GIT_PATH, self.repo_type)
    self.assertIn(expected_remote,
                  check_output(MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)))

    expected_store = messages[87] % (self.store_type, self.bucket)
    store_add_command = 'ml-git repository store add %s --type=%s' % (
        self.bucket, self.store_type)
    self.assertIn(expected_store, check_output(store_add_command))

    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ENTITY_INIT % 'dataset'))
    add_file(self, self.repo_type, '', 'new')

    metadata_path = os.path.join(ML_GIT_DIR, 'dataset', 'metadata')
    expected_commit = messages[17] % (
        os.path.join(self.tmp_dir, metadata_path),
        os.path.join('computer-vision', 'images', 'dataset-ex'))
    self.assertIn(expected_commit,
                  check_output(MLGIT_COMMIT % (self.repo_type, 'dataset-ex', '')))

    # Relative path: resolved against the current working directory.
    head_file = os.path.join(ML_GIT_DIR, 'dataset', 'refs', 'dataset-ex', 'HEAD')
    self.assertTrue(os.path.exists(head_file))

    self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                     self.dev_store_account_)
    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_PUSH % (self.repo_type, 'dataset-ex')))

    clear(self.workspace)
    clear(os.path.join(ML_GIT_DIR, 'dataset'))
    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_ENTITY_INIT % self.repo_type))

    self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                     self.dev_store_account_)
    checkout_command = MLGIT_CHECKOUT % (
        self.repo_type, 'computer-vision__images__dataset-ex__1')
    self.assertNotIn(ERROR_MESSAGE, check_output(checkout_command))

    ws_path = os.path.join(self.tmp_dir, 'dataset', 'computer-vision',
                           'images', 'dataset-ex')
    for index in range(5):
        restored_file = os.path.join(ws_path, 'newfile%d' % index)
        self.assertTrue(os.path.isfile(restored_file))
def test_03_checkout_with_two_entities_wit_same_name(self):
    # Creates 'dataset-ex' twice (under 'images' and under 'video') and
    # asserts that a checkout by bare entity name reports the
    # multiple-entities error listing both tags.
    entity = 'dataset'
    self._create_entity(entity, 'images')
    clear(os.path.join(self.tmp_dir, '.ml-git'))
    self._create_entity(entity, 'video')

    expected_init = output_messages['INFO_METADATA_INIT'] % (
        os.path.join(self.tmp_dir, GIT_PATH),
        os.path.join(self.tmp_dir, ML_GIT_DIR, entity, 'metadata'))
    self.assertIn(expected_init, check_output(MLGIT_ENTITY_INIT % entity))

    ambiguous_tags = (
        'computer-vision__images__dataset-ex__2',
        'computer-vision__video__dataset-ex__2',
    )
    # Each tag is listed on its own tab-indented line after the message.
    expected_error = (output_messages['ERROR_MULTIPLES_ENTITIES_WITH_SAME_NAME']
                      + '\t' + '\n\t'.join(ambiguous_tags))
    self.assertIn(expected_error,
                  check_output(MLGIT_CHECKOUT % ('dataset', 'dataset-ex')))
def set_up_checkout(self, entity):
    """Publish an entity and then wipe the whole local ml-git state.

    Runs ``configure_global`` for ``DATASETS``, initialises the repository,
    adds files, commits and pushes (asserting neither reports an error),
    then clears the ml-git directory and the entity workspace.

    :param entity: entity type to publish and clear.
    """
    entity_name = entity + '-ex'
    configure_global(self, DATASETS)
    init_repository(entity, self)
    add_file(self, entity, '', 'new')

    publish_commands = (
        MLGIT_COMMIT % (entity, entity_name, ''),
        MLGIT_PUSH % (entity, entity_name),
    )
    for command in publish_commands:
        self.assertNotIn(ERROR_MESSAGE, check_output(command))

    for stale_dir in (ML_GIT_DIR, entity):
        clear(os.path.join(self.tmp_dir, stale_dir))
def test_06_status_after_delete_file(self):
    # Checks out the dataset tag, removes both data files and asserts that
    # the short status reports two deleted files under data/.
    self.set_up_checkout(DATASETS)
    checkout_output = check_output(MLGIT_CHECKOUT % (DATASETS, DATASET_TAG))
    self.assertNotIn(ERROR_MESSAGE, checkout_output)

    data_path = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME, 'data')
    for file_name in ('file', 'file2'):
        clear(os.path.join(data_path, file_name))

    status_output = check_output(MLGIT_STATUS_SHORT % (DATASETS, DATASET_NAME))
    expected_pattern = (DATASET_NO_COMMITS_INFO_REGEX
                        + r'Changes to be committed:\s+'
                          r'Deleted: data/\t->\t2 FILES')
    self.assertRegex(status_output, expected_pattern)
def test_03_checkout(self):
    # Configures the project and storage by hand, publishes the dataset,
    # wipes the local state and asserts that checking out DATASET_TAG
    # restores newfile0 .. newfile4 in the workspace.
    os.makedirs(self.workspace)
    create_spec(self, self.repo_type, self.tmp_dir, version=1,
                mutability=STRICT, storage_type=self.storage_type)

    expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(expected_init, check_output(MLGIT_INIT))

    expected_remote = output_messages['INFO_ADD_REMOTE'] % (GIT_PATH, self.repo_type)
    self.assertIn(expected_remote,
                  check_output(MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)))

    expected_storage = output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (
        self.storage_type, self.bucket)
    storage_add_command = 'ml-git repository storage add %s --type=%s' % (
        self.bucket, self.storage_type)
    self.assertIn(expected_storage, check_output(storage_add_command))

    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_ENTITY_INIT % DATASETS))
    add_file(self, self.repo_type, '', 'new')

    metadata_path = os.path.join(ML_GIT_DIR, DATASETS, 'metadata')
    expected_commit = output_messages['INFO_COMMIT_REPO'] % (
        os.path.join(self.tmp_dir, metadata_path), DATASET_NAME)
    self.assertIn(expected_commit,
                  check_output(MLGIT_COMMIT % (self.repo_type, DATASET_NAME, '')))

    # Relative path: resolved against the current working directory.
    head_file = os.path.join(ML_GIT_DIR, DATASETS, 'refs', DATASET_NAME, 'HEAD')
    self.assertTrue(os.path.exists(head_file))

    self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                     self.dev_store_account_)
    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_PUSH % (self.repo_type, DATASET_NAME)))

    clear(self.workspace)
    clear(os.path.join(ML_GIT_DIR, DATASETS))
    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_ENTITY_INIT % self.repo_type))

    self.assertEqual(os.getenv('AZURE_STORAGE_CONNECTION_STRING'),
                     self.dev_store_account_)
    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_CHECKOUT % (self.repo_type, DATASET_TAG)))

    ws_path = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
    for index in range(5):
        restored_file = os.path.join(ws_path, 'newfile%d' % index)
        self.assertTrue(os.path.isfile(restored_file))
def test_26_adding_data_based_in_older_tag(self):
    # Publishes new data on top of tag 1 twice (the second time explicitly
    # as version 3), then asserts that a fresh checkout of tag 3 contains
    # newfile6 but not newfile5 and has 7 files.
    entity = 'dataset'
    entity_name = entity + '-ex'
    self.set_up_checkout(entity)

    first_checkout = MLGIT_CHECKOUT % (entity, 'computer-vision__images__dataset-ex__1')
    self.assertNotIn(ERROR_MESSAGE, check_output(first_checkout))

    workspace = os.path.join(self.tmp_dir, entity, 'computer-vision',
                             'images', entity_name)
    create_file(workspace, 'newfile5', '0', file_path='')
    populate_entity_with_new_data(self, entity)

    # Go back to tag 1 before adding the data for the next version.
    back_to_tag_1 = MLGIT_CHECKOUT % ('dataset', 'computer-vision__images__dataset-ex__1')
    self.assertNotIn(ERROR_MESSAGE, check_output(back_to_tag_1))
    expected_files_in_tag_1 = 6
    self.check_amount_of_files(entity, expected_files_in_tag_1, sampling=False)

    create_file(workspace, 'newfile6', '0', file_path='')
    populate_entity_with_new_data(self, entity, bumpversion='', version='--version=3')

    entity_root = os.path.join(self.tmp_dir, ML_GIT_DIR, entity)
    clear(entity_root)
    clear(workspace)

    expected_init = messages[8] % (
        os.path.join(self.tmp_dir, GIT_PATH),
        os.path.join(entity_root, 'metadata'))
    self.assertIn(expected_init, check_output(MLGIT_ENTITY_INIT % entity))

    checkout_tag_3 = MLGIT_CHECKOUT % (entity, 'computer-vision__images__dataset-ex__3')
    self.assertNotIn(ERROR_MESSAGE, check_output(checkout_tag_3))

    path_of_tag_2_file = os.path.join(workspace, 'newfile5')
    path_of_tag_3_file = os.path.join(workspace, 'newfile6')
    self.assertFalse(os.path.exists(path_of_tag_2_file))
    self.assertTrue(os.path.exists(path_of_tag_3_file))

    expected_files_in_tag_3 = 7
    self.check_amount_of_files(entity, expected_files_in_tag_3, sampling=False)
def _push_entity(self, entity_type):
    """Add, commit and push ``<entity_type>-ex`` and check the metadata.

    Asserts the commit message, the existence of the HEAD ref file and an
    error-free push, then delegates to ``self.check_metadata_after_push``.

    :param entity_type: entity type used for the ml-git commands and paths.
    """
    entity_name = entity_type + '-ex'
    entity_root = os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type)

    clear(os.path.join(MINIO_BUCKET_PATH,
                       'zdj7WWjGAAJ8gdky5FKcVLfd63aiRUGb8fkc8We2bvsp9WW12'))
    init_repository(entity_type, self)
    add_file(self, entity_type, '--bumpversion', 'new', file_content='0')

    metadata_path = os.path.join(entity_root, 'metadata')
    expected_commit = output_messages['INFO_COMMIT_REPO'] % (metadata_path, entity_name)
    self.assertIn(expected_commit,
                  check_output(MLGIT_COMMIT % (entity_type, entity_name, '')))

    head_file = os.path.join(entity_root, 'refs', entity_name, 'HEAD')
    self.assertTrue(os.path.exists(head_file))

    push_output = check_output(MLGIT_PUSH % (entity_type, entity_name))
    self.assertNotIn(ERROR_MESSAGE, push_output)
    self.check_metadata_after_push(entity_type)
def set_up_fetch(self, entity='dataset'):
    """Publish an entity and reset its local state ahead of a fetch test.

    Adds, commits and pushes ``<entity>-ex`` (asserting on the output of
    each step), clears the entity's local metadata and workspace, and
    re-runs the entity init.

    :param entity: entity type to prepare; defaults to ``'dataset'``.
    """
    entity_name = entity + '-ex'
    init_repository(entity, self)
    add_file(self, entity, '', 'new')

    entity_root = os.path.join(self.tmp_dir, ML_GIT_DIR, entity)
    metadata_path = os.path.join(entity_root, 'metadata')
    workspace = os.path.join(self.tmp_dir, entity)

    expected_commit = messages[17] % (
        metadata_path,
        os.path.join('computer-vision', 'images', entity_name))
    self.assertIn(expected_commit,
                  check_output(MLGIT_COMMIT % (entity, entity_name, '')))

    head_file = os.path.join(entity_root, 'refs', entity_name, 'HEAD')
    self.assertTrue(os.path.exists(head_file))
    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_PUSH % (entity, entity_name)))

    clear(entity_root)
    clear(workspace)

    expected_init = messages[8] % (
        os.path.join(self.tmp_dir, GIT_PATH),
        os.path.join(entity_root, 'metadata'))
    self.assertIn(expected_init, check_output(MLGIT_ENTITY_INIT % entity))
def _clear_path(self, entity_type=DATASETS):
    """Clear the ml-git directory, the entity cache and the workspace.

    The paths are cleared in a fixed order: the ml-git directory, the
    entity's cache directory, the ``<entity_type>-ex`` workspace and finally
    the entity's top-level directory.

    :param entity_type: entity type whose paths are cleared; defaults to
        ``DATASETS``.
    """
    tmp_dir = self.tmp_dir
    paths_to_clear = (
        os.path.join(tmp_dir, ML_GIT_DIR),
        os.path.join(tmp_dir, ML_GIT_DIR, entity_type, 'cache'),
        os.path.join(tmp_dir, entity_type, entity_type + '-ex'),
        os.path.join(tmp_dir, entity_type),
    )
    for path in paths_to_clear:
        clear(path)
def test_05_push_with_fail(self):
    # Removes one object from the local hashfs before pushing with
    # --clearonfail and asserts that the push reports an error.
    clear(SFTP_BUCKET_PATH)
    os.mkdir(SFTP_BUCKET_PATH)
    self.set_up_push(create_know_file=True)

    object_path = os.path.join(
        self.tmp_dir, '.ml-git', self.repo_type, 'objects', 'hashfs',
        'i9', '96', 'zdj7Wi996ViPiddvDGvzjBBACZzw6YfPujBCaPHunVoyiTUCj')
    clear(object_path)

    number_of_files_in_bucket = 0
    self.check_amount_of_files(number_of_files_in_bucket)

    push_command = MLGIT_PUSH % (self.repo_type, DATASET_NAME + ' --clearonfail')
    self.assertIn(ERROR_MESSAGE, check_output(push_command))
def set_up_checkout(self, entity):
    """Publish an entity and reset its local state ahead of a checkout.

    Adds, commits and pushes ``<entity>-ex`` (asserting on the output of
    each step), clears the entity's local metadata and workspace, and
    re-runs the entity init.

    :param entity: entity type to prepare.
    """
    entity_name = entity + '-ex'
    init_repository(entity, self)
    add_file(self, entity, '', 'new')

    entity_root = os.path.join(self.tmp_dir, ML_GIT_DIR, entity)
    metadata_path = os.path.join(entity_root, 'metadata')
    workspace = os.path.join(self.tmp_dir, entity)

    expected_commit = output_messages['INFO_COMMIT_REPO'] % (metadata_path, entity_name)
    self.assertIn(expected_commit,
                  check_output(MLGIT_COMMIT % (entity, entity_name, '')))

    head_path = os.path.join(entity_root, 'refs', entity_name, 'HEAD')
    self.assertTrue(os.path.exists(head_path))
    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_PUSH % (entity, entity_name)))

    clear(entity_root)
    clear(workspace)

    expected_init = output_messages['INFO_METADATA_INIT'] % (
        os.path.join(self.tmp_dir, GIT_PATH),
        os.path.join(entity_root, 'metadata'))
    self.assertIn(expected_init, check_output(MLGIT_ENTITY_INIT % entity))
def test_06_status_after_delete_file(self):
    # Checks out tag version 1 of dataset-ex, removes both data files and
    # asserts that the short status reports two deleted files under data/.
    self.set_up_checkout('dataset')
    self.assertNotIn(
        ERROR_MESSAGE,
        check_output(
            MLGIT_CHECKOUT % ('dataset', 'computer-vision__images__dataset-ex__1')))

    data_path = os.path.join(self.tmp_dir, 'dataset', 'computer-vision',
                             'images', 'dataset-ex', 'data')
    for file_name in ('file', 'file2'):
        clear(os.path.join(data_path, file_name))

    # `[\s\S]*` matches the same text as the former `(\s|.)*` (any character,
    # newlines included) but without the overlapping alternation, which can
    # backtrack exponentially when the output does not match. No trailing
    # wildcard is needed because assertRegex performs a search.
    self.assertRegex(
        check_output(MLGIT_STATUS_SHORT % ('dataset', 'dataset-ex')),
        r'Changes to be committed:\s+Deleted: [\s\S]*data/\t->\t2 FILES')
def test_03_checkout(self):
    # Pushes the dataset, wipes the local state and asserts that a checkout
    # recreates the workspace directory with newfile0 and newfile1.
    self.set_up_push()
    push_output = check_output(MLGIT_PUSH % (self.repo_type, DATASET_NAME))
    self.assertNotIn(ERROR_MESSAGE, push_output)

    clear(self.workspace)
    clear(os.path.join(ML_GIT_DIR, self.repo_type))

    restore_commands = (
        MLGIT_ENTITY_INIT % self.repo_type,
        MLGIT_CHECKOUT % (self.repo_type, DATASET_NAME),
    )
    for command in restore_commands:
        self.assertNotIn(ERROR_MESSAGE, check_output(command))

    ws_path = os.path.join(self.tmp_dir, self.repo_type, DATASET_NAME)
    self.assertTrue(os.path.exists(ws_path))
    for file_name in ('newfile0', 'newfile1'):
        self.assertTrue(os.path.isfile(os.path.join(ws_path, file_name)))
def test_01_push_and_checkout(self):
    # Publishes dataset-ex to the 'gdriveh' store, verifies the git tag in
    # the metadata repository, wipes the local state and asserts that a
    # checkout by tag restores objects, refs, cache, the data file and the
    # spec file.
    cpath = 'credentials-json'
    init_repository('dataset', self, store_type='gdriveh', profile=cpath)
    add_file(self, 'dataset', '--bumpversion', 'new')

    entity_root = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset')
    metadata_path = os.path.join(entity_root, 'metadata')
    self.assertIn(
        messages[17] % (metadata_path,
                        os.path.join('computer-vision', 'images', 'dataset-ex')),
        check_output('ml-git dataset commit dataset-ex'))

    head_file = os.path.join(entity_root, 'refs', 'dataset-ex', 'HEAD')
    self.assertTrue(os.path.exists(head_file))
    self.assertNotIn(ERROR_MESSAGE, check_output('ml-git dataset push dataset-ex'))

    tag = 'computer-vision__images__dataset-ex__2'
    # `git describe` must run inside the metadata repository. Always return
    # to tmp_dir, even when the tag assertion fails, so the working
    # directory does not leak into later code.
    os.chdir(metadata_path)
    try:
        self.assertIn(tag, check_output('git describe --tags'))
    finally:
        os.chdir(self.tmp_dir)

    workspace = os.path.join(self.tmp_dir, 'dataset')
    clear(workspace)
    clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
    init_repository('dataset', self, store_type='gdriveh', profile=cpath)
    self.assertNotIn(ERROR_MESSAGE, check_output('ml-git dataset checkout %s' % tag))

    entity_ws = os.path.join(self.tmp_dir, 'dataset', 'computer-vision',
                             'images', 'dataset-ex')
    expected_paths = (
        os.path.join(entity_root, 'objects'),
        os.path.join(entity_root, 'refs'),
        os.path.join(entity_root, 'cache'),
        os.path.join(entity_ws, 'newfile0'),
        os.path.join(entity_ws, 'dataset-ex.spec'),
    )
    for path in expected_paths:
        self.assertTrue(os.path.exists(path))
def test_04_push_with_wrong_repository(self):
    # Commits dataset-ex, clears the git path under tmp_dir and asserts
    # that the push output contains both the error marker and that path.
    entity = 'dataset'
    entity_name = 'dataset-ex'
    init_repository(entity, self)
    add_file(self, entity, '--bumpversion', 'new')

    entity_root = os.path.join(self.tmp_dir, ML_GIT_DIR, 'dataset')
    metadata_path = os.path.join(entity_root, 'metadata')
    expected_commit = messages[17] % (
        metadata_path,
        os.path.join('computer-vision', 'images', 'dataset-ex'))
    self.assertIn(expected_commit,
                  check_output(MLGIT_COMMIT % (entity, entity_name, '')))

    head_file = os.path.join(entity_root, 'refs', 'dataset-ex', 'HEAD')
    self.assertTrue(os.path.exists(head_file))

    git_path = os.path.join(self.tmp_dir, GIT_PATH)
    clear(git_path)

    output = check_output(MLGIT_PUSH % (entity, entity_name))
    for expected_fragment in (ERROR_MESSAGE, git_path):
        self.assertIn(expected_fragment, output)
def test_04_push_with_wrong_repository(self):
    # Commits the dataset, clears the git path under tmp_dir and asserts
    # that the push output contains both the error marker and GIT_PATH.
    init_repository(DATASETS, self)
    add_file(self, DATASETS, '--bumpversion', 'new')

    entity_root = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS)
    metadata_path = os.path.join(entity_root, 'metadata')
    expected_commit = output_messages['INFO_COMMIT_REPO'] % (metadata_path, DATASET_NAME)
    self.assertIn(expected_commit,
                  check_output(MLGIT_COMMIT % (DATASETS, DATASET_NAME, '')))

    head_file = os.path.join(entity_root, 'refs', DATASET_NAME, 'HEAD')
    self.assertTrue(os.path.exists(head_file))

    git_path = os.path.join(self.tmp_dir, GIT_PATH)
    clear(git_path)

    output = check_output(MLGIT_PUSH % (DATASETS, DATASET_NAME))
    # The assertion checks GIT_PATH itself, not the tmp_dir-joined path
    # that was cleared.
    for expected_fragment in (ERROR_MESSAGE, GIT_PATH):
        self.assertIn(expected_fragment, output)
def test_01_push_and_checkout(self):
    # Publishes the dataset to the GDRIVEH storage, verifies the git tag in
    # the metadata repository, wipes the local state and asserts that a
    # checkout by tag restores objects, refs, cache, the data file and the
    # spec file.
    cpath = 'credentials-json'
    init_repository(DATASETS, self, storage_type=GDRIVEH, profile=cpath)
    add_file(self, DATASETS, '--bumpversion', 'new')

    entity_root = os.path.join(self.tmp_dir, ML_GIT_DIR, DATASETS)
    metadata_path = os.path.join(entity_root, 'metadata')
    self.assertIn(
        output_messages['INFO_COMMIT_REPO'] % (metadata_path, DATASET_NAME),
        check_output('ml-git datasets commit datasets-ex'))

    head_file = os.path.join(entity_root, 'refs', DATASET_NAME, 'HEAD')
    self.assertTrue(os.path.exists(head_file))
    self.assertNotIn(ERROR_MESSAGE, check_output('ml-git datasets push datasets-ex'))

    tag = 'computer-vision__images__datasets-ex__2'
    # `git describe` must run inside the metadata repository. Always return
    # to tmp_dir, even when the tag assertion fails, so the working
    # directory does not leak into later code.
    os.chdir(metadata_path)
    try:
        self.assertIn(tag, check_output('git describe --tags'))
    finally:
        os.chdir(self.tmp_dir)

    workspace = os.path.join(self.tmp_dir, DATASETS)
    clear(workspace)
    clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
    init_repository(DATASETS, self, storage_type=GDRIVEH, profile=cpath)
    self.assertNotIn(ERROR_MESSAGE, check_output('ml-git datasets checkout %s' % tag))

    entity_ws = os.path.join(self.tmp_dir, DATASETS, DATASET_NAME)
    expected_paths = (
        os.path.join(entity_root, 'objects'),
        os.path.join(entity_root, 'refs'),
        os.path.join(entity_root, 'cache'),
        os.path.join(entity_ws, 'newfile0'),
        os.path.join(entity_ws, 'datasets-ex.spec'),
    )
    for path in expected_paths:
        self.assertTrue(os.path.exists(path))
def test_04_push_with_wrong_bucket(self):
    # Configures a storage and an entity that point at 'wrong_bucket', then
    # asserts that the push reports ERROR_BUCKET_DOES_NOT_EXIST and that the
    # file count checked by check_amount_of_files is 0 before and after.
    clear(SFTP_BUCKET_PATH)
    os.mkdir(SFTP_BUCKET_PATH)
    wrong_bucket = 'wrong_bucket'

    expected_init = output_messages['INFO_INITIALIZED_PROJECT_IN'] % self.tmp_dir
    self.assertIn(expected_init, check_output(MLGIT_INIT))

    expected_remote = output_messages['INFO_ADD_REMOTE'] % (GIT_PATH, self.repo_type)
    self.assertIn(expected_remote,
                  check_output(MLGIT_REMOTE_ADD % (self.repo_type, GIT_PATH)))

    expected_storage = output_messages['INFO_ADD_STORAGE_WITHOUT_PROFILE'] % (
        self.storage_type, wrong_bucket)
    # The connection options ride along in the --type=%s slot.
    storage_type_and_options = (self.storage_type + ' --username=mlgit_user '
                                '--port=9922 --endpoint-url=127.0.0.1 --private-key='
                                + FAKE_SSH_KEY_PATH)
    storage_add_command = 'ml-git repository storage add %s --type=%s' % (
        wrong_bucket, storage_type_and_options)
    self.assertIn(expected_storage, check_output(storage_add_command))

    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_ENTITY_INIT % self.repo_type))

    create_arguments = (DATASET_NAME
                        + ' --storage-type=sftph --mutability=strict --category=test '
                          '--bucket-name=wrong_bucket')
    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_CREATE % (DATASETS, create_arguments)))

    add_file(self, self.repo_type, '', 'new')
    publish_commands = (
        MLGIT_ADD % (self.repo_type, DATASET_NAME, ''),
        MLGIT_COMMIT % (self.repo_type, DATASET_NAME, ''),
    )
    for command in publish_commands:
        self.assertNotIn(ERROR_MESSAGE, check_output(command))

    number_of_files_in_bucket = 0
    self.check_amount_of_files(number_of_files_in_bucket)
    self.assertIn(output_messages['ERROR_BUCKET_DOES_NOT_EXIST'] % wrong_bucket,
                  check_output(MLGIT_PUSH % (self.repo_type, DATASET_NAME)))
    self.check_amount_of_files(number_of_files_in_bucket)
def set_up_global(self, entity_type=DATASETS):
    """Write a global remote/storage configuration and drop the local one.

    Initialises the project, adds a global git remote and a global storage,
    rewrites the global config file so the ``mlgit`` S3H storage uses
    ``MINIO_ENDPOINT_URL`` as its endpoint, and finally clears the local
    ml-git directory.

    :param entity_type: entity type the global remote is registered for;
        defaults to ``DATASETS``.
    """
    self.assertNotIn(ERROR_MESSAGE, check_output(MLGIT_INIT))
    disable_wizard_in_config(self.tmp_dir)

    global_setup_commands = (
        MLGIT_REMOTE_ADD_GLOBAL % (entity_type, 'local_git_server.git'),
        MLGIT_STORAGE_ADD % (BUCKET_NAME, PROFILE + ' --global'),
    )
    for command in global_setup_commands:
        self.assertNotIn(ERROR_MESSAGE, check_output(command))

    global_config_path = os.path.join(self.tmp_dir, GLOBAL_ML_GIT_CONFIG)
    with open(global_config_path, 'r') as config_file:
        config = yaml_processor.load(config_file)

    s3h_storages = config[STORAGE_CONFIG_KEY][StorageType.S3H.value]
    s3h_storages['mlgit']['endpoint-url'] = MINIO_ENDPOINT_URL

    with open(global_config_path, 'w') as config_file:
        yaml_processor.dump(config, config_file)

    clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
def set_up_test(self):
    """Publish ``dataset-ex`` through the API and reset the local state.

    Writes a spec file declaring version 9, creates four files through
    ``self.create_file``, then adds (with ``bumpversion=True``), commits and
    pushes via ``api``. After asserting that ``self.metadata`` exists under
    ``tmp_dir`` it clears the ml-git directory and the workspace and
    initialises the repository again.
    """
    entity = 'dataset'
    entity_name = 'dataset-ex'
    init_repository(entity, self)

    workspace = os.path.join(self.tmp_dir, 'dataset', 'dataset-ex')
    os.makedirs(workspace, exist_ok=True)

    manifest = {
        'files': 'MANIFEST.yaml',
        'store': 's3h://mlgit'
    }
    spec = {
        'dataset': {
            'categories': ['computer-vision', 'images'],
            'manifest': manifest,
            'mutability': Mutability.STRICT.value,
            'name': 'dataset-ex',
            'version': 9
        }
    }
    spec_path = os.path.join(workspace, 'dataset-ex.spec')
    with open(spec_path, 'w') as spec_file:
        yaml_processor.dump(spec, spec_file)

    os.makedirs(os.path.join(workspace, 'data'), exist_ok=True)
    file_contents = (('file1', '0'), ('file2', '1'), ('file3', 'a'), ('file4', 'b'))
    for file_name, content in file_contents:
        self.create_file(workspace, file_name, content)

    api.add(entity, entity_name, bumpversion=True)
    api.commit(entity, entity_name)
    api.push(entity, entity_name)

    self.assertTrue(os.path.exists(os.path.join(self.tmp_dir, self.metadata)))

    clear(os.path.join(self.tmp_dir, ML_GIT_DIR))
    clear(workspace)
    init_repository(entity, self)
def _create_entity_with_mutability(self, entity_type, mutability_type):
    """Publish ``<entity_type>-ex`` with a given mutability, then clean up.

    Creates the spec through ``create_spec`` with version 1 and the given
    mutability, adds one file, runs add/commit/push (asserting on their
    output) and finally clears the ml-git directory, the entity workspace
    and the entity's top-level directory.

    :param entity_type: entity type to create and publish.
    :param mutability_type: mutability value forwarded to ``create_spec``.
    """
    entity_name = entity_type + '-ex'
    init_repository(entity_type, self)

    workspace = os.path.join(self.tmp_dir, entity_type, entity_name)
    create_spec(self, entity_type, self.tmp_dir, 1, mutability_type)
    os.makedirs(os.path.join(workspace, 'data'))
    create_file(workspace, 'file1', '0')

    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_ADD % (entity_type, entity_name, '')))

    expected_commit = messages[17] % (
        os.path.join(self.tmp_dir, ML_GIT_DIR, entity_type, 'metadata'),
        os.path.join('computer-vision', 'images', entity_name))
    self.assertIn(expected_commit,
                  check_output(MLGIT_COMMIT % (entity_type, entity_name, '')))

    self.assertNotIn(ERROR_MESSAGE,
                     check_output(MLGIT_PUSH % (entity_type, entity_name)))

    paths_to_clear = (
        os.path.join(self.tmp_dir, ML_GIT_DIR),
        workspace,
        os.path.join(self.tmp_dir, entity_type),
    )
    for path in paths_to_clear:
        clear(path)