def clone_config_repository(url, folder, track):
    """Clone the configuration repository at ``url`` into a local directory.

    The clone is refused when ``get_root_path()`` returns a truthy value; a
    ``RootPathException`` from that call is ignored and the clone proceeds.
    The target is ``<cwd>/<folder>`` when ``folder`` is given (created through
    ``ensure_path_exists``), otherwise the current working directory. In both
    cases the target must be empty.

    :param url: Repository URL handed to ``Repo.clone_from``.
    :param folder: Directory name to clone into, or ``None`` to use the cwd.
    :param track: When falsy, the cloned ``.git`` directory is removed.
    :return: ``True`` on success, ``False`` after any logged failure.
    """
    try:
        inside_project = get_root_path()
        if inside_project:
            log.error(output_messages['ERROR_IN_INTIALIZED_PROJECT'], class_name=ADMIN_CLASS_NAME)
            return False
    except RootPathException:
        pass

    git_dir = '.git'
    try:
        if folder is None:
            project_dir = os.getcwd()
        else:
            project_dir = os.path.join(os.getcwd(), folder)
            ensure_path_exists(project_dir)

        # Refuse to clone on top of existing content.
        if os.listdir(project_dir):
            log.error(output_messages['ERROR_PATH_NOT_EMPTY'] % project_dir, class_name=ADMIN_CLASS_NAME)
            return False
        Repo.clone_from(url, project_dir)
    except Exception as err:
        log.error(handle_clone_exception(err, folder, project_dir), class_name=ADMIN_CLASS_NAME)
        return False

    if not check_successfully_clone(project_dir, git_dir):
        return False

    if not track:
        # Caller does not want the clone tracked: drop the git metadata.
        clear(os.path.join(project_dir, git_dir))
    return True
def move_metadata_dir(self, old_directory, new_directory):
    """Move ``old_directory`` within the repository at ``self.__path`` via ``git mv``.

    Both arguments are joined onto ``self.__path``. The parent directory of
    ``new_directory`` is created first and used as the ``git mv`` destination.
    Afterwards the parent of the old location is removed if it is left empty.
    """
    repo = Repo(self.__path)
    source = os.path.join(self.__path, old_directory)
    destination_parent = os.path.join(self.__path, os.path.dirname(new_directory))
    ensure_path_exists(destination_parent)
    repo.git.mv([source, destination_parent])

    source_parent = os.path.dirname(source)
    if not os.listdir(source_parent):
        clear(source_parent)
def test_get_target_tag(self):
    """Check which (entity, version) lookups ``_get_target_tag`` resolves or rejects."""
    entity = 'dataset-ex'
    tags = [
        'computer__images__dataset-ex__1',
        'computer__images__dataset-ex__2',
        'computer__videos__dataset-ex__1',
    ]
    m = Metadata('', self.test_dir, config, DATASETS)

    # Each of these lookups is expected to raise RuntimeError.
    rejected_lookups = ((entity, -1), (entity, 1), ('dataset-wrong', 1))
    for name, version in rejected_lookups:
        with self.assertRaises(RuntimeError):
            m._get_target_tag(tags, name, version)

    resolved = m._get_target_tag(tags, entity, 2)
    self.assertEqual(resolved, 'computer__images__dataset-ex__2')
    clear(m.path)
def check_successfully_clone(project_dir, git_dir):
    """Validate a fresh clone by resolving the project root from inside it.

    Changes the process working directory to ``project_dir`` and calls
    ``get_root_path()``. If that raises ``RootPathException``, ``project_dir``
    is cleared, an error is logged and ``git_dir`` is cleared as well.

    :return: ``True`` when the root path resolves, ``False`` otherwise.
    """
    try:
        os.chdir(project_dir)
        get_root_path()
    except RootPathException:
        clear(project_dir)
        log.error(output_messages['ERROR_MINIMAL_CONFIGURATION'], class_name=ADMIN_CLASS_NAME)
        clear(git_dir)
        return False
    else:
        return True
def handle_clone_exception(e, folder, project_dir):
    """Translate an exception raised while cloning into a user-facing message.

    * A ``GitCommandError`` whose third positional argument mentions
      ``Permission denied``, or a ``PermissionError``, yields a
      permission-denied message for ``project_dir``; nothing is removed.
    * Any other error removes ``project_dir`` when ``folder`` is not ``None``.
      A ``GitCommandError`` is then reported as an unreadable remote, and
      every other exception is reported as ``str(e)``.

    :param e: The exception caught by the caller.
    :param folder: Folder name requested by the user, or ``None``.
    :param project_dir: Directory the clone was targeting.
    :return: The message to log.
    """
    error_msg = str(e)
    is_git_command_error = e.__class__ == GitCommandError

    # Assumes args[2] carries git's stderr (GitCommandError(command, status,
    # stderr, ...)) -- confirm against GitPython. The exception may be built
    # with fewer positional arguments, so guard the index instead of letting
    # an IndexError escape from the error handler itself.
    stderr_text = ''
    if is_git_command_error and len(e.args) > 2:
        stderr_text = str(e.args[2])

    if 'Permission denied' in stderr_text or e.__class__ == PermissionError:
        return 'Permission denied in folder %s' % project_dir

    if folder is not None:
        clear(project_dir)
    if is_git_command_error:
        error_msg = 'Could not read from remote repository.'
    return error_msg
def test_default_branch(self):
    """``get_default_branch`` reports 'master' after init and follows a branch change."""
    initial_branch = 'master'
    renamed_branch = 'main'

    m = Metadata('', self.test_dir, config, DATASETS)
    m.init()
    self.assertTrue(m.check_exists())
    self.assertEqual(m.get_default_branch(), initial_branch)

    # After switching branches the reported default must follow the change.
    self.change_branch(m.path, renamed_branch)
    self.assertNotEqual(m.get_default_branch(), initial_branch)
    self.assertEqual(m.get_default_branch(), renamed_branch)

    clear(m.path)
def check_successfully_clone(project_dir, git_dir):
    """Validate a fresh clone by resolving the project root from inside it.

    Changes the process working directory to ``project_dir`` and calls
    ``get_root_path()``. If that raises ``RootPathException``, ``project_dir``
    is cleared, an error is logged and ``git_dir`` is cleared as well.

    :return: ``True`` when the root path resolves, ``False`` otherwise.
    """
    try:
        os.chdir(project_dir)
        get_root_path()
    except RootPathException:
        clear(project_dir)
        log.error('Wrong minimal configuration files!', class_name=ADMIN_CLASS_NAME)
        clear(git_dir)
        return False
    else:
        return True
def handle_clone_exception(e, folder, project_dir):
    """Translate an exception raised while cloning into a user-facing message.

    * A ``GitCommandError`` whose third positional argument mentions
      ``Permission denied``, or a ``PermissionError``, yields a
      permission-denied message for ``project_dir``; nothing is removed.
    * An exception whose class is exactly ``GitError`` and whose text contains
      ``not an empty directory`` yields the "path already exists" message for
      ``folder``; nothing is removed.
    * Any other error removes ``project_dir`` when ``folder`` is not ``None``.
      A ``GitCommandError`` is then reported as an unreadable remote, and
      every other exception is reported as ``str(e)``.

    :param e: The exception caught by the caller.
    :param folder: Folder name requested by the user, or ``None``.
    :param project_dir: Directory the clone was targeting.
    :return: The message to log.
    """
    error_msg = str(e)
    is_git_command_error = e.__class__ == GitCommandError

    # Assumes args[2] carries git's stderr (GitCommandError(command, status,
    # stderr, ...)) -- confirm against GitPython. The exception may be built
    # with fewer positional arguments, so guard the index instead of letting
    # an IndexError escape from the error handler itself.
    stderr_text = ''
    if is_git_command_error and len(e.args) > 2:
        stderr_text = str(e.args[2])

    if 'Permission denied' in stderr_text or e.__class__ == PermissionError:
        return 'Permission denied in folder %s' % project_dir

    # Exact class match is intentional: subclasses such as GitCommandError
    # must keep falling through to the generic branch below.
    if e.__class__ == GitError and 'not an empty directory' in error_msg:
        return output_messages['ERROR_PATH_ALREAD_EXISTS'] % folder

    if folder is not None:
        clear(project_dir)
    if is_git_command_error:
        error_msg = 'Could not read from remote repository.'
    return error_msg
def create(self, kwargs):
    """Create the workspace for a new artifact and optionally populate it.

    ``kwargs`` is a mapping that must provide the keys ``artifact_name``,
    ``category``, ``version_number``, ``import``, ``store_type``,
    ``bucket_name``, ``wizard_config``, ``import_url``, ``unzip``,
    ``credentials_path`` and ``mutability``. A missing key or a
    ``version_number`` that ``int()`` rejects raises before any work is done.

    Steps, in order:

    1. Build the workspace tree via ``create_workspace_tree_structure``.
    2. If ``wizard_config`` is truthy, run the wizard, optionally add the new
       store, update the store spec and add the git remote.
    3. If ``import_url`` is truthy, configure a ``gdrive`` store and import
       the URL into ``<repo_type>/<artifact_name>/data``.
    4. If ``unzip`` is truthy, unzip the files found in the data directory.

    Failures inside these steps are logged, not propagated. The partially
    created ``<repo_type>/<artifact_name>`` directory is removed unless the
    failure was a ``PermissionError``. A ``KeyboardInterrupt`` removes the
    directory and logs that the command was aborted.
    """
    artifact_name = kwargs['artifact_name']
    categories = list(kwargs['category'])
    version = int(kwargs['version_number'])
    imported_dir = kwargs['import']
    store_type = kwargs['store_type']
    bucket_name = kwargs['bucket_name']
    start_wizard = kwargs['wizard_config']
    import_url = kwargs['import_url']
    unzip_file = kwargs['unzip']
    credentials_path = kwargs['credentials_path']
    repo_type = self.__repo_type
    try:
        create_workspace_tree_structure(repo_type, artifact_name, categories, store_type, bucket_name,
                                        version, imported_dir, kwargs['mutability'])
        if start_wizard:
            has_new_store, store_type, bucket, profile, endpoint_url, git_repo = start_wizard_questions(
                repo_type)
            if has_new_store:
                store_add(store_type, bucket, profile, endpoint_url)
            update_store_spec(repo_type, artifact_name, store_type, bucket)
            remote_add(repo_type, git_repo)
        if import_url:
            self.create_config_store('gdrive', credentials_path)
            local = LocalRepository(
                self.__config, get_objects_path(self.__config, repo_type))
            destine_path = os.path.join(repo_type, artifact_name, 'data')
            local.import_file_from_url(destine_path, import_url, StoreType.GDRIVE.value)
        if unzip_file:
            log.info('Unzipping files', class_name=REPOSITORY_CLASS_NAME)
            data_path = os.path.join(get_root_path(), repo_type, artifact_name, 'data')
            unzip_files_in_directory(data_path)
        log.info("Project Created.", class_name=REPOSITORY_CLASS_NAME)
    except KeyboardInterrupt:
        # KeyboardInterrupt derives from BaseException, so ``except Exception``
        # never sees it; it needs its own clause for the abort message to be
        # reachable.
        clear(os.path.join(repo_type, artifact_name))
        log.info("Create command aborted!", class_name=REPOSITORY_CLASS_NAME)
    except Exception as e:
        # The workspace is kept on PermissionError, as in the original code.
        if not isinstance(e, PermissionError):
            clear(os.path.join(repo_type, artifact_name))
        log.error(e, class_name=REPOSITORY_CLASS_NAME)
def test_get_tag(self):
    """``get_tag`` with version -1 returns the single tag reported by ``list_tags``."""
    spec_name = 'dataset-ex'
    metadata_root = os.path.join(self.test_dir, 'metadata')
    spec_dir = os.path.join(metadata_root, spec_name)

    # Lay out a minimal metadata directory: the spec file plus a manifest.
    ensure_path_exists(spec_dir)
    shutil.copy('hdata/dataset-ex.spec', spec_dir + '/dataset-ex.spec')
    yaml_save(files_mock, os.path.join(spec_dir, 'MANIFEST.yaml'))

    config['mlgit_path'] = self.test_dir
    m = Metadata(spec_name, metadata_root, config, DATASETS)
    repository = Repository(config, DATASETS)
    repository.init()

    known_tags = ['computer__images__dataset-ex__1']
    with mock.patch('ml_git.metadata.Metadata.list_tags', return_value=known_tags):
        resolved_tag = m.get_tag(spec_name, -1)
        self.assertEqual(resolved_tag, known_tags[0])
    clear(m.path)
def clone_config_repo(self):
    """Initialize metadata for every entity type that has a git remote configured.

    Reads ``self.__config[<entity>]['git']`` for ``dataset``, ``model`` and
    ``labels``; an entity missing from the config counts as having no remote.
    When no entity has a remote, an error is logged, ``ROOT_FILE_NAME`` is
    cleared and nothing is initialized.
    """
    entity_types = ('dataset', 'model', 'labels')
    remotes = [
        (entity, self.__config[entity]['git'] if entity in self.__config else '')
        for entity in entity_types
    ]

    if not any(remote for _, remote in remotes):
        log.error('No repositories found, verify your configurations!', class_name=METADATA_CLASS_NAME)
        clear(ROOT_FILE_NAME)
        return

    for entity, remote in remotes:
        if remote:
            self.initialize_metadata(entity)
    log.info('Successfully loaded configuration files!', class_name=METADATA_CLASS_NAME)
def clone_config_repository(url, folder, track):
    """Clone the configuration repository at ``url`` into a local directory.

    The clone is refused when ``get_root_path()`` returns a truthy value; a
    ``RootPathException`` from that call is ignored and the clone proceeds.
    The target is ``<cwd>/<folder>`` when ``folder`` is given (created through
    ``ensure_path_exists``), otherwise the current working directory. In both
    cases the target must be empty.

    After cloning, the working directory is changed to the target and
    ``get_root_path()`` must succeed there; otherwise the target is cleared.

    :param url: Repository URL handed to ``Repo.clone_from``.
    :param folder: Directory name to clone into, or ``None`` to use the cwd.
    :param track: When falsy, the cloned ``.git`` directory is removed.
    :return: ``True`` on success, ``False`` after any logged failure.
    """
    try:
        if get_root_path():
            log.error('You are in initialized ml-git project.', class_name=ADMIN_CLASS_NAME)
            return False
    except RootPathException:
        pass

    git_dir = '.git'
    # Bound up front so the handler below can reference it even when the
    # failure happens before the target directory has been worked out.
    project_dir = None
    try:
        if folder is not None:
            project_dir = os.path.join(os.getcwd(), folder)
            ensure_path_exists(project_dir)
        else:
            project_dir = os.getcwd()

        if os.listdir(project_dir):
            log.error(
                'The path [%s] is not an empty directory. Consider using --folder to create an empty folder.'
                % project_dir, class_name=ADMIN_CLASS_NAME)
            return False
        Repo.clone_from(url, project_dir)
    except Exception as e:
        error_msg = str(e)
        is_git_command_error = e.__class__ == GitCommandError
        # Assumes args[2] carries git's stderr -- confirm against GitPython.
        # Guard the index: the exception may carry fewer positional arguments
        # and the handler must not raise an IndexError of its own.
        stderr_text = ''
        if is_git_command_error and len(e.args) > 2:
            stderr_text = str(e.args[2])

        if 'Permission denied' in stderr_text or e.__class__ == PermissionError:
            error_msg = 'Permission denied in folder %s' % project_dir
        else:
            if folder is not None and project_dir is not None:
                clear(project_dir)
            if is_git_command_error:
                error_msg = 'Could not read from remote repository.'
        log.error(error_msg, class_name=ADMIN_CLASS_NAME)
        return False

    try:
        os.chdir(project_dir)
        get_root_path()
    except RootPathException:
        clear(project_dir)
        log.error('Wrong minimal configuration files!', class_name=ADMIN_CLASS_NAME)
        clear(git_dir)
        return False

    if not track:
        # Caller does not want the clone tracked: drop the git metadata.
        clear(os.path.join(project_dir, git_dir))
    return True
def test_last_tag_version(self):
    """``get_last_tag_version`` returns 2 for tags ending in 1 and 2, and 0 with no tags."""
    spec_path = 'dataset-ex'
    config['mlgit_path'] = self.test_dir
    m = Metadata('', '', config, DATASETS)
    m.init()

    # (tags reported by list_tags, expected last version)
    scenarios = (
        (['computer__images__dataset-ex__1', 'computer__images__dataset-ex__2'], 2),
        ([], 0),
    )
    for tag_list, expected_version in scenarios:
        with mock.patch('ml_git.metadata.Metadata.list_tags', return_value=tag_list):
            last_version = m.get_last_tag_version(spec_path)
            self.assertEqual(last_version, expected_version)

    clear(self.test_dir)
def clone_config_repository(url, folder, untracked):
    """Clone the configuration repository at ``url`` using ``GitClient``.

    The clone is refused when ``get_root_path()`` returns a truthy value; a
    ``RootPathException`` from that call is ignored and the clone proceeds.
    With an explicit ``folder`` the target ``<cwd>/<folder>`` is created and
    must be empty. Without one, the folder name comes from
    ``get_repo_name_from_url(url)`` and ``GitClient`` is built from the URL alone.

    :param url: Repository URL.
    :param folder: Directory name to clone into, or ``None`` to derive it from ``url``.
    :param untracked: When truthy, the cloned ``.git`` directory is removed.
    :return: ``True`` on success, ``False`` after any logged failure.
    """
    try:
        inside_project = get_root_path()
        if inside_project:
            log.error(output_messages['ERROR_IN_INTIALIZED_PROJECT'], class_name=ADMIN_CLASS_NAME)
            return False
    except RootPathException:
        pass

    git_dir = '.git'
    try:
        project_dir = None
        if folder is None:
            folder = get_repo_name_from_url(url)
            project_dir = os.path.join(os.getcwd(), folder)
            git_client = GitClient(url)
        else:
            project_dir = os.path.join(os.getcwd(), folder)
            ensure_path_exists(project_dir)
            # An explicitly requested folder must not already hold content.
            if os.listdir(project_dir):
                log.error(output_messages['ERROR_PATH_ALREAD_EXISTS'] % project_dir, class_name=ADMIN_CLASS_NAME)
                return False
            git_client = GitClient(url, project_dir)
        git_client.clone()
    except Exception as err:
        log.error(handle_clone_exception(err, folder, project_dir), class_name=ADMIN_CLASS_NAME)
        return False

    if not check_successfully_clone(project_dir, git_dir):
        return False

    if untracked:
        clear(os.path.join(project_dir, git_dir))
    create_or_update_gitignore()
    return True
def clone_config_repository(url, folder, track):
    """Clone the configuration repository at ``url`` into a local directory.

    The clone is refused when ``get_root_path()`` returns a truthy value; a
    ``RootPathException`` from that call is ignored and the clone proceeds.
    The target is ``<cwd>/<folder>`` when ``folder`` is given (created through
    ``ensure_path_exists``), otherwise the current working directory. In both
    cases the target must be empty.

    :param url: Repository URL handed to ``Repo.clone_from``.
    :param folder: Directory name to clone into, or ``None`` to use the cwd.
    :param track: When falsy, the cloned ``.git`` directory is removed.
    :return: ``True`` on success, ``False`` after any logged failure.
    """
    try:
        inside_project = get_root_path()
        if inside_project:
            log.error('You are in initialized ml-git project.', class_name=ADMIN_CLASS_NAME)
            return False
    except RootPathException:
        pass

    git_dir = '.git'
    try:
        if folder is None:
            project_dir = os.getcwd()
        else:
            project_dir = os.path.join(os.getcwd(), folder)
            ensure_path_exists(project_dir)

        # Refuse to clone on top of existing content.
        if os.listdir(project_dir):
            log.error(
                'The path [%s] is not an empty directory. Consider using --folder to create an empty folder.'
                % project_dir, class_name=ADMIN_CLASS_NAME)
            return False
        Repo.clone_from(url, project_dir)
    except Exception as err:
        log.error(handle_clone_exception(err, folder, project_dir), class_name=ADMIN_CLASS_NAME)
        return False

    if not check_successfully_clone(project_dir, git_dir):
        return False

    if not track:
        # Caller does not want the clone tracked: drop the git metadata.
        clear(os.path.join(project_dir, git_dir))
    return True
def clone_config_repo(self):
    """Initialize metadata for every entity type that has a git remote configured.

    Reads ``self.__config[<entity>]['git']`` for the ``DATASETS``, ``MODELS``
    and ``LABELS`` values of ``EntityType``; an entity missing from the config
    counts as having no remote. When no entity has a remote, an error is
    logged, ``ROOT_FILE_NAME`` is cleared and nothing is initialized.
    """
    entity_types = (
        EntityType.DATASETS.value,
        EntityType.MODELS.value,
        EntityType.LABELS.value,
    )
    remotes = [
        (entity, self.__config[entity]['git'] if entity in self.__config else '')
        for entity in entity_types
    ]

    if not any(remote for _, remote in remotes):
        log.error(output_messages['ERROR_REPOSITORY_NOT_FOUND'], class_name=METADATA_CLASS_NAME)
        clear(ROOT_FILE_NAME)
        return

    for entity, remote in remotes:
        if remote:
            self.initialize_metadata(entity)
    log.info(output_messages['INFO_SUCCESS_LOAD_CONFIGURATION'], class_name=METADATA_CLASS_NAME)
def test_blank_remote_url(self):
    """``validate_blank_remote_url`` raises ``GitError`` when the dataset remote is empty."""
    # Work on a copy so the shared module-level config is left untouched.
    blank_remote_config = deepcopy(config)
    blank_remote_config['dataset']['git'] = ''

    m = Metadata(spec, self.test_dir, blank_remote_config, repotype)
    with self.assertRaises(GitError):
        m.validate_blank_remote_url()
    clear(m.path)
def test_init_local_repo(self):
    """After ``init()``, ``check_exists()`` must report the metadata repository as present."""
    metadata = Metadata(spec, self.test_dir, config, DATASETS)
    metadata.init()
    repository_exists = metadata.check_exists()
    self.assertTrue(repository_exists)
    # Remove the repository created by this test.
    clear(metadata.path)
def test_init(self):
    """After ``init()``, ``check_exists()`` must report the metadata repository as present."""
    metadata = Metadata(spec, self.test_dir, config, repotype)
    metadata.init()
    repository_exists = metadata.check_exists()
    self.assertTrue(repository_exists)
    # Remove the repository created by this test.
    clear(metadata.path)