def _get_project(package_slug, project_path):
    """Resolve the ``Project`` to operate on.

    The package slug takes precedence over the project path. When neither is
    provided, the project is detected from the current working directory.

    Args:
        package_slug: module name handed to ``import_module_from_modname``,
            or None
        project_path: path handed to ``Project.from_path``, or None

    Returns:
        Project: the resolved project
    """
    if package_slug is None and project_path is None:
        return Project.from_cwd()

    if package_slug is None:
        return Project.from_path(project_path)

    return Project(import_module_from_modname(package_slug))
def project(self) -> Project:
    """Access ballet-specific project info

    The stored ``_prj`` value may be unset, an existing ``Project``, a
    module, or a path; each is converted to a ``Project`` accordingly.

    Returns:
        Project: the project described by ``self._prj``, or the project
            detected from the current working directory if it is None

    Raises:
        ValueError: ``self._prj`` is of an unsupported type
    """
    prj = self._prj

    if prj is None:
        return Project.from_cwd()
    if isinstance(prj, Project):
        return prj
    if isinstance(prj, ModuleType):
        return Project(prj)
    if isinstance(prj, (str, PathLike)):
        return Project.from_path(prj)

    raise ValueError('not supported')
def project(self):
    """Detect the Ballet project.

    Sources are tried in the following order:

    1. configuration option passed explicitly (``ballet_yml_path``)
    2. the notebook app's notebook directory
    3. the current working directory

    Raises:
        ConfigurationError: no project could be detected from the
            notebook directory or the current working directory
    """
    # 1. an explicit setting wins, and any error it causes propagates
    explicit_path = self.ballet_yml_path
    if explicit_path:
        return Project.from_path(explicit_path)

    notebook_dir = NotebookApp.instance().notebook_dir

    # 2. and 3. are best-effort: a failing candidate is skipped silently
    candidates = (
        lambda: Project.from_path(notebook_dir),
        lambda: Project.from_cwd(),
    )
    for load in candidates:
        with fy.suppress(Exception):
            return load()

    raise ConfigurationError('Could not detect Ballet project')
def quickstart(tempdir):
    """Render a quickstart project inside ``tempdir`` and yield its details.

    Equivalent to::

        $ cd tempdir
        $ ballet quickstart
        $ tree .

    Yields:
        Quickstart: namedtuple with fields ``project``, ``tempdir``,
            ``project_slug``, ``package_slug``, and ``repo``
    """
    project_slug = 'foo-bar'
    package_slug = 'foo_bar'

    # cd tempdir
    # NOTE(review): everything below, including the yield, is kept inside
    # the work_in block -- confirm against the intended layout
    with work_in(safepath(tempdir)):
        # ballet quickstart
        render_project_template(
            no_input=True,
            extra_context={
                'project_name': project_slug.capitalize(),
                'project_slug': project_slug,
                'package_slug': package_slug,
            },
            output_dir=safepath(tempdir),
        )

        # tree .
        tree(tempdir)

        project = Project.from_path(tempdir.joinpath(project_slug))
        repo = project.repo

        Quickstart = namedtuple(
            'Quickstart', 'project tempdir project_slug package_slug repo')
        yield Quickstart(project, tempdir, project_slug, package_slug, repo)
def test_project_pr_num(self, mock_get_pr_num, mock_repo):
    """``Project.pr_num`` equals the value configured on ``mock_get_pr_num``."""
    expected_pr_num = 3
    mock_get_pr_num.return_value = expected_pr_num

    # the project is built without a package
    project = Project(None)

    self.assertEqual(project.pr_num, expected_pr_num)
def quickstart(tempdir):
    """Render a quickstart project inside ``tempdir`` and yield its details.

    Equivalent to::

        $ cd tempdir
        $ ballet quickstart
        $ tree .

    Yields:
        QuickstartResult: built from the project, ``tempdir``, the project
            slug, the package slug, and the project's repo, in that order
    """
    project_slug = 'foo-bar'
    package_slug = 'foo_bar'

    # cd tempdir
    # NOTE(review): everything below, including the yield, is kept inside
    # the work_in block -- confirm against the intended layout
    with work_in(tempdir):
        # ballet quickstart
        render_project_template(
            no_input=True,
            extra_context={
                'project_name': project_slug.capitalize(),
                'project_slug': project_slug,
                'package_slug': package_slug,
            },
            output_dir=tempdir,
        )

        # tree .
        tree(tempdir)

        project = Project.from_path(tempdir.joinpath(project_slug))
        repo = project.repo

        yield QuickstartResult(
            project, tempdir, project_slug, package_slug, repo)
def collect_contrib_features(project_root):
    """Collect contributed features for a project at project_root

    For a project ``foo``, walks modules within the ``foo.features.contrib``
    subpackage. A single object that is an instance of ``ballet.Feature`` is
    imported if present in each module. The resulting ``Feature`` objects are
    collected.

    Args:
        project_root (str, path-like): Path to project root

    Returns:
        List[ballet.Feature]: list of Feature objects
    """
    # TODO Project should require ModuleType
    contrib_subpackage = Project(project_root)._resolve('.features.contrib')
    return _collect_contrib_features(contrib_subpackage)
def validate_feature_acceptance(feature, X, y, subsample=False, path=None,
                                package=None):
    """Judge whether ``feature`` is accepted by the project's accepter.

    The project is resolved from ``package`` if given, else from ``path`` if
    given, else from the current working directory.

    Args:
        feature: the candidate feature passed to the accepter
        X: data passed to ``project.build``
        y: target passed to ``project.build``
        subsample: if truthy, ``X`` and ``y`` are first passed through
            ``subsample_data_for_validation``
        path: path handed to ``Project.from_path``
        package: package handed to ``Project``

    Returns:
        the result of the accepter's ``judge()`` method
    """
    # resolve the project: explicit package, then explicit path, then cwd
    if package is None and path is None:
        project = Project.from_cwd()
    elif package is None:
        project = Project.from_path(path)
    else:
        project = Project(package)

    if subsample:
        X, y = subsample_data_for_validation(X, y)

    # build project
    build_result = project.build(X, y)

    # load accepter for this project
    accepter_cls = _load_class(project, 'validation.feature_accepter')
    accepter = accepter_cls(
        build_result.X_df, build_result.y, build_result.features, feature)

    return accepter.judge()
def test_detect_github_username_config(mock_project_repo):
    """The username is read from the repo config's ``github.user`` value."""
    expected_username = '******'

    # stub the output of project.repo.config_reader().get_value(...)
    config_reader = mock_project_repo.return_value.config_reader.return_value
    get_value = config_reader.get_value
    get_value.return_value = expected_username

    username = detect_github_username(Project(None))

    assert expected_username == username
    get_value.assert_called_with('github', 'user', default=ANY)
def start_new_feature(contrib_dir=None, **cc_kwargs):
    """Start a new feature within a ballet project

    By default, will prompt the user for input using cookiecutter's input
    interface.

    Renders the feature template into a temporary directory, then copies the
    feature files into the proper path within the contrib directory.

    Args:
        contrib_dir: directory under which to place contributed features. If
            omitted, it is read from the ``contrib.module_path`` config of
            the project at the current working directory.
        **cc_kwargs: options for the cookiecutter template

    Returns:
        the result of ``synctree`` for the copy into the contrib directory

    Raises:
        ballet.exc.BalletError: the new feature has the same name as an
            existing one
    """
    if contrib_dir is None:
        project = Project.from_path(pathlib.Path.cwd().resolve())
        contrib_dir = project.config.get('contrib.module_path')
        default_username = detect_github_username(project)
    else:
        # An explicit contrib_dir previously left ``project`` unbound, so
        # the username lookup crashed. Detect the enclosing project from
        # contrib_dir instead, and fall back to a placeholder username when
        # no project can be found.
        try:
            project = Project.from_path(contrib_dir, ascend=True)
            default_username = detect_github_username(project)
        except ConfigurationError:
            default_username = '******'

    # inject default username into context
    cc_kwargs.setdefault('extra_context', {})
    cc_kwargs['extra_context'].update({'_default_username': default_username})

    with tempfile.TemporaryDirectory() as tempdir:
        # render feature template
        output_dir = tempdir
        cc_kwargs['output_dir'] = output_dir
        rendered_dir = render_feature_template(**cc_kwargs)

        # clean pyc files from rendered dir; walking bottom-up means files
        # are visited before the __pycache__ directory that contains them
        for path in pwalk(rendered_dir, topdown=False):
            if path.suffix == '.pyc':
                path.unlink()
            if path.name == '__pycache__':
                # best effort: the directory may still hold other files
                with fy.suppress(OSError):
                    path.rmdir()

        # copy into contrib dir
        src = rendered_dir
        dst = contrib_dir
        result = synctree(src, dst, onexist=_fail_if_feature_exists)

    _log_start_new_feature_success(result)

    return result
def render_project_template(project_template_path: Optional[Pathy] = None,
                            create_github_repo: bool = False,
                            github_token: Optional[str] = None,
                            **cc_kwargs) -> str:
    """Generate a ballet project according to the project template

    If creating the GitHub repo is requested and the process fails for any
    reason, quickstart will complete successfully and users are instructed to
    read the corresponding section of the Maintainer's Guide to continue
    manually.

    Args:
        project_template_path: path to specific project template
        create_github_repo: whether to act to create the desired repo on
            GitHub after rendering the project. The repo will be owned by
            either the user or an org that the user has relevant permissions
            for, depending on what is entered during the quickstart prompts.
            If True, then a valid github token must also be provided.
        github_token: valid github token with appropriate permissions
        **cc_kwargs: options for the cookiecutter template

    Returns:
        the path to the rendered project, as returned by cookiecutter

    Raises:
        ValueError: ``create_github_repo`` is set but no ``github_token`` was
            provided. This is checked before anything is rendered.
    """
    # Fail fast: reject an unusable argument combination before cookiecutter
    # renders (and possibly prompts for) anything.
    if create_github_repo and github_token is None:
        raise ValueError('Need to provide github token')

    if project_template_path is None:
        project_template_path = PROJECT_TEMPLATE_PATH

    project_path = cookiecutter(project_template_path, **cc_kwargs)

    if not create_github_repo:
        return project_path

    g = Github(github_token)

    # need to get params from new project config
    project = Project.from_path(project_path)
    owner = project.config.get('github.github_owner')
    name = project.config.get('project.project_slug')

    # create repo on github
    try:
        github_repo = ballet.util.git.create_github_repo(g, owner, name)
        logger.info(f'Created repo on GitHub at {github_repo.html_url}')
    except GithubException:
        # the project itself rendered fine, so report and carry on
        logger.exception('Failed to create GitHub repo for this project')
        logger.warning(
            'Failed to create GitHub repo for this project...\n'
            'did you specify the intended repo owner, and do you have'
            ' permissions to create a repo under that owner?\n'
            'Try manually creating the repo: https://ballet.github.io/ballet/maintainer_guide.html#manual-repository-creation'  # noqa E501
        )
        return project_path

    # now push to remote
    # we don't need to set up the remote, as it has already been setup in
    # post_gen_hook.py
    local_repo = project.repo
    remote_name = project.config.get('github.remote')
    branches = [DEFAULT_BRANCH]
    try:
        push_branches_to_remote(local_repo, remote_name, branches)
    except BalletError:
        # a failed push is also non-fatal; the user can push manually
        logger.exception('Failed to push branches to GitHub repo')
        logger.warning(
            'Failed to push branches to GitHub repo...\n'
            'Try manually pushing the branches: https://ballet.github.io/ballet/maintainer_guide.html#manual-repository-creation'  # noqa E501
        )

    return project_path
def start_new_feature(contrib_dir: Pathy = None,
                      branching: bool = True,
                      **cc_kwargs) -> List[Tuple[pathlib.Path, str]]:
    """Start a new feature within a ballet project

    If run from default branch, by default will attempt to switch to a new
    branch for this feature, given by `<username>/feature-<featurename>`.
    Branch management is skipped when no project can be detected from an
    explicitly provided ``contrib_dir``.

    By default, will prompt the user for input using cookiecutter's input
    interface.

    Renders the feature template into a temporary directory, then copies the
    feature files into the proper path within the contrib directory.

    Args:
        contrib_dir: directory under which to place contributed features
        branching: whether to attempt to manage branching
        **cc_kwargs: options for the cookiecutter template

    Returns:
        the result of ``synctree``: (path, kind) pairs for the copy into the
        contrib directory

    Raises:
        ballet.exc.BalletError: the new feature has the same name as an
            existing one
    """
    # ``project`` stays None if it cannot be detected from contrib_dir, so
    # that the branching step below can be skipped instead of crashing on
    # an unbound name.
    project = None
    if contrib_dir is not None:
        try:
            project = Project.from_path(contrib_dir, ascend=True)
            default_username = detect_github_username(project)
        except ConfigurationError:
            default_username = '******'
    else:
        project = Project.from_cwd()
        contrib_dir = project.config.get('contrib.module_path')
        default_username = detect_github_username(project)

    # inject default username into context
    cc_kwargs.setdefault('extra_context', {})
    cc_kwargs['extra_context'].update({'_default_username': default_username})

    with tempfile.TemporaryDirectory() as tempdir:
        # render feature template
        output_dir = tempdir
        cc_kwargs['output_dir'] = output_dir
        rendered_dir = render_feature_template(**cc_kwargs)

        # clean pyc files from rendered dir; walking bottom-up means files
        # are visited before the __pycache__ directory that contains them
        for path in pwalk(rendered_dir, topdown=False):
            if path.suffix == '.pyc':
                path.unlink()
            if path.name == '__pycache__':
                # best effort: the directory may still hold other files
                with fy.suppress(OSError):
                    path.rmdir()

        # copy into contrib dir
        src = rendered_dir
        dst = contrib_dir
        result = synctree(src, dst, onexist=_fail_if_feature_exists)

    target_branch = None
    if branching and project is not None and project.on_master:
        # try to set the target branch name
        paths = [path for path, kind in result if kind == 'file']
        for path in paths:
            parts = pathlib.Path(path).parts
            subpackage, module = parts[-2], parts[-1]
            user_match = fy.re_find(SUBPACKAGE_NAME_REGEX, subpackage)
            feature_match = fy.re_find(FEATURE_MODULE_NAME_REGEX, module)
            # both parts are needed to name the branch; a feature module
            # under a non-matching subpackage is skipped rather than
            # subscripting a failed (None) match
            if feature_match and user_match:
                username = user_match['username']
                featurename = feature_match['featurename'].replace('_', '-')
                target_branch = f'{username}/feature-{featurename}'

        if target_branch is not None:
            switch_to_new_branch(project.repo, target_branch)

    _log_start_new_feature_success(result)
    _log_switch_to_new_branch(target_branch)

    return result
def test_project_version(mock_repo, commit_object):
    """``Project.version`` is a string when the repo's head commit is stubbed."""
    mock_repo.return_value.head.commit = commit_object

    assert isinstance(Project(None).version, str)
def update_project_template(push: bool = False,
                            project_template_path: Optional[Pathy] = None
                            ) -> None:
    """Update project with updates to upstream project template

    The update is fairly complicated and proceeds as follows:

    1. Load project: user must run command from master branch and ballet
       must be able to detect the project-template branch
    2. Load the saved cookiecutter context from disk
    3. Render the project template into a temporary directory using the
       saved context, *prompting the user if new keys are required*. Note
       that the project template is simply loaded from the data files of the
       installed version of ballet. Note further that by the project
       template's post_gen_hook, a new git repo is initialized [in the
       temporary directory] and files are committed.
    4. Add the temporary directory as a remote and merge it into the
       project-template branch, favoring changes made to the upstream
       template. Any failure to merge results in an unrecoverable error.
    5. Merge the project-template branch into the master branch. The user is
       responsible for merging conflicts and they are given instructions to
       do so and recover.
    6. If applicable, push to master.

    Args:
        push: whether to push updates to remote, defaults to False
        project_template_path: an override for the path to the project
            template

    Raises:
        ConfigurationError: the project cannot be loaded from the current
            working directory, the current branch is not the default
            branch, or the template branch does not exist
        BalletError: the repo has uncommitted changes
        GitCommandError: either merge (or the commit) fails; re-raised after
            logging
    """
    cwd = pathlib.Path.cwd().resolve()

    # get ballet project info -- must be at project root directory with a
    # ballet.yml file.
    try:
        project = Project.from_path(cwd)
    except ConfigurationError:
        raise ConfigurationError('Must run command from project root.')

    repo = project.repo
    # abbreviated hash of the starting commit, shown to the user in the
    # recovery instructions if the final merge conflicts
    original_head = repo.head.commit.hexsha[:7]

    # preconditions: clean working tree, on the default branch, and the
    # template branch exists
    if repo.is_dirty():
        raise BalletError(
            'Can\'t update project template with uncommitted changes. '
            'Please commit your changes and try again.')

    if repo.head.ref.name != DEFAULT_BRANCH:
        raise ConfigurationError(
            f'Must run command from branch {DEFAULT_BRANCH}')

    if TEMPLATE_BRANCH not in repo.branches:
        raise ConfigurationError(
            f'Could not find \'{TEMPLATE_BRANCH}\' branch.')

    # check for upstream updates to ballet
    new_version = _check_for_updated_ballet()
    if new_version:
        _warn_of_updated_ballet(new_version)

    with tempfile.TemporaryDirectory() as _tempdir:
        tempdir = pathlib.Path(_tempdir)

        # cookiecutter returns path to the resulting project dir
        logger.debug(f'Re-rendering project template at {tempdir}')
        updated_template = _render_project_template(
            cwd, tempdir, project_template_path=project_template_path)
        updated_repo = git.Repo(updated_template)

        # tempdir is a randomly-named dir suitable for a random remote name
        # to avoid conflicts
        remote_name = tempdir.name

        # expose the re-rendered repo to the project repo as a remote
        remote = repo.create_remote(
            remote_name, updated_repo.working_tree_dir)
        remote.fetch()

        repo.heads[TEMPLATE_BRANCH].checkout()
        try:
            logger.debug('Merging re-rendered template to project-template '
                         'branch')
            # squash-merge the re-rendered default branch, preferring the
            # incoming side on conflicts
            repo.git.merge(
                remote_name + '/' + DEFAULT_BRANCH,
                allow_unrelated_histories=True,
                strategy_option='theirs',
                squash=True,
            )
            # a squash merge that leaves the tree clean brought in nothing
            if not repo.is_dirty():
                logger.info('No updates to template -- done.')
                return
            commit_message = _make_template_branch_merge_commit_message()
            logger.debug(f'Committing updates: {commit_message}')
            repo.git.commit(m=commit_message)
        except GitCommandError:
            logger.critical(
                f'Could not merge changes into {TEMPLATE_BRANCH} branch, '
                f'update failed')
            raise
        finally:
            # NOTE(review): cleanup and the checkout of the default branch
            # are both placed in ``finally`` so they also run on the early
            # return and on failure -- confirm against the intended layout
            _safe_delete_remote(repo, remote_name)
            logger.debug('Checking out master branch')
            repo.heads[DEFAULT_BRANCH].checkout()

    try:
        logger.debug('Merging project-template branch into master')
        repo.git.merge(TEMPLATE_BRANCH, no_ff=True)
    except GitCommandError as e:
        # on a conflict, tell the user how to finish or abandon the merge;
        # the error is re-raised in every case
        if 'merge conflict' in str(e).lower():
            logger.critical(dedent(
                f'''
                Update failed due to a merge conflict.
                Fix conflicts, and then complete merge manually:
                    $ git add .
                    $ git commit --no-edit
                Otherwise, abandon the update:
                    $ git reset --merge {original_head}
                '''
            ).strip())
        raise

    if push:
        repo = project.repo
        remote_name = project.config.get('github.remote')
        branches = [DEFAULT_BRANCH, TEMPLATE_BRANCH]
        push_branches_to_remote(repo, remote_name, branches)

    _log_recommended_reinstall()
class RandomValue(BaseTransformer):
    """Transformer whose output is random and independent of the input values."""

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X.shape[0]`` random floats drawn by ``np.random.random``."""
        n_rows = X.shape[0]
        return np.random.random(n_rows)


# NOTE(review): ``input`` is expected to be defined earlier in this module;
# if it is not, this refers to the builtin -- confirm
transformer = RandomValue()
feature = Feature(input=input, transformer=transformer)


if __name__ == "__main__":
    # Manual check: build the project's features and assert that the
    # project's feature accepter accepts this feature.
    from ballet.util.log import enable
    from ballet.project import Project
    from ballet.validation.main import _load_class
    from ballet_predict_house_prices.features import build
    from ballet_predict_house_prices.load_data import load_data

    enable(level='INFO')

    X_df, y_df = load_data()
    build_result = build(X_df, y_df)
    X_df = build_result.X_df
    y = build_result.y
    features = build_result.features

    project = Project.from_path(".")
    accepter_cls = _load_class(project, 'validation.feature_accepter')
    accepter = accepter_cls(X_df, y, features, feature)

    assert accepter.judge()