Example #1
def test_stats_off(monkeypatch):
    posthog_mock = Mock()
    # report telemetry as disabled and route posthog calls to a mock
    # (assumes _get_telemetry_info returns an (enabled, uid) tuple and that
    # the telemetry module uses a module-level posthog client)
    monkeypatch.setattr(telemetry, '_get_telemetry_info',
                        lambda: (False, 'TestUID'))
    monkeypatch.setattr(telemetry, 'posthog', posthog_mock)
    telemetry.log_api("test_action")

    # stats are off, so no event should have been captured
    assert posthog_mock.capture.call_count == 0
Example #2
def test_offline_stats(monkeypatch):
    posthog_mock = Mock()
    # simulate being offline and route posthog calls to a mock
    # (assumes the telemetry module exposes an is_online flag and a
    # module-level posthog client)
    monkeypatch.setattr(telemetry, 'is_online', False)
    monkeypatch.setattr(telemetry, 'posthog', posthog_mock)
    telemetry.log_api("test_action")

    # offline, so no event should have been captured
    assert posthog_mock.capture.call_count == 0
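
Both tests above exercise the same gate inside telemetry.log_api: an event is only reported when telemetry is enabled and the machine is online. The following is a minimal sketch of that behavior for orientation only; it is not ploomber's actual implementation and assumes _get_telemetry_info returns an (enabled, uid) tuple, an is_online module-level flag, and a module-level posthog client.

import posthog

is_online = True  # assumed module-level flag (patched to False in the test above)


def _get_telemetry_info():
    # assumed to return (telemetry_enabled, user_id)
    return True, 'some-uid'


def log_api(action, total_runtime=None, dag=None, metadata=None):
    enabled, uid = _get_telemetry_info()

    if not enabled or not is_online:
        # telemetry disabled or offline: do nothing, which is what both
        # tests assert via the posthog mock's call count
        return

    posthog.capture(distinct_id=uid,
                    event=action,
                    properties={'total_runtime': total_runtime,
                                'metadata': metadata})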
Example #3
def main():
    start_time = datetime.datetime.now()
    parser = CustomParser(description='Call an entry point '
                          '(pipeline.yaml or dotted path to factory)',
                          prog='ploomber interact')
    with parser:
        # this command has no static args
        pass

    dag, _ = parser.load_from_entry_point_arg()

    try:
        dag.render()
    except Exception:
        err = ('Your dag failed to render, but you can still inspect the '
               'object to debug it.\n')
        telemetry.log_api("interact_error",
                          dag=dag,
                          metadata={
                              'type': 'dag_render_failed',
                              'exception': err
                          })
        print(err)

    end_time = datetime.datetime.now()
    telemetry.log_api("ploomber_interact",
                      total_runtime=str(end_time - start_time),
                      dag=dag)

    # NOTE: do not use embed here, we must use start_ipython, see here:
    # https://github.com/ipython/ipython/issues/8918
    start_ipython(argv=[], user_ns={'dag': dag})
Example #4
def _next_steps(cmdr, cmd_activate, start_time):
    end_time = datetime.datetime.now()
    telemetry.log_api("install-success",
                      total_runtime=str(end_time - start_time))

    cmdr.success('Next steps')
    cmdr.print((f'$ {cmd_activate}\n'
                '$ ploomber build'))
    cmdr.success()
Example #5
def main():
    start_time = datetime.datetime.now()
    parser = CustomParser(description='Build tasks', prog='ploomber task')
    with parser:
        parser.add_argument('task_name')
        parser.add_argument('--source',
                            '-s',
                            help='Print task source code',
                            action='store_true')
        parser.add_argument(
            '--build',
            '-b',
            help='Build task (default if no other option is passed)',
            action='store_true')
        parser.add_argument('--force',
                            '-f',
                            help='Force task build (ignore up-to-date status)',
                            action='store_true')
        parser.add_argument('--status',
                            '-st',
                            help='Get task status',
                            action='store_true')
        parser.add_argument('--on-finish',
                            '-of',
                            help='Only execute on_finish hook',
                            action='store_true')
    dag, args = parser.load_from_entry_point_arg()

    dag.render()
    task = dag[args.task_name]

    if args.source:
        print(task.source)

    if args.status:
        print(task.status())

    if args.on_finish:
        task._run_on_finish()

    # the task is built by default, but when --source or --status are passed,
    # the --build flag is required to also trigger a build
    no_flags = not any((args.build, args.status, args.source, args.on_finish))

    if no_flags or args.build:
        task.build(force=args.force)

    end_time = datetime.datetime.now()
    telemetry.log_api("ploomber_task",
                      total_runtime=str(end_time - start_time),
                      dag=dag,
                      metadata={'argv': sys.argv})
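
For reference, the single-task flow above can be reproduced from the Python API. This is only a sketch: it assumes a pipeline.yaml in the working directory, and 'some_task' is a placeholder task name.

from ploomber.spec import DAGSpec

dag = DAGSpec('pipeline.yaml').to_dag()  # load the pipeline spec
dag.render()                             # resolve sources and parameters
task = dag['some_task']                  # placeholder task name

print(task.source)                       # equivalent to --source
print(task.status())                     # equivalent to --status
task.build(force=True)                   # equivalent to --build --force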
Example #6
def main():
    start_time = datetime.datetime.now()
    parser = CustomParser(description='Show pipeline status',
                          prog='ploomber status')
    with parser:
        # this command has no static args
        pass
    dag, args = parser.load_from_entry_point_arg()
    print(dag.status())
    end_time = datetime.datetime.now()
    telemetry.log_api("ploomber_status",
                      total_runtime=str(end_time - start_time),
                      dag=dag)
Example #7
def _find_conda_root(conda_bin):
    conda_bin = Path(conda_bin)

    for parent in conda_bin.parents:
        # I've seen variations of this. On Windows: Miniconda3 and miniconda3;
        # on Linux: miniconda3, anaconda and miniconda
        if parent.name.lower() in {'miniconda3', 'miniconda', 'anaconda3'}:
            return parent
    err = ('Failed to locate conda root from '
           f'directory: {str(conda_bin)!r}. Please submit an issue: '
           'https://github.com/ploomber/ploomber/issues/new')
    telemetry.log_api("install-error",
                      metadata={
                          'type': 'no_conda_root',
                          'exception': err
                      })
    raise RuntimeError(err)
Example #8
def main():
    start_time = datetime.datetime.now()
    parser = CustomParser(description='Make a pipeline report',
                          prog='ploomber report')
    with parser:
        parser.add_argument(
            '--output',
            '-o',
            help='Where to save the report, defaults to pipeline.html',
            default='pipeline.html')
    dag, args = parser.load_from_entry_point_arg()
    dag.to_markup(path=args.output)
    end_time = datetime.datetime.now()
    telemetry.log_api("ploomber_report",
                      total_runtime=str(end_time - start_time),
                      dag=dag,
                      metadata={'argv': sys.argv})
    print('Report saved at:', args.output)
Example #9
def _locate_pip_inside_conda(env_name):
    """
    Locates pip inside the conda env with a given name
    """
    pip = _path_to_pip_in_env_with_name(shutil.which('conda'), env_name)

    # this might happen if the environment does not contain python/pip
    if not Path(pip).exists():
        err = (f'Could not locate pip in environment {env_name!r}, make sure '
               'it is included in your environment.yml and try again')
        telemetry.log_api("install-error",
                          metadata={
                              'type': 'no_pip_env',
                              'exception': err
                          })
        raise FileNotFoundError(err)

    return pip
Example #10
def cmd_router():
    cmd_name = None if len(sys.argv) < 2 else sys.argv[1]

    custom = {
        'build': cli_module.build.main,
        'plot': cli_module.plot.main,
        'task': cli_module.task.main,
        'report': cli_module.report.main,
        'interact': cli_module.interact.main,
        'status': cli_module.status.main,
        'nb': cli_module.nb.main,
    }

    # users may attempt to run execute/run, suggest to use build instead
    alias = {'execute': 'build', 'run': 'build'}

    if cmd_name in custom:
        # NOTE: we don't use the argument here, it is parsed by _main
        # pop the second element ('entry') to make the CLI behave as expected
        sys.argv.pop(1)
        # Add the current working directory; this is done automatically when
        # calling "python -m ploomber.build" but not here ("ploomber build")
        sys.path.insert(0, os.path.abspath('.'))
        fn = custom[cmd_name]
        fn()
    elif cmd_name in alias:
        suggestion = alias[cmd_name]
        telemetry.log_api("unsupported_build_cmd",
                          metadata={
                              'cmd_name': cmd_name,
                              'suggestion': suggestion,
                              'argv': sys.argv
                          })
        _exit_with_error_message("Try 'ploomber --help' for help.\n\n"
                                 f"Error: {cmd_name!r} is not a valid command."
                                 f" Did you mean {suggestion!r}?")
    else:
        if cmd_name not in ['examples', 'scaffold', 'install']:
            telemetry.log_api("unsupported-api-call",
                              metadata={'argv': sys.argv})
        cli()
Example #11
def examples(name, force, branch, output):
    """Get sample projects. Run "ploomber examples" to list them
    """
    try:
        cli_module.examples.main(name=name,
                                 force=force,
                                 branch=branch,
                                 output=output)
    except click.ClickException:
        raise
    except Exception as e:
        telemetry.log_api("examples_error",
                          metadata={
                              'type': 'runtime_error',
                              'exception': e,
                              'argv': sys.argv
                          })
        raise RuntimeError(
            'An error happened when executing the examples command. Check out '
            'the full error message for details. Downloading the examples '
            'again or upgrading Ploomber may fix the '
            'issue.\nDownload: ploomber examples -f\n'
            'Update: pip install ploomber -U\n'
            'Update [conda]: conda update ploomber -c conda-forge') from e
Example #12
def main():
    start_time = datetime.datetime.now()
    parser = CustomParser(description='Plot a pipeline', prog='ploomber plot')
    with parser:
        parser.add_argument(
            '--output',
            '-o',
            help='Where to save the plot, defaults to pipeline.png',
            default=None)
    dag, args = parser.load_from_entry_point_arg()

    if args.output is not None:
        output = args.output
    else:
        name = extract_name(args.entry_point)
        output = 'pipeline.png' if name is None else f'pipeline.{name}.png'

    dag.plot(output=output)
    end_time = datetime.datetime.now()
    telemetry.log_api("ploomber_plot",
                      total_runtime=str(end_time - start_time),
                      dag=dag,
                      metadata={'argv': sys.argv})
    print('Plot saved at:', output)
Example #13
File: nb.py Project: cxz/ploomber
def main():
    parser = CustomParser(description='Manage scripts and notebooks',
                          prog='ploomber nb')

    with parser:
        # The next options do not require a valid entry point

        # opening .py files as notebooks in JupyterLab with a single click
        single_click = parser.add_mutually_exclusive_group()
        single_click.add_argument(
            '--single-click',
            '-S',
            action='store_true',
            help=('Override JupyterLab defaults to open '
                  'scripts as notebook with a single click'))
        single_click.add_argument(
            '--single-click-disable',
            '-d',
            action='store_true',
            help=('Disables opening scripts as notebook with a single '
                  'click in JupyterLab'))

        # install/uninstall hook
        hook = parser.add_mutually_exclusive_group()
        hook.add_argument('--install-hook',
                          '-I',
                          action='store_true',
                          help='Install git pre-commit hook')
        hook.add_argument('--uninstall-hook',
                          '-u',
                          action='store_true',
                          help='Uninstall git pre-commit hook')

        # The next options require a valid entry point

        # inject/remove cell
        cell = parser.add_mutually_exclusive_group()
        cell.add_argument('--inject',
                          '-i',
                          action='store_true',
                          help='Inject cell to all script/notebook tasks')
        cell.add_argument(
            '--remove',
            '-r',
            action='store_true',
            help='Remove injected cell in all script/notebook tasks')

        # re-format
        parser.add_argument('--format',
                            '-f',
                            help='Re-format all script/notebook tasks')

        # pair scripts and nbs
        parser.add_argument('--pair',
                            '-p',
                            help='Pair scripts with ipynb files')

        # sync scripts and nbs
        parser.add_argument('--sync',
                            '-s',
                            action='store_true',
                            help='Sync scripts with ipynb files')

    loading_error = None

    # commands that need an entry point to work
    needs_entry_point = {'format', 'inject', 'remove', 'sync', 'pair'}

    args_ = parser.parse_args()

    if any(getattr(args_, arg) for arg in needs_entry_point):
        try:
            dag, args = parser.load_from_entry_point_arg()
        except Exception as e:
            loading_error = e
        else:
            dag.render(show_progress=False)

        if loading_error:
            err = 'Could not run nb command: the DAG failed to load'
            telemetry.log_api("nb_error",
                              metadata={
                                  'type': 'dag_load_failed',
                                  'exception': err + f' {loading_error}',
                                  'argv': sys.argv
                              })
            raise RuntimeError(err) from loading_error
    else:
        dag = None
        args = args_

    # options that do not need a DAG

    if args.single_click:
        _py_with_single_click_enable()

    if args.single_click_disable:
        _py_with_single_click_disable()

    if args.install_hook:
        if not Path('.git').is_dir():
            err = ('Expected a .git/ directory in the current working '
                   'directory. Run this from the repository root directory.')
            telemetry.log_api("nb_error",
                              metadata={
                                  'type': 'no_git_config',
                                  'exception': err,
                                  'argv': sys.argv
                              })
            raise NotADirectoryError(err)

        parent = Path('.git', 'hooks')
        parent.mkdir(exist_ok=True)

        # pre-commit: remove injected cells
        _install_hook(parent / 'pre-commit', pre_commit_hook, args.entry_point)
        click.echo('Successfully installed pre-commit git hook')

        # post-commit: inject cells
        _install_hook(parent / 'post-commit', post_commit_hook,
                      args.entry_point)
        click.echo('Successfully installed post-commit git hook')

    if args.uninstall_hook:
        _delete_hook(Path('.git', 'hooks', 'pre-commit'))
        _delete_hook(Path('.git', 'hooks', 'post-commit'))

    # options that need a valid DAG

    if args.format:
        new_paths = [
            str(p) for p in _call_in_source(
                dag,
                'format',
                'Formatted notebooks',
                dict(fmt=args.format),
            ) if p is not None
        ]

        if len(new_paths):
            click.echo('Extension changed for the following '
                       f'tasks: {", ".join(new_paths)}. Update your '
                       'pipeline declaration.')

    if args.inject:
        _call_in_source(
            dag,
            'save_injected_cell',
            'Injected cell',
            dict(),
        )

        click.secho(
            'Finished cell injection. Re-run this command if your '
            'pipeline.yaml changes.',
            fg='green')

    if args.remove:
        _call_in_source(
            dag,
            'remove_injected_cell',
            'Removed injected cell',
            dict(),
        )

    if args.sync:
        # maybe it's more efficient to pass all notebook paths at once?
        _call_in_source(dag, 'sync', 'Synced notebooks')

    # could pairing cause trouble if we're re-formatting?
    if args.pair:
        _call_in_source(
            dag,
            'pair',
            'Paired notebooks',
            dict(base_path=args.pair),
        )
        click.echo(f'Finished pairing notebooks. Tip: add {args.pair!r} to '
                   'your .gitignore to keep your repository clean')

    telemetry.log_api("ploomber_nb", dag=dag, metadata={'argv': sys.argv})
Example #14
def scaffold(conda, package, entry_point, empty):
    """Create new projects (if no pipeline.yaml exists) or add missings tasks
    """
    template = '-e/--entry-point is not compatible with the {flag} flag'

    if entry_point and conda:
        err = template.format(flag='--conda')
        telemetry.log_api("scaffold_error",
                          metadata={
                              'type': 'entry_and_conda_flag',
                              'exception': err,
                              'argv': sys.argv
                          })
        raise click.ClickException(err)

    if entry_point and package:
        err = template.format(flag='--package')
        telemetry.log_api("scaffold_error",
                          metadata={
                              'type': 'entry_and_package_flag',
                              'exception': err,
                              'argv': sys.argv
                          })
        raise click.ClickException(err)

    if entry_point and empty:
        err = template.format(flag='--empty')
        telemetry.log_api("scaffold_error",
                          metadata={
                              'type': 'entry_and_empty_flag',
                              'exception': err,
                              'argv': sys.argv
                          })
        raise click.ClickException(err)

    # try to load a dag by looking in default places
    if entry_point is None:
        loaded = _scaffold.load_dag()
    else:
        try:
            loaded = (
                DAGSpec(entry_point, lazy_import='skip'),
                Path(entry_point).parent,
                Path(entry_point),
            )
        except Exception as e:
            telemetry.log_api("scaffold_error",
                              metadata={
                                  'type': 'dag_load_failed',
                                  'exception': e,
                                  'argv': sys.argv
                              })
            raise click.ClickException(e) from e

    if loaded:
        # existing pipeline, add tasks
        spec, _, path_to_spec = loaded
        _scaffold.add(spec, path_to_spec)
        telemetry.log_api("ploomber_scaffold",
                          dag=loaded,
                          metadata={
                              'type': 'add_task',
                              'argv': sys.argv
                          })
    else:
        # no pipeline, create base project
        telemetry.log_api("ploomber_scaffold",
                          metadata={
                              'type': 'base_project',
                              'argv': sys.argv
                          })
        scaffold_project.cli(project_path=None,
                             conda=conda,
                             package=package,
                             empty=empty)
Example #15
def main(name, force=False, branch=None, output=None):
    """
    Entry point for examples
    """
    manager = _ExamplesManager(home=_home, branch=branch)
    tw = TerminalWriter()

    if not manager.examples.exists() or manager.outdated() or force:
        if not manager.examples.exists():
            click.echo('Local copy does not exist...')
        elif force:
            click.echo('Forcing download...')

        manager.clone()

    if not name:
        manager.list()
    else:
        selected = manager.path_to(name)

        if not selected.exists():
            telemetry.log_api("examples_error",
                              metadata={
                                  'type': 'wrong_example_name',
                                  'example_name': name
                              })
            click.echo(f'There is no example named {name!r}.\n'
                       'To list examples: ploomber examples\n'
                       'To update local copy: ploomber examples -f')
        else:
            output = output or name

            tw.sep('=', f'Copying example {name} to {output}/', blue=True)

            if Path(output).exists():
                telemetry.log_api("examples_error",
                                  metadata={
                                      'type': 'duplicated_output',
                                      'output_name': output
                                  })
                raise click.ClickException(
                    f"{output!r} already exists in the current working "
                    "directory, please rename it or move it "
                    "to another location and try again.")

            shutil.copytree(selected, output)

            path_to_readme = Path(output, 'README.md')
            out_dir = output + ('\\'
                                if platform.system() == 'Windows' else '/')

            tw.write('Next steps:\n\n'
                     f'$ cd {out_dir}'
                     f'\n$ ploomber install')
            tw.write(f'\n\nOpen {str(path_to_readme)} for details.\n',
                     blue=True)
            telemetry.log_api("ploomber_examples",
                              metadata={
                                  'argv': sys.argv,
                                  'example_name': name
                              })
Example #16
def main(render_only=False):
    start_time = datetime.datetime.now()
    parser = CustomParser(description='Build pipeline', prog='ploomber build')

    with parser:
        parser.add_argument('--force',
                            '-f',
                            help='Force execution by ignoring status',
                            action='store_true',
                            default=False)
        parser.add_argument('--skip-upstream',
                            '-su',
                            help='Skip building upstream dependencies. '
                            'Only applicable when using --partially',
                            action='store_true',
                            default=False)
        parser.add_argument(
            '--partially',
            '-p',
            help='Build a pipeline partially until certain task',
            default=None)
        parser.add_argument(
            '--debug',
            '-d',
            help='Drop a debugger session if an exception happens',
            action='store_true',
            default=False)

    # users may try to run "ploomber build {name}" to build a single task
    if len(sys.argv) > 1 and not sys.argv[1].startswith('-'):
        suggestion = 'ploomber task {task-name}'
        cmd_name = parser.prog
        telemetry.log_api("unsupported_build_cmd",
                          metadata={
                              'cmd_name': cmd_name,
                              'suggestion': suggestion,
                              'argv': sys.argv
                          })
        parser.error(f'{cmd_name!r} does not take positional arguments.\n'
                     f'To build a single task, try: {suggestion!r}')

    dag, args = parser.load_from_entry_point_arg()

    # when using the parallel executor from the CLI, ensure we print progress
    # to stdout
    if isinstance(dag.executor, Parallel):
        dag.executor.print_progress = True

    if render_only:
        dag.render()
    else:
        if args.partially:
            report = dag.build_partially(args.partially,
                                         force=args.force,
                                         debug=args.debug,
                                         skip_upstream=args.skip_upstream)
        else:
            report = dag.build(force=args.force, debug=args.debug)

        if report:
            print(report)
    end_time = datetime.datetime.now()
    telemetry.log_api("ploomber_build",
                      total_runtime=str(end_time - start_time),
                      dag=dag,
                      metadata={'argv': sys.argv})
    return dag
Example #17
def main(use_lock):
    """
    Install project, automatically detecting if it's a conda-based or pip-based
    project.

    Parameters
    ----------
    use_lock : bool
        If True, uses requirements.lock.txt/environment.lock.yml and
        requirements.dev.lock.txt/environment.dev.lock.yml files. Otherwise,
        it uses the regular files and creates the lock ones after installing
        dependencies
    """
    start_time = datetime.datetime.now()
    telemetry.log_api("install-started")
    HAS_CONDA = shutil.which('conda')
    HAS_ENV_YML = Path(_ENV_YML).exists()
    HAS_ENV_LOCK_YML = Path(_ENV_LOCK_YML).exists()
    HAS_REQS_TXT = Path(_REQS_TXT).exists()
    HAS_REQS_LOCK_TXT = Path(_REQS_LOCK_TXT).exists()

    if use_lock and not HAS_ENV_LOCK_YML and not HAS_REQS_LOCK_TXT:
        err = ("Expected and environment.lock.yaml "
               "(conda) or requirements.lock.txt (pip) in the current "
               "directory. Add one of them and try again.")
        telemetry.log_api("install-error",
                          metadata={
                              'type': 'no_lock',
                              'exception': err
                          })
        raise exceptions.ClickException(err)
    elif not use_lock and not HAS_ENV_YML and not HAS_REQS_TXT:
        err = ("Expected an environment.yaml (conda)"
               " or requirements.txt (pip) in the current directory."
               " Add one of them and try again.")
        telemetry.log_api("install-error",
                          metadata={
                              'type': 'no_env_requirements',
                              'exception': err
                          })
        raise exceptions.ClickException(err)
    elif (not HAS_CONDA and use_lock and HAS_ENV_LOCK_YML
          and not HAS_REQS_LOCK_TXT):
        err = ("Found env environment.lock.yaml "
               "but conda is not installed. Install conda or add a "
               "requirements.lock.txt to use pip instead")
        telemetry.log_api("install-error",
                          metadata={
                              'type': 'no_conda',
                              'exception': err
                          })
        raise exceptions.ClickException(err)
    elif not HAS_CONDA and not use_lock and HAS_ENV_YML and not HAS_REQS_TXT:
        err = ("Found environment.yaml but conda is not installed."
               " Install conda or add a requirements.txt to use pip instead")
        telemetry.log_api("install-error",
                          metadata={
                              'type': 'no_conda2',
                              'exception': err
                          })
        raise exceptions.ClickException(err)
    elif HAS_CONDA and use_lock and HAS_ENV_LOCK_YML:
        main_conda(start_time, use_lock=True)
    elif HAS_CONDA and not use_lock and HAS_ENV_YML:
        main_conda(start_time, use_lock=False)
    else:
        main_pip(start_time, use_lock=use_lock)
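
The _ENV_YML, _ENV_LOCK_YML, _REQS_TXT, _REQS_LOCK_TXT and _SETUP_PY constants are defined elsewhere in the module; judging from the docstring and error messages above, they presumably map to the usual file names:

_ENV_YML = 'environment.yml'
_ENV_LOCK_YML = 'environment.lock.yml'
_REQS_TXT = 'requirements.txt'
_REQS_LOCK_TXT = 'requirements.lock.txt'
_SETUP_PY = 'setup.py'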
Example #18
def main_conda(start_time, use_lock):
    """
    Install conda-based project, looks for environment.yml files

    Parameters
    ----------
    start_time : datetime
        The initial runtime of the function.
    use_lock : bool
        If True, uses environment.lock.yml and environment.dev.lock.yml files
    """
    env_yml = _ENV_LOCK_YML if use_lock else _ENV_YML

    # TODO: ensure ploomber-scaffold includes dependency files (including
    # lock files) in MANIFEST.in
    cmdr = Commander()

    # TODO: provide helpful error messages on each command

    with open(env_yml) as f:
        env_name = yaml.safe_load(f)['name']

    current_env = Path(shutil.which('python')).parents[1].name

    if env_name == current_env:
        err = (f'{env_yml} will create an environment '
               f'named {env_name!r}, which is the current active '
               'environment. Move to a different one and try '
               'again (e.g., "conda activate base")')
        telemetry.log_api("install-error",
                          metadata={
                              'type': 'env_running_conflict',
                              'exception': err
                          })
        raise RuntimeError(err)

    # get current installed envs
    conda = shutil.which('conda')
    mamba = shutil.which('mamba')

    # if already installed and running on windows, ask to delete first,
    # otherwise it might lead to an intermittent error (permission denied
    # on vcruntime140.dll)
    if os.name == 'nt':
        envs = cmdr.run(conda, 'env', 'list', '--json', capture_output=True)
        already_installed = any([
            env for env in json.loads(envs)['envs']
            # only check in the envs folder, ignore envs in other locations
            if 'envs' in env and env_name in env
        ])

        if already_installed:
            err = (f'Environment {env_name!r} already exists, '
                   f'delete it and try again '
                   f'(conda env remove --name {env_name})')
            telemetry.log_api("install-error",
                              metadata={
                                  'type': 'duplicate_env',
                                  'exception': err
                              })
            raise ValueError(err)

    pkg_manager = mamba if mamba else conda
    cmdr.run(pkg_manager,
             'env',
             'create',
             '--file',
             env_yml,
             '--force',
             description='Creating env')

    if Path(_SETUP_PY).exists():
        _pip_install_setup_py_conda(cmdr, env_name)

    if not use_lock:
        env_lock = cmdr.run(conda,
                            'env',
                            'export',
                            '--no-build',
                            '--name',
                            env_name,
                            description='Locking dependencies',
                            capture_output=True)
        Path(_ENV_LOCK_YML).write_text(env_lock)

    _try_conda_install_and_lock_dev(cmdr,
                                    pkg_manager,
                                    env_name,
                                    use_lock=use_lock)

    cmd_activate = f'conda activate {env_name}'
    _next_steps(cmdr, cmd_activate, start_time)