def execute_preview_command(cli_args, print_fn):
    """Print a preview of schedule changes for the repository selected by ``cli_args``.

    Args:
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.
    """
    repo_handle = handle_for_repo_cli_args(cli_args)
    repo_def = repo_handle.build_repository_definition()
    dagster_instance = DagsterInstance.get()

    # The scheduler handle is built against the instance's schedules directory.
    schedules_dir = dagster_instance.schedules_directory()
    scheduler_handle = repo_handle.build_scheduler_handle(artifacts_dir=schedules_dir)

    if scheduler_handle:
        print_changes(scheduler_handle, print_fn, preview=True)
        return

    # A falsy handle is reported to the user as "no scheduler defined".
    print_fn("Scheduler not defined for repository {name}".format(name=repo_def.name))
def execute_preview_command(cli_args, print_fn):
    """Print a preview of schedule changes without applying them.

    Args:
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.
    """
    repo_handle = handle_for_repo_cli_args(cli_args)
    dagster_instance = DagsterInstance.get()

    # Validation is delegated to check_handle_and_scheduler before anything is built.
    check_handle_and_scheduler(repo_handle, dagster_instance)

    print_changes(
        repo_handle.build_scheduler_handle(),
        repo_handle.build_repository_definition(),
        dagster_instance,
        print_fn,
        preview=True,
    )
def ui(host, port, storage_fallback, reload_trigger, **kwargs):
    """Resolve the repository handle from CLI kwargs and start the dagit UI.

    Args:
        host: Host passed through to ``host_dagit_ui``.
        port: Port to serve on; ``None`` selects ``DEFAULT_DAGIT_PORT``.
        storage_fallback: Passed through to ``host_dagit_ui``.
        reload_trigger: Passed through to ``host_dagit_ui``.
        **kwargs: Repository-selection CLI arguments.
    """
    repo_handle = handle_for_repo_cli_args(kwargs)

    # Put the cwd on sys.path so imports in dynamically loaded code resolve.
    sys.path.append(os.getcwd())

    # port_lookup records that the caller did not pin a port explicitly.
    port_lookup = port is None
    if port_lookup:
        port = DEFAULT_DAGIT_PORT

    host_dagit_ui(repo_handle, host, port, storage_fallback, reload_trigger, port_lookup)
def execute_stop_command(schedule_name, cli_args, print_fn, instance=None):
    """Stop the named schedule for the repository selected by ``cli_args``.

    Args:
        schedule_name: Name of the schedule to stop.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.
        instance: Optional instance to operate on. When omitted,
            ``DagsterInstance.get()`` is used.

    Raises:
        click.UsageError: If stopping the schedule raises
            ``DagsterInvariantViolationError``.
    """
    handle = handle_for_repo_cli_args(cli_args)

    # Honor a caller-supplied instance; previously the parameter was always
    # overwritten by DagsterInstance.get() and therefore had no effect.
    if instance is None:
        instance = DagsterInstance.get()

    check_handle_and_scheduler(handle, instance)
    repository = handle.build_repository_definition()

    try:
        instance.stop_schedule(repository, schedule_name)
    except DagsterInvariantViolationError as ex:
        raise click.UsageError(ex)

    print_fn("Stopped schedule {schedule_name}".format(schedule_name=schedule_name))
def execute_wipe_command(cli_args, print_fn):
    """Delete all schedules on the instance after an interactive confirmation.

    The user must type ``DELETE`` at the prompt; any other answer leaves the
    schedules untouched.

    Args:
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with the success message.
    """
    repo_handle = handle_for_repo_cli_args(cli_args)
    dagster_instance = DagsterInstance.get()
    check_handle_and_scheduler(repo_handle, dagster_instance)

    typed = click.prompt(
        'Are you sure you want to delete all schedules and schedule cron jobs? Type DELETE'
    )

    if typed != 'DELETE':
        # The abort message goes through click.echo rather than print_fn.
        click.echo('Exiting without deleting all schedules and schedule cron jobs')
        return

    dagster_instance.wipe_all_schedules()
    print_fn("Wiped all schedules and schedule cron jobs")
def execute_execute_command_with_preset(preset, raise_on_error, cli_args, mode):
    """Execute a pipeline in-process using one of the repository's presets.

    Note that ``cli_args`` is mutated: its ``'pipeline_name'`` key is removed.

    Args:
        preset: Preset name looked up via ``repository.get_preset_pipeline``.
        raise_on_error: Forwarded to ``InProcessExecutorConfig``.
        cli_args: Pipeline-selection CLI arguments (must contain
            ``'pipeline_name'``).
        mode: Mode placed on the ``RunConfig``.

    Returns:
        Whatever ``execute_pipeline`` returns.
    """
    pipeline_def = handle_for_pipeline_cli_args(cli_args).build_pipeline_definition()

    # The pipeline name is removed in place before the remaining arguments are
    # reused to resolve the repository.
    cli_args.pop('pipeline_name')
    repo_def = handle_for_repo_cli_args(cli_args).build_repository_definition()

    preset_kwargs = repo_def.get_preset_pipeline(pipeline_def.name, preset)

    executor_config = InProcessExecutorConfig(raise_on_error=raise_on_error)
    run_config = RunConfig(mode=mode, executor_config=executor_config)
    return execute_pipeline(run_config=run_config, **preset_kwargs)
def execute_restart_command(schedule_name, all_running_flag, cli_args, print_fn):
    """Restart one schedule, or every running schedule, by stopping then starting it.

    Args:
        schedule_name: Name of the schedule to restart; used only when
            ``all_running_flag`` is falsy.
        all_running_flag: When truthy, restart every schedule whose status is
            ``ScheduleStatus.RUNNING``.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.

    Raises:
        click.UsageError: If DAGSTER_HOME is not set, if the named schedule is
            not currently running, or if stopping/starting a schedule raises
            ``DagsterInvariantViolationError``.
    """
    if not _is_dagster_home_set():
        raise click.UsageError(
            dagster_home_error_message_for_command('dagster schedule restart ...')
        )

    handle = handle_for_repo_cli_args(cli_args)
    repository = handle.build_repository_definition()
    instance = DagsterInstance.get()
    schedule_handle = handle.build_scheduler_handle(artifacts_dir=instance.schedules_directory())

    if not schedule_handle:
        print_fn("Scheduler not defined for repository {name}".format(name=repository.name))
        return

    scheduler = schedule_handle.get_scheduler()

    if all_running_flag:
        for schedule in scheduler.all_schedules():
            if schedule.status == ScheduleStatus.RUNNING:
                try:
                    scheduler.stop_schedule(schedule.name)
                    scheduler.start_schedule(schedule.name)
                except DagsterInvariantViolationError as ex:
                    raise click.UsageError(ex)

        print_fn(
            "Restarted all running schedules for repository {name}".format(name=repository.name)
        )
    else:
        schedule = scheduler.get_schedule_by_name(schedule_name)
        if schedule.status != ScheduleStatus.RUNNING:
            # The error was previously constructed but never raised, so the
            # restart silently proceeded for schedules that were not running.
            raise click.UsageError(
                "Cannot restart a schedule {name} because is not currently running".format(
                    name=schedule.name
                )
            )

        try:
            scheduler.stop_schedule(schedule_name)
            scheduler.start_schedule(schedule_name)
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)

        print_fn(
            "Restarted schedule {schedule_name} with ID {schedule_id}".format(
                schedule_name=schedule_name, schedule_id=schedule.schedule_id
            )
        )
def execute_list_command(running_filter, stopped_filter, name_filter, cli_args, print_fn):
    """List the schedules stored on the instance for the selected repository.

    Args:
        running_filter: When truthy, only schedules with status
            ``ScheduleStatus.RUNNING`` are listed.
        stopped_filter: When truthy (and ``running_filter`` is falsy), only
            schedules with status ``ScheduleStatus.STOPPED`` are listed.
        name_filter: When truthy, print only schedule names and omit the
            repository header and per-schedule details.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.
    """
    repo_handle = handle_for_repo_cli_args(cli_args)
    dagster_instance = DagsterInstance.get()
    check_handle_and_scheduler(repo_handle, dagster_instance)

    scheduler_handle = repo_handle.build_scheduler_handle()
    repo_def = repo_handle.build_repository_definition()

    if not name_filter:
        header = 'Repository {name}'.format(name=repo_def.name)
        print_fn(header)
        print_fn('*' * len(header))

    listed = dagster_instance.all_schedules(repo_def)
    if running_filter or stopped_filter:
        # The running filter wins when both filters are supplied.
        wanted = ScheduleStatus.RUNNING if running_filter else ScheduleStatus.STOPPED
        listed = [s for s in listed if s.status == wanted]

    printed_any = False
    for schedule in listed:
        schedule_def = scheduler_handle.get_schedule_def_by_name(schedule.name)

        # With the name filter, only the schedule name is printed.
        if name_filter:
            print_fn(schedule_def.name)
            continue

        if schedule:
            flag = "[{status}]".format(status=schedule.status.value)
        else:
            flag = ""
        schedule_title = 'Schedule: {name} {flag}'.format(name=schedule_def.name, flag=flag)

        # A separator row precedes every entry except the first one printed.
        if printed_any:
            print_fn('*' * len(schedule_title))
        printed_any = True

        print_fn(schedule_title)
        print_fn(
            'Cron Schedule: {cron_schedule}'.format(cron_schedule=schedule_def.cron_schedule)
        )
def ui(host, port, sync, log, log_dir, no_watch=False, **kwargs):
    """Resolve the repository handle from CLI kwargs and start the dagit UI.

    Args:
        host: Passed through to ``host_dagit_ui``.
        port: Passed through to ``host_dagit_ui``.
        sync: Passed through to ``host_dagit_ui``.
        log: When truthy and no ``log_dir`` is given, a log directory is
            derived from the handle.
        log_dir: Explicit log directory, if any.
        no_watch: Must be falsy at this level; a truthy value fails the
            invariant check.
        **kwargs: Repository-selection CLI arguments.
    """
    repo_handle = handle_for_repo_cli_args(kwargs)

    # Put the cwd on sys.path so imports in dynamically loaded code resolve.
    sys.path.append(os.getcwd())

    needs_default_log_dir = log and not log_dir
    if needs_default_log_dir:
        log_dir = dagster_logs_dir_for_handle(repo_handle)

    no_watch_message = (
        'Do not set no_watch when calling the Dagit Python CLI directly -- this flag is a no-op '
        'at this level and should be set only when invoking dagit/bin/dagit.'
    )
    check.invariant(not no_watch, no_watch_message)

    host_dagit_ui(log, log_dir, repo_handle, sync, host, port)
def ui(text, file, predefined, variables, **kwargs):
    """Execute a GraphQL document selected by exactly one of three sources.

    Args:
        text: Inline query text; surrounding quotes and whitespace are stripped.
        file: Open file-like object whose contents are the query.
        predefined: Key into ``PREDEFINED_QUERIES``.
        variables: Passed through to ``execute_query_from_cli``.
        **kwargs: Repository-selection CLI arguments.

    Raises:
        click.UsageError: Unless exactly one of ``text``, ``file`` and
            ``predefined`` is provided.
    """
    repo_handle = handle_for_repo_cli_args(kwargs)

    provided = [source for source in (text, file, predefined) if source is not None]
    if len(provided) != 1:
        raise click.UsageError(
            'Must select one and only one of text (-t), file (-f), or predefined (-p) '
            'to select GraphQL document to execute.'
        )

    # Exactly one source is set from here on.
    if text is not None:
        query = text.strip('\'" \n\t')
    elif file is not None:
        query = file.read()
    else:
        query = PREDEFINED_QUERIES[predefined]

    execute_query_from_cli(repo_handle, query, variables)
def load_dagit_for_repo_cli_args(n_pipelines=1, **kwargs):
    """Build a dagit app for the given repo args and check its pipeline listing.

    Issues ``PIPELINES_OR_ERROR_QUERY`` against the app's test client and
    asserts on the shape of the JSON response.

    Args:
        n_pipelines: Expected number of entries under
            ``data.pipelinesOrError.nodes``.
        **kwargs: Repository-selection CLI arguments.

    Returns:
        The raw response object returned by the test client.
    """
    repo_handle = handle_for_repo_cli_args(kwargs)
    test_client = create_app(repo_handle, DagsterInstance.ephemeral()).test_client()

    url = '/graphql?query={query_string}'.format(query_string=PIPELINES_OR_ERROR_QUERY)
    res = test_client.get(url)
    payload = json.loads(res.data.decode('utf-8'))

    # Walk the response one level at a time so a failure pinpoints the missing key.
    assert 'data' in payload
    data = payload['data']
    assert 'pipelinesOrError' in data
    pipelines_or_error = data['pipelinesOrError']
    assert 'nodes' in pipelines_or_error
    assert len(pipelines_or_error['nodes']) == n_pipelines

    return res
def execute_list_command(running_filter, name_filter, verbose, cli_args, print_fn):
    """List the schedule definitions known to the repository's scheduler.

    Args:
        running_filter: When truthy, skip schedules for which
            ``scheduler.get_schedule_by_name`` returns a falsy value.
        name_filter: When truthy, print only schedule names and omit the
            repository header, details and the "not defined" message.
        verbose: When truthy, also print each schedule's execution params.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.
    """
    handle = handle_for_repo_cli_args(cli_args)
    repository = handle.build_repository_definition()
    instance = DagsterInstance.get()
    scheduler = handle.build_scheduler(artifacts_dir=instance.schedules_directory())

    if not scheduler:
        # Without a scheduler there is nothing to list. Previously, a missing
        # scheduler combined with name_filter fell through and failed on
        # scheduler.get_all_schedule_defs(). The message stays suppressed under
        # name_filter so that mode still prints names only.
        if not name_filter:
            print_fn("Scheduler not defined for repository {name}".format(name=repository.name))
        return

    if not name_filter:
        title = 'Repository {name}'.format(name=repository.name)
        print_fn(title)
        print_fn('*' * len(title))

    first = True
    for schedule in scheduler.get_all_schedule_defs():
        # Truthiness of the stored schedule is used as the "running" signal.
        is_running = scheduler.get_schedule_by_name(schedule.name)

        # With the running filter, schedules that are not running are skipped.
        if running_filter and not is_running:
            continue

        # With the name filter, only the schedule name is printed.
        if name_filter:
            print_fn(schedule.name)
            continue

        running_flag = "[Running]" if is_running else ""
        schedule_title = 'Schedule: {name} {running_flag}'.format(
            name=schedule.name, running_flag=running_flag
        )

        # A separator row precedes every entry except the first one printed.
        if not first:
            print_fn('*' * len(schedule_title))
        first = False

        print_fn(schedule_title)
        print_fn('Cron Schedule: {cron_schedule}'.format(cron_schedule=schedule.cron_schedule))

        if verbose:
            print_fn(
                'Execution Params: {execution_params}'.format(
                    execution_params=schedule.execution_params
                )
            )
def execute_stop_command(schedule_name, cli_args, print_fn, instance=None):
    """Stop the named schedule for the repository selected by ``cli_args``.

    Args:
        schedule_name: Name of the schedule to stop.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.
        instance: Optional ``DagsterInstance``. When omitted,
            ``DagsterInstance.get()`` is used.

    Raises:
        click.UsageError: If stopping the schedule raises
            ``DagsterInvariantViolationError``.
    """
    # Type-check the optional argument first and only fall back to
    # DagsterInstance.get() when nothing was supplied; passing the fallback as
    # an eagerly evaluated default called get() even for caller-supplied instances.
    instance = check.opt_inst_param(instance, 'instance', DagsterInstance)
    if instance is None:
        instance = DagsterInstance.get()

    handle = handle_for_repo_cli_args(cli_args)
    repository = handle.build_repository_definition()
    schedule_handle = handle.build_scheduler_handle()

    if not schedule_handle:
        print_fn("Scheduler not defined for repository {name}".format(name=repository.name))
        return

    try:
        instance.stop_schedule(repository, schedule_name)
    except DagsterInvariantViolationError as ex:
        raise click.UsageError(ex)

    print_fn("Stopped schedule {schedule_name}".format(schedule_name=schedule_name))
def execute_end_command(schedule_name, schedule_dir, cli_args, print_fn):
    """End the named schedule using a ``SystemCronScheduler``.

    Args:
        schedule_name: Name of the schedule definition to end.
        schedule_dir: Directory given to the scheduler; when falsy, it is
            derived from the handle via ``dagster_schedule_dir_for_handle``.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with the confirmation line.

    Raises:
        click.UsageError: If ending the schedule raises
            ``DagsterInvariantViolationError``.
    """
    repo_handle = handle_for_repo_cli_args(cli_args)
    repo_def = repo_handle.build_repository_definition()

    schedule_dir = schedule_dir or dagster_schedule_dir_for_handle(repo_handle)

    schedule_def = repo_def.get_schedule(schedule_name)
    cron_scheduler = SystemCronScheduler(schedule_dir)

    try:
        ended = cron_scheduler.end_schedule(schedule_def)
    except DagsterInvariantViolationError as ex:
        raise click.UsageError(ex)

    message = "Ended schedule {schedule_name} with ID {schedule_id}"
    print_fn(message.format(schedule_name=schedule_def.name, schedule_id=ended.schedule_id))
def ui(host, port, storage_fallback, reload_trigger, **kwargs):
    """Resolve the repository handle from CLI kwargs and start the dagit UI.

    Args:
        host: Host passed through to ``host_dagit_ui``.
        port: Port to serve on; ``None`` selects ``DEFAULT_DAGIT_PORT``.
        storage_fallback: Fallback storage directory. When ``None``, a
            temporary directory is created for the duration of the
            ``host_dagit_ui`` call.
        reload_trigger: Passed through to ``host_dagit_ui``.
        **kwargs: Repository-selection CLI arguments.
    """
    handle = handle_for_repo_cli_args(kwargs)

    # Put the cwd on sys.path so imports in dynamically loaded code resolve.
    sys.path.append(os.getcwd())

    # port_lookup records that the caller did not pin a port explicitly.
    port_lookup = port is None
    if port_lookup:
        port = DEFAULT_DAGIT_PORT

    if storage_fallback is not None:
        host_dagit_ui(handle, host, port, storage_fallback, reload_trigger, port_lookup)
        return

    # The dagit entrypoint always sets storage_fallback, but if someone launches
    # dagit-cli directly, make sure things still work by providing a temp
    # directory. The TemporaryDirectory object is kept alive with a context
    # manager: taking only `.name` drops the last reference to it, which lets
    # its cleanup remove the directory before it is ever used.
    # NOTE(review): assumes host_dagit_ui does not return while the directory
    # is still needed -- confirm.
    with seven.TemporaryDirectory() as temp_storage:
        host_dagit_ui(handle, host, port, temp_storage, reload_trigger, port_lookup)
def execute_end_command(schedule_name, cli_args, print_fn):
    """End the named schedule using the repository's own scheduler.

    Args:
        schedule_name: Name of the schedule definition to end.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.

    Raises:
        click.UsageError: If ending the schedule raises
            ``DagsterInvariantViolationError``.
    """
    repo_def = handle_for_repo_cli_args(cli_args).build_repository_definition()

    # The scheduler is built against the instance's schedules directory.
    schedules_dir = DagsterInstance.get().schedules_directory()
    repo_scheduler = repo_def.build_scheduler(schedule_dir=schedules_dir)

    if not repo_scheduler:
        print_fn("Scheduler not defined for repository {name}".format(name=repo_def.name))
        return

    schedule_def = repo_def.get_schedule(schedule_name)

    try:
        ended = repo_scheduler.end_schedule(schedule_def)
    except DagsterInvariantViolationError as ex:
        raise click.UsageError(ex)

    message = "Ended schedule {schedule_name} with ID {schedule_id}"
    print_fn(message.format(schedule_name=schedule_def.name, schedule_id=ended.schedule_id))
def ui(log, log_dir, text, file, predefined, variables, **kwargs):
    """Execute a GraphQL document selected by exactly one of three sources.

    Args:
        log: When truthy and no ``log_dir`` is given, a log directory is
            derived from the handle.
        log_dir: Explicit log directory, if any.
        text: Inline query text; surrounding quotes and whitespace are stripped.
        file: Path of a file whose contents are the query.
        predefined: Key into ``PREDEFINED_QUERIES``.
        variables: Passed through to ``execute_query_from_cli``.
        **kwargs: Repository-selection CLI arguments.

    Raises:
        click.UsageError: Unless exactly one of ``text``, ``file`` and
            ``predefined`` is provided.
    """
    repo_handle = handle_for_repo_cli_args(kwargs)

    provided = [source for source in (text, file, predefined) if source is not None]
    if len(provided) != 1:
        raise click.UsageError(
            'Must select one and only one of text (-t), file (-f), or predefined (-p) '
            'to select GraphQL document to execute.'
        )

    # Exactly one source is set from here on.
    if text is not None:
        query = text.strip('\'" \n\t')
    elif file is not None:
        with open(file) as query_file:
            query = query_file.read()
    else:
        query = PREDEFINED_QUERIES[predefined]

    needs_default_log_dir = log and not log_dir
    if needs_default_log_dir:
        log_dir = dagster_logs_dir_for_handle(repo_handle)

    execute_query_from_cli(repo_handle, query, variables, log, log_dir)
def execute_up_command(preview, cli_args, print_fn):
    """Print pending schedule changes and, unless previewing, reconcile them.

    Args:
        preview: When truthy, only print the changes and return.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.

    Raises:
        click.UsageError: If reconciliation raises
            ``DagsterInvariantViolationError``.
    """
    repo_handle = handle_for_repo_cli_args(cli_args)
    dagster_instance = DagsterInstance.get()
    check_handle_and_scheduler(repo_handle, dagster_instance)

    repo_def = repo_handle.build_repository_definition()

    # Interpreter and repository YAML path are handed to the reconcile step.
    interpreter = sys.executable
    repo_yaml = repo_handle.data.repository_yaml

    print_changes(repo_def, dagster_instance, print_fn, preview=preview)

    if not preview:
        try:
            reconcile_scheduler_state(
                interpreter, repo_yaml, repo_def, instance=dagster_instance
            )
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)
def execute_up_command(preview, cli_args, print_fn):
    """Print pending schedule changes and, unless previewing, apply them.

    Args:
        preview: When truthy, only print the changes and return.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.

    Raises:
        click.UsageError: If ``scheduler_handle.up`` raises
            ``DagsterInvariantViolationError``.
    """
    repo_handle = handle_for_repo_cli_args(cli_args)
    repo_def = repo_handle.build_repository_definition()

    # Interpreter and repository YAML path are handed to scheduler_handle.up.
    interpreter = sys.executable
    repo_yaml = repo_handle.data.repository_yaml

    dagster_instance = DagsterInstance.get()
    scheduler_handle = repo_handle.build_scheduler_handle()

    if not scheduler_handle:
        print_fn("Scheduler not defined for repository {name}".format(name=repo_def.name))
        return

    print_changes(scheduler_handle, repo_def, dagster_instance, print_fn, preview=preview)

    if not preview:
        try:
            scheduler_handle.up(interpreter, repo_yaml, repo_def, instance=dagster_instance)
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)
def execute_list_command(cli_args, print_fn):
    """Print every pipeline in the selected repository along with its solids.

    Args:
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.
    """
    repo_def = handle_for_repo_cli_args(cli_args).build_repository_definition()

    header = 'Repository {name}'.format(name=repo_def.name)
    print_fn(header)
    print_fn('*' * len(header))

    for position, pipeline in enumerate(repo_def.get_all_pipelines()):
        pipeline_title = 'Pipeline: {name}'.format(name=pipeline.name)

        # A separator row precedes every pipeline except the first.
        if position:
            print_fn('*' * len(pipeline_title))
        print_fn(pipeline_title)

        if pipeline.description:
            print_fn('Description:')
            print_fn(format_description(pipeline.description, indent=' ' * 4))

        print_fn('Solids: (Execution Order)')
        for solid in pipeline.solids_in_topological_order:
            print_fn(' ' + solid.name)
def execute_start_command(schedule_name, schedule_dir, cli_args, print_fn):
    """Start the named schedule using a ``SystemCronScheduler``.

    Args:
        schedule_name: Name of the schedule definition to start.
        schedule_dir: Directory given to the scheduler; when falsy, it is
            derived from the handle via ``dagster_schedule_dir_for_handle``.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with the confirmation line.

    Raises:
        click.UsageError: If starting the schedule raises
            ``DagsterInvariantViolationError``.
    """
    repo_handle = handle_for_repo_cli_args(cli_args)
    repo_def = repo_handle.build_repository_definition()

    schedule_dir = schedule_dir or dagster_schedule_dir_for_handle(repo_handle)

    # Interpreter and repository YAML path are handed to the scheduler.
    interpreter = sys.executable
    repo_yaml = repo_handle.data.repository_yaml

    schedule_def = repo_def.get_schedule(schedule_name)
    cron_scheduler = SystemCronScheduler(schedule_dir)

    try:
        started = cron_scheduler.start_schedule(schedule_def, interpreter, repo_yaml)
    except DagsterInvariantViolationError as ex:
        raise click.UsageError(ex)

    message = "Started schedule {schedule_name} with ID {schedule_id}"
    print_fn(message.format(schedule_name=schedule_def.name, schedule_id=started.schedule_id))
def execute_stop_command(schedule_name, cli_args, print_fn):
    """Stop the named schedule via the repository's scheduler.

    Args:
        schedule_name: Name of the schedule to stop.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.

    Raises:
        click.UsageError: If stopping the schedule raises
            ``DagsterInvariantViolationError``.
    """
    repo_handle = handle_for_repo_cli_args(cli_args)
    repo_def = repo_handle.build_repository_definition()

    schedules_dir = DagsterInstance.get().schedules_directory()
    scheduler_handle = repo_handle.build_scheduler_handle(artifacts_dir=schedules_dir)

    if not scheduler_handle:
        print_fn("Scheduler not defined for repository {name}".format(name=repo_def.name))
        return

    active_scheduler = scheduler_handle.get_scheduler()

    try:
        stopped = active_scheduler.stop_schedule(schedule_name)
    except DagsterInvariantViolationError as ex:
        raise click.UsageError(ex)

    message = "Stopped schedule {schedule_name} with ID {schedule_id}"
    print_fn(message.format(schedule_name=schedule_name, schedule_id=stopped.schedule_id))
def execute_wipe_command(cli_args, print_fn):
    """Wipe the repository's scheduler after an interactive confirmation.

    The user must type ``DELETE`` at the prompt; any other answer leaves the
    schedules untouched.

    Args:
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with status lines.
    """
    repo_handle = handle_for_repo_cli_args(cli_args)
    repo_def = repo_handle.build_repository_definition()

    schedules_dir = DagsterInstance.get().schedules_directory()
    scheduler_handle = repo_handle.build_scheduler_handle(artifacts_dir=schedules_dir)

    if not scheduler_handle:
        print_fn("Scheduler not defined for repository {name}".format(name=repo_def.name))
        return

    typed = click.prompt(
        'Are you sure you want to delete all schedules and schedule cron jobs? Type DELETE'
    )

    if typed != 'DELETE':
        # The abort message goes through click.echo rather than print_fn.
        click.echo('Exiting without deleting all schedules and schedule cron jobs')
        return

    scheduler_handle.get_scheduler().wipe()
    print_fn("Wiped all schedules and schedule cron jobs")
def execute_restart_command(schedule_name, all_running_flag, cli_args, print_fn):
    """Restart one schedule, or every running schedule, by stopping then starting it.

    Args:
        schedule_name: Name of the schedule to restart; used only when
            ``all_running_flag`` is falsy.
        all_running_flag: When truthy, restart every schedule whose status is
            ``ScheduleStatus.RUNNING``.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.

    Raises:
        click.UsageError: If the named schedule is not currently running, or
            if stopping/starting a schedule raises
            ``DagsterInvariantViolationError``.
    """
    handle = handle_for_repo_cli_args(cli_args)
    repository = handle.build_repository_definition()
    instance = DagsterInstance.get()
    schedule_handle = handle.build_scheduler_handle()

    if not schedule_handle:
        print_fn("Scheduler not defined for repository {name}".format(name=repository.name))
        return

    if all_running_flag:
        for schedule in instance.all_schedules(repository):
            if schedule.status == ScheduleStatus.RUNNING:
                try:
                    instance.stop_schedule(repository, schedule.name)
                    instance.start_schedule(repository, schedule.name)
                except DagsterInvariantViolationError as ex:
                    raise click.UsageError(ex)

        print_fn(
            "Restarted all running schedules for repository {name}".format(name=repository.name)
        )
    else:
        schedule = instance.get_schedule_by_name(repository, schedule_name)
        if schedule.status != ScheduleStatus.RUNNING:
            # The error was previously constructed but never raised, so the
            # restart silently proceeded for schedules that were not running.
            raise click.UsageError(
                "Cannot restart a schedule {name} because is not currently running".format(
                    name=schedule.name
                )
            )

        try:
            instance.stop_schedule(repository, schedule_name)
            instance.start_schedule(repository, schedule_name)
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)

        print_fn("Restarted schedule {schedule_name}".format(schedule_name=schedule_name))
def execute_wipe_command(cli_args, print_fn):
    """Delete all schedules on the instance after an interactive confirmation.

    The user must type ``DELETE`` at the prompt; any other answer leaves the
    schedules untouched.

    Args:
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with the success message.

    Raises:
        click.UsageError: If the instance has no scheduler configured.
    """
    handle = handle_for_repo_cli_args(cli_args)

    # The definition itself is not used here; the call is kept so that any
    # error raised while building the repository still surfaces as before.
    handle.build_repository_definition()

    instance = DagsterInstance.get()

    if not instance.scheduler:
        # The trailing space after "for more" was missing, which rendered the
        # message as "...for moreinformation."
        raise click.UsageError(
            'A scheduler must be configured to run schedule commands. You can configure a scheduler '
            '(e.g. dagster_cron.scheduler.SystemCronScheduler) on your instance '
            '`dagster.yaml` settings. See '
            'https://dagster.readthedocs.io/en/latest/sections/learn/tutorial/scheduler.html for more '
            'information.'
        )

    confirmation = click.prompt(
        'Are you sure you want to delete all schedules and schedule cron jobs? Type DELETE'
    )

    if confirmation == 'DELETE':
        instance.wipe_all_schedules()
        print_fn("Wiped all schedules and schedule cron jobs")
    else:
        # The abort message goes through click.echo rather than print_fn.
        click.echo('Exiting without deleting all schedules and schedule cron jobs')
def execute_start_command(schedule_name, all_flag, cli_args, print_fn):
    """Start one schedule, or every schedule, via the repository's scheduler.

    Args:
        schedule_name: Name of the schedule to start; used only when
            ``all_flag`` is falsy.
        all_flag: When truthy, start every schedule returned by
            ``scheduler.all_schedules()``.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.

    Raises:
        click.UsageError: If DAGSTER_HOME is not set, or if starting a
            schedule raises ``DagsterInvariantViolationError``.
    """
    if not _is_dagster_home_set():
        raise click.UsageError(
            dagster_home_error_message_for_command('dagster schedule start ...')
        )

    repo_handle = handle_for_repo_cli_args(cli_args)
    repo_def = repo_handle.build_repository_definition()

    schedules_dir = DagsterInstance.get().schedules_directory()
    scheduler_handle = repo_handle.build_scheduler_handle(artifacts_dir=schedules_dir)

    if not scheduler_handle:
        print_fn("Scheduler not defined for repository {name}".format(name=repo_def.name))
        return

    active_scheduler = scheduler_handle.get_scheduler()

    if not all_flag:
        try:
            started = active_scheduler.start_schedule(schedule_name)
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)

        message = "Started schedule {schedule_name} with ID {schedule_id}"
        print_fn(message.format(schedule_name=schedule_name, schedule_id=started.schedule_id))
        return

    # Start every known schedule; the first failure aborts with a usage error.
    for known in active_scheduler.all_schedules():
        try:
            active_scheduler.start_schedule(known.name)
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)

    print_fn("Started all schedules for repository {name}".format(name=repo_def.name))
def execute_list_command(schedule_dir, running_filter, verbose, cli_args, print_fn):
    """List the repository's schedules, flagging those the cron scheduler knows.

    Args:
        schedule_dir: Directory given to ``SystemCronScheduler``; when falsy,
            it is derived from the handle via
            ``dagster_schedule_dir_for_handle``.
        running_filter: When truthy, skip schedules for which
            ``scheduler.get_schedule_by_name`` returns a falsy value.
        verbose: When truthy, also print each schedule's execution params.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.
    """
    repo_handle = handle_for_repo_cli_args(cli_args)
    repo_def = repo_handle.build_repository_definition()

    schedule_dir = schedule_dir or dagster_schedule_dir_for_handle(repo_handle)
    cron_scheduler = SystemCronScheduler(schedule_dir)

    header = 'Repository {name}'.format(name=repo_def.name)
    print_fn(header)
    print_fn('*' * len(header))

    printed_any = False
    for schedule in repo_def.get_all_schedules():
        # Truthiness of the stored schedule is used as the "running" signal.
        is_running = cron_scheduler.get_schedule_by_name(schedule.name)

        # With the running filter, schedules that are not running are skipped.
        if running_filter and not is_running:
            continue

        if is_running:
            running_flag = "[Running]"
        else:
            running_flag = ""
        schedule_title = 'Schedule: {name} {running_flag}'.format(
            name=schedule.name, running_flag=running_flag
        )

        # A separator row precedes every entry except the first one printed.
        if printed_any:
            print_fn('*' * len(schedule_title))
        printed_any = True

        print_fn(schedule_title)
        print_fn('Cron Schedule: {cron_schedule}'.format(cron_schedule=schedule.cron_schedule))

        if verbose:
            print_fn(
                'Execution Params: {execution_params}'.format(
                    execution_params=schedule.execution_params
                )
            )
def execute_start_command(schedule_name, all_flag, cli_args, print_fn):
    """Start one schedule, or every schedule, stored on the instance.

    Args:
        schedule_name: Name of the schedule to start; used only when
            ``all_flag`` is falsy.
        all_flag: When truthy, start every schedule returned by
            ``instance.all_schedules(repository)``.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.

    Raises:
        click.UsageError: If starting a schedule raises
            ``DagsterInvariantViolationError``.
    """
    repo_handle = handle_for_repo_cli_args(cli_args)
    dagster_instance = DagsterInstance.get()
    check_handle_and_scheduler(repo_handle, dagster_instance)

    repo_def = repo_handle.build_repository_definition()

    if not all_flag:
        try:
            dagster_instance.start_schedule(repo_def, schedule_name)
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)

        print_fn("Started schedule {schedule_name}".format(schedule_name=schedule_name))
        return

    # Start every known schedule; the first failure aborts with a usage error.
    for known in dagster_instance.all_schedules(repo_def):
        try:
            dagster_instance.start_schedule(repo_def, known.name)
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)

    print_fn("Started all schedules for repository {name}".format(name=repo_def.name))
def execute_up_command(preview, cli_args, print_fn):
    """Print pending schedule changes and, unless previewing, apply them.

    Args:
        preview: When truthy, only print the changes and return.
        cli_args: Repository-selection CLI arguments, forwarded to
            ``handle_for_repo_cli_args``.
        print_fn: Callable invoked with each line of output.

    Raises:
        click.UsageError: If DAGSTER_HOME is not set, or if
            ``scheduler_handle.up`` raises ``DagsterInvariantViolationError``.
    """
    if not _is_dagster_home_set():
        raise click.UsageError(dagster_home_error_message_for_command('dagster schedule up'))

    repo_handle = handle_for_repo_cli_args(cli_args)
    repo_def = repo_handle.build_repository_definition()

    # Interpreter and repository YAML path are handed to scheduler_handle.up.
    interpreter = sys.executable
    repo_yaml = repo_handle.data.repository_yaml

    schedules_dir = DagsterInstance.get().schedules_directory()
    scheduler_handle = repo_handle.build_scheduler_handle(artifacts_dir=schedules_dir)

    if not scheduler_handle:
        print_fn("Scheduler not defined for repository {name}".format(name=repo_def.name))
        return

    print_changes(scheduler_handle, print_fn, preview=preview)

    if not preview:
        try:
            scheduler_handle.up(interpreter, repo_yaml)
        except DagsterInvariantViolationError as ex:
            raise click.UsageError(ex)
def execute_backfill_command(cli_args, print_fn, instance=None):
    """Launch one pipeline run per partition of a partition set (a backfill).

    Resolves the pipeline, partition set and partitions from ``cli_args``
    (prompting interactively unless ``noprompt`` is set), prints a summary,
    asks for confirmation, and then launches the runs through the instance.

    Note that ``cli_args`` is mutated: its ``'pipeline_name'`` key is removed.

    Args:
        cli_args: Dict of CLI arguments. Keys read here are
            ``'pipeline_name'`` (required), ``'noprompt'``,
            ``'partition_set'`` and ``'mode'``; the dict is also passed to the
            partition, priority and tag helpers.
        print_fn: Callable invoked with each line of output.
        instance: Optional instance to launch runs on. When falsy,
            ``DagsterInstance.get()`` is used.

    Raises:
        click.UsageError: If no run launcher is configured, or the pipeline or
            partition set cannot be resolved.
    """
    pipeline_name = cli_args.pop('pipeline_name')
    repo_args = {k: v for k, v in cli_args.items() if k in REPO_ARG_NAMES}

    # A one-element, non-string sequence is unwrapped to its single name.
    # NOTE(review): a sequence with several names is passed on unchanged --
    # confirm whether that case should be rejected explicitly.
    if pipeline_name and not isinstance(pipeline_name, six.string_types):
        if len(pipeline_name) == 1:
            pipeline_name = pipeline_name[0]

    instance = instance or DagsterInstance.get()
    handle = handle_for_repo_cli_args(repo_args)
    # Built once and reused below; the definition used to be rebuilt a second
    # time from the same handle after the pipeline prompt.
    repository = handle.build_repository_definition()
    noprompt = cli_args.get('noprompt')

    # Check run launcher. The trailing space after "for more" was missing,
    # which rendered the message as "...for moreinformation."
    if not instance.run_launcher:
        raise click.UsageError(
            'A run launcher must be configured before running a backfill. You can configure a run '
            'launcher (e.g. dagster_graphql.launcher.RemoteDagitRunLauncher) in your instance '
            '`dagster.yaml` settings. See '
            'https://docs.dagster.io/latest/deploying/instance/ for more '
            'information.'
        )

    # Resolve pipeline
    if not pipeline_name and noprompt:
        raise click.UsageError('No pipeline specified')
    if not pipeline_name:
        pipeline_name = click.prompt(
            'Select a pipeline to backfill: {}'.format(', '.join(repository.pipeline_names))
        )
    if not repository.has_pipeline(pipeline_name):
        raise click.UsageError('No pipeline found named `{}`'.format(pipeline_name))

    pipeline = repository.get_pipeline(pipeline_name)

    # Resolve partition set
    all_partition_sets = get_partition_sets_for_handle(handle)
    pipeline_partition_sets = [
        x for x in all_partition_sets if x.pipeline_name == pipeline.name
    ]
    if not pipeline_partition_sets:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(pipeline.name)
        )

    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        if len(pipeline_partition_sets) == 1:
            # With a single candidate there is nothing to choose between.
            partition_set_name = pipeline_partition_sets[0].name
        elif noprompt:
            raise click.UsageError('No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(
                    ', '.join(x.name for x in pipeline_partition_sets)
                )
            )

    partition_set = next(
        (x for x in pipeline_partition_sets if x.name == partition_set_name), None
    )
    if not partition_set:
        raise click.UsageError('No partition set found named `{}`'.format(partition_set_name))

    # Resolve partitions to backfill
    partitions = gen_partitions_from_args(partition_set, cli_args)

    # Resolve priority
    celery_priority = get_backfill_priority_from_args(cli_args)

    # Print backfill info
    print_fn('\n Pipeline: {}'.format(pipeline.name))
    print_fn('Partition set: {}'.format(partition_set.name))
    print_fn(' Partitions: {}\n'.format(print_partition_format(partitions, indent_level=15)))

    # Confirm and launch
    if noprompt or click.confirm(
        'Do you want to proceed with the backfill ({} partitions)?'.format(len(partitions))
    ):
        print_fn('Launching runs... ')
        backfill_id = make_new_backfill_id()

        run_tags = merge_dicts(
            PipelineRun.tags_for_backfill_id(backfill_id),
            get_tags_from_args(cli_args),
        )

        # for backwards compatibility - remove once prezi switched over to using tags argument
        if celery_priority is not None:
            run_tags['dagster-celery/run_priority'] = celery_priority

        for partition in partitions:
            run = PipelineRun(
                pipeline_name=pipeline.name,
                run_id=make_new_run_id(),
                selector=ExecutionSelector(pipeline.name),
                environment_dict=partition_set.environment_dict_for_partition(partition),
                mode=cli_args.get('mode') or 'default',
                tags=merge_dicts(partition_set.tags_for_partition(partition), run_tags),
                status=PipelineRunStatus.NOT_STARTED,
            )
            instance.launch_run(run)
            # Remove once we can handle synchronous execution... currently limited by sqlite
            time.sleep(0.1)

        print_fn('Launched backfill job `{}`'.format(backfill_id))
    else:
        print_fn(' Aborted!')