def execute_preview_command(
    sensor_name, since, last_run_key, cursor, cli_args, print_fn, instance=None
):
    """Evaluate a sensor once and report the outcome through ``print_fn``.

    The sensor is looked up by name in the repository selected by ``cli_args``
    and its execution data is requested from the repository location. Depending
    on the result, one of three messages is printed: a resolution failure, a
    "skipping" notice (with the skip message when one is present), or a YAML
    dump of the run config of every returned run request.

    Args:
        sensor_name: Name of the sensor to fetch from the external repository.
        since: Passed through unchanged to ``get_external_sensor_execution_data``.
        last_run_key: Passed through unchanged to the same call.
        cursor: Passed through unchanged to the same call.
        cli_args: Mapping of CLI options; used to resolve the repository
            location and read for the ``"repository"`` key.
        print_fn: Callable accepting a single string; receives all output.
        instance: Optional instance to evaluate against. When omitted,
            ``DagsterInstance.get()`` is entered as a context manager and
            exited before returning. A caller-supplied instance is used as-is
            and is NOT closed here (previously this argument was shadowed and
            silently ignored).

    Raises:
        click.UsageError: When a ``DagsterInvariantViolationError`` is raised
            while resolving the repository/sensor or evaluating it.
    """
    # Function-scope import so the fix does not depend on file-level imports.
    from contextlib import ExitStack

    with ExitStack() as stack:
        # Only open (and later close) the default instance when the caller
        # did not hand one in; the caller owns the lifecycle of its own.
        if instance is None:
            instance = stack.enter_context(DagsterInstance.get())
        repo_location = stack.enter_context(get_repository_location_from_kwargs(cli_args))

        try:
            external_repo = get_external_repository_from_repo_location(
                repo_location, cli_args.get("repository")
            )
            check_repo_and_scheduler(external_repo, instance)
            external_sensor = external_repo.get_external_sensor(sensor_name)

            try:
                sensor_runtime_data = repo_location.get_external_sensor_execution_data(
                    instance,
                    external_repo.handle,
                    external_sensor.name,
                    since,
                    last_run_key,
                    cursor,
                )
            except Exception:  # pylint: disable=broad-except
                # Deliberately broad: any failure while evaluating the sensor
                # is reported to the user instead of crashing the command.
                error_info = serializable_error_info_from_exc_info(sys.exc_info())
                print_fn(
                    "Failed to resolve sensor for {sensor_name} : {error_info}".format(
                        sensor_name=external_sensor.name,
                        error_info=error_info.to_string(),
                    )
                )
                return

            run_requests = sensor_runtime_data.run_requests
            if run_requests:
                print_fn(
                    "Sensor returning run requests for {num} run(s):\n\n{run_requests}".format(
                        num=len(run_requests),
                        run_requests="\n".join(
                            yaml.safe_dump(run_request.run_config, default_flow_style=False)
                            for run_request in run_requests
                        ),
                    )
                )
            elif sensor_runtime_data.skip_message:
                print_fn(
                    "Sensor returned false for {sensor_name}, skipping: {skip_message}".format(
                        sensor_name=external_sensor.name,
                        skip_message=sensor_runtime_data.skip_message,
                    )
                )
            else:
                print_fn(
                    "Sensor returned false for {sensor_name}, skipping".format(
                        sensor_name=external_sensor.name
                    )
                )
        except DagsterInvariantViolationError as ex:
            # Surface invariant violations as a CLI usage error, keeping the
            # original exception attached as the cause.
            raise click.UsageError(str(ex)) from ex
def _logged_pipeline_launch_command(config, preset, mode, instance, kwargs):
    """Create a run for the pipeline selected by ``kwargs`` and launch it.

    Args:
        config: Value given for ``--config``; falsy values are treated as absent.
        preset: Optional preset name; mutually exclusive with config/env files.
        mode: Mode forwarded to run creation.
        instance: The ``DagsterInstance`` used to create and launch the run.
        kwargs: Mapping of CLI options; read for ``env``, ``repository`` and
            ``pipeline`` and forwarded to the tag/solid-selection helpers.

    Returns:
        Whatever ``instance.launch_run`` returns for the created run.

    Raises:
        click.UsageError: If a preset is combined with config/env files.
    """
    check.inst_param(instance, 'instance', DagsterInstance)

    legacy_env = kwargs.get('env')
    canonical_env = canonicalize_backcompat_args(
        config or None,
        '--config',
        legacy_env or None,
        '--env',
        '0.9.0',
        stacklevel=2,  # this stacklevel can point the warning to this line
    )
    # Fall back to the default empty tuple when neither option was supplied.
    env = list(check.opt_tuple_param(canonical_env or tuple(), 'env', default=(), of_type=str))

    repo_location = get_repository_location_from_kwargs(kwargs, instance)
    external_repo = get_external_repository_from_repo_location(
        repo_location, kwargs.get('repository')
    )
    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo, kwargs.get('pipeline')
    )

    log_external_repo_stats(
        source='pipeline_launch_command',
        instance=instance,
        external_repo=external_repo,
        external_pipeline=external_pipeline,
    )

    if preset and env:
        raise click.UsageError('Can not use --preset with --config.')
    # Swap the preset name for the preset object; any falsy name becomes None.
    resolved_preset = external_pipeline.get_preset(preset) if preset else None

    run_tags = get_tags_from_args(kwargs)
    solid_selection = get_solid_selection_from_args(kwargs)
    run_config = get_run_config_from_env_file_list(env)

    pipeline_run = _create_external_pipeline_run(
        instance=instance,
        repo_location=repo_location,
        external_repo=external_repo,
        external_pipeline=external_pipeline,
        run_config=run_config,
        mode=mode,
        preset=resolved_preset,
        tags=run_tags,
        solid_selection=solid_selection,
    )

    return instance.launch_run(pipeline_run.run_id, external_pipeline)
def execute_launch_command(
    instance: DagsterInstance, kwargs: Dict[str, str], using_job_op_graph_apis: bool = False
):
    """Create a run for the pipeline or job named in ``kwargs`` and submit it.

    Args:
        instance: The ``DagsterInstance`` used to create and submit the run.
        kwargs: Mapping of CLI options; read for ``preset``, ``mode``,
            ``location``, ``repository``, ``pipeline_or_job`` and ``run_id``.
        using_job_op_graph_apis: Forwarded to the pipeline/job lookup helper.

    Returns:
        Whatever ``instance.submit_run`` returns for the created run.

    Raises:
        click.UsageError: If both a preset and config are supplied.
    """

    def _optional_str(key: str) -> Optional[str]:
        # kwargs is typed Dict[str, str], but absent options come back as None.
        return cast(Optional[str], kwargs.get(key))

    preset = _optional_str("preset")
    mode = _optional_str("mode")
    check.inst_param(instance, "instance", DagsterInstance)
    config = get_config_from_args(kwargs)

    with get_workspace_from_kwargs(
        instance, version=dagster_version, kwargs=kwargs
    ) as workspace:
        repo_location = get_repository_location_from_workspace(workspace, kwargs.get("location"))
        external_repo = get_external_repository_from_repo_location(
            repo_location, _optional_str("repository")
        )
        external_pipeline = get_external_pipeline_or_job_from_external_repo(
            external_repo, _optional_str("pipeline_or_job"), using_job_op_graph_apis
        )

        log_external_repo_stats(
            source="pipeline_launch_command",
            instance=instance,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
        )

        # The conflict is only checked after the stats call above.
        if preset and config:
            raise click.UsageError("Can not use --preset with -c / --config / --config-json.")

        pipeline_run = _create_external_pipeline_run(
            instance=instance,
            repo_location=repo_location,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
            run_config=config,
            mode=mode,
            preset=preset,
            tags=get_tags_from_args(kwargs),
            solid_selection=get_solid_selection_from_args(kwargs),
            run_id=_optional_str("run_id"),
        )

        return instance.submit_run(pipeline_run.run_id, workspace)
def execute_cursor_command(sensor_name, cli_args, print_fn):
    """Set or clear the stored cursor of a sensor.

    Exactly one of the ``set`` / ``delete`` options in ``cli_args`` must be
    truthy. If the sensor has no stored instigator state, a new one is added
    with status ``STOPPED``; otherwise the existing state is updated, carrying
    over its last tick timestamp and last run key.

    Args:
        sensor_name: Name of the sensor to fetch from the external repository.
        cli_args: Mapping of CLI options; read for ``delete``, ``set`` and
            ``repository`` and used to resolve the repository location.
        print_fn: Callable accepting a single string; receives the final
            confirmation message.

    Raises:
        click.UsageError: If both or neither of ``set`` / ``delete`` are given.
    """
    with DagsterInstance.get() as instance, get_repository_location_from_kwargs(
        instance, version=dagster_version, kwargs=cli_args
    ) as repo_location:
        wants_delete = bool(cli_args.get("delete"))
        wants_set = bool(cli_args.get("set"))
        if wants_delete == wants_set:
            # must use one of delete/set
            raise click.UsageError("Must set cursor using `--set <value>` or use `--delete`")

        cursor_value = cli_args.get("set")

        external_repo = get_external_repository_from_repo_location(
            repo_location, cli_args.get("repository")
        )
        check_repo_and_scheduler(external_repo, instance)
        external_sensor = external_repo.get_external_sensor(sensor_name)

        existing_state = instance.get_instigator_state(
            external_sensor.get_external_origin_id(), external_sensor.selector_id
        )

        if existing_state:
            # Keep the tick/run bookkeeping; only the cursor (and interval) change.
            previous = existing_state.instigator_data
            updated_data = SensorInstigatorData(
                last_tick_timestamp=previous.last_tick_timestamp,
                last_run_key=previous.last_run_key,
                min_interval=external_sensor.min_interval_seconds,
                cursor=cursor_value,
            )
            instance.update_instigator_state(existing_state.with_data(updated_data))
        else:
            fresh_data = SensorInstigatorData(
                min_interval=external_sensor.min_interval_seconds,
                cursor=cursor_value,
            )
            instance.add_instigator_state(
                InstigatorState(
                    external_sensor.get_external_origin(),
                    InstigatorType.SENSOR,
                    InstigatorStatus.STOPPED,
                    fresh_data,
                )
            )

        message = (
            f'Set cursor state for sensor {external_sensor.name} to "{cursor_value}"'
            if cursor_value
            else f"Cleared cursor state for sensor {external_sensor.name}"
        )
        print_fn(message)
def execute_launch_command(instance, kwargs):
    """Create a run for the pipeline named in ``kwargs`` and submit it.

    Args:
        instance: The ``DagsterInstance`` used to create and submit the run.
        kwargs: Mapping of CLI options; read for ``preset``, ``mode``,
            ``repository``, ``pipeline`` and ``run_id``.

    Returns:
        Whatever ``instance.submit_run`` returns for the created run.

    Raises:
        click.UsageError: If both a preset and config are supplied.
    """
    preset, mode = kwargs.get("preset"), kwargs.get("mode")
    check.inst_param(instance, "instance", DagsterInstance)
    config = get_config_from_args(kwargs)

    with get_repository_location_from_kwargs(kwargs) as repo_location:
        repository_name = kwargs.get("repository")
        external_repo = get_external_repository_from_repo_location(repo_location, repository_name)

        pipeline_name = kwargs.get("pipeline")
        external_pipeline = get_external_pipeline_from_external_repo(external_repo, pipeline_name)

        log_external_repo_stats(
            source="pipeline_launch_command",
            instance=instance,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
        )

        # The conflict is only checked after the stats call above.
        if preset and config:
            raise click.UsageError("Can not use --preset with -c / --config / --config-json.")

        pipeline_run = _create_external_pipeline_run(
            instance=instance,
            repo_location=repo_location,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
            run_config=config,
            mode=mode,
            preset=preset,
            tags=get_tags_from_args(kwargs),
            solid_selection=get_solid_selection_from_args(kwargs),
            run_id=kwargs.get("run_id"),
        )

        return instance.submit_run(pipeline_run.run_id, external_pipeline)
def execute_launch_command(instance, kwargs):
    """Create a run for the pipeline named in ``kwargs`` and launch it.

    Args:
        instance: The ``DagsterInstance`` used to create and launch the run.
        kwargs: Mapping of CLI options; read for ``preset``, ``mode``,
            ``config`` (a tuple of strings, or absent), ``repository`` and
            ``pipeline``.

    Returns:
        Whatever ``instance.launch_run`` returns for the created run.

    Raises:
        click.UsageError: If both a preset and config files are supplied.
    """
    preset = kwargs.get("preset")
    mode = kwargs.get("mode")
    check.inst_param(instance, "instance", DagsterInstance)

    raw_config = kwargs.get("config")
    config = list(check.opt_tuple_param(raw_config, "config", default=(), of_type=str))

    with get_repository_location_from_kwargs(kwargs, instance) as repo_location:
        external_repo = get_external_repository_from_repo_location(
            repo_location, kwargs.get("repository")
        )
        external_pipeline = get_external_pipeline_from_external_repo(
            external_repo, kwargs.get("pipeline")
        )

        log_external_repo_stats(
            source="pipeline_launch_command",
            instance=instance,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
        )

        # An empty config list is falsy, so a preset alone is accepted.
        if preset and config:
            raise click.UsageError("Can not use --preset with --config.")

        run_tags = get_tags_from_args(kwargs)
        solid_selection = get_solid_selection_from_args(kwargs)
        run_config = get_run_config_from_file_list(config)

        pipeline_run = _create_external_pipeline_run(
            instance=instance,
            repo_location=repo_location,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
            run_config=run_config,
            mode=mode,
            preset=preset,
            tags=run_tags,
            solid_selection=solid_selection,
        )

        return instance.launch_run(pipeline_run.run_id, external_pipeline)
def _execute_backfill_command_at_location(cli_args, print_fn, instance, workspace, repo_location):
    """Run a partition backfill for one pipeline at ``repo_location``.

    Picks the partition set (from ``cli_args``, automatically when only one
    exists, or by prompting), resolves the partition names, prints a summary,
    and, once confirmed, creates a backfill run per partition and submits the
    ones that were created. The backfill is recorded on the instance as FAILED
    if fetching execution data raises, or as COMPLETED after all runs are
    submitted.

    Args:
        cli_args: Mapping of CLI options; read for ``repository``,
            ``pipeline``, ``noprompt`` and ``partition_set`` and forwarded to
            the tag and partition-name helpers.
        print_fn: Callable accepting a single string; receives all output.
        instance: Instance used to record the backfill and submit runs.
        workspace: Passed to ``instance.submit_run`` for every created run.
        repo_location: Repository location queried for partition data.

    Returns:
        The result of ``print_fn`` when fetching execution data fails,
        otherwise ``None``.

    Raises:
        click.UsageError: If no matching partition set exists, none was
            specified while prompting is disabled, or the chosen name is unknown.
        DagsterBackfillFailedError: If fetching partition names raises.
    """
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get("repository")
    )
    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo, cli_args.get("pipeline")
    )
    noprompt = cli_args.get("noprompt")

    # Only partition sets that target the selected pipeline are candidates.
    candidates = {}
    for candidate in external_repo.get_external_partition_sets():
        if candidate.pipeline_name == external_pipeline.name:
            candidates[candidate.name] = candidate

    if not candidates:
        raise click.UsageError(
            "No partition sets found for pipeline `{}`".format(external_pipeline.name)
        )

    partition_set_name = cli_args.get("partition_set")
    if not partition_set_name:
        if len(candidates) == 1:
            partition_set_name = next(iter(candidates))
        elif noprompt:
            raise click.UsageError("No partition set specified (see option `--partition-set`)")
        else:
            partition_set_name = click.prompt(
                "Select a partition set to use for backfill: {}".format(", ".join(candidates))
            )

    partition_set = candidates.get(partition_set_name)
    if not partition_set:
        raise click.UsageError("No partition set found named `{}`".format(partition_set_name))

    run_tags = get_tags_from_args(cli_args)

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location=repo_location,
    )

    try:
        partition_names_or_error = repo_location.get_external_partition_names(
            repo_handle, partition_set_name
        )
    except Exception:  # pylint: disable=broad-except
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        raise DagsterBackfillFailedError(
            "Failure fetching partition names for {partition_set_name}: {error_message}".format(
                partition_set_name=partition_set_name,
                error_message=error_info.message,
            ),
            serialized_error_info=error_info,
        )

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args
    )

    # Print backfill info
    print_fn("\n Pipeline: {}".format(external_pipeline.name))
    print_fn("Partition set: {}".format(partition_set_name))
    print_fn(" Partitions: {}\n".format(print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    confirmed = noprompt or click.confirm(
        "Do you want to proceed with the backfill ({} partitions)?".format(len(partition_names))
    )
    if not confirmed:
        print_fn("Aborted!")
        return

    print_fn("Launching runs... ")

    backfill_id = make_new_backfill_id()
    backfill_job = PartitionBackfill(
        backfill_id=backfill_id,
        partition_set_origin=partition_set.get_external_origin(),
        status=BulkActionStatus.REQUESTED,
        partition_names=partition_names,
        from_failure=False,
        reexecution_steps=None,
        tags=run_tags,
        backfill_timestamp=pendulum.now("UTC").timestamp(),
    )

    try:
        partition_execution_data = repo_location.get_external_partition_set_execution_param_data(
            repository_handle=repo_handle,
            partition_set_name=partition_set_name,
            partition_names=partition_names,
        )
    except Exception:  # pylint: disable=broad-except
        # Record the failed backfill before reporting it to the user.
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        failed_job = backfill_job.with_status(BulkActionStatus.FAILED).with_error(error_info)
        instance.add_backfill(failed_job)
        return print_fn("Backfill failed: {}".format(error_info))

    assert isinstance(partition_execution_data, ExternalPartitionSetExecutionParamData)

    for partition_data in partition_execution_data.partition_data:
        pipeline_run = create_backfill_run(
            instance,
            repo_location,
            external_pipeline,
            partition_set,
            backfill_job,
            partition_data,
        )
        # A falsy result means no run was created for this partition.
        if pipeline_run:
            instance.submit_run(pipeline_run.run_id, workspace)

    instance.add_backfill(backfill_job.with_status(BulkActionStatus.COMPLETED))

    print_fn("Launched backfill job `{}`".format(backfill_id))
def _execute_backfill_command_at_location(cli_args, print_fn, instance, repo_location):
    """Run a partition backfill for one pipeline at ``repo_location``.

    Picks the partition set (from ``cli_args``, automatically when only one
    exists, or by prompting), resolves the partition names, prints a summary,
    and, once confirmed, creates and launches one run per partition, tagged
    with the backfill id and any tags from ``cli_args``.

    Args:
        cli_args: Mapping of CLI options; read for ``repository``,
            ``pipeline``, ``noprompt`` and ``partition_set`` and forwarded to
            the tag and partition-name helpers.
        print_fn: Callable accepting a single string; receives all output.
        instance: Instance used to create and launch the runs.
        repo_location: Repository location queried for partition data.

    Returns:
        The result of ``print_fn`` when the execution data comes back as an
        error, otherwise ``None``.

    Raises:
        click.UsageError: If no matching partition set exists, none was
            specified while prompting is disabled, or the chosen name is unknown.
        DagsterBackfillFailedError: If the partition names come back as an error.
    """
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get("repository")
    )
    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo, cli_args.get("pipeline")
    )
    noprompt = cli_args.get("noprompt")

    # Only partition sets that target the selected pipeline are candidates.
    sets_by_name = {}
    for external_partition_set in external_repo.get_external_partition_sets():
        if external_partition_set.pipeline_name == external_pipeline.name:
            sets_by_name[external_partition_set.name] = external_partition_set

    if not sets_by_name:
        raise click.UsageError(
            "No partition sets found for pipeline `{}`".format(external_pipeline.name)
        )

    partition_set_name = cli_args.get("partition_set")
    if not partition_set_name:
        if len(sets_by_name) == 1:
            partition_set_name = next(iter(sets_by_name))
        elif noprompt:
            raise click.UsageError("No partition set specified (see option `--partition-set`)")
        else:
            available = ", ".join(sets_by_name)
            partition_set_name = click.prompt(
                "Select a partition set to use for backfill: {}".format(available)
            )

    partition_set = sets_by_name.get(partition_set_name)
    if not partition_set:
        raise click.UsageError("No partition set found named `{}`".format(partition_set_name))

    mode = partition_set.mode
    solid_selection = partition_set.solid_selection
    run_tags = get_tags_from_args(cli_args)

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location_handle=repo_location.location_handle,
    )

    # Resolve partitions to backfill
    partition_names_or_error = repo_location.get_external_partition_names(
        repo_handle, partition_set_name
    )
    if isinstance(partition_names_or_error, ExternalPartitionExecutionErrorData):
        names_error = partition_names_or_error.error
        raise DagsterBackfillFailedError(
            "Failure fetching partition names for {partition_set_name}: {error_message}".format(
                partition_set_name=partition_set_name,
                error_message=names_error.message,
            ),
            serialized_error_info=names_error,
        )

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args
    )

    # Print backfill info
    print_fn("\n Pipeline: {}".format(external_pipeline.name))
    print_fn("Partition set: {}".format(partition_set_name))
    print_fn(" Partitions: {}\n".format(print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    confirmed = noprompt or click.confirm(
        "Do you want to proceed with the backfill ({} partitions)?".format(len(partition_names))
    )
    if not confirmed:
        print_fn("Aborted!")
        return

    print_fn("Launching runs... ")

    backfill_id = make_new_backfill_id()
    backfill_tags = PipelineRun.tags_for_backfill_id(backfill_id)

    partition_execution_data = repo_location.get_external_partition_set_execution_param_data(
        repository_handle=repo_handle,
        partition_set_name=partition_set_name,
        partition_names=partition_names,
    )
    if isinstance(partition_execution_data, ExternalPartitionExecutionErrorData):
        return print_fn("Backfill failed: {}".format(partition_execution_data.error))

    assert isinstance(partition_execution_data, ExternalPartitionSetExecutionParamData)

    for partition_data in partition_execution_data.partition_data:
        # In the nested merge the CLI tags are the last argument, so they are
        # applied after the partition and backfill tags.
        combined_tags = merge_dicts(merge_dicts(partition_data.tags, backfill_tags), run_tags)
        run = _create_external_pipeline_run(
            instance=instance,
            repo_location=repo_location,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
            run_config=partition_data.run_config,
            mode=mode,
            preset=None,
            tags=combined_tags,
            solid_selection=frozenset(solid_selection) if solid_selection else None,
        )
        instance.launch_run(run.run_id, external_pipeline)

    print_fn("Launched backfill job `{}`".format(backfill_id))
def execute_backfill_command(cli_args, print_fn, instance=None):
    """Run a partition backfill for the pipeline selected by ``cli_args``.

    Picks the partition set (from ``cli_args``, automatically when only one
    exists, or by prompting), resolves the partition names, prints a summary,
    and, once confirmed, fetches run config and tags for every partition and
    creates and launches one run per partition.

    Args:
        cli_args: Mapping of CLI options; read for ``repository``,
            ``pipeline``, ``noprompt`` and ``partition_set`` and forwarded to
            the location, tag and partition-name helpers.
        print_fn: Callable accepting a single string; receives all output.
        instance: Optional instance; ``DagsterInstance.get()`` is used when
            this is falsy.

    Raises:
        click.UsageError: If no matching partition set exists, none was
            specified while prompting is disabled, or the chosen name is unknown.
        DagsterBackfillFailedError: If partition names, or the run config or
            tags of any partition, come back as an error.
    """
    instance = instance or DagsterInstance.get()

    repo_location = get_repository_location_from_kwargs(cli_args, instance)
    external_repo = get_external_repository_from_repo_location(
        repo_location, cli_args.get('repository')
    )
    external_pipeline = get_external_pipeline_from_external_repo(
        external_repo, cli_args.get('pipeline')
    )
    noprompt = cli_args.get('noprompt')

    # Only partition sets that target the selected pipeline are candidates.
    sets_by_name = {}
    for external_partition_set in external_repo.get_external_partition_sets():
        if external_partition_set.pipeline_name == external_pipeline.name:
            sets_by_name[external_partition_set.name] = external_partition_set

    if not sets_by_name:
        raise click.UsageError(
            'No partition sets found for pipeline `{}`'.format(external_pipeline.name)
        )

    partition_set_name = cli_args.get('partition_set')
    if not partition_set_name:
        if len(sets_by_name) == 1:
            partition_set_name = next(iter(sets_by_name))
        elif noprompt:
            raise click.UsageError('No partition set specified (see option `--partition-set`)')
        else:
            partition_set_name = click.prompt(
                'Select a partition set to use for backfill: {}'.format(', '.join(sets_by_name))
            )

    partition_set = sets_by_name.get(partition_set_name)
    if not partition_set:
        raise click.UsageError('No partition set found named `{}`'.format(partition_set_name))

    mode = partition_set.mode
    solid_selection = partition_set.solid_selection

    repo_handle = RepositoryHandle(
        repository_name=external_repo.name,
        repository_location_handle=repo_location.location_handle,
    )

    # Resolve partitions to backfill
    partition_names_or_error = repo_location.get_external_partition_names(
        repo_handle, partition_set_name
    )
    if isinstance(partition_names_or_error, ExternalPartitionExecutionErrorData):
        names_error = partition_names_or_error.error
        raise DagsterBackfillFailedError(
            'Failure fetching partition names for {partition_set_name}: {error_message}'.format(
                partition_set_name=partition_set_name,
                error_message=names_error.message,
            ),
            serialized_error_info=names_error,
        )

    partition_names = gen_partition_names_from_args(
        partition_names_or_error.partition_names, cli_args
    )

    # Print backfill info
    print_fn('\n Pipeline: {}'.format(external_pipeline.name))
    print_fn('Partition set: {}'.format(partition_set_name))
    print_fn(' Partitions: {}\n'.format(print_partition_format(partition_names, indent_level=15)))

    # Confirm and launch
    confirmed = noprompt or click.confirm(
        'Do you want to proceed with the backfill ({} partitions)?'.format(len(partition_names))
    )
    if not confirmed:
        print_fn('Aborted!')
        return

    print_fn('Launching runs... ')
    backfill_id = make_new_backfill_id()

    run_tags = merge_dicts(
        PipelineRun.tags_for_backfill_id(backfill_id),
        get_tags_from_args(cli_args),
    )

    for partition_name in partition_names:
        run_config_or_error = repo_location.get_external_partition_config(
            repo_handle, partition_set_name, partition_name
        )
        if isinstance(run_config_or_error, ExternalPartitionExecutionErrorData):
            config_error = run_config_or_error.error
            raise DagsterBackfillFailedError(
                'Failure fetching run config for partition {partition_name} in {partition_set_name}: {error_message}'.format(
                    partition_name=partition_name,
                    partition_set_name=partition_set_name,
                    error_message=config_error.message,
                ),
                serialized_error_info=config_error,
            )

        tags_or_error = repo_location.get_external_partition_tags(
            repo_handle, partition_set_name, partition_name
        )
        if isinstance(tags_or_error, ExternalPartitionExecutionErrorData):
            tags_error = tags_or_error.error
            raise DagsterBackfillFailedError(
                'Failure fetching tags for partition {partition_name} in {partition_set_name}: {error_message}'.format(
                    partition_name=partition_name,
                    partition_set_name=partition_set_name,
                    error_message=tags_error.message,
                ),
                serialized_error_info=tags_error,
            )

        run = _create_external_pipeline_run(
            instance=instance,
            repo_location=repo_location,
            external_repo=external_repo,
            external_pipeline=external_pipeline,
            run_config=run_config_or_error.run_config,
            mode=mode,
            preset=None,
            tags=merge_dicts(tags_or_error.tags, run_tags),
            solid_selection=frozenset(solid_selection) if solid_selection else None,
        )

        instance.launch_run(run.run_id, external_pipeline)
        # Remove once we can handle synchronous execution... currently limited by sqlite
        time.sleep(0.1)

    print_fn('Launched backfill job `{}`'.format(backfill_id))