Example #1
def execute_run_command(input_json):
    with capture_interrupts():
        args = deserialize_as(input_json, ExecuteRunArgs)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)

        with (DagsterInstance.from_ref(args.instance_ref)
              if args.instance_ref else DagsterInstance.get()) as instance:
            buffer = []

            def send_to_buffer(event):
                buffer.append(serialize_dagster_namedtuple(event))

            return_code = _execute_run_command_body(
                recon_pipeline,
                args.pipeline_run_id,
                instance,
                send_to_buffer,
                set_exit_code_on_failure=args.set_exit_code_on_failure or False,
            )

            for line in buffer:
                click.echo(line)

            if return_code != 0:
                sys.exit(return_code)
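
These examples all lean on the same serdes round trip: serialize_dagster_namedtuple writes a whitelisted namedtuple to JSON, and deserialize_as parses JSON back while asserting the expected type. A minimal sketch of that round trip, assuming these names are importable from dagster.serdes as used in the snippets (MyEvent is a hypothetical type):

from typing import NamedTuple

from dagster.serdes import (
    deserialize_as,
    serialize_dagster_namedtuple,
    whitelist_for_serdes,
)


@whitelist_for_serdes
class MyEvent(NamedTuple("_MyEvent", [("message", str)])):
    """Hypothetical event type registered with dagster's serdes machinery."""


blob = serialize_dagster_namedtuple(MyEvent(message="hello"))
event = deserialize_as(blob, MyEvent)  # raises if blob does not decode to a MyEvent
assert event.message == "hello"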
Example #2
    def launch_run(self, context: LaunchRunContext) -> None:
        run = context.pipeline_run

        check.inst_param(run, "run", PipelineRun)

        if not context.workspace:
            raise DagsterInvariantViolationError(
                "DefaultRunLauncher requires a workspace to be included in its LaunchRunContext"
            )

        external_pipeline_origin = check.not_none(run.external_pipeline_origin)
        repository_location = context.workspace.get_location(
            external_pipeline_origin.external_repository_origin.repository_location_origin.location_name
        )

        check.inst(
            repository_location,
            GrpcServerRepositoryLocation,
            "DefaultRunLauncher: Can't launch runs for pipeline not loaded from a GRPC server",
        )

        self._instance.add_run_tags(
            run.run_id,
            {
                GRPC_INFO_TAG: seven.json.dumps(
                    merge_dicts(
                        {"host": repository_location.host},
                        (
                            {"port": repository_location.port}
                            if repository_location.port
                            else {"socket": repository_location.socket}
                        ),
                        ({"use_ssl": True} if repository_location.use_ssl else {}),
                    )
                )
            },
        )

        res = deserialize_as(
            repository_location.client.start_run(
                ExecuteExternalPipelineArgs(
                    pipeline_origin=external_pipeline_origin,
                    pipeline_run_id=run.run_id,
                    instance_ref=self._instance.get_ref(),
                )
            ),
            StartRunResult,
        )
        if not res.success:
            raise DagsterLaunchFailedError(
                res.message, serializable_error_info=res.serializable_error_info
            )

        self._run_ids.add(run.run_id)

        if self._wait_for_processes:
            self._locations_to_wait_for.append(repository_location)
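
The launcher stores the gRPC connection details in the run's tags under GRPC_INFO_TAG. A hedged sketch of reading them back, using the stdlib json module to parse what seven.json.dumps wrote (run and GRPC_INFO_TAG as in the snippet; run.tags holding the stored tags is an assumption):

import json

grpc_info = json.loads(run.tags[GRPC_INFO_TAG])
host = grpc_info["host"]
# the launcher wrote exactly one of "port" or "socket"
port_or_socket = grpc_info.get("port") or grpc_info.get("socket")
use_ssl = grpc_info.get("use_ssl", False)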
Example #3
    def get_daemon_heartbeats(self) -> Dict[str, DaemonHeartbeat]:

        with self.connect() as conn:
            rows = conn.execute(db.select(DaemonHeartbeatsTable.columns))
            heartbeats = []
            for row in rows:
                heartbeats.append(deserialize_as(row.body, DaemonHeartbeat))
            return {heartbeat.daemon_type: heartbeat for heartbeat in heartbeats}
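
Note that the returned mapping keeps one heartbeat per daemon type, whichever row the comprehension sees last. A hypothetical caller (storage stands for any instance of the class above):

heartbeats = storage.get_daemon_heartbeats()
for daemon_type, heartbeat in heartbeats.items():
    # keys are the daemon_type strings stored on each DaemonHeartbeat
    print(daemon_type, heartbeat)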
Example #4
def execute_step_command(input_json):
    with capture_interrupts():

        args = deserialize_as(input_json, ExecuteStepArgs)

        with (DagsterInstance.from_ref(args.instance_ref)
              if args.instance_ref else DagsterInstance.get()) as instance:
            pipeline_run = instance.get_run_by_id(args.pipeline_run_id)
            check.inst(
                pipeline_run,
                PipelineRun,
                "Pipeline run with id '{}' not found for step execution".format(
                    args.pipeline_run_id
                ),
            )

            if args.should_verify_step:
                success = verify_step(
                    instance,
                    pipeline_run,
                    args.known_state.get_retry_state(),
                    args.step_keys_to_execute,
                )
                if not success:
                    return

            recon_pipeline = recon_pipeline_from_origin(
                args.pipeline_origin
            ).subset_for_execution_from_existing_pipeline(pipeline_run.solids_to_execute)

            execution_plan = create_execution_plan(
                recon_pipeline,
                run_config=pipeline_run.run_config,
                step_keys_to_execute=args.step_keys_to_execute,
                mode=pipeline_run.mode,
                known_state=args.known_state,
            )

            buff = []

            for event in execute_plan_iterator(
                    execution_plan,
                    recon_pipeline,
                    pipeline_run,
                    instance,
                    run_config=pipeline_run.run_config,
                    retry_mode=args.retry_mode,
            ):
                buff.append(serialize_dagster_namedtuple(event))

            for line in buff:
                click.echo(line)
Example #5
def add_selector_id_to_jobs_table(storage, print_fn=None):
    """
    Utility method that calculates the selector_id for each stored instigator state, and writes
    it to the jobs table.
    """

    if print_fn:
        print_fn("Querying storage.")

    with storage.connect() as conn:
        rows = conn.execute(
            db.select([
                JobTable.c.id,
                JobTable.c.job_body,
                JobTable.c.create_timestamp,
                JobTable.c.update_timestamp,
            ]).order_by(JobTable.c.id.asc())
        ).fetchall()

        for row_id, state_str, create_timestamp, update_timestamp in tqdm(rows):
            state = deserialize_as(state_str, InstigatorState)
            selector_id = state.selector_id

            # insert the state into a new instigator table, which has a unique constraint on
            # selector_id
            try:
                conn.execute(
                    InstigatorsTable.insert().values(
                        selector_id=selector_id,
                        repository_selector_id=state.repository_selector_id,
                        status=state.status.value,
                        instigator_type=state.instigator_type.value,
                        instigator_body=state_str,
                        create_timestamp=create_timestamp,
                        update_timestamp=update_timestamp,
                    )
                )
            except db.exc.IntegrityError:
                conn.execute(
                    InstigatorsTable.update()
                    .where(InstigatorsTable.c.selector_id == selector_id)
                    .values(
                        status=state.status.value,
                        repository_selector_id=state.repository_selector_id,
                        instigator_type=state.instigator_type.value,
                        instigator_body=state_str,
                        update_timestamp=update_timestamp,
                    )
                )

            conn.execute(
                JobTable.update()  # pylint: disable=no-value-for-parameter
                .where(JobTable.c.id == row_id)
                .where(JobTable.c.selector_id == None)
                .values(selector_id=state.selector_id)
            )

    if print_fn:
        print_fn("Complete.")
Example #6
    def get_run_by_id(self, run_id: str) -> Optional[PipelineRun]:
        """Get a run by its id.

        Args:
            run_id (str): The id of the run

        Returns:
            Optional[PipelineRun]
        """
        check.str_param(run_id, "run_id")

        query = db.select([RunsTable.c.run_body]).where(RunsTable.c.run_id == run_id)
        rows = self.fetchall(query)
        return deserialize_as(rows[0][0], PipelineRun) if len(rows) else None
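
Since a missing row yields None rather than an exception, callers branch on the result. A hypothetical usage (storage stands for the class above; the id is a placeholder):

run = storage.get_run_by_id("some-run-id")  # placeholder id
if run is None:
    print("no run with that id")
else:
    print(run.pipeline_name, run.status)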
Example #7
def sync_get_external_execution_plan_grpc(
    api_client: "DagsterGrpcClient",
    pipeline_origin: ExternalPipelineOrigin,
    run_config: Mapping[str, Any],
    mode: str,
    pipeline_snapshot_id: str,
    solid_selection: Optional[List[str]] = None,
    step_keys_to_execute: Optional[List[str]] = None,
    known_state: Optional[KnownExecutionState] = None,
    instance: Optional[DagsterInstance] = None,
) -> ExecutionPlanSnapshot:
    from dagster.grpc.client import DagsterGrpcClient

    check.inst_param(api_client, "api_client", DagsterGrpcClient)
    check.inst_param(pipeline_origin, "pipeline_origin",
                     ExternalPipelineOrigin)
    solid_selection = check.opt_list_param(solid_selection,
                                           "solid_selection",
                                           of_type=str)
    run_config = check.dict_param(run_config, "run_config", key_type=str)
    check.str_param(mode, "mode")
    check.opt_nullable_list_param(step_keys_to_execute,
                                  "step_keys_to_execute",
                                  of_type=str)
    check.str_param(pipeline_snapshot_id, "pipeline_snapshot_id")
    check.opt_inst_param(known_state, "known_state", KnownExecutionState)
    check.opt_inst_param(instance, "instance", DagsterInstance)

    result = deserialize_as(
        api_client.execution_plan_snapshot(
            execution_plan_snapshot_args=ExecutionPlanSnapshotArgs(
                pipeline_origin=pipeline_origin,
                solid_selection=solid_selection,
                run_config=run_config,
                mode=mode,
                step_keys_to_execute=step_keys_to_execute,
                pipeline_snapshot_id=pipeline_snapshot_id,
                known_state=known_state,
                instance_ref=instance.get_ref() if instance and instance.is_persistent else None,
            )
        ),
        (ExecutionPlanSnapshot, ExecutionPlanSnapshotErrorData),
    )

    if isinstance(result, ExecutionPlanSnapshotErrorData):
        raise DagsterUserCodeProcessError.from_error_info(result.error)
    return result
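
Passing a tuple of types to deserialize_as, as above, accepts whichever whitelisted type the payload decodes to, and the caller narrows with isinstance. A minimal sketch of the same success-or-error pattern (SuccessData, ErrorData, and payload are hypothetical):

def parse_response(payload):
    # SuccessData / ErrorData stand in for serdes-whitelisted result types
    result = deserialize_as(payload, (SuccessData, ErrorData))
    if isinstance(result, ErrorData):
        raise RuntimeError(result.message)  # assumes ErrorData carries a message
    return result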
Example #8
    def get_backfills(
        self,
        status: Optional[BulkActionStatus] = None,
        cursor: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[PartitionBackfill]:
        check.opt_inst_param(status, "status", BulkActionStatus)
        query = db.select([BulkActionsTable.c.body])
        if status:
            query = query.where(BulkActionsTable.c.status == status.value)
        if cursor:
            cursor_query = db.select([BulkActionsTable.c.id]).where(
                BulkActionsTable.c.key == cursor
            )
            query = query.where(BulkActionsTable.c.id < cursor_query)
        if limit:
            query = query.limit(limit)
        query = query.order_by(BulkActionsTable.c.id.desc())
        rows = self.fetchall(query)
        return [deserialize_as(row[0], PartitionBackfill) for row in rows]
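
Because the cursor is matched against BulkActionsTable.c.key and results come back ordered by descending id, pages can be walked by passing the last-seen backfill's key as the next cursor. A hedged paging sketch (assumes PartitionBackfill exposes the stored key as backfill_id):

page = storage.get_backfills(limit=10)  # storage stands for the class above
while page:
    for backfill in page:
        ...  # process each PartitionBackfill
    page = storage.get_backfills(cursor=page[-1].backfill_id, limit=10)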
Example #9
    def get_run_records(
        self,
        filters: Optional[PipelineRunsFilter] = None,
        limit: Optional[int] = None,
        order_by: Optional[str] = None,
        ascending: bool = False,
        cursor: Optional[str] = None,
        bucket_by: Optional[Union[JobBucket, TagBucket]] = None,
    ) -> List[RunRecord]:
        filters = check.opt_inst_param(
            filters, "filters", PipelineRunsFilter, default=PipelineRunsFilter()
        )
        check.opt_int_param(limit, "limit")

        columns = ["id", "run_body", "create_timestamp", "update_timestamp"]

        if self.has_run_stats_index_cols():
            columns += ["start_time", "end_time"]
        # only fetch columns we use to build RunRecord
        query = self._runs_query(
            filters=filters,
            limit=limit,
            columns=columns,
            order_by=order_by,
            ascending=ascending,
            cursor=cursor,
            bucket_by=bucket_by,
        )

        rows = self.fetchall(query)
        return [
            RunRecord(
                storage_id=check.int_param(row["id"], "id"),
                pipeline_run=deserialize_as(
                    check.str_param(row["run_body"], "run_body"), PipelineRun
                ),
                create_timestamp=check.inst(row["create_timestamp"], datetime),
                update_timestamp=check.inst(row["update_timestamp"], datetime),
                start_time=(
                    check.opt_inst(row["start_time"], float) if "start_time" in row else None
                ),
                end_time=(
                    check.opt_inst(row["end_time"], float) if "end_time" in row else None
                ),
            )
            for row in rows
        ]
Example #10
def execute_step_command(input_json):
    with capture_interrupts():

        args = deserialize_as(input_json, ExecuteStepArgs)

        with (DagsterInstance.from_ref(args.instance_ref)
              if args.instance_ref else DagsterInstance.get()) as instance:
            pipeline_run = instance.get_run_by_id(args.pipeline_run_id)

            buff = []

            for event in _execute_step_command_body(
                    args,
                    instance,
                    pipeline_run,
            ):
                buff.append(serialize_dagster_namedtuple(event))

            for line in buff:
                click.echo(line)
Example #11
def sync_get_external_partition_names_grpc(
    api_client: "DagsterGrpcClient",
    repository_handle: RepositoryHandle,
    partition_set_name: str,
) -> ExternalPartitionNamesData:
    from dagster.grpc.client import DagsterGrpcClient

    check.inst_param(api_client, "api_client", DagsterGrpcClient)
    check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
    check.str_param(partition_set_name, "partition_set_name")
    repository_origin = repository_handle.get_external_origin()
    result = deserialize_as(
        api_client.external_partition_names(
            partition_names_args=PartitionNamesArgs(
                repository_origin=repository_origin,
                partition_set_name=partition_set_name,
            ),
        ),
        (ExternalPartitionNamesData, ExternalPartitionExecutionErrorData),
    )
    if isinstance(result, ExternalPartitionExecutionErrorData):
        raise DagsterUserCodeProcessError.from_error_info(result.error)

    return result
Example #12
def resume_run_command(input_json):
    with capture_interrupts():
        args = deserialize_as(input_json, ResumeRunArgs)
        recon_pipeline = recon_pipeline_from_origin(args.pipeline_origin)

        with (DagsterInstance.from_ref(args.instance_ref)
              if args.instance_ref else DagsterInstance.get()) as instance:
            buffer = []

            def send_to_buffer(event):
                buffer.append(serialize_dagster_namedtuple(event))

            _resume_run_command_body(
                recon_pipeline,
                args.pipeline_run_id,
                instance,
                send_to_buffer,
            )

            for line in buffer:
                click.echo(line)
Example #13
    def get_backfill(self, backfill_id: str) -> Optional[PartitionBackfill]:
        check.str_param(backfill_id, "backfill_id")
        query = db.select([BulkActionsTable.c.body]).where(
            BulkActionsTable.c.key == backfill_id
        )
        row = self.fetchone(query)
        return deserialize_as(row[0], PartitionBackfill) if row else None
Example #14
    def _row_to_run(self, row: Tuple) -> PipelineRun:
        return deserialize_as(row[0], PipelineRun)
Example #15
    def _reload_current_image(self) -> str:
        return deserialize_as(
            self.client.get_current_image(),
            GetCurrentImageResult,
        ).current_image
Example #16
def _run_in_subprocess(
    serialized_execute_run_args,
    recon_pipeline,
    termination_event,
    subprocess_status_handler,
    run_event_handler,
):

    start_termination_thread(termination_event)
    try:
        execute_run_args = deserialize_as(
            serialized_execute_run_args, ExecuteExternalPipelineArgs
        )

        with (DagsterInstance.from_ref(execute_run_args.instance_ref)
              if execute_run_args.instance_ref else nullcontext()) as instance:
            instance = check.not_none(instance)
            pipeline_run = instance.get_run_by_id(execute_run_args.pipeline_run_id)

            if not pipeline_run:
                raise DagsterRunNotFoundError(
                    "gRPC server could not load run {run_id} in order to execute it. "
                    "Make sure that the gRPC server has access to your run storage.".format(
                        run_id=execute_run_args.pipeline_run_id
                    ),
                    invalid_run_id=execute_run_args.pipeline_run_id,
                )

            pid = os.getpid()

    except:
        serializable_error_info = serializable_error_info_from_exc_info(sys.exc_info())
        event = IPCErrorMessage(
            serializable_error_info=serializable_error_info,
            message="Error during RPC setup for executing run: {message}".format(
                message=serializable_error_info.message
            ),
        )
        subprocess_status_handler(event)
        subprocess_status_handler(RunInSubprocessComplete())
        return

    subprocess_status_handler(StartRunInSubprocessSuccessful())

    run_event_handler(
        instance.report_engine_event(
            "Started process for run (pid: {pid}).".format(pid=pid),
            pipeline_run,
            EngineEventData.in_process(pid, marker_end="cli_api_subprocess_init"),
        )
    )

    # This is so nasty but seemingly unavoidable
    # https://amir.rachum.com/blog/2017/03/03/generator-cleanup/
    closed = False
    try:
        for event in core_execute_run(recon_pipeline, pipeline_run, instance):
            run_event_handler(event)
    except GeneratorExit:
        closed = True
        raise
    except:
        # Relies on core_execute_run logging all exceptions to the event log before raising
        pass
    finally:
        if not closed:
            run_event_handler(
                instance.report_engine_event(
                    "Process for run exited (pid: {pid}).".format(pid=pid),
                    pipeline_run,
                ))
        subprocess_status_handler(RunInSubprocessComplete())
        instance.dispose()
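
The GeneratorExit bookkeeping at the end follows the pattern from the linked post: if the event iterator is closed mid-iteration (for example during interpreter shutdown), re-raise immediately and skip work that might touch half-torn-down state, while still running the remaining cleanup. A minimal standalone sketch with hypothetical callback names:

def drain_events(events, on_event, on_exit):
    # events: any generator; on_event / on_exit: hypothetical callbacks
    closed = False
    try:
        for event in events:
            on_event(event)
    except GeneratorExit:
        # the generator was closed from outside; skip on_exit and propagate
        closed = True
        raise
    finally:
        if not closed:
            on_exit()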