示例#1
0
文件: api.py 项目: keypointt/dagster
def grpc_command(
    port=None,
    socket=None,
    host="localhost",
    max_workers=None,
    heartbeat=False,
    heartbeat_timeout=30,
    lazy_load_user_code=False,
    ipc_output_file=None,
    fixed_server_id=None,
    override_system_timezone=None,
    **kwargs,
):
    """Validate the CLI options, build a ``DagsterGrpcServer`` and serve it.

    Args:
        port: Port to serve on (``--port/-p``). Mutually exclusive with
            ``socket``; required when ``seven.IS_WINDOWS`` is true.
        socket: Socket to serve on (``--socket/-s``). Mutually exclusive
            with ``port``.
        host: Forwarded to ``DagsterGrpcServer``.
        max_workers: Forwarded to ``DagsterGrpcServer``.
        heartbeat: Forwarded to ``DagsterGrpcServer``.
        heartbeat_timeout: Forwarded to ``DagsterGrpcServer``.
        lazy_load_user_code: Forwarded to ``DagsterGrpcServer``.
        ipc_output_file: Forwarded to ``DagsterGrpcServer``.
        fixed_server_id: Forwarded to ``DagsterGrpcServer``.
        override_system_timezone: When truthy, the server is created and
            served inside ``mock_system_timezone(override_system_timezone)``.
        **kwargs: Must contain the keys ``attribute``, ``working_directory``,
            ``module_name``, ``package_name``, ``python_file`` and
            ``empty_working_directory``. If any of them is truthy, a
            ``LoadableTargetOrigin`` is built from them and handed to the
            server; otherwise the server gets ``None``.

    Raises:
        click.UsageError: If no port is given on Windows, or if not exactly
            one of ``port`` / ``socket`` is given.
    """
    if seven.IS_WINDOWS and port is None:
        raise click.UsageError(
            "You must pass a valid --port/-p on Windows: --socket/-s not supported."
        )
    # Exactly one of port/socket must be supplied.  The previous condition,
    # `not (port or socket and not (port and socket))`, parsed as
    # `not (port or (socket and ...))` because `and` binds tighter than `or`,
    # so supplying both options slipped through unnoticed.
    if bool(port) == bool(socket):
        raise click.UsageError("You must pass one and only one of --port/-p or --socket/-s.")

    loadable_target_origin = None
    if any(
        kwargs[key]
        for key in [
            "attribute",
            "working_directory",
            "module_name",
            "package_name",
            "python_file",
            "empty_working_directory",
        ]
    ):
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute=kwargs["attribute"],
            working_directory=get_working_directory_from_kwargs(kwargs),
            module_name=kwargs["module_name"],
            python_file=kwargs["python_file"],
            package_name=kwargs["package_name"],
        )

    # Only patch the system timezone when an override was requested.
    with (
        mock_system_timezone(override_system_timezone)
        if override_system_timezone
        else nullcontext()
    ):
        server = DagsterGrpcServer(
            port=port,
            socket=socket,
            host=host,
            loadable_target_origin=loadable_target_origin,
            max_workers=max_workers,
            heartbeat=heartbeat,
            heartbeat_timeout=heartbeat_timeout,
            lazy_load_user_code=lazy_load_user_code,
            ipc_output_file=ipc_output_file,
            fixed_server_id=fixed_server_id,
        )

        server.serve()
示例#2
0
def docker_grpc_client(
    dagster_docker_image, grpc_host, grpc_port
):  # pylint: disable=redefined-outer-name, unused-argument
    """Yield a ``DagsterGrpcClient`` for ``grpc_host``/``grpc_port``.

    Unless ``IS_BUILDKITE`` is truthy, the body runs inside
    ``docker_service_up`` for the ``dagster-grpc-server`` service described by
    the ``docker-compose.yml`` next to this file. ``wait_for_connection`` is
    called before the client is yielded.
    """
    if IS_BUILDKITE:
        service_scope = nullcontext()
    else:
        compose_file = file_relative_path(__file__, "docker-compose.yml")
        service_scope = docker_service_up(compose_file, "dagster-grpc-server")

    with service_scope:
        wait_for_connection(grpc_host, grpc_port)
        yield DagsterGrpcClient(port=grpc_port, host=grpc_host)
示例#3
0
def launch_scheduled_execution(output_file, schedule_name,
                               override_system_timezone, **kwargs):
    """Open a tick for ``schedule_name`` and launch its scheduled executions.

    Args:
        output_file: Passed to ``ipc_write_stream``; the resulting stream is
            handed to the tick context.
        schedule_name: Name of the schedule to look up in the single
            repository referenced by ``kwargs``.
        override_system_timezone: When truthy, everything runs inside
            ``mock_system_timezone(override_system_timezone)``.
        **kwargs: Forwarded to ``get_repository_origin_from_kwargs`` and
            ``get_repository_location_from_kwargs``.

    Raises:
        DagsterInvariantViolationError: If the schedule is not found in the
            repository, or if it declares an execution timezone that differs
            from the current system timezone.
    """
    if override_system_timezone:
        timezone_scope = mock_system_timezone(override_system_timezone)
    else:
        timezone_scope = nullcontext()

    with timezone_scope:
        with ipc_write_stream(output_file) as stream:
            with DagsterInstance.get() as instance:
                origin = get_repository_origin_from_kwargs(kwargs)
                job_origin = origin.get_job_origin(schedule_name)

                started_tick = JobTickData(
                    job_origin_id=job_origin.get_id(),
                    job_name=schedule_name,
                    job_type=JobType.SCHEDULE,
                    status=JobTickStatus.STARTED,
                    timestamp=time.time(),
                )

                # The tick scope must be entered before any external artifact
                # is loaded, so that load errors are stored in DB.
                with _schedule_tick_context(instance, stream,
                                            started_tick) as tick_context:
                    with get_repository_location_from_kwargs(
                            kwargs) as repo_location:
                        repo_dict = repo_location.get_repositories()
                        check.invariant(
                            repo_dict and len(repo_dict) == 1,
                            "Passed in arguments should reference exactly one repository, instead there are {num_repos}"
                            .format(num_repos=len(repo_dict)),
                        )
                        external_repo = next(iter(repo_dict.values()))

                        known_schedule_names = [
                            candidate.name for candidate in
                            external_repo.get_external_schedules()
                        ]
                        if schedule_name not in known_schedule_names:
                            raise DagsterInvariantViolationError(
                                "Could not find schedule named {schedule_name}"
                                .format(schedule_name=schedule_name))

                        external_schedule = external_repo.get_external_schedule(
                            schedule_name)

                        # A schedule may only carry a timezone if it equals
                        # the timezone this process is running in.
                        schedule_timezone = external_schedule.execution_timezone
                        if schedule_timezone:
                            system_timezone = pendulum.now().timezone.name
                            timezones_differ = (
                                system_timezone
                                != external_schedule.execution_timezone)
                            if timezones_differ:
                                raise DagsterInvariantViolationError(
                                    "Schedule {schedule_name} is set to execute in {schedule_timezone}, "
                                    "but this scheduler can only run in the system timezone, "
                                    "{system_timezone}. Use DagsterDaemonScheduler if you want to be able "
                                    "to execute schedules in arbitrary timezones."
                                    .format(
                                        schedule_name=external_schedule.name,
                                        schedule_timezone=schedule_timezone,
                                        system_timezone=system_timezone,
                                    ))

                        _launch_scheduled_executions(
                            instance,
                            repo_location,
                            external_repo,
                            external_schedule,
                            tick_context,
                        )
示例#4
0
def grpc_command(
    port=None,
    socket=None,
    host=None,
    max_workers=None,
    heartbeat=False,
    heartbeat_timeout=30,
    lazy_load_user_code=False,
    ipc_output_file=None,
    fixed_server_id=None,
    override_system_timezone=None,
    log_level="INFO",
    use_python_environment_entry_point=False,
    container_context=None,
    **kwargs,
):
    """Validate the CLI options, then start and serve a ``DagsterGrpcServer``.

    Args:
        port: Port to serve on (``--port/-p``). Mutually exclusive with
            ``socket``; required when ``seven.IS_WINDOWS`` is true.
        socket: Socket to serve on (``--socket/-s``). Mutually exclusive
            with ``port``.
        host: Forwarded to ``DagsterGrpcServer``.
        max_workers: Forwarded to ``DagsterGrpcServer``.
        heartbeat: Forwarded to ``DagsterGrpcServer``.
        heartbeat_timeout: Forwarded to ``DagsterGrpcServer``.
        lazy_load_user_code: Forwarded to ``DagsterGrpcServer``.
        ipc_output_file: Forwarded to ``DagsterGrpcServer``.
        fixed_server_id: Forwarded to ``DagsterGrpcServer``.
        override_system_timezone: When truthy, the server is created and
            served inside ``mock_system_timezone(override_system_timezone)``.
        log_level: Passed through ``coerce_valid_log_level`` to
            ``configure_loggers``.
        use_python_environment_entry_point: When truthy, the server entry
            point is ``get_python_environment_entry_point(sys.executable)``
            instead of ``DEFAULT_DAGSTER_ENTRY_POINT``.
        container_context: Optional JSON string; decoded with ``json.loads``
            before being handed to the server. ``None`` is passed through.
        **kwargs: Must contain the keys ``attribute``, ``working_directory``,
            ``module_name``, ``package_name``, ``python_file`` and
            ``empty_working_directory``. If any of them is truthy, a
            ``LoadableTargetOrigin`` is built from them.

    Raises:
        click.UsageError: If no port is given on Windows, or if not exactly
            one of ``port`` / ``socket`` is given.
    """
    if seven.IS_WINDOWS and port is None:
        raise click.UsageError(
            "You must pass a valid --port/-p on Windows: --socket/-s not supported."
        )
    # Exactly one of port/socket must be supplied.  The previous condition,
    # `not (port or socket and not (port and socket))`, parsed as
    # `not (port or (socket and ...))` because `and` binds tighter than `or`,
    # so supplying both options slipped through unnoticed.
    if bool(port) == bool(socket):
        raise click.UsageError(
            "You must pass one and only one of --port/-p or --socket/-s.")

    configure_loggers(log_level=coerce_valid_log_level(log_level))
    logger = logging.getLogger("dagster.code_server")

    loadable_target_origin = None
    if any(kwargs[key] for key in [
            "attribute",
            "working_directory",
            "module_name",
            "package_name",
            "python_file",
            "empty_working_directory",
    ]):
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute=kwargs["attribute"],
            # An explicitly empty working directory wins over whatever
            # get_working_directory_from_kwargs would derive.
            working_directory=(None if kwargs.get("empty_working_directory")
                               else get_working_directory_from_kwargs(kwargs)),
            module_name=kwargs["module_name"],
            python_file=kwargs["python_file"],
            package_name=kwargs["package_name"],
        )

    # Only patch the system timezone when an override was requested.
    with (mock_system_timezone(override_system_timezone)
          if override_system_timezone else nullcontext()):
        server = DagsterGrpcServer(
            port=port,
            socket=socket,
            host=host,
            loadable_target_origin=loadable_target_origin,
            max_workers=max_workers,
            heartbeat=heartbeat,
            heartbeat_timeout=heartbeat_timeout,
            lazy_load_user_code=lazy_load_user_code,
            ipc_output_file=ipc_output_file,
            fixed_server_id=fixed_server_id,
            entry_point=(get_python_environment_entry_point(sys.executable)
                         if use_python_environment_entry_point else
                         DEFAULT_DAGSTER_ENTRY_POINT),
            # Identity comparison is the correct way to test for None.
            container_context=(json.loads(container_context)
                               if container_context is not None else None),
        )

        # Describe the code being served, preferring file, then package,
        # then module; a single space keeps the sentence well-formed when
        # there is nothing to describe.
        code_desc = " "
        if loadable_target_origin:
            if loadable_target_origin.python_file:
                code_desc = f" for file {loadable_target_origin.python_file} "
            elif loadable_target_origin.package_name:
                code_desc = f" for package {loadable_target_origin.package_name} "
            elif loadable_target_origin.module_name:
                code_desc = f" for module {loadable_target_origin.module_name} "

        server_desc = (
            f"Dagster code server{code_desc}on port {port} in process {os.getpid()}"
            if port else
            f"Dagster code server{code_desc}in process {os.getpid()}")

        logger.info("Started {server_desc}".format(server_desc=server_desc))

        try:
            server.serve()
        finally:
            # Logged even when serve() raises, so shutdown is always visible.
            logger.info(
                "Shutting down {server_desc}".format(server_desc=server_desc))
示例#5
0
文件: impl.py 项目: keyz/dagster
def _run_in_subprocess(
    serialized_execute_run_args,
    recon_pipeline,
    termination_event,
    subprocess_status_handler,
    run_event_handler,
):
    """Load a pipeline run and execute it, reporting progress via callbacks.

    Args:
        serialized_execute_run_args: Serialized payload that must deserialize
            to an ``ExecuteExternalPipelineArgs``.
        recon_pipeline: Passed unchanged to ``core_execute_run``.
        termination_event: Passed to ``start_termination_thread``.
        subprocess_status_handler: Callable that receives lifecycle messages:
            ``IPCErrorMessage`` on setup failure,
            ``StartRunInSubprocessSuccessful`` once setup succeeded, and
            ``RunInSubprocessComplete`` at the end in both cases.
        run_event_handler: Callable that receives the engine events reported
            here and every event yielded by ``core_execute_run``.

    Returns:
        None. Setup failures are not raised; they are reported through
        ``subprocess_status_handler`` and the function returns early.
    """

    start_termination_thread(termination_event)
    try:
        execute_run_args = deserialize_json_to_dagster_namedtuple(
            serialized_execute_run_args)
        check.inst_param(execute_run_args, "execute_run_args",
                         ExecuteExternalPipelineArgs)

        # NOTE(review): when instance_ref is falsy the nullcontext() branch
        # presumably binds `instance` to None, so get_run_by_id below would
        # raise and be reported as a setup error -- confirm this is intended.
        with (DagsterInstance.from_ref(execute_run_args.instance_ref)
              if execute_run_args.instance_ref else nullcontext()) as instance:
            pipeline_run = instance.get_run_by_id(
                execute_run_args.pipeline_run_id)

            if not pipeline_run:
                raise DagsterRunNotFoundError(
                    "gRPC server could not load run {run_id} in order to execute it. Make sure that the gRPC server has access to your run storage."
                    .format(run_id=execute_run_args.pipeline_run_id),
                    invalid_run_id=execute_run_args.pipeline_run_id,
                )

            pid = os.getpid()

    # Bare except: every failure during setup (not only Exception subclasses)
    # is converted into an IPCErrorMessage followed by a completion message.
    except:
        serializable_error_info = serializable_error_info_from_exc_info(
            sys.exc_info())
        event = IPCErrorMessage(
            serializable_error_info=serializable_error_info,
            message="Error during RPC setup for executing run: {message}".
            format(message=serializable_error_info.message),
        )
        subprocess_status_handler(event)
        subprocess_status_handler(RunInSubprocessComplete())
        return

    subprocess_status_handler(StartRunInSubprocessSuccessful())

    # NOTE(review): `instance` is used from here on although the `with` block
    # that produced it has already exited -- confirm the instance remains
    # usable after its context manager's __exit__.
    run_event_handler(
        instance.report_engine_event(
            "Started process for run (pid: {pid}).".format(pid=pid),
            pipeline_run,
            EngineEventData.in_process(pid,
                                       marker_end="cli_api_subprocess_init"),
        ))

    # This is so nasty but seemingly unavoidable
    # https://amir.rachum.com/blog/2017/03/03/generator-cleanup/
    # `closed` records that GeneratorExit was seen, so the finally block can
    # skip reporting the exit event in that case.
    closed = False
    try:
        for event in core_execute_run(recon_pipeline, pipeline_run, instance):
            run_event_handler(event)
    except GeneratorExit:
        closed = True
        raise
    except:
        # Relies on core_execute_run logging all exceptions to the event log before raising
        pass
    finally:
        if not closed:
            run_event_handler(
                instance.report_engine_event(
                    "Process for run exited (pid: {pid}).".format(pid=pid),
                    pipeline_run,
                ))
        # Always signal completion and release the instance, whether or not
        # the run finished cleanly.
        subprocess_status_handler(RunInSubprocessComplete())
        instance.dispose()