Exemplo n.º 1
0
    def _deserialization_helper(self, state, ray_forking):
        """Rebuild this actor handle from serialized state (pickling support).

        The handle is re-initialized in place by calling ``__init__`` with the
        values stored in ``state``.

        Args:
            state: The serialized state of the actor handle. The
                ``"ray_forking"`` entry of this dict decides how the actor
                handle ID is derived.
            ray_forking: True if this is being called because Ray is forking
                the actor handle and false if it is being called by pickling.
                Note that the body reads the flag from ``state`` and does not
                consult this argument.
        """
        worker = ray.worker.get_global_worker()
        worker.check_connected()

        forked = state["ray_forking"]
        handle_id = state["actor_handle_id"]
        if not forked:
            # Right now, if the actor handle has been pickled, we create a
            # temporary actor handle id for invocations.
            # TODO(pcm): This still leads to a lot of actor handles being
            # created, there should be a better way to handle pickled
            # actor handles.
            # TODO(swang): Accessing the worker's current task ID is not
            # thread-safe.
            # TODO(swang): Unpickling the same actor handle twice in the same
            # task will break the application, and unpickling it twice in the
            # same actor is likely a performance bug. We should consider
            # logging a warning in these cases.
            handle_id = compute_actor_handle_id_non_forked(
                handle_id, worker.current_task_id)

        # Positional constructor arguments, in the order __init__ expects.
        init_fields = (
            "actor_id",
            "module_name",
            "class_name",
            "actor_cursor",
            "actor_method_names",
            "method_signatures",
            "method_num_return_vals",
            "actor_creation_dummy_object_id",
            "actor_method_cpus",
            # This is the driver ID of the driver that owns the actor, not
            # necessarily the driver that owns this actor handle.
            "actor_driver_id",
        )
        positional = [state[field] for field in init_fields]
        self.__init__(*positional, actor_handle_id=handle_id)
Exemplo n.º 2
0
def free(object_ids, local_only=False):
    """Ask the object stores to delete a list of objects.

    This is a low-level API which should only be used in restricted
    scenarios.

    If local_only is false, the request is sent to all object stores.

    Nothing is returned to indicate whether the deletion succeeded; the call
    is only an instruction to the object store. If some of the objects are
    still in use, the object stores will delete them later, once their ref
    count drops to 0.

    Args:
        object_ids (List[ObjectID]): List of object IDs to delete. A single
            ObjectID is also accepted and is wrapped in a list.
        local_only (bool): Whether to delete the objects only from the local
            object store or from all object stores.

    Raises:
        TypeError: If object_ids is neither an ObjectID nor a list, or if
            any element of the list is not an ObjectID.
    """
    worker = ray.worker.get_global_worker()

    # A bare ObjectID is shorthand for a one-element list.
    if isinstance(object_ids, ray.ObjectID):
        object_ids = [object_ids]
    elif not isinstance(object_ids, list):
        raise TypeError("free() expects a list of ObjectID, got {}".format(
            type(object_ids)))

    # Reject the first element that is not an object ID.
    not_found = object()
    offender = next(
        (candidate for candidate in object_ids
         if not isinstance(candidate, ray.ObjectID)), not_found)
    if offender is not not_found:
        raise TypeError("Attempting to call `free` on the value {}, "
                        "which is not an ray.ObjectID.".format(offender))

    worker.check_connected()
    with profiling.profile("ray.free"):
        # Nothing to send to the raylet for an empty list.
        if object_ids:
            worker.raylet_client.free_objects(object_ids, local_only)
Exemplo n.º 3
0
    def _actor_method_call(self,
                           method_name,
                           args=None,
                           kwargs=None,
                           num_return_vals=None):
        """Method execution stub for an actor handle.

        This is the function that executes when
        `actor.method_name.remote(*args, **kwargs)` is called. Instead of
        executing locally, the method is packaged as a task and scheduled
        to the remote actor instance.

        Args:
            method_name: The name of the actor method to execute.
            args: A list of arguments for the actor method.
            kwargs: A dictionary of keyword arguments for the actor method.
            num_return_vals (int): The number of return values for the method.
                Must not be None when the call is submitted as a task; it is
                not read on the LOCAL_MODE path.

        Returns:
            In LOCAL_MODE, the value returned by calling the method directly.
            Otherwise the object IDs returned by the submitted task with the
            trailing dummy object removed: None if none remain, the single
            object ID if exactly one remains, else the list of object IDs.

        Raises:
            TypeError: If num_return_vals is None when a task has to be
                submitted.
        """
        worker = ray.worker.get_global_worker()

        worker.check_connected()

        function_signature = self._ray_method_signatures[method_name]
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}
        args = signature.extend_args(function_signature, args, kwargs)

        # Execute functions locally if Ray is run in LOCAL_MODE
        # Copy args to prevent the function from mutating them.
        if worker.mode == ray.LOCAL_MODE:
            return getattr(worker.actors[self._ray_actor_id],
                           method_name)(*copy.deepcopy(args))

        function_descriptor = FunctionDescriptor(
            self._ray_module_name, method_name, self._ray_class_name)

        # Fail with a clear message before taking the lock, rather than with
        # an opaque "NoneType + int" error while building the task below.
        if num_return_vals is None:
            raise TypeError(
                "num_return_vals must be an integer when submitting the "
                "actor method '{}', got None.".format(method_name))

        with self._ray_actor_lock:
            object_ids = worker.submit_task(
                function_descriptor,
                args,
                actor_id=self._ray_actor_id,
                actor_handle_id=self._ray_actor_handle_id,
                actor_counter=self._ray_actor_counter,
                actor_creation_dummy_object_id=(
                    self._ray_actor_creation_dummy_object_id),
                execution_dependencies=[self._ray_actor_cursor],
                new_actor_handles=self._ray_new_actor_handles,
                # We add one for the dummy return ID.
                num_return_vals=num_return_vals + 1,
                resources={"CPU": self._ray_actor_method_cpus},
                placement_resources={},
                driver_id=self._ray_actor_driver_id,
            )
            # Update the actor counter and cursor to reflect the most recent
            # invocation.
            self._ray_actor_counter += 1
            # The last object returned is the dummy object that should be
            # passed in to the next actor method. Do not return it to the user.
            self._ray_actor_cursor = object_ids.pop()
            # We have notified the backend of the new actor handles to expect
            # since the last task was submitted, so clear the list.
            self._ray_new_actor_handles = []

        # Unwrap the result: a lone object ID is returned bare, and an empty
        # list becomes None.
        if len(object_ids) == 1:
            object_ids = object_ids[0]
        elif len(object_ids) == 0:
            object_ids = None

        return object_ids
Exemplo n.º 4
0
    def _actor_method_call(self,
                           method_name,
                           args=None,
                           kwargs=None,
                           num_return_vals=None):
        """Method execution stub for an actor handle.

        This is the function that executes when
        `actor.method_name.remote(*args, **kwargs)` is called. Instead of
        executing locally, the method is packaged as a task and scheduled
        to the remote actor instance. The task depends on the dummy object
        returned by the preceding task (the actor cursor), and is flagged as
        a checkpoint task when method_name is "__ray_checkpoint__".

        Args:
            method_name: The name of the actor method to execute.
            args: A list of arguments for the actor method.
            kwargs: A dictionary of keyword arguments for the actor method.
            num_return_vals (int): The number of return values for the method.
                Must not be None when the call is submitted as a task; it is
                not read on the LOCAL_MODE path.

        Returns:
            In LOCAL_MODE, the value returned by calling the method directly.
            Otherwise the object IDs returned by the submitted task with the
            trailing dummy object removed: None if none remain, the single
            object ID if exactly one remains, else the list of object IDs.

        Raises:
            TypeError: If num_return_vals is None when a task has to be
                submitted.
        """
        worker = ray.worker.get_global_worker()

        worker.check_connected()

        function_signature = self._ray_method_signatures[method_name]
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}
        args = signature.extend_args(function_signature, args, kwargs)

        # Execute functions locally if Ray is run in LOCAL_MODE
        # Copy args to prevent the function from mutating them.
        if worker.mode == ray.LOCAL_MODE:
            return getattr(worker.actors[self._ray_actor_id],
                           method_name)(*copy.deepcopy(args))

        is_actor_checkpoint_method = (method_name == "__ray_checkpoint__")

        function_descriptor = FunctionDescriptor(self._ray_module_name,
                                                 method_name,
                                                 self._ray_class_name)

        # Fail with a clear message before taking the lock, rather than with
        # an opaque "NoneType + int" error while building the task below.
        if num_return_vals is None:
            raise TypeError(
                "num_return_vals must be an integer when submitting the "
                "actor method '{}', got None.".format(method_name))

        with self._ray_actor_lock:
            object_ids = worker.submit_task(
                function_descriptor,
                args,
                actor_id=self._ray_actor_id,
                actor_handle_id=self._ray_actor_handle_id,
                actor_counter=self._ray_actor_counter,
                is_actor_checkpoint_method=is_actor_checkpoint_method,
                actor_creation_dummy_object_id=(
                    self._ray_actor_creation_dummy_object_id),
                execution_dependencies=[self._ray_actor_cursor],
                new_actor_handles=self._ray_new_actor_handles,
                # We add one for the dummy return ID.
                num_return_vals=num_return_vals + 1,
                resources={"CPU": self._ray_actor_method_cpus},
                placement_resources={},
                driver_id=self._ray_actor_driver_id,
            )
            # Update the actor counter and cursor to reflect the most recent
            # invocation.
            self._ray_actor_counter += 1
            # The last object returned is the dummy object that should be
            # passed in to the next actor method. Do not return it to the user.
            self._ray_actor_cursor = object_ids.pop()
            # We have notified the backend of the new actor handles to expect
            # since the last task was submitted, so clear the list.
            self._ray_new_actor_handles = []

        # Unwrap the result: a lone object ID is returned bare, and an empty
        # list becomes None.
        if len(object_ids) == 1:
            object_ids = object_ids[0]
        elif len(object_ids) == 0:
            object_ids = None

        return object_ids
Exemplo n.º 5
0
    def _remote(self,
                args=None,
                kwargs=None,
                num_cpus=None,
                num_gpus=None,
                memory=None,
                object_store_memory=None,
                resources=None,
                accelerator_type=None,
                max_concurrency=None,
                max_restarts=None,
                max_task_retries=None,
                name=None,
                lifetime=None,
                placement_group=None,
                placement_group_bundle_index=-1,
                placement_group_capture_child_tasks=None,
                override_environment_variables=None):
        """Create an actor.

        This method allows more flexibility than the remote method because
        resource requirements can be specified and override the defaults in the
        decorator.

        Args:
            args: The arguments to forward to the actor constructor.
            kwargs: The keyword arguments to forward to the actor constructor.
            num_cpus: The number of CPUs required by the actor creation task.
            num_gpus: The number of GPUs required by the actor creation task.
            memory: Restrict the heap memory usage of this actor.
            object_store_memory: Restrict the object store memory used by
                this actor when creating objects.
            resources: The custom resources required by the actor creation
                task.
            accelerator_type: Accelerator type requested for this actor. It is
                forwarded, together with the decorator's value, to
                ray.utils.resources_from_resource_arguments.
            max_concurrency: The max number of concurrent calls to allow for
                this actor. This only works with direct actor calls. The max
                concurrency defaults to 1 for threaded execution, and 1000 for
                asyncio execution. Note that the execution order is not
                guaranteed when max_concurrency > 1.
            max_restarts: Value passed to the core worker's create_actor as
                the actor's max restarts. When None, the value from the class
                metadata (the decorator) is used. An explicit 0 is honored.
            max_task_retries: Value passed to the core worker's create_actor
                as the actor's max task retries. When None, the value from the
                class metadata (the decorator) is used. An explicit 0 is
                honored.
            name: The globally unique name for the actor, which can be used
                to retrieve the actor via ray.get_actor(name) as long as the
                actor is still alive.
            lifetime: Either `None`, which defaults to the actor will fate
                share with its creator and will be deleted once its refcount
                drops to zero, or "detached", which means the actor will live
                as a global object independent of the creator.
            placement_group: the placement group this actor belongs to,
                or None if it doesn't belong to any group.
            placement_group_bundle_index: the index of the bundle
                if the actor belongs to a placement group, which may be -1 to
                specify any available bundle.
            placement_group_capture_child_tasks: Whether or not children tasks
                of this actor should implicitly use the same placement group
                as its parent. When None, the worker's
                should_capture_child_tasks_in_placement_group setting is used.
            override_environment_variables: Environment variables to override
                and/or introduce for this actor.  This is a dictionary mapping
                variable names to their values.

        Returns:
            A handle to the newly created actor.

        Raises:
            TypeError: If name is neither None nor a string.
            ValueError: If max_concurrency is less than 1, name is an empty
                string or already taken, or lifetime is neither None nor
                "detached".
            AssertionError: If a cross-language actor class is used in
                LOCAL_MODE.
        """
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}
        meta = self.__ray_metadata__

        # The actor runs in asyncio mode as soon as the class defines at
        # least one coroutine function.
        is_asyncio = bool(
            inspect.getmembers(meta.modified_class,
                               predicate=inspect.iscoroutinefunction))

        if max_concurrency is None:
            max_concurrency = 1000 if is_asyncio else 1

        if max_concurrency < 1:
            raise ValueError("max_concurrency must be >= 1")

        worker = ray.worker.global_worker
        worker.check_connected()

        if name is not None:
            if not isinstance(name, str):
                raise TypeError(
                    f"name must be None or a string, got: '{type(name)}'.")
            if name == "":
                raise ValueError("Actor name cannot be an empty string.")

            # Check whether the name is already taken.
            # TODO(edoakes): this check has a race condition because two
            # drivers could pass the check and then create the same named
            # actor. We should instead check this when we create the actor,
            # but that's currently an async call.
            try:
                ray.get_actor(name)
            except ValueError:  # Name is not taken.
                pass
            else:
                raise ValueError(
                    f"The name {name} is already taken. Please use "
                    "a different name or get the existing actor using "
                    f"ray.get_actor('{name}')")

        if lifetime is None:
            detached = False
        elif lifetime == "detached":
            detached = True
        else:
            raise ValueError("lifetime must be either `None` or 'detached'")

        if placement_group_capture_child_tasks is None:
            placement_group_capture_child_tasks = (
                worker.should_capture_child_tasks_in_placement_group)

        if placement_group is None:
            if placement_group_capture_child_tasks:
                placement_group = get_current_placement_group()

        if not placement_group:
            placement_group = PlacementGroup.empty()

        check_placement_group_index(placement_group,
                                    placement_group_bundle_index)

        # Set the actor's default resources if not already set. The first four
        # conditions check that no resources were specified in the decorator.
        # The last four conditions check that no resources were specified
        # when _remote() was called.
        if (meta.num_cpus is None and meta.num_gpus is None
                and meta.resources is None and meta.accelerator_type is None
                and num_cpus is None and num_gpus is None and resources is None
                and accelerator_type is None):
            # In the default case, actors acquire no resources for
            # their lifetime, and actor methods will require 1 CPU.
            cpus_to_use = ray_constants.DEFAULT_ACTOR_CREATION_CPU_SIMPLE
            actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SIMPLE
        else:
            # If any resources are specified (here or in decorator), then
            # all resources are acquired for the actor's lifetime and no
            # resources are associated with methods.
            cpus_to_use = (ray_constants.DEFAULT_ACTOR_CREATION_CPU_SPECIFIED
                           if meta.num_cpus is None else meta.num_cpus)
            actor_method_cpu = ray_constants.DEFAULT_ACTOR_METHOD_CPU_SPECIFIED

        # LOCAL_MODE cannot handle cross_language
        if worker.mode == ray.LOCAL_MODE:
            assert not meta.is_cross_language, \
                "Cross language ActorClass cannot be executed locally."

        # Export the actor.
        if not meta.is_cross_language and (meta.last_export_session_and_job !=
                                           worker.current_session_and_job):
            # If this actor class was not exported in this session and job,
            # we need to export this function again, because current GCS
            # doesn't have it.
            meta.last_export_session_and_job = (worker.current_session_and_job)
            # After serialize / deserialize modified class, the __module__
            # of modified class will be ray.cloudpickle.cloudpickle.
            # So, here pass actor_creation_function_descriptor to make
            # sure export actor class correct.
            worker.function_actor_manager.export_actor_class(
                meta.modified_class, meta.actor_creation_function_descriptor,
                meta.method_meta.methods.keys())

        resources = ray.utils.resources_from_resource_arguments(
            cpus_to_use, meta.num_gpus, meta.memory, meta.object_store_memory,
            meta.resources, meta.accelerator_type, num_cpus, num_gpus, memory,
            object_store_memory, resources, accelerator_type)

        # If the actor methods require CPU resources, then set the required
        # placement resources. If actor_placement_resources is empty, then
        # the required placement resources will be the same as resources.
        actor_placement_resources = {}
        assert actor_method_cpu in [0, 1]
        if actor_method_cpu == 1:
            actor_placement_resources = resources.copy()
            actor_placement_resources["CPU"] += 1
        if meta.is_cross_language:
            creation_args = cross_language.format_args(worker, args, kwargs)
        else:
            function_signature = meta.method_meta.signatures["__init__"]
            creation_args = signature.flatten_args(function_signature, args,
                                                   kwargs)

        # Only fall back to the decorator's values when the caller passed
        # nothing. Using `or` here would silently discard an explicit 0.
        if max_restarts is None:
            max_restarts = meta.max_restarts
        if max_task_retries is None:
            max_task_retries = meta.max_task_retries

        actor_id = worker.core_worker.create_actor(
            meta.language,
            meta.actor_creation_function_descriptor,
            creation_args,
            max_restarts,
            max_task_retries,
            resources,
            actor_placement_resources,
            max_concurrency,
            detached,
            name if name is not None else "",
            is_asyncio,
            placement_group.id,
            placement_group_bundle_index,
            placement_group_capture_child_tasks,
            # Store actor_method_cpu in actor handle's extension data.
            extension_data=str(actor_method_cpu),
            override_environment_variables=override_environment_variables
            or dict())

        actor_handle = ActorHandle(meta.language,
                                   actor_id,
                                   meta.method_meta.decorators,
                                   meta.method_meta.signatures,
                                   meta.method_meta.num_returns,
                                   actor_method_cpu,
                                   meta.actor_creation_function_descriptor,
                                   worker.current_session_and_job,
                                   original_handle=True)

        return actor_handle
Exemplo n.º 6
0
    def _actor_method_call(self,
                           method_name,
                           args=None,
                           kwargs=None,
                           num_return_vals=None):
        """Method execution stub for an actor handle.

        This is the function that executes when
        `actor.method_name.remote(*args, **kwargs)` is called. Instead of
        executing locally, the method is packaged as a task and scheduled
        to the remote actor instance. In LOCAL_MODE the call is handed to the
        worker's local mode manager instead.

        Args:
            method_name: The name of the actor method to execute.
            args: A list of arguments for the actor method.
            kwargs: A dictionary of keyword arguments for the actor method.
            num_return_vals (int): The number of return values for the method.
                Must not be None when the call is submitted as a task. In
                LOCAL_MODE it is passed unchanged to the local mode manager.

        Returns:
            The object IDs produced by the call (for a submitted task, with
            the trailing dummy object removed): None if there are none, the
            single object ID if there is exactly one, else the list of
            object IDs.

        Raises:
            TypeError: If num_return_vals is None when a task has to be
                submitted.
        """
        worker = ray.worker.get_global_worker()

        worker.check_connected()

        function_signature = self._ray_method_signatures[method_name]
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}
        args = signature.extend_args(function_signature, args, kwargs)

        function_descriptor = FunctionDescriptor(self._ray_module_name,
                                                 method_name,
                                                 self._ray_class_name)

        if worker.mode == ray.LOCAL_MODE:
            function = getattr(worker.actors[self._ray_actor_id], method_name)
            object_ids = worker.local_mode_manager.execute(
                function, function_descriptor, args, num_return_vals)
        else:
            # Fail with a clear message before taking the lock, rather than
            # with an opaque "NoneType + int" error while building the task.
            if num_return_vals is None:
                raise TypeError(
                    "num_return_vals must be an integer when submitting the "
                    "actor method '{}', got None.".format(method_name))

            with self._ray_actor_lock:
                object_ids = worker.submit_task(
                    function_descriptor,
                    args,
                    actor_id=self._ray_actor_id,
                    actor_handle_id=self._ray_actor_handle_id,
                    actor_counter=self._ray_actor_counter,
                    actor_creation_dummy_object_id=(
                        self._ray_actor_creation_dummy_object_id),
                    previous_actor_task_dummy_object_id=self._ray_actor_cursor,
                    new_actor_handles=self._ray_new_actor_handles,
                    # We add one for the dummy return ID.
                    num_return_vals=num_return_vals + 1,
                    resources={"CPU": self._ray_actor_method_cpus},
                    placement_resources={},
                    job_id=self._ray_actor_job_id,
                )
                # Update the actor counter and cursor to reflect the most
                # recent invocation.
                self._ray_actor_counter += 1
                # The last object returned is the dummy object that should be
                # passed in to the next actor method. Do not return it to the
                # user.
                self._ray_actor_cursor = object_ids.pop()
                # We have notified the backend of the new actor handles to
                # expect since the last task was submitted, so clear the list.
                self._ray_new_actor_handles = []

        # Unwrap the result: a lone object ID is returned bare, and an empty
        # list becomes None.
        if len(object_ids) == 1:
            object_ids = object_ids[0]
        elif len(object_ids) == 0:
            object_ids = None

        return object_ids