Пример #1
0
def make_actor(cls, num_cpus, num_gpus, memory, object_store_memory, resources,
               max_restarts, max_task_retries):
    """Validate the restart settings and wrap ``cls`` in an ``ActorClass``.

    Args:
        cls: The user class to turn into an actor class; it is first passed
            through ``modify_class``.
        num_cpus, num_gpus, memory, object_store_memory, resources: Resource
            settings forwarded unchanged to
            ``ActorClass._ray_from_modified_class``.
        max_restarts: Restart limit. ``None`` is treated as 0 and -1 means
            infinite restarts. Non-negative values are capped at
            ``ray_constants.MAX_INT64_VALUE``.
        max_task_retries: ``None`` is treated as 0. Must be 0 whenever
            ``max_restarts`` is 0.

    Returns:
        The result of ``ActorClass._ray_from_modified_class``.

    Raises:
        ValueError: If ``max_restarts`` is below -1, or if
            ``max_task_retries`` is non-zero while ``max_restarts`` is 0.
    """
    Class = modify_class(cls)

    if max_restarts is None:
        max_restarts = 0
    if max_task_retries is None:
        max_task_retries = 0

    # -1 is the "infinite restarts" sentinel and is passed through untouched.
    if max_restarts != -1:
        if max_restarts < 0:
            raise ValueError("max_restarts must be an integer >= -1. "
                             "-1 indicates infinite restarts.")
        # Make sure we don't pass too big of an int to C++, causing
        # an overflow.
        max_restarts = min(max_restarts, ray_constants.MAX_INT64_VALUE)

    if max_restarts == 0 and max_task_retries != 0:
        raise ValueError(
            "max_task_retries cannot be set if max_restarts is 0.")

    return ActorClass._ray_from_modified_class(Class,
                                               ActorClassID.from_random(),
                                               max_restarts, max_task_retries,
                                               num_cpus, num_gpus, memory,
                                               object_store_memory, resources)
Пример #2
0
def make_actor(cls, num_cpus, num_gpus, memory, object_store_memory, resources,
               max_reconstructions):
    """Validate ``cls`` and wrap it in an ``ActorClass``.

    Args:
        cls: The user class to turn into an actor class.
        num_cpus, num_gpus, memory, object_store_memory, resources: Resource
            settings forwarded unchanged to
            ``ActorClass._ray_from_modified_class``.
        max_reconstructions: Reconstruction limit. ``None`` selects a
            default: 3 when ``ray_constants.direct_call_enabled()`` is true,
            otherwise 0.

    Returns:
        The result of ``ActorClass._ray_from_modified_class`` for a subclass
        of ``cls`` that adds ``__ray_terminate__`` and ``__ray_checkpoint__``.

    Raises:
        TypeError: If ``cls`` is an old-style class, or is an abstract
            subclass of ``Checkpointable``.
        ValueError: If ``max_reconstructions`` lies outside the range
            bounded by ``ray_constants.NO_RECONSTRUCTION`` and
            ``ray_constants.INFINITE_RECONSTRUCTION``.
    """
    # Give an error if cls is an old-style class.
    if not issubclass(cls, object):
        raise TypeError(
            "The @ray.remote decorator cannot be applied to old-style "
            "classes. In Python 2, you must declare the class with "
            "'class ClassName(object):' instead of 'class ClassName:'.")

    if issubclass(cls, Checkpointable) and inspect.isabstract(cls):
        raise TypeError(
            "A checkpointable actor class should implement all abstract "
            "methods in the `Checkpointable` interface.")

    if max_reconstructions is None:
        if ray_constants.direct_call_enabled():
            # Allow the actor creation task to be resubmitted automatically
            # by default.
            max_reconstructions = 3
        else:
            max_reconstructions = 0

    # An out-of-range argument is a value error; ValueError is still caught
    # by callers that handle the broader Exception.
    if not (ray_constants.NO_RECONSTRUCTION <= max_reconstructions <=
            ray_constants.INFINITE_RECONSTRUCTION):
        raise ValueError("max_reconstructions must be in range [%d, %d]." %
                         (ray_constants.NO_RECONSTRUCTION,
                          ray_constants.INFINITE_RECONSTRUCTION))

    # Modify the class to have an additional method that will be used for
    # terminating the worker.
    class Class(cls):
        def __ray_terminate__(self):
            """Exit the actor unless the worker runs in local mode."""
            worker = ray.worker.get_global_worker()
            if worker.mode != ray.LOCAL_MODE:
                ray.actor.exit_actor()

        def __ray_checkpoint__(self):
            """Save a checkpoint.

            This task saves the current state of the actor, the current task
            frontier according to the raylet, and the checkpoint index
            (number of tasks executed so far).
            """
            worker = ray.worker.global_worker
            if not isinstance(self, ray.actor.Checkpointable):
                raise Exception(
                    "__ray_checkpoint__.remote() may only be called on actors "
                    "that implement ray.actor.Checkpointable")
            return worker._save_actor_checkpoint()

    # Present the generated subclass under the user's class identity.
    Class.__module__ = cls.__module__
    Class.__name__ = cls.__name__

    return ActorClass._ray_from_modified_class(Class,
                                               ActorClassID.from_random(),
                                               max_reconstructions, num_cpus,
                                               num_gpus, memory,
                                               object_store_memory, resources)
Пример #3
0
def make_actor(cls, num_cpus, num_gpus, resources, actor_method_cpus,
               max_reconstructions):
    """Validate ``cls`` and wrap it in an ``ActorClass``.

    Args:
        cls: The user class to turn into an actor class.
        num_cpus, num_gpus, resources, actor_method_cpus: Settings forwarded
            unchanged to the ``ActorClass`` constructor.
        max_reconstructions: Reconstruction limit; ``None`` is treated as 0.

    Returns:
        An ``ActorClass`` built from a subclass of ``cls`` that adds
        ``__ray_terminate__`` and ``__ray_checkpoint__``, identified by a
        freshly generated ``ActorClassID``.

    Raises:
        TypeError: If ``cls`` is an old-style class, or is an abstract
            subclass of ``Checkpointable``.
        ValueError: If ``max_reconstructions`` lies outside the range
            bounded by ``ray_constants.NO_RECONSTRUCTION`` and
            ``ray_constants.INFINITE_RECONSTRUCTION``.
    """
    # Give an error if cls is an old-style class.
    if not issubclass(cls, object):
        raise TypeError(
            "The @ray.remote decorator cannot be applied to old-style "
            "classes. In Python 2, you must declare the class with "
            "'class ClassName(object):' instead of 'class ClassName:'.")

    if issubclass(cls, Checkpointable) and inspect.isabstract(cls):
        raise TypeError(
            "A checkpointable actor class should implement all abstract "
            "methods in the `Checkpointable` interface.")

    if max_reconstructions is None:
        max_reconstructions = 0

    # An out-of-range argument is a value error; ValueError is still caught
    # by callers that handle the broader Exception.
    if not (ray_constants.NO_RECONSTRUCTION <= max_reconstructions <=
            ray_constants.INFINITE_RECONSTRUCTION):
        raise ValueError("max_reconstructions must be in range [%d, %d]." %
                         (ray_constants.NO_RECONSTRUCTION,
                          ray_constants.INFINITE_RECONSTRUCTION))

    # Modify the class to have an additional method that will be used for
    # terminating the worker.
    class Class(cls):
        def __ray_terminate__(self):
            """Disconnect from the raylet client and exit the process.

            Does nothing when the worker runs in local mode.
            """
            worker = ray.worker.get_global_worker()
            if worker.mode != ray.LOCAL_MODE:
                # Disconnect the worker from the local scheduler. The point of
                # this is so that when the worker kills itself below, the local
                # scheduler won't push an error message to the driver.
                worker.raylet_client.disconnect()
                # sys.exit raises SystemExit, so nothing after it would run.
                sys.exit(0)

        def __ray_checkpoint__(self):
            """Save a checkpoint.

            This task saves the current state of the actor, the current task
            frontier according to the local scheduler, and the checkpoint index
            (number of tasks executed so far).
            """
            worker = ray.worker.global_worker
            if not isinstance(self, ray.actor.Checkpointable):
                raise Exception(
                    "__ray_checkpoint__.remote() may only be called on actors "
                    "that implement ray.actor.Checkpointable")
            return worker._save_actor_checkpoint()

    # Present the generated subclass under the user's class identity.
    Class.__module__ = cls.__module__
    Class.__name__ = cls.__name__

    class_id = ActorClassID(_random_string())

    return ActorClass(Class, class_id, max_reconstructions, num_cpus, num_gpus,
                      resources, actor_method_cpus)
Пример #4
0
def make_actor(cls, num_cpus, num_gpus, memory, object_store_memory, resources,
               max_reconstructions):
    """Build an ``ActorClass`` from ``cls`` after checking the limit.

    ``cls`` is first passed through ``modify_class``. A
    ``max_reconstructions`` of ``None`` is treated as 0.

    Raises:
        ValueError: If ``max_reconstructions`` lies outside the range
            bounded by ``ray_constants.NO_RECONSTRUCTION`` and
            ``ray_constants.INFINITE_RECONSTRUCTION``.
    """
    modified_cls = modify_class(cls)

    max_reconstructions = (0 if max_reconstructions is None else
                           max_reconstructions)

    lower_bound = ray_constants.NO_RECONSTRUCTION
    upper_bound = ray_constants.INFINITE_RECONSTRUCTION
    in_range = lower_bound <= max_reconstructions <= upper_bound
    if not in_range:
        raise ValueError("max_reconstructions must be in range [%d, %d]." %
                         (lower_bound, upper_bound))

    # Every actor class gets its own randomly generated identifier.
    class_id = ActorClassID.from_random()
    return ActorClass._ray_from_modified_class(
        modified_cls, class_id, max_reconstructions, num_cpus, num_gpus,
        memory, object_store_memory, resources)
Пример #5
0
def make_actor(cls, num_cpus, num_gpus, resources, actor_method_cpus,
               checkpoint_interval, max_reconstructions):
    """Validate ``cls`` and wrap it in an ``ActorClass`` with checkpointing.

    Args:
        cls: The user class to turn into an actor class.
        num_cpus, num_gpus, resources, actor_method_cpus: Settings forwarded
            unchanged to the ``ActorClass`` constructor.
        checkpoint_interval: ``None`` is treated as -1. A value of 0 is
            rejected; other values are forwarded without further checks.
        max_reconstructions: Reconstruction limit; ``None`` is treated as 0.

    Returns:
        An ``ActorClass`` built from a subclass of ``cls`` that adds the
        termination and checkpoint helper methods, identified by a freshly
        generated ``ActorClassID``.

    Raises:
        TypeError: If ``cls`` is an old-style class.
        ValueError: If ``checkpoint_interval`` is 0, or if
            ``max_reconstructions`` lies outside the range bounded by
            ``ray_constants.NO_RECONSTRUCTION`` and
            ``ray_constants.INFINITE_RECONSTRUCTION``.
    """
    # Give an error if cls is an old-style class.
    if not issubclass(cls, object):
        raise TypeError(
            "The @ray.remote decorator cannot be applied to old-style "
            "classes. In Python 2, you must declare the class with "
            "'class ClassName(object):' instead of 'class ClassName:'.")

    if checkpoint_interval is None:
        checkpoint_interval = -1
    if max_reconstructions is None:
        max_reconstructions = 0

    # Invalid arguments are value errors; ValueError is still caught by
    # callers that handle the broader Exception.
    if checkpoint_interval == 0:
        raise ValueError("checkpoint_interval must be greater than 0.")
    if not (ray_constants.NO_RECONSTRUCTION <= max_reconstructions <=
            ray_constants.INFINITE_RECONSTRUCTION):
        raise ValueError("max_reconstructions must be in range [%d, %d]." %
                         (ray_constants.NO_RECONSTRUCTION,
                          ray_constants.INFINITE_RECONSTRUCTION))

    # Modify the class to have an additional method that will be used for
    # terminating the worker.
    class Class(cls):
        def __ray_terminate__(self):
            """Disconnect from the raylet client and exit the process.

            Does nothing when the worker runs in local mode.
            """
            worker = ray.worker.get_global_worker()
            if worker.mode != ray.LOCAL_MODE:
                # Disconnect the worker from the local scheduler. The point of
                # this is so that when the worker kills itself below, the local
                # scheduler won't push an error message to the driver.
                worker.raylet_client.disconnect()
                # sys.exit raises SystemExit, so nothing after it would run.
                sys.exit(0)

        def __ray_save_checkpoint__(self):
            """Pickle the actor state.

            Uses the result of ``__ray_save__`` when the actor defines it,
            otherwise pickles the actor object itself.
            """
            if hasattr(self, "__ray_save__"):
                object_to_serialize = self.__ray_save__()
            else:
                object_to_serialize = self
            return pickle.dumps(object_to_serialize)

        @classmethod
        def __ray_restore_from_checkpoint__(cls, pickled_checkpoint):
            """Rebuild an actor object from a pickled checkpoint.

            When the class defines ``__ray_restore__``, a new instance is
            created without calling ``__init__`` and handed the unpickled
            checkpoint; otherwise the unpickled object is the actor.
            """
            # NOTE(review): pickle.loads can execute arbitrary code. The
            # checkpoint bytes must only come from a trusted store.
            checkpoint = pickle.loads(pickled_checkpoint)
            if hasattr(cls, "__ray_restore__"):
                actor_object = cls.__new__(cls)
                actor_object.__ray_restore__(checkpoint)
            else:
                # TODO(rkn): It's possible that this will cause problems. When
                # you unpickle the same object twice, the two objects will not
                # have the same class.
                actor_object = checkpoint
            return actor_object

        def __ray_checkpoint__(self):
            """Save a checkpoint.

            This task saves the current state of the actor, the current task
            frontier according to the local scheduler, and the checkpoint index
            (number of tasks executed so far).
            """
            worker = ray.worker.global_worker
            checkpoint_index = worker.actor_task_counter
            # Get the state to save.
            checkpoint = self.__ray_save_checkpoint__()
            # Get the current task frontier, per actor handle.
            # NOTE(swang): This only includes actor handles that the local
            # scheduler has seen. Handle IDs for which no task has yet reached
            # the local scheduler will not be included, and may not be runnable
            # on checkpoint resumption.
            actor_id = worker.actor_id
            frontier = worker.raylet_client.get_actor_frontier(actor_id)
            # Save the checkpoint in Redis. TODO(rkn): Checkpoints
            # should not be stored in Redis. Fix this.
            set_actor_checkpoint(worker, actor_id, checkpoint_index,
                                 checkpoint, frontier)

        def __ray_checkpoint_restore__(self):
            """Restore a checkpoint.

            This task looks for a saved checkpoint and if found, restores the
            state of the actor, the task frontier in the local scheduler, and
            the checkpoint index (number of tasks executed so far).

            Returns:
                A bool indicating whether a checkpoint was resumed.
            """
            worker = ray.worker.global_worker
            # Get the most recent checkpoint stored, if any.
            checkpoint_index, checkpoint, frontier = get_actor_checkpoint(
                worker, worker.actor_id)
            # Try to resume from the checkpoint.
            checkpoint_resumed = False
            if checkpoint_index is not None:
                # Load the actor state from the checkpoint.
                worker.actors[worker.actor_id] = (
                    worker.actor_class.__ray_restore_from_checkpoint__(
                        checkpoint))
                # Set the number of tasks executed so far.
                worker.actor_task_counter = checkpoint_index
                # Set the actor frontier in the local scheduler.
                worker.raylet_client.set_actor_frontier(frontier)
                checkpoint_resumed = True

            return checkpoint_resumed

    # Present the generated subclass under the user's class identity.
    Class.__module__ = cls.__module__
    Class.__name__ = cls.__name__

    class_id = ActorClassID(_random_string())

    return ActorClass(Class, class_id, checkpoint_interval,
                      max_reconstructions, num_cpus, num_gpus, resources,
                      actor_method_cpus)