Example #1
def main():

    env = GB_game(num_char=4,
                  reward_circle=True,
                  death_penalty=False,
                  relative_positions=True,
                  discrete=True,
                  max_speed=10)
    print('the size of env is ' + str(env.size))
    #     env = dummy_obj()
    ray.register_custom_serializer(
        GB_game, use_pickle=True)  # required: ray.put(env) below fails
    # unless GB_game is registered for pickle-based serialization

    env = ray.put(env)
    print('\nthe put succeeded!!\n')
    actors = [
        Parallel_Actor.remote(computation_graph_args, sample_trajectory_args,
                              estimate_return_args) for i in range(num_cpus)
    ]
    CA = Counter.remote()
    #     weights_copy = actors[0].get_weights.remote()
    #     ray.get([actor.set_weights.remote(weights_copy) for actor in actors])
    #     weights = ray.get([actor.get_weights.remote() for actor in actors])
    #
    #     for i in range(len(weights)):
    #         np.testing.assert_equal(weights[i], weights[0])
    #     print('test passed!')
    return_array = ray.get([
        actor.sample_trajectories_fake.remote(10, env, CA) for actor in actors
    ])
    print(return_array)
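Note: the registration-before-`ray.put` pattern above generalizes to any custom class. A minimal sketch with a hypothetical `Config` class standing in for `GB_game`, assuming the pre-1.0 `ray.register_custom_serializer` API:

import ray

class Config:  # hypothetical stand-in for GB_game
    def __init__(self, num_char=4):
        self.num_char = num_char

ray.init()
# Without this registration, ray.put() can fail on classes the default
# serializer does not handle.
ray.register_custom_serializer(Config, use_pickle=True)

config_ref = ray.put(Config(num_char=4))
assert ray.get(config_ref).num_char == 4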
Example #2
def initialize_ray():
    """Initializes ray based on environment variables and internal defaults."""
    if threading.current_thread().name == "MainThread":
        plasma_directory = None
        object_store_memory = os.environ.get("MODIN_MEMORY", None)
        if os.environ.get("MODIN_OUT_OF_CORE", "False").title() == "True":
            from tempfile import gettempdir

            plasma_directory = gettempdir()
            # We may have already set the memory from the environment variable;
            # don't overwrite that value if so.
            if object_store_memory is None:
                # Round down to the nearest Gigabyte.
                mem_bytes = ray.utils.get_system_memory() // 10**9 * 10**9
                # Default to 8x memory for out of core
                object_store_memory = 8 * mem_bytes
        # In case anything failed above, we can still improve the memory for Modin.
        if object_store_memory is None:
            # Round down to the nearest Gigabyte.
            object_store_memory = int(0.6 * ray.utils.get_system_memory() //
                                      10**9 * 10**9)
            # If the memory pool is smaller than 2GB, just use the default in ray.
            if object_store_memory == 0:
                object_store_memory = None
        else:
            object_store_memory = int(object_store_memory)
        ray.init(
            include_webui=False,
            ignore_reinit_error=True,
            plasma_directory=plasma_directory,
            object_store_memory=object_store_memory,
        )
        # Register custom serializer for method objects to avoid warning message.
        # We serialize `MethodType` objects when we use AxisPartition operations.
        ray.register_custom_serializer(types.MethodType, use_pickle=True)
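Usage note: the environment variables read above must be set before `initialize_ray()` runs. A hedged sketch with illustrative values (`MODIN_MEMORY` is interpreted as bytes by the code above):

import os

os.environ["MODIN_OUT_OF_CORE"] = "True"
os.environ["MODIN_MEMORY"] = str(16 * 10**9)  # cap the plasma store at 16 GB

initialize_ray()  # the function defined in Example #2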
Example #3
def custom_serializers():
    class Foo:
        def __init__(self):
            self.x = 3

    def custom_serializer(obj):
        return 3, "string1", type(obj).__name__

    def custom_deserializer(serialized_obj):
        return serialized_obj, "string2"

    ray.register_custom_serializer(Foo,
                                   serializer=custom_serializer,
                                   deserializer=custom_deserializer)

    assert ray.get(ray.put(Foo())) == ((3, "string1", Foo.__name__), "string2")

    class Bar:
        def __init__(self):
            self.x = 3

    ray.register_custom_serializer(Bar,
                                   serializer=custom_serializer,
                                   deserializer=custom_deserializer)

    @ray.remote
    def f():
        return Bar()

    assert ray.get(f.remote()) == ((3, "string1", Bar.__name__), "string2")
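The asserts above encode the round-trip contract: whatever the serializer returns is exactly what the deserializer receives on `ray.get`, so the pair composes as:

# Equivalent to what the asserts check, without going through Ray:
obj = Foo()
assert custom_deserializer(custom_serializer(obj)) == ((3, "string1", "Foo"),
                                                       "string2")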
Example #4
def init_ray():

    ray.init()

    def serializer(obj):
        if obj.is_cuda:
            return obj.cpu().numpy()
        else:
            return obj.numpy()

    def deserializer(serialized_obj):
        return serialized_obj

    for t in [
            torch.FloatTensor,
            torch.DoubleTensor,
            torch.HalfTensor,
            torch.ByteTensor,
            torch.CharTensor,
            torch.ShortTensor,
            torch.IntTensor,
            torch.LongTensor,
            torch.Tensor,
    ]:
        ray.register_custom_serializer(t,
                                       serializer=serializer,
                                       deserializer=deserializer)
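Note that the deserializer above returns the serialized NumPy array as-is, so tensors come back from the object store as arrays (Example #19 below shows a variant that casts back to a tensor type). A small hedged check, assuming the registration above:

import numpy as np
import ray
import torch

init_ray()  # the function above: ray.init() plus serializer registration
restored = ray.get(ray.put(torch.ones(3)))
assert isinstance(restored, np.ndarray)  # an array, not a torch.Tensor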
Example #5
def init(name=None,
         http_host=DEFAULT_HTTP_HOST,
         http_port=DEFAULT_HTTP_PORT,
         metric_exporter=InMemoryExporter):
    """Initialize or connect to a serve cluster.

    If the serve cluster is already initialized, this function will just
    return.

    If `ray.init` has not been called in this process, it will be called with
    no arguments. To specify kwargs to `ray.init`, it should be called
    separately before calling `serve.init`.

    Args:
        name (str): A unique name for this serve instance. This allows
            multiple serve instances to run on the same ray cluster. Must be
            specified in all subsequent serve.init() calls.
        http_host (str): Host for HTTP server. Default to "0.0.0.0".
        http_port (int): Port for HTTP server. Default to 8000.
        metric_exporter(ExporterInterface): The class that aggregates metrics
            from all RayServe actors and optionally exports them to external
            services. RayServe has two options built in: InMemoryExporter and
            PrometheusExporter.
    """
    if name is not None and not isinstance(name, str):
        raise TypeError("name must be a string.")

    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init()

    # Try to get serve master actor if it exists
    global master_actor
    master_actor_name = format_actor_name(SERVE_MASTER_NAME, name)
    try:
        master_actor = ray.get_actor(master_actor_name)
        return
    except ValueError:
        pass

    # Register serialization context once
    ray.register_custom_serializer(Query, Query.ray_serialize,
                                   Query.ray_deserialize)
    ray.register_custom_serializer(RequestMetadata,
                                   RequestMetadata.ray_serialize,
                                   RequestMetadata.ray_deserialize)

    # TODO(edoakes): for now, always start the HTTP proxy on the node that
    # serve.init() was run on. We should consider making this configurable
    # in the future.
    http_node_id = ray.state.current_node_id()
    master_actor = ServeMaster.options(
        name=master_actor_name,
        max_restarts=-1,
        max_task_retries=-1,
    ).remote(name, http_node_id, http_host, http_port, metric_exporter)

    block_until_http_ready("http://{}:{}/-/routes".format(
        http_host, http_port),
                           timeout=HTTP_PROXY_TIMEOUT)
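Per the docstring, custom `ray.init` kwargs go in a separate call made before `serve.init`. A minimal hedged sketch (the instance name is illustrative):

import ray
from ray import serve

# Call ray.init with custom kwargs first; serve.init then reuses the
# existing connection instead of calling ray.init() with no arguments.
ray.init(num_cpus=8)
serve.init(name="my_instance", http_port=8000)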
Example #6
    def __init__(self,
                 benchmark_database=None,
                 evaluators=None,
                 terminal_when=None,
                 behaviors=None,
                 num_scenarios=None,
                 benchmark_configs=None,
                 log_eval_avg_every=None,
                 num_cpus=None,
                 memory_total=None):
        super().__init__(benchmark_database=benchmark_database,
                         evaluators=evaluators,
                         terminal_when=terminal_when,
                         behaviors=behaviors,
                         num_scenarios=num_scenarios,
                         benchmark_configs=benchmark_configs)
        # Clamp the requested resources to what the machine actually has.
        num_cpus_available = psutil.cpu_count(logical=True)
        if not (num_cpus and num_cpus <= num_cpus_available):
            num_cpus = num_cpus_available

        memory_available = psutil.virtual_memory().available
        if not (memory_total and memory_total <= memory_available):
            memory_total = memory_available

        # Split memory between workers (30%) and the object store (70%).
        ray.init(num_cpus=num_cpus,
                 memory=memory_total * 0.3,
                 object_store_memory=memory_total * 0.7)

        ray.register_custom_serializer(
            BenchmarkConfig,
            serializer=serialize_benchmark_config,
            deserializer=deserialize_benchmark_config)
        ray.register_custom_serializer(Scenario,
                                       serializer=serialize_scenario,
                                       deserializer=deserialize_scenario)
        self.benchmark_config_split = [
            self.benchmark_configs[i::num_cpus] for i in range(0, num_cpus)
        ]
        self.actors = [
            _BenchmarkRunnerActor.remote(
                evaluators=evaluators,
                terminal_when=terminal_when,
                benchmark_configs=self.benchmark_config_split[i],
                logger_name="BenchmarkingActor{}".format(i),
                log_eval_avg_every=log_eval_avg_every) for i in range(num_cpus)
        ]
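The `serialize_benchmark_config`/`deserialize_benchmark_config` helpers passed above are project-specific and not shown here. A minimal hypothetical pair, not the project's actual implementation, could simply delegate to pickle:

import pickle

def serialize_benchmark_config(benchmark_config):
    # Hypothetical sketch: reduce the config to bytes Ray can ship around.
    return pickle.dumps(benchmark_config)

def deserialize_benchmark_config(serialized_obj):
    return pickle.loads(serialized_obj)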
Example #7
def run_ray_many(tune_config, exp_config, experiments, fix_seed=False):

    # update config
    tune_config["config"] = exp_config

    # override when running local for test
    if not torch.cuda.is_available():
        tune_config["config"]["device"] = "cpu"
        tune_config["resources_per_trial"] = {"cpu": 1}

    # MC code to fix for an unknown bug
    def serializer(obj):
        if obj.is_cuda:
            return obj.cpu().numpy()
        else:
            return obj.numpy()

    def deserializer(serialized_obj):
        return serialized_obj

    for t in [
        torch.FloatTensor,
        torch.DoubleTensor,
        torch.HalfTensor,
        torch.ByteTensor,
        torch.CharTensor,
        torch.ShortTensor,
        torch.IntTensor,
        torch.LongTensor,
        torch.Tensor,
    ]:
        ray.register_custom_serializer(
            t, serializer=serializer, deserializer=deserializer
        )

    # fix seed
    if fix_seed:
        set_random_seed(32)

    # multiple experiments
    exp_configs = [
        (name, new_experiment(exp_config, c)) for name, c in experiments.items()
    ]

    # init ray
    ray.init()
    results = [
        run_experiment.remote(name, RayTrainable, c, tune_config)
        for name, c in exp_configs
    ]
    ray.get(results)
    ray.shutdown()
Example #8
 def __init__(self,
              num_trees=0,
              rand_features=None,
              max_depth=15,
              header=""):
     super(RandomForest, self).__init__()
     self.trained_trees = [None] * num_trees
     self.num_trees = num_trees
     self.rand_features = rand_features or "sqrt"  # fall back to "sqrt" when not given
     self.max_depth = max_depth
     self.header = header
     ray.init()
     ray.register_custom_serializer(RandomForest, use_pickle=True)
Example #9
def register_serializer(conn_str=None, apikey=None, profile=None):
    """
    Register serializer for BTrDB Object
    Parameters
    ----------
    conn_str: str, default=None
        The address and port of the cluster to connect to, e.g. `192.168.1.1:4411`.
        If set to None, will look in the environment variable `$BTRDB_ENDPOINTS`
        (recommended).
    apikey: str, default=None
        The API key used to authenticate requests (optional). If None, the key
        is looked up from the environment variable `$BTRDB_API_KEY`.
    profile: str, default=None
        The name of a profile containing the required connection information as
        found in the user's predictive grid credentials file
        `~/.predictivegrid/credentials.yaml`.
    """
    try:
        import ray
    except ImportError:
        raise ImportError("must pip install ray to register custom serializer")
    try:
        import semver
    except ImportError:
        raise ImportError(
            "must pip install semver to register custom serializer")

    assert ray.is_initialized(), \
        "Need to call ray.init() before registering custom serializer"
    # TODO: check the version using the 'semver' package?
    ver = semver.VersionInfo.parse(ray.__version__)
    if ver.major == 0:
        ray.register_custom_serializer(BTrDB,
                                       serializer=btrdb_serializer,
                                       deserializer=partial(btrdb_deserializer,
                                                            conn_str=conn_str,
                                                            apikey=apikey,
                                                            profile=profile))
    elif ver.major == 1 and ver.minor in range(2, 4):
        # TODO: check different versions of ray?
        ray.util.register_serializer(BTrDB,
                                     serializer=btrdb_serializer,
                                     deserializer=partial(btrdb_deserializer,
                                                          conn_str=conn_str,
                                                          apikey=apikey,
                                                          profile=profile))
    else:
        raise Exception(
            "Ray version %s does not have custom serialization. Please upgrade to >= 1.2.0"
            % ray.__version__)
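As the assert above requires, Ray must already be initialized when this helper runs. Hedged usage per the docstring (the endpoint is the docstring's example address):

import ray

ray.init()
# Connection details may instead come from $BTRDB_ENDPOINTS / $BTRDB_API_KEY.
register_serializer(conn_str="192.168.1.1:4411")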
Example #10
File: core.py Project: luo981695830/mars
def _register_ray_serializer(op):
    # register a custom serializer for Mars operand
    try:
        ray.register_custom_serializer(type(op),
                                       serializer=operand_serializer,
                                       deserializer=operand_deserializer)
    except AttributeError:  # ray >= 1.0
        from ray.worker import global_worker

        global_worker.check_connected()
        context = global_worker.get_serialization_context()
        context.register_custom_serializer(type(op),
                                           serializer=operand_serializer,
                                           deserializer=operand_deserializer)
Example #11
    def __init__(self,
                 name,
                 flags,
                 auto_pilot,
                 camera_setups=[],
                 lidar_setups=[],
                 log_file_name=None,
                 csv_file_name=None):
        super(CarlaLegacyOperator, self).__init__(name)
        self._flags = flags
        self._logger = setup_logging(self.name, log_file_name)
        self._csv_logger = setup_csv_logging(self.name + '-csv', csv_file_name)
        self._auto_pilot = auto_pilot
        if self._flags.carla_high_quality:
            quality = 'Epic'
        else:
            quality = 'Low'
        self._settings = CarlaSettings()
        self._settings.set(
            SynchronousMode=self._flags.carla_synchronous_mode,
            SendNonPlayerAgentsInfo=True,
            NumberOfVehicles=self._flags.carla_num_vehicles,
            NumberOfPedestrians=self._flags.carla_num_pedestrians,
            WeatherId=self._flags.carla_weather,
            QualityLevel=quality)
        self._settings.randomize_seeds()
        self._transforms = {}
        # Add cameras to the simulation.
        for cs in camera_setups:
            self.__add_camera(cs)
            self._transforms[cs.name] = cs.get_transform()
        # Add lidars to the simulation.
        for ls in lidar_setups:
            self.__add_lidar(ls)
            self._transforms[ls.name] = ls.get_transform()
        self.agent_id_map = {}
        self.pedestrian_count = 0

        # Initialize the control state.
        self.control = {
            'steer': 0.0,
            'throttle': 0.0,
            'brake': 0.0,
            'hand_brake': False,
            'reverse': False
        }
        # Register custom serializers for Messages and WatermarkMessages
        ray.register_custom_serializer(Message, use_pickle=True)
        ray.register_custom_serializer(WatermarkMessage, use_pickle=True)
Example #12
def init_ray(num_cpus=None, num_gpus=None, ray_redis_address=None):
    """Initialize ray. If `ray_redis_address` is given, use the address to
    connect existing ray cluster. Otherwise start ray locally.
    """
    if ray_redis_address is not None:
        ray.init(redis_address=ray_redis_address)
    else:
        if num_gpus is None:
            num_gpus = torch.cuda.device_count()
        ray.init(num_gpus=num_gpus, num_cpus=num_cpus)

    # XXX: Currently, ray (pyarrow) does not serialize `requires_grad`
    # attribute. As a workaround, use custom serializer.
    # See https://github.com/ray-project/ray/issues/4855
    ray.register_custom_serializer(torch.nn.Module, use_pickle=True)
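A small hedged check of the workaround above: with pickle-based serialization, the `requires_grad` flag survives the round trip that the linked issue reports pyarrow dropping:

import ray
import torch

ray.init()
ray.register_custom_serializer(torch.nn.Module, use_pickle=True)

model = torch.nn.Linear(4, 2)  # parameters default to requires_grad=True
restored = ray.get(ray.put(model))
assert all(p.requires_grad for p in restored.parameters())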
Example #13
def test_numpy_subclass_serialization_pickle(ray_start_regular):
    class MyNumpyConstant(np.ndarray):
        def __init__(self, value):
            super().__init__()
            self.constant = value

        def __str__(self):
            return str(self.constant)

    constant = MyNumpyConstant(123)
    ray.register_custom_serializer(type(constant), use_pickle=True)

    repr_orig = repr(constant)
    repr_ser = repr(ray.get(ray.put(constant)))
    assert repr_orig == repr_ser
Example #14
 def __init__(self, experiment):
     from slm_lab.experiment.control import Experiment
     ray.register_custom_serializer(Experiment, use_pickle=True)
     ray.register_custom_serializer(InfoSpace, use_pickle=True)
     ray.register_custom_serializer(pd.DataFrame, use_pickle=True)
     ray.register_custom_serializer(pd.Series, use_pickle=True)
     self.experiment = experiment
     self.config_space = build_config_space(experiment)
     logger.info(f'Running {util.get_class_name(self)}, with meta spec:\n{self.experiment.spec["meta"]}')
Example #15
def register_ray_serializer():
    '''Helper to register so objects can be serialized in Ray'''
    from slm_lab.experiment.control import Experiment
    from slm_lab.experiment.monitor import InfoSpace
    import pandas as pd
    ray.register_custom_serializer(Experiment, use_pickle=True)
    ray.register_custom_serializer(InfoSpace, use_pickle=True)
    ray.register_custom_serializer(pd.DataFrame, use_pickle=True)
    ray.register_custom_serializer(pd.Series, use_pickle=True)
Example #16
def initialize_ray():
    """Initializes ray based on environment variables and internal defaults."""
    if threading.current_thread().name == "MainThread":
        plasma_directory = None
        object_store_memory = None
        if "MODIN_MEMORY" in os.environ:
            object_store_memory = os.environ["MODIN_MEMORY"]
        if ("MODIN_OUT_OF_CORE" in os.environ
                and os.environ["MODIN_OUT_OF_CORE"].title() == "True"):
            from tempfile import gettempdir

            plasma_directory = gettempdir()
            # We may have already set the memory from the environment variable;
            # don't overwrite that value if so.
            if object_store_memory is None:
                try:
                    from psutil import virtual_memory
                except ImportError:
                    raise ImportError(
                        "To use Modin out of core, please install modin[out_of_core]: "
                        '`pip install "modin[out_of_core]"`')
                # Round down to the nearest Gigabyte.
                mem_bytes = virtual_memory().total // 10**9 * 10**9
                # Default to 8x memory for out of core
                object_store_memory = 8 * mem_bytes
        elif "MODIN_MEMORY" in os.environ:
            object_store_memory = os.environ["MODIN_MEMORY"]
        # In case anything failed above, we can still improve the memory for Modin.
        if object_store_memory is None:
            # Round down to the nearest Gigabyte.
            object_store_memory = int(0.6 * ray.utils.get_system_memory() //
                                      10**9 * 10**9)
            # If the memory pool is smaller than 2GB, just use the default in ray.
            if object_store_memory == 0:
                object_store_memory = None
        ray.init(
            redirect_output=True,
            include_webui=False,
            redirect_worker_output=True,
            ignore_reinit_error=True,
            plasma_directory=plasma_directory,
            object_store_memory=object_store_memory,
        )
        # Register custom serializer for method objects to avoid warning message.
        # We serialize `MethodType` objects when we use AxisPartition operations.
        ray.register_custom_serializer(types.MethodType, use_pickle=True)
Example #17
 def __init__(self, experiment):
     from slm_lab.experiment.control import Experiment
     ray.register_custom_serializer(Experiment, use_pickle=True)
     ray.register_custom_serializer(InfoSpace, use_pickle=True)
     ray.register_custom_serializer(pd.DataFrame, use_pickle=True)
     ray.register_custom_serializer(pd.Series, use_pickle=True)
     self.experiment = experiment
     self.config_space = build_config_space(experiment)
     logger.info(
         f'Running {util.get_class_name(self)}, with meta spec:\n{self.experiment.spec["meta"]}')
Example #18
def test_custom_serializers_with_pickle(shutdown_only):
    ray.init(use_pickle=True)
    custom_serializers()

    class Foo:
        def __init__(self):
            self.x = 4

    # Test the pickle serialization backend without serializer.
    # NOTE: 'use_pickle' here is different from the 'use_pickle'
    # argument passed to ray.init above.
    ray.register_custom_serializer(Foo, use_pickle=True)

    @ray.remote
    def f():
        return Foo()

    assert type(ray.get(f.remote())) == Foo
Example #19
def register_torch_serializers():
    """
    Registers ray custom serializer and deserializer for torch.tensor types. According
    to the ray documentation:
        "The serializer and deserializer are used when transferring objects of cls
        across processes and nodes."

    In particular, these come in handy when array-like logs (from a
    tune.Trainable) are transferred across nodes.

    Example:
    ```
    ray.init()
    register_torch_serializers()
    ```
    """

    # Register serializer and deserializer - needed when logging arrays and tensors.
    def serializer(obj):
        if obj.requires_grad:
            obj = obj.detach()
        if obj.is_cuda:
            return obj.cpu().numpy()
        else:
            return obj.numpy()

    for tensor_type in [
            torch.FloatTensor,
            torch.DoubleTensor,
            torch.HalfTensor,
            torch.ByteTensor,
            torch.CharTensor,
            torch.ShortTensor,
            torch.IntTensor,
            torch.LongTensor,
            torch.Tensor,
    ]:

        def deserializer(serialized_obj, tensor_type=tensor_type):
            # Bind tensor_type now; closures are late-binding, so otherwise
            # every deserializer would cast to the loop's last tensor_type.
            return tensor_type(serialized_obj)

        ray.register_custom_serializer(tensor_type,
                                       serializer=serializer,
                                       deserializer=deserializer)
Example #20
def run_ray(tune_config, exp_config, fix_seed=False):

    # update config
    tune_config["config"] = exp_config
    download_dataset(exp_config)

    # override when running local for test
    if not torch.cuda.is_available():
        tune_config["config"]["device"] = "cpu"
        tune_config["resources_per_trial"] = {"cpu": 1}

    # init ray
    ray.init(load_code_from_local=True)

    # MC code to fix for an unknown bug
    def serializer(obj):
        if obj.is_cuda:
            return obj.cpu().numpy()
        else:
            return obj.numpy()

    def deserializer(serialized_obj):
        return serialized_obj

    for t in [
            torch.FloatTensor,
            torch.DoubleTensor,
            torch.HalfTensor,
            torch.ByteTensor,
            torch.CharTensor,
            torch.ShortTensor,
            torch.IntTensor,
            torch.LongTensor,
            torch.Tensor,
    ]:
        ray.register_custom_serializer(t,
                                       serializer=serializer,
                                       deserializer=deserializer)

    # fix seed
    if fix_seed:
        set_random_seed(32)

    tune.run(Trainable, **tune_config)
Example #21
File: test_basic.py Project: w0617/ray
def test_numpy_subclass_serialization(ray_start_regular):
    class MyNumpyConstant(np.ndarray):
        def __init__(self, value):
            super().__init__()
            self.constant = value

        def __str__(self):
            return str(self.constant)

    constant = MyNumpyConstant(123)

    def explode(x):
        raise RuntimeError("Expected error.")

    ray.register_custom_serializer(
        type(constant), serializer=explode, deserializer=explode)

    try:
        ray.put(constant)
        assert False, "Should never get here!"
    except (RuntimeError, IndexError):
        print("Correct behavior, proof that customer serializer was used.")
Example #22
def register_serializer(conn_str=None, apikey=None, profile=None):
    """
    Register serializer for BTrDB Object
    Parameters
    ----------
    conn_str: str, default=None
        The address and port of the cluster to connect to, e.g. `192.168.1.1:4411`.
        If set to None, will look in the environment variable `$BTRDB_ENDPOINTS`
        (recommended).
    apikey: str, default=None
        The API key used to authenticate requests (optional). If None, the key
        is looked up from the environment variable `$BTRDB_API_KEY`.
    profile: str, default=None
        The name of a profile containing the required connection information as
        found in the user's predictive grid credentials file
        `~/.predictivegrid/credentials.yaml`.
    """
    ray.register_custom_serializer(BTrDB,
                                   serializer=btrdb_serializer,
                                   deserializer=partial(btrdb_deserializer,
                                                        conn_str=conn_str,
                                                        apikey=apikey,
                                                        profile=profile))
Example #23
    def run(self):
        ray.init()
        # register serializers here, as ray is not thread-safe outside this thread
        ray.register_custom_serializer(InfoSpace, use_pickle=True)
        ray.register_custom_serializer(pd.DataFrame, use_pickle=True)
        ray.register_custom_serializer(pd.Series, use_pickle=True)

        def lab_trial(config, reporter):
            '''Trainable method to run a trial given ray config and reporter'''
            trial_index = config.pop('trial_index')
            spec = self.spec_from_config(config)
            info_space = deepcopy(self.experiment.info_space)
            info_space.set('trial', trial_index)
            trial_fitness_df = self.experiment.init_trial_and_run(
                spec, info_space)
            fitness_vec = trial_fitness_df.iloc[0].to_dict()
            fitness = analysis.calc_fitness(trial_fitness_df)
            trial_index = trial_fitness_df.index[0]
            trial_data = {
                **config,
                **fitness_vec,
                'fitness': fitness,
                'trial_index': trial_index,
            }
            done = True
            # TODO timesteps = episode len or total_t from space_clock
            # call reporter from inside trial/session loop
            reporter(timesteps_total=-1, done=done, info=trial_data)

        register_trainable('lab_trial', lab_trial)

        # TODO use hyperband
        # TODO parallelize on trial sessions
        # TODO use advanced conditional config space via lambda func
        config_space = self.build_config_space()
        spec = self.experiment.spec
        ray_trials = run_experiments({
            spec['name']: {
                'run': 'lab_trial',
                'stop': {
                    'done': True
                },
                'config': config_space,
                'repeat': spec['meta']['max_trial'],
            }
        })
        logger.info('Ray.tune experiment.search.run() done.')
        # compose data format for experiment analysis
        trial_data_dict = {}
        for ray_trial in ray_trials:
            exp_trial_data = ray_trial.last_result.info
            trial_index = exp_trial_data.pop('trial_index')
            trial_data_dict[trial_index] = exp_trial_data

        ray.disconnect()
        return trial_data_dict
Example #24
    def build_graph(self):
        self.build_channels()

        # to support cyclic reference serialization
        try:
            ray.register_custom_serializer(Environment, use_pickle=True)
            ray.register_custom_serializer(ExecutionGraph, use_pickle=True)
            ray.register_custom_serializer(OpType, use_pickle=True)
            ray.register_custom_serializer(PStrategy, use_pickle=True)
        except Exception:
            # local mode can't use pickle
            pass

        # Each operator instance is implemented as a Ray actor
        # Actors are deployed in topological order, as we traverse the
        # logical dataflow from sources to sinks.
        for node in nx.topological_sort(self.env.logical_topo):
            operator = self.env.operators[node]
            # Instantiate Ray actors
            handles = self.__generate_actors(
                operator, self.input_channels.get(node, []),
                self.output_channels.get(node, []))
            if handles:
                self.actor_handles.extend(handles)
Example #25
    def __init__(self,
                 benchmark_database=None,
                 evaluators=None,
                 terminal_when=None,
                 behaviors=None,
                 behavior_configs=None,
                 num_scenarios=None,
                 benchmark_configs=None,
                 log_eval_avg_every=None,
                 glog_init_settings=None,
                 checkpoint_dir=None,
                 merge_existing=False,
                 num_cpus=None,
                 memory_total=None,
                 ip_head=None,
                 redis_password=None):
        super().__init__(benchmark_database=benchmark_database,
                         evaluators=evaluators,
                         terminal_when=terminal_when,
                         behaviors=behaviors,
                         behavior_configs=behavior_configs,
                         num_scenarios=num_scenarios,
                         benchmark_configs=benchmark_configs,
                         checkpoint_dir=checkpoint_dir,
                         merge_existing=merge_existing)
        num_cpus_available = psutil.cpu_count(logical=True)

        if ip_head and redis_password:
            ray.init(address=ip_head, redis_password=redis_password)
        else:
            # Clamp the requested resources to what the machine actually has.
            if not (num_cpus and num_cpus <= num_cpus_available):
                num_cpus = num_cpus_available

            memory_available = psutil.virtual_memory().available
            if not (memory_total and memory_total <= memory_available):
                memory_total = memory_available

            # Split memory between workers (30%) and the object store (70%).
            ray.init(
                num_cpus=num_cpus,
                memory=memory_total * 0.3,
                object_store_memory=memory_total * 0.7,
                _internal_config='{"initial_reconstruction_timeout_milliseconds": 100000}')

        serialized_evaluators = pickle.dumps(evaluators)
        ray.register_custom_serializer(
            BenchmarkConfig,
            serializer=serialize_benchmark_config,
            deserializer=deserialize_benchmark_config)
        ray.register_custom_serializer(Scenario,
                                       serializer=serialize_scenario,
                                       deserializer=deserialize_scenario)
        self.benchmark_config_split = [
            self.configs_to_run[i::num_cpus] for i in range(0, num_cpus)
        ]
        self.actors = [
            _BenchmarkRunnerActor.remote(
                serialized_evaluators=serialized_evaluators,
                terminal_when=terminal_when,
                benchmark_configs=self.benchmark_config_split[i],
                logger_name="BenchmarkingActor{}".format(i),
                log_eval_avg_every=log_eval_avg_every,
                checkpoint_dir=checkpoint_dir,
                actor_id=i,
                glog_init_settings=glog_init_settings) for i in range(num_cpus)
        ]
Example #26
        on_perc=ray.tune.grid_search([0.02, 0.04]),
    ),
}
exp_configs = (
    [(name, new_experiment(base_exp_config, c)) for name, c in experiments.items()]
    if experiments
    else [(experiment_name, base_exp_config)]
)

# Register serializers.
ray.init()
for t in [
    torch.FloatTensor,
    torch.DoubleTensor,
    torch.HalfTensor,
    torch.ByteTensor,
    torch.CharTensor,
    torch.ShortTensor,
    torch.IntTensor,
    torch.LongTensor,
    torch.Tensor,
]:
    ray.register_custom_serializer(t, serializer=serializer, deserializer=deserializer)

# run all experiments in parallel
results = [
    run_experiment.remote(name, Trainable, c, tune_config) for name, c in exp_configs
]
ray.get(results)
ray.shutdown()
Example #27
async def test_router_use_max_concurrency(serve_instance):
    # The VisibleRouter::get_queues method needs to pickle queries,
    # so we register the serializer here. In the regular code path, query
    # serialization is done by Serve manually for performance.
    ray.register_custom_serializer(Query, Query.ray_serialize,
                                   Query.ray_deserialize)

    signal = SignalActor.remote()

    @ray.remote
    class MockWorker:
        async def handle_request(self, request):
            await signal.wait.remote()
            return "DONE"

        def ready(self):
            pass

    class VisibleRouter(Router):
        def get_queues(self):
            return self.queries_counter, self.backend_queues

    worker = MockWorker.remote()
    q = ray.remote(VisibleRouter).remote()
    await q.setup.remote("")
    backend_name = "max-concurrent-test"
    config = BackendConfig({"max_concurrent_queries": 1})
    await q.set_traffic.remote("svc", TrafficPolicy({backend_name: 1.0}))
    await q.add_new_worker.remote(backend_name, "replica-tag", worker)
    await q.set_backend_config.remote(backend_name, config)

    # We send over two queries
    first_query = q.enqueue_request.remote(RequestMetadata("svc", None), 1)
    second_query = q.enqueue_request.remote(RequestMetadata("svc", None), 1)

    # Neither query should be available yet
    with pytest.raises(ray.exceptions.RayTimeoutError):
        ray.get([first_query, second_query], timeout=0.2)

    # Let's retrieve the router internal state
    queries_counter, backend_queues = await q.get_queues.remote()
    # There should be just one inflight request
    assert queries_counter[backend_name][
        "max-concurrent-test:replica-tag"] == 1
    # The second query is buffered
    assert len(backend_queues["max-concurrent-test"]) == 1

    # Let's unblock the first query
    await signal.send.remote(clear=True)
    assert await first_query == "DONE"

    # The internal state of router should have changed.
    queries_counter, backend_queues = await q.get_queues.remote()
    # There should still be one inflight request
    assert queries_counter[backend_name][
        "max-concurrent-test:replica-tag"] == 1
    # But there shouldn't be any queries in the queue
    assert len(backend_queues["max-concurrent-test"]) == 0

    # Unblocking the second query
    await signal.send.remote(clear=True)
    assert await second_query == "DONE"

    # Checking the internal state of the router one more time
    queries_counter, backend_queues = await q.get_queues.remote()
    assert queries_counter[backend_name][
        "max-concurrent-test:replica-tag"] == 0
    assert len(backend_queues["max-concurrent-test"]) == 0
Example #28
def init(
    kv_store_connector=None,
    kv_store_path=None,
    blocking=False,
    start_server=True,
    http_host=DEFAULT_HTTP_HOST,
    http_port=DEFAULT_HTTP_PORT,
    ray_init_kwargs={
        "object_store_memory": int(1e8),
        "num_cpus": max(cpu_count(), 8)
    },
    gc_window_seconds=3600,
    queueing_policy=RoutePolicy.Random,
    policy_kwargs={},
):
    """Initialize a serve cluster.

    If the serve cluster has already been initialized, this function will
    just return.

    Calling `ray.init` before `serve.init` is optional. If no ray cluster has
    been initialized, serve will call `ray.init` with an `object_store_memory`
    requirement.

    Args:
        kv_store_connector (callable): Function of (namespace) => TableObject.
            We will use a SQLite connector that stores to /tmp by default.
        kv_store_path (str, path): Path to the SQLite table.
        blocking (bool): If true, the function will wait for the HTTP server
            to be healthy and other components to be ready before returning.
        start_server (bool): If true, `serve.init` starts http server.
            (Default: True)
        http_host (str): Host for HTTP server. Default to "0.0.0.0".
        http_port (int): Port for HTTP server. Default to 8000.
        ray_init_kwargs (dict): Arguments passed to ray.init if there is no
            existing ray connection. Defaults to
            {"object_store_memory": int(1e8)} for performance stability
            reasons.
        gc_window_seconds(int): How long metric data is kept in memory. Data
            older than the gc_window will be deleted. The default is 3600
            seconds (1 hour).
        queueing_policy(RoutePolicy): Define the queueing policy for selecting
            the backend for a service. (Default: RoutePolicy.Random)
        policy_kwargs: Arguments required to instantiate a queueing policy
    """
    global master_actor
    if master_actor is not None:
        return

    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init(**ray_init_kwargs)

    # Register Query serialization early; it is needed even when we connect
    # to an existing serve instance and return below.
    ray.register_custom_serializer(Query, Query.ray_serialize,
                                   Query.ray_deserialize)

    # Try to get serve master actor if it exists
    try:
        master_actor = ray.util.get_actor(SERVE_MASTER_NAME)
        return
    except ValueError:
        pass

    # Register serialization context once
    ray.register_custom_serializer(Query, Query.ray_serialize,
                                   Query.ray_deserialize)
    ray.register_custom_serializer(RequestMetadata,
                                   RequestMetadata.ray_serialize,
                                   RequestMetadata.ray_deserialize)

    if kv_store_path is None:
        _, kv_store_path = mkstemp()

    # Serve has not been initialized, perform init sequence
    # TODO move the db to session_dir.
    #    ray.worker._global_node.address_info["session_dir"]
    def kv_store_connector(namespace):
        return SQLiteKVStore(namespace, db_path=kv_store_path)

    master_actor = ServeMaster.options(
        detached=True,
        name=SERVE_MASTER_NAME,
        max_reconstructions=ray.ray_constants.INFINITE_RECONSTRUCTION,
    ).remote(kv_store_connector, queueing_policy.value, policy_kwargs,
             start_server, http_host, http_port, gc_window_seconds)

    if start_server and blocking:
        block_until_http_ready("http://{}:{}/-/routes".format(
            http_host, http_port))
Example #29
def init(cluster_name=None,
         blocking=False,
         start_server=True,
         http_host=DEFAULT_HTTP_HOST,
         http_port=DEFAULT_HTTP_PORT,
         ray_init_kwargs={
             "object_store_memory": int(1e8),
             "num_cpus": max(cpu_count(), 8)
         },
         metric_exporter=InMemoryExporter):
    """Initialize a serve cluster.

    If the serve cluster has already been initialized, this function will
    just return.

    Calling `ray.init` before `serve.init` is optional. If no ray cluster has
    been initialized, serve will call `ray.init` with an `object_store_memory`
    requirement.

    Args:
        cluster_name (str): A unique name for this serve cluster. This allows
            multiple serve clusters to run on the same ray cluster. Must be
            specified in all subsequent serve.init() calls.
        blocking (bool): If true, the function will wait for the HTTP server
            to be healthy and other components to be ready before returning.
        start_server (bool): If true, `serve.init` starts http server.
            (Default: True)
        http_host (str): Host for HTTP server. Default to "0.0.0.0".
        http_port (int): Port for HTTP server. Default to 8000.
        ray_init_kwargs (dict): Arguments passed to ray.init if there is no
            existing ray connection. Defaults to
            {"object_store_memory": int(1e8)} for performance stability
            reasons.
        metric_exporter(ExporterInterface): The class that aggregates metrics
            from all RayServe actors and optionally exports them to external
            services. RayServe has two options built in: InMemoryExporter and
            PrometheusExporter.
    """
    if cluster_name is not None and not isinstance(cluster_name, str):
        raise TypeError("cluster_name must be a string.")

    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init(**ray_init_kwargs)

    # Try to get serve master actor if it exists
    global master_actor
    master_actor_name = format_actor_name(SERVE_MASTER_NAME, cluster_name)
    try:
        master_actor = ray.util.get_actor(master_actor_name)
        return
    except ValueError:
        pass

    # Register serialization context once
    ray.register_custom_serializer(Query, Query.ray_serialize,
                                   Query.ray_deserialize)
    ray.register_custom_serializer(RequestMetadata,
                                   RequestMetadata.ray_serialize,
                                   RequestMetadata.ray_deserialize)

    # TODO(edoakes): for now, always start the HTTP proxy on the node that
    # serve.init() was run on. We should consider making this configurable
    # in the future.
    http_node_id = ray.state.current_node_id()
    master_actor = ServeMaster.options(
        detached=True,
        name=master_actor_name,
        max_restarts=-1,
    ).remote(cluster_name, start_server, http_node_id, http_host, http_port,
             metric_exporter)

    if start_server and blocking:
        block_until_http_ready("http://{}:{}/-/routes".format(
            http_host, http_port))
Example #30
File: api.py Project: skyofwinter/ray
def init(kv_store_connector=None,
         kv_store_path=None,
         blocking=False,
         http_host=DEFAULT_HTTP_HOST,
         http_port=DEFAULT_HTTP_PORT,
         ray_init_kwargs={
             "object_store_memory": int(1e8),
             "num_cpus": max(cpu_count(), 8)
         },
         gc_window_seconds=3600,
         queueing_policy=RoutePolicy.Random,
         policy_kwargs={}):
    """Initialize a serve cluster.

    If the serve cluster has already been initialized, this function will
    just return.

    Calling `ray.init` before `serve.init` is optional. If no ray cluster has
    been initialized, serve will call `ray.init` with an `object_store_memory`
    requirement.

    Args:
        kv_store_connector (callable): Function of (namespace) => TableObject.
            We will use a SQLite connector that stores to /tmp by default.
        kv_store_path (str, path): Path to the SQLite table.
        blocking (bool): If true, the function will wait for the HTTP server
            to be healthy and other components to be ready before returning.
        http_host (str): Host for HTTP server. Default to "0.0.0.0".
        http_port (int): Port for HTTP server. Default to 8000.
        ray_init_kwargs (dict): Arguments passed to ray.init if there is no
            existing ray connection. Defaults to
            {"object_store_memory": int(1e8)} for performance stability
            reasons.
        gc_window_seconds(int): How long metric data is kept in memory. Data
            older than the gc_window will be deleted. The default is 3600
            seconds (1 hour).
        queueing_policy(RoutePolicy): Define the queueing policy for selecting
            the backend for a service. (Default: RoutePolicy.Random)
        policy_kwargs: Arguments required to instantiate a queueing policy
    """
    global global_state
    # Noop if global_state is no longer None
    if global_state is not None:
        return

    # Initialize ray if needed.
    if not ray.is_initialized():
        ray.init(**ray_init_kwargs)

    # Try to get serve nursery if there exists
    try:
        ray.experimental.get_actor(SERVE_NURSERY_NAME)
        global_state = GlobalState()
        return
    except ValueError:
        pass

    # Register serialization context once
    ray.register_custom_serializer(Query, Query.ray_serialize,
                                   Query.ray_deserialize)

    if kv_store_path is None:
        _, kv_store_path = mkstemp()

    # Serve has not been initialized, perform init sequence
    # TODO: move the db to session_dir
    #    ray.worker._global_node.address_info["session_dir"]
    def kv_store_connector(namespace):
        return SQLiteKVStore(namespace, db_path=kv_store_path)

    nursery = start_initial_state(kv_store_connector)

    global_state = GlobalState(nursery)
    global_state.init_or_get_http_server(host=http_host, port=http_port)
    global_state.init_or_get_router(queueing_policy=queueing_policy,
                                    policy_kwargs=policy_kwargs)
    global_state.init_or_get_metric_monitor(
        gc_window_seconds=gc_window_seconds)

    if blocking:
        block_until_http_ready("http://{}:{}".format(http_host, http_port))
Example #31
        features, speakers, TRAINING_CHUNK_SIZE)
    print("training length: {}".format(train_set))
    return nspeakers, train_set


if __name__ == "__main__":
    parser = argparse.ArgumentParser("Speech Verification")
    parser.add_argument("--ray", action='store_true', default=False)
    parser.add_argument("--data-parallel", action='store_true', default=False)
    parser.add_argument("--chunks", type=int, default=1)

    args = parser.parse_args()
    print(args)
    ray.init(num_gpus=1)
    ray.register_custom_serializer(torch.Tensor,
                                   serializer=serializer,
                                   deserializer=deserializer)

    # Load train set
    nspeakers, train_set = load_train_set(args)
    train_set_id = pin_in_object_store(train_set)

    print("Loaded train. pinned={}".format(True))

    # Load dev set
    dev_set = load_dev_set(args)
    dev_set_id = pin_in_object_store(dev_set)
    print("Loaded dev. pinned={}".format(True))

    tune.register_trainable('train_sc', train.Trainer)
    exp = Experiment(
Example #32
        records.append(Record((w, 1)))
    return records


# Receives an object of type Record and returns the actual tuple
def as_tuple(record):
    return record.record


if __name__ == "__main__":
    # Get program parameters
    args = parser.parse_args()
    input_file = str(args.input_file)

    ray.init()
    ray.register_custom_serializer(Record, use_dict=True)
    ray.register_custom_serializer(BatchedQueue, use_pickle=True)
    ray.register_custom_serializer(OpType, use_pickle=True)
    ray.register_custom_serializer(PStrategy, use_pickle=True)

    # A Ray streaming environment with the default configuration
    env = Environment()
    env.set_parallelism(2)  # Each operator will be executed by two actors

    # 'key_by("word")' physically partitions the stream of records
    # based on the hash value of the 'word' attribute (see Record class above)
    # 'map(as_tuple)' maps a record of type Record into a tuple
    # 'sum(1)' sums the 2nd element of the tuple, i.e. the word count
    stream = env.read_text_file(input_file) \
                .round_robin() \
                .flat_map(splitter) \