def test_protocol_version(init_and_serve):
    """Check how the client reacts to the protocol version the server reports.

    A first connection must report ``CURRENT_PROTOCOL_VERSION``. The server's
    connection-response builder is then replaced by one that reports the
    protocol version "2050-01-01"; connecting must raise ``RuntimeError``
    unless ``ignore_version=True`` is passed.
    """
    server_handle = init_and_serve
    address = "localhost:50051"

    client = RayAPIStub()
    baseline_info = client.connect(address)
    local_py_version = ".".join(map(str, sys.version_info[:3]))
    assert baseline_info["protocol_version"] == CURRENT_PROTOCOL_VERSION, \
        baseline_info
    client.disconnect()
    time.sleep(1)

    def fake_connection_response():
        # Same Python version as this process, so only the protocol differs.
        return ray_client_pb2.ConnectionInfoResponse(
            num_clients=1,
            python_version=local_py_version,
            ray_version="",
            ray_commit="",
            protocol_version="2050-01-01",  # from the future
        )

    # Inject the fake builder into the running server.
    server_handle.data_servicer._build_connection_response = \
        fake_connection_response

    client = RayAPIStub()
    with pytest.raises(RuntimeError):
        client.connect(address)

    client = RayAPIStub()
    lenient_info = client.connect(address, ignore_version=True)
    assert lenient_info["num_clients"] == 1, lenient_info
    client.disconnect()
def test_python_version():
    """Check how the client reacts to the Python version the server reports.

    A first connection must report this interpreter's ``major.minor.micro``
    version. The server's connection-response builder is then replaced by one
    that reports "2.7.12"; connecting must raise ``RuntimeError`` unless
    ``ignore_version=True`` is passed. The server is shut down in all cases.
    """
    address = "localhost:50051"
    server_handle, _ = ray_client_server.init_and_serve(address)
    try:
        client = RayAPIStub()
        baseline_info = client.connect(address)
        expected_version = ".".join(map(str, sys.version_info[:3]))
        assert baseline_info["python_version"] == expected_version
        client.disconnect()
        time.sleep(1)

        def fake_connection_response():
            return ray_client_pb2.ConnectionInfoResponse(
                num_clients=1,
                python_version="2.7.12",
                ray_version="",
                ray_commit="",
            )

        # Inject the fake builder into the running server.
        server_handle.data_servicer._build_connection_response = \
            fake_connection_response

        client = RayAPIStub()
        with pytest.raises(RuntimeError):
            client.connect(address)

        client = RayAPIStub()
        lenient_info = client.connect(address, ignore_version=True)
        assert lenient_info["num_clients"] == 1, lenient_info
        client.disconnect()
    finally:
        ray_client_server.shutdown_with_server(server_handle.grpc_server)
        time.sleep(2)
def run(self):
    '''
    Run the experiment's trials through ray.tune and collect their results.

    Registers a 'lab_trial' trainable that runs one trial per config, launches
    it with run_experiments, then gathers each trial's reported info.
    ray.disconnect() is called even when setup or a trial raises.

    @returns {dict} trial_data_dict mapping trial_index to that trial's data
    (its config values, the first row of its fitness dataframe, and 'fitness').
    '''
    ray.init()
    try:
        # serialize here as ray is not thread safe outside
        ray.register_custom_serializer(InfoSpace, use_pickle=True)
        ray.register_custom_serializer(pd.DataFrame, use_pickle=True)
        ray.register_custom_serializer(pd.Series, use_pickle=True)

        def lab_trial(config, reporter):
            '''Trainable method to run a trial given ray config and reporter'''
            # trial_index is removed from config before the spec is built
            trial_index = config.pop('trial_index')
            spec = self.spec_from_config(config)
            info_space = deepcopy(self.experiment.info_space)
            info_space.set('trial', trial_index)
            trial_fitness_df = self.experiment.init_trial_and_run(
                spec, info_space)
            fitness_vec = trial_fitness_df.iloc[0].to_dict()
            fitness = analysis.calc_fitness(trial_fitness_df)
            # report the index recorded in the fitness dataframe
            trial_index = trial_fitness_df.index[0]
            trial_data = {
                **config,
                **fitness_vec,
                'fitness': fitness,
                'trial_index': trial_index,
            }
            done = True
            # TODO timesteps = episode len or total_t from space_clock
            # call reporter from inside trial/session loop
            reporter(timesteps_total=-1, done=done, info=trial_data)

        register_trainable('lab_trial', lab_trial)

        # TODO use hyperband
        # TODO parallelize on trial sessions
        # TODO use advanced conditional config space via lambda func
        config_space = self.build_config_space()
        spec = self.experiment.spec
        ray_trials = run_experiments({
            spec['name']: {
                'run': 'lab_trial',
                'stop': {
                    'done': True
                },
                'config': config_space,
                'repeat': spec['meta']['max_trial'],
            }
        })
        logger.info('Ray.tune experiment.search.run() done.')

        # compose data format for experiment analysis
        trial_data_dict = {}
        for ray_trial in ray_trials:
            exp_trial_data = ray_trial.last_result.info
            trial_index = exp_trial_data.pop('trial_index')
            trial_data_dict[trial_index] = exp_trial_data
    finally:
        # always release the ray connection, including on failure
        ray.disconnect()
    return trial_data_dict
def reset_ray(master, params):
    """Tear Ray down, restart it, and install a fast GCS flush policy.

    Ray is disconnected and shut down, then re-initialised (after a 5 second
    pause) with ``RAY_USE_NEW_GCS`` removed from the environment. The variable
    is set to 'True' after ``ray.init`` returns, and a ``SimpleGcsFlushPolicy``
    with a 0.1 second flush period is installed.

    Args:
        master: Unused by this function.
        params: Unused by this function.
    """
    ray.disconnect()
    ray.shutdown()
    time.sleep(5)
    # pop() instead of del: the variable may not be set on the first reset,
    # and a KeyError here would leave Ray shut down without restarting it.
    os.environ.pop('RAY_USE_NEW_GCS', None)
    ray.init(plasma_directory="/tmp")
    os.environ['RAY_USE_NEW_GCS'] = 'True'
    flush_policy = ray.experimental.SimpleGcsFlushPolicy(flush_period_secs=0.1)
    ray.experimental.set_flushing_policy(flush_policy)
def run(self):
    '''
    Implement the main run_trial loop.
    Remember to call ray init and disconnect before and after loop.

    This base version only shows the outline and always raises
    NotImplementedError. An implementation is expected to return its
    collected trial data (trial_data_dict).
    '''
    ray.init()
    # loop for max_trial: generate_config(); run_trial.remote(config)
    ray.disconnect()
    raise NotImplementedError
def run(self):
    '''
    Run an evolutionary search over trial configs, one generation at a time.

    Each generation submits a ray trial for every individual whose config has
    not been seen before, waits for the results, assigns each individual the
    fitness of its trial (0 if the trial has no recorded data), logs the
    fittest individuals, and breeds the next population via select + varAnd.
    ray.disconnect() is called even when a generation raises.

    @returns {dict} trial_data_dict keyed by trial_index
    '''
    meta_spec = self.experiment.spec['meta']
    ray.init(**meta_spec.get('resources', {}))
    try:
        max_generation = meta_spec['max_generation']
        # a falsy max_trial falls back to a computed population size
        pop_size = meta_spec['max_trial'] or calc_population_size(
            self.experiment)
        logger.info(
            f'EvolutionarySearch max_generation: {max_generation}, population size: {pop_size}'
        )
        trial_data_dict = {}
        config_hash = {}  # config hash_str to trial_index

        toolbox = self.init_deap()
        population = toolbox.population(n=pop_size)
        for gen in range(1, max_generation + 1):
            logger.info(f'Running generation: {gen}/{max_generation}')
            ray_id_to_config = {}
            pending_ids = []
            for individual in population:
                config = dict(individual.items())
                # hash is taken before trial_index is added to the config
                hash_str = util.to_json(config, indent=0)
                if hash_str not in config_hash:
                    trial_index = self.experiment.info_space.tick(
                        'trial')['trial']
                    config_hash[hash_str] = config['trial_index'] = trial_index
                    ray_id = run_trial.remote(self.experiment, config)
                    ray_id_to_config[ray_id] = config
                    pending_ids.append(ray_id)
                # tag the individual so its result can be looked up below
                individual['trial_index'] = config_hash[hash_str]
            trial_data_dict.update(
                get_ray_results(pending_ids, ray_id_to_config))

            for individual in population:
                trial_index = individual.pop('trial_index')
                trial_data = trial_data_dict.get(
                    trial_index, {'fitness': 0})  # if trial errored
                # fitness values are assigned as a 1-tuple
                individual.fitness.values = trial_data['fitness'],

            fittest = tools.selBest(population, k=min(10, pop_size))
            preview = 'Fittest of population preview:' + ''.join(
                f'\nfitness: {individual.fitness.values[0]}, {individual}'
                for individual in fittest)
            logger.info(preview)

            # prepare offspring for next generation
            if gen < max_generation:
                population = toolbox.select(population, len(population))
                # Vary the pool of individuals
                population = algorithms.varAnd(
                    population, toolbox, cxpb=0.5, mutpb=0.5)
    finally:
        # always release the ray connection, including on failure
        ray.disconnect()
    return trial_data_dict
def testObjStore(self):
    """Round-trip objects through two object stores using two manual drivers.

    Starts a local Ray with two object stores and no workers, connects the
    default driver plus two extra workers (one per object store), and checks
    that put/get returns equal data within one worker and across workers.
    Finally checks that a buffer written before ``finish_buffer`` is visible
    through ``get_buffer``.
    """
    node_ip_address = "127.0.0.1"
    scheduler_address = ray.services.start_ray_local(
        num_objstores=2, num_workers=0, worker_path=None)
    ray.connect(node_ip_address, scheduler_address, mode=ray.SCRIPT_MODE)
    objstore_addresses = [
        store_info["address"]
        for store_info in ray.scheduler_info()["objstores"]
    ]
    w1 = ray.worker.Worker()
    w2 = ray.worker.Worker()
    # Connect each extra worker to its own object store, in order.
    for store_index, driver in enumerate((w1, w2)):
        ray.reusables._cached_reusables = []  # This is a hack to make the test run.
        ray.connect(node_ip_address, scheduler_address,
                    objstore_address=objstore_addresses[store_index],
                    mode=ray.SCRIPT_MODE, worker=driver)

    for cls in [Foo, Bar, Baz, Qux, SubQux, Exception, CustomError, Point,
                NamedTupleExample]:
        ray.register_class(cls)

    # Putting and getting an object shouldn't change it, whether it is read
    # back by the same worker or shipped to the other one (both directions).
    for writer, reader in ((w1, w1), (w1, w2), (w2, w1)):
        for data in RAY_TEST_OBJECTS:
            objectid = ray.put(data, writer)
            fetched = ray.get(objectid, reader)
            assert_equal(fetched, data)

    # This test fails. See https://github.com/ray-project/ray/issues/159.
    # getting multiple times shouldn't matter
    # for data in [np.zeros([10, 20]), np.random.normal(size=[45, 25]), np.zeros([10, 20], dtype=np.dtype("float64")), np.zeros([10, 20], dtype=np.dtype("float32")), np.zeros([10, 20], dtype=np.dtype("int64")), np.zeros([10, 20], dtype=np.dtype("int32"))]:
    #   objectid = worker.put(data, w1)
    #   result = worker.get(objectid, w2)
    #   result = worker.get(objectid, w2)
    #   result = worker.get(objectid, w2)
    #   assert_equal(result, data)

    # Getting a buffer after modifying it before it finishes should return
    # updated buffer
    objectid = ray.libraylib.get_objectid(w1.handle)
    pending = ray.libraylib.allocate_buffer(w1.handle, objectid, 100)
    pending[0][0] = 1
    ray.libraylib.finish_buffer(w1.handle, objectid, pending[1], 0)
    finished = ray.libraylib.get_buffer(w1.handle, objectid)
    self.assertEqual(finished[0][0], 1)

    # We started multiple drivers manually, so we will disconnect them
    # manually.
    for driver in (w1, w2):
        ray.disconnect(worker=driver)
    ray.worker.cleanup()
def test_basic_preregister(init_and_serve):
    """Use a pre-declared remote function and actor through the Ray client.

    Connects to the server at localhost:50051, checks that ``hello_world``
    returns a value in [20, 200], then checks that calling ``double`` twice on
    ``C.remote(3)`` leaves ``get`` returning 12. The client is disconnected
    even when an assertion fails, so a failure here does not leave the shared
    client connected for later tests.
    """
    from ray.util.client import ray
    ray.connect("localhost:50051")
    try:
        val = ray.get(hello_world.remote())
        print(val)
        assert val >= 20
        assert val <= 200
        c = C.remote(3)
        x = c.double.remote()
        y = c.double.remote()
        # wait on both calls before reading the actor's value
        ray.wait([x, y])
        val = ray.get(c.get.remote())
        assert val == 12
    finally:
        ray.disconnect()
def test_idempotent_disconnect(init_and_serve):
    """Call ``disconnect`` repeatedly, both before and after a connection.

    The test passes as long as none of the calls raises.
    """
    from ray.util.client import ray

    # Two disconnects with no connection established.
    for _ in range(2):
        ray.disconnect()

    ray.connect("localhost:50051")

    # Two disconnects after a single connect.
    for _ in range(2):
        ray.disconnect()
def run(self):
    '''
    Submit max_trial rounds of generated configs as ray trials and collect
    their results.

    Every config returned by generate_config() is launched with
    run_trial.remote; results are gathered once all trials are submitted.
    ray.disconnect() is called even when submission or collection raises.

    @returns {dict} trial_data_dict built from get_ray_results
    '''
    meta_spec = self.experiment.spec['meta']
    ray.init(**meta_spec.get('resources', {}))
    try:
        max_trial = meta_spec['max_trial']
        trial_data_dict = {}
        ray_id_to_config = {}
        pending_ids = []

        for _t in range(max_trial):
            for config in self.generate_config():
                ray_id = run_trial.remote(self.experiment, config)
                ray_id_to_config[ray_id] = config
                pending_ids.append(ray_id)

        trial_data_dict.update(get_ray_results(pending_ids, ray_id_to_config))
    finally:
        # always release the ray connection, including on failure
        ray.disconnect()
    return trial_data_dict
def test_basic_preregister():
    """Use a pre-declared remote function and actor through the Ray client.

    Starts its own client server on localhost:50051, checks that
    ``hello_world`` returns a value in [20, 200], then checks that calling
    ``double`` twice on ``C.remote(3)`` leaves ``get`` returning 12. The
    client is disconnected and the server shut down in all cases.
    """
    from ray.util.client import ray

    address = "localhost:50051"
    server, _ = ray_client_server.init_and_serve(address)
    try:
        ray.connect(address)

        greeting = ray.get(hello_world.remote())
        print(greeting)
        assert greeting >= 20
        assert greeting <= 200

        actor = C.remote(3)
        first_call = actor.double.remote()
        second_call = actor.double.remote()
        ray.wait([first_call, second_call])

        final_value = ray.get(actor.get.remote())
        assert final_value == 12
    finally:
        ray.disconnect()
        ray_client_server.shutdown_with_server(server)
        time.sleep(2)
def test_basic_preregister(init_and_serve):
    """Tests conversion of Ray actors and remote functions to client actors
    and client remote functions.

    Checks that the conversion works when disconnecting and reconnecting
    client sessions. Each session is disconnected even when an assertion
    fails, so a failure does not leave the shared client connected.
    """
    from ray.util.client import ray
    for _ in range(2):
        ray.connect("localhost:50051")
        try:
            val = ray.get(hello_world.remote())
            print(val)
            assert val >= 20
            assert val <= 200
            c = C.remote(3)
            x = c.double.remote()
            y = c.double.remote()
            # wait on both calls before reading the actor's value
            ray.wait([x, y])
            val = ray.get(c.get.remote())
            assert val == 12
        finally:
            ray.disconnect()
def __del__(self):
    """Shut down the owned cluster and disconnect from Ray on finalization.

    A finalizer can run on an instance whose construction did not complete,
    so a missing (or None) ``cluster`` attribute is tolerated. The Ray
    disconnect is attempted even if shutting the cluster down raises.
    """
    cluster = getattr(self, "cluster", None)
    try:
        if cluster is not None:
            cluster.shutdown()
    finally:
        ray.disconnect()
def bounce():
    """Disconnect from Ray, then initialise it again.

    Returns:
        Whatever ``ray.init()`` returns.
    """
    ray.disconnect()  # might be incorrect
    restarted = ray.init()
    return restarted