def __init__(self,
             memory_size=20000,
             random_generator=default_random_generator,
             memory_ratio=1.0):
    self.memory = Queue(maxsize=memory_size)
    self.random_generator = random_generator
    self.memory_ratio = memory_ratio
def test_simple_use():
    q = Queue()
    items = list(range(10))
    for item in items:
        q.put(item)
    for item in items:
        assert item == q.get()
class EvaluationLogger:
    def __init__(self, get_log_data, log_size=100000):
        self.log = Queue()
        self.get_log_data = get_log_data

    def log_data(self, env):
        self.log.put(self.get_log_data(env))

    def get_data(self):
        return [self.log.get() for _ in range(self.log.size())]
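# A minimal usage sketch for EvaluationLogger, assuming the Queue it wraps is
# ray.util.queue.Queue (so Ray must be running). The extractor callable and
# DummyEnv below are illustrative stand-ins, not part of the original code.
import ray

ray.init(ignore_reinit_error=True)


def get_episode_reward(env):
    # Hypothetical extractor: pull whichever metric should be logged.
    return {"reward": env.last_reward}


class DummyEnv:
    last_reward = 1.5


logger = EvaluationLogger(get_episode_reward)
logger.log_data(DummyEnv())
print(logger.get_data())  # -> [{'reward': 1.5}]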
def test_simple_usage(ray_start_regular):
    q = Queue()
    items = list(range(10))
    for item in items:
        q.put(item)
    for item in items:
        assert item == q.get()
def test_async():
    q = Queue()
    items = set(range(10))
    producers = [  # noqa
        put_async.remote(q, item, True, None, 0.5) for item in items
    ]
    consumers = [get_async.remote(q, True, None, 0) for _ in items]
    result = set(ray.get(consumers))
    assert items == result
def test_async_put(ray_start_regular):
    q = Queue(1)
    q.put(1)
    future = async_put.remote(q, 2)
    with pytest.raises(Full):
        q.put_nowait(3)
    with pytest.raises(RayTimeoutError):
        ray.get(future, timeout=0.1)  # task not canceled on timeout.
    assert q.get() == 1
    assert q.get() == 2
def test_qsize():
    q = Queue()
    items = list(range(10))
    size = 0
    assert q.qsize() == size
    for item in items:
        q.put(item)
        size += 1
        assert q.qsize() == size
    for item in items:
        assert q.get() == item
        size -= 1
        assert q.qsize() == size
def test_async_get(ray_start_regular):
    q = Queue()
    future = async_get.remote(q)
    with pytest.raises(Empty):
        q.get_nowait()
    with pytest.raises(RayTimeoutError):
        ray.get(future, timeout=0.1)  # task not canceled on timeout.
    q.put(1)
    assert ray.get(future) == 1
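# The two tests above call async_put.remote(q, item) and async_get.remote(q),
# which are not defined in these snippets. A minimal sketch of helper tasks
# with matching call signatures (an assumption, not the original definitions):
import ray


@ray.remote
def async_put(queue, item):
    # Blocks inside the task until there is room in the queue.
    queue.put(item, block=True)


@ray.remote
def async_get(queue):
    # Blocks inside the task until an item is available, then returns it.
    return queue.get(block=True)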
class ReplayMemory:
    def __init__(self,
                 memory_size=20000,
                 random_generator=default_random_generator,
                 memory_ratio=1.0):
        self.memory = Queue(maxsize=memory_size)
        self.random_generator = random_generator
        self.memory_ratio = memory_ratio

    def add(self, data, block=False):
        # Drop the oldest entry when the queue is full.
        if self.memory.full():
            self.memory.get(True)
        # Keep only a fraction memory_ratio of the incoming samples.
        if self.random_generator.rand() < self.memory_ratio:
            self.memory.put(data, block)

    def sample(self, n):
        assert n <= self.memory.size(), "Not enough replay memory"
        # Note: sampling drains the queue; all stored items are removed.
        data = []
        while self.memory.size() > 0:
            data.append(self.memory.get())
        sample_idx = self.random_generator.randint(len(data), n)
        samples = [data[i] for i in sample_idx]
        return torch.stack(samples, dim=0)
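# A minimal usage sketch for ReplayMemory. The class depends on a
# `default_random_generator` exposing rand() and randint(num_items, n); the
# tiny stand-in below is an assumption used only to make the example
# self-contained, not the project's actual generator.
import numpy as np
import torch


class _NumpyGenerator:
    def rand(self):
        return np.random.rand()

    def randint(self, num_items, n):
        # Return n random indices in [0, num_items).
        return np.random.randint(0, num_items, size=n)


memory = ReplayMemory(memory_size=1000,
                      random_generator=_NumpyGenerator(),
                      memory_ratio=1.0)
for _ in range(32):
    memory.add(torch.randn(4))  # illustrative 4-dim transition placeholder

batch = memory.sample(8)  # shape (8, 4); remember that sample() drains the queue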
def time_put(self):
    queue = Queue(1000)
    for i in range(1000):
        queue.put(i)
def test_put(ray_start_regular):
    q = Queue(1)
    item = 0
    q.put(item, block=False)
    assert q.get() == item
    item = 1
    q.put(item, timeout=0.2)
    assert q.get() == item
    with pytest.raises(ValueError):
        q.put(0, timeout=-1)
    q.put(0)
    with pytest.raises(Full):
        q.put_nowait(1)
    with pytest.raises(Full):
        q.put(1, timeout=0.2)
def test_get(ray_start_regular):
    q = Queue()
    item = 0
    q.put(item)
    assert q.get(block=False) == item
    item = 1
    q.put(item)
    assert q.get(timeout=0.2) == item
    with pytest.raises(ValueError):
        q.get(timeout=-1)
    with pytest.raises(Empty):
        q.get_nowait()
    with pytest.raises(Empty):
        q.get(timeout=0.2)
def test_put():
    q = Queue(1)
    item = 0
    q.put(item, block=False)
    assert q.get() == item
    item = 1
    q.put(item, timeout=0.2)
    assert q.get() == item
    with pytest.raises(ValueError):
        q.put(0, timeout=-1)
    q.put(0)
    with pytest.raises(Full):
        q.put_nowait(1)
    with pytest.raises(Full):
        q.put(1, timeout=0.2)
    q.get()
    q.put(1)
    get_id = get_async.remote(q, False, None, 0.2)
    q.put(2)
    assert ray.get(get_id) == 1
def test_get():
    q = Queue()
    item = 0
    q.put(item)
    assert q.get(block=False) == item
    item = 1
    q.put(item)
    assert q.get(timeout=0.2) == item
    with pytest.raises(ValueError):
        q.get(timeout=-1)
    with pytest.raises(Empty):
        q.get_nowait()
    with pytest.raises(Empty):
        q.get(timeout=0.2)
    item = 0
    put_async.remote(q, item, True, None, 0.2)
    assert q.get() == item
def test_put():
    start_ray()
    q = Queue(1)
    item = 0
    q.put(item, block=False)
    assert q.get() == item
    item = 1
    q.put(item, timeout=0.2)
    assert q.get() == item
    with pytest.raises(ValueError):
        q.put(0, timeout=-1)
    q.put(0)
    with pytest.raises(Full):
        q.put_nowait(1)
    with pytest.raises(Full):
        q.put(1, timeout=0.2)
    q.get()
    q.put(1)
    get_id = get_async.remote(q, False, None, 0.2)
    q.put(2)
    assert ray.get(get_id) == 1
def test_get():
    start_ray()
    q = Queue()
    item = 0
    q.put(item)
    assert q.get(block=False) == item
    item = 1
    q.put(item)
    assert q.get(timeout=0.2) == item
    with pytest.raises(ValueError):
        q.get(timeout=-1)
    with pytest.raises(Empty):
        q.get_nowait()
    with pytest.raises(Empty):
        q.get(timeout=0.2)
    item = 0
    put_async.remote(q, item, True, None, 0.2)
    assert q.get() == item
def time_get(self):
    queue = Queue()
    for i in range(1000):
        queue.put(i)
    for _ in range(1000):
        queue.get()
def time_qsize(self):
    queue = Queue()
    for _ in range(1000):
        queue.qsize()
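# The time_put / time_get / time_qsize methods above take `self`, which
# suggests they are meant to live on a benchmark suite class (asv-style).
# A minimal sketch of such a wrapper, assuming ray.util.queue.Queue and that
# Ray is started before each timed method runs; the class name is illustrative:
import ray
from ray.util.queue import Queue


class QueueBenchmarks:
    def setup(self):
        # asv calls setup() before each timed method; start Ray lazily here.
        if not ray.is_initialized():
            ray.init()

    def time_qsize(self):
        queue = Queue()
        for _ in range(1000):
            queue.qsize()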
def train(self, writer: Logger):
    ray.init()
    os.makedirs(self.config.results_path, exist_ok=True)

    # Initialize workers
    training_worker = trainer.Trainer.options(
        num_gpus=1 if "cuda" in self.config.training_device else 0).remote(
            copy.deepcopy(self.muzero_weights), self.config)
    shared_storage_worker = shared_storage.SharedStorage.remote(
        copy.deepcopy(self.muzero_weights),
        self.game_name,
        self.config,
    )
    replay_buffer_worker = replay_buffer.ReplayBuffer.remote(
        self.config, shared_storage_worker)
    self_play_workers = [
        self_play.SelfPlay.remote(
            copy.deepcopy(self.muzero_weights),
            self.Game(self.config.seed + seed),
            self.config,
        ) for seed in range(self.config.num_actors)
    ]
    test_worker = self_play.SelfPlay.remote(
        copy.deepcopy(self.muzero_weights),
        self.Game(self.config.seed + self.config.num_actors),
        self.config,
    )

    queue = None
    if self.config.policy_update_rate > 0:
        if self.config.reanalyze_mode == "fast":
            reanalyze_worker = fast_reanalyze.ReanalyzeWorker.remote(
                copy.deepcopy(self.muzero_weights), shared_storage_worker,
                replay_buffer_worker, self.config)
            reanalyze_worker.update_policies.remote()
        else:
            queue = Queue()
            for i in range(self.config.num_reanalyze_cpus):
                reanalyze_worker = reanalyze.ReanalyzeQueueWorker.remote(
                    copy.deepcopy(self.muzero_weights),
                    shared_storage_worker, replay_buffer_worker, self.config,
                    queue)
                reanalyze_worker.fill_batch_queue.remote()

    # Launch workers
    [
        self_play_worker.continuous_self_play.remote(shared_storage_worker,
                                                     replay_buffer_worker)
        for self_play_worker in self_play_workers
    ]
    test_worker.continuous_self_play.remote(shared_storage_worker, None, True)
    training_worker.continuous_update_weights.remote(replay_buffer_worker,
                                                     shared_storage_worker,
                                                     queue)

    # Save hyperparameters to TensorBoard
    hp_table = [
        "| {} | {} |".format(key, value)
        for key, value in self.config.__dict__.items()
    ]
    writer.add_text(
        "Hyperparameters",
        "| Parameter | Value |\n|-------|-------|\n" + "\n".join(hp_table),
    )

    # Loop for monitoring the workers in real time
    counter = 0
    infos = ray.get(shared_storage_worker.get_infos.remote())
    try:
        while infos["training_step"] < self.config.training_steps:
            # Get and save real-time performance
            infos = ray.get(shared_storage_worker.get_infos.remote())
            writer.add_scalar(
                "1.Total reward/1.Total reward",
                infos["total_reward"],
                counter,
            )
            writer.add_scalar(
                "1.Total reward/2.Episode length",
                infos["episode_length"],
                counter,
            )
            writer.add_scalar(
                "1.Total reward/3.Player 0 MuZero reward",
                infos["player_0_reward"],
                counter,
            )
            writer.add_scalar(
                "1.Total reward/4.Player 1 Random reward",
                infos["player_1_reward"],
                counter,
            )
            writer.add_scalar(
                "1.Total reward/5.Average reward",
                infos["average_reward"],
                counter,
            )
            writer.add_scalar(
                "2.Workers/1.Self played games",
                ray.get(replay_buffer_worker.get_self_play_count.remote()),
                counter,
            )
            writer.add_scalar("2.Workers/2.Training steps",
                              infos["training_step"], counter)
            writer.add_scalar(
                "2.Workers/3.Self played games per training step ratio",
                ray.get(replay_buffer_worker.get_self_play_count.remote()) /
                max(1, infos["training_step"]),
                counter,
            )
            writer.add_scalar("2.Workers/4.Learning rate", infos["lr"],
                              counter)
            writer.add_scalar(
                "2.Workers/5.Self played test games",
                infos["test_games"],
                counter,
            )
            writer.add_scalar(
                "2.Workers/6.Samples count per training step ratio",
                infos["samples_count"] / max(1, infos["training_step"]),
                counter,
            )
            writer.add_scalar(
                "2.Workers/7.Samples count",
                infos["samples_count"],
                counter,
            )
            writer.add_scalar(
                "2.Workers/8.Reanalyzed count",
                infos["reanalyzed_count"],
                counter,
            )
            writer.add_scalar(
                "2.Workers/9.Reanalyzed count per samples count",
                infos["reanalyzed_count"] / max(1, infos["samples_count"]),
                counter,
            )
            writer.add_scalar(
                "2.Workers/10.ReMCTS count",
                infos["remcts_count"],
                counter,
            )
            writer.add_scalar(
                "2.Workers/11.ReMCTS count per samples count",
                infos["remcts_count"] / max(1, infos["samples_count"]),
                counter,
            )
            writer.add_scalar("3.Loss/1.Total weighted loss",
                              infos["total_loss"], counter)
            writer.add_scalar("3.Loss/Value loss", infos["value_loss"],
                              counter)
            writer.add_scalar("3.Loss/Reward loss", infos["reward_loss"],
                              counter)
            writer.add_scalar("3.Loss/Policy loss", infos["policy_loss"],
                              counter)
            print(
                "Last test reward: {0:.2f}. Training step: {1}/{2}. Played games: {3}. Loss: {4:.2f}"
                .format(
                    infos["total_reward"],
                    infos["training_step"],
                    self.config.training_steps,
                    ray.get(
                        replay_buffer_worker.get_self_play_count.remote()),
                    infos["total_loss"],
                ),
                end="\r",
            )
            counter += 1
            time.sleep(0.5)
    except KeyboardInterrupt as err:
        # Comment the line below to be able to stop the training but keep running
        # raise err
        pass

    self.muzero_weights = ray.get(
        shared_storage_worker.get_target_network_weights.remote())
    # End running actors
    ray.shutdown()
def test_queue(ray_start_regular):
    @ray.remote
    def get_async(queue, block, timeout, sleep):
        time.sleep(sleep)
        return queue.get(block, timeout)

    @ray.remote
    def put_async(queue, item, block, timeout, sleep):
        time.sleep(sleep)
        queue.put(item, block, timeout)

    # Test simple usage.
    q = Queue()
    items = list(range(10))
    for item in items:
        q.put(item)
    for item in items:
        assert item == q.get()

    # Test asynchronous usage.
    q = Queue()
    items = set(range(10))
    producers = [  # noqa
        put_async.remote(q, item, True, None, 0.5) for item in items
    ]
    consumers = [get_async.remote(q, True, None, 0) for _ in items]
    result = set(ray.get(consumers))
    assert items == result

    # Test put.
    q = Queue(1)
    item = 0
    q.put(item, block=False)
    assert q.get() == item
    item = 1
    q.put(item, timeout=0.2)
    assert q.get() == item
    with pytest.raises(ValueError):
        q.put(0, timeout=-1)
    q.put(0)
    with pytest.raises(Full):
        q.put_nowait(1)
    with pytest.raises(Full):
        q.put(1, timeout=0.2)
    q.get()
    q.put(1)
    get_id = get_async.remote(q, False, None, 0.2)
    q.put(2)
    assert ray.get(get_id) == 1

    # Test get.
    q = Queue()
    item = 0
    q.put(item)
    assert q.get(block=False) == item
    item = 1
    q.put(item)
    assert q.get(timeout=0.2) == item
    with pytest.raises(ValueError):
        q.get(timeout=-1)
    with pytest.raises(Empty):
        q.get_nowait()
    with pytest.raises(Empty):
        q.get(timeout=0.2)
    item = 0
    put_async.remote(q, item, True, None, 0.2)
    assert q.get() == item

    # Test qsize.
    q = Queue()
    items = list(range(10))
    size = 0
    assert q.qsize() == size
    for item in items:
        q.put(item)
        size += 1
        assert q.qsize() == size
    for item in items:
        assert q.get() == item
        size -= 1
        assert q.qsize() == size
def __init__(self, get_log_data, log_size=100000):
    self.log = Queue()
    self.get_log_data = get_log_data
def evaluation(
        evaluation_config_path="./configs/evaluation/fast_reanalyze_evaluation.toml"
):
    t1 = time.time()
    ray.init()
    config = load_toml(evaluation_config_path)
    api = wandb.Api()
    if len(config.run_ids) > 0:
        runs = [
            api.run(path=f"{config.entity}/{config.project_name}/{id}")
            for id in config.run_ids
        ]
    else:
        runs = api.runs(path=f"{config.entity}/{config.project_name}",
                        filters=config.filters)
    results = SharedResults.remote(num_episodes=config.num_episodes)
    job_queue = Queue()

    # Fill the queue with models to evaluate
    for run in runs:
        files = run.files()
        print(files)
        env_config_file = find_env_config(files.objects, r"(:?^|\s)\w*(?=.py)")
        try:
            weights_file_result = run.files("model.weights")
            if env_config_file is None:
                continue
            env_config_name = os.path.splitext(env_config_file.name)[0]
            # if os.path.exists(os.path.join(ModelEvaluator.CONFIGS_DIR_PATH, env_config_file.name)) is False:
            env_config_file.download(True,
                                     root=ModelEvaluator.CONFIGS_DIR_PATH)
            weight_file_path = os.path.join(ModelEvaluator.WEIGHTS_DIR_PATH,
                                            env_config_name,
                                            f"{run.id}.weights")
            if os.path.exists(weight_file_path) is False:
                pathlib.Path(os.path.dirname(weight_file_path)).mkdir(
                    parents=True, exist_ok=True)
                weights_file = weights_file_result[0].download(
                    replace=True, root=ModelEvaluator.WEIGHTS_DIR_PATH)
                shutil.move(weights_file.name, weight_file_path)
                weight_file_path = weights_file.name
                del weights_file
            for seed in range(config.num_episodes):
                job_queue.put(
                    (env_config_name, weight_file_path, env_config_file, seed))
        except Exception:
            print(f"{run.name} failure")

    # Start the model evaluator workers
    evaluators = []
    for _ in range(config.num_workers):
        model_evaluator = ModelEvaluator.remote(job_queue, results,
                                                config.num_episodes)
        evaluators.append(model_evaluator.evaluate.remote())

    # Wait for all the workers to be done
    ray.get(evaluators)

    # Save the results
    ids_string = '_'.join(config.run_ids[-10:])  # at most the last 10 run ids
    filter_string = '_'.join(
        [f"{key}-{value}" for key, value in config.filters.items()])
    with open(
            f'evaluation_results/test_results_{ids_string}_{filter_string}.json',
            'w') as outfile:
        json.dump(ray.get(results.get_result.remote()), outfile)
    print(f"Time taken : {time.time() - t1}")
def test_queue(ray_start):
    @ray.remote
    def get_async(queue, block, timeout, sleep):
        time.sleep(sleep)
        return queue.get(block, timeout)

    @ray.remote
    def put_async(queue, item, block, timeout, sleep):
        time.sleep(sleep)
        queue.put(item, block, timeout)

    # Test simple usage.
    q = Queue()
    items = list(range(10))
    for item in items:
        q.put(item)
    for item in items:
        assert item == q.get()

    # Test asynchronous usage.
    q = Queue()
    items = set(range(10))
    producers = [  # noqa
        put_async.remote(q, item, True, None, 0.5) for item in items
    ]
    consumers = [get_async.remote(q, True, None, 0) for _ in items]
    result = set(ray.get(consumers))
    assert items == result

    # Test put.
    q = Queue(1)
    item = 0
    q.put(item, block=False)
    assert q.get() == item
    item = 1
    q.put(item, timeout=0.2)
    assert q.get() == item
    with pytest.raises(ValueError):
        q.put(0, timeout=-1)
    q.put(0)
    with pytest.raises(Full):
        q.put_nowait(1)
    with pytest.raises(Full):
        q.put(1, timeout=0.2)
    q.get()
    q.put(1)
    get_id = get_async.remote(q, False, None, 0.2)
    q.put(2)
    assert ray.get(get_id) == 1

    # Test get.
    q = Queue()
    item = 0
    q.put(item)
    assert q.get(block=False) == item
    item = 1
    q.put(item)
    assert q.get(timeout=0.2) == item
    with pytest.raises(ValueError):
        q.get(timeout=-1)
    with pytest.raises(Empty):
        q.get_nowait()
    with pytest.raises(Empty):
        q.get(timeout=0.2)
    item = 0
    put_async.remote(q, item, True, None, 0.2)
    assert q.get() == item

    # Test qsize.
    q = Queue()
    items = list(range(10))
    size = 0
    assert q.qsize() == size
    for item in items:
        q.put(item)
        size += 1
        assert q.qsize() == size
    for item in items:
        assert q.get() == item
        size -= 1
        assert q.qsize() == size