def search(self, n_mcts, c, Env, mcts_env, budget, max_depth=200, fixed_depth=True):
    """Perform the MCTS search from the root.

    Runs select/expand traces from the root until the simulation budget is
    exhausted, backing up a discounted scalar return after each trace.

    :param n_mcts: trace count (not read here; ``budget`` drives the loop)
    :param c: exploration constant for UCB-style selection
    :param Env: environment whose deep copies are rolled out
    :param mcts_env: rollout environment handle; overwritten each trace
    :param budget: total number of environment samples allowed
    :param max_depth: rollout depth used when expanding a leaf
    :param fixed_depth: if True, rollouts always use ``max_depth``;
        otherwise the depth already travelled (``st``) is subtracted
    :raises ValueError: if the root state is terminal
    :raises NotImplementedError: for Atari environments (unsupported)
    """
    env = copy.deepcopy(Env)
    self.create_root(env, budget)
    if self.root.terminal:
        raise (ValueError("Can't do tree search from a terminal state"))
    is_atari = is_atari_game(env)
    if is_atari:
        raise NotImplementedError
    while budget > 0:
        state = self.root  # reset to root for new trace
        if not is_atari:
            mcts_env = copy.deepcopy(Env)  # copy original Env to rollout from
        else:
            raise NotImplementedError
        # fresh stochasticity per trace so repeated rollouts differ
        mcts_env.seed(np.random.randint(1e7))
        st = 0  # depth of the current trace
        terminal = False
        while not state.terminal:
            # optionally shrink exploration with depth (geometric decay)
            bias = c * self.gamma**st / (1 - self.gamma) if self.depth_based_bias else c
            action = state.select(c=bias, variance=self.variance)
            st += 1
            if action.child_state is not None:
                state = action.child_state  # select
                terminal, budget = state.sample(mcts_env, action.index, budget)
                if terminal:
                    break
            else:
                rollout_depth = max_depth if fixed_depth else max_depth - st
                state, budget = action.add_child_state(mcts_env, budget, rollout_depth, depth=st)  # expand
                break
        # Back-up: seed the return with the leaf's value estimate
        R = state.V
        state.update()
        while state.parent_action is not None:  # loop back-up until root is reached
            if not terminal:
                R = state.reward + self.gamma * R
            else:
                # terminal step contributes only its own reward; discounting
                # resumes for the ancestors above it
                R = state.reward
                terminal = False
            action = state.parent_action
            action.update(R)
            state = action.parent_state
            state.update()
def search(self, n_mcts, c, env, mcts_env, budget, max_depth=200, fixed_depth=True):
    """Perform the MCTS search from the root (multi-agent variant).

    Like the scalar version, but rewards and returns are per-agent vectors
    indexed by the acting agent (``owner``); ``st`` only advances when the
    rollout environment reports a full transition (``has_transitioned``).

    :param n_mcts: trace count (not read here; ``budget`` drives the loop)
    :param c: exploration constant for UCB-style selection
    :param env: environment whose deep copies are rolled out
    :param mcts_env: rollout environment handle; overwritten each trace
    :param budget: total number of environment samples allowed
    :param max_depth: rollout depth used when expanding a leaf
    :param fixed_depth: if True, rollouts always use ``max_depth``;
        otherwise the depth already travelled (``st``) is subtracted
    :raises ValueError: if the root state is terminal
    :raises NotImplementedError: for Atari environments (unsupported)
    """
    env = copy.deepcopy(env)
    self.create_root(env, budget)
    if self.root.terminal:
        raise ValueError("Can't do tree search from a terminal state")
    is_atari = is_atari_game(env)
    if is_atari:
        raise NotImplementedError
    while budget > 0:
        state = self.root  # reset to root for new trace
        if not is_atari:
            mcts_env = copy.deepcopy(env)  # copy original Env to rollout from
        else:
            raise NotImplementedError
        # fresh stochasticity per trace so repeated rollouts differ
        mcts_env.seed(np.random.randint(int(1e7)))
        st = 0
        terminal = False
        while not state.terminal:
            bias = c * self.gamma ** st / (1 - self.gamma) if self.depth_based_bias else c
            action = state.select(c=bias, variance=self.variance)
            if action.child_state is not None:
                parent_owner = state.owner
                state = action.child_state  # select
                terminal, budget = state.sample(mcts_env, action.index, budget, parent_owner)
                if terminal:
                    break
            else:
                rollout_depth = max_depth if fixed_depth else max_depth - st
                state, budget = action.add_child_state(mcts_env, budget, rollout_depth, depth=st)  # expand
                break
            # Depth only advances on a full environment transition.
            # If there are no more agent in the decision queue, a lap has been completed
            # and the ordering of the agents must be re-evaluated
            if mcts_env.has_transitioned():
                st += 1
        # Back-up: per-agent return vector, seeded with the leaf's value
        # estimate unless the leaf is terminal (then it stays zero)
        R = np.zeros(mcts_env.agents_number)
        if not state.terminal:
            R = copy.deepcopy(state.V)
        state.update()
        agents_reward = copy.deepcopy(state.reward)
        while state.parent_action is not None:  # loop back-up until root is reached
            owner = state.parent_action.owner  # rewards are stored in the state following the action, which has different owner
            if not terminal:
                if state.end_turn:
                    agents_reward = copy.deepcopy(state.reward)
                # Previously wrapped in try/except TypeError that printed R and
                # agents_reward and continued with a stale R[owner]; a TypeError
                # here is a real bug and must propagate.
                R[owner] = agents_reward[owner] + self.gamma * R[owner]
            else:
                if state.terminal:
                    R = copy.deepcopy(state.reward)
                else:
                    # NOTE(review): marked "???" in the original — a terminal
                    # trace that stopped below a non-terminal state only keeps
                    # the owner's immediate reward; confirm this is intended.
                    R[owner] = state.reward[owner]
                terminal = False
            action = state.parent_action
            action.update(R[action.owner])
            state = action.parent_state
            state.update()
def search(self, n_mcts, c, env, mcts_env, budget, max_depth=200, fixed_depth=True):
    """Perform the MCTS search from the root (particle-filter variant).

    Each trace descends the tree while maintaining a ``source_particle``:
    once the widening threshold k = ceil(beta * n**alpha) allows a new
    particle at a node, the trace switches to resampling mode (``flag``)
    and propagates a fresh particle down the remainder of the path.
    Back-up follows the particle chain (``parent_particle``) with per-agent
    reward vectors.

    :param n_mcts: trace count (not read here; ``budget`` drives the loop)
    :param c: exploration constant for UCB-style selection
    :param env: environment whose deep copies are rolled out
    :param mcts_env: rollout environment handle; overwritten each trace
    :param budget: total number of environment samples allowed
    :param max_depth: rollout depth used when expanding a leaf
    :param fixed_depth: if True, rollouts always use ``max_depth``
    :raises ValueError: if the root state is terminal
    :raises NotImplementedError: for Atari environments (unsupported)
    """
    env = copy.deepcopy(env)
    self.create_root(env, budget)
    if self.root.terminal:
        raise (ValueError("Can't do tree search from a terminal state"))
    is_atari = is_atari_game(env)
    if is_atari:
        raise NotImplementedError
    while budget > 0:
        state = self.root  # reset to root for new trace
        if not is_atari:
            mcts_env = copy.deepcopy(env)  # copy original Env to rollout from
        else:
            raise NotImplementedError
        mcts_env.seed(np.random.randint(1e7))
        st = 0
        flag = False  # True once this trace has started adding new particles
        source_particle = None
        while not state.terminal:
            bias = c * self.gamma**st / (1 - self.gamma) if self.depth_based_bias else c
            action = state.select(c=bias, variance=self.variance)
            # widening threshold: allow a new particle once visits justify it
            k = np.ceil(self.beta * action.n**self.alpha)
            if action.child_state is not None:
                state = action.child_state  # select
                add_particle = k >= state.get_n_particles()
                if add_particle and not flag:
                    # first new particle on this trace: sample from the parent state
                    flag = True
                    source_particle, budget = action.sample_from_parent_state(mcts_env, budget)
                    state.add_particle(source_particle)
                    if source_particle.terminal:
                        break
                elif flag:
                    # already resampling: continue the chain from the current particle
                    source_particle, budget = action.sample_from_particle(source_particle, mcts_env, budget)
                    state.add_particle(source_particle)
                    if source_particle.terminal:
                        break
                elif state.terminal:
                    source_particle = np.random.choice(state.particles)
                    budget -= 1  # sample from the terminal states particles
            else:
                rollout_depth = max_depth if fixed_depth else max_depth - st
                state, budget, source_particle = action.add_child_state(mcts_env, budget, max_depth=rollout_depth, source_particle=source_particle, depth=st)  # expand
                break
            # depth advances only on a full environment transition
            if mcts_env.has_transitioned():
                st += 1
        # Back-up: per-agent return vector, seeded with the leaf's value
        # estimate unless the leaf is terminal (then it stays zero)
        R = np.zeros(env.agents_number)
        if not state.terminal:
            R = copy.deepcopy(state.V)
        state.update()
        particle = source_particle
        agents_reward = copy.deepcopy(particle.reward)
        while state.parent_action is not None:  # loop back-up until root is reached
            owner = state.parent_action.owner  # rewards are stored in the state following the action, which has different owner
            r = particle.reward
            if state.end_turn:
                agents_reward = copy.deepcopy(r)
            if not particle.terminal:
                R[owner] = agents_reward[owner] + self.gamma * R[owner]
            else:
                R = copy.deepcopy(r)
            action = state.parent_action
            action.update(R[action.owner])
            state = action.parent_state
            state.update()
            particle = particle.parent_particle  # walk the particle chain in lockstep with the tree
def search(self, n_mcts, c, Env, mcts_env, budget, max_depth=200, fixed_depth=True):
    """Perform the MCTS search from the root (simple particle variant).

    Scalar-return version of the particle search: when the widening
    threshold k = ceil(beta * n**alpha) allows it, a new particle is
    sampled from the parent state; otherwise a stored reward particle is
    resampled from the child.

    :param n_mcts: trace count (not read here; ``budget`` drives the loop)
    :param c: exploration constant for UCB-style selection
    :param Env: environment whose deep copies are rolled out
    :param mcts_env: rollout environment handle; overwritten each trace
    :param budget: total number of environment samples allowed
    :param max_depth: rollout depth used when expanding a leaf
    :param fixed_depth: if True, rollouts always use ``max_depth``
    :raises ValueError: if the root state is terminal
    :raises NotImplementedError: for Atari environments (unsupported)
    """
    env = copy.deepcopy(Env)
    self.create_root(env, budget)
    if self.root.terminal:
        raise (ValueError("Can't do tree search from a terminal state"))
    is_atari = is_atari_game(env)
    if is_atari:
        raise NotImplementedError
    while budget > 0:
        state = self.root  # reset to root for new trace
        if not is_atari:
            mcts_env = copy.deepcopy(Env)  # copy original Env to rollout from
        else:
            raise NotImplementedError
        mcts_env.seed(np.random.randint(1e7))
        st = 0
        terminal = False
        while not state.terminal:
            bias = c * self.gamma**st / (1 - self.gamma) if self.depth_based_bias else c
            action = state.select(c=bias, variance=self.variance)
            st += 1
            # widening threshold: allow a new particle once visits justify it
            k = np.ceil(self.beta * action.n**self.alpha)
            if action.child_state is not None:
                state = action.child_state  # select
                add_particle = k >= state.get_n_particles()
                if add_particle:
                    source_particle, budget = action.sample_from_parent_state(mcts_env, budget)
                    state.add_particle(source_particle)
                    if source_particle.terminal:
                        terminal = True
                        break
                else:
                    particle = state.sample_reward()
                    if state.terminal or particle.terminal:
                        terminal = True
                        # NOTE(review): indentation reconstructed — the budget
                        # decrement is assumed to belong to the terminal-sample
                        # branch, matching the sibling implementation; confirm.
                        budget -= 1  # sample from the terminal states particles
            else:
                rollout_depth = max_depth if fixed_depth else max_depth - st
                state, budget, source_particle = action.add_child_state(mcts_env, budget, max_depth=rollout_depth, depth=st)  # expand
                terminal = source_particle.terminal
                break
        # Back-up: seed the return with the leaf's value estimate
        R = state.V
        state.update()
        while state.parent_action is not None:  # loop back-up until root is reached
            r = state.reward
            if not terminal:
                R = r + self.gamma * R
            else:
                # terminal trace: only the immediate reward is propagated here
                R = r
            action = state.parent_action
            action.update(R)
            state = action.parent_state
            state.update()
def search(self, n_mcts, c, Env, mcts_env, budget, max_depth=200, fixed_depth=True):
    """Perform the MCTS search from the root (persistent-root particle variant).

    Builds (or reuses) a root populated with ``n_particles`` identical
    particles drawn from the environment signature, then runs select/expand
    traces until the budget is spent, backing up a discounted scalar return.

    :param n_mcts: trace count (not read here; ``budget`` drives the loop)
    :param c: exploration constant passed to ``state.select``
    :param Env: environment; deep-copied per particle for rollouts
    :param mcts_env: Atari rollout handle (Atari path is unimplemented)
    :param budget: total number of environment samples allowed
    :param max_depth: rollout depth used when expanding a leaf
    :param fixed_depth: if True, rollouts always use ``max_depth``
    :raises ValueError: if the root state is terminal
    :raises NotImplementedError: for Atari environments (unsupported)
    """
    Envs = None
    if not self.sampler:
        Envs = [copy.deepcopy(Env) for _ in range(self.n_particles)]
    if self.root is None:
        # initialize new root with many equal particles
        signature = Env.get_signature()
        box = None
        # Probe the environment, which is the object actually called below.
        # (The original probed `self`, which could pass the check while
        # Env lacked the method and then raise AttributeError.)
        to_box = getattr(Env, "index_to_box", None)
        if callable(to_box):
            box = to_box(signature["state"])
        particles = [
            # int(1e7): random.randint requires integer bounds (TypeError on
            # a float since Python 3.10)
            Particle(state=signature, seed=random.randint(0, int(1e7)), reward=0, terminal=False, info=box)
            for _ in range(self.n_particles)
        ]
        self.root = State(parent_action=None, na=self.na, envs=Envs, particles=particles, sampler=self.sampler, root=True, budget=budget)
    else:
        self.root.parent_action = None  # continue from current root
        particles = self.root.particles
    if self.root.terminal:
        raise ValueError("Can't do tree search from a terminal state")
    is_atari = is_atari_game(Env)
    if is_atari:
        # Atari is unimplemented; the snapshot call below is an unreachable
        # placeholder kept for a future implementation.
        raise NotImplementedError
        snapshot = copy_atari_state(Env)  # for Atari: snapshot the root at the beginning
    while budget > 0:
        state = self.root  # reset to root for new trace
        if not is_atari:
            mcts_envs = None
            if not self.sampler:
                mcts_envs = [copy.deepcopy(Env) for i in range(self.n_particles)]  # copy original Env to rollout from
        else:
            # unreachable placeholder, see the Atari note above
            raise NotImplementedError
            restore_atari_state(mcts_env, snapshot)
        st = 0
        while not state.terminal:
            action = state.select(c=c)
            st += 1
            # s1, r, t, _ = mcts_env.step(action.index)
            # NOTE(review): sibling implementations test
            # `action.child_state is not None`; this hasattr check only works
            # if child_state is first set at expansion time — confirm.
            if hasattr(action, 'child_state'):
                state = action.child_state  # select
                if state.terminal:
                    # charge one sample per stored particle of the terminal state
                    budget -= len(state.particles)
                    continue
            else:
                rollout_depth = max_depth if fixed_depth else max_depth - st
                state, budget = action.add_child_state(state, mcts_envs, budget, self.sampler, rollout_depth)  # expand
                break
        # Back-up: seed the return with the leaf's value estimate
        R = state.V
        state.update()
        while state.parent_action is not None:  # loop back-up until root is reached
            if not state.terminal:
                R = state.r + self.gamma * R
            else:
                R = state.r
            action = state.parent_action
            action.update(R)
            state = action.parent_state
            state.update()