def __init__(self, env, time_budget, bandit_policy, coef_progressive_widening,
             exec_dir):
    """Initialise the MCTS search state and instantiate the bandit policy.

    Parameters
    ----------
    env : object
        Problem environment (must expose ``start_time`` for the PUCT policy).
    time_budget : int
        Time budget in seconds.
    bandit_policy : dict
        Policy configuration; ``bandit_policy["policy_name"]`` selects one of
        "uct", "besa" or "puct", with optional extra keys (e.g. ``c_ucb``).
    coef_progressive_widening : float
        Progressive-widening coefficient stored on the tree.
    exec_dir : str
        Directory used to store results.

    Raises
    ------
    NotImplementedError
        If ``bandit_policy["policy_name"]`` is not a supported policy.
    """
    self.env = env
    self.time_budget = time_budget
    self.exec_dir = exec_dir
    self.best_config = None
    self.best_score = -np.inf

    # Init tree
    self.tree = Node()

    # Set up logger
    self.logger = logging.getLogger('mcts')

    # Policy
    if bandit_policy["policy_name"] == "uct":
        if "c_ucb" in bandit_policy:
            c_ucb = bandit_policy["c_ucb"]
        else:
            c_ucb = np.sqrt(2)
        self.policy = UCT(c_ucb)
    elif bandit_policy["policy_name"] == "besa":
        self.policy = Besa()
    elif bandit_policy["policy_name"] == "puct":
        # PUCT needs the environment clock and budget to schedule itself.
        bandit_policy["start_time"] = self.env.start_time
        bandit_policy["time_budget"] = self.time_budget
        self.policy = PUCT(self.env, self.tree, bandit_policy)
    else:
        # BUG FIX: `NotImplemented` is a constant, not an exception class;
        # raising it is itself a TypeError. Use NotImplementedError.
        raise NotImplementedError("Policy {0} not implemented".format(
            bandit_policy["policy_name"]))

    self.n_iter = 0
    self.tree.coef_progressive_widening = coef_progressive_widening
def __init__(self, env, policy="uct", time_budget=3600, policy_arg=None,
             exec_dir=""):
    """Initialise the MCTS search state and instantiate the bandit policy.

    Parameters
    ----------
    env : object
        Problem environment (must expose ``start_time`` for "puct";
        ``proba_expert`` is set when ``policy_arg["proba"]`` is given).
    policy : str
        Bandit policy name: "uct", "besa" or "puct".
    time_budget : int
        Time budget in seconds.
    policy_arg : dict or None
        Optional policy parameters (``c_ucb``, ``proba``,
        ``coef_progressive_widening``, ...).
    exec_dir : str
        Directory used to store results.

    Raises
    ------
    NotImplementedError
        If `policy` is not a supported policy name.
    """
    self.env = env
    self.time_budget = time_budget
    self.exec_dir = exec_dir
    self.bestconfig = None
    self.bestscore = -np.inf

    # BUG FIX: the default policy_arg=None used to crash on the membership
    # tests below (TypeError: argument of type 'NoneType' is not iterable).
    if policy_arg is None:
        policy_arg = {}

    # Init tree
    self.tree = Node()

    # Set up logger
    self.logger = logging.getLogger('mcts')

    # Policy
    if policy == "uct":
        if "c_ucb" in policy_arg:
            c_ucb = policy_arg["c_ucb"]
        else:
            c_ucb = np.sqrt(2)
        self.policy = UCT(c_ucb)
    elif policy == "besa":
        self.policy = Besa()
    elif policy == "puct":
        # PUCT needs the environment clock and budget to schedule itself.
        policy_arg["start_time"] = self.env.start_time
        policy_arg["time_budget"] = self.time_budget
        self.policy = PUCT(self.env, self.tree, policy_arg)
    else:
        # BUG FIX: `NotImplemented` is a constant, not an exception class;
        # raising it is itself a TypeError. Use NotImplementedError.
        raise NotImplementedError("Policy {0} not implemented".format(policy))

    # iteration logging
    self.n_iter = 0

    if "proba" in policy_arg:
        self.env.proba_expert = policy_arg["proba"]
    if "coef_progressive_widening" in policy_arg:
        self.tree.coef_progressive_widening = policy_arg[
            "coef_progressive_widening"]
def __init__(self, env, time_budget=3600, policy_arg=None, exec_dir=""):
    """Initialise the MCTS search state with a PUCT bandit policy.

    Parameters
    ----------
    env : object
        Problem environment (must expose ``start_time``).
    time_budget : int
        Time budget in seconds.
    policy_arg : dict or None
        Optional PUCT parameters; ``coef_progressive_widening`` overrides
        the default of 0.6.
    exec_dir : str
        Directory used to store results.
    """
    self.env = env
    self.time_budget = time_budget
    self.exec_dir = exec_dir

    # BUG FIX: the default policy_arg=None used to crash immediately on
    # the item assignments below (TypeError: 'NoneType' object does not
    # support item assignment).
    if policy_arg is None:
        policy_arg = {}

    # Init tree
    self.tree = Node()

    # Set up logger
    self.logger = logging.getLogger('mcts')

    # Policy: PUCT needs the environment clock and budget to schedule itself.
    policy_arg["start_time"] = self.env.start_time
    policy_arg["time_budget"] = self.time_budget
    self.policy = PUCT(self.env, self.tree, policy_arg)

    # iteration logging
    self.n_iter = 0

    if "coef_progressive_widening" in policy_arg:
        self.tree.coef_progressive_widening = policy_arg[
            "coef_progressive_widening"]
    else:
        self.tree.coef_progressive_widening = 0.6
class MCTS():
    """Monte carlo tree search implementation.

    Drives the selection / expansion / playout / back-propagation loop over
    a configuration tree, tracking the best configuration found so far in
    ``bestconfig`` / ``bestscore``.
    """

    def __init__(self, env, policy="uct", time_budget=3600, policy_arg=None,
                 exec_dir=""):
        """Set up environment, tree, logger and the chosen bandit policy.

        Raises
        ------
        NotImplementedError
            If `policy` is not "uct", "besa" or "puct".
        """
        self.env = env
        self.time_budget = time_budget
        self.exec_dir = exec_dir
        self.bestconfig = None
        self.bestscore = -np.inf

        # BUG FIX: the default policy_arg=None used to crash on the
        # membership tests below; treat None as "no extra arguments".
        if policy_arg is None:
            policy_arg = {}

        # Init tree
        self.tree = Node()

        # Set up logger
        self.logger = logging.getLogger('mcts')

        # Policy
        if policy == "uct":
            if "c_ucb" in policy_arg:
                c_ucb = policy_arg["c_ucb"]
            else:
                c_ucb = np.sqrt(2)
            self.policy = UCT(c_ucb)
        elif policy == "besa":
            self.policy = Besa()
        elif policy == "puct":
            policy_arg["start_time"] = self.env.start_time
            policy_arg["time_budget"] = self.time_budget
            self.policy = PUCT(self.env, self.tree, policy_arg)
        else:
            # BUG FIX: `NotImplemented` is a constant, not an exception;
            # raising it is itself a TypeError. Use NotImplementedError.
            raise NotImplementedError(
                "Policy {0} not implemented".format(policy))

        # iteration logging
        self.n_iter = 0

        if "proba" in policy_arg:
            self.env.proba_expert = policy_arg["proba"]
        if "coef_progressive_widening" in policy_arg:
            self.tree.coef_progressive_widening = policy_arg[
                "coef_progressive_widening"]

    def reset(self, time_budget=3600):
        """Reset the iteration counter and (optionally) the time budget."""
        self.time_budget = time_budget
        self.n_iter = 0

    def MCT_SEARCH(self):
        """Run one MCTS iteration (selection, playout, back-propagation).

        Returns
        -------
        (reward, config) : tuple
            Reward and configuration of this playout, or ``(0, None)`` when
            the playout produced no valid configuration.
        """
        self.logger.info(
            "#########################Iteration={0}##################################"
            .format(self.n_iter))
        self.logger.info("Begin SELECTION")
        front = self.TREEPOLICY()
        self.logger.info("End SELECTION")
        self.logger.info("Begin PLAYOUT")
        reward, config = self.PLAYOUT(front)
        self.logger.info("End PLAYOUT")
        if config is None:
            # Invalid playout: nothing to back-propagate.
            return 0, None
        self.logger.info("Begin BACKUP")
        self.BACKUP(front, reward)
        self.logger.info("End BACKUP")
        self.n_iter += 1
        return reward, config

    def TREEPOLICY(self):
        """Selection using policy.

        Walk down from the root, expanding the first non-fully-expanded
        node and otherwise letting the bandit policy pick among the valid
        (non-invalid) children.
        """
        node = 0  # Root of the tree
        while not self.tree.is_terminal(node):
            if len(self.tree.get_children(node)) == 0:
                return self.EXPAND(node)
            else:
                if not self.tree.fully_expanded(node, self.env):
                    self.logger.info("Not fully expanded.")
                    return self.EXPAND(node)
                else:
                    current_node = self.tree.get_info_node(node)
                    # Skip children previously flagged invalid by PLAYOUT.
                    children = [[
                        n,
                        self.tree.get_attribute(n, "reward"),
                        self.tree.get_attribute(n, "visits")
                    ] for n in self.tree.get_children(node)
                        if not self.tree.get_attribute(n, "invalid")]
                    if len(children) > 0:
                        node = self.policy.selection(
                            (current_node["reward"], current_node["visits"]),
                            [x[0] for x in children],
                            [x[1] for x in children],
                            [x[2] for x in children],
                            state=self.tree.get_path_to_node(node))
                        self.logger.info("Selection\t node={0}".format(node))
                    else:
                        self.logger.error(
                            "Empty list of valid children\n current node {0}\t List of children {1}"
                            .format(current_node,
                                    self.tree.get_children(node)))
                        return node
        return node

    def EXPAND(self, node):
        """Expand child node."""
        self.logger.info("Expand on node {0}\n Current history: {1}".format(
            node, self.tree.get_path_to_node(node)))
        name, value, terminal = self.policy.expansion(self.env.next_move, [
            self.tree.get_path_to_node(node),
            self.tree.get_children(node, info=["name", "value"])
        ])
        id = self.tree.add_node(name=name,
                                value=value,
                                terminal=terminal,
                                parent_node=node)
        self.logger.info(
            "Expand\t id={0}\t name={1}\t value={2}\t terminal={3}".format(
                id, name, value, terminal))
        return id

    def PLAYOUT(self, node_id):
        """Playout policy.

        Returns
        -------
        (score, playout_node) : tuple
            Evaluated score and configuration, or ``(0, None)`` if the
            rollout failed (the node is then flagged "invalid").
        """
        self.logger.info("Playout on : {0}".format(
            self.tree.get_path_to_node(node_id)))
        st_time = time.time()
        try:
            playout_node = self.env.rollout(
                self.tree.get_path_to_node(node_id))
        except Exception as e:
            self.logger.error("Add node %s to not possible state: %s" %
                              (node_id, e))
            self.tree.set_attribute(node_id, "invalid", True)
            return 0, None
        score = self.policy.evaluate(self.env._evaluate, [playout_node])
        self.logger.info(
            "Playout\t param={0}\t score={1}\t exec time={2}".format(
                playout_node, score, time.time() - st_time))
        return score, playout_node

    def BACKUP(self, node, reward):
        """Back propagate reward."""
        for parent in self.tree.get_path_to_node(node_id=node, name=False):
            vl, vs = self.tree.get_attribute(
                parent, "reward"), self.tree.get_attribute(parent, "visits")
            new_val, new_vis = self.policy.backpropagate(
                parent, vl, vs, reward)
            self.tree.set_attribute(parent, "reward", new_val)
            self.tree.set_attribute(parent, "visits", new_vis)

    def run(self, n=1, initial_configurations=None, nb_iter_to_generate_img=-1):
        """Run up to `n` MCTS iterations within the time budget.

        Parameters
        ----------
        n : int
            Maximum number of MCTS iterations.
        initial_configurations : list or None
            Configurations to start with (BUG FIX: was a mutable default
            argument ``[]``, shared across calls).
        nb_iter_to_generate_img : int
            Draw the tree every N iterations; -1 draws on every iteration.
        """
        if initial_configurations is None:
            initial_configurations = []
        start_run = time.time()
        with Timeout(int(self.time_budget - (start_run - time.time()))):
            try:
                self.logger.info("Run default configuration")
                self.env.run_default_configuration()

                if len(initial_configurations) > 0:
                    self.logger.info("Run initial configurations")
                else:
                    self.logger.info("No initial configuration to run.")

                for i in range(n):
                    if time.time() - self.env.start_time < self.time_budget:
                        res, config = self.MCT_SEARCH()
                        if res > self.bestscore:
                            self.bestscore = res
                            self.bestconfig = config
                    else:
                        return 0
                    if nb_iter_to_generate_img == -1 or i % nb_iter_to_generate_img == 0:
                        self.tree.draw_tree(
                            os.path.join(self.exec_dir, "images"))
                        self.print_tree("tree_{0}".format(i))
            except Timeout.Timeout:
                self.logger.info("Budget exhausted.")
                return 0

    def print_tree(self, name_img):
        """Draw a snapshot of the tree into ``<exec_dir>/images/<name_img>``."""
        img_dir = os.path.join(self.exec_dir, "images")
        if not os.path.isdir(img_dir):
            os.mkdir(img_dir)
        self.tree.draw_tree(os.path.join(img_dir, name_img))
class MCTS():
    """Monte carlo tree search implementation.

    PUCT-driven variant: playouts sample an expert neighbourhood and the
    tree/scores are persisted to disk after every iteration.
    """

    def __init__(self, env, time_budget=3600, policy_arg=None, exec_dir=""):
        """Set up environment, tree, logger and the PUCT policy."""
        self.env = env
        self.time_budget = time_budget
        self.exec_dir = exec_dir

        # BUG FIX: the default policy_arg=None used to crash immediately
        # on the item assignments below.
        if policy_arg is None:
            policy_arg = {}

        # Init tree
        self.tree = Node()

        # Set up logger
        self.logger = logging.getLogger('mcts')

        # Policy
        policy_arg["start_time"] = self.env.start_time
        policy_arg["time_budget"] = self.time_budget
        self.policy = PUCT(self.env, self.tree, policy_arg)

        # iteration logging
        self.n_iter = 0

        if "coef_progressive_widening" in policy_arg:
            self.tree.coef_progressive_widening = policy_arg[
                "coef_progressive_widening"]
        else:
            self.tree.coef_progressive_widening = 0.6

    def reset(self, time_budget=3600):
        """Reset the iteration counter and (optionally) the time budget."""
        self.time_budget = time_budget
        self.n_iter = 0

    def MCT_SEARCH(self):
        """Monte carlo tree search iteration.

        After the iteration, the score-model data, the tree and the full
        score history are persisted under ``exec_dir``.
        """
        self.logger.info(
            "#########################Iteration={0}##################################"
            .format(self.n_iter))
        front = self.TREEPOLICY()
        reward = self.PLAYOUT(front)
        self.BACKUP(front, reward)
        self.n_iter += 1

        # Persist progress so a crash loses at most one iteration.
        self.env.score_model.save_data(self.exec_dir)
        write_gpickle(self.tree, os.path.join(self.exec_dir, "tree.json"))
        with open(os.path.join(self.exec_dir, "full_log.json"),
                  'w') as outfile:
            json.dump(self.env.history_score, outfile)

    def TREEPOLICY(self):
        """Selection using policy."""
        node = 0  # Root of the tree
        while not self.tree.is_terminal(node):
            if len(self.tree.get_childs(node)) == 0:
                return self.EXPAND(node)
            else:
                if not self.tree.fully_expanded(node, self.env):
                    return self.EXPAND(node)
                else:
                    current_node = self.tree.get_info_node(node)
                    children = [[
                        n,
                        self.tree.get_attribute(n, "reward"),
                        self.tree.get_attribute(n, "visits")
                    ] for n in self.tree.get_childs(node)]
                    node = self.policy.selection(
                        (current_node["reward"], current_node["visits"]),
                        [x[0] for x in children],
                        [x[1] for x in children],
                        [x[2] for x in children],
                        state=self.tree.get_path_to_node(node))
                    self.logger.info("Selection\t node={0}".format(node))
        return node

    def EXPAND(self, node):
        """Expand child node."""
        st_time = time.time()
        name, value, terminal = self.policy.expansion(self.env.next_moves, [
            self.tree.get_path_to_node(node),
            self.tree.get_childs(node, info=["name", "value"])
        ])
        id = self.tree.add_node(name=name,
                                value=value,
                                terminal=terminal,
                                parent_node=node)
        # FIX: debug print() replaced by logging (library code should not
        # write to stdout).
        self.logger.debug("Expand: %s sec", time.time() - st_time)
        self.logger.info(
            "Expand\t id={0}\t name={1}\t value={2}\t terminal={3}".format(
                id, name, value, terminal))
        return id

    def PLAYOUT(self, node_id):
        """Playout policy.

        Evaluate candidate configurations from the expert neighbourhood
        until one scores > 0; give up on a candidate after 200 seconds.

        Returns
        -------
        float
            First positive score found, else 0.
        """
        st_time = time.time()
        playout_nodes = self.env.rollout_in_expert_neighborhood(
            self.tree.get_path_to_node(node_id))
        # FIX: debug print() replaced by logging.
        self.logger.debug("Playout: %s sec", time.time() - st_time)

        st_time = time.time()
        for i, playout_node in enumerate(playout_nodes):
            self.logger.debug("PLAYOUT %s", i)
            st_time_playout = time.time()
            score = self.policy.evaluate(self.env._evaluate, [playout_node])
            if score > 0:
                self.logger.info("Playout\t param={0}\t score={1}".format(
                    playout_node, score))
                return score
            elif time.time() - st_time_playout > 200:
                break
        self.logger.debug("Evaluate: %s sec", time.time() - st_time)
        # BUG FIX: the final log referenced `playout_node` after the loop,
        # raising NameError when `playout_nodes` was empty.
        self.logger.info("Playout\t score={0}".format(0))
        return 0

    def BACKUP(self, node, reward):
        """Back propagate reward."""
        for parent in self.tree.get_path_to_node(node_id=node, name=False):
            vl, vs = self.tree.get_attribute(
                parent, "reward"), self.tree.get_attribute(parent, "visits")
            new_val, new_vis = self.policy.backpropagate(
                parent, vl, vs, reward)
            self.tree.set_attribute(parent, "reward", new_val)
            self.tree.set_attribute(parent, "visits", new_vis)

    def create_node_for_algorithm(self):
        """Create one child of the root per supported classifier.

        Returns
        -------
        dict
            Mapping classifier name -> node id.
        """
        id_class = {}
        for cl in [
                "bernoulli_nb", "multinomial_nb", "decision_tree",
                "gaussian_nb", "sgd", "passive_aggressive",
                "xgradient_boosting", "adaboost", "extra_trees",
                "gradient_boosting", "lda", "liblinear_svc", "libsvm_svc",
                "qda", "k_nearest_neighbors", "random_forest"
        ]:
            id_class[cl] = self.tree.add_node(name="classifier:__choice__",
                                              value=cl,
                                              terminal=False,
                                              parent_node=0)
        return id_class

    def run(self, n=1, intial_configuration=None, generate_image_path=""):
        """Run up to `n` MCTS iterations within the time budget.

        Parameters
        ----------
        n : int
            Maximum number of MCTS iterations.
        intial_configuration : list or None
            Initial configurations to warm-start from (BUG FIX: was a
            mutable default argument ``[]``, shared across calls).
        generate_image_path : str
            Unused here; kept for interface compatibility.
        """
        if intial_configuration is None:
            intial_configuration = []
        start_run = time.time()
        with Timeout(int(self.time_budget - (start_run - time.time()))):
            try:
                self.env.run_default_configuration()
                self.env.check_time()

                if len(intial_configuration) > 0:
                    # Warm-start: back-propagate scores of known-good
                    # configurations onto per-classifier root children.
                    executed_config = self.env.run_default_all()
                    id_class = self.create_node_for_algorithm()
                    score_each_cl = self.env.run_initial_configuration(
                        intial_configuration, executed_config)
                    for cl, vals in score_each_cl.items():
                        if len(vals) > 0:
                            [self.BACKUP(id_class[cl], s) for s in vals]
                else:
                    self.env.check_time()
                    self.env.run_main_configuration()

                for i in range(n):
                    if time.time() - self.env.start_time < self.time_budget:
                        self.MCT_SEARCH()
                        if self.exec_dir != "":
                            img_dir = os.path.join(self.exec_dir, "images")
                            if not os.path.exists(img_dir):
                                os.makedirs(img_dir)
                            self.tree.draw_tree(
                                os.path.join(img_dir, "step_%s" % i))
                    else:
                        return 0
                    gc.collect()
            except Timeout.Timeout:
                return 0

    def print_tree(self, images):
        """Draw a snapshot of the tree into the given path."""
        self.tree.draw_tree(images)
class MCTS:
    """
    Implementation of Monte Carlo Tree Search algorithm

    Parameters
    ----------
    env: object
        Problem environment
    time_budget : int
        Time budget
    bandit_policy: dict
        Bandit policy configuration; ``policy_name`` selects "uct",
        "besa" or "puct"
    coef_progressive_widening: float
        Coefficient of progressive widening
    exec_dir: str
        Path to store results

    Attributes
    ----------
    best_config: object
        Current best configuration
    best_score: float
        Current best score
    tree: object <class mosaic.node.Node>
        Tree created by the MCTS algorithm
    logger: object
        Logger
    policy: object
        Bandit algorithm used
    n_iter: int
        Number of executed MCTS simulation (selection, expansion,
        playout, back-propagation)
    """

    def __init__(self, env, time_budget, bandit_policy,
                 coef_progressive_widening, exec_dir):
        self.env = env
        self.time_budget = time_budget
        self.exec_dir = exec_dir
        self.best_config = None
        self.best_score = -np.inf

        # Init tree
        self.tree = Node()

        # Set up logger
        self.logger = logging.getLogger('mcts')

        # Policy
        if bandit_policy["policy_name"] == "uct":
            if "c_ucb" in bandit_policy:
                c_ucb = bandit_policy["c_ucb"]
            else:
                c_ucb = np.sqrt(2)
            self.policy = UCT(c_ucb)
        elif bandit_policy["policy_name"] == "besa":
            self.policy = Besa()
        elif bandit_policy["policy_name"] == "puct":
            bandit_policy["start_time"] = self.env.start_time
            bandit_policy["time_budget"] = self.time_budget
            self.policy = PUCT(self.env, self.tree, bandit_policy)
        else:
            # BUG FIX: `NotImplemented` is a constant, not an exception;
            # raising it is itself a TypeError. Use NotImplementedError.
            raise NotImplementedError("Policy {0} not implemented".format(
                bandit_policy["policy_name"]))

        self.n_iter = 0
        self.tree.coef_progressive_widening = coef_progressive_widening

    def MCT_SEARCH(self):
        """One simulation of MCTS.

        One simulation is composed of selection, expansion, playout and
        back-propagation

        Returns:
        --------
        reward: float
            Reward of the simulation
        config: object
            Configuration run
        """
        self.logger.info("## ITERATION={0} ##".format(self.n_iter))
        front = self.TREEPOLICY()
        reward, config = self.PLAYOUT(front)
        if config is None:
            # Invalid playout: nothing to back-propagate.
            return 0, None
        self.BACKUP(front, reward)
        self.n_iter += 1
        return reward, config

    def TREEPOLICY(self):
        """Selection using policy.

        Walk down from the root, expanding the first non-fully-expanded
        node and otherwise letting the bandit policy pick among the valid
        (non-invalid) children.
        """
        node = 0  # Root of the tree
        while not self.tree.is_terminal(node):
            if len(self.tree.get_children(node)) == 0:
                return self.EXPAND(node)
            else:
                if not self.tree.fully_expanded(node, self.env):
                    return self.EXPAND(node)
                else:
                    current_node = self.tree.get_info_node(node)
                    # Skip children previously flagged invalid by PLAYOUT.
                    children = [[
                        n,
                        self.tree.get_attribute(n, "reward"),
                        self.tree.get_attribute(n, "visits")
                    ] for n in self.tree.get_children(node)
                        if not self.tree.get_attribute(n, "invalid")]
                    if len(children) > 0:
                        node = self.policy.selection(
                            (current_node["reward"], current_node["visits"]),
                            [x[0] for x in children],
                            [x[1] for x in children],
                            [x[2] for x in children],
                            state=self.tree.get_path_to_node(node))
                    else:
                        # No valid child left: stop the descent here.
                        return node
        return node

    def EXPAND(self, node):
        """Expand child node."""
        name, value, terminal = self.policy.expansion(self.env.next_move, [
            self.tree.get_path_to_node(node),
            self.tree.get_children(node, info=["name", "value"])
        ])
        id = self.tree.add_node(name=name,
                                value=value,
                                terminal=terminal,
                                parent_node=node)
        return id

    def PLAYOUT(self, node_id):
        """Playout policy.

        Returns
        -------
        (score, playout_node) : tuple
            Evaluated score and configuration, or ``(0, None)`` if the
            rollout failed (the node is then flagged "invalid").
        """
        st_time = time.time()
        try:
            playout_node = self.env.rollout(
                self.tree.get_path_to_node(node_id))
        except Exception:
            self.tree.set_attribute(node_id, "invalid", True)
            return 0, None
        score = self.policy.evaluate(self.env._evaluate, [playout_node])
        self.logger.info("param={0}\t score={1}\t exec time={2}".format(
            playout_node, score, time.time() - st_time))
        return score, playout_node

    def BACKUP(self, node, reward):
        """Back propagate reward."""
        for parent in self.tree.get_path_to_node(node_id=node, name=False):
            vl, vs = self.tree.get_attribute(
                parent, "reward"), self.tree.get_attribute(parent, "visits")
            new_val, new_vis = self.policy.backpropagate(
                parent, vl, vs, reward)
            self.tree.set_attribute(parent, "reward", new_val)
            self.tree.set_attribute(parent, "visits", new_vis)

    def run(self, nb_simulation=1, initial_configurations=None,
            step_to_generate_img=-1):
        """Run MCTS algorithm

        Parameters:
        ----------
        nb_simulation: int
            number of MCTS simulation to run (default is 10)
        initial_configurations: list of object or None
            set of configuration to start with (default is None).
            BUG FIX: was a mutable default argument ``[]``.
        step_to_generate_img: int or None
            generate an image every N iterations (default -1, generate
            image for each MCTS iteration). Do not generate images if None.

        Returns:
        ----------
        int
            1 if timeout else 0
        """
        if initial_configurations is None:
            initial_configurations = []
        start_run = time.time()
        with Timeout(int(self.time_budget - (start_run - time.time()))):
            try:
                self.logger.info("Run default configuration")
                self.env.run_default_configuration()

                if len(initial_configurations) > 0:
                    self.logger.info("Run initial configurations")
                else:
                    self.logger.info("No initial configuration to run.")

                for i in range(nb_simulation):
                    if time.time() - self.env.start_time < self.time_budget:
                        res, config = self.MCT_SEARCH()
                        if res > self.best_score:
                            self.best_score = res
                            self.best_config = config
                    else:
                        return 0
                    # BUG FIX: `i % None` raised TypeError; None now
                    # disables image generation as the docstring promises.
                    if step_to_generate_img is not None and (
                            step_to_generate_img == -1
                            or i % step_to_generate_img == 0):
                        self.tree.draw_tree(
                            os.path.join(self.exec_dir, "images"))
                        self.print_tree("tree_{0}".format(i))
            except Timeout.Timeout:
                self.logger.info("Budget exhausted.")
                return 1

    def print_tree(self, name_img):
        """Print snapshot of constructed tree

        Parameters
        ----------
        name_img: str
            Path to store generated image
        """
        img_dir = os.path.join(self.exec_dir, "images")
        if not os.path.isdir(img_dir):
            os.mkdir(img_dir)
        self.tree.draw_tree(os.path.join(img_dir, name_img))
def test_add_node(self):
    """A fresh tree has only the root; adding a child makes it reachable."""
    tree = Node()
    assert tree.id_count == 0
    assert tree.get_attribute(0, "name") == "root"
    tree.add_node(name="v1", parent_node=0)
    assert tree.get_path_to_node(1, name=False) == [0, 1]
def test_get_path_to_node(self):
    """Paths from the root are reported in root-to-leaf id order."""
    tree = Node()
    # Build: root -> c1(1), c2(2); c1 -> b1(3), b2(4); b1 -> d1(5)
    for child_name, parent in [("c1", 0), ("c2", 0), ("b1", 1),
                               ("b2", 1), ("d1", 3)]:
        tree.add_node(name=child_name, parent_node=parent)
    expected_paths = {
        1: [0, 1],
        2: [0, 2],
        3: [0, 1, 3],
        4: [0, 1, 4],
        5: [0, 1, 3, 5],
    }
    for node_id, path in expected_paths.items():
        assert tree.get_path_to_node(node_id, name=False) == path