def plan(timeout, blocks, problem, model):
    """Grow a search tree for ``timeout`` iterations and return it.

    Every iteration asks the tree which node to expand (via
    ``tree.get_exp_best_node_expand()``), samples candidate children for that
    node with ``problem.sample_actions`` and attaches each candidate to the
    tree.

    Args:
        timeout: Number of expansion iterations to run.
        blocks: Passed unchanged to the ``Tree`` constructor.
        problem: Object providing ``sample_actions(node, model)``.
        model: Forwarded to ``problem.sample_actions``.

    Returns:
        The ``Tree`` instance after all iterations have run.
    """
    search_tree = Tree(blocks)

    for step in range(timeout):
        expand_id = search_tree.get_exp_best_node_expand()

        # One-line progress indicator: the trailing carriage return moves the
        # cursor back so the next write overprints this one.
        progress = "Search progress: %i \r" % (step)
        sys.stdout.write(progress)
        sys.stdout.flush()

        candidates = problem.sample_actions(search_tree.nodes[expand_id], model)
        for candidate in candidates:
            search_tree.expand(expand_id, candidate)

    return search_tree
def plan_mcts(logger, timeout, blocks, problem, model, c=1., discrete=True):
    """Run a tree search for ``timeout`` iterations while recording statistics.

    Each iteration selects a node with ``tree.traverse(c)``, samples candidate
    children with ``problem.sample_actions`` and, for every candidate, expands
    the tree, performs a rollout and back-propagates the rollout value.

    Args:
        logger: Not referenced by this function; kept so existing callers keep
            working.
        timeout: Number of search iterations.
        blocks: Passed unchanged to the ``Tree`` constructor.
        problem: Object providing ``max_height`` and
            ``sample_actions(node, model, discrete=...)``; it is also handed
            to ``tree.rollout``.
        model: Forwarded to ``problem.sample_actions`` and ``tree.rollout``.
        c: Forwarded to ``tree.traverse`` (presumably the exploration
            constant -- confirm against ``Tree.traverse``).
        discrete: Forwarded to ``problem.sample_actions``.

    Returns:
        A 6-tuple ``(tree, tallest_tower, highest_exp_height, highest_value,
        tower_stats, node_values)``:

        * ``tree``: the ``Tree`` after the search.
        * ``tallest_tower``: list of ``timeout + 1`` entries starting with 0;
          running maximum of ``len(node['tower'])`` over expanded nodes.
        * ``highest_exp_height``: same layout; running maximum of
          ``node['exp_reward']``.
        * ``highest_value``: same layout; running maximum of ``node['value']``
          as read right after back-propagation.
        * ``tower_stats``: array of shape ``(problem.max_height, timeout)``;
          entry ``[h - 1, t]`` is the cumulative number of expanded nodes
          whose tower holds ``h`` blocks, up to and including iteration ``t``.
        * ``node_values``: ``{height: {'median': [...], '25': [...],
          '75': [...]}}`` with one entry per iteration summarising the
          ``'value'`` of every tree node at that height (0 when there are
          none).
    """
    tree = Tree(blocks)
    # Read once; assumes problem.max_height does not change during the search.
    max_height = problem.max_height

    tallest_tower = [0]
    highest_exp_height = [0]
    highest_value = [0]
    tower_stats = np.zeros((max_height, timeout))
    node_values = {
        height: {'median': [], '25': [], '75': []}
        for height in range(max_height + 1)
    }

    for t in range(timeout):
        # Counts are cumulative, so start from the previous column.  The first
        # column has no predecessor and stays at zero (previously this relied
        # on index -1 wrapping around to the still-empty last column).
        if t > 0:
            tower_stats[:, t] = tower_stats[:, t - 1]

        sys.stdout.write("Search progress: %i \r" % (t))
        sys.stdout.flush()

        parent_node_id = tree.traverse(c)
        new_nodes = problem.sample_actions(
            tree.nodes[parent_node_id], model, discrete=discrete)

        best_height = tallest_tower[-1]
        best_exp_reward = highest_exp_height[-1]
        best_value = highest_value[-1]

        for new_node in new_nodes:
            new_node_id = tree.expand(parent_node_id, new_node)
            rollout_value = tree.rollout(new_node_id, problem, model)
            tree.backpropagate(new_node_id, rollout_value)

            tower_height = len(new_node['tower'])
            # NOTE(review): a zero-height tower would index row -1 (the last
            # row); new nodes are presumed to hold at least one block --
            # confirm against problem.sample_actions.
            tower_stats[tower_height - 1, t] += 1

            best_height = max(best_height, tower_height)
            best_exp_reward = max(best_exp_reward, new_node['exp_reward'])
            best_value = max(best_value, new_node['value'])

        tallest_tower.append(best_height)
        highest_exp_height.append(best_exp_reward)
        highest_value.append(best_value)

        # Node values change with every back-propagation, so the per-height
        # summary is recomputed over the whole tree each iteration.
        _append_value_quartiles(
            node_values, _node_values_by_height(tree, max_height))

    return (tree, tallest_tower, highest_exp_height, highest_value,
            tower_stats, node_values)


def _node_values_by_height(tree, max_height):
    """Group the current ``'value'`` of every tree node by its tower height."""
    grouped = {height: [] for height in range(max_height + 1)}
    for node_id in tree.nodes:
        node = tree.nodes[node_id]
        grouped[len(node['tower'])].append(node['value'])
    return grouped


def _append_value_quartiles(node_values, grouped):
    """Append each height's median and 25%/75% quantiles (0 if no values)."""
    for height, values in grouped.items():
        summary = node_values[height]
        if values:
            summary['median'].append(np.median(values))
            summary['25'].append(np.quantile(values, .25))
            summary['75'].append(np.quantile(values, .75))
        else:
            summary['median'].append(0)
            summary['25'].append(0)
            summary['75'].append(0)