Example #1
    def visualize(agent: SMARTAgent, ep: int, ts: int):
        images = []
        for seed in seeds:
            if seed is not None:
                env.seed(seed)
            state = env.reset()
            goal_point = find(state, 'Goal')
            option = Option(goal_point, depth=0)
            agent.reset(env, option, random_seed=3)

            visualize_decision(agent, state, writer, f'likelihoods: {seed}',
                               ep, ts)

            images.append(env.render('rgb_array'))
            done = False
            while not done:
                action = agent.act(state, option)
                state, reward, done, _ = env.step(action)
                options = _get_option_tree_(agent)
                print(
                    f"@{onehot2directedpoint(state)} : {reward} => {options}")
                rendered = _render_options_(env.render('rgb_array'), options)
                images.append(rendered)
        gif = np.stack(images, 0)
        # np.ndarray [t, imx, imy, 3]
        gif_tensor: torch.Tensor = torch.from_numpy(gif).type(
            torch.uint8).unsqueeze(0)
        # torch.Tensor[uint8] [1, t, imx, imy, 3]
        gif_tensor = gif_tensor.permute(0, 1, 4, 2, 3)
        writer.add_video('sample trajectory', gif_tensor, global_step=ts)
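The visualize function above closes over env, writer, and seeds, none of which are defined inside it; Example #5 obtains such a function from training.make_visualize(env, writer, range(5)). A minimal sketch of how that factory could produce the closure (the actual signature and body may differ):

def make_visualize(env, writer, seeds):
    # Capture env, writer and seeds so the returned closure can later be
    # called with only (agent, ep, ts), as training.train presumably does.
    def visualize(agent: SMARTAgent, ep: int, ts: int):
        ...  # body as in Example #1
    return visualize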
Example #2
    def test(agent: SMARTAgent, ep, ts):
        rewards = [0] * len(seeds)
        for i, seed in enumerate(seeds):
            env.seed(seed)
            state = env.reset()

            goal_point = find(state, 'Goal')
            option = Option(goal_point, depth=0)
            agent.reset(env, option, random_seed=3)

            done = False

            while not done:
                action = agent.act(state)
                state, reward, done, info = env.step(action)

                rewards[i] += reward

        for i, seed in enumerate(seeds):
            writer.add_scalar(f"Test Reward: {seed}",
                              rewards[i],
                              global_step=ts)
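Like visualize in Example #1, this test function captures env, writer, and seeds from an enclosing scope; Example #5 suggests it is built by training.make_simple_minigrid_test(env, writer, range(5)) and handed to training.train as testfn.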
Example #3
def visualize_decision(agent: SMARTAgent,
                       state,
                       writer: SummaryWriter,
                       tag: str,
                       ep: int = None,
                       ts: int = None) -> None:
    prev_option = agent._prev_option_()
    parent_option = agent.current_option_node.value
    possibilities = agent.generator.generate(state, prev_option, parent_option)
    probabilities = agent.evaluator._selection_probabilities_(
        state, possibilities, prev_option, parent_option)
    xx, yy, zz = make_mesh_grid(possibilities, probabilities)
    fig = render_mesh(xx, yy, zz)
    writer.add_figure(tag, fig, global_step=ts)
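make_mesh_grid and render_mesh are not shown in these excerpts. A purely illustrative guess at what they might do, assuming each possibility is an Option whose value is a 2-D grid point and probabilities are the matching selection probabilities; the project's real implementations may differ:

import numpy as np
import matplotlib.pyplot as plt


def make_mesh_grid(possibilities, probabilities):
    # Scatter each option's selection probability onto a 2-D grid indexed
    # by the option's (x, y) value.
    points = np.asarray([p.value for p in possibilities], dtype=np.int64)
    xs = np.arange(points[:, 0].max() + 1)
    ys = np.arange(points[:, 1].max() + 1)
    xx, yy = np.meshgrid(xs, ys, indexing='ij')
    zz = np.zeros_like(xx, dtype=np.float64)
    zz[points[:, 0], points[:, 1]] = np.asarray(probabilities, dtype=np.float64)
    return xx, yy, zz


def render_mesh(xx, yy, zz):
    # Heat-map of the probability surface, returned as a figure so the
    # caller can log it with writer.add_figure.
    fig, ax = plt.subplots()
    mesh = ax.pcolormesh(xx, yy, zz, shading='auto')
    fig.colorbar(mesh, ax=ax)
    return fig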
Example #4
fig = plt.figure()
images = []

low_level_agent: IAgent = BacktrackingMazeAgent(env)
low_level_agent: IAgent = Grid2PointWrapper(low_level_agent)
evaluator: IEvaluator = GridworldEvaluator(XDIMS + 2,
                                           YDIMS + 2,
                                           settings,
                                           gamma=0.99)
generator: IGenerator = SimpleGridworldGenerator()
fulfils_goal = lambda state, goal: array_equal(state[:, :, -1], goal[:, :, 0])
goal_manager: IGoalManager = SimpleGoalManager(evaluator, generator, 1,
                                               fulfils_goal)
memory: IMemory = CompleteMemory(100, 3)
agent = SMARTAgent(goal_manager, low_level_agent, memory)

totals = []

step: int = 0

for iter, seed in enumerate([0] * 500):
    total_reward: int = 0
    print(f"================={seed}=================")
    env = MazeWorld(cache._get_cached_board(seed))

    state, goal = env.reset(3)
    agent.reset(env, state, goal)
    done = False
    states: List[State] = [state]
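The excerpt ends before the rollout loop. A plausible continuation in the style of Examples #1 and #2, assuming MazeWorld follows the usual (state, reward, done, info) step signature, which this excerpt does not confirm:

    while not done:
        action = agent.act(state)  # Example #1 passes (state, option); the act signature varies across excerpts
        state, reward, done, _ = env.step(action)
        states.append(state)
        total_reward += reward
        step += 1
    totals.append(total_reward)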
Example #5
low_level_agent = MinigridBacktrackingAgent()
shape = env.observation_space.shape
shape = (-1, shape[-1], shape[0], shape[1])
v_model = VModel(shape, 32, 2, device=settings['device'])
q_model = QModel(shape, 32, 2, device=settings['device'])
planning_terminator = DepthPlanningTerminator(max_depth=settings['max_depth'])
evaluator = Evaluator(v_model,
                      q_model,
                      planning_terminator,
                      settings,
                      get_beta=lambda step: 3,
                      gamma=0.99)
generator = SimpleMinigridGenerator()
memory = CompleteMemory(max_length=100000)


def goal_met(s, o):
    agent_loc: np.ndarray = s[:, :, 8]  # imx, imy, onehot
    agent_loc = np.unravel_index(np.argmax(agent_loc), agent_loc.shape)
    return np.all(agent_loc == o.value)


policy_terminator = StrictGoalTerminator(goal_met)
agent = SMARTAgent(
    evaluator,
    generator,
    planning_terminator,
    policy_terminator=policy_terminator,
    low_level=low_level_agent,
    memory=memory,
    settings=settings)



testfn = training.make_simple_minigrid_test(env, writer, range(5))
vizfn = training.make_visualize(env, writer, range(5))

training.train(agent, env, settings, testfn=testfn, vizfn=vizfn)
training.summarize(agent, env, settings, list(range(10)), writer)
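These examples index settings with at least 'device' (VModel/QModel), 'max_depth' (DepthPlanningTerminator) and 'random' (CompleteMemory in Example #7). A minimal illustrative dict covering just those keys; real configurations almost certainly carry more, and the values shown are placeholders:

settings = {
    'device': 'cpu',   # or 'cuda' when available
    'max_depth': 3,    # planning depth limit (placeholder value)
    'random': 0,       # seed forwarded to CompleteMemory
}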
Example #6
                      gamma=0.99)
generator = SimpleMinigridGenerator()
memory = CompleteMemory(max_length=100000)


def goal_met(s, o):
    agent_loc: np.ndarray = s[:, :, 8]  # imx, imy, onehot
    agent_loc = np.unravel_index(np.argmax(agent_loc), agent_loc.shape)
    return np.all(agent_loc == o.value)


policy_terminator = StrictGoalTerminator(goal_met)
agent = SMARTAgent(evaluator,
                   generator,
                   planning_terminator,
                   policy_terminator=policy_terminator,
                   low_level=low_level_agent,
                   memory=memory,
                   settings=settings)


def visualize(rgb_array, options):
    # tiles of size 32 x 32
    for option in options:
        tile_ur: np.ndarray = option.value.astype(np.int32) * 32
        tile_ur = tile_ur[::-1]
        for y in range(4, 28):
            for x in range(4, 28):
                new_x, new_y = tile_ur + np.asarray([x, y])

                colors = [[255, 0, 0], [255, 102, 102], [255, 128, 0],
Example #7
get_beta = lambda step: 0.001 * step
evaluator: IEvaluator = SimpleMazeworldEvaluator(planning_terminator,
                                                 v_model,
                                                 q_model,
                                                 settings,
                                                 get_beta,
                                                 gamma=0.99)

generator: IGenerator = SimpleMazeworldGenerator()

low_level: IOptionBasedAgent = BacktrackingMazeAgent(env)

memory: IMemory = CompleteMemory(max_length=100,
                                 random_seed=settings['random'])

agent: SMARTAgent = SMARTAgent(evaluator, generator, planning_terminator,
                               policy_terminator, low_level, memory, settings)

step: int = 0
images = []
for seed in [0] * 500:
    env = MazeWorld(cache._get_cached_board(seed))

    total_reward: int = 0
    t: int = 0
    done: bool = False

    state, goal = env.reset(3)
    goal = Option(goal, 0)
    states: List[State] = [state]
    agent.reset(env, goal)