Пример #1
0
def run_maze_learner(mission, clients):
    """Run a RandomAgent in a MazeEnvironment for a fixed number of steps.

    A visualizer is chosen first (Tensorboard when its module is already
    loaded, console otherwise). The environment is then built and the
    agent/environment loop runs: reset when the episode is over, ask the
    agent for an action, apply it, and collect the reward.

    Args:
        mission: forwarded unchanged to ``MazeEnvironment``.
        clients: iterable of strings; each one is split on ':' before being
            handed to ``MazeEnvironment`` (presumably "host:port" -- confirm).
    """
    # NOTE(review): `logdir` is not bound in this function; it is expected
    # to exist at module scope -- confirm.
    if 'malmopy.visualization.tensorboard' not in sys.modules:
        visualizer = ConsoleVisualizer()
    else:
        visualizer = TensorboardVisualizer()
        visualizer.initialize(logdir, None)

    endpoints = [str.split(address, ':') for address in clients]
    env = MazeEnvironment(mission, endpoints)
    env.recording = False

    # The DQN setup (explorer, model, replay memory, experiment wrapper) is
    # disabled in this variant; a RandomAgent drives the environment.
    # The second argument is presumably the number of actions -- confirm.
    agent = RandomAgent("rand", 3)

    steps_per_epoch = 250000
    total_steps = 50 * steps_per_epoch

    state = env.reset()
    reward, agent_done = 0, False
    episode_rewards = []

    for step in range(1, total_steps + 1):
        # Episode finished: flush its rewards to the visualizer and restart.
        if env.done:
            visualize_training(visualizer, step, episode_rewards)
            agent.inject_summaries(step)
            episode_rewards = []
            state = env.reset()

        # Ask the agent for its next move, given the latest transition.
        action = agent.act(state, reward, agent_done, is_training=True)
        print('ACTION BEING TAKEN: ', action)

        # Apply the move and remember the reward for this episode.
        state, reward, agent_done = env.do(action)
        episode_rewards.append(reward)

        # Epoch boundary: write a checkpoint.
        # NOTE(review): `model`, `backend` and `environment` are not bound in
        # this function; unless they exist at module scope this line raises
        # NameError -- confirm.
        if not step % steps_per_epoch:
            model.save('%s-%s-dqn_%d.model' %
                       (backend, environment, step // steps_per_epoch))
Пример #2
0
def run_maze_learner(mission, clients):
    """Run a RandomAgent in a MazeEnvironment for a fixed number of steps.

    A visualizer is chosen first (Tensorboard when its module is already
    loaded, console otherwise). The environment is then built and the
    agent/environment loop runs: reset when the episode is over, ask the
    agent for an action, apply it, and collect the reward.

    Args:
        mission: forwarded unchanged to ``MazeEnvironment``.
        clients: iterable of strings; each one is split on ':' before being
            handed to ``MazeEnvironment`` (presumably "host:port" -- confirm).
    """
    # NOTE(review): `logdir` is not bound in this function; it is expected
    # to exist at module scope -- confirm.
    if 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        visualizer.initialize(logdir, None)
    else:
        visualizer = ConsoleVisualizer()

    env = MazeEnvironment(mission,
                          [client.split(':') for client in clients])
    env.recording = False

    # The second argument is presumably the number of actions -- confirm.
    agent = RandomAgent("rand", 3, delay_between_action=1.5)

    # Taking random actions.
    EPOCH_SIZE = 250000
    max_training_steps = 50 * EPOCH_SIZE
    state = env.reset()
    reward = 0
    agent_done = False
    viz_rewards = []
    for step in range(1, max_training_steps + 1):
        # Check whether the environment needs a reset; if so, report the
        # finished episode's rewards before starting a new one.
        if env.done:
            visualize_training(visualizer, step, viz_rewards)
            agent.inject_summaries(step)
            viz_rewards = []
            state = env.reset()

        # Select an action. The agent is queried exactly once per step, and
        # only after the reset check, so it always sees the current state
        # and its per-action delay is paid a single time.
        action = agent.act(state, reward, agent_done, is_training=True)
        print('ACTION BEING TAKEN: ', action)

        # Take a step.
        state, reward, agent_done = env.do(action)
        viz_rewards.append(reward)

        # Epoch boundary: write a checkpoint.
        # NOTE(review): `model`, `backend` and `environment` are not bound in
        # this function; unless they exist at module scope this line raises
        # NameError -- confirm.
        if (step % EPOCH_SIZE) == 0:
            model.save('%s-%s-dqn_%d.model' %
                       (backend, environment, step // EPOCH_SIZE))