Example #1
            scopes = [network_scope, scene_scope, task_scope]

            for i_episode in range(NUM_EVAL_EPISODES):

                env.reset()
                terminal = False
                ep_reward = 0
                ep_collision = 0
                ep_t = 0

                while not terminal:
                    usf_s_g = global_network.run_usf(sess, env.s_t, env.target,
                                                     scopes)

                    pi_values = global_network.run_policy(
                        sess, env.s_t, env.target, usf_s_g, scopes)
                    action = sample_action(pi_values)
                    env.step(action)
                    env.update()

                    terminal = env.terminal
                    if ep_t == 500: break
                    if env.collided: ep_collision += 1
                    ep_reward += env.reward
                    ep_t += 1

                ep_lengths.append(ep_t)
                ep_rewards.append(ep_reward)
                ep_collisions.append(ep_collision)
                if VERBOSE:
                    print("episode #{} ends after {} steps".format(
Example #2
            for i_episode in range(NNN):

                optimal = env.reset()
                terminal = False
                ep_reward = 0
                ep_collision = 0
                ep_t = 0

                f_path.write(str(counter * NNN + i_episode) + ': [')
                while not terminal:

                    #viewer.imshow(env.observation)
                    #time.sleep(0.3)
                    #print(env.isCheckpoint)
                    pi_values = global_network.run_policy(
                        sess, env.s_t, env.s_position, env.checkpoint,
                        env.s_a_t, env.s_c_t, env.isCheckpoint, env.s_aux_cl,
                        scopes)
                    #print(pi_values)
                    action = sample_action(pi_values)
                    env.step(action)

                    env.update()

                    terminal = env.terminal
                    if ep_t == 500: break
                    if env.collided: ep_collision += 1
                    ep_reward += env.reward
                    ep_t += 1
                    #if not terminal:
                    #  f_path.write('['+str(int(env.x*2))+', '+str(int(env.z*2))+'], ')
                    #else:
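Example #2 opens a per-episode entry in f_path and the commented-out lines suggest the agent's (x, z) position was meant to be logged as scaled integer pairs, with the list closed at the end of the episode. A self-contained sketch of that logging pattern, assuming this format (the helper name and its arguments are hypothetical):

    def log_trajectory(f_path, episode_id, positions):
        # positions: list of (x, z) coordinates visited during one episode.
        # Writes one line per episode, e.g. "7: [[4, -2], [4, -1]]"
        # (hypothetical helper reconstructing the commented-out logging).
        f_path.write(str(episode_id) + ': [')
        f_path.write(', '.join(
            '[{}, {}]'.format(int(x * 2), int(z * 2)) for x, z in positions))
        f_path.write(']\n')

    # example usage
    with open('trajectories.txt', 'w') as f_path:
        log_trajectory(f_path, 0, [(2.0, -1.0), (2.0, -0.5), (2.25, -0.5)])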
Example #3
            h = target[3]*300
            xa = int((x+w)/2)
            xm = int(x-xa)
            ya = int((y+h)/2)
            ym = int(y-ya)
            #print(env.observation)
            env.observation[ym:ym+2,xm:xa] = 255
            env.observation[ya:ya+2,xm:xa] = 255
            env.observation[ym:ya,xm:xm+2] = 255
            env.observation[ym:ya,xa:xa+2] = 255
          '''

                    #viewer.imshow(env.observation)
                    #time.sleep(0.5)

                    pi_values = global_network.run_policy(
                        sess, env.s_t, env.s_position, env.checkpoint, scopes)
                    action = sample_action(pi_values)
                    env.step(action)

                    env.update()

                    terminal = env.terminal
                    if ep_t == 500: break
                    if env.collided: ep_collision += 1
                    ep_reward += env.reward
                    ep_t += 1
                    if not terminal:
                        f.write('[' + str(int(env.x * 2)) + ', ' +
                                str(int(env.z * 2)) + '], ')
                    else:
                        f.write('[' + str(int(env.x * 2)) + ', ' +
Example #4
def main():
    # disable all v2 behavior
    tf.disable_v2_behavior()
    tf.disable_eager_execution()

    device = "/cpu:0"  # use CPU for display tool
    network_scope = TASK_TYPE  # Always 'navigation'
    list_of_tasks = TASK_LIST
    scene_scopes = list_of_tasks.keys()

    global_network = ActorCriticFFNetwork(action_size=ACTION_SIZE,
                                          device=device,
                                          network_scope=network_scope,
                                          scene_scopes=scene_scopes)
    sess = tf.Session()
    # sess = tf.coSession()
    init = tf.global_variables_initializer()
    sess.run(init)

    saver = tf.train.Saver()
    checkpoint = tf.train.get_checkpoint_state(CHECKPOINT_DIR)

    # restore the checkpoint from past training, if one was saved
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("checkpoint loaded: {}".format(checkpoint.model_checkpoint_path))
    else:
        print("Could not find old checkpoint")

    scene_stats = dict()
    for scene_scope in scene_scopes:
        # TODO: remove
        scene_scope = "FloorPlan402"
        scene_stats[scene_scope] = []
        for task_scope in list_of_tasks[scene_scope]:
            # tasks are positions!!!
            # env = ai2thor.controller.Controller(scene="FloorPlan227", gridSize=0.25, width=1000, height=1000)
            with open(GOAL_FILE, 'r') as f:
                GOAL_DATA = json.load(f)

            GOAL_POS = GOAL_DATA["agent_position"]
            env = RLController({
                'scene': scene_scope,
                'terminal_state_id': int(task_scope),
                'goal_pos': GOAL_POS,
                'goal_image_fpath': "data/FP402_goal_towel.png"
            })
            env.docker_enabled = True
            ep_rewards = []
            ep_lengths = []
            ep_collisions = []

            scopes = [network_scope, scene_scope]

            for i_episode in range(NUM_EVAL_EPISODES):
                env.reset()

                terminal = False
                ep_reward = 0
                ep_collision = 0
                ep_t = 0

                while not terminal:
                    # mirrors actions taken in paper
                    # NOTE: rearranged these to mirror code in scene_loader
                    list_of_actions = [
                        "MoveAhead", "RotateRight", "RotateLeft", "MoveBack"
                    ]

                    pi_values = global_network.run_policy(
                        sess, env.curr_state, env.target, scopes)
                    # action returned is an integer -- critical that the list_of_actions is in correct order

                    action = sample_action(pi_values)
                    print(
                        "Ep_t: {} \n\tCollided?: {} \n\tAction: {} \n\tValue: {} \n\tAll Action Values: {}"
                        .format(ep_t, env.collided, list_of_actions[action],
                                pi_values[action], pi_values))
                    env.step(list_of_actions[action])

                    env.update()

                    terminal = env.terminal
                    if ep_t == 10000: break

                    if env.collided: ep_collision += 1
                    ep_reward += env.reward
                    ep_t += 1

                # record per-episode statistics (after the step loop, not per step)
                ep_lengths.append(ep_t)
                ep_rewards.append(ep_reward)
                ep_collisions.append(ep_collision)

                print('evaluation: %s %s' % (scene_scope, task_scope))
                print('mean episode reward: %.2f' % np.mean(ep_rewards))
                print('mean episode length: %.2f' % np.mean(ep_lengths))
                print('mean episode collision: %.2f' % np.mean(ep_collisions))
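In Example #4, scene_stats[scene_scope] is initialized but never filled in the part of main() shown above. A hedged sketch of a summary helper that the evaluation loop could feed with the collected episode lengths (the helper and its output format are assumptions, not part of the original code):

    import numpy as np

    def print_scene_summary(scene_stats):
        # scene_stats: dict mapping scene name -> list of episode lengths
        # (hypothetical helper; the shown main() never aggregates scene_stats).
        print('\nresults (average trajectory length):')
        for scene, lengths in scene_stats.items():
            if lengths:
                print('%s: %.2f steps' % (scene, np.mean(lengths)))

    # example usage with dummy data
    print_scene_summary({'FloorPlan402': [87, 120, 45]})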