Example #1
def main(_):
    logging.info('Total shards: %d; Current shard index: %d', FLAGS.num_tasks,
                 FLAGS.task)
    runtime_config = common.RuntimeConfig(task_id=FLAGS.task,
                                          num_tasks=FLAGS.num_tasks)
    data_sources = FLAGS.data_source.split(',')
    aggregator_prefix = '_'.join(data_sources)

    # Get problem instance.
    if FLAGS.problem == 'R2R':
        problem = r2r_problem.R2RProblem(runtime_config,
                                         mode=FLAGS.mode,
                                         data_sources=data_sources,
                                         curriculum=FLAGS.curriculum,
                                         agent_type=FLAGS.agent_type)
    elif FLAGS.problem == 'NDH':
        problem = ndh_problem.NDHProblem(runtime_config,
                                         mode=FLAGS.mode,
                                         data_sources=data_sources,
                                         agent_type=FLAGS.agent_type)
    elif FLAGS.problem == 'R2R+NDH':
        # The multi-task problem type is supported during training only; use
        # task-specific problems during eval.
        if FLAGS.mode != 'train':
            raise ValueError('Multi-tasking is only supported for training. '
                             'Use task-specific problems during eval.')
        problem = mt_problem.MTProblem(runtime_config, mode=FLAGS.mode)
    else:
        raise ValueError('Unsupported problem type encountered: {}'.format(
            FLAGS.problem))

    logging.info('Current mode is %s', FLAGS.mode)
    if FLAGS.mode == 'train':
        logging.info('Running train actor...')
        actor.run(problem)
    else:
        logging.info('Running eval actor...')
        eval_actor.run(
            problem,
            # Evaluate each path in the dataset exactly once.
            num_episodes_per_iter=problem.get_environment().num_paths,
            task_id=runtime_config.task_id)
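
This entry point assumes absl-style flags defined at module level. A minimal sketch of what those definitions could look like is shown below; the defaults, help strings, and the type of the curriculum flag are assumptions for illustration, not taken from the source.

from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_string('problem', None, 'Problem type: R2R, NDH or R2R+NDH.')
flags.DEFINE_string('mode', None, 'Job mode, e.g. train or eval.')
flags.DEFINE_string('data_source', '', 'Comma-separated list of data sources.')
flags.DEFINE_integer('task', 0, 'Index of the current shard.')
flags.DEFINE_integer('num_tasks', 1, 'Total number of shards.')
flags.DEFINE_string('curriculum', '', 'Curriculum setting (type assumed).')
flags.DEFINE_string('agent_type', '', 'Agent type (default assumed).')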
Example #2
def main(_):
    # Pseudo config; it will not be used by the learner.
    runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
    if FLAGS.problem == 'R2R':
        problem = r2r_problem.R2RProblem(runtime_config,
                                         mode=FLAGS.mode,
                                         data_sources=None,
                                         agent_type=FLAGS.agent_type)
    elif FLAGS.problem == 'NDH':
        problem = ndh_problem.NDHProblem(runtime_config,
                                         mode=FLAGS.mode,
                                         data_sources=None,
                                         agent_type=FLAGS.agent_type)
    elif FLAGS.problem == 'R2R+NDH':
        problem = mt_problem.MTProblem(runtime_config, mode=FLAGS.mode)
    else:
        raise ValueError('Unsupported problem type encountered: {}'.format(
            FLAGS.problem))

    logging.info('Begin running learner...')
    learner.run(problem)
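
Each main(_) above follows the absl convention of receiving the unused positional arguments. When the module is run as a script, the launcher typically looks like the sketch below.

from absl import app

if __name__ == '__main__':
    app.run(main)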
Example #3
  def setUp(self):
    super(DiscriminatorTest, self).setUp()
    self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
    self._env_config = hparam.HParams(
        problem='R2R',
        scan_base_dir=self.data_dir,
        data_base_dir=self.data_dir,
        vocab_dir=self.data_dir,
        vocab_file='vocab.txt',
        images_per_pano=36,
        max_conns=14,
        image_encoding_dim=64,
        direction_encoding_dim=256,
        image_features_dir=os.path.join(self.data_dir, 'image_features'),
        instruction_len=50,
        max_agent_actions=6,
        project_decoder_input_states=True,
        use_all_final_states=False,
        reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))

    self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
    self._env = env.R2REnv(
        data_sources=['R2R_small_split'],
        runtime_config=self._runtime_config,
        env_config=self._env_config)
    self.num_panos = 36
    self.image_feature_size = 64
    self.direction_encoding_dim = 256
    self.num_actions = 14
    self.time_step = 3
    self.batch_size = 2
    done = np.array([[False, True], [True, False], [True, False]])
    self._test_environment = common.EnvOutput(
        reward=0,
        done=done,
        observation={
            constants.PATH_ID:  # Shape = [time, batch]
                np.array([[2, 1], [0, 1], [0, 1]]),
            constants.IS_START:  # Shape = [time, batch]
                np.array([[False, True], [True, False], [False, False]]),
            constants.DISC_MASK:  # Shape = [time, batch]
                np.array([[False, True], [True, True], [True, True]]),
            constants.PANO_ENC:  # Shape = [time, batch, num_panos, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_panos,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.CONN_ENC:
                # Shape = [time, batch, num_actions, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size, self.num_actions,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.PREV_ACTION_ENC:
                # Shape = [time, batch, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.NEXT_GOLDEN_ACTION_ENC:
                # Shape = [time, batch, feature_size]
                tf.random.normal([
                    self.time_step, self.batch_size,
                    self.image_feature_size + self.direction_encoding_dim
                ]),
            constants.INS_TOKEN_IDS:  # Shape = [time, batch, token_len]
                np.array([[[5, 3, 2, 1, 0], [3, 4, 5, 6, 1]],
                          [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]],
                          [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]]]),
            constants.INS_LEN:  # Shape = [time, batch]
                np.tile(np.array([[3]]), [self.time_step, self.batch_size]),
            constants.VALID_CONN_MASK:
                # Shape = [time, batch, num_connections]
                np.tile(
                    np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                              [[True] * 2 + [False] * 12]]),
                    [1, self.batch_size, 1]),
            constants.LABEL:
                # Shape = [time, batch]
                np.array([[False, False], [True, False], [True, False]])
        },
        info='')
    self._agent_config = agent_config.get_r2r_agent_config()
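
Every entry in the observation dict above uses a leading [time, batch] layout. The helper below is a hypothetical sanity check written here for illustration (it is not part of the test); it works for both numpy arrays and eager TensorFlow tensors.

def check_time_batch_leading(observation, time_steps, batch_size):
  # Every observation value should start with [time, batch] dimensions.
  for key, value in observation.items():
    shape = tuple(value.shape)  # works for np.ndarray and tf.Tensor alike
    assert shape[:2] == (time_steps, batch_size), key

# e.g. check_time_batch_leading(self._test_environment.observation, 3, 2)
# could be called at the end of setUp.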
Example #4
    def setUp(self):
        super(EvalMetricTest, self).setUp()

        self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'

        self._env_config = hparam.HParams(
            problem='R2R',
            base_path=self.data_dir,
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=2052,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                'distance_to_goal'))

        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
        self._env = env.R2REnv(data_sources=['small_split'],
                               runtime_config=self._runtime_config,
                               env_config=self._env_config)

        # Scan: gZ6f7yhEvPG
        # Agent's traversed path in this test (pano ids): 1, 3, 7, 5, 2
        self._golden_path = [1, 4, 6, 2]
        self._scan_id = 0  # testdata has only a single scan, 'gZ6f7yhEvPG'
        self._env_list = [
            common.EnvOutput(reward=0,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 1,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 3,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 7,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=None,
                             observation={
                                 constants.PANO_ID:
                                 5,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(reward=1,
                             done=False,
                             observation={
                                 constants.PANO_ID:
                                 2,
                                 constants.GOLDEN_PATH:
                                 self._golden_path,
                                 constants.GOAL_PANO_ID:
                                 2,
                                 constants.SCAN_ID:
                                 self._scan_id,
                                 constants.GOAL_ROOM_PANOS:
                                 [6, 2, constants.INVALID_NODE_ID]
                             },
                             info=None),
            common.EnvOutput(
                reward=4,  # success
                done=True,  # end of episode
                # next episode's observation.
                observation={
                    constants.PANO_ID: 11,
                    constants.GOLDEN_PATH: self._golden_path,
                    constants.GOAL_PANO_ID: 2,
                    constants.SCAN_ID: self._scan_id,
                    constants.GOAL_ROOM_PANOS:
                    [6, 2, constants.INVALID_NODE_ID]
                },
                info=None),
        ]
        self._action_list = [3, 7, 5, 2, 0]
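
The two lists are aligned so that taking self._action_list[i] from the state captured in self._env_list[i] produces self._env_list[i + 1]; the final action 0 stops the episode, and the last EnvOutput already holds the next episode's observation. Inside a test method, these pairs would typically be walked roughly as sketched below; the metric call itself is only a placeholder comment, not an API from the source.

        for i, action in enumerate(self._action_list):
            step_input, step_output = self._env_list[i], self._env_list[i + 1]
            # Feed (action, step_input, step_output) into the metric under test here.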
Example #5
    def testStepToGoalRoom(self):
        self.reward_fn_type = 'distance_to_room'
        self._env_config = hparam.HParams(
            problem='NDH',
            history='all',
            path_type='trusted_path',
            max_goal_room_panos=4,
            base_path=self.data_dir,
            problem_path=os.path.join(self.data_dir, 'NDH'),
            vocab_file='vocab.txt',
            images_per_pano=36,
            max_conns=14,
            image_encoding_dim=2052,
            image_features_dir=os.path.join(self.data_dir, 'image_features'),
            instruction_len=50,
            max_agent_actions=6,
            reward_fn_type=self.reward_fn_type,
            reward_fn=env_config.RewardFunction.get_reward_fn(
                self.reward_fn_type))
        self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)

        self._env = env_ndh.NDHEnv(data_sources=['small_split'],
                                   runtime_config=self._runtime_config,
                                   env_config=self._env_config)

        scan_id = 0  # testdata has only a single scan, 'gZ6f7yhEvPG'
        _ = self._env.reset()
        golden_path = [
            'ba27da20782d4e1a825f0a133ad84da9',
            '47d8a8282c1c4a7fb3eeeacc45e9d959',  # in the goal room
            '0ee20663dfa34b438d48750ddcd7366c'  # in the goal room
        ]

        # Step through the trajectory and verify the env_output.
        for i, action in enumerate(
            [self._get_pano_id(p, scan_id) for p in golden_path]):
            expected_time_step = i + 1
            expected_heading, expected_pitch = self._env._get_heading_pitch(
                action, scan_id, expected_time_step)
            if i + 1 < len(golden_path):
                expected_oracle_action = self._get_pano_id(
                    golden_path[i + 1], scan_id)
            else:
                expected_oracle_action = constants.STOP_NODE_ID
            expected_reward = 1 if i <= 1 else 0
            env_test.verify_env_output(
                self,
                self._env.step(action),
                expected_reward=expected_reward,  # 1 while approaching the goal room, 0 once inside it.
                expected_done=False,
                expected_info='',
                expected_time_step=expected_time_step,
                expected_path_id=318,
                expected_pano_name=golden_path[i],
                expected_heading=expected_heading,
                expected_pitch=expected_pitch,
                expected_scan_id=scan_id,
                expected_oracle_action=expected_oracle_action)

        # Stop inside the goal room. Terminating the episode resets the
        # observation to the start of the next episode.
        env_test.verify_env_output(
            self,
            self._env.step(constants.STOP_NODE_ID),
            expected_reward=4,  # reached goal and stopped
            expected_done=True,  # end of episode
            expected_info='',
            # observation for next episode.
            expected_time_step=0,
            expected_path_id=1304,
            expected_pano_name='80929af5cf234ae38ac3a2a4e60e4342',
            expected_heading=6.101,
            expected_pitch=0.,
            expected_scan_id=scan_id,
            expected_oracle_action=self._get_pano_id(
                'ba27da20782d4e1a825f0a133ad84da9', scan_id))
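
The rewards asserted above (+1 for each step that reduces the distance to the goal room, 0 once already inside it, and +4 for stopping in the goal room) are consistent with a simple distance-based shaping scheme. The sketch below is purely illustrative and is not the VALAN reward_fn; in particular, the value returned for stopping outside the goal room is an assumption.

def illustrative_room_reward(prev_dist, new_dist, stopped, in_goal_room):
    # +4 for stopping inside the goal room, +1 for a step that gets strictly
    # closer to it, 0 otherwise.
    if stopped:
        return 4.0 if in_goal_room else 0.0  # the failure value is assumed
    return 1.0 if new_dist < prev_dist else 0.0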