def setUp(self):
  super(EnvTest, self).setUp()
  self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
  self._env_config = hparam.HParams(
      problem='R2R',
      scan_base_dir=self.data_dir,
      data_base_dir=self.data_dir,
      vocab_dir=self.data_dir,
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=64,
      direction_encoding_dim=256,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
  self._env = env.R2REnv(
      data_sources=['R2R_small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  # For deterministic behavior in test.
  np.random.seed(0)
def setUp(self):
  super(DiscriminatorProblemTest, self).setUp()
  self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
  self._env_config = hparam.HParams(
      problem='R2R',
      scan_base_dir=self.data_dir,
      data_base_dir=self.data_dir,
      vocab_dir=self.data_dir,
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=64,
      direction_encoding_dim=256,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
  self._env = env.R2REnv(
      data_sources=['R2R_small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  self._agent_config = agent_config.get_r2r_agent_config()
  self._agent_config.add_hparam('init_image_enc_with_text_state', True)
  self._agent_config.add_hparam('average_image_states_of_all_steps', False)
  self._agent_config.embed_action = True
def setUp(self):
  super(NDHEnvTest, self).setUp()
  self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
  self.reward_fn_type = 'distance_to_goal'
  self._env_config = hparam.HParams(
      problem='NDH',
      history='all',
      path_type='trusted_path',
      max_goal_room_panos=4,
      base_path=self.data_dir,
      problem_path=os.path.join(self.data_dir, 'NDH'),
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=2052,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      reward_fn_type=self.reward_fn_type,
      reward_fn=env_config.RewardFunction.get_reward_fn(self.reward_fn_type))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
  self._env = env_ndh.NDHEnv(
      data_sources=['small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  # For deterministic behavior in test.
  np.random.seed(0)
def get_default_env_config(): """Returns default env config.""" config = hparam.HParams(**DEFAULT_ENV_CONFIG) config.vln_reward_fn = env_config.RewardFunction.get_reward_fn( config.vln_reward_fn_type) config.ndh_reward_fn = env_ndh_config.RewardFunction.get_reward_fn( config.ndh_reward_fn_type) return config
def setUp(self):
  super(DiscriminatorTest, self).setUp()
  self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
  self._env_config = hparam.HParams(
      problem='R2R',
      scan_base_dir=self.data_dir,
      data_base_dir=self.data_dir,
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=2052,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
  self._env = env.R2REnv(
      data_sources=['R2R_small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  self.num_panos = 36
  self.image_feature_size = 2052
  self.num_actions = 14
  self.time_step = 3
  self.batch_size = 1
  done = np.array([[True], [False], [True]])
  done = np.reshape(done, [3, 1])
  self._test_environment = common.EnvOutput(
      reward=0,
      done=done,
      observation={
          constants.IS_START:
              np.array([[True], [False], [True]]),
          constants.DISC_MASK:
              np.array([[True], [False], [True]]),
          constants.PANO_ENC:
              tf.random.normal([
                  self.time_step, self.batch_size, self.num_panos,
                  self.image_feature_size
              ]),
          constants.CONN_ENC:
              tf.random.normal([
                  self.time_step, self.batch_size, self.num_actions,
                  self.image_feature_size
              ]),
          constants.INS_TOKEN_IDS:
              np.array([[[3, 6, 1, 0, 0]], [[3, 6, 1, 0, 0]],
                        [[3, 6, 1, 0, 0]]]),
          constants.VALID_CONN_MASK:
              np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                        [[True] * 2 + [False] * 12]])
      },
      info='')
  self._agent = discriminator_agent.DiscriminatorAgent(
      agent_config.get_r2r_agent_config())
def test_call_ndh(self):
  self._agent = agent.R2RAgent(agent_config.get_ndh_agent_config())
  self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
  self._env_config = hparam.HParams(
      problem='NDH',
      history='all',
      path_type='trusted_path',
      max_goal_room_panos=4,
      scan_base_dir=self.data_dir,
      data_base_dir=self.data_dir,
      vocab_dir=self.data_dir,
      problem_path=os.path.join(self.data_dir, 'NDH'),
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=64,
      direction_encoding_dim=256,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
  self._env = env.R2REnv(
      data_sources=['R2R_small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  env_output = self._env.reset()
  observation = tf.nest.map_structure(lambda t: tf.expand_dims(t, 0),
                                      env_output.observation)
  initial_agent_state = self._agent.get_initial_state(
      observation, batch_size=1)
  # Agent always expects time,batch dimensions. First add and then remove.
  env_output = utils.add_time_batch_dim(env_output)
  agent_output, _ = self._agent(env_output, initial_agent_state)
  self.assertEqual(agent_output.policy_logits.shape, [1, 1, 14])
  self.assertEqual(agent_output.baseline.shape, [1, 1])

def test_call_with_batched_environment(self):
  # NOTE: hypothetical test name. This body assumes a setUp that defines
  # self._agent, self._test_environment, self.batch_size and self.time_step
  # (as in the discriminator tests above); it cannot run inside
  # test_call_ndh, which defines none of them.
  initial_agent_state = ([
      (tf.random.normal([self.batch_size, 512]),
       tf.random.normal([self.batch_size, 512])),
      (tf.random.normal([self.batch_size, 512]),
       tf.random.normal([self.batch_size, 512]))
  ], tf.random.normal([self.batch_size, 5, 512]))
  agent_output, _ = self._agent(self._test_environment, initial_agent_state)
  self.assertEqual(agent_output.policy_logits.shape,
                   [self.time_step, self.batch_size, 14])
  self.assertEqual(agent_output.baseline.shape,
                   [self.time_step, self.batch_size])
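# Minimal standalone sketch (an illustrative assumption, not the library's
# utils.add_time_batch_dim) of the time,batch convention noted above: every
# tensor in the nested env output gains two leading singleton axes.
import tensorflow as tf

def add_time_batch_dim_sketch(nested):
  # Outer expand_dims adds the time axis, inner adds the batch axis.
  return tf.nest.map_structure(
      lambda t: tf.expand_dims(tf.expand_dims(t, 0), 0), nested)

assert add_time_batch_dim_sketch(tf.zeros([36, 64])).shape == [1, 1, 36, 64]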
def get_default_env_config(mode):
  """Returns default config using values from dict `DEFAULT_ENV_CONFIG`."""
  config = hparam.HParams(**DEFAULT_ENV_CONFIG)
  config.mode = mode
  if mode == 'train':
    config.max_agent_actions = config.max_agent_train_actions
  else:
    config.max_agent_actions = config.max_agent_test_actions
  config.image_features_path = (
      config.pano_image_features_path
      if config.panoramic_action_space else config.legacy_image_features_path)
  return config
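# Standalone sketch (an illustration, not the source) of the two
# mode-dependent choices above as pure functions, so the selection logic can
# be checked without building an HParams object. The numeric defaults are
# made up for the example.
def select_max_actions(mode, train_actions=20, test_actions=40):
  return train_actions if mode == 'train' else test_actions

def select_features_path(panoramic_action_space, pano_path, legacy_path):
  return pano_path if panoramic_action_space else legacy_path

assert select_max_actions('train') == 20
assert select_max_actions('test') == 40
assert select_features_path(True, 'pano_dir', 'legacy_dir') == 'pano_dir'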
def get_default_env_config(): """Returns default config using values from dict `DEFAULT_ENV_CONFIG`.""" config = hparam.HParams(**DEFAULT_ENV_CONFIG) config.reward_fn = RewardFunction.get_reward_fn(config.reward_fn_type) # Update directories if set in FLAGS. if FLAGS.scan_base_dir: config.scan_base_dir = FLAGS.scan_base_dir if FLAGS.data_base_dir: config.data_base_dir = FLAGS.data_base_dir if FLAGS.vocab_dir: config.vocab_dir = FLAGS.vocab_dir if FLAGS.vocab_file: config.vocab_file = FLAGS.vocab_file if FLAGS.image_features_dir: config.image_features_dir = FLAGS.image_features_dir return config
def get_default_curriculum_env_config(method, env_config=None):
  """Gets the default curriculum env config.

  Args:
    method: The method used in curriculum learning.
    env_config: Optional. The env config. If None, uses the default env
      config. Default, None.

  Returns:
    A curriculum env config.
  """
  if env_config is None:
    env_config = env_config_lib.get_default_env_config()
  config_updates = dict(env_config.values(), method=method)
  curriculum_env_config = DEFAULT_CURRICULUM_ENV_CONFIG.copy()
  curriculum_env_config.update(config_updates)
  config = hparam.HParams(**curriculum_env_config)
  return config
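# Standalone sketch (illustrative; every key and the method value below are
# made up) of the merge order above: the curriculum defaults are the base
# dict, and the env config values plus `method` override them.
curriculum_defaults = {'method': None, 'initial_fraction': 0.1}
env_values = {'instruction_len': 50}
merged = dict(curriculum_defaults)
merged.update(dict(env_values, method='sorted_by_path_length'))
assert merged == {
    'method': 'sorted_by_path_length',
    'initial_fraction': 0.1,
    'instruction_len': 50,
}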
def get_ndh_env_config():
  """Returns default NDH config using values from dict `NDH_ENV_CONFIG`."""
  # Input settings.
  history = NDH_ENV_CONFIG['history']
  if history == 'none':
    NDH_ENV_CONFIG['instruction_len'] = 1  # [<EOS>] fixed length.
  elif history == 'target':
    NDH_ENV_CONFIG['instruction_len'] = 3  # [<TAR> target <EOS>] fixed length.
  elif history == 'oracle_ans':
    # 16.16+/-9.67 ora utt len, 35.5 at x2 stddevs. 71 is double that.
    NDH_ENV_CONFIG['instruction_len'] = 70
  elif history == 'nav_q_oracle_ans':
    # 11.24+/-6.43 [plus Ora avg], 24.1 at x2 std.
    # 71+48 ~~ 120 per QA doubles both.
    NDH_ENV_CONFIG['instruction_len'] = 120
  else:  # i.e., 'all'
    # 4.93+/-3.21 turns -> 2.465+/-1.605 Q/A.
    # 5.67 at x2 std. Call it 6 (real max 13).
    NDH_ENV_CONFIG['instruction_len'] = 240
  config = hparam.HParams(**NDH_ENV_CONFIG)
  config.reward_fn = RewardFunction.get_reward_fn(config.reward_fn_type)
  return config
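# Standalone sketch (an illustration, not the source) of the same
# instruction-length dispatch as a lookup table, which makes the mapping easy
# to unit-test in isolation. 'all' (and any other value) falls through to
# 240. Unlike the branch chain above, this avoids mutating the module-level
# dict in place.
_INSTRUCTION_LEN_BY_HISTORY = {
    'none': 1,    # [<EOS>]
    'target': 3,  # [<TAR> target <EOS>]
    'oracle_ans': 70,
    'nav_q_oracle_ans': 120,
}

def instruction_len_for_history(history):
  return _INSTRUCTION_LEN_BY_HISTORY.get(history, 240)

assert instruction_len_for_history('target') == 3
assert instruction_len_for_history('all') == 240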
def get_default_env_config():
  """Returns default config using values from dict `EXAMPLE_ENV_CONFIG`."""
  config = hparam.HParams(**EXAMPLE_ENV_CONFIG)
  return config
def setUp(self):
  super(DiscriminatorTest, self).setUp()
  self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
  self._env_config = hparam.HParams(
      problem='R2R',
      scan_base_dir=self.data_dir,
      data_base_dir=self.data_dir,
      vocab_dir=self.data_dir,
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=64,
      direction_encoding_dim=256,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      project_decoder_input_states=True,
      use_all_final_states=False,
      reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
  self._env = env.R2REnv(
      data_sources=['R2R_small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  self.num_panos = 36
  self.image_feature_size = 64
  self.direction_encoding_dim = 256
  self.num_actions = 14
  self.time_step = 3
  self.batch_size = 2
  done = np.array([[False, True], [True, False], [True, False]])
  self._test_environment = common.EnvOutput(
      reward=0,
      done=done,
      observation={
          constants.PATH_ID:
              # Shape = [time, batch]
              np.array([[2, 1], [0, 1], [0, 1]]),
          constants.IS_START:
              # Shape = [time, batch]
              np.array([[False, True], [True, False], [False, False]]),
          constants.DISC_MASK:
              # Shape = [time, batch]
              np.array([[False, True], [True, True], [True, True]]),
          constants.PANO_ENC:
              # Shape = [time, batch, num_panos, feature_size]
              tf.random.normal([
                  self.time_step, self.batch_size, self.num_panos,
                  self.image_feature_size + self.direction_encoding_dim
              ]),
          constants.CONN_ENC:
              # Shape = [time, batch, num_actions, feature_size]
              tf.random.normal([
                  self.time_step, self.batch_size, self.num_actions,
                  self.image_feature_size + self.direction_encoding_dim
              ]),
          constants.PREV_ACTION_ENC:
              # Shape = [time, batch, feature_size]
              tf.random.normal([
                  self.time_step, self.batch_size,
                  self.image_feature_size + self.direction_encoding_dim
              ]),
          constants.NEXT_GOLDEN_ACTION_ENC:
              # Shape = [time, batch, feature_size]
              tf.random.normal([
                  self.time_step, self.batch_size,
                  self.image_feature_size + self.direction_encoding_dim
              ]),
          constants.INS_TOKEN_IDS:
              # Shape = [time, batch, token_len]
              np.array([[[5, 3, 2, 1, 0], [3, 4, 5, 6, 1]],
                        [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]],
                        [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]]]),
          constants.INS_LEN:
              # Shape = [time, batch]
              np.tile(np.array([[3]]), [self.time_step, self.batch_size]),
          constants.VALID_CONN_MASK:
              # Shape = [time, batch, num_connections]
              np.tile(
                  np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                            [[True] * 2 + [False] * 12]]),
                  [1, self.batch_size, 1]),
          constants.LABEL:
              # Shape = [time, batch]
              np.array([[False, False], [True, False], [True, False]])
      },
      info='')
  self._agent_config = agent_config.get_r2r_agent_config()
def setUp(self):
  super(EvalMetricTest, self).setUp()
  self.data_dir = FLAGS.test_srcdir + 'valan/r2r/testdata'
  self._env_config = hparam.HParams(
      problem='R2R',
      base_path=self.data_dir,
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=2052,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
  self._env = env.R2REnv(
      data_sources=['small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  # scan: gZ6f7yhEvPG
  # Agent's stepped path: 1, 3, 7, 5, 2
  self._golden_path = [1, 4, 6, 2]
  self._scan_id = 0  # testdata has single scan only 'gZ6f7yhEvPG'

  # All steps share the same observation apart from the current pano id.
  def _obs(pano_id):
    return {
        constants.PANO_ID: pano_id,
        constants.GOLDEN_PATH: self._golden_path,
        constants.GOAL_PANO_ID: 2,
        constants.SCAN_ID: self._scan_id,
        constants.GOAL_ROOM_PANOS: [6, 2, constants.INVALID_NODE_ID],
    }

  self._env_list = [
      common.EnvOutput(reward=0, done=None, observation=_obs(1), info=None),
      common.EnvOutput(reward=1, done=None, observation=_obs(3), info=None),
      common.EnvOutput(reward=1, done=None, observation=_obs(7), info=None),
      common.EnvOutput(reward=1, done=None, observation=_obs(5), info=None),
      common.EnvOutput(reward=1, done=False, observation=_obs(2), info=None),
      # Success: end of episode. The observation already belongs to the next
      # episode.
      common.EnvOutput(reward=4, done=True, observation=_obs(11), info=None),
  ]
  self._action_list = [3, 7, 5, 2, 0]
def get_ndh_agent_config():
  """Returns default config using values from dict `NDH_AGENT_CONFIG`."""
  config = hparam.HParams(**NDH_AGENT_CONFIG)
  return config
def testStepToGoalRoom(self):
  self.reward_fn_type = 'distance_to_room'
  self._env_config = hparam.HParams(
      problem='NDH',
      history='all',
      path_type='trusted_path',
      max_goal_room_panos=4,
      base_path=self.data_dir,
      problem_path=os.path.join(self.data_dir, 'NDH'),
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=2052,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      reward_fn_type=self.reward_fn_type,
      reward_fn=env_config.RewardFunction.get_reward_fn(self.reward_fn_type))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
  self._env = env_ndh.NDHEnv(
      data_sources=['small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  scan_id = 0  # testdata only has single scan 'gZ6f7yhEvPG'
  _ = self._env.reset()
  golden_path = [
      'ba27da20782d4e1a825f0a133ad84da9',
      '47d8a8282c1c4a7fb3eeeacc45e9d959',  # in the goal room
      '0ee20663dfa34b438d48750ddcd7366c',  # in the goal room
  ]
  # Step through the trajectory and verify the env_output.
  for i, action in enumerate(
      [self._get_pano_id(p, scan_id) for p in golden_path]):
    expected_time_step = i + 1
    expected_heading, expected_pitch = self._env._get_heading_pitch(
        action, scan_id, expected_time_step)
    if i + 1 < len(golden_path):
      expected_oracle_action = self._get_pano_id(golden_path[i + 1], scan_id)
    else:
      expected_oracle_action = constants.STOP_NODE_ID
    expected_reward = 1 if i <= 1 else 0
    env_test.verify_env_output(
        self,
        self._env.step(action),
        expected_reward=expected_reward,  # Moving towards goal.
        expected_done=False,
        expected_info='',
        expected_time_step=expected_time_step,
        expected_path_id=318,
        expected_pano_name=golden_path[i],
        expected_heading=expected_heading,
        expected_pitch=expected_pitch,
        expected_scan_id=scan_id,
        expected_oracle_action=expected_oracle_action)
  # Stop at goal pano. Terminating the episode results in resetting the
  # observation to next episode.
  env_test.verify_env_output(
      self,
      self._env.step(constants.STOP_NODE_ID),
      expected_reward=4,  # reached goal and stopped
      expected_done=True,  # end of episode
      expected_info='',
      # observation for next episode.
      expected_time_step=0,
      expected_path_id=1304,
      expected_pano_name='80929af5cf234ae38ac3a2a4e60e4342',
      expected_heading=6.101,
      expected_pitch=0.,
      expected_scan_id=scan_id,
      expected_oracle_action=self._get_pano_id(
          'ba27da20782d4e1a825f0a133ad84da9', scan_id))
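# Hedged sketch of 'distance_to_room' shaping, inferred only from the
# expected rewards in the test above (not the library's implementation):
# +1 when a step reduces distance to the goal room, 0 otherwise, and a
# terminal bonus of 4 for stopping inside the goal room.
def distance_to_room_reward(dist_before, dist_after, stopped_in_goal_room):
  if stopped_in_goal_room:
    return 4
  return 1 if dist_after < dist_before else 0

assert distance_to_room_reward(2, 1, False) == 1  # Step towards the room.
assert distance_to_room_reward(0, 0, False) == 0  # Step within the room.
assert distance_to_room_reward(0, 0, True) == 4   # Stop inside the room.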
def get_default_env_config():
  """Returns default config using values from dict `DEFAULT_ENV_CONFIG`."""
  config = hparam.HParams(**DEFAULT_ENV_CONFIG)
  config.reward_fn = RewardFunction.get_reward_fn(config.reward_fn_type)
  return config