def main(_):
  logging.info('Total shards: %d; Current shard index: %d', FLAGS.num_tasks,
               FLAGS.task)
  runtime_config = common.RuntimeConfig(
      task_id=FLAGS.task, num_tasks=FLAGS.num_tasks)
  data_sources = FLAGS.data_source.split(',')
  aggregator_prefix = '_'.join(data_sources)
  # Get the problem instance.
  if FLAGS.problem == 'R2R':
    problem = r2r_problem.R2RProblem(
        runtime_config,
        mode=FLAGS.mode,
        data_sources=data_sources,
        curriculum=FLAGS.curriculum,
        agent_type=FLAGS.agent_type)
  elif FLAGS.problem == 'NDH':
    problem = ndh_problem.NDHProblem(
        runtime_config,
        mode=FLAGS.mode,
        data_sources=data_sources,
        agent_type=FLAGS.agent_type)
  elif FLAGS.problem == 'R2R+NDH':
    # The multi-task problem type is supported during training only; use the
    # task-specific problems during eval.
    if FLAGS.mode != 'train':
      raise ValueError('Multi-tasking is only supported for training. '
                       'Use task-specific problems during eval.')
    problem = mt_problem.MTProblem(runtime_config, mode=FLAGS.mode)
  else:
    raise ValueError('Unsupported problem type encountered: {}'.format(
        FLAGS.problem))
  logging.info('Current mode is %s', FLAGS.mode)
  if FLAGS.mode == 'train':
    logging.info('Running train actor...')
    actor.run(problem)
  else:
    logging.info('Running eval actor...')
    eval_actor.run(
        problem,
        # Evaluate each path in the dataset exactly once.
        num_episodes_per_iter=problem.get_environment().num_paths,
        task_id=runtime_config.task_id)
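# A hypothetical launch stub, not part of the original file: main() reads
# FLAGS.task, FLAGS.num_tasks, FLAGS.data_source, FLAGS.problem, FLAGS.mode,
# FLAGS.agent_type, and FLAGS.curriculum, so the real binary must define
# matching absl flags. The types, defaults, and help strings below are
# illustrative assumptions only, not the original definitions.
from absl import app
from absl import flags

flags.DEFINE_integer('task', 0, 'Index of the current shard.')
flags.DEFINE_integer('num_tasks', 1, 'Total number of shards.')
flags.DEFINE_string('data_source', '',
                    'Comma-separated list of data sources.')
flags.DEFINE_string('problem', 'R2R', 'One of: R2R, NDH, R2R+NDH.')
flags.DEFINE_string('mode', 'train', 'One of: train, eval.')
flags.DEFINE_string('agent_type', 'R2R', 'Agent type passed to the problem.')
flags.DEFINE_string('curriculum', '', 'Curriculum setting for R2RProblem.')

FLAGS = flags.FLAGS

if __name__ == '__main__':
  app.run(main)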
def main(_):
  # Pseudo config. Will not be used in learner.
  runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
  if FLAGS.problem == 'R2R':
    problem = r2r_problem.R2RProblem(
        runtime_config,
        mode=FLAGS.mode,
        data_sources=None,
        agent_type=FLAGS.agent_type)
  elif FLAGS.problem == 'NDH':
    problem = ndh_problem.NDHProblem(
        runtime_config,
        mode=FLAGS.mode,
        data_sources=None,
        agent_type=FLAGS.agent_type)
  elif FLAGS.problem == 'R2R+NDH':
    problem = mt_problem.MTProblem(runtime_config, mode=FLAGS.mode)
  else:
    raise ValueError('Unsupported problem type encountered: {}'.format(
        FLAGS.problem))
  logging.info('Begin running learner...')
  learner.run(problem)
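# A hypothetical launch stub, not part of the original file. The learner
# binary only needs the 'problem', 'mode', and 'agent_type' flags referenced
# in main() above; assuming those absl flags are defined at the top of the
# real file, the entry point is just:
if __name__ == '__main__':
  app.run(main)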
def setUp(self):
  super(DiscriminatorTest, self).setUp()
  self.data_dir = FLAGS.test_srcdir + ('valan/r2r/testdata')
  self._env_config = hparam.HParams(
      problem='R2R',
      scan_base_dir=self.data_dir,
      data_base_dir=self.data_dir,
      vocab_dir=self.data_dir,
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=64,
      direction_encoding_dim=256,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      project_decoder_input_states=True,
      use_all_final_states=False,
      reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=100)
  self._env = env.R2REnv(
      data_sources=['R2R_small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  self.num_panos = 36
  self.image_feature_size = 64
  self.direction_encoding_dim = 256
  self.num_actions = 14
  self.time_step = 3
  self.batch_size = 2
  done = np.array([[False, True], [True, False], [True, False]])
  self._test_environment = common.EnvOutput(
      reward=0,
      done=done,
      observation={
          constants.PATH_ID:
              # Shape = [time, batch]
              np.array([[2, 1], [0, 1], [0, 1]]),
          constants.IS_START:
              # Shape = [time, batch]
              np.array([[False, True], [True, False], [False, False]]),
          constants.DISC_MASK:
              # Shape = [time, batch]
              np.array([[False, True], [True, True], [True, True]]),
          constants.PANO_ENC:
              # Shape = [time, batch, num_panos, feature_size]
              tf.random.normal([
                  self.time_step, self.batch_size, self.num_panos,
                  self.image_feature_size + self.direction_encoding_dim
              ]),
          constants.CONN_ENC:
              # Shape = [time, batch, num_actions, feature_size]
              tf.random.normal([
                  self.time_step, self.batch_size, self.num_actions,
                  self.image_feature_size + self.direction_encoding_dim
              ]),
          constants.PREV_ACTION_ENC:
              # Shape = [time, batch, feature_size]
              tf.random.normal([
                  self.time_step, self.batch_size,
                  self.image_feature_size + self.direction_encoding_dim
              ]),
          constants.NEXT_GOLDEN_ACTION_ENC:
              # Shape = [time, batch, feature_size]
              tf.random.normal([
                  self.time_step, self.batch_size,
                  self.image_feature_size + self.direction_encoding_dim
              ]),
          constants.INS_TOKEN_IDS:
              # Shape = [time, batch, token_len]
              np.array([[[5, 3, 2, 1, 0], [3, 4, 5, 6, 1]],
                        [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]],
                        [[3, 6, 1, 0, 0], [3, 4, 5, 6, 1]]]),
          constants.INS_LEN:
              # Shape = [time, batch]
              np.tile(np.array([[3]]), [self.time_step, self.batch_size]),
          constants.VALID_CONN_MASK:
              # Shape = [time, batch, num_connections]
              np.tile(
                  np.array([[[True] * 14], [[True] * 5 + [False] * 9],
                            [[True] * 2 + [False] * 12]]),
                  [1, self.batch_size, 1]),
          constants.LABEL:
              # Shape = [time, batch]
              np.array([[False, False], [True, False], [True, False]])
      },
      info='')
  self._agent_config = agent_config.get_r2r_agent_config()
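# A hypothetical shape-sanity check over the fixture built in setUp(); it is
# not part of the original suite and only exercises the tensors constructed
# above (all names come from the fixture itself).
def testFixtureShapes(self):
  obs = self._test_environment.observation
  feature_size = self.image_feature_size + self.direction_encoding_dim
  self.assertEqual(
      obs[constants.PANO_ENC].shape,
      (self.time_step, self.batch_size, self.num_panos, feature_size))
  self.assertEqual(
      obs[constants.CONN_ENC].shape,
      (self.time_step, self.batch_size, self.num_actions, feature_size))
  self.assertEqual(obs[constants.PREV_ACTION_ENC].shape,
                   (self.time_step, self.batch_size, feature_size))
  self.assertEqual(obs[constants.LABEL].shape,
                   (self.time_step, self.batch_size))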
def setUp(self):
  super(EvalMetricTest, self).setUp()
  self.data_dir = FLAGS.test_srcdir + ('valan/r2r/testdata')
  self._env_config = hparam.HParams(
      problem='R2R',
      base_path=self.data_dir,
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=2052,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      reward_fn=env_config.RewardFunction.get_reward_fn('distance_to_goal'))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
  self._env = env.R2REnv(
      data_sources=['small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)

  # Scan: gZ6f7yhEvPG. The golden path is [1, 4, 6, 2]; the executed
  # trajectory below visits panos 1, 3, 7, 5, 2.
  self._golden_path = [1, 4, 6, 2]
  self._scan_id = 0  # testdata has a single scan only, 'gZ6f7yhEvPG'.

  # All steps share the same observation fields except the current pano id.
  def _obs(pano_id):
    return {
        constants.PANO_ID: pano_id,
        constants.GOLDEN_PATH: self._golden_path,
        constants.GOAL_PANO_ID: 2,
        constants.SCAN_ID: self._scan_id,
        constants.GOAL_ROOM_PANOS: [6, 2, constants.INVALID_NODE_ID],
    }

  self._env_list = [
      common.EnvOutput(reward=0, done=None, observation=_obs(1), info=None),
      common.EnvOutput(reward=1, done=None, observation=_obs(3), info=None),
      common.EnvOutput(reward=1, done=None, observation=_obs(7), info=None),
      common.EnvOutput(reward=1, done=None, observation=_obs(5), info=None),
      common.EnvOutput(reward=1, done=False, observation=_obs(2), info=None),
      common.EnvOutput(
          reward=4,  # success
          done=True,  # end of episode
          # Next episode's observation.
          observation=_obs(11),
          info=None),
  ]
  self._action_list = [3, 7, 5, 2, 0]
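# A hypothetical sanity check, not part of the original suite: each action
# in the fixture should select the next observed pano id in self._env_list,
# and the list should end with 0 (assumed here to be the STOP action id).
def testFixtureConsistency(self):
  observed_panos = [
      out.observation[constants.PANO_ID] for out in self._env_list[:-1]
  ]
  self.assertEqual(observed_panos, [1, 3, 7, 5, 2])
  self.assertEqual(self._action_list[:-1], observed_panos[1:])
  self.assertEqual(self._action_list[-1], 0)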
def testStepToGoalRoom(self):
  self.reward_fn_type = 'distance_to_room'
  self._env_config = hparam.HParams(
      problem='NDH',
      history='all',
      path_type='trusted_path',
      max_goal_room_panos=4,
      base_path=self.data_dir,
      problem_path=os.path.join(self.data_dir, 'NDH'),
      vocab_file='vocab.txt',
      images_per_pano=36,
      max_conns=14,
      image_encoding_dim=2052,
      image_features_dir=os.path.join(self.data_dir, 'image_features'),
      instruction_len=50,
      max_agent_actions=6,
      reward_fn_type=self.reward_fn_type,
      reward_fn=env_config.RewardFunction.get_reward_fn(self.reward_fn_type))
  self._runtime_config = common.RuntimeConfig(task_id=0, num_tasks=1)
  self._env = env_ndh.NDHEnv(
      data_sources=['small_split'],
      runtime_config=self._runtime_config,
      env_config=self._env_config)
  scan_id = 0  # testdata only has a single scan, 'gZ6f7yhEvPG'.
  _ = self._env.reset()
  golden_path = [
      'ba27da20782d4e1a825f0a133ad84da9',
      '47d8a8282c1c4a7fb3eeeacc45e9d959',  # in the goal room
      '0ee20663dfa34b438d48750ddcd7366c'  # in the goal room
  ]
  # Step through the trajectory and verify the env_output at each step.
  for i, action in enumerate(
      [self._get_pano_id(p, scan_id) for p in golden_path]):
    expected_time_step = i + 1
    expected_heading, expected_pitch = self._env._get_heading_pitch(
        action, scan_id, expected_time_step)
    if i + 1 < len(golden_path):
      expected_oracle_action = self._get_pano_id(golden_path[i + 1], scan_id)
    else:
      expected_oracle_action = constants.STOP_NODE_ID
    # +1 while still approaching the goal room; 0 once already inside it.
    expected_reward = 1 if i <= 1 else 0
    env_test.verify_env_output(
        self,
        self._env.step(action),
        expected_reward=expected_reward,
        expected_done=False,
        expected_info='',
        expected_time_step=expected_time_step,
        expected_path_id=318,
        expected_pano_name=golden_path[i],
        expected_heading=expected_heading,
        expected_pitch=expected_pitch,
        expected_scan_id=scan_id,
        expected_oracle_action=expected_oracle_action)
  # Stop at the goal pano. Terminating the episode resets the observation to
  # the next episode's first observation.
  env_test.verify_env_output(
      self,
      self._env.step(constants.STOP_NODE_ID),
      expected_reward=4,  # Reached the goal room and stopped.
      expected_done=True,  # End of episode.
      expected_info='',
      # Observation for the next episode.
      expected_time_step=0,
      expected_path_id=1304,
      expected_pano_name='80929af5cf234ae38ac3a2a4e60e4342',
      expected_heading=6.101,
      expected_pitch=0.,
      expected_scan_id=scan_id,
      expected_oracle_action=self._get_pano_id(
          'ba27da20782d4e1a825f0a133ad84da9', scan_id))
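# A hypothetical helper, not in the original test, that restates the reward
# expectations exercised above for the 'distance_to_room' reward function:
# +1 for each step that moves the agent closer to the goal room, 0 for steps
# taken once it is already inside, and +4 for stopping inside the room.
def _expected_room_reward(self, step_index, stopped_in_goal_room=False):
  if stopped_in_goal_room:
    return 4
  # In this trajectory the agent enters the goal room at step index 1, so
  # only the first two steps (indices 0 and 1) earn the shaping reward.
  return 1 if step_index <= 1 else 0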