Example #1
            def __init__(self, initial_state, target):
                # Build the discrete THOR environment for this (start, target) pair;
                # scene_name, parent and net come from the enclosing scope.
                self.env = THORDiscreteEnvironment(
                    scene_name=scene_name,
                    initial_state_id=initial_state,
                    terminal_state_id=target,
                    h5_file_path=(lambda scene: parent.config["h5_file_path"].replace("{scene}", scene))
                )

                self.env.reset()
                self.net = net

    def run(self):
        scene_stats = dict()
        resultData = []
        for scene_scope, items in TASK_LIST.items():
            scene_net = self.scene_nets[scene_scope]
            scene_stats[scene_scope] = list()
            for task_scope in items:
                env = THORDiscreteEnvironment(
                    scene_name=scene_scope,
                    h5_file_path=(lambda scene: self.config.get("h5_file_path", "D:\\datasets\\visual_navigation_precomputed\\{scene}.h5").replace('{scene}', scene)),
                    terminal_state_id=int(task_scope)
                )

                ep_rewards = []
                ep_lengths = []
                ep_collisions = []
                for i_episode in range(NUM_EVAL_EPISODES):
                    env.reset()
                    terminal = False
                    ep_reward = 0
                    ep_collision = 0
                    ep_t = 0
                    # Roll out the policy until the agent reaches the target (or hits the 10000-step cap)
                    while not terminal:
                        state = torch.Tensor(env.render(mode='resnet_features'))
                        target = torch.Tensor(env.render_target(mode='resnet_features'))
                        (policy, value,) = scene_net.forward(self.shared_net.forward((state, target,)))

                        # Sample an action from the softmax over the policy logits
                        with torch.no_grad():
                            action = F.softmax(policy, dim=0).multinomial(1).data.numpy()[0]
                        env.step(action)
                        terminal = env.is_terminal

                        if ep_t == 10000: break
                        if env.collided: ep_collision += 1
                        ep_reward += env.reward
                        ep_t += 1

                    ep_lengths.append(ep_t)
                    ep_rewards.append(ep_reward)
                    ep_collisions.append(ep_collision)
                    if VERBOSE: print("episode #{} ends after {} steps".format(i_episode, ep_t))

                print('evaluation: %s %s' % (scene_scope, task_scope))
                print('mean episode reward: %.2f' % np.mean(ep_rewards))
                print('mean episode length: %.2f' % np.mean(ep_lengths))
                print('mean episode collision: %.2f' % np.mean(ep_collisions))
                scene_stats[scene_scope].extend(ep_lengths)
                resultData.append((scene_scope, str(task_scope), np.mean(ep_rewards), np.mean(ep_lengths), np.mean(ep_collisions),))

        print('\nResults (average trajectory length):')
        for scene_scope in scene_stats:
            print('%s: %.2f steps'%(scene_scope, np.mean(scene_stats[scene_scope])))
        
        if 'csv_file' in self.config and self.config['csv_file'] is not None:
            export_to_csv(resultData, self.config['csv_file'])
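
A note on export_to_csv: Example #1 ends by calling export_to_csv(resultData, self.config['csv_file']), but that helper is not part of the snippet. A minimal sketch of what it could look like, assuming the standard-library csv module and hypothetical column names (neither is taken from the original code):

import csv

def export_to_csv(rows, csv_file_path):
    # Hypothetical helper: one CSV row per (scene, target) evaluation result.
    # Column names are assumptions, not the original layout.
    with open(csv_file_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['scene', 'target', 'mean_reward', 'mean_length', 'mean_collisions'])
        writer.writerows(rows)
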
Example #3
    def _initialize_thread(self):
        h5_file_path = self.init_args.get('h5_file_path')
        # self.logger = logging.getLogger('agent')
        # self.logger.setLevel(logging.INFO)
        self.init_args['h5_file_path'] = lambda scene: h5_file_path.replace('{scene}', scene)
        self.env = THORDiscreteEnvironment(self.scene, **self.init_args)
        self.gamma: float = self.init_args.get('gamma', 0.99)
        self.grad_norm: float = self.init_args.get('grad_norm', 40.0)
        entropy_beta: float = self.init_args.get('entropy_beta', 0.01)
        self.max_t: int = self.init_args.get('max_t', 1)  # TODO: 5
        self.local_t = 0
        self.action_space_size = self.get_action_space_size()

        self.criterion = ActorCriticLoss(entropy_beta)
        self.policy_network = nn.Sequential(SharedNetwork(), SceneSpecificNetwork(self.get_action_space_size()))

        # Initialize the episode
        self._reset_episode()
        self._sync_network()
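
The worker above wraps SharedNetwork and SceneSpecificNetwork in a single nn.Sequential. Acting with that network would mirror the evaluation loops in Examples #1 and #4; a minimal sketch, assuming the environment exposes render/render_target with mode='resnet_features' and that the composed network returns a (policy, value) pair, as in those examples:

import torch
import torch.nn.functional as F

def sample_action(policy_network, env):
    # Sketch: feed the (state, target) feature pair through the network and
    # sample an action index from the softmax over the policy logits.
    state = torch.Tensor(env.render(mode='resnet_features'))
    target = torch.Tensor(env.render_target(mode='resnet_features'))
    with torch.no_grad():
        policy, value = policy_network((state, target))
        action = F.softmax(policy, dim=0).multinomial(1).item()
    return action
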
Example #4
    def run(self):
        scene_stats = dict()
        resultData = []
        for scene_scope, items in TASK_LIST.items():
            if len(self.config['test_scenes']) != 0 and scene_scope not in self.config['test_scenes']:
                continue

            scene_net = self.scene_nets[scene_scope]
            scene_stats[scene_scope] = list()
            for task_scope in items:
                env = THORDiscreteEnvironment(
                    scene_name=scene_scope,
                    h5_file_path=(lambda scene: self.config.get("h5_file_path", "D:\\datasets\\visual_navigation_precomputed\\{scene}.h5").replace('{scene}', scene)),
                    terminal_state_id=int(task_scope),
                )

                # Load precomputed hitting times and shortest-path distances from the scene's HDF5 graph data
                graph = env._get_graph_handle()
                hitting_times = graph['hitting_times'][()]
                shortest_paths = graph['shortest_path_distance'][()]

                ep_rewards = []
                ep_lengths = []
                ep_collisions = []
                ep_normalized_lengths = []
                for (i_episode, start) in enumerate(env.get_initial_states(int(task_scope))):
                    env.reset(initial_state_id = start)
                    terminal = False
                    ep_reward = 0
                    ep_collision = 0
                    ep_t = 0
                    # hitting_time caps the episode length; shortest_path is used below to normalize it
                    hitting_time = hitting_times[start, int(task_scope)]
                    shortest_path = shortest_paths[start, int(task_scope)]

                    while not terminal:
                        state = torch.Tensor(env.render(mode='resnet_features'))
                        target = torch.Tensor(env.render_target(mode='resnet_features'))
                        (policy, value,) = scene_net.forward(self.shared_net.forward((state, target,)))

                        with torch.no_grad():
                            action = F.softmax(policy, dim=0).multinomial(1).data.numpy()[0]
                        env.step(action)
                        terminal = env.is_terminal

                        if ep_t == hitting_time: break
                        if env.collided: ep_collision += 1
                        ep_reward += env.reward
                        ep_t += 1

                    ep_lengths.append(ep_t)
                    ep_rewards.append(ep_reward)
                    ep_collisions.append(ep_collision)
                    ep_normalized_lengths.append(min(ep_t, hitting_time) / shortest_path)
                    if VERBOSE: print("episode #{} ends after {} steps".format(i_episode, ep_t))

                print('evaluation: %s %s' % (scene_scope, task_scope))
                print('mean episode reward: %.2f' % np.mean(ep_rewards))
                print('mean episode length: %.2f' % np.mean(ep_lengths))
                print('mean episode collision: %.2f' % np.mean(ep_collisions))
                print('mean normalized episode length: %.2f' % np.mean(ep_normalized_lengths))
                scene_stats[scene_scope].extend(ep_lengths)
                resultData.append((scene_scope, str(task_scope), np.mean(ep_rewards), np.mean(ep_lengths), np.mean(ep_collisions), np.mean(ep_normalized_lengths),))

        print('\nResults (average trajectory length):')
        for scene_scope in scene_stats:
            print('%s: %.2f steps'%(scene_scope, np.mean(scene_stats[scene_scope])))
        
        if 'csv_file' in self.config and self.config['csv_file'] is not None:
            export_to_csv(resultData, self.config['csv_file'])
def convertToStateDict(data):
    # Rebuild a PyTorch state_dict from the arrays stored in the checkpoint
    return {key: torch.Tensor(v) for (key, v) in data.items()}


# Restore the shared trunk and the per-scene heads from the checkpoint
shared_net.load_state_dict(convertToStateDict(data['navigation']))
for key in TASK_LIST.keys():
    scene_nets[key].load_state_dict(
        convertToStateDict(data[f'navigation/{key}']))

scene_stats = dict()
for scene_scope, items in TASK_LIST.items():
    scene_net = scene_nets[scene_scope]
    scene_stats[scene_scope] = list()
    for task_scope in items:
        env = THORDiscreteEnvironment(
            scene_name=scene_scope,
            h5_file_path=(lambda scene: f"D:\\datasets\\visual_navigation_precomputed\\{scene}.h5"),
            terminal_state_id=int(task_scope))

        ep_rewards = []
        ep_lengths = []
        ep_collisions = []
        for i_episode in range(NUM_EVAL_EPISODES):
            env.reset()
            terminal = False
            ep_reward = 0
            ep_collision = 0
            ep_t = 0
            while not terminal:
                state = torch.Tensor(env.render(mode='resnet_features'))
                target = torch.Tensor(