示例#1
0
    def step(self, action):
        """Step the wrapped environment and post-process the result by mode.

        In ``'Bob'`` mode a reward numerically close to zero ends the episode:
        ``done`` is forced to True, ``info['is_success']`` is set, and a truthy
        ``'TimeLimit.truncated'`` entry is removed from ``info``.

        In ``'Alice'`` mode ``info`` is wrapped in an ``AttrDict`` and the
        reward is accumulated into ``self.total_rewards``. When the wrapped
        environment reports ``done``, the flag is reset to False unless the
        episode was truncated by the time limit; the final observation and
        the accumulated return are recorded in ``info`` and the accumulator
        is reset.

        Args:
            action: Action forwarded unchanged to ``self.env.step``.

        Returns:
            The ``(obs, reward, done, info)`` tuple, adjusted as described.
        """
        obs, reward, done, info = self.env.step(action)

        if self.mode == 'Bob':
            # First visit done for Bob: a (near-)zero reward finishes the episode.
            if np.allclose(reward, 0.):
                done = True
                info['is_success'] = True
                # Drop the truncation marker only when it is present and truthy.
                if info.get('TimeLimit.truncated'):
                    del info['TimeLimit.truncated']

            return obs, reward, done, info

        elif self.mode == 'Alice':
            info = AttrDict(info)
            self.total_rewards += reward
            if done:
                # The wrapped env's `done` is suppressed here; only a
                # time-limit truncation (below) ends the episode.
                done = False
                info.done_observation = obs
                # terminal_state is only assigned on truncation; it stays
                # unset when the episode finished without truncation.
                if info.get('TimeLimit.truncated'):
                    done = True
                    info.terminal_state = False
                info.episodic_return = self.total_rewards
                self.total_rewards = 0
            else:
                info.terminal_state = False
                info.episodic_return = None

            return obs, reward, done, info

        # NOTE(review): any other mode falls through and implicitly returns
        # None -- confirm whether callers can ever reach this path.
示例#2
0
 def step(self, action):
     """Step the wrapped environment and annotate ``info`` with return data.

     The reward is added to ``self.total_rewards``. On a finished step,
     ``info`` receives the final observation, a ``terminal_state`` flag
     (False when ``'TimeLimit.truncated'`` is truthy, True otherwise) and
     the accumulated return, after which the accumulator is reset to 0.
     On any other step ``terminal_state`` is False and ``episodic_return``
     is None.

     Args:
         action: Action forwarded unchanged to ``self.env.step``.

     Returns:
         ``(obs, reward, done, info)`` with ``info`` wrapped in an AttrDict.
     """
     observation, reward, done, raw_info = self.env.step(action)
     info = AttrDict(raw_info)
     self.total_rewards += reward

     # Mid-episode step: nothing to report yet.
     if not done:
         info.terminal_state = False
         info.episodic_return = None
         return observation, reward, done, info

     # Episode finished: record the last observation and the total return.
     info.done_observation = observation
     # A time-limit truncation does not count as a true terminal state.
     info.terminal_state = not info.get('TimeLimit.truncated')
     info.episodic_return = self.total_rewards
     self.total_rewards = 0
     return observation, reward, done, info
示例#3
0
    def __init__(
        self,
        module_list: Iterable,  # list of mrl.Modules (possibly nested)
        config: AttrDict):  # hyperparameters and module settings
        """Set up the agent folder, attach all modules, then load or save.

        The agent folder is ``config.parent_folder`` joined with the agent
        name (``config['agent_name']`` or a timestamp-based default). If that
        folder already exists the agent is loaded from it once the modules
        are registered; otherwise the folder is created and the fresh agent
        is saved.

        Args:
            module_list: Modules (possibly nested) to attach to the agent.
            config: Hyperparameters and module settings; must provide a
                truthy ``parent_folder``.
        """
        self.config = config

        parent_folder = config.parent_folder
        assert parent_folder, "Setting the agent's parent folder is required!"

        # Fall back to a timestamped name when none is configured.
        agent_name = config.get('agent_name')
        if not agent_name:
            agent_name = 'agent_' + short_timestamp()
        self.agent_name = agent_name
        self.agent_folder = os.path.join(parent_folder, self.agent_name)

        # An existing folder means there is a checkpoint to resume from.
        load_agent = os.path.exists(self.agent_folder)
        if load_agent:
            print('Detected existing agent! Loading agent from checkpoint...')
        else:
            os.makedirs(self.agent_folder, exist_ok=True)

        # Registries of modules which define _process_experience / _optimize.
        self._process_experience_registry = []
        self._optimize_registry = []
        self.config.env_steps = 0
        self.config.opt_steps = 0

        flat_modules = flatten_modules(module_list)
        self.module_dict = AttrDict()

        # First pass: expose every module as an attribute and in module_dict.
        for mod in flat_modules:
            mod_name = mod.module_name
            assert mod_name
            setattr(self, mod_name, mod)
            self.module_dict[mod_name] = mod

        # Second pass: register only after all modules have been attached.
        for mod in flat_modules:
            self._register_module(mod)

        self.training = True

        if not load_agent:
            self.save()
        else:
            self.load()
            print('Successfully loaded saved agent!')