示例#1
0
 def post_body_init(self):
     '''
     Finish the part of init that can only run once self.body_e has been assigned.
     Flattens body_e with util.nanflatten, tags each body with its position in the flat list (nanflat_e_idx), stores the body count, and logs util.self_desc(self).
     '''
     flat_bodies = util.nanflatten(self.body_e)
     self.nanflat_body_e = flat_bodies
     for flat_idx, body in enumerate(flat_bodies):
         # position of this body in the flattened list
         body.nanflat_e_idx = flat_idx
     self.body_num = len(flat_bodies)
     logger.info(util.self_desc(self))
示例#2
0
 def post_body_init(self):
     '''
     Complete initialization for components that require self.body_e to be populated.
     Stores the util.nanflatten view of body_e, writes each body's flat index to body.nanflat_e_idx, sets body_num, then logs util.self_desc(self).
     '''
     self.nanflat_body_e = util.nanflatten(self.body_e)
     body_count = 0
     for body in self.nanflat_body_e:
         # the running count doubles as the body's index in the flat list
         body.nanflat_e_idx = body_count
         body_count += 1
     self.body_num = len(self.nanflat_body_e)
     logger.info(util.self_desc(self))
示例#3
0
def multi_act_with_epsilon_greedy(nanflat_body_a, state_a, net,
                                  nanflat_epsilon_a):
    '''
    Multi-body nanflat_action_a on a single-pass from net. Uses epsilon-greedy but in a batch manner.

    The flattened states are concatenated into one input vector. The net output is read as consecutive slices of length body.action_dim, one slice per body, in the order of nanflat_body_a.
    For each body: with probability epsilon a uniformly random action index is drawn, otherwise the argmax of that body's output slice is used.

    nanflat_body_a: flat iterable of bodies; each must expose aeb and action_dim
    state_a: per-body states, flattened here via util.nanflatten
    net: network exposing wrap_eval(input)
    nanflat_epsilon_a: per-body epsilon, aligned with nanflat_body_a
    Returns the list of chosen actions, one per body.
    '''
    nanflat_state_a = util.nanflatten(state_a)
    cat_state_a = np.concatenate(nanflat_state_a)
    nanflat_action_a = []
    # The net input is the same for every body, so evaluate it at most once:
    # lazily on the first greedy pick, then reuse the output for later bodies.
    out = None
    start_idx = 0
    for body, e in zip(nanflat_body_a, nanflat_epsilon_a):
        logger.debug2(f'body: {body.aeb}, epsilon: {e}')
        end_idx = start_idx + body.action_dim
        if e > np.random.rand():
            logger.debug2(f'Random action')
            action = np.random.randint(body.action_dim)
        else:
            logger.debug2(f'Greedy action')
            if out is None:
                float_state = cat_state_a.astype('float')
                torch_state = Variable(torch.from_numpy(float_state).float())
                out = net.wrap_eval(torch_state)
            action = int(torch.max(out[start_idx:end_idx], dim=0)[1][0])
        nanflat_action_a.append(action)
        # Log before advancing start_idx so the reported range is the slice actually used for this body.
        logger.debug2(f'''
        body: {body.aeb}, net idx: {start_idx}-{end_idx}
        action: {action}''')
        start_idx = end_idx
    return nanflat_action_a
示例#4
0
文件: base.py 项目: xenakas/SLM-Lab
 def set_body_e(self, body_e):
     '''
     Store body_e on this object and derive its flattened bookkeeping.
     Per the original note, this is called by body_space.init_body_space to complete the backward reference EnvSpace needs.
     Sets self.body_e, self.nanflat_body_e (via util.nanflatten), each body's nanflat_e_idx, and self.body_num.
     '''
     self.body_e = body_e
     flat_bodies = util.nanflatten(body_e)
     self.nanflat_body_e = flat_bodies
     for flat_idx, body in enumerate(flat_bodies):
         # index of the body within the flattened list
         body.nanflat_e_idx = flat_idx
     self.body_num = len(flat_bodies)
示例#5
0
def multi_head_act_with_epsilon_greedy(nanflat_body_a, state_a, net,
                                       nanflat_epsilon_a, gpu):
    '''
    Multi-headed body nanflat_action_a on a single-pass from net. Uses epsilon-greedy but in a batch manner.

    Each flattened state is converted to a float tensor with a leading dimension of 1, moved to CUDA when requested and available, and wrapped in Variable. The whole list is passed to net.wrap_eval in one call; the outputs are paired with the bodies in order.
    For each body: with probability epsilon a uniformly random action index is drawn, otherwise the argmax along dim 1 of that body's output is used.

    nanflat_body_a: flat iterable of bodies; each must expose aeb and action_dim
    state_a: per-body states, flattened here via util.nanflatten
    net: network exposing wrap_eval(list_of_inputs) and returning one output per input
    nanflat_epsilon_a: per-body epsilon, aligned with nanflat_body_a
    gpu: truthy to place the inputs on CUDA when torch.cuda.is_available()
    Returns the list of chosen actions, one per body.
    '''
    nanflat_state_a = util.nanflatten(state_a)
    use_cuda = torch.cuda.is_available() and gpu
    torch_states = []
    for state in nanflat_state_a:
        torch_state = torch.from_numpy(state.astype('float')).float().unsqueeze_(dim=0)
        if use_cuda:
            # .cuda() returns a new tensor, so the result must be the one stored in the list
            torch_state = torch_state.cuda()
        torch_states.append(Variable(torch_state))
    outs = net.wrap_eval(torch_states)
    nanflat_action_a = []
    for body, e, output in zip(nanflat_body_a, nanflat_epsilon_a, outs):
        logger.debug2(f'body: {body.aeb}, epsilon: {e}')
        if e > np.random.rand():
            logger.debug2(f'Random action')
            action = np.random.randint(body.action_dim)
        else:
            logger.debug2(f'Greedy action')
            # cast to a plain int, consistent with multi_act_with_epsilon_greedy
            action = int(torch.max(output, dim=1)[1][0])
        nanflat_action_a.append(action)
        logger.debug2(f'epsilon: {e}, outputs: {output}, action: {action}')
    return nanflat_action_a
示例#6
0
def multi_head_act_with_boltzmann(nanflat_body_a, state_a, net, nanflat_tau_a,
                                  gpu):
    '''
    Multi-headed body nanflat_action_a on a single pass from net, sampling each action from a Boltzmann (softmax-with-temperature) distribution.

    Each flattened state is converted to a float tensor with a leading dimension of 1, moved to CUDA when requested and available, and wrapped in Variable. The whole list is passed to net.wrap_eval in one call.
    Each output is divided by its body's tau, softmaxed along dim 1, and the first row is used as the probabilities for np.random.choice over range(body.action_dim).

    nanflat_body_a: flat iterable of bodies; each must expose aeb and action_dim
    state_a: per-body states, flattened here via util.nanflatten
    net: network exposing wrap_eval(list_of_inputs) and returning one output per input
    nanflat_tau_a: per-body temperature, aligned with the net outputs
    gpu: truthy to place the inputs on CUDA when torch.cuda.is_available()
    Returns the list of sampled actions, one per body.
    '''
    nanflat_state_a = util.nanflatten(state_a)
    use_cuda = torch.cuda.is_available() and gpu
    torch_states = []
    for state in nanflat_state_a:
        torch_state = torch.from_numpy(state.astype('float')).float().unsqueeze_(dim=0)
        if use_cuda:
            # .cuda() returns a new tensor, so the result must be the one stored in the list
            torch_state = torch_state.cuda()
        torch_states.append(Variable(torch_state))
    outs = net.wrap_eval(torch_states)
    out_with_temp = [torch.div(x, t) for x, t in zip(outs, nanflat_tau_a)]
    logger.debug2(
        f'taus: {nanflat_tau_a}, outs: {outs}, out_with_temp: {out_with_temp}')
    nanflat_action_a = []
    for body, output in zip(nanflat_body_a, out_with_temp):
        # softmax is taken on CPU so the result can be handed to numpy
        probs = F.softmax(Variable(output.cpu()), dim=1).data.numpy()[0]
        action = np.random.choice(list(range(body.action_dim)), p=probs)
        logger.debug3(f'''
        body: {body.aeb}, output: {output},
        probs: {probs}, action: {action}''')
        nanflat_action_a.append(action)
    return nanflat_action_a
示例#7
0
def multi_act_with_boltzmann(nanflat_body_a, state_a, net, nanflat_tau_a, gpu):
    '''
    Multi-body nanflat_action_a on a single pass from net, sampling each action from a Boltzmann (softmax-with-temperature) distribution.

    The flattened states are concatenated into one float input (moved to CUDA when gpu is truthy and CUDA is available) and evaluated once with net.wrap_eval.
    The output is read as consecutive slices of length body.action_dim; each slice is divided by the body's tau, softmaxed along dim 0, and used as the probabilities for np.random.choice.
    Returns the list of sampled actions, one per body.
    '''
    flat_states = util.nanflatten(state_a)
    torch_state = torch.from_numpy(np.concatenate(flat_states).astype(float)).float()
    use_cuda = torch.cuda.is_available() and gpu
    if use_cuda:
        torch_state = torch_state.cuda()
    out = net.wrap_eval(Variable(torch_state))
    logger.debug2(f'taus: {nanflat_tau_a}')
    nanflat_action_a = []
    end_idx = 0
    for body, tau in zip(nanflat_body_a, nanflat_tau_a):
        # this body's slice starts where the previous one ended
        start_idx, end_idx = end_idx, end_idx + body.action_dim
        out_with_temp = torch.div(out[start_idx:end_idx], tau)
        logger.debug3(f'''
        tau: {tau}, out: {out},
        out select: {out[start_idx: end_idx]},
        out with temp: {out_with_temp}''')
        softmax_out = F.softmax(Variable(out_with_temp.cpu()), dim=0)
        probs = softmax_out.data.numpy()
        action = np.random.choice(list(range(body.action_dim)), p=probs)
        logger.debug3(f'''
        body: {body.aeb}, net idx: {start_idx}-{end_idx}
        probs: {probs}, action: {action}''')
        nanflat_action_a.append(action)
    return nanflat_action_a
示例#8
0
 def step(self, action_e):
     '''
     Advance the underlying env by one step and return (reward_e, state_e, done_e).
     If self.done is already set, the env is reset instead and the result of self.reset() is returned.
     Otherwise the flattened actions are sent to self.u_env.step, and the per-(a, b) rewards, states and local done flags are copied from the returned env info into fresh data containers.
     self.done is then updated: set when all done flags hold (util.nonan_all) or the clock's 't' exceeds self.max_timestep.
     '''
     # TODO implement clock_speed: step only if self.clock.to_step()
     if self.done:
         return self.reset()
     flat_action_e = util.nanflatten(action_e)
     env_info_dict = self.u_env.step(flat_action_e)
     reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(ENV_DATA_NAMES, e=self.e)
     for (a, b), _body in util.ndenumerate_nonan(self.body_e):
         ab = (a, b)
         info_a = self.get_env_info(env_info_dict, a)
         reward_e[ab] = info_a.rewards[b]
         state_e[ab] = info_a.states[b]
         done_e[ab] = info_a.local_done[b]
     all_done = util.nonan_all(done_e)
     self.done = (all_done or self.clock.get('t') > self.max_timestep)
     return reward_e, state_e, done_e
示例#9
0
 def space_step(self, action_e):
     '''
     Space-mode step: advance the underlying env once and return (reward_e, state_e, done_e).
     If self.done is already set, returns self.space_reset() instead.
     Otherwise the flattened actions are sent to self.u_env.step; for every (a, b) body the reward (multiplied by self.reward_scale), state and local done flag are copied from the env info into fresh data containers.
     self.done is then updated: set when all done flags hold (util.nonan_all) or self.clock.t exceeds self.max_t. The step result is logged at debug level.
     '''
     # TODO implement clock_speed: step only if self.clock.to_step()
     if self.done:
         return self.space_reset()
     flat_action_e = util.nanflatten(action_e)
     env_info_dict = self.u_env.step(flat_action_e)
     reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(
         ENV_DATA_NAMES, e=self.e)
     for (a, b), _body in util.ndenumerate_nonan(self.body_e):
         ab = (a, b)
         info_a = self._get_env_info(env_info_dict, a)
         reward_e[ab] = info_a.rewards[b] * self.reward_scale
         state_e[ab] = info_a.states[b]
         done_e[ab] = info_a.local_done[b]
     all_done = util.nonan_all(done_e)
     self.done = (all_done or self.clock.t > self.max_t)
     logger.debug(
         f'Env {self.e} step reward_e: {reward_e}, state_e: {state_e}, done_e: {done_e}'
     )
     return reward_e, state_e, done_e
示例#10
0
 def space_init(self, agent_space, body_a, global_nets):
     '''
     Post-init hook for the space setting (per the original note, aeb is already correct from __init__).
     Stores the agent_space / body_a references, flattens body_a, and for every body sets its agent, flat index and a memory built from the class named by agent_spec 'memory.name'. The first body becomes the default self.body.
     Then builds the algorithm named by agent_spec 'algorithm.name', and finally copies onto each body any attribute of the default body that the body lacks or holds as NaN (per util.gen_isnan).
     '''
     self.agent_space = agent_space
     self.body_a = body_a
     self.aeb_space = agent_space.aeb_space
     self.nanflat_body_a = util.nanflatten(self.body_a)
     for flat_idx, body in enumerate(self.nanflat_body_a):
         if flat_idx == 0:  # NOTE set default body
             self.body = body
         body.agent = self
         body.nanflat_a_idx = flat_idx
         memory_name = ps.get(self.agent_spec, 'memory.name')
         MemoryClass = getattr(memory, memory_name)
         body.memory = MemoryClass(self.agent_spec['memory'], body)
     self.body_num = len(self.nanflat_body_a)
     algorithm_name = ps.get(self.agent_spec, 'algorithm.name')
     AlgorithmClass = getattr(algorithm, algorithm_name)
     self.algorithm = AlgorithmClass(self, global_nets)
     # after algo init, transfer any missing variables from default body
     for body in self.nanflat_body_a:
         for attr_name, default_value in vars(self.body).items():
             current_value = getattr(body, attr_name, None)
             if util.gen_isnan(current_value):
                 setattr(body, attr_name, default_value)
示例#11
0
def test_nanflatten(arr, res):
    '''Check that util.nanflatten applied to arr (as an ndarray) equals res (as an ndarray).'''
    expected = np.array(res)
    flattened = util.nanflatten(np.array(arr))
    assert np.array_equal(flattened, expected)
示例#12
0
def test_nanflatten(arr, res):
    '''Assert that util.nanflatten(np.array(arr)) is array-equal to np.array(res).'''
    input_arr, expected_arr = np.array(arr), np.array(res)
    actual_arr = util.nanflatten(input_arr)
    assert np.array_equal(actual_arr, expected_arr)