Example #1
 def __init__(self,
              input_shape: Tuple,
              output_size: int,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              hidden_sizes: Optional[Union[List, Tuple]] = None,
              baselines_init: bool = True,
              layer_norm: bool = False
              ):
     super().__init__()
     self._obs_dim = 2
     self.rnn_is_lstm = rnn_type != 'gru'
     input_size = int(np.prod(input_shape))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     self.body_pi = MlpModel(input_size, hidden_sizes, None, nn.ReLU, None)
     self.body_v = MlpModel(input_size, hidden_sizes, None, nn.ReLU, None)
     self.rnn_pi = rnn_class(self.body_pi.output_size + output_size + 1, rnn_size)  # Concat action, reward
     self.rnn_v = rnn_class(self.body_v.output_size + output_size + 1, rnn_size)
     self.pi = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, output_size), nn.Softmax(-1))  # Need to activate after lstm
     self.v = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, 1))
     if baselines_init:
         self.body_pi.apply(apply_init); self.body_v.apply(apply_init)
         self.rnn_pi.apply(apply_init); self.rnn_v.apply(apply_init)
         self.pi.apply(partial(apply_init, gain=O_INIT_VALUES['pi']))
         self.v.apply(partial(apply_init, gain=O_INIT_VALUES['v']))
     self.body_pi, self.body_v, self.pi, self.v = tscr(self.body_pi), tscr(self.body_v), tscr(self.pi), tscr(self.v)
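
For reference, a minimal sketch of how the "concat action, reward" input to rnn_pi / rnn_v above is typically assembled at forward time. The tensor shapes, the plain nn.GRU standing in for rnn_class, and all dimension values below are assumptions for illustration, not taken from the example's source.

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    T, B, feat_dim, n_actions, rnn_size = 7, 16, 64, 4, 256
    obs_feat = torch.randn(T, B, feat_dim)              # output of body_pi / body_v (assumed shape)
    prev_action = torch.randint(0, n_actions, (T, B))   # discrete previous actions
    prev_reward = torch.randn(T, B)                     # previous scalar rewards

    rnn = nn.GRU(feat_dim + n_actions + 1, rnn_size)    # stand-in for rnn_class(...)
    rnn_input = torch.cat([
        obs_feat,
        F.one_hot(prev_action, num_classes=n_actions).float(),  # one-hot previous action
        prev_reward.unsqueeze(-1),                               # previous reward as one extra feature
    ], dim=-1)                                                   # [T, B, feat_dim + n_actions + 1]
    out, h_n = rnn(rnn_input)                                    # out: [T, B, rnn_size]
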
Example #2
 def __init__(self,
              input_classes: int,
              output_size: int,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              hidden_sizes: Optional[Union[List, Tuple]] = None,
              baselines_init: bool = True,
              layer_norm: bool = False,
              prev_action: int = 2,
              prev_reward: int = 2,
              ):
     super().__init__()
     self._obs_dim = 0
     self.rnn_is_lstm = rnn_type != 'gru'
     self.preprocessor = tscr(OneHotLayer(input_classes))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     rnn_input_size = input_classes
     if prev_action: rnn_input_size += output_size  # Use previous action as input
     if prev_reward: rnn_input_size += 1  # Use previous reward as input
     self.rnn = rnn_class(rnn_input_size, rnn_size)  # Concat action, reward
     self.body = MlpModel(rnn_size, hidden_sizes, None, nn.ReLU, None)
     self.pi = nn.Sequential(nn.Linear(self.body.output_size, output_size), nn.Softmax(-1))
     self.v = nn.Linear(self.body.output_size, 1)
     if baselines_init:
         self.rnn.apply(apply_init); self.body.apply(apply_init)
         self.pi.apply(partial(apply_init, gain=O_INIT_VALUES['pi']))
         self.v.apply(partial(apply_init, gain=O_INIT_VALUES['v']))
     self.body, self.pi, self.v = tscr(self.body), tscr(self.pi), tscr(self.v)
     self.p_a = prev_action > 0
     self.p_r = prev_reward > 0
Example #3
 def __init__(self,
              input_classes: int,
              output_size: int,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              hidden_sizes: Optional[Union[List, Tuple]] = None,
              baselines_init: bool = True,
              layer_norm: bool = False,
              prev_action: int = 3,
              prev_reward: int = 3,
              ):
     super().__init__()
     self._obs_dim = 0
     self.rnn_is_lstm = rnn_type != 'gru'
     self.preprocessor = tscr(OneHotLayer(input_classes))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     self.body_pi = MlpModel(input_classes, hidden_sizes, None, nn.ReLU, None)
     self.body_v = MlpModel(input_classes, hidden_sizes, None, nn.ReLU, None)
     # prev_action / prev_reward flags: 0 = neither branch, 1 = policy branch only, 2 = value branch only, 3 = both
     rnn_input_size_pi = self.body_pi.output_size + (prev_action in [1, 3]) * output_size + (prev_reward in [1, 3])
     rnn_input_size_v = self.body_v.output_size + (prev_action in [2, 3]) * output_size + (prev_reward in [2, 3])
     self.rnn_pi = rnn_class(rnn_input_size_pi, rnn_size)  # Concat action, reward
     self.rnn_v = rnn_class(rnn_input_size_v, rnn_size)
     self.pi = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, output_size), nn.Softmax(-1))  # Need to activate after lstm
     self.v = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, 1))
     if baselines_init:
         self.body_pi.apply(apply_init); self.body_v.apply(apply_init)
         self.rnn_pi.apply(apply_init); self.rnn_v.apply(apply_init)
         self.pi.apply(partial(apply_init, gain=O_INIT_VALUES['pi']))
         self.v.apply(partial(apply_init, gain=O_INIT_VALUES['v']))
     self.body_pi, self.body_v, self.pi, self.v = tscr(self.body_pi), tscr(self.body_v), tscr(self.pi), tscr(self.v)
     self.p_a = prev_action
     self.p_r = prev_reward
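
A small, purely illustrative helper (hypothetical, not part of the example's code) that reproduces the input-size arithmetic above with the flag semantics spelled out; body_out = 64 and output_size = 4 are assumed values.

    def rnn_input_sizes(body_out, output_size, prev_action, prev_reward):
        # Flag semantics inferred from the constructor above:
        # 0 = neither branch, 1 = policy branch only, 2 = value branch only, 3 = both branches.
        pi = body_out + (prev_action in (1, 3)) * output_size + (prev_reward in (1, 3))
        v = body_out + (prev_action in (2, 3)) * output_size + (prev_reward in (2, 3))
        return pi, v

    print(rnn_input_sizes(64, 4, prev_action=3, prev_reward=3))  # (69, 69): both RNNs see action + reward
    print(rnn_input_sizes(64, 4, prev_action=1, prev_reward=2))  # (68, 65): action to pi only, reward to v only
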
Example #4
 def __init__(self,
              input_shape: Tuple,
              output_size: int,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              hidden_sizes: Optional[Union[List, Tuple]] = None,
              baselines_init: bool = True,
              layer_norm: bool = False
              ):
     super().__init__()
     self._obs_dim = 2
     self.rnn_is_lstm = rnn_type != 'gru'
     input_size = int(np.prod(input_shape))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     self.rnn = rnn_class(input_size + output_size + 1, rnn_size)  # Concat action, reward
     pi_inits = (O_INIT_VALUES['base'], O_INIT_VALUES['pi']) if baselines_init else None
     v_inits = (O_INIT_VALUES['base'], O_INIT_VALUES['v']) if baselines_init else None
     self.pi = nn.Sequential(MlpModel(rnn_size, hidden_sizes, output_size, nn.ReLU, pi_inits), nn.Softmax(-1))
     self.v = nn.Sequential(MlpModel(rnn_size, hidden_sizes, 1, nn.ReLU, v_inits))
     if baselines_init:
         self.rnn.apply(apply_init)
     self.pi, self.v = tscr(self.pi), tscr(self.v)
Example #5
 def __init__(self,
              input_classes: int,
              output_size: int,
              option_size: int,
              hidden_sizes: Optional[Union[List, Tuple]] = None,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              baselines_init: bool = True,
              layer_norm: bool = False,
              use_interest: bool = False,  # IOC sigmoid interest functions
              use_diversity: bool = False,  # TDEOC q entropy output
              use_attention: bool = False,
              prev_action: np.ndarray = np.ones(5, dtype=bool),
              prev_reward: np.ndarray = np.ones(5, dtype=bool),
              prev_option: np.ndarray = np.zeros(5, dtype=bool)
              ):
     super().__init__()
     self._obs_ndim = 0
     self.rnn_is_lstm = rnn_type != 'gru'
     self.preprocessor = tscr(OneHotLayer(input_classes))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     self.p_a, self.p_o, self.p_r = prev_action, prev_option, prev_reward
     body_mlp_class = partial(MlpModel, hidden_sizes=hidden_sizes, output_size=None, nonlinearity=nn.ReLU, inits=None)
     self.oc = OptionCriticHead_IndependentPreprocessorWithRNN(
         input_size=input_classes,
         input_module_class=body_mlp_class,
         rnn_module_class=rnn_class,
         output_size=output_size,
         option_size=option_size,
         rnn_size=rnn_size,
         intra_option_policy='discrete',
         use_interest=use_interest,
         use_diversity=use_diversity,
         use_attention=use_attention,
         baselines_init=baselines_init,
         prev_action=prev_action,
         prev_reward=prev_reward,
         prev_option=prev_option
     )
Example #6
 def __init__(self,
              input_classes: int,
              output_size: int,
              option_size: int,
              hidden_sizes: Optional[Union[List, Tuple]] = None,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              baselines_init: bool = True,
              layer_norm: bool = False,
              use_interest: bool = False,  # IOC sigmoid interest functions
              use_diversity: bool = False,  # TDEOC q entropy output
              use_attention: bool = False,
              prev_action: np.ndarray = np.ones(5, dtype=bool),
              prev_reward: np.ndarray = np.ones(5, dtype=bool),
              prev_option: np.ndarray = np.zeros(5, dtype=bool)
              ):
     super().__init__()
     self._obs_ndim = 0
     self.rnn_is_lstm = rnn_type != 'gru'
     self.preprocessor = tscr(OneHotLayer(input_classes))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     self.body = MlpModel(input_classes, hidden_sizes, None, nn.ReLU, None)
     self.p_a, self.p_o, self.p_r = prev_action.any().item(), prev_option.any().item(), prev_reward.any().item()
     rnn_input_size = self.body.output_size + (output_size * self.p_a) + (option_size * self.p_o) + self.p_r
     self.rnn = rnn_class(rnn_input_size, rnn_size)  # Concat prev action, prev option, prev reward
     self.oc = tscr(OptionCriticHead_SharedPreprocessor(
         input_size=rnn_size,
         output_size=output_size,
         option_size=option_size,
         intra_option_policy='discrete',
         use_interest=use_interest,
         use_diversity=use_diversity,
         use_attention=use_attention,
         baselines_init=baselines_init))
     if baselines_init:
         self.rnn.apply(partial(apply_init, gain=O_INIT_VALUES['lstm']))
         self.body.apply(apply_init)
     self.body = tscr(self.body)
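
A hedged sketch of how the previous action, option, and reward might be concatenated onto the body features before self.rnn, using the boolean p_a / p_o / p_r flags computed above. The plain nn.GRU and every dimension are placeholder assumptions; the example's own rnn_class and forward logic are not shown here.

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    T, B = 7, 16
    output_size, option_size, body_out, rnn_size = 4, 2, 64, 256
    p_a, p_o, p_r = True, False, True          # mirrors prev_action.any(), prev_option.any(), prev_reward.any()

    rnn_input_size = body_out + output_size * p_a + option_size * p_o + p_r
    rnn = nn.GRU(rnn_input_size, rnn_size)     # stand-in for rnn_class(...)

    parts = [torch.randn(T, B, body_out)]      # MLP body features
    if p_a:
        parts.append(F.one_hot(torch.randint(0, output_size, (T, B)), output_size).float())
    if p_o:
        parts.append(F.one_hot(torch.randint(0, option_size, (T, B)), option_size).float())
    if p_r:
        parts.append(torch.randn(T, B, 1))
    out, h_n = rnn(torch.cat(parts, dim=-1))   # out: [T, B, rnn_size]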