Exemplo n.º 1
0
 def __init__(self,
              input_shape: int,
              output_size: int,
              option_size: int,
              rnn_type: str = 'lstm',
              rnn_size: int = 128,
              hidden_sizes: [List, Tuple, None] = None,
              inits: [(float, float, float), None] = (np.sqrt(2), 1., 0.01),
              hidden_nonlinearity=torch.nn.Tanh,  # Module form.
              use_interest=False,  # IOC sigmoid interest functions
              use_diversity=False,  # TDEOC q entropy output
              use_attention=False,
              ):
     """Recurrent option-critic model over one-hot observations.

     Args:
         input_shape: Number of discrete observation classes (one-hot width).
         output_size: Number of primitive actions.
         option_size: Number of options.
         rnn_type: 'gru' selects nn.GRU; anything else selects nn.LSTM.
         rnn_size: Hidden size of the recurrent core.
         hidden_sizes: Hidden-layer sizes of the per-head MLP bodies (None = no body).
         inits: (base_gain, value_gain, policy_gain) orthogonal-init gains;
             None falls back to the standard defaults.
         hidden_nonlinearity: Nonlinearity module class for the MLP bodies.
         use_interest: Enable IOC sigmoid interest functions.
         use_diversity: Enable TDEOC q-entropy output.
         use_attention: Enable attention in the option-critic head.
     """
     super().__init__()
     # Guard: the annotation admits inits=None, but inits is indexed below
     # (inits[:-1], inits[1], inits[2]); fall back to defaults instead of crashing.
     if inits is None:
         inits = (np.sqrt(2), 1., 0.01)
     self._obs_ndim = 0
     self.preprocessor = tscr(OneHotLayer(input_shape))
     self.rnn_type = rnn_type
     rnn_class = nn.GRU if rnn_type == 'gru' else nn.LSTM
     # RNN input: one-hot obs + one-hot previous action + scalar previous reward.
     self.rnn = rnn_class(input_shape + output_size + 1, rnn_size)  # At some point, want to put option in here too
     body_mlp_class = partial(MlpModel, hidden_sizes=hidden_sizes, output_size=None, nonlinearity=hidden_nonlinearity, inits=inits[:-1])  # MLP with no head (and potentially no body)
     # Separate mlp processors for each head
     self.model = tscr(OptionCriticHead_IndependentPreprocessor(
         input_size=rnn_size,
         input_module_class=body_mlp_class,
         output_size=output_size,
         option_size=option_size,
         intra_option_policy='discrete',
         use_interest=use_interest,
         use_diversity=use_diversity,
         use_attention=use_attention,
         baselines_init=True,
         orthogonal_init_base=inits[1],
         orthogonal_init_pol=inits[2]
     ))
Exemplo n.º 2
0
 def __init__(self,
              input_shape: Tuple,
              output_size: int,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              hidden_sizes: [List, Tuple] = None,
              baselines_init: bool = True,
              layer_norm: bool = False
              ):
     """Recurrent actor-critic with separate pi/v MLP bodies and RNN cores.

     Args:
         input_shape: Observation shape; flattened to a vector input.
         output_size: Number of discrete actions.
         rnn_type: 'gru' or an LSTM variant (anything but 'gru' sets rnn_is_lstm).
         rnn_size: Hidden size of each recurrent core.
         hidden_sizes: Hidden-layer sizes of the MLP bodies (None = no body).
         baselines_init: Apply baselines-style orthogonal initialization.
         layer_norm: Use the layer-normalized RNN variant.
     """
     super().__init__()
     self._obs_dim = 2
     self.rnn_is_lstm = rnn_type != 'gru'
     input_size = int(np.prod(input_shape))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     self.body_pi = MlpModel(input_size, hidden_sizes, None, nn.ReLU, None)
     self.body_v = MlpModel(input_size, hidden_sizes, None, nn.ReLU, None)
     self.rnn_pi = rnn_class(self.body_pi.output_size + output_size + 1, rnn_size)  # Concat action, reward
     self.rnn_v = rnn_class(self.body_v.output_size + output_size + 1, rnn_size)
     self.pi = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, output_size), nn.Softmax(-1))  # Need to activate after lstm
     self.v = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, 1))
     if baselines_init:
         self.body_pi.apply(apply_init); self.body_v.apply(apply_init)
         self.rnn_pi.apply(apply_init); self.rnn_v.apply(apply_init)
         # BUG FIX: nn.Module.apply calls fn(module), so the gain must be bound
         # by keyword (as at the other call sites using gain=...); the previous
         # partial(apply_init, O_INIT_VALUES['pi']) bound the gain to the
         # module parameter.
         self.pi.apply(partial(apply_init, gain=O_INIT_VALUES['pi']))
         self.v.apply(partial(apply_init, gain=O_INIT_VALUES['v']))
     self.body_pi, self.body_v, self.pi, self.v = tscr(self.body_pi), tscr(self.body_v), tscr(self.pi), tscr(self.v)
Exemplo n.º 3
0
 def __init__(self,
              input_classes: int,
              output_size: int,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              hidden_sizes: [List, Tuple] = None,
              baselines_init: bool = True,
              layer_norm: bool = False,
              prev_action: int = 2,
              prev_reward: int = 2,
              ):
     """Recurrent actor-critic: one-hot obs -> RNN -> shared MLP body -> pi/v heads.

     Nonzero prev_action / prev_reward feed the previous (one-hot) action /
     scalar reward into the recurrent core alongside the observation.
     """
     super().__init__()
     self._obs_dim = 0
     self.rnn_is_lstm = rnn_type != 'gru'
     self.preprocessor = tscr(OneHotLayer(input_classes))
     core_class = get_rnn_class(rnn_type, layer_norm)
     # Recurrent input width: one-hot obs, plus optional prev action / reward.
     core_input = input_classes
     if prev_action:
         core_input += output_size
     if prev_reward:
         core_input += 1
     self.rnn = core_class(core_input, rnn_size)
     self.body = MlpModel(rnn_size, hidden_sizes, None, nn.ReLU, None)
     self.pi = nn.Sequential(nn.Linear(self.body.output_size, output_size), nn.Softmax(-1))
     self.v = nn.Linear(self.body.output_size, 1)
     if baselines_init:
         self.rnn.apply(apply_init)
         self.body.apply(apply_init)
         self.pi.apply(partial(apply_init, gain=O_INIT_VALUES['pi']))
         self.v.apply(partial(apply_init, gain=O_INIT_VALUES['v']))
     self.body, self.pi, self.v = tscr(self.body), tscr(self.pi), tscr(self.v)
     self.p_a = prev_action > 0
     self.p_r = prev_reward > 0
Exemplo n.º 4
0
 def __init__(self,
              input_classes: int,
              output_size: int,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              hidden_sizes: [List, Tuple] = None,
              baselines_init: bool = True,
              layer_norm: bool = False,
              prev_action: int = 3,
              prev_reward: int = 3,
              ):
     """Recurrent actor-critic over one-hot obs with separate pi/v bodies and RNNs.

     prev_action / prev_reward select which heads get the previous action /
     reward as recurrent input: 1 = pi only, 2 = v only, 3 = both, 0 = neither.

     Args:
         input_classes: Number of discrete observation classes (one-hot width).
         output_size: Number of discrete actions.
         rnn_type: 'gru' or an LSTM variant (anything but 'gru' sets rnn_is_lstm).
         rnn_size: Hidden size of each recurrent core.
         hidden_sizes: Hidden-layer sizes of the MLP bodies (None = no body).
         baselines_init: Apply baselines-style orthogonal initialization.
         layer_norm: Use the layer-normalized RNN variant.
     """
     super().__init__()
     self._obs_dim = 0
     self.rnn_is_lstm = rnn_type != 'gru'
     self.preprocessor = tscr(OneHotLayer(input_classes))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     self.body_pi = MlpModel(input_classes, hidden_sizes, None, nn.ReLU, None)
     self.body_v = MlpModel(input_classes, hidden_sizes, None, nn.ReLU, None)
     rnn_input_size_pi = self.body_pi.output_size + (prev_action in [1,3]) * output_size + (prev_reward in [1,3])
     rnn_input_size_v = self.body_v.output_size + (prev_action in [2,3]) * output_size + (prev_reward in [2,3])
     self.rnn_pi = rnn_class(rnn_input_size_pi, rnn_size)  # Concat action, reward
     self.rnn_v = rnn_class(rnn_input_size_v, rnn_size)
     self.pi = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, output_size), nn.Softmax(-1))  # Need to activate after lstm
     self.v = nn.Sequential(nn.ReLU(), nn.Linear(rnn_size, 1))
     if baselines_init:
         self.body_pi.apply(apply_init); self.body_v.apply(apply_init)
         self.rnn_pi.apply(apply_init); self.rnn_v.apply(apply_init)
         # BUG FIX: bind the gain by keyword — nn.Module.apply passes the module
         # as the first positional argument, so partial(apply_init, gain_value)
         # previously bound the gain to the module parameter.
         self.pi.apply(partial(apply_init, gain=O_INIT_VALUES['pi']))
         self.v.apply(partial(apply_init, gain=O_INIT_VALUES['v']))
     self.body_pi, self.body_v, self.pi, self.v = tscr(self.body_pi), tscr(self.body_v), tscr(self.pi), tscr(self.v)
     self.p_a = prev_action
     self.p_r = prev_reward
Exemplo n.º 5
0
 def __init__(self,
              input_shape: Tuple,
              output_size: int,
              hidden_sizes: [List, Tuple, None] = None,
              nonlinearity: nn.Module = nn.ReLU
              ):
     """Feed-forward actor-critic over flattened rank-2 observations."""
     super().__init__()
     # bsuite observations are always rank-2 (even when they are (1, 1)).
     self._obs_ndim = 2
     flat_size = input_shape[0] * input_shape[1]
     self.preprocessor = MlpModel(flat_size, hidden_sizes, None, nonlinearity)
     feat_size = self.preprocessor.output_size
     self.v = tscr(nn.Linear(feat_size, 1))
     pi_head = nn.Sequential(nn.Linear(feat_size, output_size), nn.Softmax(-1))
     self.pi = tscr(pi_head)
Exemplo n.º 6
0
 def __init__(self,
              input_classes: int,
              output_size: int,
              option_size: int,
              hidden_sizes: [List, Tuple, None] = None,
              inits: [(float, float, float), None] = (np.sqrt(2), 1., 0.01),
              shared_processor: bool = True,
              hidden_nonlinearity=torch.nn.ReLU,  # Module form.
              use_interest=False,  # IOC sigmoid interest functions
              use_diversity=False,  # TDEOC q entropy output
              use_attention=False,
              ):
     """Feed-forward option-critic over one-hot observations.

     Args:
         input_classes: Number of discrete observation classes (one-hot width).
         output_size: Number of primitive actions.
         option_size: Number of options.
         hidden_sizes: Hidden-layer sizes of the MLP body/bodies. The shared
             branch indexes hidden_sizes[-1], so it requires a non-empty value.
         inits: (base_gain, value_gain, policy_gain) orthogonal-init gains;
             None falls back to the standard defaults.
         shared_processor: One MLP shared by all heads vs. one per head.
         hidden_nonlinearity: Nonlinearity module class for the MLP bodies.
         use_interest: Enable IOC sigmoid interest functions.
         use_diversity: Enable TDEOC q-entropy output.
         use_attention: Enable attention in the option-critic head.
     """
     super().__init__()
     # Guard: the annotation admits inits=None, but inits[:-1] below indexes it;
     # fall back to defaults instead of crashing.
     if inits is None:
         inits = (np.sqrt(2), 1., 0.01)
     self._obs_ndim = 0
     self.preprocessor = tscr(OneHotLayer(input_classes))
     body_mlp_class = partial(MlpModel, hidden_sizes=hidden_sizes, output_size=None, nonlinearity=hidden_nonlinearity, inits=inits[:-1])  # MLP with no head (and potentially no body)
     if shared_processor:
         # Same mlp for all heads
         self.model = tscr(nn.Sequential(body_mlp_class(input_classes), OptionCriticHead_SharedPreprocessor(
             input_size=hidden_sizes[-1],
             output_size=output_size,
             option_size=option_size,
             intra_option_policy='discrete',
             use_interest=use_interest,
             use_attention=use_attention,
             use_diversity=use_diversity,
             baselines_init=True,
         )))
     else:
         # Separate mlp processors for each head (though if using diversity, q entropy and q share mlp
         self.model = tscr(OptionCriticHead_IndependentPreprocessor(
             input_size=input_classes,
             input_module_class=body_mlp_class,
             output_size=output_size,
             option_size=option_size,
             intra_option_policy='discrete',
             use_interest=use_interest,
             use_diversity=use_diversity,
             use_attention=use_attention,
             baselines_init=True,
         ))
Exemplo n.º 7
0
 def __init__(self,
              input_shape: Tuple,
              output_size: int,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              hidden_sizes: [List, Tuple] = None,
              baselines_init: bool = True,
              layer_norm: bool = False
              ):
     """Recurrent actor-critic: flattened obs (+ prev action/reward) -> RNN -> MLP heads."""
     super().__init__()
     self._obs_dim = 2
     self.rnn_is_lstm = rnn_type != 'gru'
     flat_obs = int(np.prod(input_shape))
     core_class = get_rnn_class(rnn_type, layer_norm)
     # Core consumes the flattened observation plus the one-hot previous action
     # and scalar previous reward.
     self.rnn = core_class(flat_obs + output_size + 1, rnn_size)
     if baselines_init:
         head_inits_pi = (O_INIT_VALUES['base'], O_INIT_VALUES['pi'])
         head_inits_v = (O_INIT_VALUES['base'], O_INIT_VALUES['v'])
     else:
         head_inits_pi = head_inits_v = None
     self.pi = nn.Sequential(MlpModel(rnn_size, hidden_sizes, output_size, nn.ReLU, head_inits_pi), nn.Softmax(-1))
     self.v = nn.Sequential(MlpModel(rnn_size, hidden_sizes, 1, nn.ReLU, head_inits_v))
     if baselines_init:
         self.rnn.apply(apply_init)
     self.pi, self.v = tscr(self.pi), tscr(self.v)
Exemplo n.º 8
0
 def __init__(self,
              input_classes: int,
              output_size: int,
              option_size: int,
              hidden_sizes: [List, Tuple, None] = None,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              baselines_init: bool = True,
              layer_norm: bool = False,
              use_interest: bool = False,  # IOC sigmoid interest functions
              use_diversity: bool = False,  # TDEOC q entropy output
              use_attention: bool = False,
              prev_action: np.ndarray = np.ones(5, dtype=bool),
              prev_reward: np.ndarray = np.ones(5, dtype=bool),
              prev_option: np.ndarray = np.zeros(5, dtype=bool)
              ):
     """Recurrent option-critic: one-hot obs -> MLP body -> RNN -> shared OC head.

     prev_action / prev_reward / prev_option are boolean masks (default arrays
     are only read via .any(), never mutated, so the shared-default pitfall does
     not bite here); if ANY entry is set, the corresponding signal is fed to the
     recurrent core. Presumably one mask entry per head — TODO confirm against
     the OptionCriticHead implementation.
     """
     super().__init__()
     self._obs_ndim = 0
     self.rnn_is_lstm = rnn_type != 'gru'
     self.preprocessor = tscr(OneHotLayer(input_classes))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     self.body = MlpModel(input_classes, hidden_sizes, None, nn.ReLU, None)
     # Collapse each mask to a single flag; .item() yields plain Python bools.
     self.p_a, self.p_o, self.p_r = prev_action.any().item(), prev_option.any().item(), prev_reward.any().item()
     # Bools act as 0/1 in the width arithmetic: one-hot action, one-hot option,
     # scalar reward.
     rnn_input_size = self.body.output_size + (output_size * self.p_a) + (option_size * self.p_o) + self.p_r
     self.rnn = rnn_class(rnn_input_size, rnn_size)  # Concat action, reward
     self.oc = tscr(OptionCriticHead_SharedPreprocessor(
         input_size=rnn_size,
         output_size=output_size,
         option_size=option_size,
         intra_option_policy='discrete',
         use_interest=use_interest,
         use_diversity=use_diversity,
         use_attention=use_attention,
         baselines_init=baselines_init))
     if baselines_init:
         # RNN gets the dedicated 'lstm' gain; the MLP body gets the default gain.
         self.rnn.apply(partial(apply_init, gain=O_INIT_VALUES['lstm']))
         self.body.apply(apply_init)
     self.body = tscr(self.body)
Exemplo n.º 9
0
 def __init__(self,
              input_classes: int,
              output_size: int,
              option_size: int,
              hidden_sizes: [List, Tuple, None] = None,
              rnn_type: str = 'gru',
              rnn_size: int = 256,
              baselines_init: bool = True,
              layer_norm: bool = False,
              use_interest: bool = False,  # IOC sigmoid interest functions
              use_diversity: bool = False,  # TDEOC q entropy output
              use_attention: bool = False,
              prev_action: np.ndarray = np.ones(5, dtype=bool),
              prev_reward: np.ndarray = np.ones(5, dtype=bool),
              prev_option: np.ndarray = np.zeros(5, dtype=bool)
              ):
     """Recurrent option-critic with an independent MLP+RNN preprocessor per head.

     The per-head boolean masks (prev_action / prev_reward / prev_option) are
     stored and forwarded as-is to the head, which decides per head what to
     concatenate; the default arrays are only read here, never mutated.
     """
     super().__init__()
     self._obs_ndim = 0
     self.rnn_is_lstm = rnn_type != 'gru'
     self.preprocessor = tscr(OneHotLayer(input_classes))
     rnn_class = get_rnn_class(rnn_type, layer_norm)
     self.p_a, self.p_o, self.p_r = prev_action, prev_option, prev_reward
     # Headless MLP body factory; the head instantiates one body per output.
     body_mlp_class = partial(MlpModel, hidden_sizes=hidden_sizes, output_size=None, nonlinearity=nn.ReLU, inits=None)
     # NOTE(review): self.oc is not wrapped in tscr here, unlike the other
     # head/module assignments in this file — possibly deliberate (scripting
     # the RNN-bearing head may not be supported); confirm before "fixing".
     self.oc = OptionCriticHead_IndependentPreprocessorWithRNN(
         input_size=input_classes,
         input_module_class=body_mlp_class,
         rnn_module_class=rnn_class,
         output_size=output_size,
         option_size=option_size,
         rnn_size=rnn_size,
         intra_option_policy='discrete',
         use_interest=use_interest,
         use_diversity=use_diversity,
         use_attention=use_attention,
         baselines_init=baselines_init,
         prev_action=prev_action,
         prev_reward=prev_reward,
         prev_option=prev_option
     )
Exemplo n.º 10
0
 def __init__(self,
              input_classes: int,
              output_size: int,
              hidden_sizes: [List, Tuple, None] = None,
              inits: [(float, float, float), None] = (np.sqrt(2), 1., 0.01),
              nonlinearity: nn.Module = nn.ReLU,
              shared_processor: bool = False
              ):
     """Feed-forward actor-critic over one-hot observations.

     Args:
         input_classes: Number of discrete observation classes (one-hot width).
         output_size: Number of discrete actions.
         hidden_sizes: MLP hidden-layer sizes; the shared branch indexes
             hidden_sizes[-1], so it requires a non-empty value.
         inits: (base_gain, value_gain, policy_gain) orthogonal-init gains, or
             None to skip custom initialization.
         nonlinearity: Nonlinearity module class for the MLP.
         shared_processor: One MLP trunk shared by both heads vs. a full MLP
             per head.
     """
     super().__init__()
     self._obs_ndim = 0
     if shared_processor:
         # Shared trunk: one-hot -> MLP body (no output layer); heads are single
         # linear layers on top of the last hidden width.
         self.preprocessor = tscr(nn.Sequential(OneHotLayer(input_classes), MlpModel(input_classes, hidden_sizes, None, nonlinearity, inits[:-1] if inits is not None else inits)))
         self.v = tscr(layer_init(nn.Linear(hidden_sizes[-1], 1), inits[1]) if inits else nn.Linear(hidden_sizes[-1], 1))
         # BUG FIX: the policy head previously used the value gain inits[1];
         # policy output layers use the small policy gain inits[2] everywhere
         # else in this file (cf. orthogonal_init_pol=inits[2] and the
         # inits[0::2] slice in the unshared branch below).
         self.pi = tscr(nn.Sequential(layer_init(nn.Linear(hidden_sizes[-1], output_size), inits[2]) if inits else nn.Linear(hidden_sizes[-1], output_size), nn.Softmax(-1)))
     else:
         # Independent heads: each gets its own full MLP from the one-hot input.
         self.preprocessor = tscr(OneHotLayer(input_classes))
         self.v = tscr(MlpModel(input_classes, hidden_sizes, 1, nonlinearity, inits[:-1] if inits is not None else inits))
         # inits[0::2] == (base_gain, policy_gain): small gain on the pi output layer.
         self.pi = tscr(nn.Sequential(MlpModel(input_classes, hidden_sizes, output_size, nonlinearity, inits[0::2] if inits is not None else inits), nn.Softmax(-1)))