Пример #1
0
 def __init__(
     self,
     image_shape,
     output_size,
     fc_sizes=512,
     use_maxpool=False,
     channels=None,  # None selects the CONVNET_DQN preset.
     kernel_sizes=None,
     strides=None,
     paddings=None,
     baselines_init=True,
     init_v=1.,
     init_pi=.01,
 ):
     """Build the convolutional trunk plus the policy and value output layers.

     When ``channels`` is given, the conv trunk is built from the explicit
     ``channels`` / ``kernel_sizes`` / ``strides`` / ``paddings`` /
     ``use_maxpool`` arguments.  Otherwise the ``CONVNET_DQN`` preset is
     expanded into the ``Conv2dHeadModel`` call and the other architecture
     arguments (including ``use_maxpool``) are not forwarded.  In both
     cases ``fc_sizes`` is passed as ``hidden_sizes``.

     ``init_pi`` and ``init_v`` are forwarded as the second argument of
     ``layer_init`` for the policy and value layers respectively.
     ``baselines_init`` is accepted but not read by this constructor.
     """
     super().__init__()
     if channels is None:
         # No explicit architecture: fall back to the module-level preset.
         self.conv = Conv2dHeadModel(image_shape=image_shape, **CONVNET_DQN, hidden_sizes=fc_sizes)
     else:
         self.conv = Conv2dHeadModel(
             image_shape=image_shape,
             channels=channels,
             kernel_sizes=kernel_sizes,
             strides=strides,
             paddings=paddings,
             use_maxpool=use_maxpool,
             hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
         )
     # Policy layer is created before the value layer (keeps parameter
     # registration and RNG consumption order stable).
     policy_layer = torch.nn.Linear(self.conv.output_size, output_size)
     self.pi = layer_init(policy_layer, init_pi)
     value_layer = torch.nn.Linear(self.conv.output_size, 1)
     self.value = layer_init(value_layer, init_v)
Пример #2
0
 def __init__(
     self,
     image_shape,
     output_size,
     fc_sizes=512,  # Width of the layer between conv and lstm.
     lstm_size=512,
     use_maxpool=False,
     channels=None,  # None (or empty) falls back to the built-in default.
     kernel_sizes=None,
     strides=None,
     paddings=None,
 ):
     """Build a conv trunk, an LSTM on top of it, and policy / value layers.

     Any of ``channels``, ``kernel_sizes``, ``strides`` or ``paddings`` that
     is falsy is replaced by the two-layer default listed below.  The LSTM
     input width is the conv output size plus ``output_size + 1``
     (presumably previous action and previous reward -- confirm against
     ``forward``).
     """
     super().__init__()
     conv_spec = dict(
         channels=channels or [16, 32],
         kernel_sizes=kernel_sizes or [8, 4],
         strides=strides or [4, 2],
         paddings=paddings or [0, 1],
     )
     self.conv = Conv2dHeadModel(
         image_shape=image_shape,
         use_maxpool=use_maxpool,
         hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
         **conv_spec,
     )
     lstm_input_size = self.conv.output_size + output_size + 1
     self.lstm = torch.nn.LSTM(lstm_input_size, lstm_size)
     self.pi = torch.nn.Linear(lstm_size, output_size)
     self.value = torch.nn.Linear(lstm_size, 1)
 def __init__(
     self,
     image_shape,
     output_size,
     fc_size=512,  # Width of the layer between conv and lstm.
     lstm_size=512,
     head_size=512,
     dueling=False,
     use_maxpool=False,
     channels=None,  # None (or empty) falls back to the built-in default.
     kernel_sizes=None,
     strides=None,
     paddings=None,
 ):
     """Build the conv trunk, the LSTM and the output head.

     Network defaults live in this method: falsy ``channels``,
     ``kernel_sizes``, ``strides`` or ``paddings`` are replaced by the
     three-layer defaults below.  The head is a ``DuelingHeadModel`` when
     ``dueling`` is truthy and an ``MlpModel`` otherwise.
     """
     super().__init__()
     self.dueling = dueling

     conv_channels = channels or [32, 64, 64]
     conv_kernels = kernel_sizes or [8, 4, 3]
     conv_strides = strides or [4, 2, 1]
     conv_paddings = paddings or [0, 1, 1]
     self.conv = Conv2dHeadModel(
         image_shape=image_shape,
         channels=conv_channels,
         kernel_sizes=conv_kernels,
         strides=conv_strides,
         paddings=conv_paddings,
         use_maxpool=use_maxpool,
         hidden_sizes=fc_size,
     )

     # LSTM input: conv features plus ``output_size + 1`` extra inputs.
     lstm_input_size = self.conv.output_size + output_size + 1
     self.lstm = torch.nn.LSTM(lstm_input_size, lstm_size)

     if not dueling:
         self.head = MlpModel(lstm_size, head_size, output_size=output_size)
     else:
         self.head = DuelingHeadModel(lstm_size, head_size, output_size)
Пример #4
0
 def __init__(
     self,
     image_shape,
     output_size,
     fc_sizes=512,
     use_maxpool=False,
     channels=None,  # None (or empty) falls back to the built-in default.
     kernel_sizes=None,
     strides=None,
     paddings=None,
 ):
     """Build a conv trunk with policy / value layers, all TorchScript-compiled.

     Falsy ``channels``, ``kernel_sizes``, ``strides`` or ``paddings`` are
     replaced by the two-layer defaults below.  The two linear layers are
     sized from ``self.conv.output_size`` *before* the conv module itself
     is replaced by its scripted version.
     """
     super().__init__()
     conv_spec = dict(
         channels=channels or [16, 32],
         kernel_sizes=kernel_sizes or [8, 4],
         strides=strides or [4, 2],
         paddings=paddings or [0, 1],
     )
     self.conv = Conv2dHeadModel(
         image_shape=image_shape,
         use_maxpool=use_maxpool,
         hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
         **conv_spec,
     )

     policy_layer = torch.nn.Linear(self.conv.output_size, output_size)
     self.pi = torch.jit.script(policy_layer)
     value_layer = torch.nn.Linear(self.conv.output_size, 1)
     self.value = torch.jit.script(value_layer)
     # Script the trunk last, after its ``output_size`` has been read above.
     self.conv = torch.jit.script(self.conv)
Пример #5
0
    def __init__(
            self,
            observation_shape,
            action_size,
            hidden_sizes=[64,64],  # mlp after lstm
            fc_sizes=128, # Between conv and lstm
            lstm_size=64,
            channels=None,  # None uses [8, 16]
            kernel_sizes=None,  # None uses [8, 4]
            strides=None,  # None uses [4, 2]
            paddings=None,  # None uses [0, 1]
            use_maxpool=False,
            ):
        """Instantiate neural net according to inputs.

        Layout: image -> conv (``Conv2dHeadModel`` with ``fc_sizes`` hidden
        layer) -> LSTM -> MLP with a single output.

        Args:
            observation_shape: Image shape handed to the conv head; its
                length is stored in ``self._obs_ndim``.
            action_size: Counted twice in the LSTM input width (previous
                action and current action).
            hidden_sizes: Hidden layer sizes of the MLP after the LSTM.
            fc_sizes: ``hidden_sizes`` of the conv head.
            lstm_size: LSTM hidden size and MLP input size.
            channels, kernel_sizes, strides, paddings: Conv architecture;
                ``None`` selects a fresh copy of the default noted in the
                signature.
            use_maxpool: Forwarded to ``Conv2dHeadModel``.
        """
        super().__init__()
        self._obs_ndim = len(observation_shape)

        # Build the default lists per call rather than sharing one list
        # object (a mutable default) across every instance of the model.
        if channels is None:
            channels = [8, 16]
        if kernel_sizes is None:
            kernel_sizes = [8, 4]
        if strides is None:
            strides = [4, 2]
        if paddings is None:
            paddings = [0, 1]
        # ``hidden_sizes`` keeps its list default because ``None`` may carry
        # its own meaning for MlpModel (TODO confirm); hand over a copy so
        # the shared default can never be mutated downstream.
        if isinstance(hidden_sizes, list):
            hidden_sizes = list(hidden_sizes)

        self.conv = Conv2dHeadModel(
            image_shape=observation_shape,
            channels=channels,
            kernel_sizes=kernel_sizes,
            strides=strides,
            paddings=paddings,
            use_maxpool=use_maxpool,
            hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
        )   # image -> conv (ReLU) -> linear (fc_sizes) - > ReLU

        # Input to LSTM: conv_output + prev_action + prev_reward + action
        lstm_input_size = self.conv.output_size + action_size + 1 + action_size
        self.lstm = torch.nn.LSTM(lstm_input_size, lstm_size)
        self.mlp = MlpModel(
            input_size=lstm_size,
            hidden_sizes=hidden_sizes,
            output_size=1,
        )
Пример #6
0
 def __init__(
     self,
     image_shape,
     output_size,
     fc_size=512,  # Width of the layer between conv and lstm.
     lstm_size=512,
     head_size=512,
     dueling=False,
     use_maxpool=False,
     channels=None,  # None (or empty) falls back to the built-in default.
     kernel_sizes=None,
     strides=None,
     paddings=None,
 ):
     """Build the conv trunk, the LSTM and the output head.

     Falsy ``channels``, ``kernel_sizes``, ``strides`` or ``paddings`` are
     replaced by the three-layer defaults below.  ``dueling`` selects a
     ``DuelingHeadModel`` head; otherwise an ``MlpModel`` head is used.
     """
     super().__init__()
     self.dueling = dueling

     conv_spec = dict(
         channels=channels or [32, 64, 64],
         kernel_sizes=kernel_sizes or [8, 4, 3],
         strides=strides or [4, 2, 1],
         paddings=paddings or [0, 1, 1],
     )
     self.conv = Conv2dHeadModel(
         image_shape=image_shape,
         use_maxpool=use_maxpool,
         hidden_sizes=fc_size,
         **conv_spec,
     )

     # LSTM input: conv features plus ``output_size + 1`` extra inputs.
     lstm_input_size = self.conv.output_size + output_size + 1
     self.lstm = torch.nn.LSTM(lstm_input_size, lstm_size)

     if not dueling:
         self.head = MlpModel(lstm_size, head_size, output_size=output_size)
     else:
         self.head = DuelingHeadModel(lstm_size, head_size, output_size)
Пример #7
0
    def __init__(
        self,
        observation_shape,
        action_size,
        hidden_sizes=[64, 64],  # mlp after conv head
        fc_sizes=64,  # hidden layer of the conv head
        channels=None,
        kernel_sizes=None,
        strides=None,
        paddings=None,
        use_maxpool=False,
    ):
        """Instantiate neural net according to inputs.

        Layout: image -> conv (``Conv2dHeadModel`` with ``fc_sizes`` hidden
        layer) -> MLP over the conv features widened by ``action_size``,
        producing a single output.

        Args:
            observation_shape: Image shape handed to the conv head; its
                length is stored in ``self._obs_ndim``.
            action_size: Added to the conv output size to form the MLP
                input width.
            hidden_sizes: Hidden layer sizes of the MLP.
            fc_sizes: ``hidden_sizes`` of the conv head.
            channels, kernel_sizes, strides, paddings: Conv architecture;
                falsy values select the two-layer defaults below.
            use_maxpool: Forwarded to ``Conv2dHeadModel``.
        """
        super().__init__()
        self._obs_ndim = len(observation_shape)

        # The signature default is one list object shared by every call;
        # forward a copy so downstream code can never mutate it.  ``None``
        # is not used as a sentinel here because it may carry its own
        # meaning for MlpModel (TODO confirm).
        if isinstance(hidden_sizes, list):
            hidden_sizes = list(hidden_sizes)

        self.conv = Conv2dHeadModel(
            image_shape=observation_shape,
            channels=channels or [4, 8],
            kernel_sizes=kernel_sizes or [8, 4],
            strides=strides or [4, 2],
            paddings=paddings or [0, 1],
            use_maxpool=use_maxpool,
            hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
        )  # image -> conv (ReLU) -> linear (fc_sizes) - > ReLU
        self.mlp = MlpModel(
            input_size=self.conv.output_size + action_size,
            hidden_sizes=hidden_sizes,
            output_size=1,
        )
Пример #8
0
    def __init__(
        self,
        observation_shape,
        action_size,
        fc_sizes=32,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        hidden_nonlinearity=torch.nn.Tanh,  # Module form.
        mu_nonlinearity=None,  # Module form; None appends nothing after mu MLP.
        init_log_std=0.,
    ):
        """Instantiate neural net module according to inputs.

        Builds a conv trunk, an MLP ``self.mu`` with ``action_size`` outputs
        (optionally followed by ``mu_nonlinearity()``), an MLP ``self.v``
        with a single output, and a learnable ``self.log_std`` vector of
        length ``action_size`` initialised to ``init_log_std``.

        ``fc_sizes`` is used both as the conv head's ``hidden_sizes`` and
        as the ``hidden_sizes`` of the two MLPs.

        No recurrent layers are created: the previous ``self.lstm`` /
        ``self.head`` construction referenced ``mlp_output_size`` and
        ``lstm_size``, neither of which is defined in this method or its
        signature, so every instantiation raised ``NameError``.
        """
        super().__init__()

        self.conv = Conv2dHeadModel(
            image_shape=observation_shape,
            channels=channels or [16, 32],
            kernel_sizes=kernel_sizes or [8, 4],
            strides=strides or [4, 2],
            paddings=paddings or [0, 1],
            use_maxpool=use_maxpool,
            hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
        )
        mu_mlp = MlpModel(
            input_size=self.conv.output_size,
            hidden_sizes=fc_sizes,
            output_size=action_size,
            nonlinearity=hidden_nonlinearity,
        )
        if mu_nonlinearity is not None:
            self.mu = torch.nn.Sequential(mu_mlp, mu_nonlinearity())
        else:
            self.mu = mu_mlp
        self.v = MlpModel(
            input_size=self.conv.output_size,
            hidden_sizes=fc_sizes,
            output_size=1,
            nonlinearity=hidden_nonlinearity,
        )

        self.log_std = torch.nn.Parameter(init_log_std *
                                          torch.ones(action_size))
Пример #9
0
 def __init__(
         self,
         image_shape,
         output_size,
         num_options,
         fc_sizes=512,
         use_maxpool=False,
         channels=None,  # None uses the CONVNET_DQN preset.
         kernel_sizes=None,
         strides=None,
         paddings=None,
         use_interest=False):
     """Instantiate neural net module according to inputs.

     Builds a conv trunk and, on top of its features, one head per
     option-related quantity, each with ``num_options`` outputs:
     ``q`` (linear), ``beta`` (linear + sigmoid), ``pi_omega``
     (linear + softmax over the last dim) and ``I`` (linear + sigmoid when
     ``use_interest`` is truthy, otherwise ``Dummy(num_options)``).
     ``pi`` is a ``DiscreteIntraOptionPolicy``.

     With ``channels=None`` the ``CONVNET_DQN`` preset is used and the
     explicit architecture arguments (including ``use_maxpool``) are not
     forwarded.  ``fc_sizes`` is supplied as ``hidden_sizes`` in both
     branches; in the preset branch it only fills the key in when the
     preset does not already define one.
     """
     super().__init__()
     if channels is not None:
         self.conv = Conv2dHeadModel(
             image_shape=image_shape,
             channels=channels,
             kernel_sizes=kernel_sizes,
             strides=strides,
             paddings=paddings,
             use_maxpool=use_maxpool,
             hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
         )
     else:
         # Copy the preset so the module-level constant is never mutated,
         # and honour ``fc_sizes`` here too (it was silently dropped in
         # this branch).  ``setdefault`` avoids overriding, or passing a
         # duplicate of, a ``hidden_sizes`` already present in the preset.
         preset = dict(CONVNET_DQN)
         preset.setdefault("hidden_sizes", fc_sizes)
         self.conv = Conv2dHeadModel(image_shape=image_shape, **preset)
     self.pi = DiscreteIntraOptionPolicy(self.conv.output_size, num_options,
                                         output_size, True)
     self.q = torch.nn.Linear(self.conv.output_size, num_options)
     self.beta = torch.nn.Sequential(
         torch.nn.Linear(self.conv.output_size, num_options),
         torch.nn.Sigmoid())
     self.pi_omega = torch.nn.Sequential(
         torch.nn.Linear(self.conv.output_size, num_options),
         torch.nn.Softmax(dim=-1))
     self.I = torch.nn.Sequential(
         torch.nn.Linear(self.conv.output_size, num_options),
         torch.nn.Sigmoid()) if use_interest else Dummy(num_options)
Пример #10
0
    def __init__(
        self,
        image_shape,
        output_size,
        fc_sizes=128,
        use_maxpool=False,
        channels=None,  # None selects the CONVNET_MINIGRID_TINY preset.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        scale_obs=True,  # Whether to scale observations
        obs_mean=4.,  # Mean to subtract
        obs_scale=8.,  # Scale to apply
    ):
        """Build the conv trunk, policy / value layers and observation scaler.

        With ``channels=None`` the conv trunk comes from the
        ``CONVNET_MINIGRID_TINY`` preset (``use_maxpool`` and the other
        architecture arguments are not forwarded); otherwise it is built
        from the explicit arguments, with falsy entries replaced by the
        two-layer defaults below.

        ``self.scaler`` is ``ObsScaler(obs_mean, obs_scale)`` when
        ``scale_obs`` is truthy and an identity module otherwise.  After the
        output layers have been sized, ``self.conv`` is rebound to a
        ``Sequential`` of the scaler followed by the conv trunk.
        """
        super().__init__()
        if channels is None:
            conv_trunk = Conv2dHeadModel(image_shape=image_shape,
                                         hidden_sizes=fc_sizes,
                                         **CONVNET_MINIGRID_TINY)
        else:  # Override defaults
            conv_trunk = Conv2dHeadModel(
                image_shape=image_shape,
                channels=channels or [16, 32],
                kernel_sizes=kernel_sizes or [8, 4],
                strides=strides or [4, 2],
                paddings=paddings or [0, 1],
                use_maxpool=use_maxpool,
                hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
            )
        self.conv = conv_trunk

        feature_size = self.conv.output_size
        self.pi = torch.nn.Linear(feature_size, output_size)
        self.value = torch.nn.Linear(feature_size, 1)

        if scale_obs:
            self.scaler = ObsScaler(obs_mean, obs_scale)
        else:
            self.scaler = nn.Identity()
        # Rebind ``conv`` so the scaler is applied before the conv trunk.
        self.conv = nn.Sequential(self.scaler, self.conv)
Пример #11
0
    def __init__(
        self,
        image_shape,
        output_size,
        fc_size=512,  # Width of the layer between conv and rnn.
        lstm_size=512,
        head_size=512,
        use_recurrence=True,
        dueling=False,
        use_maxpool=False,
        channels=None,  # None (or empty) falls back to the built-in default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
    ):
        """Build the conv trunk, the (optionally recurrent) core and the head.

        Network defaults live in this method: falsy ``channels``,
        ``kernel_sizes``, ``strides`` or ``paddings`` are replaced by the
        three-layer defaults below.

        ``self.rnn`` is a ``GRUCell`` when ``use_recurrence`` is truthy and a
        linear layer followed by ReLU otherwise; both take the conv features
        plus ``output_size + 1`` extra inputs and produce ``lstm_size``
        features.  ``dueling`` selects a ``DuelingHeadModel`` head instead
        of an ``MlpModel``.  The constructed module is printed once, and two
        gradient-logging slots are initialised to ``None``.
        """
        super().__init__()
        self.use_recurrence = use_recurrence
        self.dueling = dueling

        conv_spec = dict(
            channels=channels or [32, 64, 64],
            kernel_sizes=kernel_sizes or [8, 4, 3],
            strides=strides or [4, 2, 1],
            paddings=paddings or [0, 1, 1],
        )
        self.conv = Conv2dHeadModel(
            image_shape=image_shape,
            use_maxpool=use_maxpool,
            hidden_sizes=fc_size,
            **conv_spec,
        )

        core_input_size = self.conv.output_size + output_size + 1
        if not self.use_recurrence:
            # Feed-forward stand-in with the same input / output widths.
            self.rnn = nn.Sequential(
                nn.Linear(core_input_size, lstm_size),
                nn.ReLU(),
            )
        else:
            self.rnn = torch.nn.GRUCell(input_size=core_input_size,
                                        hidden_size=lstm_size)

        if not dueling:
            self.head = MlpModel(lstm_size, head_size, output_size=output_size)
        else:
            self.head = DuelingHeadModel(lstm_size, head_size, output_size)

        print('model initialized', self)  # NOTE for debug purposes

        # Slots used for logging gradients.
        self.prev_hs_pre_grad = None
        self.prev_hs_rec_grad = None
Пример #12
0
 def __init__(
     self,
     image_shape,
     output_size,
     option_size,
     fc_sizes=512,
     use_maxpool=False,
     channels=None,  # None (or empty) falls back to the built-in default.
     kernel_sizes=None,
     strides=None,
     paddings=None,
     baselines_init=True,  # Forwarded to the intra-option policy as ortho_init.
     use_interest=False,  # IOC sigmoid interest functions
 ):
     """Build the conv trunk and the option-related heads on top of it.

     Falsy ``channels``, ``kernel_sizes``, ``strides`` or ``paddings`` are
     replaced by the two-layer defaults below.  Every head consumes the
     conv features and, apart from ``pi``, emits ``option_size`` values:
     ``q`` (linear), ``beta`` (linear + sigmoid), ``pi_omega``
     (linear + softmax over the last dim) and ``pi_omega_I``
     (linear + sigmoid when ``use_interest`` is truthy, otherwise
     ``Dummy(option_size)``).
     """
     super().__init__()
     conv_spec = dict(
         channels=channels or [16, 32],
         kernel_sizes=kernel_sizes or [8, 4],
         strides=strides or [4, 2],
         paddings=paddings or [0, 1],
     )
     self.conv = Conv2dHeadModel(
         image_shape=image_shape,
         use_maxpool=use_maxpool,
         hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
         **conv_spec,
     )
     self.use_interest = use_interest

     self.pi = DiscreteIntraOptionPolicy(
         self.conv.output_size,
         option_size,
         output_size,
         ortho_init=baselines_init,
     )
     self.q = torch.nn.Linear(self.conv.output_size, option_size)
     self.beta = torch.nn.Sequential(
         torch.nn.Linear(self.conv.output_size, option_size),
         torch.nn.Sigmoid(),
     )
     self.pi_omega = torch.nn.Sequential(
         torch.nn.Linear(self.conv.output_size, option_size),
         torch.nn.Softmax(-1),
     )
     if use_interest:
         self.pi_omega_I = torch.nn.Sequential(
             torch.nn.Linear(self.conv.output_size, option_size),
             torch.nn.Sigmoid(),
         )
     else:
         self.pi_omega_I = Dummy(option_size)
Пример #13
0
 def __init__(
     self,
     image_shape,
     output_size,
     fc_sizes=512,
     use_maxpool=False,
     channels=None,  # None (or empty) falls back to the built-in default.
     kernel_sizes=None,
     strides=None,
     paddings=None,
 ):
     """Build a three-layer conv trunk with linear policy and value layers.

     Falsy ``channels``, ``kernel_sizes``, ``strides`` or ``paddings`` are
     replaced by the defaults below (note the all-zero default paddings).
     ``fc_sizes`` is passed as the conv head's ``hidden_sizes``.
     """
     super().__init__()
     conv_spec = dict(
         channels=channels or [32, 64, 64],
         kernel_sizes=kernel_sizes or [8, 4, 3],
         strides=strides or [4, 2, 1],
         paddings=paddings or [0, 0, 0],
     )
     self.conv = Conv2dHeadModel(
         image_shape=image_shape,
         use_maxpool=use_maxpool,
         hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
         **conv_spec,
     )
     feature_size = self.conv.output_size
     self.pi = torch.nn.Linear(feature_size, output_size)
     self.value = torch.nn.Linear(feature_size, 1)
Пример #14
0
    def __init__(
            self,
            image_shape,
            output_size,
            fc_sizes=512,  # Between conv and lstm.
            lstm_size=512,
            use_maxpool=False,
            channels=None,  # None uses default.
            kernel_sizes=None,
            strides=None,
            paddings=None,
            curiosity_kwargs=dict(curiosity_alg='none'),
            obs_stats=None):
        """Instantiate neural net module according to inputs.

        Builds, in this order: an optional curiosity model, the feature
        encoder ``self.conv``, an LSTM over the encoder features plus
        ``output_size`` extra inputs, and linear policy / value layers.

        Args:
            image_shape: Forwarded to the encoder and curiosity model.
            output_size: Policy output width; also passed to the curiosity
                models as ``action_size``.
            fc_sizes: ``hidden_sizes`` of the default conv encoder.
            lstm_size: LSTM hidden size.
            use_maxpool, channels, kernel_sizes, strides, paddings:
                Architecture of the default conv encoder; falsy list
                arguments fall back to the two-layer defaults.
            curiosity_kwargs: Mapping read (never modified) here.
                ``curiosity_alg`` selects ``'icm'``, ``'disagreement'``,
                ``'ndigo'`` or ``'rnd'``; ``'none'`` disables curiosity and
                any other value builds no curiosity model.  Whenever
                ``curiosity_alg`` is not ``'none'``, ``feature_encoding``
                must be one of ``'idf'``, ``'idf_burda'``, ``'idf_maze'``
                or ``'none'``.
            obs_stats: Optional pair unpacked into ``self.obs_mean`` and
                ``self.obs_std``; those attributes are not set when it is
                ``None``.

        Raises:
            ValueError: If curiosity is enabled and ``feature_encoding`` is
                not one of the supported names.  (Without this check
                ``self.conv`` was left unset and the failure surfaced later
                as an ``AttributeError`` when sizing the LSTM.)
        """
        super().__init__()

        self.obs_stats = obs_stats
        if self.obs_stats is not None:
            self.obs_mean, self.obs_std = self.obs_stats

        def build_default_conv():
            # Plain conv encoder shared by the "no curiosity" path and the
            # 'none' feature encoding.
            return Conv2dHeadModel(
                image_shape=image_shape,
                channels=channels or [16, 32],
                kernel_sizes=kernel_sizes or [8, 4],
                strides=strides or [4, 2],
                paddings=paddings or [0, 1],
                use_maxpool=use_maxpool,
                hidden_sizes=fc_sizes,  # Applies nonlinearity at end.
            )

        curiosity_alg = curiosity_kwargs['curiosity_alg']
        if curiosity_alg != 'none':
            if curiosity_alg == 'icm':
                self.curiosity_model = ICM(
                    image_shape=image_shape,
                    action_size=output_size,
                    feature_encoding=curiosity_kwargs['feature_encoding'],
                    batch_norm=curiosity_kwargs['batch_norm'],
                    prediction_beta=curiosity_kwargs['prediction_beta'],
                    obs_stats=self.obs_stats,
                    forward_loss_wt=curiosity_kwargs['forward_loss_wt'])
            elif curiosity_alg == 'disagreement':
                self.curiosity_model = Disagreement(
                    image_shape=image_shape,
                    action_size=output_size,
                    feature_encoding=curiosity_kwargs['feature_encoding'],
                    batch_norm=curiosity_kwargs['batch_norm'],
                    prediction_beta=curiosity_kwargs['prediction_beta'],
                    obs_stats=self.obs_stats,
                    device=curiosity_kwargs['device'],
                    forward_loss_wt=curiosity_kwargs['forward_loss_wt'])
            elif curiosity_alg == 'ndigo':
                self.curiosity_model = NDIGO(
                    image_shape=image_shape,
                    action_size=output_size,
                    obs_stats=self.obs_stats,
                    horizon=curiosity_kwargs['pred_horizon'],
                    feature_encoding=curiosity_kwargs['feature_encoding'],
                    batch_norm=curiosity_kwargs['batch_norm'],
                    num_predictors=curiosity_kwargs['num_predictors'],
                    device=curiosity_kwargs['device'],
                )
            elif curiosity_alg == 'rnd':
                self.curiosity_model = RND(
                    image_shape=image_shape,
                    prediction_beta=curiosity_kwargs['prediction_beta'],
                    drop_probability=curiosity_kwargs['drop_probability'],
                    gamma=curiosity_kwargs['gamma'],
                    device=curiosity_kwargs['device'])

            feature_encoding = curiosity_kwargs['feature_encoding']
            if feature_encoding == 'idf':
                self.conv = UniverseHead(
                    image_shape=image_shape,
                    batch_norm=curiosity_kwargs['batch_norm'])
                self.conv.output_size = self.curiosity_model.feature_size
            elif feature_encoding == 'idf_burda':
                self.conv = BurdaHead(
                    image_shape=image_shape,
                    output_size=self.curiosity_model.feature_size,
                    batch_norm=curiosity_kwargs['batch_norm'])
                self.conv.output_size = self.curiosity_model.feature_size
            elif feature_encoding == 'idf_maze':
                self.conv = MazeHead(
                    image_shape=image_shape,
                    output_size=self.curiosity_model.feature_size,
                    batch_norm=curiosity_kwargs['batch_norm'])
                self.conv.output_size = self.curiosity_model.feature_size
            elif feature_encoding == 'none':
                self.conv = build_default_conv()
            else:
                # Fail here with a clear message instead of leaving
                # ``self.conv`` undefined for the LSTM sizing below.
                raise ValueError(
                    f"Unsupported feature_encoding {feature_encoding!r}; "
                    "expected one of 'idf', 'idf_burda', 'idf_maze', 'none'.")
        else:
            self.conv = build_default_conv()

        self.lstm = torch.nn.LSTM(self.conv.output_size + output_size,
                                  lstm_size)
        self.pi = torch.nn.Linear(lstm_size, output_size)
        self.value = torch.nn.Linear(lstm_size, 1)