Exemplo n.º 1
0
    def forward(self, inputs):
        """Compute the actor output for a batch of states.

        Args:
            inputs: sequence holding exactly one element, the states fed to
                the network (the actor is deterministic and takes no other
                input).

        Returns:
            The tanh-squashed output of the last layer ``self._fc3``, so every
            component lies in [-1, 1].

        Raises:
            AssertionError: if ``inputs`` does not contain exactly one element.
        """
        # the actor only ever consumes states, so anything else is a caller bug
        assert len(inputs) == 1, \
            'ERROR> this network expects only one input (states)'

        states = inputs[0]

        # plain path: two ReLU hidden layers followed by the tanh head
        if not self._config.useBatchnorm:
            hidden = F.relu(self._fc1(states))
            hidden = F.relu(self._fc2(hidden))
            return F.tanh(self._fc3(hidden))

        # batch-norm path: normalize the raw states and each hidden
        # pre-activation; the output head is left un-normalized
        hidden = self._bn0(states)
        hidden = F.relu(self._bn1(self._fc1(hidden)))
        hidden = F.relu(self._bn2(self._fc2(hidden)))
        return F.tanh(self._fc3(hidden))
Exemplo n.º 2
0
    def forward( self, state ) :
        r"""Evaluate the deterministic policy on a state (used for the max Q evaluation)

        The state goes through ``fc1`` and ``fc2``, each followed by a ReLU,
        and then through ``fc3`` followed by a tanh.

        Args:
            state (torch.tensor): state used to decide the action

        Returns:
            torch.tensor: tanh-squashed output of ``fc3``, each component in [-1, 1]

        """
        first_hidden = F.relu(self.fc1(state))
        second_hidden = F.relu(self.fc2(first_hidden))
        action = F.tanh(self.fc3(second_hidden))

        return action
Exemplo n.º 3
0
    def forward( self, observation ) :
        r"""Evaluate the deterministic policy on an observation (used for the max Q evaluation)

        The observation is passed through ``bn0`` first; the outputs of ``fc1``
        and ``fc2`` go through ``bn1`` and ``bn2`` respectively before their
        ReLU. The final layer ``fc3`` is followed by a tanh only.

        Args:
            observation (torch.tensor): observation used to decide the action

        Returns:
            torch.tensor: tanh-squashed output of ``fc3``, each component in [-1, 1]

        """
        normed_obs = self.bn0(observation)

        first_hidden = self.bn1(self.fc1(normed_obs))
        first_hidden = F.relu(first_hidden)

        second_hidden = self.bn2(self.fc2(first_hidden))
        second_hidden = F.relu(second_hidden)

        return F.tanh(self.fc3(second_hidden))
Exemplo n.º 4
0
    def forward(self, x1, x2, z=None):
        """Run two inputs through the same encoder/decoder with a shared gate.

        Both inputs go through identical modules (``conv1``..``conv5``,
        ``center``, ``dec5``..``dec0``, ``final``). The pooled bottleneck
        features of the two streams are concatenated and fed through
        ``mlp1`` -> tanh -> ``mlp2`` -> sigmoid; the resulting gate scales the
        bottleneck of both streams before decoding.

        Args:
            x1: input of the first stream.
            x2: input of the second stream.
            z: accepted but not used by this method.

        Returns:
            tuple: ``(self.final(...), self.final(...))`` for stream 1 and
            stream 2, in that order.
        """
        # Encoder: apply every stage to stream 1 and then stream 2, keeping
        # each stage output for the decoder skip connections.
        skips_1, skips_2 = [], []
        feat_1, feat_2 = x1, x2
        for encode in (self.conv1, self.conv2, self.conv3, self.conv4,
                       self.conv5):
            feat_1 = encode(feat_1)
            feat_2 = encode(feat_2)
            skips_1.append(feat_1)
            skips_2.append(feat_2)

        # Bottleneck is applied directly on the conv5 outputs.
        bottleneck_1 = self.center(feat_1)
        bottleneck_2 = self.center(feat_2)

        # Joint gate computed from the pooled features of both streams.
        # NOTE(review): the hard-coded 512 / 256 presumably mean `center`
        # emits 256 channels and the pooling yields 1x1 maps -- confirm.
        pooled_1 = self.global_avg_pool(bottleneck_1)
        pooled_2 = self.global_avg_pool(bottleneck_2)
        joint = torch.cat((pooled_1, pooled_2), dim=1).view(-1, 512)
        gate = F.sigmoid(self.mlp2(F.tanh(self.mlp1(joint))))
        gate = gate.view(-1, 256, 1, 1)

        up_1 = bottleneck_1 * gate
        up_2 = bottleneck_2 * gate

        # dec5..dec2 take the previous output concatenated (on dim 1) with the
        # matching encoder output conv5..conv2; conv1's output is not reused.
        skip_decoders = (self.dec5, self.dec4, self.dec3, self.dec2)
        for decode, skip_1, skip_2 in zip(skip_decoders,
                                          reversed(skips_1),
                                          reversed(skips_2)):
            up_1 = decode(torch.cat([up_1, skip_1], 1))
            up_2 = decode(torch.cat([up_2, skip_2], 1))

        # The last two decoder stages have no skip connection.
        for decode in (self.dec1, self.dec0):
            up_1 = decode(up_1)
            up_2 = decode(up_2)

        return self.final(up_1), self.final(up_2)
Exemplo n.º 5
0
 def forward(self, x):
     """Map ``x`` to a probability distribution over the outputs of ``self.l3``.

     ``x`` goes through ``self.l1`` and ``self.l2``, each followed by a tanh,
     and then through ``self.l3`` followed by a softmax.

     Args:
         x: input tensor; presumably shaped ``(..., features)`` with
             ``l1``..``l3`` acting on the last dimension -- confirm against
             the layer definitions.

     Returns:
         Tensor with the shape of ``self.l3``'s output whose entries along
         the last dimension are non-negative and sum to 1.
     """
     hidden = F.tanh(self.l1(x))
     hidden = F.tanh(self.l2(hidden))
     # Pass `dim` explicitly: the implicit choice is deprecated (it warns) and
     # resolves to dim 0 for 3-D input, which would normalize across the
     # leading dimension instead of across the outputs. dim=-1 matches the
     # old implicit result for 1-D and 2-D inputs.
     return F.softmax(self.l3(hidden), dim=-1)