Example #1
File: net.py  Project: wide725/DRL-Torcs
    def forward(self, x):
        """
            returns value estimation, action, log_action_prob
        """
        # action
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.conv3(x)
        x = F.relu(x)
        x = x.view(-1, 32 * 7 * 7)
        x = self.linear1(x)
        x = F.relu(x)

        mean = self.mu(x)  # N, num_actions
        logstd = self.logstd.expand_as(mean)
        std = torch.exp(logstd)
        action = torch.normal(mean, std)

        # value
        v = self.critic_linear(x)

        # action prob on log scale
        logprob = log_normal_density(action, mean, std=std, log_std=logstd)
        return v, action, logprob, mean
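Every snippet on this page calls a module-level helper, log_normal_density, whose definition the page never shows. A minimal sketch of such a helper, assuming it returns the per-sample log-density of a diagonal Gaussian summed over action dimensions (shape N, 1):

import numpy as np
import torch

def log_normal_density(x, mean, log_std, std):
    # Sketch only: log N(x; mean, std^2) for a diagonal Gaussian,
    # summed over the action dimension.
    variance = std.pow(2)
    log_density = -(x - mean).pow(2) / (2 * variance) \
                  - 0.5 * np.log(2 * np.pi) - log_std   # (N, num_actions)
    return log_density.sum(dim=-1, keepdim=True)        # (N, 1)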
Example #2
    def forward(self, x):
        """
            returns value estimation, action, log_action_prob
        """
        # action
        act = self.act_fc1(x)
        act = torch.relu(act)
        act = self.act_fc2(act)
        act = torch.relu(act)
        mean = self.mu(act)  # N, num_actions
        logstd = self.logstd.expand_as(mean)
        std = torch.exp(logstd)
        action = torch.normal(mean, std)

        # detach from the graph before squashing each action channel;
        # `sigmoid` is a module-level helper (see the sketch below)
        action = action.detach().cpu().numpy()[0]

        action[0] = np.tanh(action[0])  # steering
        action[1] = sigmoid(action[1])  # acceleration
        action[2] = sigmoid(action[2])  # braking

        action = torch.as_tensor(action)
        action = action.view(1, -1)

        # value
        v = self.value_fc1(x)
        v = torch.relu(v)
        v = self.value_fc2(v)
        v = torch.relu(v)
        v = self.value_fc3(v)

        # action prob on log scale
        logprob = log_normal_density(action, mean, std=std, log_std=logstd)
        return v, action, logprob, mean
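The sigmoid applied to the NumPy action values above is not a Python built-in, so the project presumably defines its own helper, which this page does not show. A plausible stand-in:

import numpy as np

def sigmoid(x):
    # Assumed equivalent of the project's helper: the logistic function,
    # used to squash the acceleration and braking channels into (0, 1).
    return 1.0 / (1.0 + np.exp(-x))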
Example #3
    def forward(self, x, goal, speed):
        """
            returns value estimation, action, log_action_prob
        """
        # action
        a = F.relu(self.act_fea_cv1(x))
        a = F.relu(self.act_fea_cv2(a))
        a = a.view(a.shape[0], -1)
        a = F.relu(self.act_fc1(a))

        a = torch.cat((a, goal, speed), dim=-1)
        a = F.relu(self.act_fc2(a))
        mean1 = torch.sigmoid(self.actor1(a))
        mean2 = torch.tanh(self.actor2(a))
        mean = torch.cat((mean1, mean2), dim=-1)

        logstd = self.logstd.expand_as(mean)
        std = torch.exp(logstd)
        action = torch.normal(mean, std)

        # action prob on log scale
        logprob = log_normal_density(action, mean, std=std, log_std=logstd)

        # value
        v = F.relu(self.crt_fea_cv1(x))
        v = F.relu(self.crt_fea_cv2(v))
        v = v.view(v.shape[0], -1)
        v = F.relu(self.crt_fc1(v))
        v = torch.cat((v, goal, speed), dim=-1)
        v = F.relu(self.crt_fc2(v))
        v = self.critic(v)

        return v, action, logprob, mean
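The layer definitions behind Example #3 (and the nearly identical Example #4 below) are not shown. The following constructor is a self-contained guess that is shape-consistent with the forward pass above, assuming the observation x stacks `frames` laser scans of 512 beams each and that goal and speed are both 2-dimensional; every size here is an assumption, not the project's actual configuration:

import torch
import torch.nn as nn

class CNNPolicySketch(nn.Module):
    def __init__(self, frames=3, action_space=2):
        super().__init__()
        self.logstd = nn.Parameter(torch.zeros(action_space))
        # actor branch: 1-D convolutions over the stacked scans
        self.act_fea_cv1 = nn.Conv1d(frames, 32, kernel_size=5, stride=2, padding=1)
        self.act_fea_cv2 = nn.Conv1d(32, 32, kernel_size=3, stride=2, padding=1)
        self.act_fc1 = nn.Linear(128 * 32, 256)      # 512 beams -> 128 after two convs
        self.act_fc2 = nn.Linear(256 + 2 + 2, 128)   # + goal (2) + speed (2)
        self.actor1 = nn.Linear(128, 1)              # sigmoid head
        self.actor2 = nn.Linear(128, 1)              # tanh head
        # critic branch mirrors the actor
        self.crt_fea_cv1 = nn.Conv1d(frames, 32, kernel_size=5, stride=2, padding=1)
        self.crt_fea_cv2 = nn.Conv1d(32, 32, kernel_size=3, stride=2, padding=1)
        self.crt_fc1 = nn.Linear(128 * 32, 256)
        self.crt_fc2 = nn.Linear(256 + 2 + 2, 128)
        self.critic = nn.Linear(128, 1)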
Example #4
    def forward(self, x, goal, speed):
        """
            returns action, log_action_prob, mean(sigmoid, tanh)
        """

        a = F.relu(self.act_fea_cv1(x))
        a = F.relu(self.act_fea_cv2(a))
        a = a.view(a.shape[0], -1)
        a = F.relu(self.act_fc1(a))

        a = torch.cat((a, goal, speed), dim=-1)
        a = F.relu(self.act_fc2(a))

        mean1 = torch.sigmoid(self.actor1(a)) * self.max_action
        mean2 = torch.tanh(self.actor2(a)) * self.max_action
        mean = torch.cat((mean1, mean2), dim=-1)

        logstd = self.logstd.expand_as(mean)
        std = torch.exp(logstd)
        action = torch.normal(mean, std)

        # action prob on log scale
        logprob = log_normal_density(action, mean, std=std, log_std=logstd)

        return action, logprob, mean
Example #5
File: net.py  Project: wide725/DRL-Torcs
    def evaluate_actions(self, x, action):
        v, _, _, mean = self.forward(x)
        logstd = self.logstd.expand_as(mean)
        std = torch.exp(logstd)
        # evaluate
        logprob = log_normal_density(action, mean, log_std=logstd, std=std)
        dist_entropy = 0.5 + 0.5 * math.log(2 * math.pi) + logstd
        dist_entropy = dist_entropy.sum(-1).mean()
        return v, logprob, dist_entropy
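The two-line entropy computation is the closed form for a diagonal Gaussian, 0.5 + 0.5*log(2*pi) + log_std per dimension, summed over dimensions and averaged over the batch. A quick sanity check against torch.distributions:

import math
import torch

logstd = torch.tensor([-0.5, 0.0, 0.3])
std = torch.exp(logstd)
closed_form = 0.5 + 0.5 * math.log(2 * math.pi) + logstd
reference = torch.distributions.Normal(torch.zeros(3), std).entropy()
assert torch.allclose(closed_form, reference)  # per-dimension entropies agree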
Example #6
    def evaluate_actions(self, x, goal, speed, action):
        """
            returns log_action_prob, distribution entropy
        """
        _, _, mean = self.forward(x, goal, speed)
        logstd = self.logstd.expand_as(mean)
        std = torch.exp(logstd)
        # evaluate
        logprob = log_normal_density(action, mean, log_std=logstd, std=std)
        dist_entropy = 0.5 + 0.5 * math.log(2 * math.pi) + logstd
        dist_entropy = dist_entropy.sum(-1).mean()
        return logprob, dist_entropy
Example #7
    def evaluate_actions(self, x, action):
        v, _, _, mean = self.forward(x)
        logstd = self.logstd.expand_as(mean)
        std = torch.exp(logstd)
        # evaluate
        logprob = log_normal_density(action, mean, log_std=logstd, std=std)
        dist_entropy = 0.5 + 0.5 * math.log(2 * math.pi) + logstd
        dist_entropy = dist_entropy.sum(-1).mean()
        return v, logprob, dist_entropy


# if __name__ == '__main__':
#     net = MLPPolicy(3, 2)

#     # torch.autograd.Variable is deprecated; plain tensors work directly
#     observation = torch.randn(2, 3)
#     v, action, logprob, mean = net.forward(observation)
#     print(v)
Example #8
File: net.py  Project: wide725/DRL-Torcs
    def forward(self, x):
        """
            returns value estimation, action, log_action_prob
        """
        # action
        act = self.act_fc1(x)
        act = torch.tanh(act)
        act = self.act_fc2(act)
        act = torch.tanh(act)
        mean = self.mu(act)  # N, num_actions
        logstd = self.logstd.expand_as(mean)
        std = torch.exp(logstd)
        action = torch.normal(mean, std)

        # value
        v = self.value_fc1(x)
        v = torch.tanh(v)
        v = self.value_fc2(v)
        v = torch.tanh(v)
        v = self.value_fc3(v)

        # action prob on log scale
        logprob = log_normal_density(action, mean, std=std, log_std=logstd)
        return v, action, logprob, mean
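As with Example #3, the constructor behind this MLP is not shown. A minimal sketch consistent with the layer names used in Examples #2 and #8 (the hidden size is an assumption):

import torch
import torch.nn as nn

class MLPPolicySketch(nn.Module):
    def __init__(self, obs_size, num_actions, hidden=64):
        super().__init__()
        self.logstd = nn.Parameter(torch.zeros(num_actions))
        # actor head
        self.act_fc1 = nn.Linear(obs_size, hidden)
        self.act_fc2 = nn.Linear(hidden, hidden)
        self.mu = nn.Linear(hidden, num_actions)
        # critic head
        self.value_fc1 = nn.Linear(obs_size, hidden)
        self.value_fc2 = nn.Linear(hidden, hidden)
        self.value_fc3 = nn.Linear(hidden, 1)

Attaching Example #8's forward to this class and calling it with torch.randn(2, obs_size) should return v of shape (2, 1), action and mean of shape (2, num_actions), and logprob of shape (2, 1) under the log_normal_density sketch above.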