Пример #1
0
    def __init__(self):
        """Build the shared layers, the per-environment heads and run buffers.

        The number of environments is read from the ``num_envs`` name in the
        enclosing scope. One mean head and one variance head are created per
        environment plus one extra pair for the distilled policy; value heads
        exist only for the environments.
        """
        super(Policy, self).__init__()
        self.num_envs = num_envs
        # shared layer
        self.affine1 = nn.Linear(4, 128)
        self.affine_pi0 = nn.Linear(4, 128)
        # not shared layers (+1 policy head for the distilled policy)
        num_policy_heads = self.num_envs + 1
        self.mu_heads = nn.ModuleList(
            [nn.Linear(128, 1) for _ in range(num_policy_heads)])
        self.sigma2_heads = nn.ModuleList(
            [nn.Linear(128, 1) for _ in range(num_policy_heads)])
        self.value_heads = nn.ModuleList(
            [nn.Linear(128, 1) for _ in range(self.num_envs)])

        self.apply(weights_init)
        # Head-specific re-initialization. The mu/value order is kept
        # interleaved per index so that any random draws made by the
        # initializer happen in the same sequence as before.
        for i, (mu, sigma) in enumerate(
                zip(self.mu_heads, self.sigma2_heads)):
            # Assign to ``mu.weight.data``: setting ``mu.data`` would only
            # create an unrelated attribute on the Linear module and leave
            # its weights untouched.
            mu.weight.data = normalized_columns_initializer(
                mu.weight.data, 0.01)
            mu.bias.data.fill_(0)
            sigma.bias.data.fill_(0)
            # The last index is the distilled policy, which has no value head.
            if i != self.num_envs:
                value = self.value_heads[i]
                value.weight.data = normalized_columns_initializer(
                    value.weight.data, 1.0)
                value.bias.data.fill_(0)

        # initialize lists for holding run information (one list per env)
        self.div = [[] for _ in range(self.num_envs)]
        self.saved_actions = [[] for _ in range(self.num_envs)]
        self.entropies = [[] for _ in range(self.num_envs)]
        self.rewards = [[] for _ in range(self.num_envs)]
        self.log_prob = [[] for _ in range(self.num_envs)]
Пример #2
0
    def __init__(self):
        """Build the shared layer, the per-environment heads and run buffers.

        The number of environments is read from the ``num_envs`` name in the
        enclosing scope. One mean head and one variance head are created per
        environment.
        """
        super(Policy, self).__init__()
        self.num_envs = num_envs
        # shared layer
        self.affine1 = nn.Linear(4, 128)
        # not shared layers
        self.mu_heads = nn.ModuleList(
            [nn.Linear(128, 1) for _ in range(self.num_envs)])
        self.sigma2_heads = nn.ModuleList(
            [nn.Linear(128, 1) for _ in range(self.num_envs)])

        self.apply(weights_init)
        for mu, sigma in zip(self.mu_heads, self.sigma2_heads):
            # Assign to ``mu.weight.data``: setting ``mu.data`` would only
            # create an unrelated attribute on the Linear module and leave
            # its weights untouched.
            mu.weight.data = normalized_columns_initializer(
                mu.weight.data, 0.01)
            mu.bias.data.fill_(0)
            sigma.bias.data.fill_(0)

        # initialize lists for holding run information
        self.div = [[] for _ in range(self.num_envs)]