示例#1
0
    def __init__(self, args):
        """Build the layers of the A3C LSTM network.

        Creates four convolution / stride-2 convolution pairs, an LSTM cell,
        two linear projections, a batch-norm layer and the noisy policy and
        value heads, then calls ``_reset()`` and puts the module in training
        mode.

        Args:
            args: Configuration object, forwarded unchanged to the parent
                constructor and to ``sampler``.
        """
        super(A3C_LSTM_NN, self).__init__(args)

        # Stage 1: 3 -> 32 channels; the stride-2 conv shrinks the feature map.
        self.conv1 = nn.Conv2d(
            in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.down1 = nn.Conv2d(
            in_channels=32, out_channels=32, kernel_size=3, stride=2,
            padding=1)

        # Stage 2: 32 -> 32 channels.
        self.conv2 = nn.Conv2d(
            in_channels=32, out_channels=32, kernel_size=5, stride=1,
            padding=2)
        self.down2 = nn.Conv2d(
            in_channels=32, out_channels=32, kernel_size=3, stride=2,
            padding=1)

        # Stage 3: 32 -> 64 channels.
        self.conv3 = nn.Conv2d(
            in_channels=32, out_channels=64, kernel_size=3, stride=1,
            padding=1)
        self.down3 = nn.Conv2d(
            in_channels=64, out_channels=64, kernel_size=3, stride=2,
            padding=1)

        # Stage 4: conv4 is the only unpadded convolution in the stack.
        self.conv4 = nn.Conv2d(
            in_channels=64, out_channels=64, kernel_size=3, stride=1,
            padding=0)
        self.down4 = nn.Conv2d(
            in_channels=64, out_channels=64, kernel_size=3, stride=2,
            padding=1)

        # NOTE(review): hidden_dim / output_dims are presumably set by the
        # parent constructor -- confirm.
        hidden = self.hidden_dim
        joint = hidden * 2

        # NOTE(review): 1024 is presumably the flattened size of the conv
        # stack output -- confirm against forward().
        self.lstm = nn.LSTMCell(input_size=1024, hidden_size=hidden)
        self.linear_encoder = nn.Linear(in_features=hidden,
                                        out_features=hidden)
        self.linear_mu = nn.Linear(in_features=hidden, out_features=hidden)
        self.bn_x = nn.BatchNorm1d(num_features=joint)

        # Policy and value heads both take a feature of width 2 * hidden_dim.
        self.policy_5 = NoisyLinear(joint, self.output_dims)
        self.policy_6 = nn.Softmax(dim=1)
        self.value_5 = NoisyLinear(joint, 1)
        self.sampler = sampler(args)

        self._reset()
        self.train()
示例#2
0
    def __init__(self, args):
        """Build the layers of the CReLU variant of the A3C LSTM network.

        Creates four convolution / stride-2 convolution pairs with one
        ``crelu`` module per stage, an LSTM cell, two linear projections of
        width ``hidden_dim / 2`` with their own ``crelu`` modules, and the
        noisy policy and value heads. Finally calls ``_reset()`` and puts the
        module in training mode.

        Args:
            args: Configuration object, forwarded unchanged to the parent
                constructor and to ``sampler``.
        """
        super(A3C_LSTM_NN_CRELU, self).__init__(args)
        print("USING NEW MODEL with CRELU")

        # NOTE(review): each conv* emits half the channels its down* layer
        # expects (e.g. 16 -> 32), so crelu presumably doubles the channel
        # count -- confirm against the crelu implementation.
        self.conv1 = nn.Conv2d(3, 16, 5, stride=1, padding=2)
        self.down1 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.crelu1 = crelu()

        self.conv2 = nn.Conv2d(32, 16, 5, stride=1, padding=2)
        self.down2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.crelu2 = crelu()

        self.conv3 = nn.Conv2d(32, 32, 3, stride=1, padding=1)
        self.down3 = nn.Conv2d(64, 64, 3, stride=2, padding=1)
        self.crelu3 = crelu()

        # conv4 is the only unpadded convolution in the stack.
        self.conv4 = nn.Conv2d(64, 32, 3, stride=1, padding=0)
        self.down4 = nn.Conv2d(64, 64, 3, stride=2, padding=1)
        self.crelu4 = crelu()

        # Both projections share the same halved output width.
        half_hidden = int(self.hidden_dim / 2)

        # NOTE(review): 1024 is presumably the flattened size of the conv
        # stack output -- confirm against forward().
        self.lstm = nn.LSTMCell(1024, self.hidden_dim)
        self.linear_encoder = nn.Linear(self.hidden_dim, half_hidden)
        self.linear_mu = nn.Linear(self.hidden_dim, half_hidden)
        self.crelu = crelu()
        self.crelu_encoder = crelu()

        self.policy_5 = NoisyLinear(self.hidden_dim * 2, self.output_dims)
        # Explicit dim: omitting it relies on deprecated implicit-dimension
        # selection and emits a warning when the module is called.
        # NOTE(review): dim=1 assumes 2-D (batch, actions) logits -- confirm
        # against forward().
        self.policy_6 = nn.Softmax(dim=1)
        self.value_5 = NoisyLinear(self.hidden_dim * 2, 1)
        self.sampler = sampler(args)

        self._reset()
        self.train()
示例#3
0
    def __init__(self, args):
        """Build the layers of the HPA3C LSTM network.

        Creates four convolution / stride-2 convolution pairs, the
        ``linear_mu`` / ``linear_sigma`` / ``prior_mu`` projections, an LSTM
        cell with its encoder projection, and the linear policy and value
        heads. When ``args.crelu`` is truthy, two ``crelu`` modules are added
        and the input widths of ``prior_mu``, ``policy_5`` and ``value_5``
        are doubled. Finally calls ``_reset()`` and puts the module in
        training mode.

        Args:
            args: Configuration object. ``args.crelu`` and ``args.sig`` are
                read here; the object is also forwarded unchanged to the
                parent constructor and to ``sampler``.
        """
        super(A3C_LSTM_HPA3C, self).__init__(args)
        self.crelu = bool(args.crelu)
        self.sig = args.sig

        self.conv1 = nn.Conv2d(3, 32, 5, stride=1, padding=2)
        self.down1 = nn.Conv2d(32, 32, 3, stride=2, padding=1)

        self.conv2 = nn.Conv2d(32, 32, 5, stride=1, padding=2)
        self.down2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)

        self.conv3 = nn.Conv2d(32, 64, 3, stride=1, padding=1)
        self.down3 = nn.Conv2d(64, 64, 3, stride=2, padding=1)

        # conv4 is the only unpadded convolution in the stack.
        self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=0)
        self.down4 = nn.Conv2d(64, 64, 3, stride=2, padding=1)

        # Input widths depend on the CReLU flag, so resolve them before any
        # dependent layer is created; each layer is then built exactly once.
        width_scale = 2 if self.crelu else 1
        prior_in_dim = self.hidden_dim * width_scale
        head_in_dim = self.hidden_dim * 2 * width_scale

        self.linear_mu = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.linear_sigma = nn.Linear(self.hidden_dim, self.hidden_dim)

        self.prior_mu = nn.Linear(prior_in_dim, self.hidden_dim)

        # NOTE(review): 1024 is presumably the flattened size of the conv
        # stack output -- confirm against forward().
        self.lstm = nn.LSTMCell(1024, self.hidden_dim)
        self.linear_encoder = nn.Linear(self.hidden_dim, self.hidden_dim)

        self.policy_5 = nn.Linear(head_in_dim, self.output_dims)
        self.policy_6 = nn.Softmax(dim=1)
        self.value_5 = nn.Linear(head_in_dim, 1)
        self.sampler = sampler(args)

        if self.crelu:
            self.crelu_z = crelu()
            self.crelu_x = crelu()

        self._reset()
        self.train()