def __init__(self, args):
    """Build the layers of the A3C LSTM network.

    Registers, in order: four convolution / stride-2 convolution pairs,
    an LSTM cell, two hidden-sized linear layers, a batch-norm layer and
    the noisy policy / value heads, then resets the model and puts it in
    training mode.

    Args:
        args: Configuration object forwarded unchanged to the parent
            constructor and to ``sampler``.
    """
    super(A3C_LSTM_NN, self).__init__(args)

    # NOTE(review): hidden_dim / output_dims are presumably set by the
    # parent constructor -- confirm.
    hidden = self.hidden_dim
    joint = hidden * 2  # input width of batch-norm and both heads

    # Convolutional trunk: each "conv" is stride 1, each "down" is a
    # 3x3 stride-2 convolution.
    self.conv1 = nn.Conv2d(
        in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2)
    self.down1 = nn.Conv2d(
        in_channels=32, out_channels=32, kernel_size=3, stride=2, padding=1)
    self.conv2 = nn.Conv2d(
        in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2)
    self.down2 = nn.Conv2d(
        in_channels=32, out_channels=32, kernel_size=3, stride=2, padding=1)
    self.conv3 = nn.Conv2d(
        in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
    self.down3 = nn.Conv2d(
        in_channels=64, out_channels=64, kernel_size=3, stride=2, padding=1)
    # conv4 is the only unpadded convolution in the trunk.
    self.conv4 = nn.Conv2d(
        in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=0)
    self.down4 = nn.Conv2d(
        in_channels=64, out_channels=64, kernel_size=3, stride=2, padding=1)

    # Recurrent core; 1024 is presumably the flattened trunk output --
    # TODO confirm against forward().
    self.lstm = nn.LSTMCell(input_size=1024, hidden_size=hidden)

    self.linear_encoder = nn.Linear(in_features=hidden, out_features=hidden)
    self.linear_mu = nn.Linear(in_features=hidden, out_features=hidden)
    self.bn_x = nn.BatchNorm1d(num_features=joint)

    # Policy head (noisy linear followed by softmax over dim 1) and
    # scalar value head.
    self.policy_5 = NoisyLinear(joint, self.output_dims)
    self.policy_6 = nn.Softmax(dim=1)
    self.value_5 = NoisyLinear(joint, 1)

    self.sampler = sampler(args)

    self._reset()
    self.train()
def __init__(self, args):
    """Build the layers of the CReLU variant of the A3C LSTM network.

    Registers four convolution / stride-2 convolution / crelu triples,
    an LSTM cell, two half-width linear layers with their own crelu
    modules, and the noisy policy / value heads, then resets the model
    and puts it in training mode.

    Args:
        args: Configuration object forwarded unchanged to the parent
            constructor and to ``sampler``.
    """
    super(A3C_LSTM_NN_CRELU, self).__init__(args)
    print("USING NEW MODEL with CRELU")

    # NOTE(review): hidden_dim / output_dims are presumably set by the
    # parent constructor -- confirm.
    hidden = self.hidden_dim
    # Integer floor division; same value as int(hidden / 2) for the
    # non-negative integer sizes nn.LSTMCell accepts.
    half = hidden // 2

    # NOTE(review): each convN emits half the channels its downN
    # consumes (16 -> 32, 32 -> 64), so crelu presumably doubles the
    # channel count -- confirm against crelu's definition and forward().
    self.conv1 = nn.Conv2d(3, 16, 5, stride=1, padding=2)
    self.down1 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
    self.crelu1 = crelu()

    self.conv2 = nn.Conv2d(32, 16, 5, stride=1, padding=2)
    self.down2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
    self.crelu2 = crelu()

    self.conv3 = nn.Conv2d(32, 32, 3, stride=1, padding=1)
    self.down3 = nn.Conv2d(64, 64, 3, stride=2, padding=1)
    self.crelu3 = crelu()

    # conv4 is the only unpadded convolution in the trunk.
    self.conv4 = nn.Conv2d(64, 32, 3, stride=1, padding=0)
    self.down4 = nn.Conv2d(64, 64, 3, stride=2, padding=1)
    self.crelu4 = crelu()

    # 1024 is presumably the flattened trunk output -- TODO confirm.
    self.lstm = nn.LSTMCell(1024, hidden)

    self.linear_encoder = nn.Linear(hidden, half)
    self.linear_mu = nn.Linear(hidden, half)
    self.crelu = crelu()
    self.crelu_encoder = crelu()

    self.policy_5 = NoisyLinear(hidden * 2, self.output_dims)
    # Explicit dim: constructing nn.Softmax() without it is deprecated
    # and warns on every forward call. dim=1 is what the implicit rule
    # already picked for 2-D input.
    self.policy_6 = nn.Softmax(dim=1)
    self.value_5 = NoisyLinear(hidden * 2, 1)

    self.sampler = sampler(args)

    self._reset()
    self.train()
def __init__(self, args):
    """Build the layers of the HPA3C LSTM network.

    Registers four convolution / stride-2 convolution pairs, the
    ``linear_mu`` / ``linear_sigma`` / ``prior_mu`` linear layers, an
    LSTM cell, an encoder layer and the policy / value heads, then
    resets the model and puts it in training mode.

    When ``args.crelu`` is truthy, two crelu modules are added and the
    input widths of ``prior_mu``, ``policy_5`` and ``value_5`` are
    doubled. Each of those layers is constructed exactly once, directly
    at its final width.

    Args:
        args: Configuration object. ``args.crelu`` and ``args.sig`` are
            read here; the whole object is also forwarded to the parent
            constructor and to ``sampler``.
    """
    super(A3C_LSTM_HPA3C, self).__init__(args)
    self.crelu = bool(args.crelu)
    self.sig = args.sig

    # NOTE(review): hidden_dim / output_dims are presumably set by the
    # parent constructor -- confirm.
    hidden = self.hidden_dim
    # Width multiplier for layers whose input doubles in crelu mode.
    widen = 2 if self.crelu else 1
    head_in = hidden * 2 * widen

    # Convolutional trunk: each "conv" is stride 1, each "down" is a
    # 3x3 stride-2 convolution.
    self.conv1 = nn.Conv2d(3, 32, 5, stride=1, padding=2)
    self.down1 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
    self.conv2 = nn.Conv2d(32, 32, 5, stride=1, padding=2)
    self.down2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
    self.conv3 = nn.Conv2d(32, 64, 3, stride=1, padding=1)
    self.down3 = nn.Conv2d(64, 64, 3, stride=2, padding=1)
    # conv4 is the only unpadded convolution in the trunk.
    self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=0)
    self.down4 = nn.Conv2d(64, 64, 3, stride=2, padding=1)

    self.linear_mu = nn.Linear(hidden, hidden)
    self.linear_sigma = nn.Linear(hidden, hidden)
    self.prior_mu = nn.Linear(hidden * widen, hidden)

    # 1024 is presumably the flattened trunk output -- TODO confirm.
    self.lstm = nn.LSTMCell(1024, hidden)
    self.linear_encoder = nn.Linear(hidden, hidden)

    self.policy_5 = nn.Linear(head_in, self.output_dims)
    self.policy_6 = nn.Softmax(dim=1)
    self.value_5 = nn.Linear(head_in, 1)

    self.sampler = sampler(args)

    # Registered after sampler so submodule order matches the layout
    # existing checkpoints were saved with.
    if self.crelu:
        self.crelu_z = crelu()
        self.crelu_x = crelu()

    self._reset()
    self.train()