def __init__(self, shape):
    """Create the three-layer convolutional stack and the linear layer on top.

    Args:
        shape: Input shape. Its first entry is dropped and the remainder
            is fed through ``conv_out_shape`` once per conv layer
            (presumably (channels, height, width) -- TODO confirm).
    """
    super().__init__()
    self.conv1 = nn.Conv2d(in_channels=4, out_channels=32, kernel_size=8, stride=4)
    self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, stride=2)
    self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1)

    # Track how the trailing input dimensions change through each conv
    # layer so the linear layer can be sized to match.
    out_shape = shape[1:]
    for layer in (self.conv1, self.conv2, self.conv3):
        out_shape = conv_out_shape(out_shape, layer)

    # Last conv's channel count times the product of the remaining dims.
    self.nunits = self.conv3.out_channels * np.prod(out_shape)
    self.fc = nn.Linear(in_features=self.nunits, out_features=512)
def build(self):
    """Create the conv stack, a 512-unit linear layer, and the ``qf`` output layer.

    Reads ``self.observation_space.shape`` (first entry dropped) to size
    the linear layer and ``self.action_space.n`` for the width of ``qf``
    (presumably one Q-value per discrete action -- TODO confirm).
    """
    self.conv1 = nn.Conv2d(in_channels=4, out_channels=32, kernel_size=8, stride=4)
    self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, stride=2)
    self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1)

    # Track how the trailing observation dimensions change through each
    # conv layer so the linear layer can be sized to match.
    out_shape = self.observation_space.shape[1:]
    for layer in (self.conv1, self.conv2, self.conv3):
        out_shape = conv_out_shape(out_shape, layer)

    # Last conv's channel count times the product of the remaining dims.
    self.nunits = self.conv3.out_channels * np.prod(out_shape)
    self.fc = nn.Linear(in_features=self.nunits, out_features=512)
    self.qf = nn.Linear(in_features=512, out_features=self.action_space.n)
def build(self):
    """Create the two-layer conv stack, a 256-unit linear layer, and two heads.

    Reads ``self.observation_space.shape`` (first entry dropped) to size
    the linear layer and ``self.action_space.n`` for ``Categorical``.
    ``vf`` is a single-output linear layer (presumably the value
    estimate -- TODO confirm); ``dist`` is built by the project's
    ``Categorical`` class.
    """
    self.conv1 = nn.Conv2d(in_channels=4, out_channels=16, kernel_size=8, stride=4)
    self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=4, stride=2)

    # Track how the trailing observation dimensions change through each
    # conv layer so the linear layer can be sized to match.
    out_shape = self.observation_space.shape[1:]
    for layer in (self.conv1, self.conv2):
        out_shape = conv_out_shape(out_shape, layer)

    # Last conv's channel count times the product of the remaining dims.
    self.nunits = self.conv2.out_channels * np.prod(out_shape)
    self.fc = nn.Linear(in_features=self.nunits, out_features=256)
    self.vf = nn.Linear(in_features=256, out_features=1)
    self.dist = Categorical(256, self.action_space.n)

    # Re-initialize the vf layer: orthogonal weights (gain 1.0), zero bias.
    vf_weight = self.vf.weight.data
    vf_bias = self.vf.bias.data
    nn.init.orthogonal_(vf_weight, gain=1.0)
    nn.init.constant_(vf_bias, 0)