def setUp(self):
    """Prepare random Q-values and wrap them in a DiscreteActionValue.

    Sets the following attributes on the instance:

    * ``batch_size`` -- 30
    * ``action_size`` -- 3
    * ``q_values`` -- float32 array of shape ``(batch_size, action_size)``
      sampled with ``np.random.normal``
    * ``qout`` -- ``action_value.DiscreteActionValue`` built from a
      ``chainer.Variable`` holding ``q_values``
    """
    self.batch_size, self.action_size = 30, 3

    # Draw the samples first, then cast to float32.
    shape = (self.batch_size, self.action_size)
    samples = np.random.normal(size=shape)
    self.q_values = samples.astype(np.float32)

    wrapped = chainer.Variable(self.q_values)
    self.qout = action_value.DiscreteActionValue(wrapped)
def __call__(self, x, test=False):
    """Compute Q-values from ``x`` by combining two streams.

    ``x`` is passed through every layer in ``self.conv_layers``, each
    followed by ``self.activation``.  The result feeds ``self.a_stream``
    (advantage) and ``self.v_stream`` (state value).  The advantage has its
    mean along axis 1 subtracted (sum over axis 1 divided by
    ``self.n_actions``) before the state value is broadcast and added.

    Args:
        x: Input batch; its first dimension is used as the batch size.
        test: Forwarded unchanged to ``self.a_stream`` and
            ``self.v_stream``.

    Returns:
        ``action_value.DiscreteActionValue`` wrapping the combined Q-values.
    """
    features = x
    for layer in self.conv_layers:
        features = self.activation(layer(features))

    # Advantage stream, centred on its per-row mean.
    # NOTE(review): presumably shaped (batch, n_actions) -- confirm against
    # the definition of a_stream.
    n_rows = x.shape[0]
    advantage = self.a_stream(features, test=test)
    row_sum = F.sum(advantage, axis=1)
    advantage_mean = F.reshape(row_sum / self.n_actions, (n_rows, 1))
    advantage, advantage_mean = F.broadcast(advantage, advantage_mean)
    advantage -= advantage_mean

    # State-value stream, broadcast against the advantage before adding.
    state_value = self.v_stream(features, test=test)
    advantage, state_value = F.broadcast(advantage, state_value)
    return action_value.DiscreteActionValue(advantage + state_value)