Exemplo n.º 1
0
 def setUp(self):
     """Create a random float32 Q-value batch wrapped as an action value.

     Stores the batch size, the action count, the raw array of normally
     distributed Q-values, and a ``DiscreteActionValue`` built from a
     ``chainer.Variable`` holding that array.
     """
     self.batch_size = 30
     self.action_size = 3

     # One row per sample, one column per action.
     shape = (self.batch_size, self.action_size)
     self.q_values = np.random.normal(size=shape).astype(np.float32)

     q_variable = chainer.Variable(self.q_values)
     self.qout = action_value.DiscreteActionValue(q_variable)
Exemplo n.º 2
0
    def __call__(self, x, test=False):
        """Compute Q-values from separate advantage and state-value streams.

        The input is passed through every layer in ``self.conv_layers``
        (each followed by ``self.activation``), then fed to both
        ``self.a_stream`` and ``self.v_stream``. The advantage output has
        its per-sample average over axis 1 subtracted before the state
        value is added to it.

        Args:
            x: Input batch; ``x.shape[0]`` is used as the batch size.
            test: Forwarded unchanged to ``a_stream`` and ``v_stream``.

        Returns:
            ``action_value.DiscreteActionValue`` wrapping the combined
            Q-values.
        """
        features = x
        for layer in self.conv_layers:
            features = self.activation(layer(features))

        # Advantage stream, centred on its average across actions.
        # NOTE(review): assumes a_stream yields (batch, n_actions) -- confirm.
        n_samples = x.shape[0]
        advantage = self.a_stream(features, test=test)
        advantage_total = F.sum(advantage, axis=1)
        advantage_mean = F.reshape(
            advantage_total / self.n_actions, (n_samples, 1))
        advantage, advantage_mean = F.broadcast(advantage, advantage_mean)
        advantage -= advantage_mean

        # State-value stream, broadcast against the advantages before adding.
        state_value = self.v_stream(features, test=test)
        advantage, state_value = F.broadcast(advantage, state_value)

        return action_value.DiscreteActionValue(advantage + state_value)