Exemplo n.º 1
0
    def test_dumbnet_predict(self):
        """Check ``predict`` with ``DumbNet`` on the initial and a follow-up state.

        For the initial state the test expects 4 probabilities of 0.25 each;
        after taking the action marked at ``[0, 1]`` it expects 3
        probabilities of 1/3 each.  The value is expected to be 0.0 in both
        cases.
        """
        # Probabilities are compared with a tolerance rather than ``==``: the
        # truncated literal 0.3333333 is not exactly 1/3, so exact equality
        # depends on the dtype and rounding of the returned values.
        tol = 1e-6

        gi = TwoPlayerGuessIt()
        nn = NeuralNetwork(gi, DumbNet)
        init = gi.get_initial_state()
        p, v = nn.predict(init)
        self.assertEqual(len(p), 4)
        self.assertTrue((abs(p - .25) < tol).all())
        self.assertEqual(v, 0.0)

        # Action template with only the entry at [0, 1] set.
        template = np.zeros_like(gi.get_available_actions(init))
        template[0, 1] = 1
        s = gi.take_action(init, template)
        p, v = nn.predict(s)
        # One entry fewer than before -- presumably the taken action is no
        # longer available (TODO confirm against TwoPlayerGuessIt).
        self.assertEqual(len(p), 3)
        self.assertTrue((abs(p - 1 / 3) < tol).all())
        self.assertEqual(v, 0.0)
Exemplo n.º 2
0
    def test_uniform_get_valid_dist(self):
        """Check ``get_valid_dist`` on all-ones logits.

        With identical logits every returned entry is expected to be the
        same: 1/4 for the initial state (4 entries) and 1/3 after taking the
        action marked at ``[0, 1]`` (3 entries).  With ``log_softmax=True``
        the expected entries are the corresponding logarithms.
        """
        gi = TwoPlayerGuessIt()
        nn = NeuralNetwork(gi, DumbNet)
        init = gi.get_initial_state()
        logits = torch.ones((2, 2))

        def assert_uniform(dist, expected, count):
            # Compare with a tolerance: the expected values are truncated
            # decimal literals, so exact float equality would depend on the
            # dtype and rounding of the computed distribution.
            values = [float(x) for x in dist]
            self.assertEqual(len(values), count)
            for value in values:
                self.assertAlmostEqual(value, expected, places=6)

        assert_uniform(nn.get_valid_dist(init, logits), .25, 4)
        # -1.38629436 ~= ln(1/4)
        assert_uniform(nn.get_valid_dist(init, logits, log_softmax=True),
                       -1.38629436, 4)

        # Action template with only the entry at [0, 1] set.
        template = np.zeros_like(gi.get_available_actions(init))
        template[0, 1] = 1
        s = gi.take_action(init, template)
        assert_uniform(nn.get_valid_dist(s, logits), 1 / 3, 3)
        # -1.09861228 ~= ln(1/3)
        assert_uniform(nn.get_valid_dist(s, logits, log_softmax=True),
                       -1.09861228, 3)
Exemplo n.º 3
0
    def test_prior_get_valid_dist(self):
        """Check ``get_valid_dist`` on non-uniform logits.

        The logits are 1 everywhere except the last column, which is 2.  The
        test expects 4 entries for the initial state and 3 entries after
        taking the action marked at ``[0, 1]``, with the larger logits mapped
        to the larger probabilities.
        """
        gi = TwoPlayerGuessIt()
        nn = NeuralNetwork(gi, DumbNet)
        init = gi.get_initial_state()
        logits = torch.ones((2, 2))
        logits[:, -1] = 2

        def assert_dist(dist, expected):
            # Element-wise comparison with a tolerance: the expected values
            # are single-precision results written out as doubles, so exact
            # equality would break on any last-bit rounding difference.
            values = [float(x) for x in dist]
            self.assertEqual(len(values), len(expected))
            for got, want in zip(values, expected):
                self.assertAlmostEqual(got, want, places=6)

        assert_dist(nn.get_valid_dist(init, logits), [
            0.13447073101997375, 0.3655293583869934, 0.13447073101997375,
            0.3655293583869934
        ])

        # Action template with only the entry at [0, 1] set.
        template = np.zeros_like(gi.get_available_actions(init))
        template[0, 1] = 1
        s = gi.take_action(init, template)
        assert_dist(
            nn.get_valid_dist(s, logits),
            [0.21194154024124146, 0.21194154024124146, 0.5761168599128723])
Exemplo n.º 4
0
    def test_combined_loss(self):
        """Check ``NeuralNetwork.loss`` on a batch of two states.

        The batch holds the initial state (4-entry policy target) and the
        state reached by taking the action marked at ``[0, 1]`` (3-entry
        policy target).  The loss is expected to equal the sum of four
        hand-computed terms, to 4 decimal places.
        """
        gi = TwoPlayerGuessIt()
        nn = NeuralNetwork(gi, DumbNet)
        init = gi.get_initial_state()
        template = np.zeros_like(gi.get_available_actions(init))
        template[0, 1] = 1
        s = gi.take_action(init, template)
        states = np.array([init, s])

        p_gt_top = np.ones(4) * .1
        p_gt_top[-1] = .7
        p_gt_bot = np.ones(3) * .2
        p_gt_bot[-1] = .6
        # The two targets have different lengths (4 and 3), so they are kept
        # in an object array.  The builtin ``object`` is used because the
        # ``np.object`` alias was deprecated in NumPy 1.20 and removed in
        # NumPy 1.24, where accessing it raises AttributeError.
        p_gt = np.array([p_gt_top, p_gt_bot], dtype=object)
        p_pred = np.ones((2, 2, 2), dtype=np.float32)
        p_pred[:, 1, 1] = 2
        p_pred = torch.from_numpy(p_pred)
        v_gt, v_pred = torch.Tensor([1., .5]), torch.Tensor([-.35, -.2])
        loss = nn.loss(states, (p_pred, v_pred),
                       (p_gt, v_gt)).detach().numpy().reshape(-1)[0]

        # 1.8225 == (1 - (-.35)) ** 2 and .49 == (.5 - (-.2)) ** 2 are the
        # squared value errors; 1.0437 and .9514 are presumably the policy
        # cross-entropy terms of the two states (TODO confirm against
        # NeuralNetwork.loss).
        self.assertAlmostEqual((.49 + .9514) + (1.8225 + 1.0437), loss,
                               places=4)
Exemplo n.º 5
0
    def test_masked_loss(self):
        """Check ``NeuralNetwork.loss`` on a single state with a 3-entry target.

        The state is the one reached from the initial state by taking the
        action marked at ``[0, 1]``.  The loss is expected to equal
        ``.49 + .9514`` to 4 decimal places.
        """
        game = TwoPlayerGuessIt()
        net = NeuralNetwork(game, DumbNet)

        # Build the follow-up state: only the entry at [0, 1] is set.
        start = game.get_initial_state()
        action = np.zeros_like(game.get_available_actions(start))
        action[0, 1] = 1
        state = game.take_action(start, action)
        states = np.array([state])

        # Targets: a batch of one policy target and one value target.
        policy_target = np.array([[.2, .2, .6]])
        value_target = torch.Tensor([.5])

        # Predictions: 2x2 logits of 1 with the entry at [1, 1] raised to 2.
        raw_logits = np.array([[[1, 1], [1, 2]]], dtype=np.float32)
        policy_pred = torch.from_numpy(raw_logits)
        value_pred = torch.Tensor([-.2])

        loss_tensor = nn_loss = net.loss(states, (policy_pred, value_pred),
                                         (policy_target, value_target))
        loss_value = loss_tensor.detach().numpy().reshape(-1)[0]

        # .49 == (.5 - (-.2)) ** 2; .9514 is presumably the policy
        # cross-entropy term (TODO confirm against NeuralNetwork.loss).
        mse_loss = .49
        ce_loss = .9514
        self.assertAlmostEqual(mse_loss + ce_loss, loss_value, places=4)