    def test_suck(self):
        s1 = vwmodel.VWState(np.array([2, 2]),
                             np.array([[1, 0, 1], [0, 0, 0], [1, 0, 1]]))
        s2 = vwmodel.VWState(np.array([2, 2]),
                             np.array([[1, 0, 1], [0, 0, 0], [1, 0, 0]]))
        a = vwmodel.VWSuckAction()
        # Sucking at (2, 2) clears the dust there and leaves the rest intact.
        self.assertEqual(s2, a.apply(s1))

    def test_move(self):
        s1 = vwmodel.VWState(np.array([3, 2]),
                             np.array([[1, 0, 1], [0, 0, 0], [1, 0, 1]]))
        s2 = vwmodel.VWState(np.array([2, 0]),
                             np.array([[1, 0, 1], [0, 0, 0], [1, 0, 1]]))
        s3 = vwmodel.VWState(np.array([7, 5]),
                             np.array([[1, 0, 1], [0, 0, 0], [1, 0, 1]]))
        a1 = vwmodel.VWMoveAction(np.array([-1, -2]))
        a2 = vwmodel.VWMoveAction(np.array([5, 5]))
        # A move translates the robot's location; the dust is unchanged.
        self.assertEqual(s2, a1.apply(s1))
        self.assertEqual(s3, a2.apply(a1.apply(s1)))

    def test_state(self):
        s1 = vwmodel.VWState(np.array([3, 2]),
                             np.array([[1, 0, 1], [0, 0, 0], [1, 0, 1]]))
        s2 = vwmodel.VWState(np.array([3, 1]),
                             np.array([[1, 0, 1], [0, 0, 0], [1, 0, 1]]))
        s3 = vwmodel.VWState(np.array([3, 2]),
                             np.array([[1, 0, 1], [0, 1, 0], [1, 0, 1]]))
        s4 = vwmodel.VWState(np.array([3, 2]),
                             np.array([[1, 0, 1, 0], [0, 1, 0, 0], [1, 0, 1, 0]]))
        self.assertNotEqual(s1, s2)
        self.assertNotEqual(s2, s3)
        self.assertNotEqual(s1, s3)
        self.assertNotEqual(s1, s4)
        self.assertEqual(s1, s1)

    def test_is_legal(self):
        s1 = vwmodel.VWState(np.array([0, 0]), np.array([[1, 0, 1], [0, 0, 0]]))
        s2 = vwmodel.VWState(np.array([1, 1]), np.array([[1, 1, 1], [0, 0, 0]]))
        s3 = vwmodel.VWState(np.array([0, 0]), np.array([[1, 1, 1], [0, 0, 0]]))
        s4 = vwmodel.VWState(np.array([-1, 0]), np.array([[1, 0, 1], [1, 0, 0]]))
        s5 = vwmodel.VWState(np.array([0, 1]), np.array([[1, 0, 1], [0, 0, 1]]))
        m = self._model1
        self.assertTrue(m.is_legal(s1))
        self.assertFalse(m.is_legal(s2))  # invalid dust location
        self.assertFalse(m.is_legal(s3))  # invalid dust location 2
        self.assertFalse(m.is_legal(s4))  # invalid robot location
        self.assertFalse(m.is_legal(s5))  # invalid robot location 2
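    # Read together, these cases suggest the legality rule: the robot must
    # occupy a free cell of the map, and dust may only lie on free cells
    # (our reading of the test comments, not a documented contract).
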
    def test_T(self):
        # Test a move action. Probabilities calculated by hand.
        q = self._dust_prob
        p = self._act_fail
        s = vwmodel.VWState(np.array([1, 1]), np.array([[1, 0, 0], [0, 1, 0]]))
        a = vwmodel.VWMoveAction(np.array([0, 1]))
        T = self._model1.T(s, a)
        # Probability of each possible robot location after the move.
        act_prob = {(1, 2): (1 - p), (1, 1): 2 * p / 3, (1, 0): p / 3}
        for loc in [(1, 2), (1, 1), (1, 0)]:
            for layout in util.functions.bitstrings(3):
                inds = [(1, 0), (1, 2), (0, 2)]
                dust = np.array(s.dust)  # copy the current state's dust
                dust = util.functions.sparse_matrix((2, 3), inds, layout,
                                                    dust)  # update dust
                n_dust = np.sum(dust)  # number of dust cells on the map
                s_p = vwmodel.VWState(np.array(loc), dust)
                # Probability of this dust layout.
                dust_prob = (q**(n_dust - 2)) * ((1 - q)**(5 - n_dust))
                self.assertTrue(
                    np.abs(T[s_p] - act_prob[loc] * dust_prob) < 1e-10)

        # Test a suck action.
        a = vwmodel.VWSuckAction()
        T = self._model1.T(s, a)
        for layout in util.functions.bitstrings(3):
            inds = [(1, 0), (1, 2), (0, 2)]
            dust = np.array(s.dust)
            dust = util.functions.sparse_matrix((2, 3), inds, layout, dust)
            n_dust = np.sum(dust)
            dust[1, 1] = 0  # sucking clears the cell under the robot
            s_p = vwmodel.VWState(s.robot, dust)
            dust_prob = (q**(n_dust - 2)) * ((1 - q)**(5 - n_dust))
            self.assertTrue(np.abs(T[s_p] - dust_prob) < 1e-10)

        # Every transition distribution must sum to one.
        for s in self._model1.S():
            for a in self._model1.A():
                T = self._model1.T(s, a)
                self.assertTrue(abs(sum(T.values()) - 1.0) < 1e-10)
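    # A worked check on the hand-computed probabilities above (our
    # arithmetic, assuming the three empty cells (1, 0), (1, 2), (0, 2)
    # gain dust independently with probability q): a layout that adds k
    # new dust cells occurs with probability q**k * (1 - q)**(3 - k).
    # The state starts with 2 dust cells, so k = n_dust - 2 and
    # 3 - k = 5 - n_dust, which is exactly the dust_prob expression used
    # in both loops.
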
    def test_S(self):
        s1 = self._map1.shape
        m1 = self._model1
        # 5 legal robot locations times 2**5 dust layouts over the free cells.
        self.assertTrue(len(m1.S()) == 5 * 2**5)
        for loc in np.transpose(np.nonzero(np.ones(s1))):
            for layout in util.functions.bitstrings(np.prod(s1)):
                inds = np.transpose(np.nonzero(np.ones(s1)))
                s = vwmodel.VWState(
                    loc, util.functions.sparse_matrix(s1, inds, layout))
                # Every legal state must appear in the model's state space.
                if m1.is_legal(s):
                    self.assertTrue(s in m1.S())

    def test_reward(self):
        reward = vwreward.VWReward()
        dust = np.array([[1, 0], [1, 1]])
        s = vwmodel.VWState(np.array([1, 1]), dust)
        a = vwmodel.VWSuckAction()
        self.assertEqual(reward.reward(s, a), -3)
        a = vwmodel.VWMoveAction(np.array([-1, 0]))
        self.assertEqual(reward.reward(s, a), -4)
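    # The expected values are consistent with a cost of -1 per dust cell on
    # the map plus -1 for moving (an inference from these two cases, not
    # necessarily how vwreward.VWReward computes it):
    #   suck: 3 dust cells           -> -3
    #   move: 3 dust cells + 1 move  -> -4
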
def vacuum_main():
    random.seed(0)
    np.random.seed(0)

    ## Initialize constants
    # map = np.array([[1, 1, 1, 1],
    #                 [1, 0, 1, 1],
    #                 [1, 1, 0, 1]])
    map = np.array([[1, 1, 1],
                    [1, 0, 1]])
    p_fail = 0.2
    p_dust = 0.05
    # start_state = vwmodel.VWState(np.array([0, 0]),
    #                               np.array([[1, 1, 0, 0],
    #                                         [0, 0, 1, 0],
    #                                         [0, 0, 0, 0]]))
    start_state = vwmodel.VWState(np.array([0, 0]),
                                  np.array([[1, 1, 0],
                                            [0, 0, 1]]))
    initial = util.classes.NumMap({start_state: 1.0})
    t_max = 500

    ## Initialize model
    model = vwmodel.VWModel(map, p_fail, p_dust)
    model.gamma = 0.99
    model.reward_function = vwreward.VWLinearReward(map)

    ## Define player
    # policy = mdp.agent.HumanAgent(model)
    # policy = mdp.agent.RandomAgent(model.A())
    opt_policy = mdp.solvers.ValueIteration(100).solve(model)
    # policy = mdp.solvers.PolicyIterator(20, mdp.solvers.ExactPolicyEvaluator()).solve(model)
    # policy = mdp.solvers.PolicyIterator(20, mdp.solvers.IteratingPolicyEvaluator(100)).solve(model)
    # policy = mdp.solvers.PolicyIterator(20, mdp.solvers.SamplingPolicyEvaluator(100, 50)).solve(model)
    policy = mdp.solvers.LSPI(50, 5000, vwetc.VWFeatureFunction()).solve(model)
    # policy = mdp.solvers.LSPI(50, 5000).solve(model)

    ## Print
    print model.info()
    # Count the states on which the approximate (LSPI) policy disagrees
    # with the value-iteration policy.
    n_different = 0
    for s in model.S():
        if opt_policy.actions(s) != policy.actions(s):
            n_different += 1
    print 'Optimal Policy and Approx Policy differ on {} states of {}'.format(
        n_different, len(model.S()))

    ## Simulate
    print 'Sample run:'
    for (s, a, r) in mdp.simulation.simulate(model, policy, initial, t_max):
        print '%s, %s, %f' % (s, a, r)
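
# Minimal entry-point guard (an assumption about how this script is run;
# the original module may invoke vacuum_main() differently):
if __name__ == '__main__':
    vacuum_main()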