# Example #1
def test4a():
    """4a-basic: check QLearningAlgorithm.incorporateFeedback() on a NumberLineMDP.

    Performs three feedback updates with numIters pinned to 1 (so the step
    size is exactly 1) and verifies the resulting Q-values via the grader.
    """
    mdp = util.NumberLineMDP()
    mdp.computeStates()
    rl = submission.QLearningAlgorithm(mdp.actions, mdp.discount(),
                                       submission.identityFeatureExtractor, 0)
    # We set this here so that the stepSize will be 1.
    rl.numIters = 1

    rl.incorporateFeedback(0, 1, 0, 1)
    print('1')
    grader.requireIsEqual(0, rl.getQ(0, -1))
    print('2')
    grader.requireIsEqual(0, rl.getQ(0, 1))

    rl.numIters = 1
    rl.incorporateFeedback(1, 1, 1, 2)
    print('3')
    grader.requireIsEqual(0, rl.getQ(0, -1))
    print('4')
    grader.requireIsEqual(0, rl.getQ(0, 1))
    print('5')
    grader.requireIsEqual(0, rl.getQ(1, -1))
    print('6')
    grader.requireIsEqual(1, rl.getQ(1, 1))

    rl.numIters = 1
    rl.incorporateFeedback(2, -1, 1, 1)
    print('7')
    grader.requireIsEqual(1.9, rl.getQ(2, -1))
    print('8')
    grader.requireIsEqual(0, rl.getQ(2, 1))
# Example #2
def test_util():
    """Smoke-test the util module: build a NumberLineMDP, solve it with
    ValueIteration, and print the optimal values and policy."""
    print("Testing util module : ")
    print("...creating simple mdp instance ... ")
    mdp = util.NumberLineMDP()  # instance of an MDP problem
    solver = util.ValueIteration()  # algorithm instantiated
    solver.solve(mdp)  # algo applied to the MDP problem
    print("Vopt : %s " % solver.V)
    print("optimal_policy : %s " % solver.pi)
    print("... done test_util.\n")
def test1c():
    """Check computeOptimalPolicy: with V[s] = s on the number line,
    the optimal action at every state should be +1."""
    # Value function that just equals the state index.
    V = collections.Counter({s: s for s in range(-5, 6)})
    mdp = util.NumberLineMDP()
    mdp.computeStates()
    # Expected policy: move right (+1) everywhere.
    goldPi = collections.defaultdict(lambda: 1)
    pi = submission.computeOptimalPolicy(mdp, V)
    for s in range(-5, 6):
        ok = grader.requireIsEqual(goldPi[s], pi[s])
        if not ok:
            print('   state: {}'.format(s))
def testQ(f, V):
    """Compare submission.computeQ against gold Q-values.

    f: path to a gold file with whitespace-separated "state action value" lines.
    V: value function (mapping state -> value) passed through to computeQ.
    """
    mdp = util.NumberLineMDP()
    goldQ = {}
    # Use a context manager so the gold file is closed (the original leaked
    # the file handle from open()).
    with open(f) as fh:
        values = [l.split() for l in fh]
    for state, action, value in values:
        goldQ[(int(state), int(action))] = float(value)
    for state in range(-5, 6):
        for action in [-1, 1]:
            if not grader.requireIsEqual(goldQ[(state, action)],
                                         submission.computeQ(mdp, V, state,
                                             action)):
                print('   state: {}, action: {}'.format(state, action))
def testIteration(algorithm):
    """Run the given solver on a NumberLineMDP and compare its policy and
    values against the gold answers in '1d.gold'.

    algorithm: a solver object exposing solve(mdp, epsilon), .pi, and .V.
    """
    mdp = util.NumberLineMDP()
    # Expected policy: move right (+1) in every state.
    goldPi = collections.defaultdict(lambda: 1)
    goldV = {}
    # Close the gold file deterministically (the original leaked the handle).
    with open('1d.gold') as fh:
        values = [l.split() for l in fh]
    for state, value in values:
        goldV[int(state)] = float(value)
    algorithm.solve(mdp, .0001)
    for state in range(-5, 6):
        if not grader.requireIsEqual(goldPi[state], algorithm.pi[state]):
            print('   action for state: {}'.format(state))
        if not grader.requireIsLessThan(.001, abs(goldV[state] - algorithm.V[state])):
            print('   value for state: {}'.format(state))
def test1b():
    """Check submission.policyEvaluation on a NumberLineMDP under the
    always-move-left policy, against gold values in '1b.gold'."""
    V = collections.defaultdict(int)         # initial values: all zero
    pi = collections.defaultdict(lambda: -1) # fixed policy: always -1
    mdp = util.NumberLineMDP()
    mdp.computeStates()
    goldV = {}
    # Close the gold file deterministically (the original leaked the handle).
    with open('1b.gold') as fh:
        values = [l.split() for l in fh]
    for state, value in values:
        goldV[int(state)] = float(value)
    V = submission.policyEvaluation(mdp, V, pi, .0001)
    for state in range(-5, 6):
        if not grader.requireIsLessThan(.001, abs(goldV[state] - V[state])):
            print('   state: {}'.format(state))
# Example #7
    def test_basic(self):
        """4a-basic-0:  Basic test for incorporateFeedback() using NumberLineMDP."""
        mdp = util.NumberLineMDP()
        mdp.computeStates()
        agent = submission.QLearningAlgorithm(
            mdp.actions, mdp.discount(),
            submission.identityFeatureExtractor, 0)
        # Setting numIters to 1 makes the stepSize exactly 1 for the update.
        agent.numIters = 1

        agent.incorporateFeedback(0, 1, 0, 1)
        for (state, action), want in [((0, -1), 0), ((0, 1), 0)]:
            self.assertEqual(want, agent.getQ(state, action))

        agent.incorporateFeedback(1, 1, 1, 2)
        for (state, action), want in [((0, -1), 0), ((0, 1), 0),
                                      ((1, -1), 0), ((1, 1), 1)]:
            self.assertEqual(want, agent.getQ(state, action))

        agent.incorporateFeedback(2, -1, 1, 1)
        self.assertEqual(1.9, agent.getQ(2, -1))
        self.assertEqual(0, agent.getQ(2, 1))