Example #1
    @classmethod
    def unitTest(cls):
        print "Testing ValueIteration"
        import numpy as np
        import util
        from copy import deepcopy as dc
        from agents import Agent
        from system import System
        from networks import genGridNetwork
        from model import PJ

        np.random.seed(0)

        system = System(genGridNetwork((3, 3)), PJ())
        numNodes = system.network.numNodes
        numTrt = Agent.numTrt(system)
        numValidTrt = Agent.numValidTrt(numNodes,numTrt)

        # value iteration's v(s) should agree with max_a Q(s, a) recovered
        # from policy iteration on the same system
        v = ValueIteration.solve(dc(system))

        q = PolicyIteration.solve(dc(system))
        q = util.unflattenQ(q, numNodes, numValidTrt)
        vChk = [max(i) for i in q]

        for i in zip(v, vChk):
            print "% 12.6f  % 10.6f" % i
Example #2
    @classmethod
    def unitTest(cls):
        print "Testing PolicyIteration"
        import numpy as np
        import util
        import agents
        import systems
        from networks import genGridNetwork
        from model import PJ
        from copy import deepcopy
        from runners import vPiS

        np.random.seed(0)

        system = systems.System(genGridNetwork((2, 2)), PJ())
        numNodes = system.network.numNodes

        p = np.array(cls.calcP(system))
        r = np.array(cls.calcR(system))

        one = np.ones((p.shape[1],))

        # multiplying by a vector of ones sums each row of p
        pRowsum = np.dot(p, one)

        ## check numerical ranges on p
        tol = 1e-8
        if np.amin(p) < 0:
            raise ValueError("P has negative values")
        if np.amax(p) > 1.0:
            raise ValueError("P has values greater than 1")
        if abs(max(pRowsum) - 1) > tol or abs(min(pRowsum) - 1) > tol:
            raise ValueError("Not all row sums for P are 1.0")

        numTrt = agents.Agent.numTrt(system)
        numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)
        # random integer Q-values, one per (state, treatment) pair, flattened
        # over the 2^numNodes states and numValidTrt valid treatment sets
        q = np.random.randint(numNodes, size=(1 << numNodes) * numValidTrt)
        q = util.unflattenQ(q, numNodes, numValidTrt)

        # pi for the policy extracted from q; like p, its rows should be
        # probability distributions, which the checks below verify
        pi = cls.piForPolicy(util.q2Policy(q), system)

        one = np.ones((pi.shape[1],))
        piRowSum = np.dot(pi, one)

        ## check numerical ranges on pi
        if pi.min() < 0:
            raise ValueError("Pi has some negative values")
        if pi.max() > 1.0:
            raise ValueError("Pi has values greater than 1")
        if abs(np.amin(piRowSum) - 1) > tol or abs(np.amax(piRowSum) - 1) > tol:
            raise ValueError("Rows of pi do not sum to 1")

        ## make sure random agent estimates worse Q-values
        gamma = 0.9

        # baseline policy that lists every valid treatment in every state;
        # its value function should never beat the optimal policy's
        randPol = [range(numValidTrt) for i in range(1 << numNodes)]
        vRa = PolicyIteration2.vForPolicy(randPol, system, gamma=gamma).tolist()

        polOpt = PolicyIteration2.solve(deepcopy(system), gamma=gamma)
        vOpt = PolicyIteration2.vForPolicy(polOpt, system, gamma=gamma).tolist()

        cnt = sum(i > j for i, j in zip(vRa, vOpt))
        if cnt > 0:
            raise ValueError("Random Agent does better " + "than optimal V-function %d times" % cnt)

        ## check that gamma = 0 results in a v-function equal to
        ## expected immediate rewards
        gamma = 0.0

        polOpt = PolicyIteration2.solve(deepcopy(system), gamma=gamma)
        vOpt = PolicyIteration2.vForPolicy(polOpt, system, gamma=gamma)
        pi = cls.piForPolicy(polOpt, system)

        if np.linalg.norm(pi.dot(r) - vOpt, 2) > 1e-10:
            raise ValueError("Gamma = 0 did not result in expected " + "immediate rewards")

        ## check analytical values with simulated values
        gamma = 0.5

        polOpt = PolicyIteration2.solve(deepcopy(system), gamma=gamma)
        vOpt = PolicyIteration2.vForPolicy(polOpt, system, gamma=gamma)
        agentOpt = agents.PolAgent.fromPol(polOpt)

        diff = 0.0
        for s in range(1 << numNodes):
            print "Checking state % 4d" % s
            # Monte Carlo estimate of the discounted return from state s:
            # reps=1000 simulated trajectories of finalT=10 steps each
            val = vPiS(s, system, agentOpt, gamma=gamma, finalT=10, reps=1000)
            diff += abs(vOpt[s] - val)

        diff /= float(1 << numNodes)
        # print "diff from sim: % 10.6f" % diff
        if diff > 0.05:
            raise ValueError("V values differ from sim by %f" % diff)