Example #1
    def unitTest(cls):
        print "Testing QAgent"

        from system import System
        from networks import genGridNetwork
        from model import PJ

        s = System(genGridNetwork((4,3)),PJ())
        s.cleanse()
        s.infections[2] = True
        s.infections[4] = True
        s.infections[7] = True

        numTrt = Agent.numTrt(s)
        numNodes = s.network.numNodes

        assert numNodes == 12, ("numNodes is not 12")
        assert numTrt == 3, ("numTrt is not 3")

        maxInfInd = 1 << numNodes
        maxTrtInd = Agent.numValidTrt(numNodes,numTrt)

        q = [[0]*maxTrtInd for i in range(maxInfInd)]
        for i in range(maxInfInd):
            for t in range(maxTrtInd):
                q[i][t] = t

        trtNodes = [9,10,11]

        a = cls.fromQ(q)

        a.applyTrt(s)

        if s.nTrt > len(trtNodes):
            raise ValueError("QAgent gave too many treatments ")
        elif s.nTrt < len(trtNodes):
            raise ValueError("QAgent gave too few treatments")
        else:
            for i in trtNodes:
                if not s.treatments[i]:
                    raise ValueError("QAgent did not treat node " + i)
Example #2
    def unitTest(cls):
        print "Testing ValueIteration"
        np.random.seed(0)
        from system import System
        from networks import genGridNetwork
        from model import PJ


        system = System(genGridNetwork((3,3)),PJ())
        numNodes = system.network.numNodes
        numTrt = Agent.numTrt(system)
        numValidTrt = Agent.numValidTrt(numNodes,numTrt)

        v = ValueIteration.solve(dc(system))

        q = PolicyIteration.solve(dc(system))
        q = util.unflattenQ(q,numNodes,numValidTrt)
        vChk = [max(i) for i in q]

        for i in zip(v,vChk):
            print "% 12.6f  % 10.6f" % i
Example #3
    def unitTest(cls):
        print "Testing System"

        np.random.seed(0)
        random.seed(0)

        system = System(genGridNetwork((2,2)),PJ())
        numNodes = system.network.numNodes

        reps = 50000
        tol = math.sqrt(0.5**2/float(reps))*2.05

        for s in range(1 << numNodes):
            print "Checking state % 4d" % s

            system.infCmb(cmb=s)

            probs = system.model.transProbs(system)
            pastInf = dc(system.infections)

            avgChange = [0]*numNodes
            for r in range(reps):
                system.infCmb(cmb=s)
                system.turnClock(probs=probs)

                for n in range(numNodes):
                    avgChange[n] += system.infections[n] != pastInf[n]

            avgChange = [float(i)/float(reps) for i in avgChange]

            diff = sum(abs(i - j) for i,j in zip(probs,avgChange))
            diff /= float(numNodes)

            if diff > tol:
                for i,j in zip(probs,avgChange):
                    print "true: % 8.6f,  sim: % 8.6f" % (i,j)
                raise ValueError(("Simulation doesn't match model probabilities"
                                  + " for state %d with absolute total"
                                  + " difference of %f")
                                 % (s,diff))
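
The tolerance follows from the Bernoulli standard error: each per-node change indicator has standard deviation at most 0.5, so its Monte Carlo mean over reps draws has standard error at most 0.5/sqrt(reps), and tol is just over two such standard errors (roughly a 95% band):

    import math

    reps = 50000
    seMax = 0.5 / math.sqrt(reps)  ## worst-case standard error of the mean
    tol = 2.05 * seMax             ## equals math.sqrt(0.5**2/float(reps))*2.05
    print "%.6f" % tol             ## ~0.004584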
Example #4
def run():
    from networks import genGridNetwork
    from model import PJ

    system = System(genGridNetwork((2,2)),PJ())
    sCopy = dc(system)

    numNodes = system.network.numNodes
    numTrt = agents.Agent.numTrt(system)
    numValidTrt = agents.Agent.numValidTrt(numNodes,numTrt)

    gamma = 0.9

    optPol = unflattenQ(PolicyIteration2.solve(system,gamma=gamma),
                        numNodes,numValidTrt)

    optAgent = agents.PolAgent.fromPol(optPol)

    eqAgent = agents.EpsAgent.fromAgent(optAgent,0.1)

    qsarsa = QSarsa(system,gamma=0.9,lam=0.5,eps=0.2)

    qsarsa.iter(nIter=1000000)


    qfa = QFnApprox(Features2(),eqAgent)
    qfa.startCvGroups(10)

    ## NOTE: the bare name q was undefined in the original listing;
    ## qsarsa.q is the likely intent (mean absolute Q-value after the
    ## SARSA run)
    avgQ = [abs(qsa) for qs in qsarsa.q for qsa in qs]
    avgQ = sum(avgQ)/float(len(avgQ))



    R = 1000
    I = 10000
    for r in range(R):
        qfa.iterAndSolve(system,eqAgent,nIter=I,gamma=gamma,
                         addToCvGroups=True,ridgePen=True)
        print "% 9d" % (r*I + I)

        diff = 0.0
        cnt = 0
        for s in range(1 << numNodes):
            sCopy.infCmb(cmb=s)

            for a in range(numValidTrt):
                c = ind2Combo(a,numNodes,numTrt)
                sCopy.trtCmb(cmb=c)

                qHat = qfa.qHat(sCopy)

                if qHat is None:
                    break

                ## qAgent here (and la below) are not defined in this
                ## excerpt; they presumably come from elided setup code
                diff += abs(qHat - qAgent.q[s][a])
                cnt += 1

            if qHat is None:
                break

        if qHat is not None:
            print "    qRes: % 6.3f" % (diff/float(cnt),)
            print " bellRes: % 6.3f" % bellRes(system.history,
                                               system,
                                               qAgent,
                                               qfa.qHat,
                                               gamma)
            laQfn = lambda system : (
                la.q[system.infCmb()][combo2Ind(system.trtCmb(),
                                                     numNodes,numTrt)])
            print " trueRes: % 6.3f" % bellRes(system.history,
                                               system,
                                               qAgent,
                                               laQfn,
                                               gamma)
            sarsaQfn = lambda system : (
                qsarsa.q[system.infCmb()][combo2Ind(system.trtCmb(),
                                                    numNodes,numTrt)])
            print "sarsaRes: % 6.4f" % bellRes(system.history,
                                               system,
                                               qAgent,
                                               sarsaQfn,
                                               gamma)
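
ind2Combo and combo2Ind act as inverse maps between a treatment index in [0, C(numNodes, numTrt)) and the set of treated nodes. Their bodies are not shown; the standard way to build such a bijection is lexicographic ranking and unranking of k-subsets, sketched below as an assumption about what the original helpers do:

    from math import factorial

    def binom(n, k):
        return factorial(n) // (factorial(k) * factorial(n - k)) if 0 <= k <= n else 0

    def combo2Ind(combo, n, k):
        ## lexicographic rank of a sorted k-subset of range(n)
        ind, nxt = 0, 0
        for i, c in enumerate(sorted(combo)):
            for v in range(nxt, c):
                ind += binom(n - v - 1, k - i - 1)
            nxt = c + 1
        return ind

    def ind2Combo(ind, n, k):
        ## inverse of combo2Ind: unrank the ind-th k-subset of range(n)
        combo, v = [], 0
        while k > 0:
            c = binom(n - v - 1, k - 1)  ## subsets whose next element is v
            if ind < c:
                combo.append(v)
                k -= 1
            else:
                ind -= c
            v += 1
        return combo

    ## round trip over all C(12, 3) = 220 treatment sets
    assert all(combo2Ind(ind2Combo(i, 12, 3), 12, 3) == i for i in range(220))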
Example #5
    gamma = 0.9


    ## function approx agent
    qfa = QFnApprox(features.Features3(),agents.MyopicAgent())
    qfa.startCvGroups(10)

    ## epsilon greedy agent
    eps = 0.1
    epsAgent = agents.EpsAgent.fromAgent(agents.MyopicAgent(),eps)

    ## estimate policies
    itersPerRep = 1000
    for i in range(1000):
        ## function approx
        qfa.iterAndSolve(dc(system),epsAgent,nIter=itersPerRep,gamma=gamma,
                         addToCvGroups=True,ridgePen=True)

        np.savetxt(dataDump.dd.open("beta"+str((i+1)*itersPerRep)+".csv","w"),
                   qfa.beta,delimiter=",")
        print "Saved beta with % 10d iters" % ((i+1)*itersPerRep)



    dataDump.dd.clean(force=True)



if __name__ == "__main__":
    main(genGridNetwork((10,10)))
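
agents.EpsAgent.fromAgent(agent, eps) wraps a base agent in epsilon-greedy exploration. The class is not shown in these excerpts; the standard construction (with probability eps apply a uniformly random valid treatment, otherwise defer to the wrapped agent) would look roughly like the sketch below. The class name is a stand-in, and the calls to system.trtCmb and agents.Agent.numTrt mirror how they are used elsewhere in these examples:

    import random
    import agents

    class EpsAgentSketch(object):
        ## hypothetical stand-in for agents.EpsAgent
        def __init__(self, agent, eps):
            self.agent = agent
            self.eps = eps

        def applyTrt(self, system):
            if random.random() < self.eps:
                ## explore: treat numTrt distinct nodes chosen uniformly
                numNodes = system.network.numNodes
                numTrt = agents.Agent.numTrt(system)
                system.trtCmb(cmb=sorted(random.sample(range(numNodes), numTrt)))
            else:
                ## exploit: defer to the wrapped (e.g. myopic) agent
                self.agent.applyTrt(system)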
Example #6
    results = compareAgents(system, compAgents, 1000, 100, 1.0)

    resSort = []
    for a in results:
        resSort.append((results[a]["mean"], results[a]["stdev"], a))

    resSort = sorted(resSort, reverse=True)

    print "% 24s  % 16s  % 16s" % ("Agent", "Mean", "Stdev")
    for r in resSort:
        print "% 24s  % 16.7f  % 16.7f" % (r[2], r[0], r[1])

    resFile = "results_" + network.kind + ".txt"
    dataDump.dd("Agent,Mean,Stdev\n", resFile, "w")
    for res in resSort:
        dataDump.dd(",".join(map(str, (res[2], res[0], res[1]))) + "\n", resFile, "a")


if __name__ == "__main__":
    shapes = [(2, 1), (3, 1), (2, 2), (4, 1), (2, 3), (6, 1), (3, 3)]

    multipliers = [5, 10, 25, 50]

    for shape in shapes:
        numNodes = shape[0] * shape[1]
        numTrt = int(0.25 * numNodes)
        nCombos = (1 << numNodes) * sympy.binomial(numNodes, numTrt)
        nIters = [m * nCombos for m in multipliers]

        main(genGridNetwork(shape), nIters)
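
The iteration budgets scale with the size of the exact Q-table: (1 << numNodes) infection states times C(numNodes, numTrt) treatment sets. For the (3, 3) grid, for instance, numTrt = int(0.25 * 9) = 2, so the table has 512 * 36 = 18432 cells and the budgets are 5 to 50 passes' worth of iterations:

    import sympy

    numNodes = 3 * 3               ## the (3, 3) grid
    numTrt = int(0.25 * numNodes)  ## = 2
    nCombos = (1 << numNodes) * sympy.binomial(numNodes, numTrt)
    print nCombos                  ## 18432
    print [m * nCombos for m in [5, 10, 25, 50]]
    ## [92160, 184320, 460800, 921600]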
Example #7
    def unitTest(cls):
        print "Testing PolicyIteration"
        np.random.seed(0)

        from networks import genGridNetwork
        from model import PJ
        from copy import deepcopy
        from runners import vPiS

        system = systems.System(genGridNetwork((2, 2)), PJ())
        numNodes = system.network.numNodes

        p = np.array(cls.calcP(system))
        r = np.array(cls.calcR(system))

        one = np.ones((p.shape[1],))

        pRowsum = np.dot(p, one)

        ## check numerical ranges on p
        tol = 1e-8
        if np.amin(p) < 0:
            raise ValueError("P has negative values")
        if np.amax(p) > 1.0:
            raise ValueError("P has values greater than 1")
        if abs(max(pRowsum) - 1) > tol or abs(min(pRowsum) - 1) > tol:
            raise ValueError("Not all row sums for P are 1.0")

        numTrt = agents.Agent.numTrt(system)
        numValidTrt = agents.Agent.numValidTrt(numNodes, numTrt)
        q = util.unflattenQ(
            np.random.randint(numNodes, size=(1 << numNodes) * numValidTrt),
            numNodes, numValidTrt)

        pi = cls.piForPolicy(util.q2Policy(q), system)

        one = np.ones((pi.shape[1],))
        piRowSum = np.dot(pi, one)

        ## check numerical ranges on pi
        if np.amin(pi) < 0:
            raise ValueError("Pi has negative values")
        if np.amax(pi) > 1.0:
            raise ValueError("Pi has values greater than 1")
        if abs(np.amin(piRowSum) - 1) > tol or abs(np.amax(piRowSum) - 1) > tol:
            raise ValueError("Rows of pi do not sum to 1")

        ## make sure random agent estimates worse Q-values
        gamma = 0.9

        randPol = [range(numValidTrt) for i in range(1 << numNodes)]
        vRa = PolicyIteration2.vForPolicy(randPol, system, gamma=gamma).tolist()

        polOpt = PolicyIteration2.solve(deepcopy(system), gamma=gamma)
        vOpt = PolicyIteration2.vForPolicy(polOpt, system, gamma=gamma).tolist()

        cnt = sum(i > j for i, j in zip(vRa, vOpt))
        if cnt > 0:
            raise ValueError("Random Agent does better " + "than optimal V-function %d times" % cnt)

        ## check that gamma = 0 results in a v-function equal to
        ## expected immediate rewards
        gamma = 0.0

        polOpt = PolicyIteration2.solve(deepcopy(system), gamma=gamma)
        vOpt = PolicyIteration2.vForPolicy(polOpt, system, gamma=gamma)
        pi = cls.piForPolicy(polOpt, system)

        if np.linalg.norm(pi.dot(r) - vOpt, 2) > 1e-10:
            raise ValueError("Gamma = 0 did not result in expected " + "immediate rewards")

        ## check analytical values with simulated values
        gamma = 0.5

        polOpt = PolicyIteration2.solve(deepcopy(system), gamma=gamma)
        vOpt = PolicyIteration2.vForPolicy(polOpt, system, gamma=gamma)
        agentOpt = agents.PolAgent.fromPol(polOpt)

        diff = 0.0
        for s in range(1 << numNodes):
            print "Checking state % 4d" % s
            val = vPiS(s, system, agentOpt, gamma=gamma, finalT=10, reps=1000)
            diff += abs(vOpt[s] - val)

        diff /= float(1 << numNodes)
        # print "diff from sim: % 10.6f" % diff
        if diff > 0.05:
            raise ValueError("V values differ from sim by %f" % diff)
Example #8
                elif t[0] == "a":
                    val *= system.treatments[t[1]]
                else:
                    raise ValueError("Should be 's' or 'a'")

                if not val:
                    break

            if val:
                data.append(val)
                rows.append(ind)

            ind += 1

        cols = [0] * len(data)

        return sps.csr_matrix((data,(rows,cols)),shape=(ind,1))


if __name__ == "__main__":
    import systems
    import networks
    import model

    system = systems.System(networks.genGridNetwork((2,2)),model.PJ())

    f3 = Features3()
    for i in f3.getFeatures(system):
        print i
    print len(f3.getFeatures(system))
Example #9
                for a in range(self.numValidTrt):
                    alpha = 1.0 / (1.0 + self.c[s][a] + self.e[s][a])
                    self.q[s][a] += alpha * delta * self.e[s][a]
                    self.e[s][a] *= self.gamma * self.lam

            self.st = stp1
            self.at = atp1

            self.iters += 1


if __name__ == "__main__":
    from networks import genGridNetwork
    from model import PJ

    system = System(genGridNetwork((2,2)),PJ())
    numNodes = system.network.numNodes
    numTrt = agents.Agent.numTrt(system)
    numValidTrt = agents.Agent.numValidTrt(numNodes,numTrt)

    gamma = 0.9


    optQ = PolicyIteration.solve(system,gamma=gamma)
    optQ = optQ.tolist()

    qsarsa = QSarsa(system,epsAgent=agents.RandomAgent(),
                    gamma=0.9,lam=0.5,eps=0.1)


    R = 1000
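
The truncated loop above is the accumulating-traces SARSA(lambda) backup: after each transition the TD error delta = r + gamma*Q(s', a') - Q(s, a) is pushed into every (state, action) pair in proportion to its eligibility trace, which then decays by gamma*lam. Collected into one function, a sketch that mirrors the snippet, including its per-pair step size 1/(1 + c + e):

    def sarsaLamUpdate(q, e, c, s, a, r, sp, ap, gamma, lam):
        ## one SARSA(lambda) backup over the whole (state, action) table
        delta = r + gamma * q[sp][ap] - q[s][a]
        e[s][a] += 1.0  ## accumulating eligibility trace
        for si in range(len(q)):
            for ai in range(len(q[si])):
                alpha = 1.0 / (1.0 + c[si][ai] + e[si][ai])
                q[si][ai] += alpha * delta * e[si][ai]
                e[si][ai] *= gamma * lam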