Example #1
from collections import defaultdict
from time import clock  # time.clock: this snippet targets Python 2 / Jython

# NOTE: the BURLAP classes (ValueIteration, SimpleHashableStateFactory, SimulatedEnvironment, ...)
# and project helpers (BasicGridWorld, runEvals, dumpPolicyMap, dumpCSV, MapPrinter, ...) are
# assumed to be imported elsewhere in the project.
def vIteration(world, userMap, maxX, maxY, discount=0.99, MAX_ITERATIONS=100):
    gen = BasicGridWorld(userMap, maxX, maxY)
    domain = gen.generateDomain()
    initialState = gen.getExampleState(domain)

    rf = BasicRewardFunction(maxX, maxY, userMap)
    tf = BasicTerminalFunction(maxX, maxY)
    env = SimulatedEnvironment(domain, rf, tf, initialState)
    visualizeInitialGridWorld(domain, gen, env)

    hashingFactory = SimpleHashableStateFactory()
    timing = defaultdict(list)
    rewards = defaultdict(list)
    steps = defaultdict(list)
    convergence = defaultdict(list)

    allStates = getAllStates(domain, rf, tf, initialState)

    print("*** {} Value Iteration Analysis".format(world))

    iterations = range(1, MAX_ITERATIONS + 1)
    # maxDelta=-1, maxIterations=1: each call to runVI() performs (at most) one sweep
    vi = ValueIteration(domain, rf, tf, discount, hashingFactory, -1, 1)
    vi.setDebugCode(0)  # suppress BURLAP debug output
    vi.performReachabilityFrom(initialState)
    vi.toggleUseCachedTransitionDynamics(False)
    timing['Value'].append(0)
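    # one value-iteration sweep per loop pass: time it, record the Bellman residual
    # (latestDelta), and evaluate the greedy policy found so far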
    for nIter in iterations:
        startTime = clock()
        vi.runVI()
        p = vi.planFromState(initialState)
        endTime = clock()
        timing['Value'].append((endTime-startTime)*1000)

        convergence['Value'].append(vi.latestDelta)
        # evaluate the policy with evalTrials roll outs
        runEvals(initialState, p, rewards['Value'], steps['Value'], rf, tf, evalTrials=1)
        if nIter == 1 or nIter == 50:
            simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory, "Value Iteration {}".format(nIter))

    simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory, "Value Iteration {}".format(nIter))
    dumpPolicyMap(MapPrinter.printPolicyMap(allStates, p, gen.getMap()),
            world + ' Value Iteration Policy Map.pkl')
    dumpCSV(nIter, timing['Value'][1:], rewards['Value'], steps['Value'], convergence['Value'], world, 'Value')
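A minimal usage sketch, not part of the original snippet: the world name, the grid layout, the 0/1 wall encoding, and the assumption that maxX/maxY are the largest valid row/column indices are all illustrative guesses about the format BasicGridWorld expects.

if __name__ == '__main__':
    # Hypothetical 4x4 grid: 0 = open cell, 1 = wall (assumed encoding).
    userMap = [[0, 0, 0, 0],
               [0, 1, 1, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0]]
    maxX = len(userMap) - 1     # assumed: largest valid x index
    maxY = len(userMap[0]) - 1  # assumed: largest valid y index
    vIteration('Easy', userMap, maxX, maxY, discount=0.99, MAX_ITERATIONS=50)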
Example #2
 rewards = defaultdict(list)
 steps = defaultdict(list)
 convergence = defaultdict(list)
 allStates = getAllStates(domain, rf, tf, initialState)
 # Value Iteration
 iterations = range(1, MAX_ITERATIONS + 1)
 vi = ValueIteration(domain, rf, tf, discount, hashingFactory, -1, 1)
 vi.setDebugCode(0)
 vi.performReachabilityFrom(initialState)
 vi.toggleUseCachedTransitionDynamics(False)
 print "//{} Value Iteration Analysis//".format(world)
 flag = True  # take the converged snapshot / policy dump only once
 timing['Value'].append(0)
 for nIter in iterations:
     startTime = clock()
     vi.runVI()
     # cumulative planning time in seconds (a per-iteration timing in ms is kept commented out)
     #timing['Value'].append((clock()-startTime) * 1000)
     timing['Value'].append(timing['Value'][-1] + clock() - startTime)
     p = vi.planFromState(initialState)
     convergence['Value'].append(vi.latestDelta)
     # evaluate the policy with evalTrials roll outs
     runEvals(initialState, p, rewards['Value'], steps['Value'])
     #if nIter == 1:
     #simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory, "Value Iteration {}".format(nIter))
     # once the Bellman residual drops below 1e-6, save one value-function plot and policy map
     if (vi.latestDelta < 1e-6) and flag:
         flag = False
         simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory,
                                "Value Iteration {}".format(nIter))
         dumpPolicyMap(
             MapPrinter.printPolicyMap(allStates, p, gen.getMap()),
             'Value {} Iter {} Policy Map.pkl'.format(world, nIter))
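A small follow-up sketch, not from the original code: assuming dumpPolicyMap() writes an ordinary pickle file (the .pkl extension suggests so, but the snippet does not show it), a saved policy map could be reloaded for inspection like this; the filename is only an example of the pattern used above.

import pickle

# Hypothetical filename following the 'Value {world} Iter {n} Policy Map.pkl' pattern.
with open('Value Easy Iter 25 Policy Map.pkl', 'rb') as f:
    policyMap = pickle.load(f)
print(policyMap)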