Example #1
    env = SimulatedEnvironment(domain, rf, tf, initialState)
    #    Print the map that is being analyzed
    print "/////{} Grid World Analysis/////\n".format(world)
    MapPrinter().printMap(MapPrinter.matrixToMap(userMap))
    visualizeInitialGridWorld(domain, gen, env)

    hashingFactory = SimpleHashableStateFactory()
    increment = MAX_ITERATIONS / NUM_INTERVALS
    timing = defaultdict(list)
    rewards = defaultdict(list)
    steps = defaultdict(list)
    convergence = defaultdict(list)
    allStates = getAllStates(domain, rf, tf, initialState)
    # Value Iteration
    iterations = range(1, MAX_ITERATIONS + 1)
    # maxDelta = -1 (unreachable) and maxIterations = 1, so each runVI() call
    # below is intended to perform a single additional sweep
    vi = ValueIteration(domain, rf, tf, discount, hashingFactory, -1, 1)
    vi.setDebugCode(0)
    vi.performReachabilityFrom(initialState)
    vi.toggleUseCachedTransitionDynamics(False)
    print "//{} Value Iteration Analysis//".format(world)
    flag = True
    timing['Value'].append(0)  # seed entry so cumulative times can accumulate below
    for nIter in iterations:
        startTime = clock()
        vi.runVI()
        # accumulate total planning time so far (seconds) rather than per-sweep time
        timing['Value'].append(timing['Value'][-1] + clock() - startTime)
        p = vi.planFromState(initialState)
        convergence['Value'].append(vi.latestDelta)
        # evaluate the policy with evalTrials roll outs
        runEvals(initialState, p, rewards['Value'], steps['Value'])
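All of these snippets share the same handful of imports. The standard-library ones are evident from the code itself; the BURLAP classes are pulled in through Jython, and the package paths below follow the BURLAP 2.x layout, so treat them as an assumption and adjust them to your BURLAP version:

from collections import defaultdict
from time import clock

# BURLAP (via Jython); package paths assume the BURLAP 2.x layout
from burlap.behavior.singleagent.planning.stochastic.valueiteration import ValueIteration
from burlap.oomdp.statehashing import SimpleHashableStateFactory
from burlap.oomdp.singleagent.environment import SimulatedEnvironment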
Example #2
    hashingFactory = SimpleHashableStateFactory()
    increment = MAX_ITERATIONS / NUM_INTERVALS
    timing = defaultdict(list)
    rewards = defaultdict(list)
    steps = defaultdict(list)
    convergence = defaultdict(list)
    policy_converged = defaultdict(list)
    last_policy = defaultdict(list)
    # Value Iteration
    iterations = range(1, MAX_ITERATIONS + 1)

    print "//Easy Value Iteration Analysis//"
    for nIter in iterations:
        startTime = clock()
        # maxDelta = -1 can never be reached, so value iteration always runs the
        # requested nIter iterations, keeping the comparison with the other
        # algorithms fair
        vi = ValueIteration(domain, rf, tf, discount, hashingFactory, -1, nIter)
        # run planning from our initial state
        vi.setDebugCode(0)
        p = vi.planFromState(initialState)
        timing['Value'].append((clock() - startTime) * 1000)
        convergence['Value'].append(vi.latestDelta)
        # evaluate the policy with evalTrials roll outs
        runEvals(initialState, p, rewards['Value'], steps['Value'])
        if nIter == 1:
            simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory,
                                   "Value Iteration {}".format(nIter))
    MapPrinter.printPolicyMap(vi.getAllStates(), p, gen.getMap())
    print "\n\n\n"
    simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory,
                           "Value Iteration {}".format(nIter))
Example #3
    hashingFactory = SimpleHashableStateFactory()
    increment = MAX_ITERATIONS / NUM_INTERVALS
    timing = defaultdict(list)
    rewards = defaultdict(list)
    steps = defaultdict(list)
    convergence = defaultdict(list)
    policy_converged = defaultdict(list)
    last_policy = defaultdict(list)

#   Value Iteration starts
    iterations = range(1, MAX_ITERATIONS + 1)

    print "//hard Value Iteration Analysis//"
    for nIter in iterations:
        startTime = clock()
        # maxDelta = -1 can never be reached, so value iteration always runs the
        # full nIter iterations, for comparison with the other algorithms
        vi = ValueIteration(domain, rf, tf, discount, hashingFactory, -1, nIter)
        # run planning from our initial state
        vi.setDebugCode(0)
        p = vi.planFromState(initialState)
        timing['Value'].append((clock() - startTime) * 1000)
        convergence['Value'].append(vi.latestDelta)
        # evaluate the policy with evalTrials roll outs
        runEvals(initialState, p, rewards['Value'], steps['Value'])
        if nIter == 1:
            simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory, "Value Iteration {}".format(nIter))
        if nIter == MAX_ITERATIONS/2:
            simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory, "Value Iteration {}".format(nIter))
    MapPrinter.printPolicyMap(vi.getAllStates(), p, gen.getMap())
    print "\n\n\n"
    simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory, "Value Iteration {}".format(nIter))
    dumpCSV(iterations, timing['Value'], rewards['Value'], steps['Value'], convergence['Value'], world, 'Value')
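dumpCSV is a helper from the surrounding assignment code and its body is not shown in these snippets. A purely hypothetical reconstruction, assuming the arguments are parallel lists that should be written out as CSV columns (the real helper may differ):

import csv

def dumpCSV(iters, times, rewards, steps, convergence, world, method):
    # one row per iteration; all list arguments are assumed to be parallel
    fname = '{} {}.csv'.format(world, method)
    with open(fname, 'wb') as f:  # 'wb' for the Python 2 csv module
        writer = csv.writer(f)
        writer.writerow(['iter', 'time', 'reward', 'steps', 'convergence'])
        for row in zip(iters, times, rewards, steps, convergence):
            writer.writerow(row)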
Example #4
        # Print the map that is being analyzed
        print "/////Grid World {}x{} Analysis/////\n".format(n, n)
        MapPrinter().printMap(MapPrinter.matrixToMap(userMap))
        #     visualizeInitialGridWorld(domain, gen, env)
        hashingFactory = SimpleHashableStateFactory()
        increment = MAX_ITERATIONS / NUM_INTERVALS
        # Value Iteration
        iterations = defaultdict(list)
        timing = defaultdict(list)
        rewards = defaultdict(list)
        steps = defaultdict(list)
        print "//Size Value Iteration Analysis//"
        startTime = clock()
        # converge when the Bellman delta drops below 1e-6, or stop after
        # MAX_ITERATIONS sweeps
        vi = ValueIteration(domain, rf, tf, discount, hashingFactory, 1e-6,
                            MAX_ITERATIONS)
        vi.toggleUseCachedTransitionDynamics(False)
        # run planning from our initial state
        vi.setDebugCode(0)
        p = vi.planFromState(initialState)
        timing['Value'].append(clock() - startTime)
        iterations['Value'].append(vi.numIterations)
        # evaluate the policy with one roll out visualize the trajectory
        runEvals(initialState, p, rewards['Value'], steps['Value'])

        MapPrinter.printPolicyMap(vi.getAllStates(), p, gen.getMap())
        print "\n\n"
        #     simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory, "Value Iteration")

        dumpCSV(iterations['Value'], timing['Value'], rewards['Value'],
                steps['Value'], [n], 'Value', n == 2)
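Example #4 is indented one level deeper than the others and is parameterized by a grid size n, so it evidently lives inside a loop over problem sizes. A hypothetical driver for it, reusing the constructors visible in Example #5 (the size list and buildRandomMap are assumptions):

for n in [2, 4, 8, 16]:                  # grid sizes to sweep (assumption)
    userMap = buildRandomMap(n)          # hypothetical map builder
    gen = BasicGridWorld(userMap, n, n)
    domain = gen.generateDomain()
    initialState = gen.getExampleState(domain)
    rf = BasicRewardFunction(n, n, userMap)
    tf = BasicTerminalFunction(n, n)
    # ... body of Example #4 goes here ...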
Example #5
def vIteration(world, userMap, maxX, maxY, discount=0.99, MAX_ITERATIONS=100):
    gen = BasicGridWorld(userMap, maxX, maxY)
    domain = gen.generateDomain()
    initialState = gen.getExampleState(domain)

    rf = BasicRewardFunction(maxX, maxY, userMap)
    tf = BasicTerminalFunction(maxX, maxY)
    env = SimulatedEnvironment(domain, rf, tf, initialState)
    visualizeInitialGridWorld(domain, gen, env)

    hashingFactory = SimpleHashableStateFactory()
    timing = defaultdict(list)
    rewards = defaultdict(list)
    steps = defaultdict(list)
    convergence = defaultdict(list)

    allStates = getAllStates(domain, rf, tf, initialState)

    print("*** {} Value Iteration Analysis".format(world))

    iterations = range(1, MAX_ITERATIONS + 1)
    # maxDelta = -1 (unreachable) and maxIterations = 1, so each runVI() call
    # below is intended to perform a single additional sweep
    vi = ValueIteration(domain, rf, tf, discount, hashingFactory, -1, 1)
    vi.setDebugCode(0)
    vi.performReachabilityFrom(initialState)
    vi.toggleUseCachedTransitionDynamics(False)
    timing['Value'].append(0)  # seed entry; dropped again via the [1:] slice in dumpCSV below
    for nIter in iterations:
        startTime = clock()
        vi.runVI()
        p = vi.planFromState(initialState)
        endTime = clock()
        timing['Value'].append((endTime - startTime) * 1000)

        convergence['Value'].append(vi.latestDelta)
        # evaluate the policy with evalTrials roll outs
        runEvals(initialState, p, rewards['Value'], steps['Value'], rf, tf, evalTrials=1)
        if nIter == 1 or nIter == 50:
            simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory, "Value Iteration {}".format(nIter))

    simpleValueFunctionVis(vi, p, initialState, domain, hashingFactory, "Value Iteration {}".format(nIter))
    dumpPolicyMap(MapPrinter.printPolicyMap(allStates, p, gen.getMap()),
            world + ' Value Iteration Policy Map.pkl')
    dumpCSV(nIter, timing['Value'][1:], rewards['Value'], steps['Value'], convergence['Value'], world, 'Value')
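Example #5 wraps the whole analysis in a reusable function. A minimal call site, assuming userMap is a 2D list of integers describing the grid (the 0/1 encoding below is made up for illustration):

# a tiny 3x3 grid: 0 = open cell, 1 = wall (encoding assumed)
userMap = [[0, 0, 0],
           [0, 1, 0],
           [0, 0, 0]]
vIteration("Easy", userMap, 3, 3, discount=0.99, MAX_ITERATIONS=100)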