示例#1
0
                   0], [0, 1, 0, 0, 0, 0, 0, 0, 1, 0],
                  [0, 1, 0, 1, 1, 0, 1, 0, 1,
                   0], [0, 1, 0, 1, 0, 0, 1, 0, 1, 0],
                  [0, 1, 1, 1, 0, 1, 1, 0, 1,
                   0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
                  [0, 0, 1, 0, 1, 1, 0, 0, 1, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]]))

    # =============================
    # usage of pShow(p, myMaze):
    p = np.zeros(myMaze.stateSize)
    p[0] = 0.2
    p[2] = 0.3
    p[8] = 0.5

    pShow(p, myMaze)  #Note that pShow scales the probability so that the
    #maximum value is 1; this makes it helpful for visualizing
    #probabilities that are thinly spread out
    # =============================

    rob = robot(myMaze)
    rob.prob = p

    robotShow(rob)
    rob.step()
    rob.step()
    rob.step()
    robotShow(rob)
    ''' Set rob.prob to the steady state'''
    rob.randomize()
示例#2
0
                   0], [0, 1, 0, 0, 0, 0, 0, 0, 1, 0],
                  [0, 1, 0, 1, 1, 0, 1, 0, 1,
                   0], [0, 1, 0, 1, 0, 0, 1, 0, 1, 0],
                  [0, 1, 1, 1, 0, 1, 1, 0, 1,
                   0], [0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]]))

    stateReward = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 100, 0, 0, 0, 0, 0],
                            [
                                -1000, -1000, -1000, -1000, -1000, -1000,
                                -1000, -1000, -1000, -1000
                            ]])

    mdp = MDPmaze(myMaze, stateReward)

    iterCount = 100
    printSkip = 10

    for i in range(iterCount):
        mdp.valIter()
        if np.mod(i, printSkip) == 0:
            print("Iteration ", i)
            pShow(mdp.value, myMaze)
示例#3
0
                  [0, 1, 0, 1, 1, 0, 1, 0, 1,
                   0], [0, 1, 0, 1, 0, 0, 1, 0, 1, 0],
                  [0, 1, 1, 1, 0, 1, 1, 0, 1,
                   0], [0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]]))

    stateReward = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 100, 0, 0, 0, 0, 0],
                            [
                                -1000, -1000, -1000, -1000, -1000, -1000,
                                -1000, -1000, -1000, -1000
                            ]])

    mdp = MDPmaze(myMaze, stateReward)

    iterCount = 1000

    printSkip = 100
    v = np.zeros(mdp.stateSize)
    for i in range(iterCount):
        v = mdp.valIter()
        if np.mod(i, printSkip) == 0:
            print("Iteration ", i)
    pShow(v, myMaze)
示例#4
0
        [1.6, 3.9, 2.5, 4.9, 3.9, 6.7, 9.4, 1.0, 7.1, 6.8],
        [7.7, 9., 9.7, 9.3, 0.4, 6.5, 3.9, 0, 1.7, 2.6],
        [-9000, 2.1, 8.8, 3.8, 8.4, 8.7, 7.9, 5.8, 9.7, 9.2]
    ])

    ocean = Ocean(oceanMap, oceanReward)
    ship = Ship(ocean)
    '''
    Show probability of ship sailing north from harbor once (before/after)
    '''

    #Harbor coordinates
    harborState = ocean.coord2state((3, 2))
    print("Reward for Harbor: %f" % oceanReward[3, 2])
    #set the ship's location probability to the harbor state
    ship.prob = np.zeros(ocean.stateSize)
    ship.prob[harborState] = 1

    pShow(ship.prob, ocean)  #probability 1 of being in harbor
    ship.sail('N')
    pShow(ship.prob, ocean)  #probability after sailing north from harbor

    mdp = MDPSailing(ship)

    #Run Value iteration i.e. call valIter(N=something) until mdp.V
    #has converged.
    '''
    Uncomment the line below to once you are ready to save your answers and
    want to create the data.json file to upload.
    '''
    #saveData(mdp)