# NOTE(review): flattened notebook-cell text — original newlines were lost, and the
# array literal below is only the tail of a maze-grid definition whose opening
# (presumably something like `myMaze = Maze(np.array([[...`) lies before this chunk.
# TODO: confirm against the full notebook before restructuring; do not edit the
# literal in isolation.
# Demo: builds a sparse probability vector p over maze states (0.2 / 0.3 / 0.5 on
# states 0, 2, 8 — sums to 1.0), displays it with pShow, attaches it to a robot,
# calls step() three times and re-displays the belief, then calls rob.randomize()
# (per the adjacent note, intended to set rob.prob to the steady state — confirm).
0], [0, 1, 0, 0, 0, 0, 0, 0, 1, 0], [0, 1, 0, 1, 1, 0, 1, 0, 1, 0], [0, 1, 0, 1, 0, 0, 1, 0, 1, 0], [0, 1, 1, 1, 0, 1, 1, 0, 1, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 1, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]])) # ============================= # usage of showState(p): p = np.zeros(myMaze.stateSize) p[0] = 0.2 p[2] = 0.3 p[8] = 0.5 pShow(p, myMaze) #Note that pShow scales the probability so that the #maximum value is 1; this makes it helpful to visualize #probabilities that are thinly spread out # ============================= rob = robot(myMaze) rob.prob = p robotShow(rob) rob.step() rob.step() rob.step() robotShow(rob) ''' Set rob.prob to the steady state''' rob.randomize()
# NOTE(review): flattened notebook-cell text; the array tail below belongs to a
# maze definition opened before this chunk — do not edit the literal in isolation.
# Demo: defines a stateReward grid (a single +100 at row 6, column 4; -1000 across
# the entire bottom row), builds an MDPmaze over the maze, and runs 100 value
# iteration sweeps. The return of mdp.valIter() is ignored here; the current value
# function is read back from mdp.value and displayed via pShow every 10 iterations.
0], [0, 1, 0, 0, 0, 0, 0, 0, 1, 0], [0, 1, 0, 1, 1, 0, 1, 0, 1, 0], [0, 1, 0, 1, 0, 0, 1, 0, 1, 0], [0, 1, 1, 1, 0, 1, 1, 0, 1, 0], [0, 1, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]])) stateReward = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 100, 0, 0, 0, 0, 0], [ -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000 ]]) mdp = MDPmaze(myMaze, stateReward) iterCount = 100 printSkip = 10 for i in range(iterCount): mdp.valIter() if np.mod(i, printSkip) == 0: print("Iteration ", i) pShow(mdp.value, myMaze)
# NOTE(review): flattened notebook-cell text; the truncated maze-array opening
# precedes this chunk — do not edit the literal in isolation.
# Variant of the value-iteration demo above: same reward layout (+100 at row 6,
# column 4; -1000 along the bottom row) but 1000 iterations printing every 100,
# and here the return value of mdp.valIter() is captured into v and displayed.
# v is pre-initialized to zeros, though it is overwritten on the first iteration.
[0, 1, 0, 1, 1, 0, 1, 0, 1, 0], [0, 1, 0, 1, 0, 0, 1, 0, 1, 0], [0, 1, 1, 1, 0, 1, 1, 0, 1, 0], [0, 1, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]])) stateReward = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 100, 0, 0, 0, 0, 0], [ -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000 ]]) mdp = MDPmaze(myMaze, stateReward) iterCount = 1000 printSkip = 100 v = np.zeros(mdp.stateSize) for i in range(iterCount): v = mdp.valIter() if np.mod(i, printSkip) == 0: print("Iteration ", i) pShow(v, myMaze)
# NOTE(review): flattened notebook-cell text; the numeric rows below are the tail
# of an oceanReward array whose opening (and the oceanMap definition) precede this
# chunk — do not edit the literal in isolation.
# Demo: constructs the Ocean and Ship, places the ship with probability 1 on the
# harbor state (grid coordinate (3, 2), whose reward is printed), shows the belief
# before and after a single sail('N') move, then builds the sailing MDP.
# Value iteration (mdp.valIter(N=...)) is intentionally left for the user to run;
# saveData(mdp) stays commented out until answers are ready to upload.
[1.6, 3.9, 2.5, 4.9, 3.9, 6.7, 9.4, 1.0, 7.1, 6.8], [7.7, 9., 9.7, 9.3, 0.4, 6.5, 3.9, 0, 1.7, 2.6], [-9000, 2.1, 8.8, 3.8, 8.4, 8.7, 7.9, 5.8, 9.7, 9.2] ]) ocean = Ocean(oceanMap, oceanReward) ship = Ship(ocean) ''' Show probability of ship sailing north from harbor once (before/after) ''' #Harbor coordinates harborState = ocean.coord2state((3, 2)) print("Reward for Harbor: %f" % oceanReward[3, 2]) #set location probability for ship to harbor state ship.prob = np.zeros(ocean.stateSize) ship.prob[harborState] = 1 pShow(ship.prob, ocean) #probability 1 of being in harbor ship.sail('N') pShow(ship.prob, ocean) #probability after sailing north from harbor mdp = MDPSailing(ship) #Run Value iteration i.e. call valIter(N=something) until mdp.V #has converged. ''' Uncomment the line below to once you are ready to save your answers and want to create the data.json file to upload. ''' #saveData(mdp)