# Driver script: train the grid with repeated full sweeps of TDLearning(),
# compute the per-cell RMS error, then print the resulting tables.
# `my_grid` is expected to be constructed earlier in this file.

NUM_TRIALS = 10000  # number of full sweeps over the grid

my_grid.setCurrentPosition(3, 2)  # start position
my_grid.printGrid()

# Each trial visits every cell once, walking from the last row/column back
# to the first, and calls TDLearning() on that cell.
# `range` is used instead of `xrange` so the script runs on Python 2 and 3.
for _trial in range(NUM_TRIALS):
    for i in reversed(range(my_grid.rows)):
        for j in reversed(range(my_grid.cols)):
            my_grid.TDLearning(i, j)
    # Per-trial debug output (disabled):
    # my_grid.printUtilities()
    # my_grid.printVisitedCount()
    # my_grid.printQUtilities()

# Alternative experiment (disabled): instead of sweeping the whole grid,
# update the current cell and then move in its qIntendedDirection.
# for i in range(0, 10):  # number of moves
#     my_grid.TDLearning(my_grid.currRow, my_grid.currCol)
#     nextMove = my_grid.grid[my_grid.currRow][my_grid.currCol].qIntendedDirection
#     my_grid.move(nextMove)
# my_grid.printUtilities()
# my_grid.printVisitedCount()
# my_grid.printQUtilities()
# my_grid.printRMSErrors()

# Call calcRMSError() on every cell before the error table is printed.
for i in range(my_grid.rows):
    for j in range(my_grid.cols):
        my_grid.grid[i][j].calcRMSError()

# Final report.
my_grid.printUtilities()
my_grid.printVisitedCount()
my_grid.printQUtilities()
my_grid.printQIntendedDirections()
my_grid.printRMSErrors()