Example No. 1
def solve(world, goalInWorld):
    solution = GridWorld(world.width, world.height)
    solution.cells = [Cell(cell) for cell in world.cells]  # work on a copy of the world's cells
    goal = solution.get(goalInWorld.col, goalInWorld.line)
    goal.cost = 0
    closed = []
    opened = [goal]

    while opened:
        cell = opened.pop()
        closed.append(cell)

        for adj in solution.getAccessibleCells(cell):
            if adj.reachable:  # we ignore obstacles
                direction = Direction.fromTo(adj, cell)
                cost = cell.cost + direction.cost()
                if adj.cost == -1:  # not reached yet
                    adj.direction = direction
                    adj.cost = cost
                    opened.append(adj)
                elif adj.cost > cost:  # found a cheaper path, reopen the cell
                    adj.direction = direction
                    adj.cost = cost
                    opened.append(adj)
    return solution
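A minimal usage sketch for solve, assuming the GridWorld and Cell API shown in the test cases later in this listing (addRandomObstacles, getLength, cell.reachable, enigmaAsStr):

import math

world = GridWorld(10, 10)
world.addRandomObstacles(math.floor(world.getLength() * 0.2))  # roughly 20% obstacles
goal = next(cell for cell in world.cells if cell.reachable)    # first reachable cell as the goal
solution = solve(world, goal)
print(enigmaAsStr(solution, goal))  # each reachable cell now carries a cost and a direction toward the goal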
Example No. 2
def solve(world, goalInWorld):
    solution = GridWorld(world.width, world.height)
    solution.cells = [Cell(cell) for cell in world.cells]  # work on a copy of the world's cells
    goal = solution.get(goalInWorld.col, goalInWorld.line)
    goal.cost = 0
    closed = []
    opened = [goal]

    while opened:
        cell = opened.pop()
        closed.append(cell)

        for adj in solution.getAccessibleCells(cell):
            if adj.reachable:  # we ignore obstacles
                direction = Direction.fromTo(adj, cell)
                cost = cell.cost + direction.cost()
                if adj.cost == -1:  # not reached yet
                    adj.direction = direction
                    adj.cost = cost
                    opened.append(adj)
                elif adj.cost > cost:  # found a cheaper path, reopen the cell
                    adj.direction = direction
                    adj.cost = cost
                    opened.append(adj)
    return solution
Example No. 3
 def setUp(self):
     self.world = GridWorld(10, 10)
     self.obstaclesProb = 0.2
     self.world.addRandomObstacles(
         math.floor(self.world.getLength() * self.obstaclesProb))
     for cell in self.world.cells:
         if cell.reachable:
             self.goal = cell
             break
Example No. 4
 def createSmallMaze(self):
     #should be GridWorldSmall()
     self.GridWorldGame = GridWorld((5, 5))
     cols = self.GridWorldGame.size[0]
     rows = self.GridWorldGame.size[1]
     self.MAZE_X = cols * 32
     self.MAZE_Y = rows * 32
     FRAME = 8
     self.START_X = (self.MAX_X - cols * 32) / 2 + FRAME  # what happens if it is not a multiple of 32?
     self.START_Y = (self.MAX_Y - rows * 32) / 2 + FRAME
     self.smileyPos = (self.START_X, self.START_Y)
Example No. 5
def main():
    env = GridWorld()
    _, es1, ts1 = independentQLearning(env, lambda x: x < 100, 0)
    qList, es2, ts2 = shareStateQLearning(env, lambda x: x < 100, 0)
    iQL = plt.scatter(es1, ts1, c='red')
    ssQL = plt.scatter(es2, ts2, c='blue')
    iQL.set_label("Independent")
    ssQL.set_label("5 Predators, 2 Prey, Share State")
    plt.xlabel("Episodes")
    plt.ylabel("Cumulative TimeSteps")
    plt.legend()
    plt.show()
    env.simulateTrajectory(qList)
Example No. 6
class Tests(unittest.TestCase):
    def setUp(self):
        self.world = GridWorld(10, 10)
        self.obstaclesProb = 0.2
        self.world.addRandomObstacles(math.floor(self.world.getLength() * self.obstaclesProb))
        for cell in self.world.cells:
            if cell.reachable:
                self.goal = cell
                break

    def test_runs(self):
        solution = solve(self.world, self.goal)
        print(enigmaAsStr(solution, self.goal))
Example No. 7
class Tests(unittest.TestCase):
    def setUp(self):
        self.world = GridWorld(10, 10)
        self.obstaclesProb = 0.2
        self.world.addRandomObstacles(
            math.floor(self.world.getLength() * self.obstaclesProb))
        for cell in self.world.cells:
            if cell.reachable:
                self.goal = cell
                break

    def test_runs(self):
        solution = solve(self.world, self.goal)
        print(enigmaAsStr(solution, self.goal))
Example No. 8
def evaluate(goals, EQ):
    env = GridWorld(goals=goals, T_states=T_states)
    policy = EQ_P(EQ)
    state = env.reset()
    done = False
    t = 0
    G = 0
    while not done and t < 100:
        action = policy[state]
        state_, reward, done, _ = env.step(action)
        state = state_
        G += reward
        t += 1
    return G
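evaluate runs a single greedy episode capped at 100 steps; if the environment is stochastic, averaging several runs gives a steadier estimate. A small sketch, assuming the goals and EQ values used in this example:

returns = [evaluate(goals, EQ) for _ in range(20)]
print("mean return over 20 episodes:", sum(returns) / len(returns))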
Example No. 9
    def openMDPGUI(self):
        global w, g
        if self.checkSettingValues():
            self.master.destroy()

            df = float(self.discFactor.get())
            rews = list(map(lambda x: float(x.get()), self.rewValue))
            probs = list(map(lambda x: float(x.get()), self.probValue))

            w = GridWorld([[
                GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID,
                GridWorld.CELL_EXIT
            ],
                           [
                               GridWorld.CELL_VOID, GridWorld.CELL_WALL,
                               GridWorld.CELL_VOID, GridWorld.CELL_PIT
                           ],
                           [
                               GridWorld.CELL_VOID, GridWorld.CELL_VOID,
                               GridWorld.CELL_VOID, GridWorld.CELL_VOID
                           ]])
            w.setDiscountFactor(df)
            w.setRewards(rews[0], rews[1], rews[2])
            w.setProbabilities(probs[0], probs[1], probs[2], probs[3])

            g = MDPGUI(w)
Example No. 10
def objectiveFunction(args):

    learning_rate, min_epsilon, max_epsilon, epsilon_decay, discount_factor = args

    num_of_episodes = 500
    max_steps = 1000

    environment = GridWorld()

    agentQ = Q_Agent(environment,
                     epsilon=max_epsilon,
                     learning_rate=learning_rate,
                     discount_factor=discount_factor)

    train(environment,
          agentQ,
          episodes=num_of_episodes,
          max_steps_per_episode=max_steps,
          min_epsilon=min_epsilon,
          max_epsilon=max_epsilon,
          epsilon_decay=epsilon_decay)
    mean_reward = test(environment, agentQ, episodes=1000)

    value_map = np.zeros((environment.height, environment.width))
    for x in range(environment.height):
        for y in range(environment.width):
            q_values_of_state = agentQ.q_table[(x, y)]
            maxValue = max(q_values_of_state.values())
            value_map[x, y] = maxValue

    if save:
        utils.plotValueFunction(value_map,
                                os.path.join(save_path, 'heatmap.jpg'))

    return -mean_reward
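The tuple-unpacking signature and the negated mean reward suggest objectiveFunction is written as an objective for a black-box minimizer. A hedged sketch using scikit-optimize's gp_minimize; the optimizer choice and the search bounds below are assumptions, not part of the original example:

from skopt import gp_minimize

search_space = [
    (0.01, 0.5),    # learning_rate
    (0.01, 0.1),    # min_epsilon
    (0.5, 1.0),     # max_epsilon
    (0.001, 0.1),   # epsilon_decay
    (0.8, 0.999),   # discount_factor
]
result = gp_minimize(objectiveFunction, search_space, n_calls=30, random_state=0)
print("best hyperparameters:", result.x)
print("best mean reward:", -result.fun)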
Example No. 11
 def setUp(self):
     self.world = GridWorld(10, 10)
     self.obstaclesProb = 0.2
     self.world.addRandomObstacles(math.floor(self.world.getLength() * self.obstaclesProb))
     for cell in self.world.cells:
         if cell.reachable:
             self.goal = cell
             break
Example No. 12
def gridworld():
	''' Create complete discrete environment for MDP modelling (InSpace Tiled), including Rewards and Transition probabilities'''
	w = GridWorld([[GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_EXIT, GridWorld.CELL_VOID, GridWorld.CELL_VOID], 
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_PIT, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_PIT, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID]], discountFactor = 1 )
	
	w.setRewards(-0.04, -1, 1)
	w.setProbabilities(0.8, 0.1, 0.1, 0)
#	w.setDiscountFactor(0.6)
	return w
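A hedged usage sketch: the returned world can be handed to the MDPGUI constructor used in the openMDPGUI examples elsewhere in this listing:

w = gridworld()
g = MDPGUI(w)  # assumes MDPGUI is importable here, as in the GUI examples above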
Example No. 13
def baseTester():
    ''' runs a somewhat comprehensive test'''
    try:
        import QLearner as ql
    except ImportError:
        pass  # QLearner is required for the learner tests below

    #it is worth noting here that num_states can be 100 for any grid < 10x10 using the tuckerHash
    #we need a new hash algo if we are to use a grid outside those parameters
    baseKwargs = {'num_states':100, 'alpha':1.0, 'gamma':0.9, 'rar':0.5, 'radr':0.99, 'dyna':0, 'verbose':False}
    '''
    If you want to add your own test, add it here. Each test is a tuple:
    (csv file, expected convergence iterations, expected policy length, kwarg modifier, test name)
    '''
    myTestList = [('testEasyWorld.csv', 800, 13, {}, 'easy test'),
                  ('world01.csv', 7000, 16, {}, 'Tucker Test 1'),
                  ('world02.csv', 7000, 17, {}, 'Tucker Test 2'),
                  ('testGridWorld.csv', 5000, 20, {}, 'Leo Base Test'),
                  ('testGridWorld.csv', 18000, 20, {'alpha':.2}, 'Test Learning Rate'),
                  ('testEasyWorld.csv', 700, 13, {'rar': 0.05}, 'Test Exploration'),
                  ('testEasyWorld.csv', 700, 13, {'radr': 0.8}, 'Test Exploration Decay'),
                  ('testGridWorld.csv', 3000, 20, {'gamma':0.8}, 'Test Discount Rate'),
                  ('testGridWorld.csv', 1100, 20, {'dyna':100}, 'Test Dyna'),
                  ]
    
    fdtest = myTestList[7:9]
                  
    #for test in myTestList:
    for test in fdtest:             
        print('-------------------------------')
        print(test[4])
        world = GridWorld(test[0])
        testKwargs = copy(baseKwargs)
        for k in test[3].keys():
            testKwargs[k] = test[3][k]
        print('parameters %s' % str(testKwargs))
        learner = ql.QLearner(**testKwargs)
        print(world.grid)
        myTester = QTester(world, learner)
        nIter = test[1]
        totalIter = nIter
        lastPolicyLength = 0
        #someone let me know if there's a better way to check for convergence time
        while totalIter < (test[1] * 1.4):
            myTester.nIter(nIter)
            nIter = int(.05 * test[1])
            myPolicy = myTester.getPolicy()
            policyLength = len(myPolicy)
            totalIter += nIter
            if (lastPolicyLength == policyLength) and (policyLength < 100):
                print('converged in approx %i iterations' % totalIter)
                print(policyLength, myPolicy, test[2])
                break
            lastPolicyLength = policyLength
        if (test[1] * 1.2 >= totalIter) and (policyLength == test[2]):
            print('*** TEST PASSED ***')
        else:
            print('xxx TEST FAILED xxx')
Example No. 14
 def setUp(self):
     self.n = 5
     self.p = 1
     self.gridworld = GridWorld(self.n, self.p)
     self.go_right_policy = np.ones(self.n * self.n, dtype=int)
     self.discount = 0.9
     self.large_discount = 0.2
     self.policy = np.array(
             [['TERMINAL', 'RIGHT', 'RIGHT', 'RIGHT', 'TERMINAL'],
              ['RIGHT', 'RIGHT', 'RIGHT', 'RIGHT', 'UP'],
              ['RIGHT', 'RIGHT', 'RIGHT', 'RIGHT', 'UP'],
              ['RIGHT', 'RIGHT', 'RIGHT', 'RIGHT', 'UP'],
              ['RIGHT', 'RIGHT', 'RIGHT', 'RIGHT', 'UP']])
     self.policy_large_discount = np.array(
             [['TERMINAL', 'LEFT', 'RIGHT', 'RIGHT', 'TERMINAL'],
              ['UP', 'LEFT', 'RIGHT', 'RIGHT', 'UP'],
              ['UP', 'LEFT', 'RIGHT', 'RIGHT', 'UP'],
              ['UP', 'LEFT', 'RIGHT', 'RIGHT', 'UP'],
              ['UP', 'LEFT', 'RIGHT', 'RIGHT', 'UP']])
Example No. 15
 def __init__(self,
              epsilon=0.01,
              greedy=False,
              alpha=0.1,
              gamma=0.95,
              visual=True,
              goal=(10, 8),
              agentPose=(1, 1, 'up'),
              showTrial=True,
              randomReset=False,
              epsilonStrat=1,
              epsilonFactor=500):
     """
     gridWorld: GridWorld object
     epsilon: value used for epsilon greedy search
     alpha: step size
     gamma: discount factor
     """
     self.actionValues = Counter()
     self.epsilonFactor = epsilonFactor
     self.randomReset = randomReset
     self.epsilon = epsilon
     self.greedy = greedy
     self.epsilonStrat = epsilonStrat
     self.goal = goal
     self.Q = dict()
     self.gridWorld = GridWorld(goal,
                                agentPose,
                                visual=visual,
                                showTrial=showTrial,
                                randomReset=randomReset)
     self.actions = self.gridWorld.getActions()
     self.Model = dict()
     self.alpha = alpha
     self.PriorityQueue = PriorityQueue()
     self.gamma = gamma
     self.exp = []
     self.rewards = dict()
     self.rewardNums = dict()
     self.predecessors = defaultdict(set)
     self.initQValues()
Example No. 16
def start_grid_mdp():
    """
    starts the program, restarts if the user wants to
    """
    grid = load_grid(get_file_path())
    world = GridWorld(grid)
    move_costs = get_move_cost()
    gamma = get_gamma()
    eval_steps = get_evaluation_steps()
    MDP(world, eval_steps, gamma, move_costs)
    if start_again():
        start_grid_mdp()
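start_grid_mdp restarts by calling itself, so every restart adds a stack frame. A loop-based variant using the same helper functions is sketched below as an alternative, not as the original author's code:

def start_grid_mdp_loop():
    """Starts the program; repeats while the user wants to start again."""
    while True:
        grid = load_grid(get_file_path())
        world = GridWorld(grid)
        move_costs = get_move_cost()
        gamma = get_gamma()
        eval_steps = get_evaluation_steps()
        MDP(world, eval_steps, gamma, move_costs)
        if not start_again():
            break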
Example No. 17
    def buildBiasEngine(self):
        """Simulates MDPs with varying bias to build a bias inference engine."""

        print("Loading MDPs...\n")

        # Unnecessary progress bar for terminal
        bar = pyprind.ProgBar(len(self.test))
        for i in self.test:
            self.sims.append(
                GridWorld(self.grid, i, self.discount, self.tau, self.epsilon))
            bar.update()

        print "\nDone loading MDPs..."
Example No. 18
    def openMDPGUI(self):
        global w, g
        if self.checkSettingValues():
            self.master.destroy()

            df = float(self.discFactor.get())
            rews = list(map(lambda x: float(x.get()), self.rewValue))
            probs = list(map(lambda x: float(x.get()), self.probValue))

            w = GridWorld(
                [
                    [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_EXIT],
                    [GridWorld.CELL_VOID, GridWorld.CELL_WALL, GridWorld.CELL_VOID, GridWorld.CELL_PIT],
                    [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                ]
            )
            w.setDiscountFactor(df)
            w.setRewards(rews[0], rews[1], rews[2])
            w.setProbabilities(probs[0], probs[1], probs[2], probs[3])

            g = MDPGUI(w)
Example No. 19
import numpy as np
from matplotlib import pyplot as plt
import deepdish as dd
from GridWorld import GridWorld
from library import *

env = GridWorld()
T_states = [(3, 3), (3, 9), (9, 3), (9, 9), (1, 1), (1, 2), (1, 3), (1, 4),
            (1, 5), (1, 7), (1, 8), (1, 9), (1, 10), (1, 11), (11, 1), (11, 2),
            (11, 3), (11, 4), (11, 5), (11, 7), (11, 8), (11, 9), (11, 10),
            (2, 1), (3, 1), (4, 1), (5, 1), (7, 1), (8, 1), (9, 1), (10, 1),
            (2, 11), (3, 11), (4, 11), (5, 11), (6, 11), (8, 11), (9, 11),
            (10, 11), (11, 11)]

###################################### Qs
BTasksQ = [[t] for t in T_states]
###################################### EQs
Bases = []
n = int(np.ceil(np.log2(len(T_states))))
m = (2**n) // 2  # integer division keeps the modulo test below exact
for i in range(n):
    Bases.append([])
    b = False
    for j in range(0, 2**n):
        if j >= len(T_states):
            break
        if b:
            Bases[i].append(1)  #1=True=rmax
        else:
            Bases[i].append(0)  #0=False=rmin
        if (j + 1) % m == 0:
Example No. 20
from GridWorld import GridWorld
from GridWorld import GridWorldAdditive
from ValueIteration import ValueIteration

# Run Value Iteration in different Grid World environments
if __name__ == "__main__":
    gamma = 0.9
    print("Grid world Value Iteration with discounted rewards gamma = %.2f\n" % gamma)
    terminals = {(0, 3): +1, (1, 3): -1}
    gw = GridWorld((3, 4), 0.8, [(1, 1)], terminals)
    vi = ValueIteration()
    values = vi.valueIteration(gw, gamma)
    gw.printValues(values)
    qvalues = vi.getQValues(gw, values, gamma)
    gw.printQValues(qvalues)
    policy = vi.getPolicy(gw, values, gamma)
    gw.printPolicy(policy)

    reward = -0.01
    print("Grid world Value Iteration with additive rewards = %.2f\n" % reward)
    gwa = GridWorldAdditive((3, 4), 0.8, [(1, 1)], terminals, reward)
    values = vi.valueIteration(gwa, 1, 100)
    gwa.printValues(values)
    qvalues = vi.getQValues(gwa, values, 1)
    gwa.printQValues(qvalues)
    policy = vi.getPolicy(gwa, values, 1)
    gwa.printPolicy(policy)
 
    reward = -0.04
    print("Grid World with additive rewards = %.2f\n" % reward)
    gwa = GridWorldAdditive((3, 4), 0.8, [(1, 1)], terminals, reward)
Example No. 21
		hlpStr = ("Markov Decision Process Examples\n"
				  "	Examples:\n"
				  "		gridworld 1: std grid world as the book (step cost -0.04, discount factor 1)\n"
				  "		gridworld 2: low discount factor 0.6 (step cost -0.04)\n"
				  "		gridworld 3: low step cost -0.01\n"
				  "		gridworld 4: suicide mode (step cost -2)\n"
				  )
		print(hlpStr)
		exit()
	
	if len(sys.argv) == 1: showhelp()
	
	if sys.argv[1] == "gridworld":
		
		w = GridWorld([[GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_EXIT], 
				   [GridWorld.CELL_VOID, GridWorld.CELL_WALL, GridWorld.CELL_VOID, GridWorld.CELL_PIT],
				   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID]], discountFactor = 1 )
		
		if len(sys.argv) < 3:
			mdpc = MDPChooser()
		elif sys.argv[2] == "1":	
			w.setRewards(-0.04, -1, 1)
			w.setProbabilities(0.8, 0.1, 0.1, 0)
			w.setDiscountFactor(1)
			g = MDPGUI(w)
		elif sys.argv[2] == "2":
			w.setRewards(-0.04, -1, 1)
			w.setProbabilities(0.8, 0.1, 0.1, 0)
			w.setDiscountFactor(0.9)
			g = MDPGUI(w)
		elif sys.argv[2] == "3":
Example No. 22
 def __init__(self):
     self.game = GridWorld((5, 5))
     self.squareCountGrid = self.game.createSquareCount()
     self.alpha = 0.1
     self.gamma = 0.9
Example No. 23
from GridWorld import GridWorld


g = GridWorld(3,4)
policy={
    (0, 0):'R',
    (0, 1):'R',
    (0, 2):'R',
    (1, 0):'U',
    (1, 1):'U',
    (1, 2):'U',
    (1, 3):'U',
    (2, 0):'R',
    (2, 1):'R',
    (2, 2):'U',
    (2, 3):'L'
}

def print_policy(p,g):
    for r in range(g.row):
        print('------------------')
        for c in range(g.col):
            a = p.get((r,c),' ')
            print(' %s |'%a, end="")
        print("")

def print_value(V,g):
    for r in range(g.row):
        print('------------------')
        for c in range(g.col):
            v = V.get((r,c), 0)
Example No. 24
    t = 0
    G = 0
    while not done and t < 100:
        action = policy[state]
        state_, reward, done, _ = env.step(action)
        state = state_
        G += reward
        t += 1
    return G


for t in range(len(types)):
    print("type: ", t)

    # Learning universal bounds (min and max tasks)
    env = GridWorld(goals=T_states, dense_rewards=not types[t][0])
    EQ_max, _ = Goal_Oriented_Q_learning(env, maxiter=maxiter)

    env = GridWorld(goals=T_states,
                    goal_reward=-0.1,
                    dense_rewards=not types[t][0])
    EQ_min, _ = Goal_Oriented_Q_learning(env, maxiter=maxiter)

    # Learning base tasks and doing composed tasks
    goals = Bases[0]
    goals = [[pos, pos] for pos in goals]
    env = GridWorld(goals=goals,
                    dense_rewards=not types[t][0],
                    T_states=T_states if types[t][1] else goals)
    A, stats1 = Goal_Oriented_Q_learning(
        env, maxiter=maxiter, T_states=None if types[t][1] else T_states)
Example No. 25
class TDL_solution:
    def __init__(self):
        self.game = GridWorld((5, 5))
        self.squareCountGrid = self.game.createSquareCount()
        self.alpha = 0.1
        self.gamma = 0.9
    
    def playTDLGame(self,startSquare, randomMove):
        self.game.currentSquare = startSquare
        
        keepPlaying = not self.game.gameOver()
        squares_and_returns = [(self.game.currentSquare,0)]
     
        while keepPlaying:
            
            #policy
            i = self.game.currentSquare[0]
            j = self.game.currentSquare[1]
            move = self.game.policyGrid[i][j]
      
            if randomMove < np.random.rand():
                moves = self.game.possibleMoves((i,j))
               
                moves.remove(move)
                if len(moves) > 0:
                    idx = np.random.randint(0,len(moves))
                    move = moves[idx]
            #move
            self.game.move(move)
            i = self.game.currentSquare[0]
            j = self.game.currentSquare[1]
            theReturn = self.game.returnGrid[i][j]
            squares_and_returns.append( (self.game.currentSquare,theReturn) )
            keepPlaying = not self.game.gameOver()
        
        G = 0
        self.squares_and_values = []
        for square , theReturn in reversed(squares_and_returns):
            self.squares_and_values.append( (square,G) )
            G = theReturn + self.game.gamma*G
        #self.squares_and_values.reverse()
    
    def playSarsa(self,startSquare, randomMove):
        self.game.currentSquare = startSquare
        keepPlaying = not self.game.gameOver()
        
        while keepPlaying:
            
            #policy
            i1 = self.game.currentSquare[0]
            j1 = self.game.currentSquare[1]
            move = self.game.policyGrid[i1][j1]
      
            if randomMove < np.random.rand():
                moves = self.game.possibleMoves((i1,j1))
                print( str(i1) + " " + str(j1) + " " + str(moves) + " " + str(move) )
                moves.remove(move)
                if len(moves) > 0:
                    idx = np.random.randint(0,len(moves))
                    move = moves[idx]
            #move
            self.game.move(move)
            i2 = self.game.currentSquare[0]
            j2 = self.game.currentSquare[1]
            theReturn = self.game.returnGrid[i2][j2]
            self.game.valueGrid[i1][j1] = self.game.valueGrid[i1][j1] + self.alpha*(theReturn + self.gamma*self.game.valueGrid[i2][j2]- self.game.valueGrid[i1][j1] )
            keepPlaying = not self.game.gameOver()
            
    def playQLearning(self,startSquare, randomMove):
        self.game.currentSquare = startSquare
        keepPlaying = not self.game.gameOver()
        
        while keepPlaying:
            
            #policy
            i1 = self.game.currentSquare[0]
            j1 = self.game.currentSquare[1]
            move = self.game.policyGrid[i1][j1]
            
            # we use the best move even if random runs over it
            i3 = self.game.currentSquare[0]
            j3 = self.game.currentSquare[1]
      
            if randomMove < np.random.rand():
                moves = self.game.possibleMoves((i1,j1))
                print( str(i1) + " " + str(j1) + " " + str(moves) + " " + str(move) )
                moves.remove(move)
                if len(moves) > 0:
                    idx = np.random.randint(0,len(moves))
                    move = moves[idx]
            #move
            self.game.move(move)
            i2 = self.game.currentSquare[0]
            j2 = self.game.currentSquare[1]
            theReturn = self.game.returnGrid[i2][j2]
            self.game.valueGrid[i1][j1] = self.game.valueGrid[i1][j1] + self.alpha*(theReturn + self.gamma*self.game.valueGrid[i3][j3]- self.game.valueGrid[i1][j1] )
            keepPlaying = not self.game.gameOver()
    
        
        
    def updateValueGrid(self):
        for t in range(len(self.squares_and_values) -1):
            
            square , _ = self.squares_and_values[t]
            nextSquare, value = self.squares_and_values[t+1]
            i1 = square[0]
            j1 = square[1]
            i2 = nextSquare[0]
            j2 = nextSquare[1]
            self.game.valueGrid[i1][j1] = self.game.valueGrid[i1][j1] + self.alpha*(value + self.gamma*self.game.valueGrid[i2][j2]- self.game.valueGrid[i1][j1] )  
    
    def updatePolicyGrid(self):
        
        #check if policy change
        #hasChanged = False
        #if bestMove is new set to true.
        rows = self.game.size[0]
        cols = self.game.size[1]
        change = False
        for i in range(rows):
            for j in range(cols):
                if self.game.policyGrid[i][j] in [0,1,2,3]:
                    self.game.currentSquare = (i,j)
                    oldMove = self.game.policyGrid[i][j]
                    self.game.policyGrid[i][j] = self.game.bestMove()
                    if oldMove != self.game.policyGrid[i][j]:
                        change = True
        return change
        
        
    def printGrids(self):
        self.game.printPolicyGrid()
        self.game.printReturnGrid()
        self.game.printValueGrid()
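A hypothetical driver loop for TDL_solution; the start square, the 0.9 exploration threshold, and the 100 episodes per evaluation sweep are assumptions, not taken from the original code:

solver = TDL_solution()
policy_changed = True
while policy_changed:
    for _ in range(100):                    # assumed number of episodes per policy-evaluation sweep
        solver.playTDLGame((0, 0), 0.9)     # assumed start square and exploration threshold
        solver.updateValueGrid()
    policy_changed = solver.updatePolicyGrid()  # improve the policy; stop once it no longer changes
solver.printGrids()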
Example No. 26
from Evaluation import Evaluation
from GridWorld import GridWorld
from Learning import Learning

# Specify the size of the grid world
row = 5
column = 5

LearningAgentSpan = 10  # lifespan of the learning agent
LearningTimes = 100  # number of learning runs
P = 5  # reward
T = 10  # number of steps to trace back

EvaluationAgentSpan = 10  # lifespan of the evaluation agent
EvaluationTimes = 100  # number of evaluation trials

grid_world = GridWorld(row, column)
grid_world.make_grid_world()

learning = Learning(grid_world.get_grid_world(), row, column)
learning.do_learning(LearningAgentSpan, LearningTimes, P, T)

evaluation = Evaluation(learning.get_grid_world(), row, column)
evaluation.evaluation(EvaluationAgentSpan, EvaluationTimes)
Example No. 27
    vehState = start
    env_file = open("Environment.txt", "w")
    gridWorld = CreateEnvironment()
    gridWorld.create(env_file,
                     size_row='10',
                     size_col='10',
                     agent_row=str(vehState[0]),
                     agent_col=str(vehState[1]),
                     goal_row=str(goal[0]),
                     goal_col=str(goal[1]),
                     static_number='2',
                     static_list=[0, 3, 2, 4])
    env_file = open("Environment.txt", "r")
    text_in_file = env_file.readline()
    print(text_in_file)
    grid = GridWorld(text_in_file)
    gw = grid.gridDefine()
    #-------------------------------------------------------

    # initialize agent class and uav class
    Agent = agent(vehState)
    # define a model dictionary, which maps user inputs of learning model names to learning model function
    modelType = {
        "random": Agent.predict_Random,
        "standard": Agent.predict_Standard,
        "NN": Agent.predict_NN
    }
    UAV = uav(vehState)

    # initialize decision model (options = "random", "standard", or "NN")
    model = "random"  # will be a user input
Example No. 28
import hashlib
import json
from GridWorld import GridWorld
import numpy as np
import copy

from matplotlib import pyplot as plt

import torch
from torch.nn.modules.loss import SmoothL1Loss
import torch.nn as nn
from torch.optim import Adam
import random

grid_world = GridWorld()

rewards_to_plot = []
stats_ax = None
rewards_ax = None
model = None


def init_gridworld(random_player=False, random_mines=False, maze=False):
    global grid_world
    grid_world = GridWorld(random_player, random_mines, maze)


class NeuralNetwork(nn.Module):
    def __init__(self, iterations=500):
        super(NeuralNetwork, self).__init__()
Example No. 29
from GridWorld import GridWorld
from Robot import Robot

env = GridWorld("grid-small.txt")
env.print_map()
gamma = 0.9

start = [0, 0]
agent = Robot(env, gamma)
epochs = 500
decay = 0.99
rvm_max_iter = 500
max_step = 1000
epsilon = 1
epsilon_threshold = 0.001
verbose = True
verbose_iteration = 1
steps, rewards = agent.learn(epochs, decay, rvm_max_iter, max_step, epsilon,  start, verbose, verbose_iteration)
path = agent.get_path(start)
print(path)
Example No. 30
 def __createEmptyPolicy(self):
     """we create a partial function that is undefined in all points"""
     c, r = self.world.size
     return [[(None if self.world.cellAt(x, y) == GridWorld.CELL_VOID else
               GridWorld.randomAction()) for x in range(c)]
             for y in range(r)]
Example No. 31
        self.drawUtilities(canvas)
        self.drawQValues(canvas)
        self.drawPolicy(canvas)


# ===========================================================================
# TEST
# ===========================================================================
if __name__ == '__main__':
    w = GridWorld([[
        GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID,
        GridWorld.CELL_EXIT
    ],
                   [
                       GridWorld.CELL_VOID, GridWorld.CELL_WALL,
                       GridWorld.CELL_VOID, GridWorld.CELL_PIT
                   ],
                   [
                       GridWorld.CELL_VOID, GridWorld.CELL_VOID,
                       GridWorld.CELL_VOID, GridWorld.CELL_VOID
                   ]],
                  discountFactor=1)
    w.setRewards(-0.04, -1, 1)
    w.setProbabilities(0.8, 0.1, 0.1, 0)
    print("GridWorld-----------")
    print(w)
    print("----------------")

    print("\nPolicy----------")
    p = Policy(w)
Example No. 32
def init_gridworld(random_player=False, random_mines=False, maze=False):
    global grid_world
    grid_world = GridWorld(random_player, random_mines, maze)
Example No. 33
            # RL take action and get next state and reward
            _, next_state_index, reward, done = env.step(action)

            # RL choose action based on next state
            next_action = RL.choose_action(str(next_state_index))

            # RL learn from this transition (s, a, r, s', a') ==> Sarsa
            RL.learn(str(state), action, reward, str(next_state_index), next_action)

            # swap state and action
            state = next_state_index
            action = next_action

            # break while loop when end of this episode
            if done:
                break

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = GridWorld()
    RL = Sarsa(actions=list(range(env.n_actions)))

    env.after(10000, update)
    env.mainloop()
    print(RL.q_table)
Example No. 34
from GridWorld import GridWorld

g = GridWorld(3, 4)
policy = {
    (0, 0): {
        'R': 1
    },
    (0, 1): {
        'R': 1
    },
    (0, 2): {
        'R': 1
    },
    (1, 0): {
        'U': 1
    },
    (1, 1): {
        'U': 1
    },
    (1, 2): {
        'U': 1
    },
    (1, 3): {
        'U': 1
    },
    (2, 0): {
        'R': 0.5,
        'U': 0.5
    },
    (2, 1): {
        'R': 1
Example No. 35
import tensorflow as tf

from GridWorld import GridWorld

np.random.seed(20)
tf.set_random_seed(20)

MAX_EPISODE = 1000
MAX_EP_STEPS = 1000  # maximum time step in one episode
GAMMA = 0.9  # reward discount in TD error
lr_actor = 0.001
lr_critic = 0.01

grid_world_h = 5
grid_world_w = 5
env = GridWorld(grid_world_h, grid_world_w)

n_features = 2
n_actions = 4


class Actor(object):
    def __init__(self, sess, n_features, n_actions, lr=0.001):
        self.sess = sess
        self.state = tf.placeholder(tf.float32, [1, n_features], "state")
        self.action = tf.placeholder(tf.int32, None, "act")
        self.td_error = tf.placeholder(tf.float32, None, "td_error")

        with tf.variable_scope('Actor'):
            state_layer = tf.layers.dense(
                inputs=self.state,
Example No. 36
		self.world.draw(canvas)
		self.drawUtilities(canvas)
		self.drawQValues(canvas)
		self.drawPolicy(canvas)
	
		
#===========================================================================
# TEST
#===========================================================================
if __name__ == '__main__':

	w = GridWorld([[GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_EXIT, GridWorld.CELL_VOID, GridWorld.CELL_VOID], 
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_PIT, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_PIT, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
			   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID]], discountFactor = 1 )
	
	
	
	w.setRewards(-0.04, -1, 1)
	w.setProbabilities(0.8, 0.1, 0.1, 0)
#	w.setDiscountFactor(0.9)
#	print("-GridWorld-")
#	print(w)
#	print("-----------")
	
	print("\n---Policy---")
Example No. 37
	def __createEmptyPolicy(self):
		'''we create a partial function that is undefined in all points'''
		c, r = self.world.size
		return [ [ (None if self.world.cellAt(x,y) == GridWorld.CELL_VOID else GridWorld.randomAction()) for x in range(c) ] for y in range(r) ]
Example No. 38
        return -1
    grid_world.is_visited[x][y] = 1
    grid_world.dfs_route.append((x, y))
    random.shuffle(adjacent_nodes)
    for l in adjacent_nodes:
        if grid_world.is_visited[l[0]][l[1]] == 0:
            ret_val = random_dfs(grid_world, str(l[0]) + "," + str(l[1]))
            if ret_val == -1:
                grid_world.dfs_best_route.append((l[0], l[1]))
                return -1


def run_dfs(grid_world):
    # dfs(grid_world, grid_world.start_key)
    random_dfs(grid_world, grid_world.start_key)
    grid_world.dfs_best_route.append((grid_world.start_x, grid_world.start_y))
    grid_world.dfs_best_route = grid_world.dfs_best_route[::-1]


grid_world = GridWorld()
Functions.create_obstacles_from_hex(grid_world)
# Functions.create_random_obstacles(grid_world, 0.205)
# Functions.create_fixed_obstacles(grid_world, 6)
grid_world.scan_grid_and_generate_graph()
grid_world.print_graph()
grid_world.create_grid_ui(grid_world.m, grid_world.n, (grid_world.start_x, grid_world.start_y),
                          (grid_world.end_x, grid_world.end_y), grid_world.obstacles)
run_dfs(grid_world)
grid_world.move_on_given_route()
tk.mainloop()