return 2 else: return 1 if cell.wall else 0 return tuple([ cellvalue( self.world.getWrappedCell(self.cell.x + j, self.cell.y + i)) for i, j in lookcells ]) mouse = Mouse() cat = Cat() cheese = Cheese() world = cellular.World(Cell, directions=directions, filename='waco.txt') world.age = 0 world.addAgent(cheese, cell=pickRandomLocation()) world.addAgent(cat) world.addAgent(mouse) epsilonx = (0, 100000) epsilony = (0.1, 0) epsilonm = (epsilony[1] - epsilony[0]) / (epsilonx[1] - epsilonx[0]) endAge = world.age + 150000 while world.age < endAge: world.update() '''if world.age % 100 == 0:
#move diagonally in other direction elif R_Action == 5: self.cell_y -= 1 self.cell_x += 1 #calculating the state vector after the action(only x,y,robotpos are being changed) def calcState(self): robotPos = int( (3 * (self.cell_y - 1)) / 9) #represents the Y value of the robot return self.ball.x_cell - 1, self.ball.y_cell - 1, self.ball.VAmplitude, self.ball.Angle, robotPos #returns the States Vector #don't understand the calculation???????? directions = 4 world = cellular.World(Cell, directions=directions, filename='soccerField.txt') ball = BallSimulation.Ball(world, 1, 6, 9) world.addAgent(ball) robot = Robot(ball) world.addAgent(robot) print "Doing the best" # test the success percentage of the robot in hitting the ball - the measure pretraining = 10001 for i in range(pretraining): if i % 10000 == 0 and i > 0: line2print = [ "round number: " + str(i) + '. ' + "good score: " + str( (100 * (robot.good_score)) / (robot.good_score + robot.bad_score + robot.no_score)) + "%" +
if here.cliff: self.deads += 1 return cliffReward elif here.goal: self.score += 1 return goalReward else: return normalReward normalReward = -1 cliffReward = -100 goalReward = 50 directions = 4 world = cellular.World(Cell, directions=directions, filename='./worlds/cliff.txt') if startCell is None: print("You must indicate where the agent starts by putting a 'S' in the map file") sys.exit() agent = Agent() world.addAgent(agent, cell=startCell) pretraining = 10000 for i in range(pretraining): if i % 1000 == 0: print(i, agent.score, agent.deads) agent.score = 0 agent.deads = 0 world.update()
accident_stats = np.zeros((len(test_args), time_limit / sample_every, 3)) for ii, args in enumerate(test_args): print '\n----------------------------------' print 'testing argument set %i' % ii print args print '----------------------------------' stats_avg = np.zeros((average_across, time_limit / sample_every, 3)) for jj in range(average_across): print 'trial %i' % jj index = 0 world = cellular.World(Cell, directions=directions, filename='worlds/barrier3.txt') if startCell is None: print "You must indicate where the agent starts by putting a 'S' in the map file" sys.exit() agent = Agent(**args) world.addAgent(agent, cell=startCell) pretraining = 0 for i in range(pretraining): if i % 1000 == 0: print i, agent.score agent.score = 0 world.update()
def calcReward(self): if self.cell.cliff: return cliffReward elif self.cell.goal: self.score += 1 return goalReward else: return normalReward normalReward = -1 cliffReward = -100 goalReward = 0 mouse = Mouse() world = cellular.World(Cell, directions=directions, filename='gridworld.txt') world.age = 0 world.addAgent(mouse, cell=StartCell()) f = open('episodes_sarsa.txt', 'w') mouse.startEpisode() # while mouse.score < 500: # world.update() # print 'age: %d score: %d' % (world.age, mouse.score) oldscore = None mouse.ai.epsilon = 0.005 world.display.activate(size=30) world.display.delay = 1 while 1:
def __init__(self, worldmap, pacmen, ghost, ghostList, **kwargs):
    """Build the Pacman grid world and one nengo sub-network per pacman.

    Args:
        worldmap: Map description handed to ``cellular.World`` as ``map``.
        pacmen: Iterable of player bodies. Each one is added to the world
            and gets its own ``nengo.Network`` (stored in ``self.pacnets``)
            holding ``move``, ``obstacles``, ``detect_food`` and
            ``detect_enemy`` nodes.
        ghost: Reference ghost. Its ``rotate`` and ``speed`` are copied to
            ``self.ghost_rotate`` / ``self.ghost_speed``; its colour and
            state are switched when a "super" food cell is eaten.
        ghostList: Additional ghost bodies, each placed on a randomly
            chosen food cell.
        **kwargs: Forwarded unchanged to the parent constructor.

    Raises:
        IndexError: From ``random.choice`` when the map has neither a
            pacman start cell nor any food cell.
    """
    # Initializes Pacman World using parameters from the global pacman and ghost variables
    super(PacmanWorld, self).__init__(**kwargs)
    self.world = cellular.World(Cell, map=worldmap, directions=4)
    self.pacmen = pacmen
    self.ghost = ghost
    self.ghost_rotate = self.ghost.rotate
    self.ghost_speed = self.ghost.speed

    # Starting position for the pacmen: a random marked start cell, or a
    # random food cell when the map marks no start cell at all.
    starting = list(self.world.find_cells(lambda cell: cell.pacman_start))
    if not starting:
        starting = list(self.world.find_cells(lambda cell: cell.food))
    # Kept under its own name: the enemy loop below used to reuse ``cell``
    # as its loop variable, which silently moved every pacman onto the
    # last ghost's start cell.
    start_cell = random.choice(starting)
    total = len(list(self.world.find_cells(lambda cell: cell.food)))

    # Adds one ghost enemy per enemy start cell found in the map
    self.enemies = []
    for enemy_cell in self.world.find_cells(lambda cell: cell.enemy_start):
        new = body.Player("ghost", "seeking", 2, "red", 10, 5)
        self.world.add(new, cell=enemy_cell, dir=1)
        self.enemies.append(new)

    # Caller-supplied ghosts are dropped onto random food cells
    startingE = list(self.world.find_cells(lambda cell: cell.food))
    for gG in ghostList:
        cellx = random.choice(startingE)
        self.world.add(gG, cell=cellx, dir=1)
        self.enemies.append(gG)
    self.completion_time = None

    # Sets up environment for the GridNode (this includes the nodes for obstacles and food)
    with self:
        self.environment = GridNode(self.world)
        self.pacnets = []
        for i, pacman in enumerate(self.pacmen):
            self.world.add(pacman, cell=start_cell, dir=3)

            pacnet = nengo.Network(label='pacman[%d]' % i)
            self.pacnets.append(pacnet)

            # Pacman's move function. ``pacman=pacman`` binds the current
            # loop value so each node drives its own pacman.
            def move(t, x, pacman=pacman):
                def revertColor():
                    # Timer callback: restore the colours captured in
                    # ``ghostC`` when the super food was eaten.
                    self.ghost.color = "red"
                    self.ghost.state = "seeking"
                    for g, color in zip(self.enemies, ghostC):
                        g.color = color
                        g.state = "seeking"

                speed, rotation = x
                dt = 0.001

                # Pacman turns and moves forward based on obstacles and food availability
                pacman.turn(rotation * dt * pacman.rotate)
                pacman.go_forward(speed * dt * pacman.speed)

                # If pacman moves into a cell containing food...
                if pacman.cell.food:
                    # Remember the ghosts' current colours for revertColor
                    ghostC = [g.color for g in self.enemies]
                    if pacman.cell.state == "super":
                        self.ghost.color = "white"
                        self.ghost.state = "running"
                        for g in self.enemies:
                            g.color = "white"
                            g.state = "running"
                        # Ghosts go back to seeking after 5 seconds
                        tx = Timer(5.0, revertColor)
                        tx.start()
                    pacman.score += 1
                    pacman.cell.food = False

            # Sets up the node for the obstacles (this factors in angles and distances towards respective obstacles)
            def obstacles(t, pacman=pacman):
                angles = np.linspace(-1, 1, 5) + pacman.dir
                angles = angles % self.world.directions
                pacman.obstacle_distances = [
                    pacman.detect(d, max_distance=4 * 2)[0] for d in angles
                ]
                return pacman.obstacle_distances

            # Sets up the node for the food (factors in amount of food in an area and its relative strength, distance, etc)
            def detect_food(t, pacman=pacman):
                x = 0
                y = 0
                # Runs through every food cell and accumulates a vector
                # weighted by inverse distance
                for cell in self.world.find_cells(lambda cell: cell.food):
                    dir = pacman.get_direction_to(cell)
                    dist = pacman.get_distance_to(cell)
                    rel_dir = dir - pacman.dir
                    if dist > 5:
                        continue
                    if dist >= 0.05:
                        strength = 1.0 / dist
                    else:
                        # Cap the weight for food that is (almost) underfoot
                        strength = 20

                    dx = np.sin(rel_dir * np.pi / 2) * strength
                    dy = np.cos(rel_dir * np.pi / 2) * strength

                    x += dx
                    y += dy
                return x, y

            # Sets up the node for the enemies (factors in number of enemies in an area and their relative strength, distance, etc.)
            def detect_enemy(t, pacman=pacman):
                x = 0
                y = 0
                # Runs through every ghost and accumulates a vector
                # weighted by inverse distance
                for ghost in self.enemies:
                    dir = pacman.get_direction_to(ghost)
                    dist = pacman.get_distance_to(ghost)
                    rel_dir = dir - pacman.dir
                    # Clamp so a ghost on top of pacman cannot divide by zero
                    if dist < 0.001:
                        dist = 0.001
                    strength = 1.0 / dist

                    dx = np.sin(rel_dir * np.pi / 2) * strength
                    dy = np.cos(rel_dir * np.pi / 2) * strength

                    x += dx
                    y += dy
                return x, y

            with pacnet:
                pacnet.move = nengo.Node(move, size_in=2)
                pacnet.obstacles = nengo.Node(obstacles)
                pacnet.detect_food = nengo.Node(detect_food)
                pacnet.detect_enemy = nengo.Node(detect_enemy)

        # The score is kept track of using an html rendering
        def score(t):
            # Ghosts are advanced once per step here, not once per pacman
            for ghost in self.enemies:
                self.update_ghost(ghost)
            total_score = sum([pacman.score for pacman in self.pacmen])
            if total_score == total:
                self.reset(0, True)
            scores = ':'.join(['%d' % pacman.score for pacman in self.pacmen])
            html = '<h1>%s / %d</h1>' % (scores, total)
            html += '%1.3f seconds' % (t * 10)
            html = '<center>%s</center>' % html
            score._nengo_html_ = html
        self.score = nengo.Node(score)
self.lastAction = action self.goInDirection(action) def calcState(self): if cat.cell is not None: return self.cell.x, self.cell.y, cat.cell.x, cat.cell.y, cheese.cell.x, cheese.cell.y else: return self.cell.x, self.cell.y, cheese.cell.x, cheese.cell.y mouse = Mouse() cat = Cat() cheese = Cheese() world = cellular.World(Cell, directions=directions, filename='barrier2.txt') world.age = 0 world.addAgent(cheese, cell=pickRandomLocation()) world.addAgent(cat) world.addAgent(mouse) epsilonx = (0, 100000) epsilony = (0.1, 0) epsilonm = (epsilony[1] - epsilony[0]) // (epsilonx[1] - epsilonx[0]) endAge = world.age + 150000 while world.age < endAge: world.update() if world.age % 100 == 0:
def cellvalue(self, cell): return (3 if cell.goal else 2 if cell.wall else 1 if cell.cliff else 0) def restart(self): self.cell = startCell self.lastAction = None self.lastEgoState = None cliffReward = -10 goalReward = 500 hitWallReward = -5 normalReward = -1 directions = 4 world = cellular.World(Cell, directions=directions, filename='cliffs.txt') if startCell is None: print "You must indicate where the agent starts by putting a 'S' in the map file" sys.exit() agent = Agent() world.addAgent(agent, cell=startCell) pretraining = 0 for i in range(pretraining): if i % 1000 == 0: print i, agent.score agent.score = 0 world.update() ### display
return 1 if cell.wall else 0 return tuple([ cellvalue( self.world.getWrappedCell(self.cell.x + j, self.cell.y + i)) for i, j in lookcells ]) mouse = Mouse() cat = Cat() cheese = Cheese() world = cellular.World( Cell, directions=directions, filename= '/Users/edela/source/repos/basic_reinforcement_learning/worlds/eecs4401.txt' ) world.age = 0 world.addAgent(cat, cell=world.getCell(5, 6)) world.addAgent(cheese, cell=world.getCell(8, 2)) world.addAgent(mouse, cell=world.getCell(6, 1)) epsilonx = (0, 100000) epsilony = (0.1, 0) epsilonm = (epsilony[1] - epsilony[0]) / (epsilonx[1] - epsilonx[0]) endAge = world.age + 1000 while world.age < endAge:
def get_simulated_world(cells_per_day, rule, number_of_days):
    """Simulate a cellular world, display it, and return its stacked state.

    Builds ``cellular.World(cells_per_day, rule, ones=False)``, calls
    ``simulate(number_of_days)`` on it, displays it with ``landscape=True``,
    and returns ``numpy.vstack`` of the world's ``state``.
    """
    simulation = cellular.World(cells_per_day, rule, ones=False)
    simulation.simulate(number_of_days)
    simulation.display(landscape=True)
    stacked_state = numpy.vstack(simulation.state)
    return stacked_state
def __init__(self, worldmap, pacman_speed=70, pacman_rotate=20,
             ghost_speed=5, ghost_rotate=5, dt=0.001, **kwargs):
    """Build the single-pacman grid world and its nengo nodes.

    Args:
        worldmap: Map description handed to ``cellular.World`` as ``map``.
        pacman_speed: Passed to the pacman ``body.Player`` and used as the
            forward-motion multiplier in ``move``.
        pacman_rotate: Passed to the pacman ``body.Player`` and used as the
            turn multiplier in ``move``.
        ghost_speed: Passed to every ghost ``body.Player`` and stored as
            ``self.ghost_speed``.
        ghost_rotate: Passed to every ghost ``body.Player`` and stored as
            ``self.ghost_rotate``.
        dt: Passed to ``GridNode``.
        **kwargs: Forwarded unchanged to the parent constructor.

    Raises:
        IndexError: From ``random.choice`` when the map has neither a
            pacman start cell nor any food cell.
    """
    # Initializes PacmanWorld using parameters from the global pacman and ghost variables
    super(PacmanWorld, self).__init__(**kwargs)
    self.world = cellular.World(Cell, map=worldmap, directions=4)
    self.pacman = body.Player("pacman", "eating", 2, "yellow",
                              pacman_speed, pacman_rotate)
    self.ghost_rotate = ghost_rotate
    self.ghost_speed = ghost_speed
    self.last_t = None

    # Starting position for the pacman: a random marked start cell, or a
    # random food cell when the map marks no start cell at all.
    starting = list(self.world.find_cells(lambda cell: cell.pacman_start))
    if not starting:
        starting = list(self.world.find_cells(lambda cell: cell.food))
    start_cell = random.choice(starting)
    total = len(list(self.world.find_cells(lambda cell: cell.food)))
    self.world.add(self.pacman, cell=start_cell, dir=3)

    # Adds one ghost enemy per enemy start cell found in the map
    self.enemies = []
    for enemy_cell in self.world.find_cells(lambda cell: cell.enemy_start):
        new = body.Player("ghost", "seeking", 0.37, "red",
                          ghost_speed, ghost_rotate)
        self.world.add(new, cell=enemy_cell, dir=1)
        self.enemies.append(new)
    self.completion_time = None

    # Sets up environment for the GridNode (this includes the nodes for obstacles and food)
    with self:
        self.environment = GridNode(self.world, dt=dt)

        # Pacman's move function, driven by a 2-d (speed, rotation) input
        def move(t, x):
            # Time going backwards means the simulation was restarted
            if self.last_t is not None and t < self.last_t:
                self.reset()
            self.last_t = t

            speed, rotation = x
            # NOTE(review): the step is hard-coded here and shadows the
            # constructor's ``dt`` (which only reaches GridNode) - confirm
            # whether the two are meant to stay in sync.
            dt = 0.001

            # Pacman turns and moves forward based on obstacles and food availability
            self.pacman.turn(rotation * dt * pacman_rotate)
            self.pacman.go_forward(speed * dt * pacman_speed)

            # Eat food found in any neighbouring cell
            for n in self.pacman.cell.neighbours:
                if n.food:
                    # Adds to the score
                    self.pacman.score += 1
                    n.food = False
                    # Record the first time all food has been eaten
                    if self.completion_time is None and self.pacman.score == total:
                        self.completion_time = t

            # Ghosts are updated on every step
            for ghost in self.enemies:
                self.update_ghost(ghost)
        self.move = nengo.Node(move, size_in=2)

        # The score is kept track of using an html rendering
        def score(t):
            html = '<h1>%d / %d</h1>' % (self.pacman.score, total)
            if self.completion_time is not None:
                html += '<div style="background:yellow">Completed in<br/>%1.3f seconds</div>' % self.completion_time
            else:
                html += '%1.3f seconds' % t
            html = '<center>%s</center>' % html
            score._nengo_html_ = html
        self.score = nengo.Node(score)

        # Sets up the node for the obstacles (this factors in angles and distances towards respective obstacles)
        def obstacles(t):
            angles = np.linspace(-0.5, 0.5, 3) + self.pacman.dir
            angles = angles % self.world.directions
            self.pacman.obstacle_distances = [
                self.pacman.detect(d, max_distance=4 * 2)[0] for d in angles
            ]
            return self.pacman.obstacle_distances
        self.obstacles = nengo.Node(obstacles)

        # Sets up the node for the food (factors in amount of food in an area and its relative strength, distance, etc)
        def detect_food(t):
            x = 0
            y = 0
            # Runs through every food cell and accumulates a vector
            # weighted by inverse distance
            for cell in self.world.find_cells(lambda cell: cell.food):
                dir = self.pacman.get_direction_to(cell)
                dist = self.pacman.get_distance_to(cell)
                rel_dir = dir - self.pacman.dir
                # Ignore food that is too far away or (almost) underfoot
                if dist > 5:
                    continue
                if dist >= 0.05:
                    strength = 1.0 / dist
                else:
                    continue

                dx = np.sin(rel_dir * np.pi / 2) * strength
                dy = np.cos(rel_dir * np.pi / 2) * strength

                x += dx
                y += dy
            return x, y
        self.detect_food = nengo.Node(detect_food)

        # Sets up the node for the enemies (factors in number of enemies in an area and their relative strength, distance, etc.)
        def detect_enemy(t):
            x = 0
            y = 0
            # Runs through every ghost and accumulates a vector weighted
            # by inverse distance
            for ghost in self.enemies:
                dir = self.pacman.get_direction_to(ghost)
                dist = self.pacman.get_distance_to(ghost)
                rel_dir = dir - self.pacman.dir
                # Clamp so a ghost on top of pacman cannot divide by zero
                if dist < 0.001:
                    dist = 0.001
                strength = 1.0 / dist

                dx = np.sin(rel_dir * np.pi / 2) * strength
                dy = np.cos(rel_dir * np.pi / 2) * strength

                x += dx
                y += dy
            return x, y
        self.detect_enemy = nengo.Node(detect_enemy)
else: return 1 if cell.wall else 0 return tuple([ cellvalue( self.world.getWrappedCell(self.cell.x + j, self.cell.y + i)) for i, j in lookcells ]) mouse = Mouse() cat = Cat() cheese = Cheese() world = cellular.World(Cell, directions=directions, filename=r'D:\New\ML Group\reinf3\waco.txt') world.age = 0 world.addAgent(cheese, cell=pickRandomLocation()) world.addAgent(cat) world.addAgent(mouse) epsilonx = (0, 10000) epsilony = (0.1, 0) epsilonm = (epsilony[1] - epsilony[0]) / (epsilonx[1] - epsilonx[0]) endAge = world.age + 10000 # endAge = world.age + 1 while world.age < endAge: