def __init__(self):
    """Wire up the Q-learning pipeline: environment, value table, learner,
    agent, task, and experiment, storing the long-lived pieces on self."""
    self.environment = GameEnv()
    # One table row per observation dimension, one column per action dimension.
    value_table = ActionValueTable(self.environment.outdim, self.environment.indim)
    value_table.initialize(0.)
    # todo: save & restore agents state
    q_learner = Q()
    q_learner._setExplorer(EpsilonGreedyExplorer())
    self.agent = LearningAgent(value_table, q_learner)
    self.task = GameTask(self.environment)
    self.experiment = Experiment(self.task, self.agent)
class ReinforcedController(Controller):
    """Inverted-pendulum cart controller trained with tabular Q-learning.

    The continuous state (angle, angular velocity, cart position, cart
    velocity) is discretized into the buckets returned by get_ranges();
    the learned action-value table is pickled to/from "test.pcl".
    """

    NAME = "Reinforced Controller"

    def __init__(self, pendulum_length, pendulum_mass, cart_mass):
        super(ReinforcedController, self).__init__(pendulum_length, pendulum_mass, cart_mass)
        self.ranges = self.get_ranges()
        self.model = InvertedPendulumModel(self.pendulum_length, self.pendulum_mass, self.cart_mass)
        # Number of discrete force actions the environment exposes.
        self.force_granularity = 2
        self.environment = CartEnvironment(
            self.model, *self.ranges, force_granularity=self.force_granularity)
        # self.load()

    def learn(self, number_of_iterations):
        """Run Q-learning for the given number of single-step interactions.

        Rebuilds self.controller (the action-value table) and pickles it
        to "test.pcl" when done.
        """
        learner = Q(0.2, 0.8)
        task = CartMovingTask(self.environment)
        # State count = product of the bucket counts across all dimensions.
        number_of_states = 1
        for dimension_ranges in self.ranges:
            number_of_states *= len(dimension_ranges)
        self.controller = ActionValueTable(number_of_states, self.force_granularity)
        self.controller.initialize(1.)
        agent = LearningAgent(self.controller, learner)
        experiment = Experiment(task, agent)
        for _ in range(number_of_iterations):
            experiment.doInteractions(1)
            agent.learn()
            agent.reset()
        # Pickle data is binary: "wb" (was "w+") is required on Python 3
        # and avoids newline corruption on Windows under Python 2.
        with open("test.pcl", "wb") as f:
            pickle.dump(self.controller, f)

    def load(self):
        """Restore a previously pickled action-value table from "test.pcl"."""
        # Binary mode to match the binary pickle written by learn().
        with open("test.pcl", "rb") as f:
            self.controller = pickle.load(f)

    def calculate_force(self, angular_position, angular_velocity, cart_position, cart_velocity):
        """Return the greedy force for the discretized current state."""
        state = self.environment.normalized_state(
            (angular_position, angular_velocity, cart_position, cart_velocity))
        action = self.controller.getMaxAction(state)
        force = self.environment.allActions[action]
        return force

    def get_ranges(self):
        """Return the discretization buckets for each state dimension.

        The +/-1e9 endpoints act as open-ended "anything beyond" buckets.
        """
        cart_position_ranges = [(-1000000000, -2.4), (-2.4, -0.8), (-0.8, 0.8),
                                (0.8, 2.4), (2.4, 1000000000)]
        cart_velocity_ranges = [(-1000000000, -0.5), (-0.5, 0.5), (0.5, 1000000000)]
        angles_ranges = [(-6.28, -1), (-1, 0), (0, 1), (1, 6.28)]
        angular_velocity_ranges = [(-1000000000, -3.14), (-3.14, 3.14), (3.14, 1000000000)]
        return (cart_position_ranges, cart_velocity_ranges, angles_ranges,
                angular_velocity_ranges)
def learn(self, number_of_iterations):
    """Run Q-learning for the given number of single-step interactions.

    Rebuilds self.controller (the action-value table) and pickles it
    to "test.pcl" when done.
    """
    learner = Q(0.2, 0.8)
    task = CartMovingTask(self.environment)
    # State count = product of the bucket counts across all dimensions.
    number_of_states = 1
    for dimension_ranges in self.ranges:
        number_of_states *= len(dimension_ranges)
    self.controller = ActionValueTable(number_of_states, self.force_granularity)
    self.controller.initialize(1.)
    agent = LearningAgent(self.controller, learner)
    experiment = Experiment(task, agent)
    for _ in range(number_of_iterations):
        experiment.doInteractions(1)
        agent.learn()
        agent.reset()
    # Pickle data is binary: "wb" (was "w+") is required on Python 3
    # and avoids newline corruption on Windows under Python 2.
    with open("test.pcl", "wb") as f:
        pickle.dump(self.controller, f)
class ReinforcedController(Controller):
    """Inverted-pendulum cart controller trained with tabular Q-learning.

    The continuous state (angle, angular velocity, cart position, cart
    velocity) is discretized into the buckets returned by get_ranges();
    the learned action-value table is pickled to/from "test.pcl".
    """

    NAME = "Reinforced Controller"

    def __init__(self, pendulum_length, pendulum_mass, cart_mass):
        super(ReinforcedController, self).__init__(pendulum_length, pendulum_mass, cart_mass)
        self.ranges = self.get_ranges()
        self.model = InvertedPendulumModel(self.pendulum_length, self.pendulum_mass, self.cart_mass)
        # Number of discrete force actions the environment exposes.
        self.force_granularity = 2
        self.environment = CartEnvironment(
            self.model, *self.ranges, force_granularity=self.force_granularity)
        # self.load()

    def learn(self, number_of_iterations):
        """Run Q-learning for the given number of single-step interactions.

        Rebuilds self.controller (the action-value table) and pickles it
        to "test.pcl" when done.
        """
        learner = Q(0.2, 0.8)
        task = CartMovingTask(self.environment)
        # State count = product of the bucket counts across all dimensions.
        number_of_states = 1
        for dimension_ranges in self.ranges:
            number_of_states *= len(dimension_ranges)
        self.controller = ActionValueTable(number_of_states, self.force_granularity)
        self.controller.initialize(1.0)
        agent = LearningAgent(self.controller, learner)
        experiment = Experiment(task, agent)
        for _ in range(number_of_iterations):
            experiment.doInteractions(1)
            agent.learn()
            agent.reset()
        # Pickle data is binary: "wb" (was "w+") is required on Python 3
        # and avoids newline corruption on Windows under Python 2.
        with open("test.pcl", "wb") as f:
            pickle.dump(self.controller, f)

    def load(self):
        """Restore a previously pickled action-value table from "test.pcl"."""
        # Binary mode to match the binary pickle written by learn().
        with open("test.pcl", "rb") as f:
            self.controller = pickle.load(f)

    def calculate_force(self, angular_position, angular_velocity, cart_position, cart_velocity):
        """Return the greedy force for the discretized current state."""
        state = self.environment.normalized_state(
            (angular_position, angular_velocity, cart_position, cart_velocity))
        action = self.controller.getMaxAction(state)
        force = self.environment.allActions[action]
        return force

    def get_ranges(self):
        """Return the discretization buckets for each state dimension.

        The +/-1e9 endpoints act as open-ended "anything beyond" buckets.
        """
        cart_position_ranges = [(-1000000000, -2.4), (-2.4, -0.8), (-0.8, 0.8),
                                (0.8, 2.4), (2.4, 1000000000)]
        cart_velocity_ranges = [(-1000000000, -0.5), (-0.5, 0.5), (0.5, 1000000000)]
        angles_ranges = [(-6.28, -1), (-1, 0), (0, 1), (1, 6.28)]
        angular_velocity_ranges = [(-1000000000, -3.14), (-3.14, 3.14), (3.14, 1000000000)]
        return (cart_position_ranges, cart_velocity_ranges, angles_ranges,
                angular_velocity_ranges)
def main():
    """Connect to a local V-REP remote API server and replay episodes of the
    standing-up task using a pre-trained action-value table.

    Returns -1 if the connection to the simulator cannot be established.
    """
    vrep.simxFinish(-1)  # just in case, close all opened connections
    client_id = vrep.simxStart('127.0.0.1', 19997, True, True, 5000, 5)  # Connect to V-REP
    if client_id < 0:
        print('Failed connecting to remote API server')
        return -1
    print('Connected to remote API server')

    # Define RL elements
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    controller = ActionValueTable(task.get_state_space_size(), task.get_action_space_size())
    controller.initialize(1.)
    # SECURITY NOTE: pickle.load executes arbitrary code from the file;
    # only load table files from a trusted source.
    with open('standing-up-q.pkl', 'rb') as params_file:
        controller._params = pickle.load(params_file)
    # learner = Q()
    agent = LearningAgent(controller)
    experiment = EpisodicExperiment(task, agent)
    i = 0
    try:
        while True:
            i += 1
            print('Iteration n° ' + str(i))
            experiment.doEpisodes(1)
    finally:
        # The original cleanup call was unreachable after the infinite loop;
        # try/finally ensures the connection is closed on interrupt or error.
        vrep.simxFinish(client_id)
def learn(self, number_of_iterations):
    """Run Q-learning for the given number of single-step interactions.

    Rebuilds self.controller (the action-value table) and pickles it
    to "test.pcl" when done.
    """
    learner = Q(0.2, 0.8)
    task = CartMovingTask(self.environment)
    # State count = product of the bucket counts across all dimensions.
    number_of_states = 1
    for dimension_ranges in self.ranges:
        number_of_states *= len(dimension_ranges)
    self.controller = ActionValueTable(number_of_states, self.force_granularity)
    self.controller.initialize(1.0)
    agent = LearningAgent(self.controller, learner)
    experiment = Experiment(task, agent)
    for _ in range(number_of_iterations):
        experiment.doInteractions(1)
        agent.learn()
        agent.reset()
    # Pickle data is binary: "wb" (was "w+") is required on Python 3
    # and avoids newline corruption on Windows under Python 2.
    with open("test.pcl", "wb") as f:
        pickle.dump(self.controller, f)
# NOTE(review): fragment of a maze Q-learning demo script; it begins mid-way
# through the `envmatrix2` wall-matrix literal (earlier rows are outside this
# view) and ends with a dangling `if x == yes:` whose body is also out of view.
# The table is sized for 324 states (= 18*18); the visible 10 rows of 18
# columns suggest 8 more matrix rows precede this fragment — TODO confirm.
# `raw_input` indicates this is Python 2 code.
[1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1], [1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1], [1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1], [1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1], [1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1], [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1], [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1], [1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]) environment = Maze(envmatrix2, (6, 8)) task = MDPMazeTask(environment) table = ActionValueTable(324, 4) table.initialize(3.) agent = LearningAgent(table, Q(0.5, 0.80)) #agent._setLearning(0.01) experiment = Experiment(task, agent) plt.ion() plt.hot() #plt.set_cmap() Fix bakgrundsfarg x = raw_input('Want to start?') yes = "yes" no = "no" if x == yes:
# Maze Q-learning demo: a 9x9 wall matrix (1 = wall, 0 = free) with the goal
# at (7,7) gives 81 discrete states and 4 actions. The loop alternates 100
# environment interactions with a learning/reset step, then renders the
# per-state maximum Q-value as a 9x9 heat map.
# NOTE(review): this chunk was flattened onto one line; the original
# indentation (which statements sit inside the `for` body) is not recoverable
# here, so the code is left byte-identical — reformat against the original file.
# NOTE(review): `array`, `Maze`, `MDPMazeTask`, `LearningAgent`, and `plt` are
# used but not imported in this view — presumably imported elsewhere; verify.
from pybrain.rl.learners import Q, ActionValueTable from pybrain.rl.experiments import Experiment envmatrix = array([[1,1,1,1,1,1,1,1,1], [1,0,0,1,0,0,0,0,1], [1,0,0,1,0,0,1,0,1], [1,0,0,1,0,0,1,0,1], [1,0,0,1,0,1,1,0,1], [1,0,0,0,0,0,1,0,1], [1,1,1,1,1,1,1,0,1], [1,0,0,0,0,0,0,0,1], [1,1,1,1,1,1,1,1,1]]) environment = Maze(envmatrix, (7,7)) task = MDPMazeTask(environment) table = ActionValueTable(81,4) table.initialize(1.) agent = LearningAgent(table,Q()) experiment = Experiment(task,agent) plt.ion() plt.gray() for i in range(1000): experiment.doInteractions(100); agent.learn(); agent.reset(); plt.pcolor(table.params.reshape(81,4).max(axis=1).reshape(9,9)) plt.gcf().canvas.draw()