def main():
    client_id = Utils.connectToVREP()

    # Define RL elements
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)

    temperature = 2
    ttable_path = os.path.join(BASE_DIR, 't-table.pkl')
    qtable_path = os.path.join(BASE_DIR, 'q-table-{}.pkl'.format(Q_TABLE_VERSION))
    dtmc_generator = DTMCGenerator(ttable_path, qtable_path, temperature)
    dtmc_generator.load_policy(POLICY_PREFIX + 'sm10-policy.pkl', BASE_DIR)

    with open(ttable_path, 'rb') as file:
        ttable = pickle.load(file)

    # Group transition counts by (state, action) pair
    trans_prob_dict = {}
    for key, value in ttable.items():
        new_key = (key[0], key[1])
        v = trans_prob_dict.get(new_key, [])
        v.append((key[2], value))
        trans_prob_dict[new_key] = v

    while True:
        n1 = dt.datetime.now()
        state = task.getObservation()[0]
        action = numpy.argmax(dtmc_generator.Q[state])
        print('trans probs {}'.format(trans_prob_dict.get((state, action))))
        action = select_action(dtmc_generator.policy, state, 'argmax')
        print('State {} Action {} Prob {}'.format(state, action, dtmc_generator.policy[state][action]))
        print('pol trans probs {}'.format(trans_prob_dict.get((state, action))))
        if action == 729:
            task.reset()
        else:
            task.performAction(action)
        successors = dtmc_generator.get_successor_states(state, action)
        new_state = task.getObservation()[0]
        found = False
        for succ_state, prob in successors:  # renamed to avoid shadowing the current state
            if new_state == succ_state:
                print('{} state found, prob {}'.format(succ_state, prob))
                found = True
        if not found:
            print('{} state not found! successors: {}'.format(new_state, successors))
        n2 = dt.datetime.now()
        print('elapsed time: {} s'.format((n2 - n1).total_seconds()))

    Utils.endVREP()
def run(self):
    try:
        proc = Utils.exec_vrep(self.port)
        time.sleep(60)
        client_id = Utils.connectToVREP(self.port)
        environment = StandingUpEnvironment(client_id)
        task = StandingUpTask(environment)

        for episode in range(self.n_episodes):
            old_state = current_state = task.getObservation()[0]
            action = self.select_action(self.policy, current_state)
            print('State {} Action {} Prob {}'.format(current_state, action, self.policy[current_state][action]))
            task.performAction(action)
            while action != 729:
                old_state = current_state
                # Test to verify Monitor capability
                # Pk: added more actions
                # if action == 579 or action == 337:
                #     current_state = task.state_mapper.self_collided_state
                # else:
                current_state = task.getObservation()[0]
                self.t_table.incrementValue((old_state, action, current_state))
                action = self.select_action(self.policy, current_state)
                print('State {} Action {} Prob {}'.format(current_state, action, self.policy[current_state][action]))
                task.performAction(action)
            task.reset()

            if current_state == task.state_mapper.goal_state:
                self.counters['goal'] += 1
            elif current_state == task.state_mapper.fallen_state:
                self.counters['fallen'] += 1
            elif current_state == task.state_mapper.too_far_state:
                self.counters['far'] += 1
            elif current_state == task.state_mapper.self_collided_state:
                self.counters['collided'] += 1
            else:
                self.counters['unknown'] += 1
    finally:
        self.barrier.wait()
        Utils.endVREP(client_id)
        proc.kill()
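# The runner above records every observed transition through
# self.t_table.incrementValue((old_state, action, current_state)). The t-table
# class itself is not part of this section; the sketch below is a hypothetical
# minimal stand-in (class name, constructor path and save() behaviour are
# assumptions). It keeps raw (state, action, next_state) counts so that
# transition probabilities can be recomputed later by grouping on the
# (state, action) prefix, as the DTMC listing above does.
import os
import pickle


class TransitionTable:
    """Hypothetical sketch of a pickle-backed transition counter."""

    def __init__(self, path='t-table.pkl'):
        self.path = path
        self.counts = {}
        if os.path.exists(path):
            with open(path, 'rb') as f:
                self.counts = pickle.load(f)

    def incrementValue(self, key):
        # key is a (state, action, next_state) triple
        self.counts[key] = self.counts.get(key, 0) + 1

    def save(self):
        with open(self.path, 'wb') as f:
            pickle.dump(self.counts, f)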
def run(self):
    proc = Utils.exec_vrep(self.port)
    time.sleep(10)
    # connect to V-REP server
    try:
        client_id = Utils.connectToVREP(self.port)
        env = StandingUpEnvironment(client_id)
        task = StandingUpTask(env)
        counter = 0
        while counter < MAX_ITERATIONS:
            state = task.getObservation()[0]
            action = select_action(self.policy, state, 'prob')
            # print('State {} Action {} Prob {}'.format(state, action, self.policy[state][action]))
            if action == 729:
                if state == task.state_mapper.goal_state:
                    self.logger.info('Goal!')
                elif state == task.state_mapper.fallen_state:
                    self.logger.info('Fallen!')
                elif state == task.state_mapper.too_far_state:
                    self.logger.info('Far!')
                elif state == task.state_mapper.self_collided_state:
                    self.logger.info('Collided!')
                else:
                    self.logger.info(state)
                counter += 1
                print('Iteration {}'.format(counter))
                task.reset()
            else:
                task.performAction(action)
    finally:
        proc.kill()
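# The listings above call a select_action helper that is not defined in this
# section. The sketch below is an assumption about its behaviour, not the
# project's confirmed implementation: the policy is taken to be a per-state
# probability vector over the discrete actions, where 'prob' samples from that
# distribution and 'argmax' picks greedily (matching the two mode strings used above).
import numpy


def select_action(policy, state, mode='prob'):
    """Pick an action for `state` from a tabular stochastic policy."""
    probs = numpy.asarray(policy[state], dtype=float)
    if mode == 'argmax':
        return int(numpy.argmax(probs))
    # Renormalize defensively before sampling, in case of rounding drift
    probs = probs / probs.sum()
    return int(numpy.random.choice(len(probs), p=probs))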
def main():
    vrep.simxFinish(-1)  # just in case, close all opened connections
    client_id = vrep.simxStart('127.0.0.1', 19997, True, True, 5000, 5)  # Connect to V-REP

    if client_id < 0:
        print('Failed connecting to remote API server')
        return -1
    print('Connected to remote API server')

    # Define RL elements
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    controller = ActionValueTable(task.get_state_space_size(), task.get_action_space_size())
    controller.initialize(1.)

    with open('standing-up-q.pkl', 'rb') as file:
        controller._params = pickle.load(file)

    # learner = Q()
    agent = LearningAgent(controller)
    experiment = EpisodicExperiment(task, agent)

    i = 0
    while True:
        i += 1
        print('Iteration n° ' + str(i))
        experiment.doEpisodes(1)

    vrep.simxFinish(client_id)
def main():
    client_id = Utils.connectToVREP()
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    # print('Initial State: ')
    # print(environment.bioloid.read_state())

    trajectory_data = []
    for i in range(50):
        print('Iteration {}'.format(i))
        trajectory = []
        for action in Utils.standingUpActions:
            observation = task.getObservation()[0]
            state_vector = task.env.bioloid.read_state()
            action = Utils.vecToInt(action)
            task.performAction(action)
            reward = task.getReward()
            trajectory.append({
                'state': observation,
                'state_vector': state_vector,
                'action': action,
                'reward': reward,
                'full_state': task.env.bioloid.read_full_state()
            })
        trajectory_data.append(trajectory)

        # Record the terminal state reached after the last scripted action
        observation = task.getObservation()[0]
        state_vector = task.env.bioloid.read_state()
        trajectory.append({
            'state': observation,
            'state_vector': state_vector,
            'action': -1,
            'reward': 0,
            'full_state': task.env.bioloid.read_full_state()
        })
        task.reset()

    with open('../data/trajectory.pkl', 'wb') as file:
        pickle.dump(trajectory_data, file)

    Utils.endVREP()
def main():
    client_id = Utils.connectToVREP()
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)

    while True:
        action_str = input("Insert next action: ")
        action = [int(x) for x in action_str.split(' ')]
        observation = task.getObservation()
        print(task.current_sensors)
        a = Utils.vecToInt(action)
        task.performAction(a)
        task.getReward()
        # NOTE: both prints below call is_fallen(); the first is labelled
        # 'self-collided', so it was presumably meant to query the self-collision flag.
        print('self-collided: ' + str(environment.bioloid.is_fallen()))
        print('is-fallen: ' + str(environment.bioloid.is_fallen()))
        environment.reset()
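# Both the trajectory recorder and the interactive loop above flatten an action
# vector into an integer index via Utils.vecToInt, whose implementation is not
# shown here. The sketch below is a plausible stand-in only: it assumes each
# controlled joint takes one of three movement values, which would give
# 3 ** 6 = 729 regular actions and leave 729 itself free as the terminal/reset
# action used in the other listings. Function name and encoding are assumptions.
def vec_to_int(action_vec, values=(-1, 0, 1)):
    """Hypothetical sketch of Utils.vecToInt: encode a per-joint action vector
    (each entry drawn from `values`) as a single base-len(values) integer."""
    index = 0
    for v in action_vec:
        index = index * len(values) + values.index(v)
    return index


# Example: the all-zero "hold every joint" vector lands in the middle of the range.
assert vec_to_int([0, 0, 0, 0, 0, 0]) == 364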
def main():
    client_id = Utils.connectToVREP()

    # Define RL elements
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    controller = MyActionValueTable()
    learner = Q(0.5, 0.9)
    learner.explorer = EpsilonGreedyExplorer(0.15, 1)  # EpsilonGreedyBoltzmannExplorer()
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)
    controller.initialize(agent)

    i = 0
    try:
        while True:
            i += 1
            print('Episode ' + str(i))
            experiment.doEpisodes()
            agent.learn()
            agent.reset()
            print('mean: ' + str(numpy.mean(controller.params)))
            print('max: ' + str(numpy.max(controller.params)))
            print('min: ' + str(numpy.min(controller.params)))
            if i % 500 == 0:  # Save q-table every 500 episodes
                print('Save q-table')
                controller.save()
                task.t_table.save()
    except (KeyboardInterrupt, SystemExit):
        with open('../data/standing-up-q.pkl', 'wb') as handle:
            pickle.dump(controller.params, handle)
        task.t_table.save()
        controller.save()

    vrep.simxFinish(client_id)
def main():
    client_id = Utils.connectToVREP()
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    state_vector_length = len(environment.bioloid.read_state())
    n = int(input('Number of iterations: '))

    workbook = xlsxwriter.Workbook('data/reports/trajectory-trials.xls')
    worksheets = []
    for i in range(len(Utils.standingUpActions)):
        worksheets.append(workbook.add_worksheet('t' + str(i + 1)))

    for i in range(n):
        print('Iteration ' + str(i + 1))
        print('Initial State: ')
        # task.getObservation()
        print(task.state_mapper.sd.discretize(environment.getSensors()))
        print(task.getObservation()[0])
        for j, action in enumerate(Utils.standingUpActions):
            environment.performAction(action)
            state_vector = environment.getSensors()
            discretized_state = task.state_mapper.sd.discretize(state_vector)
            for k, s in enumerate(discretized_state):
                worksheets[j].write(i, k, s)
            state_n = task.update_current_state()
            if state_n != task.state_mapper.goal_state:
                state_distance = euclidean(task.state_mapper.state_space[state_n], discretized_state)
            else:
                state_distance = 0
            goal_distance = task.state_mapper.get_goal_distance(discretized_state)
            print(discretized_state)
            print('---------------------')
            worksheets[j].write(i, state_vector_length + 1, state_n)
            worksheets[j].write(i, state_vector_length + 2, state_distance)
            worksheets[j].write(i, state_vector_length + 3, goal_distance)
        environment.reset()

    res_worksheet = workbook.add_worksheet('Results')
    row = 0
    for i in range(len(Utils.standingUpActions)):
        sheet_name = 't' + str(i + 1)
        res_worksheet.write(row, 0, sheet_name)
        res_worksheet.write(row, 1, 'mean')
        res_worksheet.write(row + 1, 1, 'var')
        for j in range(state_vector_length):
            col_name = chr(ord('A') + j)
            data_range = sheet_name + '.' + col_name + '1:' + col_name + str(n)
            # TODO: check why range is made lowercase :/
            res_worksheet.write_formula(row, 2 + j, '=AVERAGE(' + data_range + ')')
            res_worksheet.write_formula(row + 1, 2 + j, '=VAR.P(' + data_range + ')')
        row += 2

    workbook.close()