class Controller(): def __init__(self): self.robot_foot_qlearner = None self.goal = [0.33, 0.33, 0.33] self.last_pressure_measure = None self.initial_control_state = (0, 0, 0, 0) def distance(self, v1, v2): return math.sqrt((v1[0] - v2[0])**2+(v1[1] - v2[1])**2+(v1[2] - v2[2])**2) def to_state(self, value): fp1, fp2, fp3 = value fpq1 = min(max(0, int(fp1 * 20)), 19) fpq2 = min(max(0, int(fp2 * 20)), 19) fpq3 = min(max(0, int(fp3 * 20)), 19) return [fpq1, fpq2, fpq3] def step_decision(self, fp1, fp2, fp3): if self.robot_foot_qlearner is None: state = self.to_state([fp1, fp2, fp3]) self.robot_foot_qlearner = RobotFootQLearner(state) self.last_pressure_measure = fp1, fp2, fp3 return (0, 0, 0, 0) current_distance = self.distance((fp1, fp2, fp3), self.goal) former_distance = self.distance(self.last_pressure_measure, self.goal) if current_distance < former_distance: self.robot_foot_qlearner.perform_q_learning(self.to_state(self.last_pressure_measure), str(self.initial_control_state), 100, self.to_state((fp1, fp2, fp3))) elif fp1 < 0 or fp2 < 0 or fp3 < 0: self.robot_foot_qlearner.perform_q_learning(self.to_state(self.last_pressure_measure), str(self.initial_control_state), -10, self.to_state((fp1, fp2, fp3))) else: self.robot_foot_qlearner.perform_q_learning(self.to_state(self.last_pressure_measure), str(self.initial_control_state), 0, self.to_state((fp1, fp2, fp3))) self.last_pressure_measure = fp1, fp2, fp3 next_action = self.robot_foot_qlearner.decide_next_action(True) try: self.initial_control_state = eval(str(next_action)) except Exception as e: print next_action print e state = self.to_state([fp1, fp2, fp3]) self.robot_foot_qlearner.update_state(state) return self.initial_control_state
def step_decision(self, fp1, fp2, fp3): if self.robot_foot_qlearner is None: state = self.to_state([fp1, fp2, fp3]) self.robot_foot_qlearner = RobotFootQLearner(state) self.last_pressure_measure = fp1, fp2, fp3 return (0, 0, 0, 0) current_distance = self.distance((fp1, fp2, fp3), self.goal) former_distance = self.distance(self.last_pressure_measure, self.goal) if current_distance < former_distance: self.robot_foot_qlearner.perform_q_learning(self.to_state(self.last_pressure_measure), str(self.initial_control_state), 100, self.to_state((fp1, fp2, fp3))) elif fp1 < 0 or fp2 < 0 or fp3 < 0: self.robot_foot_qlearner.perform_q_learning(self.to_state(self.last_pressure_measure), str(self.initial_control_state), -10, self.to_state((fp1, fp2, fp3))) else: self.robot_foot_qlearner.perform_q_learning(self.to_state(self.last_pressure_measure), str(self.initial_control_state), 0, self.to_state((fp1, fp2, fp3))) self.last_pressure_measure = fp1, fp2, fp3 next_action = self.robot_foot_qlearner.decide_next_action(True) try: self.initial_control_state = eval(str(next_action)) except Exception as e: print next_action print e state = self.to_state([fp1, fp2, fp3]) self.robot_foot_qlearner.update_state(state) return self.initial_control_state