import math
import time

import numpy as np

# Project-specific imports (vrep bindings, ROS message types, the Info record,
# policy/controller classes) are omitted in this excerpt.


def rewardFunction(self, s_n, a):
    first, second = self.splitState(s_n.ravel().tolist())
    fPrev, sPrev = self.splitState(self.prev['S'].ravel().tolist())
    prevPos, pos, blockPos, prevBlock, ori, prevOri, blockOri = self.unpack(fPrev, first, double=True)
    first = Info(prevPos, pos, blockPos, prevBlock, ori, prevOri, blockOri)
    prevPos, pos, blockPos, prevBlock, ori, prevOri, blockOri = self.unpack(sPrev, second, double=True)
    second = Info(prevPos, pos, blockPos, prevBlock, ori, prevOri, blockOri)
    if self.phase == 1:
        # Phase 1: push the block toward the hole.
        if first.pos[2] < .35 or second.pos[2] < .35:  # an agent fell
            return (-3, 1)
        if blockPos[-1] < .3:  # block dropped into the hole
            self.phase += 1
            return (5, 0)
        box_r = (blockPos[0] - prevBlock[0]) - .005 * abs(blockOri)
        vel_r = dist(first.prevPos, prevBlock) - dist(first.pos, blockPos)
        vel_r += dist(second.prevPos, prevBlock) - dist(second.pos, blockPos)
        prevVec = unitVector(vector(first.prevOri))
        vec = unitVector(vector(first.ori))
        goal = unitVector(blockPos[:2] - first.pos[:2])
        prevDot = dot(prevVec, goal)
        currDot = dot(vec, goal)
        ori_r = currDot - prevDot
        prevVec = unitVector(vector(second.prevOri))
        vec = unitVector(vector(second.ori))
        goal = unitVector(blockPos[:2] - second.pos[:2])
        prevDot = dot(prevVec, goal)
        currDot = dot(vec, goal)
        ori_r += currDot - prevDot
        r = (25 * box_r + vel_r + 2 * ori_r) - .01
    elif self.phase == 2:
        # Phase 2: both agents cross over the filled hole.
        if first.pos[2] < .35 or second.pos[2] < .35:  # an agent fell
            return (-6, 1)
        if first.pos[0] > .45 and second.pos[0] > .45:
            print('Success!')
            return (5, 1)
        vel_r = first.pos[0] - first.prevPos[0]
        vel_r += second.pos[0] - second.prevPos[0]
        y_r = .1 * (abs(first.pos[1] - blockPos[1]) + abs(second.pos[1] - blockPos[1]))
        # ori_r = .05 * (abs(first.ori) + abs(second.ori))
        r = vel_r - y_r - .01
    return (r, 0)
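# The reward shaping above leans on a few small geometry helpers (dist, dot,
# unitVector, vector) that are never defined in this file. The sketches below
# are inferred from the call sites; treat them as assumptions, not the
# project's actual implementations.
def dist(a, b):
    """Euclidean distance between two array-like points."""
    return float(np.linalg.norm(np.asarray(a, dtype=float) - np.asarray(b, dtype=float)))


def dot(a, b):
    """Plain dot product of two vectors."""
    return float(np.dot(a, b))


def unitVector(v):
    """v scaled to unit length (callers are assumed to avoid zero vectors)."""
    v = np.asarray(v, dtype=float)
    return v / np.linalg.norm(v)


def vector(theta):
    """Unit heading vector in the plane for a yaw angle theta, in radians."""
    return np.array([np.cos(theta), np.sin(theta)])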
def succeeded(self, s):
    if self.simulation_name == 'elevated_scene':
        return dist(s[:3], s[5:8]) < .5 and self.box_z_global < .2 and self.bot_z_global > .3
    if self.simulation_name == 'flat_scene':
        return dist(s[:3], s[5:8]) < .4
    if self.simulation_name == 'slope_scene':
        return dist(s[:3], s[5:8]) < .4
def reward_function(self, s):
    s = s.ravel()
    succeeded = self.succeeded(s)
    _, done = self.decide_to_restart(s)
    if succeeded:
        if self.simulation_name == 'elevated_scene':
            return 5 - dist(s[:2], s[5:7])
        if self.simulation_name == 'flat_scene':
            return 5 - abs(self.box_ori_global)
        if self.simulation_name == 'slope_scene':
            return 5 - abs(self.box_ori_global)
    if done and not succeeded:
        if self.simulation_name == 'elevated_scene' and (self.box_z_global < .2 and self.bot_z_global > .2):
            return 0
        else:
            return -5
    else:
        if not isinstance(self.prev["S"], np.ndarray):
            return 0
        previous_local_state = self.prev['S'].ravel()
        dist_state = 2 if self.simulation_name == 'elevated_scene' else 3
        min_dist = .5 if self.simulation_name == 'elevated_scene' else 1
        previous_distance = dist(previous_local_state[0:dist_state],
                                 previous_local_state[5:5 + dist_state])
        curr_distance = dist(s[:dist_state], s[5:5 + dist_state])
        d_reward = previous_distance - curr_distance
        prev_ori = self.get_goal_angle(previous_local_state)
        curr_ori = self.get_goal_angle(s, display=True)
        # this is to keep certain calculations correct
        ori_reward = prev_ori - curr_ori if abs(s[3]) < .01 and curr_distance > min_dist else 0
        # Disabled shaping variants, kept as in the original:
        """prev_box_from_hole = previous_local_state[:2] - previous_local_state[5:7]
        hole = s[5:7]
        aligned = hole - dot(hole, unitVector(prev_box_from_hole)) * unitVector(prev_box_from_hole)
        prev_align = dist(np.zeros(2), aligned)
        box_from_hole = s[:2] - s[5:7]
        hole = s[5:7]
        aligned = hole - dot(hole, unitVector(box_from_hole)) * unitVector(box_from_hole)
        curr_align = dist(np.zeros(2), aligned)
        align_reward = prev_align - curr_align"""
        """prev_distance_to_box = dist(np.zeros(3), previous_local_state[:3])
        distance_to_box = dist(np.zeros(3), s[:3])
        box_reward = prev_distance_to_box - distance_to_box"""
        """if self.prev_action_was_valid:
            return -.05
        else:
            return -.3"""
        if self.prev_action_was_valid:
            return 3 * np.round(.5 * np.round(d_reward, 2) + .5 * np.round(ori_reward, 2), 3) - .1
        else:
            return -.3
def succeeded(self, s):
    assert isinstance(self.simulation_name, str)
    if self.has_box_in_simulation:
        if self.simulation_name == 'elevated_scene':
            return dist(s[:2], s[5:7]) < .3 and self.box_z_global < .2 and self.bot_z_global > .3
        if self.simulation_name == 'flat_scene':
            return dist(s[:3], s[5:8]) < .3 and abs(self.box_ori_global) < .3  # last part is added
        if self.simulation_name == 'slope_scene':
            return dist(s[:3], s[5:8]) < .3 and abs(self.box_ori_global) < .3  # last part is added
    else:
        return dist(s[5:8], np.zeros(3)) < .2
def rewardFunction(self, s, a, s_n):
    currDist = dist(s_n, np.zeros(s_n.shape))
    if currDist < .5:
        return (1, 1)
    # Penalty that shrinks as action magnitudes approach 3; skipped when
    # actions come from an argmax (discrete) policy.
    reg = .1 * np.sum(3 - np.abs(a)) if self.a != "argmax" else 0
    prev = self.prev['S']
    prevOri = unitVector(prev)
    ori = unitVector(s_n)
    r_ori = abs(ori[0]) - abs(prevOri[0])
    deltDist = 10 * (dist(prev, np.zeros(prev.shape)) - dist(s_n, np.zeros(s_n.shape)))
    return ((deltDist + r_ori - reg) / self.success, 0)
def decide_to_restart(self, s):
    # Restart if far from the box, far from the goal, the box dropped, or the bot dropped.
    if self.simulation_name == 'elevated_scene':
        return (dist(s[:3], np.zeros(3)) > 3.5 or dist(s[5:8], np.zeros(3)) > 3.5
                or self.box_z_global < .2 or self.bot_z_global < .3 or self.currReward <= -20)
    if self.simulation_name == 'flat_scene':
        return (dist(s[:3], np.zeros(3)) > 3.5 or dist(s[5:8], np.zeros(3)) > 4
                or abs(self.box_y_global) > 1 or self.currReward <= -20)
    if self.simulation_name == 'slope_scene':
        return (abs(self.box_ori_global) > .4 or dist(s[:3], np.zeros(3)) > 2
                or self.currReward <= -20)
def checkPhase(self, s):
    if self.primitive == 'PUSH_IN_HOLE' or self.primitive == 'CROSS':
        if self.rob_height < .35:  # robot fell
            return (-3, 1)
    if self.primitive == 'PUSH_IN_HOLE':
        if self.box_height < .2:
            d = dist(s[:2], s[4:6])
            print('DISTANCE: ', d)
            if d < .2 and (self.hole_height > self.box_relative_height):
                return (10 - d * 5, 1)
            else:
                return (-3, 1)
    if self.primitive == 'CROSS':
        goal = s[4:6]
        d = dist(goal, np.zeros(2))
        if d < .2:
            print('distance: ', d)
            return (5, 1)
    if self.primitive == 'REORIENT':
        box_to_goal = s[4:6] - s[:2]
        goal_vector = unitVector(box_to_goal)
        goal_direction = math.atan(goal_vector[1] / goal_vector[0])
        curr_direction = s[3]
        d = dist(s[:2], s[4:6])
        if abs(self.box_y) > .8:  # .2
            return (-3, 1)
        if abs(goal_direction - curr_direction) < .15 or d < .2:
            return (5 - abs(self.box_y), 1)
    if self.primitive == 'PUSH_TOWARDS':
        d = dist(s[:2], s[4:6])
        box_to_goal = s[4:6] - s[:2]
        goal_vector = unitVector(box_to_goal)
        goal_direction = math.atan(goal_vector[1] / goal_vector[0])  # unused in this branch
        curr_direction = s[3]  # unused in this branch
        if abs(self.box_ori) > .25 or abs(self.box_y) > .35:
            return (-2, 1)
        if d < .2:
            return (5, 1)
    if self.primitive == 'SLOPE_PUSH':
        d = dist(s[:2], s[4:6])
        box_to_goal = s[4:6] - s[:2]
        goal_vector = unitVector(box_to_goal)
        goal_direction = math.atan(goal_vector[1] / goal_vector[0])  # unused in this branch
        curr_direction = s[3]  # unused in this branch
        if abs(self.box_ori) > .6 or abs(self.box_y) > .5:
            return (-2, 1)
        if d < .2:
            return (5, 1)
    return (0, 0)
def getNextAction(self, state, angle, i):
    '''Order of priority: Box orientation (theta), Contact Point, Agent orientation'''
    theta = angle[3]
    tilt = angle[4]
    to_contact = dist(state[:2],
                      np.array([self.x_contact[i][self.phase], self.y_contact[self.phase][i]]))
    bench = .25 if self.phase == 2 else .1
    if (i != 0 and i != self.num_agents - 1) or self.phase == 0 or self.phase == 1:
        if theta > self.angle_threshold:  # you're ahead
            action = 3
        elif tilt or to_contact > bench:  # far from your contact point
            action = 1
        elif abs(angle[2]) > .2:  # orientation incorrect
            action = 2
        else:
            action = 0
    else:
        # Edge agents in the later phases prioritize reaching the contact point first.
        if tilt or to_contact > bench:  # far from your contact point
            action = 1
        elif theta > self.angle_threshold:  # you're ahead
            action = 3
        elif abs(angle[2]) > .2:  # orientation incorrect
            action = 2
        else:
            action = 0
    return action
def rewardFunction(self, s_n, a):
    tank, bridge = self.splitState(s_n.ravel())
    prevTank, prevBridge = self.splitState(self.prev['S'].ravel())
    if bridge[2] < .1 or tank[2] < .1:  # THIS IS A TEST
        return (0, 1)
    if self.phase == 1:
        if bridge[0] > -.5:  # TODO: Check this benchmark for bridge
            print('## Phase 1 Complete ##')
            self.phase += 1
            return (5, 0)
        vel_r = ((tank[0] - prevTank[0]) + (bridge[0] - prevBridge[0])) * 2
        ori_r = -1 * (abs(tank[5]) + abs(bridge[5])) * .08
        r = vel_r + ori_r
    elif self.phase == 2:
        if tank[0] > -.4:  # TODO: Check this benchmark for cross
            print(' ## Phase 2 Complete ##')
            self.phase += 1
            return (5, 0)
        vel_r = tank[0] - prevTank[0]
        move_r = -1 * dist(prevBridge[:3], bridge[:3])
        r = vel_r + move_r
    elif self.phase == 3:
        if bridge[0] > -.3:  # TODO: Check this benchmark for pull up
            print(' ## Success ##')
            return (10, 1)
        vel_r = bridge[0] - prevBridge[0]
        r = vel_r
    return (r, 0)
def checkConditions(self, full_state, a):
    # Given self.prev['A'] and the (already unraveled) state, check that each
    # agent has sufficiently executed its primitive.
    if a is None:
        return [True] * self.num_agents
    fill = []
    states = self.splitState(full_state)
    prev_states = self.splitState(self.prev['S'])
    angles = self.getAngles(full_state)
    box = states["box"]
    prevBox = prev_states['box']
    for i in range(self.num_agents):
        k = "robot" + str(i)
        angle = angles[k]
        theta = angle[3]
        act = self.actionMap[a[i]]
        curr = states[k]
        if act == "STRAIGHT_BOX":
            fill.append(self.timeOut(i))
        if act == "HOME_CONTACT":
            fill.append(self.timeOut(i) or self.fallingBehind(theta)
                        or dist(curr[:2], np.array([-.58, self.y_contact[self.phase][i]])) < .3)
        if act == "CHANGE_ANGLE":
            fill.append(self.timeOut(i) or self.fallingBehind(theta)
                        or abs(box[2] - curr[2]) < .05)
        if act == "BACK":
            fill.append(self.timeOut(i))
    return fill
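# Several methods in this file rely on splitState to carve the flat simulator
# state vector into per-body pieces. Its real layout is not shown here (some
# variants return a dict keyed by body name, others a tuple per agent). Below
# is a minimal sketch of the dict-returning form with a hypothetical fixed
# segment width; both the width and the ordering are assumptions.
BODY_STATE_WIDTH = 6  # assumed: x, y, z, roll, pitch, yaw per body


def splitState_sketch(full_state, num_agents=2):
    """Split a flat state vector into {'box': ..., 'robot0': ..., ...} slices."""
    s = np.asarray(full_state).ravel()
    parts = {'box': s[:BODY_STATE_WIDTH]}
    for i in range(num_agents):
        start = (i + 1) * BODY_STATE_WIDTH
        parts['robot' + str(i)] = s[start:start + BODY_STATE_WIDTH]
    return parts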
def checkConditions(self, full_state, a, complete=True):
    # Given self.prev['A'] and the (already unraveled) state, check that we've
    # sufficiently executed the primitive.
    if a is None:
        return True
    a = self.actionMap[a]
    s = np.array(full_state).ravel()
    goal_angles, align_y_angles, cross_angles, left_angle, right_angle = self.getAngles(s)
    theta, phi = goal_angles
    alpha, beta, to_align = align_y_angles
    goal1, goal2 = cross_angles
    if a == "ANGLE_TOWARDS":
        return abs(theta - np.pi / 2) < 5e-2 or self.counter == 0
    if a == "ALIGN_Y":
        return to_align < .1 or self.counter == 0
    if a == "APPROACH":
        return dist(s[:2], np.zeros(2)) < .7 or self.counter == 0
    if a == 'PUSH_IN':
        if self.primitive == 'PUSH_IN_HOLE':
            return self.box_height < .35 or self.counter == 0
        return self.counter == 0
    return self.counter == 0
def get_neighbors(self, node, radius, inclusions, exclusions):
    # Exclusions always win; otherwise connect anything within the radius,
    # plus explicitly included pairs that fall outside it.
    neighbors = []
    for node_other in self.nodes:
        if (node.coords, node_other.coords) in exclusions:
            continue
        elif dist(node, node_other) < radius and node_other != node:
            neighbors.append(node_other)
        elif (node.coords, node_other.coords) in inclusions:
            neighbors.append(node_other)
    return neighbors
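# For context, a sketch of how the neighbor query above might be exercised.
# The Node and Roadmap classes here are hypothetical: the real code only needs
# .coords (a hashable identifier for inclusion/exclusion pairs) and whatever
# dist() consumes. Note that dist(node, node_other) above is assumed to accept
# node objects, unlike the array-based dist() calls elsewhere in this file.
def _demo_get_neighbors():
    class Node:
        def __init__(self, coords):
            self.coords = tuple(coords)
            self.pos = np.array(coords, dtype=float)

    def node_dist(a, b):
        return float(np.linalg.norm(a.pos - b.pos))

    class Roadmap:
        def __init__(self, nodes):
            self.nodes = nodes

        def get_neighbors(self, node, radius, inclusions, exclusions):
            neighbors = []
            for node_other in self.nodes:
                if (node.coords, node_other.coords) in exclusions:
                    continue  # excluded pairs are never neighbors
                elif node_dist(node, node_other) < radius and node_other != node:
                    neighbors.append(node_other)
                elif (node.coords, node_other.coords) in inclusions:
                    neighbors.append(node_other)  # forced edge beyond the radius
            return neighbors

    a, b, c = Node((0, 0)), Node((1, 0)), Node((5, 0))
    graph = Roadmap([a, b, c])
    out = graph.get_neighbors(a, radius=2.0,
                              inclusions={(a.coords, c.coords)},   # keep despite distance
                              exclusions={(a.coords, b.coords)})   # drop despite proximity
    assert [n.coords for n in out] == [(5, 0)]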
def checkPhase(self, s):
    if self.primitive == 'PUSH_IN_HOLE' or self.primitive == 'CROSS':
        if self.rob_height < .35:  # robot fell
            return (-3, 1)
    if self.primitive == 'PUSH_IN_HOLE':
        if self.box_height < .2:
            d = dist(s[:2], s[4:6])
            print('DISTANCE: ', d)
            if d < .2:
                # NOTE: we are training just the first phase (get box into hole)
                return (10 - d * 5, 1)
            else:
                return (-3, 1)
    if self.primitive == 'CROSS':
        goal = s[4:6]
        d = dist(goal, np.zeros(2))
        if d < .2:
            print('distance: ', d)
            return (5, 1)
    if self.primitive == 'REORIENT':
        box_to_goal = s[4:6] - s[:2]
        goal_vector = unitVector(box_to_goal)
        goal_direction = math.atan(goal_vector[1] / goal_vector[0])
        curr_direction = s[3]
        if abs(self.box_y) > .1:
            return (-3, 1)
        if abs(goal_direction - curr_direction) < .15:
            return (5 - 20 * abs(self.box_y), 1)
    if self.primitive == 'PUSH_TOWARDS':
        d = dist(s[:2], s[4:6])
        box_to_goal = s[4:6] - s[:2]
        goal_vector = unitVector(box_to_goal)
        goal_direction = math.atan(goal_vector[1] / goal_vector[0])  # unused in this branch
        curr_direction = s[3]  # unused in this branch
        if abs(self.box_ori) > .25 or abs(self.box_y) > .35:
            return (-2, 1)
        if d < .2:
            return (5, 1)
    return (0, 0)
def checkConditions(self, full_state, a):
    # Given self.prev['A'] and the (already unraveled) state, check that each
    # agent has sufficiently executed its primitive.
    fill = []
    states = self.splitState(full_state)
    prev_states = self.splitState(self.prev['S'])
    angles = self.getAngles(full_state)
    box = states["box"]
    for i in range(self.num_agents):
        if a[0] is None:
            fill.append(True)
            continue
        k = "robot" + str(i)
        angle = angles[k]
        theta = angle[3]
        act = self.actionMap[a[i]]
        curr = states[k]
        if act == "STRAIGHT_BOX":
            fill.append(self.timeOut(i))
        if act == "HOME_CONTACT":
            if self.explicit_control:
                fill.append(self.timeOut(i) or self.fallingBehind(theta)
                            or dist(curr[:2], np.array([self.x_contact, self.contact[i]])) < .35)
            else:
                fill.append(self.timeOut(i)
                            or dist(curr[:2], np.array([self.x_contact, self.contact[i]])) < .35)
        if act == "CHANGE_ANGLE":
            if self.explicit_control:
                fill.append(self.timeOut(i) or self.fallingBehind(theta)
                            or abs(box[2] - curr[2]) < .05)
            else:
                # box[0] in the original looks like a typo for box[2] (the box
                # orientation), matching the explicit-control branch above.
                fill.append(self.timeOut(i) or abs(box[2] - curr[2]) < .05)
        if act == "BACK":
            fill.append(self.timeOut(i))
    return fill
def isValidAction(self, s, a, angle, i):
    act = self.actionMap[a]
    ori = angle[2]
    if act == "STRAIGHT_BOX":
        return True
    if act == "HOME_CONTACT":
        # Invalid if already at the contact point.
        return not dist(s[:2], np.array([self.x_contact, self.contact[i]])) < .35
    if act == "CHANGE_ANGLE":
        # Invalid if the orientation is already aligned.
        return not abs(ori) < .05
    if act == "BACK":
        return True
    return
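# actionMap is referenced throughout but never defined in this file. From the
# string comparisons above and the branch comments in getNextAction, it is
# assumed to map discrete action indices to primitive names; the ordering
# below is an inference, not confirmed by the source.
actionMap_sketch = {
    0: "STRAIGHT_BOX",   # push the box straight ("for now just push")
    1: "HOME_CONTACT",   # return to the assigned contact point
    2: "CHANGE_ANGLE",   # correct the agent's orientation
    3: "BACK",           # back off when ahead of the box
}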
def checkPhase(self, s):
    s = s.ravel()
    theta = s[2]
    phi = s[3]  # unused here
    z = s[4]    # unused here
    to_contact = dist(s[:2], np.zeros(2))
    if theta > .45 or to_contact > 1.2:
        print('#### Failed out')
        return (-3, 1)
    if self.box_height >= self.goal_height and self.dist_box_from_goal < 1:
        print('#### SUCCESS')
        return (10, 1)
    return (0, 0)
def receiveState(self, msg):
    if self.restart_timer:
        self.start_time = time.time()
        self.restart_timer = False
    floats = vrep.simxUnpackFloats(msg.data)
    restart = 0
    r = None
    self.dist_box_from_goal = dist(np.array(floats[0:2]), np.array(floats[6:8]))
    features = self.feature_joint_2_feature(floats)
    floats = self.feature_joint_2_joint(np.array(floats).ravel())
    s = np.array(floats).reshape(1, -1)
    angles = self.getAngles(s)
    states = self.splitState(s)
    self.box_height = states['box'][-1]
    a = self.sendAction(s)
    if self.mode == 'GET_STATE_DATA':
        if self.changeAction[0]:
            self.curr_rollout1.append(features[0])
        if self.changeAction[1]:
            self.curr_rollout2.append(features[1])
    rest = 0
    for i in range(self.num_agents):
        loc = 'robot' + str(i)
        angle = angles[loc]
        curr_state = self.getNetInput(states[loc], angle, i)
        if isinstance(self.prev['S'][i], np.ndarray) and isinstance(self.prev['A'][i], int):
            prevAngle = self.prevAngles[loc]
            prev_state = self.getNetInput(self.prev['S'][i], prevAngle, i)
            r, restart = self.rewardFunction(curr_state, prev_state, i)
            rest = restart if restart == 1 else rest
            if self.changeAction[i] or rest:
                # Penalize transitions whose chosen primitive was invalid, unless restarting.
                r = r if self.isValidAction(self.prev['S'][i], self.prev['A'][i], angle, i) or rest else -1
                self.agent.store(prev_state, self.prev["A"][i],
                                 np.array([r]).reshape(1, -1), curr_state, a[i], restart)
                self.currReward += r
        if self.changeAction[i]:
            self.prev["S"][i] = states[loc]
            self.prev["A"][i] = int(a[i])
            self.prevAngles[loc] = angles[loc]
    if any(self.changeAction) and self.trainMode:
        loss = self.agent.train()
    if restart and r > 0 and self.mode == 'GET_STATE_DATA':
        self.curr_rollout1.append(features[0])
        self.curr_rollout2.append(features[1])
    self.restartProtocol(rest, succeeded=r > 0 if r else False)
    return
def getAux(self, pos, prevPos, blockPos, prevBlock, ori, prevOri, phase):
    if phase == 1:
        dist_r = dist(prevPos, blockPos) - dist(pos, blockPos)
        block_r = blockPos[0] - prevBlock[0]
        prevVec = unitVector(vector(prevOri))
        vec = unitVector(vector(ori))
        goal = unitVector(blockPos[:2] - pos[:2])
        prevDot = dot(prevVec, goal)
        currDot = dot(vec, goal)
        ori_r = currDot - prevDot
        return ((block_r + dist_r + 2 * ori_r) * self.w_phase1 - .01, 0)
    if phase == 2:
        goal = np.array([.80, blockPos[1], pos[2]])
        delta = dist(pos, goal)
        prevDelta = dist(prevPos, goal)
        dist_r = prevDelta - delta
        y_r = -abs(blockPos[1] - pos[1])
        return ((dist_r + .15 * y_r - .05 * abs(ori)) * self.w_phase3 - .01, 0)
def sendActionForPlan(self, states, phase):
    s = states['feature']
    if dist(s[:3], s[5:8]) < .3:
        msg = Int8()
        msg.data = 1
        self.changePoint.publish(msg)
    action_index = self.box_policy.get_action(self.concatenate_identifier(s))
    self.controller.goal = s.ravel()[:2]
    action = self.controller.getPrimitive(
        self.controller.feature_2_task_state(s.ravel()), self.action_map[action_index])
    # NOTE: the original reused `msg` here, which is undefined when the branch
    # above does not run (and an Int8 has no .x/.y when it does). A fresh
    # geometry_msgs Vector3 is assumed as the intended message type.
    msg = Vector3()
    msg.x, msg.y = action[0], action[1]
    self.pubs[self.name].publish(msg)
    return
def reward_function(self, s):
    s = s.ravel()
    succeeded = self.succeeded(s)
    done = self.decide_to_restart(s)
    if succeeded:
        if self.simulation_name == 'elevated_scene':
            return 10 - dist(s[:3], s[5:8]) * 5
        if self.simulation_name == 'flat_scene':
            return 10 - abs(self.box_ori_global) * 3
        if self.simulation_name == 'slope_scene':
            return 10 - abs(self.box_ori_global) * 3
    if done and not succeeded:
        return -3
    else:
        if self.prev_action_was_valid:
            return -.25
        else:
            return -.4
def decide_to_restart(self, s):
    assert isinstance(self.simulation_name, str)
    # Restart if far from the box, far from the goal, the box dropped, or the
    # bot dropped. Returns a tuple (restart, done).
    # TODO: Get rid of this
    max_steps = 100 if self.simulation_name == 'slope_scene' else 50
    if self.num_steps > max_steps:
        return True, False
    failed = False
    if self.simulation_name == 'elevated_scene':
        failed = (dist(s[:3], np.zeros(3)) > 4 or dist(s[5:8], np.zeros(3)) > 5
                  or self.box_z_global < .2 or self.bot_z_global < .3)
    if self.simulation_name == 'flat_scene':
        failed = (dist(s[:3], np.zeros(3)) > 5 or dist(s[5:8], np.zeros(3)) > 5
                  or abs(self.box_y_global) > 2)
    if self.simulation_name == 'slope_scene':
        failed = (abs(self.box_ori_global) > 1 or dist(s[:3], np.zeros(3)) > 5
                  or dist(s[:3], s[5:8]) > 10)
    return failed, failed
def checkConditions(self, full_state, a, complete=True):
    # Given self.prev['A'] and the (already unraveled) state, check that we've
    # sufficiently executed the primitive.
    if a is None:
        return True
    s = np.array(full_state).ravel()
    goal_angles, align_y_angles, cross_angles, left_angle, right_angle = self.getAngles(s)
    theta, phi = goal_angles
    alpha, beta, to_align = align_y_angles
    goal1, goal2 = cross_angles
    if a == "ANGLE_TOWARDS":
        return abs(theta - np.pi / 2) < 5e-2
    if a == "ANGLE_TOWARDS_GOAL":
        return abs(goal1 - np.pi / 2) < 5e-2
    if a == "ALIGN_Y":
        return to_align < .1
    if a == "APPROACH":
        return dist(s[:2], np.zeros(2)) < .7
    if a == 'PUSH_IN':
        return False
    return False
def getNextAction(self, state, angle, i):
    '''Order of priority: Box orientation (theta), Contact Point, Agent orientation'''
    theta = angle[3]
    tilt = angle[4]
    ori = angle[2]
    if self.explicit_control:  # outdated
        to_contact = dist(state[:2], np.array([self.x_contact, self.contact[i]]))
        if theta > self.angle_threshold:  # you're ahead or the box is slipping away
            action = 3
        elif to_contact > .5:  # far from your contact point
            action = 1
        elif abs(ori) > .5:  # orientation incorrect
            action = 2
        elif tilt:
            action = 3
        else:
            action = 0  # for now just push
        return action
    else:
        # In the state information: relative position to contact point, theta,
        # orientation angle. Total: 4
        return self.agent.get_action(self.getNetInput(state, angle, i))
def getAngles(self, s):
    states = self.splitState(s)
    box = states['box']
    box_ori = box[2]
    angles = {}
    if self.phase == 1 or self.phase == 3:  # these are transitioning phases
        reached = True
        for i in range(self.num_agents):
            to_contact = dist(
                states["robot" + str(i)][:2],
                np.array([self.x_contact[i][self.phase], self.y_contact[self.phase][i]]))
            if to_contact > .1:
                reached = False
        if reached:
            self.phase = (self.phase + 1) % 4
            print('PHASE', self.phase)
    for i in range(self.num_agents):
        curr = states["robot" + str(i)]
        ori = curr[2]
        pos = curr[:2]
        # Calculating angle for STRAIGHT_BOX and CHANGE_ANGLE
        phi = ori - box_ori
        direction = -1 if self.y_contact[self.phase][i] > 0 else 1
        # Special case: if you're smack in the middle, you should go back if either side is uneven
        direction = np.sign(box_ori) if self.y_contact[self.phase][i] == 0 else direction
        goal_relative_to_box = np.array(
            [self.x_contact[i][self.phase], self.y_contact[self.phase][i]])
        phi_trans = phi - np.pi / 2
        phi_trans = phi_trans + 2 * np.pi if phi_trans < -np.pi else phi_trans
        # Vector from current position to contact point, all relative to the box.
        goal = goal_relative_to_box - pos
        front_v = vector(phi_trans)
        right_v = vector(phi_trans - np.pi / 2)
        # Calculating angles for HOME_CONTACT
        relative_y = -dot(unitVector(goal), right_v)  # negative for convention
        relative_x = dot(unitVector(goal), front_v)
        buff = (-np.pi if relative_y < 0 else np.pi) if relative_x < 0 else 0  # map into (-pi, pi]
        alpha = np.arctan(relative_y / relative_x) + buff
        beta = -np.pi - alpha if alpha < 0 else np.pi - alpha
        contact_angles = (alpha, beta)
        # Boolean for a translation adjustment: if the box is too far in, tilt it towards you.
        ratio = abs(goal[1] / goal[0])  # ratio of horizontal to vertical distance from contact point
        tilt = (ratio > .8
                and np.sign(goal[1] * self.y_contact[self.phase][i]) < 0
                and abs(goal[0]) < .4)
        angles['robot' + str(i)] = (alpha, beta, phi, direction * box_ori, tilt)
    return angles
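# Note the asymmetric indexing used above: x_contact is agent-major
# (x_contact[i][phase]) while y_contact is phase-major (y_contact[phase][i]).
# A hypothetical shape sketch for two agents and four phases, purely to make
# that convention explicit; the actual offset values are not given here.
def _sketch_contact_tables(num_agents=2, num_phases=4):
    x_contact = np.zeros((num_agents, num_phases))  # x_contact[agent][phase]
    y_contact = np.zeros((num_phases, num_agents))  # y_contact[phase][agent]
    i, phase = 0, 1
    return np.array([x_contact[i][phase], y_contact[phase][i]])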
def calculate_distances_to_neighbors(self):
    for n in self.neighbors:
        self.distances_to_neighbors[n] = dist(n.pos, self.pos)
def getAngles(self, s):
    s = s.ravel()
    goal = self.goal  # this is the relative position of the box w.r.t. the robot
    if self.primitive in ('PUSH_IN_HOLE', 'REORIENT', 'PUSH_TOWARDS'):
        relative_y = goal[0]
        relative_x = -goal[1]
    if self.primitive == 'CROSS':
        relative_y = s[4]
        relative_x = -s[5]
    buff = (-np.pi if relative_y < 0 else np.pi) if relative_x < 0 else 0  # since we want to map -pi to pi
    theta = np.arctan(relative_y / relative_x) + buff
    phi = -np.pi - theta if theta < 0 else np.pi - theta
    goal_angles = (theta, phi)
    # NOTE: Depending on the primitive, these all reference the box and some other point past it as well
    box_from_hole = s[:2] - s[4:6]
    hole = s[4:6]
    aligned = hole - dot(hole, unitVector(box_from_hole)) * unitVector(box_from_hole)
    relative_x = -aligned[1]
    relative_y = aligned[0]
    buff = (-np.pi if relative_y < 0 else np.pi) if relative_x < 0 else 0
    alpha = np.arctan(relative_y / relative_x) + buff
    beta = -np.pi - alpha if alpha < 0 else np.pi - alpha
    align_y_angles = (alpha, beta, dist(aligned, np.zeros(2)))
    relative_y = s[4]
    relative_x = -s[5]
    buff = (-np.pi if relative_y < 0 else np.pi) if relative_x < 0 else 0
    goal1 = np.arctan(relative_y / relative_x) + buff
    goal2 = -np.pi - goal1 if goal1 < 0 else np.pi - goal1
    cross_angles = (goal1, goal2)
    pos = s[:2]
    psi = s[3]
    goal_relative_to_box = np.array([self.x_contact, self.contact['left']])
    rotation_matrix = np.array([[np.cos(psi), -np.sin(psi)],
                                [np.sin(psi), np.cos(psi)]])
    home = pos + rotation_matrix.dot(goal_relative_to_box)
    relative_y = home[0]
    relative_x = -home[1]
    buff = (-np.pi if relative_y < 0 else np.pi) if relative_x < 0 else 0
    alpha = np.arctan(relative_y / relative_x) + buff
    beta = -np.pi - alpha if alpha < 0 else np.pi - alpha
    left_angle = (alpha, beta)
    goal_relative_to_box = np.array([self.x_contact, self.contact['right']])
    home = pos + rotation_matrix.dot(goal_relative_to_box)
    relative_y = home[0]
    relative_x = -home[1]
    buff = (-np.pi if relative_y < 0 else np.pi) if relative_x < 0 else 0
    alpha = np.arctan(relative_y / relative_x) + buff
    beta = -np.pi - alpha if alpha < 0 else np.pi - alpha
    right_angle = (alpha, beta)
    return goal_angles, align_y_angles, cross_angles, left_angle, right_angle
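# The recurring `buff` correction above is a hand-rolled two-argument
# arctangent: it shifts np.arctan onto the (-pi, pi] range that np.arctan2
# produces directly. A quick check of that equivalence (assuming
# relative_x != 0, which the original code also requires):
def _check_buff_equals_arctan2():
    def angle_with_buff(relative_y, relative_x):
        buff = (-np.pi if relative_y < 0 else np.pi) if relative_x < 0 else 0
        return np.arctan(relative_y / relative_x) + buff

    for ry, rx in [(1, 1), (1, -1), (-1, -1), (-1, 1), (0.3, -2.0)]:
        assert np.isclose(angle_with_buff(ry, rx), np.arctan2(ry, rx))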
def receiveState(self, msg):
    if self.curr_episode[0] >= self.local_curr_episode_synchronous + 1 and not self.curr_episode[1]:
        print('ENVIRONMENT: ', self.simulation_name)
        self.done = False
        self.start_time = time.time()
        # Train for 20 episodes, test for 20 episodes.
        if self.agent.testing_to_record_progress and self.tracker_for_testing % 20 == 0:
            self.agent.testing_to_record_progress = False
            self.tracker_for_testing = 0
        elif (not self.agent.testing_to_record_progress and self.tracker_for_testing % 20 == 0
              and len(self.agent.policy.exp) >= self.agent.initial_explore):
            self.agent.testing_to_record_progress = True
            self.tracker_for_testing = 0
        self.local_curr_episode_synchronous += 1
        self.tracker_for_testing += 1
    if self.agent.testing_to_record_progress:
        print(' ##### TESTING ##### ')
    elif len(self.agent.policy.exp) < self.agent.initial_explore:
        print(' ##### EXPLORATION ##### ')
    else:
        print(' ##### TRAINING ##### ')
    floats = vrep.simxUnpackFloats(msg.data)
    self.bot_z_global = floats[self.s_n + 1]
    self.box_z_global = floats[self.s_n]
    self.box_y_global = floats[-2]
    self.box_ori_global = floats[-1]
    local_state = np.array(floats[:self.s_n]).ravel()
    adjusted_state_for_controls = self.controller.feature_2_task_state(local_state)
    changeAction = (self.changeAction(adjusted_state_for_controls,
                                      self.action_map[self.prev['A'][0]],
                                      complete=False)
                    if isinstance(self.prev['A'], tuple) else True)
    s = np.array(local_state).reshape(1, -1)
    succeeded = self.succeeded(s.ravel())
    restart, done = self.decide_to_restart(s.ravel())
    self.done = restart or succeeded
    reward = self.reward_function(s)
    if not self.curr_episode[1]:  # not yet finished with the episode, for syncing purposes
        if not self.done:  # we haven't been declared done
            if changeAction:
                self.counter = self.period
                action_index, action_control = self.sendAction(s, changeAction)
                self.num_steps += 1
                if self.isValidAction(adjusted_state_for_controls, self.action_map[action_index]):
                    # Only keep states that are not too close to recent rollout entries.
                    if len(self.curr_rollout) > 0:
                        if all([dist(r, s.ravel()) > .3 for r in self.curr_rollout[-5:]]):
                            self.curr_rollout.append(s.ravel())
                    else:
                        self.curr_rollout.append(s.ravel())
                    if self.mode == 'GET_STATE_DATA':
                        print('Length data for collection: ', len(self.curr_rollout) + self.curr_size)
                if isinstance(self.prev["S"], np.ndarray) and not self.agent.testing_to_record_progress:
                    print(self.action_map[self.prev['A'][0]], reward)
                    self.agent.store(self.prev['S'], np.array(self.prev["A"][0]), reward, s, 0,
                                     done or succeeded, self.prev['A'][0])
                    if self.trainMode:
                        loss = self.agent.train(self.curr_episode[0])
                self.prev["S"] = s
                self.prev["A"] = (int(action_index), action_control)
                if not self.curr_episode[1]:
                    self.currReward += reward
            else:
                action_index, a = self.sendAction(s, changeAction)
        else:
            # Episode declared done: record the final transition and restart.
            if isinstance(self.prev["S"], np.ndarray):
                prev_s = self.prev['S']
                if not self.agent.testing_to_record_progress:
                    self.agent.store(prev_s, np.array(self.prev["A"][0]), reward, s, 0,
                                     done or succeeded, self.prev['A'][0])
                print('Last transition recorded with reward: ', reward)
                self.currReward += reward
                if succeeded:
                    assert reward > 0
                    print(' ##### SUCCESS ')
                    print(' ##### Success reward: ', reward)
            self.restartProtocol(self.done, succeeded=succeeded)
    return