def goaldistance(angles): th1, th2, th3 = angles foot1coords = coords('mybot', 'LOIII') goalcoords = coords('goal', '') R = transformations.quaternion_matrix(foot1coords[3:]) foot1tip_rframe = [0.2, 0, 0, 1] foot1tip_wframe = np.matmul(R, foot1tip_rframe) foot2basex_rframe = 0.15 * np.sin(th1) + 0.15 * np.sin( th1 + th2) + 0.131 * np.sin(th1 + th2 + th3) foot2basey_rframe = 0 foot2basez_rframe = 0.141 + 0.15 * np.cos(th1) + 0.15 * np.cos( th1 + th2) + 0.131 * np.cos(th1 + th2 + th3) foot2base_wframe = np.matmul( R, [foot2basex_rframe, foot2basey_rframe, foot2basez_rframe, 1]) foot2tipx_rframe = foot2basex_rframe + 0.2 * np.cos(th1 + th2 + th3) foot2tipy_rframe = 0 foot2tipz_rframe = foot2basez_rframe + 0.2 * np.sin(th1 + th2 + th3) foot2tip_wframe = np.matmul( R, [foot2tipx_rframe, foot2tipy_rframe, foot2tipz_rframe, 1]) # get x position of each foot's base and tip, and goal foot1base = foot1coords[0] foot1tip = foot1coords[0] + foot1tip_wframe[0] foot2base = foot1coords[0] + foot2base_wframe[0] foot2tip = foot1coords[0] + foot2tip_wframe[0] goal = goalcoords[0] # get minimum distance to goal dist = np.min( (foot1base - goal, foot1tip - goal, foot2base - goal, foot2tip - goal)) return dist
def rewardfunc(angles, prevdist, time): th1, th2, th3 = angles foot1coords = coords('mybot', 'LOIII') goalcoords = coords('goal', '') R = transformations.quaternion_matrix(foot1coords[3:]) roll, pitch, yaw = transformations.euler_from_quaternion(foot1coords[3:]) foot1tip_rframe = [0.2, 0, 0, 1] foot1tip_wframe = np.matmul(R, foot1tip_rframe) foot2basex_rframe = 0.15 * np.sin(th1) + 0.15 * np.sin( th1 + th2) + 0.131 * np.sin(th1 + th2 + th3) foot2basey_rframe = 0 foot2basez_rframe = 0.141 + 0.15 * np.cos(th1) + 0.15 * np.cos( th1 + th2) + 0.131 * np.cos(th1 + th2 + th3) foot2base_wframe = np.matmul( R, [foot2basex_rframe, foot2basey_rframe, foot2basez_rframe, 1]) foot2tipx_rframe = foot2basex_rframe - 0.2 * np.cos(th1 + th2 + th3) foot2tipy_rframe = 0 foot2tipz_rframe = foot2basez_rframe - 0.2 * np.sin(th1 + th2 + th3) foot2tip_wframe = np.matmul( R, [foot2tipx_rframe, foot2tipy_rframe, foot2tipz_rframe, 1]) # get x position of each foot's base and tip, and goal foot1base = foot1coords[0] foot1tip = foot1coords[0] + foot1tip_wframe[0] foot2base = foot1coords[0] + foot2base_wframe[0] foot2tip = foot1coords[0] + foot2tip_wframe[0] goal = goalcoords[0] # get minimum distance to goal goaldist = np.min( (foot1base - goal, foot1tip - goal, foot2base - goal, foot2tip - goal)) distreward = prevdist - goaldist print '-------------------------distance reward :', distreward # time factor - decrease reward for slower policies timefactor = 1.0 - np.exp(-time / 100.0) print '-------------------------time penalty :', -timefactor # tilt penalty - don't want the robot to reward falling over sideways tiltpenalty = -abs(yaw / (np.pi)) print '-------------------------tilt penalty :', tiltpenalty # reward reward = distreward + tiltpenalty - timefactor return reward, goaldist
def statefinder(currAngles, world): th1, th2, th3 = currAngles positions = [[np.linspace(1.0, -1.0, 41)], [np.linspace(0, 1.0, 11)], [np.linspace(1.0, -1.0, 41)], [np.linspace(0, 1.0, 11)]] foot1coords = coords('mybot', 'LOIII') foot1xpos = foot1coords[0] foot1zpos = foot1coords[2] R = transformations.quaternion_matrix(foot1coords[3:]) foot2x_rframe = 0.15 * np.sin(th1) + 0.15 * np.sin( th1 + th2) + 0.131 * np.sin(th1 + th2 + th3) foot2y_rframe = 0 foot2z_rframe = 0.141 + 0.15 * np.cos(th1) + 0.15 * np.cos( th1 + th2) + 0.131 * np.cos(th1 + th2 + th3) foot2_wframe = np.matmul(R, [foot2x_rframe, foot2y_rframe, foot2z_rframe, 1]) foot2xpos = foot1xpos + foot2_wframe[0] foot2zpos = foot1zpos + foot2_wframe[2] foot1x = int(np.round((foot1xpos - 1.0) / (-0.05))) foot1z = np.max((int(np.round(foot1zpos / (0.1))), 0)) foot2x = int(np.round((foot2xpos - 1.0) / (-0.05))) foot2z = np.max((int(np.round(foot2zpos / (0.1))), 0)) # state and reward state = [foot1x, foot1z, foot2x, foot2z] goalx = coords('goal', '')[0] reward = world[state[0], state[1]] + ( 1.0 - abs(np.min(((foot1xpos - goalx), (foot2xpos - goalx))))) * 0.5 # if it's fallen over but hasn't reached the goal, penalise if foot1zpos > 0.01 and foot2zpos > 0.01 and reward < 1: reward = -1 # if it's reached joint limits, penalise if th1 > 2.17 or th1 < -1.17: reward = -1 if th2 > 2.00 or th2 < -0.86: reward = -1 if th3 > 1.98 or th3 < -0.77: reward = -1 return state, reward
def statefinder(currAngles): th1, th2, th3 = currAngles positions = [[np.linspace(1.0, -1.0, 41)], [np.linspace(0, 1.0, 11)], [np.linspace(1.0, -1.0, 41)], [np.linspace(0, 1.0, 11)]] foot1coords = coords('mybot', 'LOIII') R = transformations.quaternion_matrix(foot1coords[3:]) foot2basex_rframe = 0.15 * np.sin(th1) + 0.15 * np.sin( th1 + th2) + 0.131 * np.sin(th1 + th2 + th3) foot2basey_rframe = 0 foot2basez_rframe = 0.141 + 0.15 * np.cos(th1) + 0.15 * np.cos( th1 + th2) + 0.131 * np.cos(th1 + th2 + th3) foot2base_wframe = np.matmul( R, [foot2basex_rframe, foot2basey_rframe, foot2basez_rframe, 1]) foot1x = foot1coords[0] foot1z = foot1coords[2] foot2x = foot1coords[0] + foot2base_wframe[0] foot2z = foot1coords[2] + foot2base_wframe[2] # quantise values to states foot1xstate = np.max( (int(np.round(xs * (foot1x - xmin) / (xmax - xmin))), 0)) foot1zstate = np.max( (int(np.round(zs * (foot1z - zmin) / (zmax - zmin))), 0)) foot2xstate = np.max( (int(np.round(xs * (foot2x - xmin) / (xmax - xmin))), 0)) foot2zstate = np.max( (int(np.round(zs * (foot2z - zmin) / (zmax - zmin))), 0)) f1state = [foot1xstate, foot1zstate] f2state = [foot2xstate, foot2zstate] # state state = [f1state, f2state] return state
def statefinder(currAngles): th1, th2, th3 = currAngles positions = [[np.linspace(1.0,-1.0,41)], [np.linspace(0,1.0,11)], [np.linspace(1.0,-1.0,41)], [np.linspace(0,1.0,11)]] foot1coords = coords('mybot', 'LOIII') foot1x = foot1coords[0] foot1z = foot1coords[2] # quantise values to states foot1xq = np.max((int(np.round(xs*(foot1x-xmin)/(xmax-xmin))), 0)) foot1zq = np.max((int(np.round(zs*(foot1z-zmin)/(zmax-zmin))), 0)) th1q = np.max((int(np.round(th1s*(th1-th1min)/(th1max-th1min))), 0)) th2q = np.max((int(np.round(th2s*(th2-th2min)/(th2max-th2min))), 0)) th3q = np.max((int(np.round(th3s*(th3-th3min)/(th3max-th3min))), 0)) # state state = [foot1xq, foot1zq, th1q, th2q, th3q] return state