Пример #1
0
def goaldistance(angles):
    th1, th2, th3 = angles
    foot1coords = coords('mybot', 'LOIII')
    goalcoords = coords('goal', '')

    R = transformations.quaternion_matrix(foot1coords[3:])

    foot1tip_rframe = [0.2, 0, 0, 1]
    foot1tip_wframe = np.matmul(R, foot1tip_rframe)

    foot2basex_rframe = 0.15 * np.sin(th1) + 0.15 * np.sin(
        th1 + th2) + 0.131 * np.sin(th1 + th2 + th3)
    foot2basey_rframe = 0
    foot2basez_rframe = 0.141 + 0.15 * np.cos(th1) + 0.15 * np.cos(
        th1 + th2) + 0.131 * np.cos(th1 + th2 + th3)
    foot2base_wframe = np.matmul(
        R, [foot2basex_rframe, foot2basey_rframe, foot2basez_rframe, 1])

    foot2tipx_rframe = foot2basex_rframe + 0.2 * np.cos(th1 + th2 + th3)
    foot2tipy_rframe = 0
    foot2tipz_rframe = foot2basez_rframe + 0.2 * np.sin(th1 + th2 + th3)
    foot2tip_wframe = np.matmul(
        R, [foot2tipx_rframe, foot2tipy_rframe, foot2tipz_rframe, 1])

    # get x position of each foot's base and tip, and goal
    foot1base = foot1coords[0]
    foot1tip = foot1coords[0] + foot1tip_wframe[0]
    foot2base = foot1coords[0] + foot2base_wframe[0]
    foot2tip = foot1coords[0] + foot2tip_wframe[0]
    goal = goalcoords[0]

    # get minimum distance to goal
    dist = np.min(
        (foot1base - goal, foot1tip - goal, foot2base - goal, foot2tip - goal))
    return dist
Пример #2
0
def rewardfunc(angles, prevdist, time):
    th1, th2, th3 = angles
    foot1coords = coords('mybot', 'LOIII')
    goalcoords = coords('goal', '')

    R = transformations.quaternion_matrix(foot1coords[3:])
    roll, pitch, yaw = transformations.euler_from_quaternion(foot1coords[3:])

    foot1tip_rframe = [0.2, 0, 0, 1]
    foot1tip_wframe = np.matmul(R, foot1tip_rframe)

    foot2basex_rframe = 0.15 * np.sin(th1) + 0.15 * np.sin(
        th1 + th2) + 0.131 * np.sin(th1 + th2 + th3)
    foot2basey_rframe = 0
    foot2basez_rframe = 0.141 + 0.15 * np.cos(th1) + 0.15 * np.cos(
        th1 + th2) + 0.131 * np.cos(th1 + th2 + th3)
    foot2base_wframe = np.matmul(
        R, [foot2basex_rframe, foot2basey_rframe, foot2basez_rframe, 1])

    foot2tipx_rframe = foot2basex_rframe - 0.2 * np.cos(th1 + th2 + th3)
    foot2tipy_rframe = 0
    foot2tipz_rframe = foot2basez_rframe - 0.2 * np.sin(th1 + th2 + th3)
    foot2tip_wframe = np.matmul(
        R, [foot2tipx_rframe, foot2tipy_rframe, foot2tipz_rframe, 1])

    # get x position of each foot's base and tip, and goal
    foot1base = foot1coords[0]
    foot1tip = foot1coords[0] + foot1tip_wframe[0]
    foot2base = foot1coords[0] + foot2base_wframe[0]
    foot2tip = foot1coords[0] + foot2tip_wframe[0]
    goal = goalcoords[0]

    # get minimum distance to goal
    goaldist = np.min(
        (foot1base - goal, foot1tip - goal, foot2base - goal, foot2tip - goal))
    distreward = prevdist - goaldist
    print '-------------------------distance reward	:', distreward
    # time factor - decrease reward for slower policies
    timefactor = 1.0 - np.exp(-time / 100.0)
    print '-------------------------time penalty		:', -timefactor

    # tilt penalty - don't want the robot to reward falling over sideways
    tiltpenalty = -abs(yaw / (np.pi))
    print '-------------------------tilt penalty		:', tiltpenalty
    # reward
    reward = distreward + tiltpenalty - timefactor
    return reward, goaldist
Пример #3
0
def statefinder(currAngles, world):
    th1, th2, th3 = currAngles
    positions = [[np.linspace(1.0, -1.0, 41)], [np.linspace(0, 1.0, 11)],
                 [np.linspace(1.0, -1.0, 41)], [np.linspace(0, 1.0, 11)]]

    foot1coords = coords('mybot', 'LOIII')

    foot1xpos = foot1coords[0]
    foot1zpos = foot1coords[2]

    R = transformations.quaternion_matrix(foot1coords[3:])
    foot2x_rframe = 0.15 * np.sin(th1) + 0.15 * np.sin(
        th1 + th2) + 0.131 * np.sin(th1 + th2 + th3)
    foot2y_rframe = 0
    foot2z_rframe = 0.141 + 0.15 * np.cos(th1) + 0.15 * np.cos(
        th1 + th2) + 0.131 * np.cos(th1 + th2 + th3)
    foot2_wframe = np.matmul(R,
                             [foot2x_rframe, foot2y_rframe, foot2z_rframe, 1])

    foot2xpos = foot1xpos + foot2_wframe[0]
    foot2zpos = foot1zpos + foot2_wframe[2]

    foot1x = int(np.round((foot1xpos - 1.0) / (-0.05)))
    foot1z = np.max((int(np.round(foot1zpos / (0.1))), 0))
    foot2x = int(np.round((foot2xpos - 1.0) / (-0.05)))
    foot2z = np.max((int(np.round(foot2zpos / (0.1))), 0))

    # state and reward
    state = [foot1x, foot1z, foot2x, foot2z]
    goalx = coords('goal', '')[0]
    reward = world[state[0], state[1]] + (
        1.0 - abs(np.min(((foot1xpos - goalx), (foot2xpos - goalx))))) * 0.5

    # if it's fallen over but hasn't reached the goal, penalise
    if foot1zpos > 0.01 and foot2zpos > 0.01 and reward < 1:
        reward = -1
    # if it's reached joint limits, penalise
    if th1 > 2.17 or th1 < -1.17:
        reward = -1
    if th2 > 2.00 or th2 < -0.86:
        reward = -1
    if th3 > 1.98 or th3 < -0.77:
        reward = -1

    return state, reward
Пример #4
0
def statefinder(currAngles):
    th1, th2, th3 = currAngles
    positions = [[np.linspace(1.0, -1.0, 41)], [np.linspace(0, 1.0, 11)],
                 [np.linspace(1.0, -1.0, 41)], [np.linspace(0, 1.0, 11)]]

    foot1coords = coords('mybot', 'LOIII')
    R = transformations.quaternion_matrix(foot1coords[3:])

    foot2basex_rframe = 0.15 * np.sin(th1) + 0.15 * np.sin(
        th1 + th2) + 0.131 * np.sin(th1 + th2 + th3)
    foot2basey_rframe = 0
    foot2basez_rframe = 0.141 + 0.15 * np.cos(th1) + 0.15 * np.cos(
        th1 + th2) + 0.131 * np.cos(th1 + th2 + th3)
    foot2base_wframe = np.matmul(
        R, [foot2basex_rframe, foot2basey_rframe, foot2basez_rframe, 1])

    foot1x = foot1coords[0]
    foot1z = foot1coords[2]
    foot2x = foot1coords[0] + foot2base_wframe[0]
    foot2z = foot1coords[2] + foot2base_wframe[2]

    # quantise values to states
    foot1xstate = np.max(
        (int(np.round(xs * (foot1x - xmin) / (xmax - xmin))), 0))
    foot1zstate = np.max(
        (int(np.round(zs * (foot1z - zmin) / (zmax - zmin))), 0))
    foot2xstate = np.max(
        (int(np.round(xs * (foot2x - xmin) / (xmax - xmin))), 0))
    foot2zstate = np.max(
        (int(np.round(zs * (foot2z - zmin) / (zmax - zmin))), 0))

    f1state = [foot1xstate, foot1zstate]
    f2state = [foot2xstate, foot2zstate]

    # state
    state = [f1state, f2state]

    return state
Пример #5
0
def statefinder(currAngles):
    th1, th2, th3 = currAngles
    positions = [[np.linspace(1.0,-1.0,41)], 
	         [np.linspace(0,1.0,11)],
	         [np.linspace(1.0,-1.0,41)], 
	         [np.linspace(0,1.0,11)]] 
    
    foot1coords = coords('mybot', 'LOIII')

    foot1x = foot1coords[0]
    foot1z = foot1coords[2]
    
    # quantise values to states
    foot1xq = np.max((int(np.round(xs*(foot1x-xmin)/(xmax-xmin))), 0))
    foot1zq = np.max((int(np.round(zs*(foot1z-zmin)/(zmax-zmin))), 0))
    th1q = np.max((int(np.round(th1s*(th1-th1min)/(th1max-th1min))), 0))
    th2q = np.max((int(np.round(th2s*(th2-th2min)/(th2max-th2min))), 0))
    th3q = np.max((int(np.round(th3s*(th3-th3min)/(th3max-th3min))), 0))

    # state
    state = [foot1xq, foot1zq, th1q, th2q, th3q]

    return state