Example #1
0
def reward(cart_pole):
    x_threshold = 2.4
    # Heavy penalty when the cart leaves the track.
    if cart_pole.state[0] < -x_threshold or cart_pole.state[0] > x_threshold:
        return -500
    from continuous_cartpole import angle_normalize
    normalized_angle = angle_normalize(cart_pole.state[2])
    # Bonus multiplier when the pole is nearly upright (unused by the active return below).
    special_sauce = 2 if -0.1 <= normalized_angle <= 0.1 else 1
    # Earlier shaped-reward attempts, kept commented out:
    # return special_sauce*(1-np.abs(normalized_angle/np.pi)) + 0.01 - 0.2*np.abs(cart_pole.state[0]/x_threshold)
    # return 5*(1-np.abs(normalized_angle/np.pi)) + 0.2
    return 1
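Every example in this collection calls angle_normalize from continuous_cartpole, which is never shown here. A minimal sketch of the usual convention (an assumption; the real module may differ) wraps the raw pole angle into [-π, π) so that 0 means the pole is upright:

import numpy as np

def angle_normalize(theta):
    # Assumed helper: wrap an angle in radians into [-pi, pi), with 0 = pole upright.
    return ((theta + np.pi) % (2 * np.pi)) - np.pi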
Example #2
0
def reward(cart_pole):
    # Fail reward when the cart leaves the track.
    if cart_pole.state[0] < -cart_pole.x_threshold or cart_pole.state[0] > cart_pole.x_threshold:
        return -1
    # Tiered reward: larger the closer the pole is to upright.
    abs_theta = math.fabs(angle_normalize(cart_pole.state[2]))
    if abs_theta <= 0.1:
        return 1
    elif abs_theta <= 0.5:
        return 0.5
    elif abs_theta <= 1:
        return 0.3
    elif abs_theta <= 2:
        return 0.2
    elif abs_theta <= 3:
        return 0.1
    else:
        return 0
Example #3
0
def reward(env):
    x, x_dot, theta, theta_dot = env.state
    theta_norm = angle_normalize(theta)

    # Large penalty when the cart leaves the track; otherwise reward keeping the
    # pole close to upright while damping its angular velocity.
    if math.fabs(x) > 2.4:
        return -100
    elif abs(theta_norm) < np.pi / 8:
        return 2 * np.cos(theta_norm) - theta_dot**2 + 4
    elif abs(theta_norm) < np.pi / 2:
        return 2 * np.cos(theta_norm) - 0.1 * theta_dot**2 + 2
    else:
        return np.cos(theta_norm) - 0.1 * theta_dot**2 * np.cos(theta_norm) - 0.01 * x**2
def rf_info_pos(cart_pole):
	""" Sparse positive Reward Function:
	This Reward Function returns a positive reward in the interval [0, 1] given by:

		r = ½(cos(θ) + 1)

	where:
		· θ is the angle of the pole (θ=0 upwards)

	:param cart_pole: CartPole Environment from OpenAI Gym
	:return: (float) reward between [0, 1]
	"""
	theta = angle_normalize(cart_pole.state[2])

	return (np.cos(theta) + 1) / 2
def rf_spar_pos(cart_pole):
	""" Sparse positive Reward Function:
	This Reward Function returns +1 when the pole is within the desired threshold, else it returns 0

			⎧ 1   if -0.1 ≤ θ ≤ 0.1
		r = ⎨
			⎩ 0   else

	where:
		· θ is the angle of the pole (θ=0 upwards)

	:param cart_pole: CartPole Environment from OpenAI Gym
	:return: (int) reward in {0, +1}
	"""

	theta = angle_normalize(cart_pole.state[2])

	return 1 if -0.1 <= theta <= 0.1 else 0
def rf_info2d_pos(cart_pole):
	""" Sparse positive Reward Function:
	This Reward Function returns a positive reward in the interval [0, 1] given by:

			⎧ ¼(cos(θ) + 1)(cos(πx/2.4) + 1)   if -2.4 ≤ x ≤ 2.4
		r = ⎨
			⎩ 0   else

	where:
		· x is the horizontal position of the cart, in [-2.4, 2.4]
		· θ is the angle of the pole (θ=0 upwards)

	:param cart_pole: CartPole Environment from OpenAI Gym
	:return: (float) reward between [0, 1]
	"""
	x = cart_pole.state[0]
	theta = angle_normalize(cart_pole.state[2])

	if -cart_pole.x_threshold <= x <= cart_pole.x_threshold:
		return (np.cos(theta) + 1) * (np.cos(np.pi * x / cart_pole.x_threshold) + 1) / 4
	else:
		return 0
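The three rf_*_pos functions above only need an object exposing state and x_threshold, so they can be sanity-checked without running the full environment. A minimal sketch, assuming the state layout [x, x_dot, theta, theta_dot] used throughout these examples (FakeCartPole is a made-up stand-in, not part of the original code):

import numpy as np

class FakeCartPole:
	# Hypothetical stand-in for the Gym environment, just enough for the reward functions.
	x_threshold = 2.4

	def __init__(self, x=0.0, theta=0.0):
		self.state = np.array([x, 0.0, theta, 0.0])

for theta in (0.0, 0.05, np.pi / 4, np.pi):
	cp = FakeCartPole(theta=theta)
	print(theta, rf_info_pos(cp), rf_spar_pos(cp), rf_info2d_pos(cp))

At θ=0 all three return 1; at θ=π the informative variants fall to 0, while the sparse one is already 0 for any |θ| > 0.1.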
Example #7
0
def reward_old(env):
    x, x_dot, theta, theta_dot = env.state
    true_theta = angle_normalize(theta)

    r_x, r_theta, r_thetadot, r_x_dot = 0, 0, 0, 0
    if math.fabs(x) > 2.35:
        r_x = -1000
    else:
        r_x = 1

    if np.fabs(true_theta) > np.pi / 2:
        # Under
        r_x_dot = -0.001 * np.fabs(x_dot)
        r_theta = np.cos(true_theta) - abs(np.sin(theta) * x_dot * x**2)
        r_thetadot = -0.001 * theta_dot**2
    elif np.fabs(true_theta) < 0.3:
        # Close
        r_theta = gaussian(theta, 0, 0.5) + gaussian(theta_dot, 0, 0.5) + 1
    else:
        # Up not so close
        r_x_dot = -0.01 * x_dot**2
        r_theta = np.cos(true_theta)
        r_thetadot = -0.01 * theta_dot**2
    return r_x + r_theta + r_thetadot + r_x_dot
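reward_old relies on a gaussian helper that is not defined in this snippet. A plausible stand-in (an assumption; the original may use a normalized density instead) is an unnormalized Gaussian bell that peaks at 1 when the argument equals the mean:

import numpy as np

def gaussian(x, mu, sigma):
    # Assumed helper: unnormalized Gaussian bell, 1 at x == mu, approaching 0 far from it.
    return np.exp(-0.5 * ((x - mu) / sigma) ** 2)

With sigma = 0.5 this rewards both a small angle and a small angular velocity in the "Close" branch.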
Example #8
0
def reward_new(env):
    x, x_dot, theta, theta_dot = env.state
    true_theta = angle_normalize(theta)

    r_x, r_theta, r_thetadot, r_x_dot = 0, 0, 0, 0
    if math.fabs(x) > 2.35:
        r_x = -1000
    else:
        r_x = 1

    if np.fabs(true_theta) > np.pi / 2:
        # Under
        #r_x_dot = - 0.001 * np.fabs(x_dot)
        r_theta = -1
        r_thetadot = -0.01 * theta_dot**2
    elif np.fabs(true_theta) < 0.3:
        # Close
        r_theta = 100 - 0.01 * theta_dot**2
    else:
        # Up not so close
        r_x_dot = -0.01 * x_dot**2
        r_theta = np.cos(true_theta)
        r_thetadot = -0.01 * theta_dot**2
    return r_x + r_theta + r_thetadot + r_x_dot
def rf_inf(cart_pole, cos_pow=3):
	""" Informative Reward Function:
	This Reward Function returns a float reward according to:

			⎧ cos(θ)^cos_pow   if -2.4 ≤ x ≤ 2.4
		r = ⎨
			⎩ -1               else

	where:
		· x is the horizontal position of the cart
		· θ is the angular position of the pole (θ=0 upwards)

	:param cart_pole: CartPole Environment from OpenAI Gym
	:param cos_pow: (int) exponent applied to cos(θ); higher powers concentrate reward near θ=0
	:return: (float) reward in interval [-1, 1]
	"""

	x = cart_pole.state[0]
	theta = angle_normalize(cart_pole.state[2])

	if -cart_pole.x_threshold <= x <= cart_pole.x_threshold:
		return np.cos(theta) ** cos_pow
	else:
		return -1
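The cos_pow exponent controls how sharply rf_inf concentrates reward around the upright position. A quick check (illustrative only) makes the effect visible:

import numpy as np

# Higher powers of cos(theta) shrink the reward faster as the pole tilts away from upright.
for deg in (0, 15, 30, 60, 90):
	theta = np.deg2rad(deg)
	print(deg, [round(float(np.cos(theta) ** p), 3) for p in (1, 3, 5)])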
Example #10
0
def smooth_reward(cart_pole):
    x_threshold = 2.4
    if cart_pole.state[0] < -x_threshold or cart_pole.state[0] > x_threshold:
        return -10
    normalized_angle = angle_normalize(cart_pole.state[2])
    # Full reward only when the pole is within 0.1 rad of upright; tiny reward otherwise.
    return 1 if -0.1 <= normalized_angle <= 0.1 else 0.001
Example #11
0
def rf_default(cart_pole):
	if cart_pole.state[0] < -cart_pole.x_threshold or cart_pole.state[0] > cart_pole.x_threshold:
		return -1
	return 1 if -0.1 <= angle_normalize(cart_pole.state[2]) <= 0.1 else 0
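None of the examples show how a reward function is wired into training. One hedged way to use them, assuming a classic Gym-style ContinuousCartPoleEnv (the class name, constructor, and step() signature are assumptions; the real continuous_cartpole module may differ), is to recompute the reward from env.state after each step:

from continuous_cartpole import ContinuousCartPoleEnv  # assumed class name

env = ContinuousCartPoleEnv()
obs = env.reset()
episode_return = 0.0
for _ in range(500):
	action = env.action_space.sample()      # random policy, just to exercise the reward
	obs, _, done, info = env.step(action)   # ignore the built-in reward
	episode_return += rf_default(env)       # use Example #11's reward instead
	if done:
		break
print("episode return under rf_default:", episode_return)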