Example #1
0
def reward(cart_pole):
    x_threshold = 2.4
    # Heavy penalty when the cart leaves the track.
    if cart_pole.state[0] < -x_threshold or cart_pole.state[0] > x_threshold:
        return -500
    from continuous_cartpole import angle_normalize
    normalized_angle = angle_normalize(cart_pole.state[2])
    # Bonus multiplier when the pole is nearly upright (unused by the active return below).
    special_sauce = 2 if -0.1 <= normalized_angle <= 0.1 else 1
    # Earlier shaped-reward attempts, kept commented out:
    # return special_sauce*(1-np.abs(normalized_angle/np.pi)) + 0.01 - 0.2*np.abs(cart_pole.state[0]/x_threshold)
    # return 5*(1-np.abs(normalized_angle/np.pi)) + 0.2
    return 1
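Every example in this collection calls angle_normalize from continuous_cartpole, which is never shown here. A minimal sketch of the usual convention (an assumption; the real module may differ) wraps the raw pole angle into [-π, π) so that 0 means the pole is upright:

import numpy as np

def angle_normalize(theta):
    # Assumed helper: wrap an angle in radians into [-pi, pi), with 0 = pole upright.
    return ((theta + np.pi) % (2 * np.pi)) - np.pi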
Example #2
0
def reward(cart_pole):
    # Fail reward when the cart leaves the track.
    if cart_pole.state[0] < -cart_pole.x_threshold or cart_pole.state[0] > cart_pole.x_threshold:
        return -1
    # Tiered reward: larger the closer the pole is to upright.
    abs_theta = math.fabs(angle_normalize(cart_pole.state[2]))
    if abs_theta <= 0.1:
        return 1
    elif abs_theta <= 0.5:
        return 0.5
    elif abs_theta <= 1:
        return 0.3
    elif abs_theta <= 2:
        return 0.2
    elif abs_theta <= 3:
        return 0.1
    else:
        return 0
Example #3
0
def reward(env):
    x, x_dot, theta, theta_dot = env.state
    theta_norm = angle_normalize(theta)

    # Large penalty when the cart leaves the track; otherwise reward keeping the
    # pole close to upright while damping its angular velocity.
    if math.fabs(x) > 2.4:
        return -100
    elif abs(theta_norm) < np.pi / 8:
        return 2 * np.cos(theta_norm) - theta_dot**2 + 4
    elif abs(theta_norm) < np.pi / 2:
        return 2 * np.cos(theta_norm) - 0.1 * theta_dot**2 + 2
    else:
        return np.cos(theta_norm) - 0.1 * theta_dot**2 * np.cos(theta_norm) - 0.01 * x**2
def rf_info_pos(cart_pole):
	""" Sparse positive Reward Function:
	This Reward Function returns a positive reward in the interval [0, 1] given by:

		r = ½(cos(θ) + 1)

	where:
		· θ is the angle of the pole (θ=0 upwards)

	:param cart_pole: CartPole Environment from OpenAI Gym
	:return: (float) reward between [0, 1]
	"""
	theta = angle_normalize(cart_pole.state[2])

	return (np.cos(theta) + 1) / 2
def rf_spar_pos(cart_pole):
	""" Sparse positive Reward Function:
	This Reward Function returns +1 when the pole is within the desired threshold, else it returns 0

			⎧ 1   if -0.1 ≤ θ ≤ 0.1
		r = ⎨
			⎩ 0   else

	where:
		· θ is the angle of the pole (θ=0 upwards)

	:param cart_pole: CartPole Environment from OpenAI Gym
	:return: (int) reward in {0, +1}
	"""

	theta = angle_normalize(cart_pole.state[2])

	return 1 if -0.1 <= theta <= 0.1 else 0
def rf_info2d_pos(cart_pole):
	""" Sparse positive Reward Function:
	This Reward Function returns a positive reward in the interval [0, 1] given by:

			⎧ ¼(cos(θ) + 1)(cos(πx/2.4) + 1)   if -2.4 ≤ x ≤ 2.4
		r = ⎨
			⎩ 0   else

	where:
		· x is the horizontal position of the cart, in [-2.4, 2.4]
		· θ is the angle of the pole (θ=0 upwards)

	:param cart_pole: CartPole Environment from OpenAI Gym
	:return: (float) reward between [0, 1]
	"""
	x = cart_pole.state[0]
	theta = angle_normalize(cart_pole.state[2])

	if -cart_pole.x_threshold <= x <= cart_pole.x_threshold:
		return (np.cos(theta) + 1) * (np.cos(np.pi * x / cart_pole.x_threshold) + 1) / 4
	else:
		return 0
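The three rf_*_pos functions above only need an object exposing state and x_threshold, so they can be sanity-checked without running the full environment. A minimal sketch, assuming the state layout [x, x_dot, theta, theta_dot] used throughout these examples (FakeCartPole is a made-up stand-in, not part of the original code):

import numpy as np

class FakeCartPole:
	# Hypothetical stand-in for the Gym environment, just enough for the reward functions.
	x_threshold = 2.4

	def __init__(self, x=0.0, theta=0.0):
		self.state = np.array([x, 0.0, theta, 0.0])

for theta in (0.0, 0.05, np.pi / 4, np.pi):
	cp = FakeCartPole(theta=theta)
	print(theta, rf_info_pos(cp), rf_spar_pos(cp), rf_info2d_pos(cp))

At θ=0 all three return 1; at θ=π the informative variants fall to 0, while the sparse one is already 0 for any |θ| > 0.1.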
Example #7
0
def reward_old(env):
    x, x_dot, theta, theta_dot = env.state
    true_theta = angle_normalize(theta)

    r_x, r_theta, r_thetadot, r_x_dot = 0, 0, 0, 0
    if math.fabs(x) > 2.35:
        r_x = -1000
    else:
        r_x = 1

    if np.fabs(true_theta) > np.pi / 2:
        # Under
        r_x_dot = -0.001 * np.fabs(x_dot)
        r_theta = np.cos(true_theta) - abs(np.sin(theta) * x_dot * x**2)
        r_thetadot = -0.001 * theta_dot**2
    elif np.fabs(true_theta) < 0.3:
        # Close
        r_theta = gaussian(theta, 0, 0.5) + gaussian(theta_dot, 0, 0.5) + 1
    else:
        # Up not so close
        r_x_dot = -0.01 * x_dot**2
        r_theta = np.cos(true_theta)
        r_thetadot = -0.01 * theta_dot**2
    return r_x + r_theta + r_thetadot + r_x_dot
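reward_old relies on a gaussian helper that is not defined in this snippet. A plausible stand-in (an assumption; the original may use a normalized density instead) is an unnormalized Gaussian bell that peaks at 1 when the argument equals the mean:

import numpy as np

def gaussian(x, mu, sigma):
    # Assumed helper: unnormalized Gaussian bell, 1 at x == mu, approaching 0 far from it.
    return np.exp(-0.5 * ((x - mu) / sigma) ** 2)

With sigma = 0.5 this rewards both a small angle and a small angular velocity in the "Close" branch.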
Example #8
0
def reward_new(env):
    x, x_dot, theta, theta_dot = env.state
    true_theta = angle_normalize(theta)

    r_x, r_theta, r_thetadot, r_x_dot = 0, 0, 0, 0
    if math.fabs(x) > 2.35:
        r_x = -1000
    else:
        r_x = 1

    if np.fabs(true_theta) > np.pi / 2:
        # Under
        #r_x_dot = - 0.001 * np.fabs(x_dot)
        r_theta = -1
        r_thetadot = -0.01 * theta_dot**2
    elif np.fabs(true_theta) < 0.3:
        # Close
        r_theta = 100 - 0.01 * theta_dot**2
    else:
        # Up not so close
        r_x_dot = -0.01 * x_dot**2
        r_theta = np.cos(true_theta)
        r_thetadot = -0.01 * theta_dot**2
    return r_x + r_theta + r_thetadot + r_x_dot
def rf_inf(cart_pole, cos_pow=3):
	""" Informative Reward Function:
	This Reward Function returns a float reward according to:

			⎧ cos(θ)^cos_pow   if -2.4 ≤ x ≤ 2.4
		r = ⎨
			⎩ -1               else

	where:
		· x is the horizontal position of the cart
		· θ is the angular position of the pole (θ=0 upwards)

	:param cart_pole: CartPole Environment from OpenAI Gym
	:param cos_pow: (int) exponent applied to cos(θ); higher powers concentrate reward near θ=0
	:return: (float) reward in interval [-1, 1]
	"""

	x = cart_pole.state[0]
	theta = angle_normalize(cart_pole.state[2])

	if -cart_pole.x_threshold <= x <= cart_pole.x_threshold:
		return np.cos(theta) ** cos_pow
	else:
		return -1
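The cos_pow exponent controls how sharply rf_inf concentrates reward around the upright position. A quick check (illustrative only) makes the effect visible:

import numpy as np

# Higher powers of cos(theta) shrink the reward faster as the pole tilts away from upright.
for deg in (0, 15, 30, 60, 90):
	theta = np.deg2rad(deg)
	print(deg, [round(float(np.cos(theta) ** p), 3) for p in (1, 3, 5)])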
Example #10
0
def smooth_reward(cart_pole):
    x_threshold = 2.4
    if cart_pole.state[0] < -x_threshold or cart_pole.state[0] > x_threshold:
        return -10
    normalized_angle = angle_normalize(cart_pole.state[2])
    # Full reward only when the pole is within 0.1 rad of upright; tiny reward otherwise.
    return 1 if -0.1 <= normalized_angle <= 0.1 else 0.001
Example #11
0
def rf_default(cart_pole):
	if cart_pole.state[0] < -cart_pole.x_threshold or cart_pole.state[0] > cart_pole.x_threshold:
		return -1
	return 1 if -0.1 <= angle_normalize(cart_pole.state[2]) <= 0.1 else 0
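None of the examples show how a reward function is wired into training. One hedged way to use them, assuming a classic Gym-style ContinuousCartPoleEnv (the class name, constructor, and step() signature are assumptions; the real continuous_cartpole module may differ), is to recompute the reward from env.state after each step:

from continuous_cartpole import ContinuousCartPoleEnv  # assumed class name

env = ContinuousCartPoleEnv()
obs = env.reset()
episode_return = 0.0
for _ in range(500):
	action = env.action_space.sample()      # random policy, just to exercise the reward
	obs, _, done, info = env.step(action)   # ignore the built-in reward
	episode_return += rf_default(env)       # use Example #11's reward instead
	if done:
		break
print("episode return under rf_default:", episode_return)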