def reward(cart_pole):
    """Default-style reward: -500 when the cart leaves the track, else +1.

    NOTE(review): this module redefines ``reward`` again below (L2/L3 in the
    original file); only the last definition is bound at import time — confirm
    which one callers actually receive.

    Removed dead code: the original computed ``normalized_angle`` and
    ``special_sauce`` (and imported ``angle_normalize``) solely for two
    commented-out return expressions; none of it affected the result.

    :param cart_pole: CartPole environment (reads ``state[0]`` = cart position)
    :return: (int) -500 if ``|x|`` exceeds the 2.4 track limit, else 1
    """
    x_threshold = 2.4
    if cart_pole.state[0] < -x_threshold or cart_pole.state[0] > x_threshold:
        return -500
    return 1
def reward(cart_pole):
    """Tiered reward based on how far the pole is from upright.

    Returns -1 when the cart leaves the track; otherwise a discrete reward
    that decreases as the absolute (normalized) pole angle grows:
    1 / 0.5 / 0.3 / 0.2 / 0.1 / 0 over the bands
    [0, 0.1] / (0.1, 0.5] / (0.5, 1] / (1, 2] / (2, 3] / (3, ∞).

    NOTE(review): this shadows an earlier ``reward`` definition in this file.

    :param cart_pole: CartPole environment (reads ``state`` and ``x_threshold``)
    :return: (float) reward in {-1, 0, 0.1, 0.2, 0.3, 0.5, 1}
    """
    if cart_pole.state[0] < -cart_pole.x_threshold or cart_pole.state[0] > cart_pole.x_threshold:
        return -1
    # Hoist the repeated |angle_normalize(theta)| out of the elif chain:
    # the original recomputed it five times. fabs() >= 0, so the redundant
    # "0 <=" lower bound and the per-branch lower bounds are dropped.
    abs_angle = math.fabs(angle_normalize(cart_pole.state[2]))
    if abs_angle <= 0.1:
        return 1
    elif abs_angle <= 0.5:
        return 0.5
    elif abs_angle <= 1:
        return 0.3
    elif abs_angle <= 2:
        return 0.2
    elif abs_angle <= 3:
        return 0.1
    else:
        return 0
def reward(env):
    """Shaped reward combining pole uprightness, spin penalty, and position.

    -100 when the cart leaves the track (|x| > 2.4). Otherwise three regimes
    by absolute normalized pole angle:
      * |θ| < π/8  — near upright: strong cos bonus, full spin penalty, +4
      * |θ| < π/2  — upper half:   cos bonus, damped spin penalty, +2
      * else       — lower half:   cos term scaled into the spin penalty,
                     plus a small position penalty.

    Removed dead code: the original initialized ``r_x, r_theta, r_thetadot,
    r_x_dot`` but returned directly from every branch without using them.

    :param env: CartPole environment (reads ``state`` = (x, x_dot, θ, θ_dot))
    :return: (float) shaped reward
    """
    x, x_dot, theta, theta_dot = env.state
    theta_norm = angle_normalize(theta)
    if math.fabs(x) > 2.4:
        return -100
    elif abs(theta_norm) < np.pi / 8:
        return 2 * np.cos(theta_norm) - theta_dot**2 + 4
    elif abs(theta_norm) < np.pi / 2:
        return 2 * np.cos(theta_norm) - 0.1 * theta_dot**2 + 2
    else:
        return np.cos(theta_norm) - 0.1 * theta_dot**2 * np.cos(theta_norm) - 0.01 * x**2
def rf_info_pos(cart_pole):
    """Informative positive reward function.

    Maps the pole angle smoothly to a reward in [0, 1]:

        r = ½(cos(θ) + 1)

    where θ is the normalized pole angle (θ = 0 means upright, giving r = 1;
    θ = ±π hanging down gives r = 0).

    :param cart_pole: CartPole Environment from OpenAI Gym
    :return: (float) reward between [0, 1]
    """
    pole_angle = angle_normalize(cart_pole.state[2])
    return 0.5 * (np.cos(pole_angle) + 1)
def rf_spar_pos(cart_pole):
    """Sparse positive reward function.

    Pays +1 only while the pole is essentially upright, 0 everywhere else:

        r = 1 if -0.1 <= θ <= 0.1 else 0

    where θ is the normalized pole angle (θ = 0 means upright).

    :param cart_pole: CartPole Environment from OpenAI Gym
    :return: (int) reward in {0, +1}
    """
    pole_angle = angle_normalize(cart_pole.state[2])
    if -0.1 <= pole_angle <= 0.1:
        return 1
    return 0
def rf_info2d_pos(cart_pole):
    """Informative 2-D positive reward function (angle and cart position).

    Rewards keeping the pole upright AND the cart centered:

        r = ¼(cos(θ) + 1)(cos(πx/x_threshold) + 1)   if |x| <= x_threshold
        r = 0                                        otherwise

    where x is the cart position and θ the normalized pole angle
    (θ = 0 means upright). Both factors peak at 1, so r ∈ [0, 1].

    :param cart_pole: CartPole Environment from OpenAI Gym
    :return: (float) reward between [0, 1]
    """
    position = cart_pole.state[0]
    pole_angle = angle_normalize(cart_pole.state[2])
    limit = cart_pole.x_threshold
    if -limit <= position <= limit:
        angle_factor = np.cos(pole_angle) + 1
        track_factor = np.cos(np.pi * position / limit) + 1
        return angle_factor * track_factor / 4
    return 0
def reward_old(env):
    """Legacy shaped reward with three regimes (below horizontal, near
    upright, upper half) plus a large off-track penalty.

    Accumulates four additive terms — track position (r_x), pole pose
    (r_theta), spin penalty (r_thetadot), and cart-speed penalty (r_x_dot) —
    then returns their sum.

    :param env: CartPole environment; reads ``state`` = (x, x_dot, θ, θ_dot)
    :return: (float) sum of the four shaping terms
    """
    x, x_dot, theta, theta_dot = env.state
    # true_theta: angle wrapped so 0 means upright.
    true_theta = angle_normalize(theta)
    # Start all shaping terms at zero; branches below overwrite a subset.
    r_x, r_theta, r_thetadot, r_x_dot = 0, 0, 0, 0
    # Track term: heavy penalty just inside the 2.4 limit, else +1 per step.
    if math.fabs(x) > 2.35:
        r_x = -1000
    else:
        r_x = 1
    if np.fabs(true_theta) > np.pi / 2:
        # Pole below horizontal ("under"): mild cart-speed penalty, cosine
        # pose term reduced by a swing/position cross term, tiny spin penalty.
        r_x_dot = -0.001 * np.fabs(x_dot)
        # NOTE(review): np.sin uses raw `theta`, not `true_theta` — confirm
        # whether the un-normalized angle is intentional here.
        r_theta = np.cos(true_theta) - abs(np.sin(theta) * x_dot * x**2)
        r_thetadot = -0.001 * theta_dot**2
    elif np.fabs(true_theta) < 0.3:
        # Near upright ("close"): Gaussian bumps around θ = 0 and θ_dot = 0.
        # NOTE(review): `gaussian` is a project helper not visible here, and
        # the first call uses raw `theta` rather than `true_theta` — verify.
        r_theta = gaussian(theta, 0, 0.5) + gaussian(theta_dot, 0, 0.5) + 1
    else:
        # Upper half but not yet close: cosine pose term with damped
        # cart-speed and spin penalties.
        r_x_dot = -0.01 * x_dot**2
        r_theta = np.cos(true_theta)
        r_thetadot = -0.01 * theta_dot**2
    return r_x + r_theta + r_thetadot + r_x_dot
def reward_new(env):
    """Shaped reward: large off-track penalty, big bonus near upright,
    flat penalty below horizontal, cosine shaping in between.

    Four additive terms are combined: a track term (+1 on track, -1000 near
    the edge), a pose term from the normalized pole angle, a spin penalty on
    the pole's angular velocity, and a cart-speed penalty (only in the
    intermediate regime).

    :param env: CartPole environment; reads ``state`` = (x, x_dot, θ, θ_dot)
    :return: (float) sum of the shaping terms
    """
    pos, vel, angle, angle_vel = env.state
    upright_angle = angle_normalize(angle)
    abs_angle = np.fabs(upright_angle)

    # Track term: heavy penalty just inside the 2.4 limit, else +1 per step.
    track_term = -1000 if math.fabs(pos) > 2.35 else 1

    speed_term = 0
    spin_term = 0
    if abs_angle > np.pi / 2:
        # Below horizontal: flat pose penalty plus a damped spin penalty.
        pose_term = -1
        spin_term = -0.01 * angle_vel**2
    elif abs_angle < 0.3:
        # Near upright: big bonus, reduced by how fast the pole is spinning.
        pose_term = 100 + -0.01 * angle_vel**2
    else:
        # Upper half, not yet close: cosine shaping plus speed/spin penalties.
        speed_term = -0.01 * vel**2
        pose_term = np.cos(upright_angle)
        spin_term = -0.01 * angle_vel**2

    return track_term + pose_term + spin_term + speed_term
def rf_inf(cart_pole, cos_pow=3):
    """Informative reward function.

    Returns cos(θ)**cos_pow while the cart stays within the track limits,
    and a flat -1 once it leaves them:

        r = cos(θ)**cos_pow   if -x_threshold <= x <= x_threshold
        r = -1                otherwise

    where x is the cart position and θ the normalized pole angle
    (θ = 0 means upright).

    :param cart_pole: CartPole Environment from OpenAI Gym
    :param cos_pow: (int) exponent applied to cos(θ); sharpens the peak
    :return: (float) reward in interval [-1, 1]
    """
    position = cart_pole.state[0]
    pole_angle = angle_normalize(cart_pole.state[2])
    on_track = -cart_pole.x_threshold <= position <= cart_pole.x_threshold
    return np.cos(pole_angle) ** cos_pow if on_track else -1
def smooth_reward(cart_pole):
    """Near-sparse reward: -10 off-track, 1 when upright, else 0.001.

    The tiny 0.001 floor (instead of a hard 0) keeps a small positive signal
    while the pole is away from upright.

    :param cart_pole: CartPole environment (reads ``state[0]`` = cart
        position and ``state[2]`` = pole angle)
    :return: (float) -10, 1, or 0.001
    """
    x_threshold = 2.4
    if cart_pole.state[0] < -x_threshold or cart_pole.state[0] > x_threshold:
        return -10
    # Reuse the already-normalized angle; the original recomputed
    # angle_normalize(...) a second time in the return expression.
    normalized_angle = angle_normalize(cart_pole.state[2])
    return 1 if -0.1 <= normalized_angle <= 0.1 else 0.001
def rf_default(cart_pole):
    """Default sparse reward.

    -1 when the cart leaves the track, +1 while the pole is within ±0.1 rad
    of upright, 0 otherwise.

    :param cart_pole: CartPole environment (reads ``state`` and
        ``x_threshold``)
    :return: (int) reward in {-1, 0, +1}
    """
    position = cart_pole.state[0]
    if position < -cart_pole.x_threshold or position > cart_pole.x_threshold:
        return -1
    pole_angle = angle_normalize(cart_pole.state[2])
    if -0.1 <= pole_angle <= 0.1:
        return 1
    return 0