# --- Q-learning hyper-parameters ---------------------------------------------
DISCOUNT = 0.9        # future-reward discount factor
EXPLORATION = 0.4     # epsilon for the epsilon-greedy policy
REAL_BINS = 9
BINS = REAL_BINS + 1          # number of edges handed to np.linspace
NUMPY_BINS = REAL_BINS + 1    # per-dimension size of the Q-table
ANIMATION = False

# Quantization grids: evenly spaced bin edges over each observation dimension.
# MIN_*/MAX_* bounds are defined elsewhere in this file (not visible here).
x_qtz = np.linspace(MIN_X, MAX_X, BINS)
y_qtz = np.linspace(MIN_Y, MAX_Y, BINS)
x_vel_qtz = np.linspace(MIN_X_VEL, MAX_X_VEL, BINS)
y_vel_qtz = np.linspace(MIN_Y_VEL, MAX_Y_VEL, BINS)
un1_qtz = np.linspace(-MAX_UN, MAX_UN, BINS)
un2_qtz = np.linspace(-MAX_UN, MAX_UN, BINS)

env = gym.make('LunarLander-v2')
# Q-table shape: four quantized continuous dims, two binary leg-contact flags,
# then one slot per discrete action.
learner = QLearningAgent(
    env,
    LEARNING_RATE,
    DISCOUNT,
    EXPLORATION,
    range(env.action_space.n),
    (NUMPY_BINS, NUMPY_BINS, NUMPY_BINS, NUMPY_BINS, 2, 2, env.action_space.n),
)


def extract_state(obs):
    """
    extract state via this function so that it is DRY

    :param obs: gym observation
    """

    def qtz(val, lns):
        # np.digitize returns 0 below the first edge and BINS above the last;
        # clamp both extremes into the valid 1..BINS-1 range.
        binn = int(np.digitize(val, lns))
        if binn == 0:
            return 1
        if binn == BINS:
            return BINS - 1
        return binn
    # NOTE(review): the remainder of extract_state is truncated in this view;
    # only the inner qtz helper is visible here.
# NOTE(review): this line is a whole script variant collapsed onto one line by a
# whitespace-mangling extraction; the original newlines are lost.  Visible
# content: ANIMATION/USE_EXTERNAL flags, np.linspace quantization grids over
# MIN_*/MAX_* bounds (defined elsewhere), a LunarLander-v2 env, a QLearningAgent
# whose Q-table is (NUMPY_BINS x4, 2 leg flags, n actions), an optional load of
# pretrained values from 'fail.npy', and the start of extract_state with an
# inner qtz(val, lns) helper built on np.digitize.
# NOTE(review): the text is truncated mid-statement at "if binn == 0:" — the
# rest of qtz/extract_state is not visible, so the code is left byte-identical
# rather than reconstructed by guesswork.
ANIMATION = True # train and test with learned data USE_EXTERNAL = True x_qtz = np.linspace(MIN_X, MAX_X, BINS) y_qtz = np.linspace(MIN_Y, MAX_Y, BINS) x_vel_qtz = np.linspace(MIN_X_VEL, MAX_X_VEL, BINS) y_vel_qtz = np.linspace(MIN_Y_VEL, MAX_Y_VEL, BINS) un1_qtz = np.linspace(-MAX_UN, MAX_UN, BINS) un2_qtz = np.linspace(-MAX_UN, MAX_UN, BINS) (x, y, x_vel, y_vel, unknown1, unknown2, leg1, leg2) = (0, 0, 0, 0, 0, 0, 0, 0) env = gym.make('LunarLander-v2') learner = QLearningAgent( env, LEARNING_RATE, DISCOUNT, EXPLORATION, range(env.action_space.n), (NUMPY_BINS, NUMPY_BINS, NUMPY_BINS, NUMPY_BINS, 2, 2, env.action_space.n)) if USE_EXTERNAL: print('loaded') learner.values = np.load('fail.npy') def extract_state(obs): """ extract state via this function so that it is DRY :param obs: gym observation """ def qtz(val, lns): binn = int(np.digitize(val, lns)) if binn == 0:
# --- Q-learning hyper-parameters ---------------------------------------------
TESTING_EPISODES = 10
LEARNING_RATE = 0.2
DISCOUNT = 0.9       # future-reward discount factor
EXPLORATION = 0.3    # epsilon for the epsilon-greedy policy
BINS = 10
ANIMATION = True
# set True when skipping learning phase and load knowledge from external with 5000 LEARNING_EPISODES
USE_EXTERNAL = False

# One Quantizer per observation dimension; MAX_* bounds are defined elsewhere
# in this file (not visible here).
cos_qtz = Quantizer(-MAX_COS_THETA, MAX_COS_THETA, BINS)
sin_qtz = Quantizer(-MAX_SIN_THETA, MAX_SIN_THETA, BINS)
theta_qtz = Quantizer(-MAX_THETA_DOT, MAX_THETA_DOT, BINS)

env = gym.make('Pendulum-v0')
# Pendulum's action space is continuous [-2, 2]; discretize it into 10 levels
# so the tabular agent can pick from a finite action list.
action_qtz = Quantizer(-2.0, 2.0, 10)
learner = QLearningAgent(env, LEARNING_RATE, DISCOUNT, EXPLORATION, action_qtz.as_list())


def extract_state(obs):
    """
    extract state via this function so that it is DRY

    :param obs: gym observation
    """
    cos_theta, sin_theta, theta_dot = obs
    cos_theta = cos_qtz.round(cos_theta)
    sin_theta = sin_qtz.round(sin_theta)
    theta_dot = theta_qtz.round(theta_dot)
    return cos_theta, sin_theta, theta_dot

# Learning
# --- Q-learning hyper-parameters ---------------------------------------------
DISCOUNT = 0.9       # future-reward discount factor
EXPLORATION = 0.0    # greedy policy: no exploration in this configuration
BINS = 8

# Quantization: linspace edges for position, Quantizer objects for the rest.
# MIN_*/MAX_* bounds are defined elsewhere in this file (not visible here).
x_qtz = np.linspace(MIN_X, MAX_X, BINS)
y_qtz = np.linspace(MIN_Y, MAX_Y, BINS)
x_vel_qtz = Quantizer(MIN_X_VEL, MAX_X_VEL, BINS)
y_vel_qtz = Quantizer(MIN_Y_VEL, MAX_Y_VEL, BINS)
un1_qtz = Quantizer(-MAX_UN, MAX_UN, BINS)
un2_qtz = Quantizer(-MAX_UN, MAX_UN, BINS)
(x, y, x_vel, y_vel, unknown1, unknown2, leg1, leg2) = (0, 0, 0, 0, 0, 0, 0, 0)

env = gym.make('LunarLander-v2')
env.seed(2)  # fixed seed for reproducible evaluation runs
learner = QLearningAgent(env, LEARNING_RATE, DISCOUNT, EXPLORATION, range(env.action_space.n))
# Restore a previously trained Q-table (pickle of trusted, locally produced data).
with open('lunar_lander_knowledge_bins_8.txt', 'rb') as f:
    learner.values = pickle.load(f)


def extract_state(obs):
    """
    extract state via this function so that it is DRY

    :param obs: gym observation
    """
    (x, y, x_vel, y_vel, unknown1, unknown2, leg1, leg2) = obs
    # x = qtz(x, x_qtz)
    # y = qtz(y, y_qtz)
    x_vel = x_vel_qtz.round(x_vel)
    y_vel = y_vel_qtz.round(y_vel)
    un1 = un1_qtz.round(unknown1)
    # NOTE(review): the remainder of extract_state (presumably un2 rounding and
    # the return of the assembled state tuple) is truncated in this view.
ANIMATION = False
# train and test with learned data
USE_EXTERNAL = True

# Quantization grids: evenly spaced bin edges over each observation dimension.
# MIN_*/MAX_* bounds are defined elsewhere in this file (not visible here).
x_qtz = np.linspace(MIN_X, MAX_X, BINS)
y_qtz = np.linspace(MIN_Y, MAX_Y, BINS)
x_vel_qtz = np.linspace(MIN_X_VEL, MAX_X_VEL, BINS)
y_vel_qtz = np.linspace(MIN_Y_VEL, MAX_Y_VEL, BINS)
un1_qtz = np.linspace(-MAX_UN, MAX_UN, BINS)
un2_qtz = np.linspace(-MAX_UN, MAX_UN, BINS)
(x, y, x_vel, y_vel, unknown1, unknown2, leg1, leg2) = (0, 0, 0, 0, 0, 0, 0, 0)

env = gym.make('LunarLander-v2')
# Q-table shape: six NBINS-quantized observation dims plus one slot per action.
learner = QLearningAgent(
    env,
    LEARNING_RATE,
    DISCOUNT,
    EXPLORATION,
    range(env.action_space.n),
    (NBINS, NBINS, NBINS, NBINS, NBINS, NBINS, env.action_space.n),
)
if USE_EXTERNAL:
    print('loaded')
    learner.values = np.load('lunar_lander_fail.npy')


def extract_state(obs):
    """
    extract state via this function so that it is DRY

    :param obs: gym observation
    """

    def qtz(val, lns):
        # Raw np.digitize bin index; unlike the clamped variants elsewhere in
        # this project, out-of-range values are NOT clamped here.
        return int(np.digitize(val, lns))
    # NOTE(review): the remainder of extract_state is truncated in this view;
    # only the inner qtz helper is visible here.