# DISCOUNT and EXPLORATION are forwarded unchanged to QLearningAgent below.
DISCOUNT = 0.9
EXPLORATION = 0.4
# BINS (= REAL_BINS + 1) is the number of points np.linspace generates for
# every grid defined below.
REAL_BINS = 9
BINS = REAL_BINS + 1
# Length of each of the first four axes of the shape tuple given to the agent.
NUMPY_BINS = REAL_BINS + 1
# NOTE(review): presumably toggles rendering; its consumer is outside this excerpt.
ANIMATION = False

# Evenly spaced grids of BINS points, one per continuous observation component.
x_qtz = np.linspace(MIN_X, MAX_X, BINS)
y_qtz = np.linspace(MIN_Y, MAX_Y, BINS)
x_vel_qtz = np.linspace(MIN_X_VEL, MAX_X_VEL, BINS)
y_vel_qtz = np.linspace(MIN_Y_VEL, MAX_Y_VEL, BINS)
# Both "un" grids span the same symmetric range [-MAX_UN, MAX_UN].
un1_qtz = np.linspace(-MAX_UN, MAX_UN, BINS)
un2_qtz = np.linspace(-MAX_UN, MAX_UN, BINS)

env = gym.make('LunarLander-v2')
# The agent receives the action ids 0..n-1 and a 7-axis shape tuple: four axes
# of length NUMPY_BINS, two axes of length 2 and one axis of length
# env.action_space.n.
# NOTE(review): presumably the Q-table layout
# (x, y, x_vel, y_vel, leg1, leg2, action) -- confirm against QLearningAgent.
learner = QLearningAgent(env, LEARNING_RATE, DISCOUNT, EXPLORATION, range(env.action_space.n), (NUMPY_BINS, NUMPY_BINS, NUMPY_BINS, NUMPY_BINS, 2, 2, env.action_space.n))


def extract_state(obs):
    """
    extract state via this function so that it is DRY
    :param obs: gym observation
    """
    def qtz(val, lns):
        """
        Return the bin index of ``val`` among the edges ``lns``.

        The raw index comes from ``np.digitize``; the two extreme results
        are folded inwards: 0 becomes 1 and BINS becomes BINS - 1.
        :param val: scalar value to quantize
        :param lns: sequence of bin edges handed to ``np.digitize``
        :return: integer bin index
        """
        idx = int(np.digitize(val, lns))
        if idx == 0:
            # np.digitize reported the lowest possible index: use bin 1.
            return 1
        if idx == BINS:
            # Index equals BINS: pull it back by one.
            return BINS - 1
        return idx
Пример #2
0
# NOTE(review): presumably toggles rendering; its consumer is outside this excerpt.
ANIMATION = True
# train and test with learned data
# When True, learner.values is replaced below by the array stored in 'fail.npy'.
USE_EXTERNAL = True

# Evenly spaced grids of BINS points, one per continuous observation component.
x_qtz = np.linspace(MIN_X, MAX_X, BINS)
y_qtz = np.linspace(MIN_Y, MAX_Y, BINS)
x_vel_qtz = np.linspace(MIN_X_VEL, MAX_X_VEL, BINS)
y_vel_qtz = np.linspace(MIN_Y_VEL, MAX_Y_VEL, BINS)
# Both "un" grids span the same symmetric range [-MAX_UN, MAX_UN].
un1_qtz = np.linspace(-MAX_UN, MAX_UN, BINS)
un2_qtz = np.linspace(-MAX_UN, MAX_UN, BINS)

# Bind eight module-level names to 0.
# NOTE(review): presumably placeholders for the observation components; the
# code that reads them is not visible here.
(x, y, x_vel, y_vel, unknown1, unknown2, leg1, leg2) = (0, 0, 0, 0, 0, 0, 0, 0)

env = gym.make('LunarLander-v2')
# The agent receives the action ids 0..n-1 and a 7-axis shape tuple: four axes
# of length NUMPY_BINS, two axes of length 2 and one axis of length
# env.action_space.n.
learner = QLearningAgent(
    env, LEARNING_RATE, DISCOUNT, EXPLORATION, range(env.action_space.n),
    (NUMPY_BINS, NUMPY_BINS, NUMPY_BINS, NUMPY_BINS, 2, 2, env.action_space.n))

# Replace the agent's values with the array saved in 'fail.npy'.
if USE_EXTERNAL:
    # NOTE(review): the message is printed before np.load runs, so it is
    # emitted even when loading subsequently fails.
    print('loaded')
    learner.values = np.load('fail.npy')


def extract_state(obs):
    """
    extract state via this function so that it is DRY
    :param obs: gym observation
    """
    def qtz(val, lns):
        binn = int(np.digitize(val, lns))
        if binn == 0:
Пример #3
0
# NOTE(review): presumably the number of evaluation episodes; the loop that
# reads it is outside this excerpt.
TESTING_EPISODES = 10
# LEARNING_RATE, DISCOUNT and EXPLORATION are forwarded unchanged to
# QLearningAgent below.
LEARNING_RATE = 0.2
DISCOUNT = 0.9
EXPLORATION = 0.3
# Third argument of every observation Quantizer below.
BINS = 10
# NOTE(review): presumably toggles rendering; its consumer is outside this excerpt.
ANIMATION = True
# Set to True to skip the learning phase and load externally stored knowledge
# (trained with 5000 LEARNING_EPISODES).
USE_EXTERNAL = False

# One Quantizer per observation component, each over a symmetric range.
# NOTE(review): Quantizer is defined elsewhere; presumably its third argument
# is the number of levels -- confirm.
cos_qtz = Quantizer(-MAX_COS_THETA, MAX_COS_THETA, BINS)
sin_qtz = Quantizer(-MAX_SIN_THETA, MAX_SIN_THETA, BINS)
# Named theta_qtz but bounded by MAX_THETA_DOT.
theta_qtz = Quantizer(-MAX_THETA_DOT, MAX_THETA_DOT, BINS)

env = gym.make('Pendulum-v0')
# Quantizer over [-2.0, 2.0]; the literal 10 equals BINS here but is not tied to it.
action_qtz = Quantizer(-2.0, 2.0, 10)
# The agent's action set is whatever action_qtz.as_list() returns.
learner = QLearningAgent(env, LEARNING_RATE, DISCOUNT, EXPLORATION,
                         action_qtz.as_list())


def extract_state(obs):
    """
    Turn a raw observation into its quantized state tuple.

    Kept as a single function so every caller quantizes the same way.
    :param obs: gym observation, unpacked into exactly three components
    :return: 3-tuple with each component passed through ``round`` of its
        quantizer (``cos_qtz``, ``sin_qtz``, ``theta_qtz`` in that order)
    """
    cos_theta, sin_theta, theta_dot = obs
    return (
        cos_qtz.round(cos_theta),
        sin_qtz.round(sin_theta),
        theta_qtz.round(theta_dot),
    )


# Learning
# DISCOUNT and EXPLORATION are forwarded unchanged to QLearningAgent below.
DISCOUNT = 0.9
# NOTE(review): 0.0 presumably disables exploration because the values are
# loaded from disk below -- confirm against QLearningAgent.
EXPLORATION = 0.0
# Third argument of every grid/Quantizer below; the knowledge file name also
# mentions 8 bins.
BINS = 8

# x and y use plain np.linspace grids, the other four components use
# Quantizer objects.
x_qtz = np.linspace(MIN_X, MAX_X, BINS)
y_qtz = np.linspace(MIN_Y, MAX_Y, BINS)
x_vel_qtz = Quantizer(MIN_X_VEL, MAX_X_VEL, BINS)
y_vel_qtz = Quantizer(MIN_Y_VEL, MAX_Y_VEL, BINS)
# Both "un" quantizers span the same symmetric range [-MAX_UN, MAX_UN].
un1_qtz = Quantizer(-MAX_UN, MAX_UN, BINS)
un2_qtz = Quantizer(-MAX_UN, MAX_UN, BINS)

# Bind eight module-level names to 0.
# NOTE(review): presumably placeholders for the observation components.
(x, y, x_vel, y_vel, unknown1, unknown2, leg1, leg2) = (0, 0, 0, 0, 0, 0, 0, 0)

env = gym.make('LunarLander-v2')
# Seed the environment with the constant 2.
# NOTE(review): presumably for reproducible episodes.
env.seed(2)
# No shape tuple is passed here, only the action ids 0..n-1.
learner = QLearningAgent(env, LEARNING_RATE, DISCOUNT, EXPLORATION, range(env.action_space.n))
# The file has a .txt name but is opened in binary mode and read with pickle.
# SECURITY: unpickling can execute arbitrary code -- only load trusted files.
with open('lunar_lander_knowledge_bins_8.txt', 'rb') as f:
    learner.values = pickle.load(f)


def extract_state(obs):
    """
    extract state via this function so that it is DRY
    :param obs: gym observation
    """
    (x, y, x_vel, y_vel, unknown1, unknown2, leg1, leg2) = obs
    # x = qtz(x, x_qtz)
    # y = qtz(y, y_qtz)
    x_vel = x_vel_qtz.round(x_vel)
    y_vel = y_vel_qtz.round(y_vel)
    un1 = un1_qtz.round(unknown1)
Пример #5
0
# NOTE(review): presumably toggles rendering; its consumer is outside this excerpt.
ANIMATION = False
# train and test with learned data
# When True, learner.values is replaced below by the array stored in
# 'lunar_lander_fail.npy'.
USE_EXTERNAL = True

# Evenly spaced grids of BINS points, one per continuous observation component.
x_qtz = np.linspace(MIN_X, MAX_X, BINS)
y_qtz = np.linspace(MIN_Y, MAX_Y, BINS)
x_vel_qtz = np.linspace(MIN_X_VEL, MAX_X_VEL, BINS)
y_vel_qtz = np.linspace(MIN_Y_VEL, MAX_Y_VEL, BINS)
# Both "un" grids span the same symmetric range [-MAX_UN, MAX_UN].
un1_qtz = np.linspace(-MAX_UN, MAX_UN, BINS)
un2_qtz = np.linspace(-MAX_UN, MAX_UN, BINS)

# Bind eight module-level names to 0.
# NOTE(review): presumably placeholders for the observation components.
(x, y, x_vel, y_vel, unknown1, unknown2, leg1, leg2) = (0, 0, 0, 0, 0, 0, 0, 0)

env = gym.make('LunarLander-v2')
# The agent receives the action ids 0..n-1 and a 7-axis shape tuple: six axes
# of length NBINS and one axis of length env.action_space.n.
# NOTE(review): the grids above are sized with BINS while the shape uses
# NBINS, which is not defined in this excerpt -- confirm the two agree.
learner = QLearningAgent(
    env, LEARNING_RATE, DISCOUNT, EXPLORATION, range(env.action_space.n),
    (NBINS, NBINS, NBINS, NBINS, NBINS, NBINS, env.action_space.n))

# Replace the agent's values with the array saved in 'lunar_lander_fail.npy'.
if USE_EXTERNAL:
    # NOTE(review): the message is printed before np.load runs, so it is
    # emitted even when loading subsequently fails.
    print('loaded')
    learner.values = np.load('lunar_lander_fail.npy')


def extract_state(obs):
    """
    extract state via this function so that it is DRY
    :param obs: gym observation
    """
    def qtz(val, lns):
        return int(np.digitize(val, lns))