Example #1
noisy_game_no_longer_valid = False

# Create the testing environment

settings = {
    'Side Engines': True,
    'Clouds': True,
    'Vectorized Nozzle': True,
    'Starting Y-Pos Constant': 1,
    'Initial Force': 'random'
}  # alternative 'Initial Force': (6000, -10000)

env = RocketLander(settings)
env.refresh(render=True)
env.render(mode="human")
env.reset()

print("-- Observations", env.observation_space)
print("-- actionspace", env.action_space)

# Training input placeholder: a batch of concatenated state and action vectors
#dataX = np.random.random(( 5,num_env_variables+num_env_actions ))
dataX = tf.placeholder("float", [None, num_env_variables + num_env_actions])

# Only one output: the total score / reward
#dataY = np.random.random((5,1))
dataY = tf.placeholder("float", [None, 1])

# Placeholder holding states only (num_env_variables columns)
#apdataX = np.random.random(( 5,num_env_variables ))
apdataX = tf.placeholder("float", [None, num_env_variables])
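
The placeholders above assume the TensorFlow 1.x graph API, with num_env_variables and num_env_actions presumably derived from the environment's observation and action spaces. Below is a minimal sketch, not the original example's network, of how such placeholders are fed through a session; the linear model and the dimensions are invented for illustration.

import numpy as np
import tensorflow as tf  # TensorFlow 1.x graph API (tf.placeholder / tf.Session)

# Made-up dimensions standing in for the environment-derived values above
num_env_variables = 8
num_env_actions = 3

dataX = tf.placeholder("float", [None, num_env_variables + num_env_actions])
dataY = tf.placeholder("float", [None, 1])

# A trivial linear model used only to illustrate the feed_dict mechanics
W = tf.Variable(tf.zeros([num_env_variables + num_env_actions, 1]))
b = tf.Variable(tf.zeros([1]))
loss = tf.reduce_mean(tf.square(tf.matmul(dataX, W) + b - dataY))
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch_X = np.random.random((5, num_env_variables + num_env_actions))
    batch_Y = np.random.random((5, 1))
    _, batch_loss = sess.run([train_op, loss],
                             feed_dict={dataX: batch_X, dataY: batch_Y})
    print("batch loss:", batch_loss)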
Example #2
    print("env.action_space", len(env.action_space))
    print("action_bounds", action_bounds)

    PG = PolicyGradient(
        n_x=env.observation_space.shape[0],   # state dimensionality
        n_y=len(env.action_space),            # number of action outputs
        learning_rate=0.001,
        reward_decay=0.99,                    # discount factor for future rewards
        load_path=load_path,
        save_path=save_path,
        epsilon_max=0.95,
        epsilon_greedy_increment=0.01,
        initial_epsilon=0.8
    )

    observation = env.reset()

    left_or_right_barge_movement = np.random.randint(0, 2)
    epsilon = 0.05

    for episode in range(EPISODES):
        while True:
            # 1. Choose an action based on observation
            action = PG.choose_action(observation)

            # 2. Take action in the environment
            observation_, reward, done, info = env.step(action)

            # 3. Store transition for training
            # if reward > -0.20:
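            # --- Sketch (not part of the original listing): a common way this
            # --- loop continues. store_transition() and learn() are assumed
            # --- method names on PolicyGradient, not confirmed by the snippet.
            PG.store_transition(observation, action, reward)

            if done:
                # End of episode: run one policy-gradient update, then restart
                PG.learn()
                break

            # Otherwise carry the new observation into the next step
            observation = observation_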