Example #1
def evolutionary_network_variable_psi_transformable_state(
        simulation_settings, logger, save=False):
    print("evolutionary_network_variable_psi_transformable_state")
    # Weights are saved to / loaded from the .npy file below
    filepath = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//weights_es_variable_psi_transformable_state.npy'

    evo_strategy_parameters = {
        'population_size': 100,
        'action_size': 3,
        'noise_standard_deviation': 0.1,
        'number_of_generations': 200,
        'learning_rate': 0.00025,
        'state_size': 14,
        'max_num_actions': 250
    }

    env = []
    for i in range(evo_strategy_parameters['population_size'] + 1):
        env.append(RocketLander(simulation_settings))

    if save:
        save_file = filepath
        load_file = None
    else:
        save_file = None
        load_file = filepath

    evolutionary_network_transformable_state(env, evo_strategy_parameters,
                                             logger, load_file, save_file)
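
The helper evolutionary_network_transformable_state is not reproduced in this example. Below is a minimal sketch of the kind of evolution-strategies update it presumably drives, reusing the names from evo_strategy_parameters; the flat weights vector and the rollout function evaluate are hypothetical placeholders, not part of the repository.

import numpy as np

def es_generation_sketch(weights, envs, params, evaluate):
    # One generation of a simple evolution-strategies update (sketch only).
    # weights  : flat numpy array of policy parameters (hypothetical)
    # envs     : one RocketLander instance per population member
    # params   : the evo_strategy_parameters dict defined above
    # evaluate : hypothetical rollout function (env, weights) -> episode reward
    pop_size = params['population_size']
    sigma = params['noise_standard_deviation']
    alpha = params['learning_rate']

    # Sample one Gaussian perturbation per population member and score it.
    noise = np.random.randn(pop_size, weights.size)
    rewards = np.zeros(pop_size)
    for i in range(pop_size):
        rewards[i] = evaluate(envs[i], weights + sigma * noise[i])

    # Normalise the returns and step the weights along the reward-weighted noise.
    advantage = (rewards - rewards.mean()) / (rewards.std() + 1e-8)
    return weights + alpha / (pop_size * sigma) * noise.T.dot(advantage)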
Example #2
from environments.rocketlander import RocketLander
from constants import LEFT_GROUND_CONTACT, RIGHT_GROUND_CONTACT
import numpy as np

if __name__ == "__main__":
    # Settings holds all the settings for the rocket lander environment.
    settings = {
        'Side Engines': True,
        'Clouds': True,
        'Vectorized Nozzle': True,
        'Starting Y-Pos Constant': 1,
        'Initial Force': 'random'
    }  # 'Initial Force' can also be a fixed (x, y) tuple, e.g. (6000, -10000)

    env = RocketLander(settings)
    s = env._reset()

    from control_and_ai.pid import PID_Benchmark

    # Initialize the PID algorithm
    pid = PID_Benchmark()

    left_or_right_barge_movement = np.random.randint(0, 2)
    epsilon = 0.05
    total_reward = 0
    episode_number = 5

    for episode in range(episode_number):
        while True:
            a = pid.pid_algorithm(s)        # PID controller maps the current lander state to an action
            s, r, done, info = env.step(a)  # assumed continuation; the source snippet is truncated here
            total_reward += r
            if done:
                break
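
The PID_Benchmark controller used above is imported from control_and_ai.pid and is not listed here. For reference, a generic single-axis PID term of the kind such a benchmark typically combines might look like the sketch below; the gains and time step are illustrative assumptions only.

class SingleAxisPIDSketch:
    # Illustrative PID term, not the repository's PID_Benchmark implementation.
    def __init__(self, kp, ki, kd):
        self.kp, self.ki, self.kd = kp, ki, kd
        self.integral = 0.0
        self.previous_error = 0.0

    def compute(self, error, dt=1.0 / 60.0):
        # Accumulate the integral and differentiate the error numerically.
        self.integral += error * dt
        derivative = (error - self.previous_error) / dt
        self.previous_error = error
        return self.kp * error + self.ki * self.integral + self.kd * derivative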
Example #3
last_game_average = -1000
last_best_noisy_game = -1000
max_game_average = -1000
noisy_game_no_longer_valid = False

# Create testing environment

settings = {
    'Side Engines': True,
    'Clouds': True,
    'Vectorized Nozzle': True,
    'Starting Y-Pos Constant': 1,
    'Initial Force': 'random'
}  # 'Initial Force' can also be a fixed (x, y) tuple, e.g. (6000, -10000)

env = RocketLander(settings)
env.refresh(render=True)
env.render(mode="human")
env.reset()

print("-- Observations", env.observation_space)
print("-- actionspace", env.action_space)

# Training input: concatenated environment state and action variables
#dataX = np.random.random(( 5,num_env_variables+num_env_actions ))
dataX = tf.placeholder("float", [None, num_env_variables + num_env_actions])

# Only one output for the total score / reward
#dataY = np.random.random((5,1))
dataY = tf.placeholder("float", [None, 1])
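
Only the input and target placeholders appear in this snippet; the model that consumes them is not shown. Assuming the surrounding file imports TensorFlow 1.x as tf and defines num_env_variables and num_env_actions, a minimal continuation that regresses the total reward from dataX could look like this (hidden width and learning rate are assumptions):

hidden = tf.layers.dense(dataX, 64, activation=tf.nn.relu)  # assumed hidden layer width
predicted_reward = tf.layers.dense(hidden, 1)               # single scalar output, matching dataY

loss = tf.reduce_mean(tf.square(predicted_reward - dataY))  # mean squared error
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)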
Example #4
eps = []
eps.append(OUPolicy(0, 0.2, 0.4))
eps.append(OUPolicy(0, 0.2, 0.4))
eps.append(OUPolicy(0, 0.2, 0.4))
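
# OUPolicy is imported from the repository's DDPG exploration code and is not defined in
# this snippet. The class below is a self-contained sketch of an Ornstein-Uhlenbeck noise
# process of the kind DDPG typically uses for exploration; the (mu, theta, sigma) argument
# order mirrors the (0, 0.2, 0.4) calls above but is only an assumption about OUPolicy.
import numpy as np

class OUNoiseSketch:
    def __init__(self, mu=0.0, theta=0.2, sigma=0.4):
        self.mu, self.theta, self.sigma = mu, theta, sigma
        self.state = mu

    def sample(self):
        # Mean-reverting drift towards mu plus Gaussian diffusion noise.
        self.state += self.theta * (self.mu - self.state) + self.sigma * np.random.randn()
        return self.state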

simulation_settings = {'Side Engines': True,
                       'Clouds': True,
                       'Vectorized Nozzle': True,
                       'Graph': False,
                       'Render': False,
                       'Starting Y-Pos Constant': 1,
                       'Initial Force': 'random',
                       'Rows': 1,
                       'Columns': 2,
                       'Episodes': 300}
env = RocketLander(simulation_settings)
#env = wrappers.Monitor(env, '/tmp/contlunarlander', force=True, write_upon_reset=True)

FLAGS.retrain = True  # when False, previously saved weights are restored instead of retraining
FLAGS.test = False
FLAGS.num_episodes = 300
model_dir = 'C://Users//REUBS_LEN//PycharmProjects//RocketLanding//DDPG//models_unlimited_episodes_full_normalized_normal_state'

agent = DDPG(
    action_bounds,
    eps,
    env.observation_space.shape[0],
    actor_learning_rate=0.0001,
    critic_learning_rate=0.001,
    retrain=FLAGS.retrain,
    log_dir=FLAGS.log_dir,