# Initialize the loss function
loss_function = tf.keras.losses.MeanSquaredError()

# Initialize the optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate)

# Initialize ray and the sample manager
ray.init(log_to_driver=False)
manager = SampleManager(**kwargs)

# Where to save your results to: create this directory in advance!
saving_path = os.getcwd() + "/progress_LunarLander"

# Initialize the replay buffer ("initilize_buffer" is the manager's spelling)
manager.initilize_buffer(buffer_size)

# Fill the buffer with an initial batch of experience
manager.store_in_buffer(manager.get_data(total_steps=buffer_size))

# Initialize the progress aggregator
manager.initialize_aggregator(
    path=saving_path, saving_after=5, aggregator_keys=["loss", "reward", "time"]
)

rewards = []

# Get the initial agent
agent = manager.get_agent()

print("TRAINING")
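# --------------------------------------------------------------------------
# Hedged sketch, not part of the original script: one way the training loop
# announced by print("TRAINING") could look. It assumes the SampleManager
# methods used above plus `sample`, `set_agent`, and `update_aggregator`,
# an agent whose Keras Q-network is exposed as `agent.model`, buffer keys
# matching the optim_keys convention of the other scripts, and assumed
# hyperparameters `epochs`, `sample_size`, and `gamma`.
# --------------------------------------------------------------------------
for e in range(epochs):
    # gather fresh experience and mix it into the replay buffer
    manager.store_in_buffer(manager.get_data())
    sample_dict = manager.sample(sample_size)

    states = tf.convert_to_tensor(sample_dict["state"], dtype=tf.float32)
    actions = tf.cast(tf.convert_to_tensor(sample_dict["action"]), tf.int32)
    reward_batch = tf.convert_to_tensor(sample_dict["reward"], dtype=tf.float32)
    states_new = tf.convert_to_tensor(sample_dict["state_new"], dtype=tf.float32)
    not_done = tf.convert_to_tensor(sample_dict["not_done"], dtype=tf.float32)

    # TD target: r + gamma * max_a' Q(s', a'), cut off at terminal states
    q_max = tf.reduce_max(agent.model(states_new), axis=-1)
    targets = reward_batch + gamma * not_done * q_max

    # one gradient step on the mean squared TD error
    with tf.GradientTape() as tape:
        q_taken = tf.gather(agent.model(states), actions, batch_dims=1)
        loss = loss_function(targets, q_taken)
    grads = tape.gradient(loss, agent.model.trainable_variables)
    optimizer.apply_gradients(zip(grads, agent.model.trainable_variables))

    # broadcast the updated weights to the remote runners
    manager.set_agent(agent.model.get_weights())
    agent = manager.get_agent()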
"action_sampling_type": "epsilon_greedy", "num_episodes": 20, "epsilon": epsilon, } ray.init(log_to_driver=False) manager = SampleManager(**kwargs) # where to save your results to: create this directory in advance! saving_path = os.getcwd() + "/progress_cartpole" # keys for replay buffer -> what you will need for optimization optim_keys = ["state", "action", "reward", "state_new", "not_done"] # initialize buffer manager.initilize_buffer(buffer_size, optim_keys) # initilize progress aggregator manager.initialize_aggregator(path=saving_path, saving_after=5, aggregator_keys=["loss", "time_steps"]) # initial testing: print("test before training: ") manager.test(test_steps, test_episodes=10, do_print=True, render=True) # get initial agent agent = manager.get_agent() for e in range(epochs):
    'model_kwargs': model_kwargs,
    'action_sampling_type': 'epsilon_greedy',
    'epsilon': EPSILON,
}

manager = SampleManager(**kwargs)

# specify where to save results and ensure that the folders exist
saving_path = Path(os.getcwd() + SAVING_DIRECTORY)
saving_path.mkdir(parents=True, exist_ok=True)
saving_path_model = Path(os.getcwd() + SAVING_DIRECTORY + '/model')
saving_path_model.mkdir(parents=True, exist_ok=True)

# initialize the manager's buffer and progress aggregator
optim_keys = ['state', 'action', 'reward', 'state_new', 'not_done']
manager.initilize_buffer(BUFFER_SIZE, optim_keys)
aggregator_keys = ['loss', 'time_steps', 'reward']
manager.initialize_aggregator(saving_path, 5, aggregator_keys)

# initialize the optimizer
optimizer = Adam(learning_rate=LEARNING_RATE)

print('# =============== INITIAL TESTING =============== #')
manager.test(
    MAX_TEST_STEPS, 5, evaluation_measure='time_and_reward',
    do_print=True, render=True,
)

# get the initial agent
agent = manager.get_agent()

print('# =============== START TRAINING ================ #')
for e in range(1, EPOCHS + 1):
    print(f'# ============== EPOCH {e}/{EPOCHS} ============== #')
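    # Hedged sketch, not in the original snippet: a plausible remainder of
    # the epoch, by analogy with the CartPole script. `train_step` is the
    # same hypothetical helper performing one DQN update; BATCH_SIZE and
    # SAVE_EVERY are assumed constants; the two-value return of manager.test
    # under 'time_and_reward' is an assumption; checkpointing assumes the
    # agent exposes its Keras network as `agent.model`.
    manager.store_in_buffer(manager.get_data())
    sample_dict = manager.sample(BATCH_SIZE)
    loss = train_step(agent, sample_dict)  # hypothetical helper
    manager.set_agent(agent.model.get_weights())
    agent = manager.get_agent()

    # evaluate with the same measure as the initial test and log progress
    time_steps, rewards = manager.test(
        MAX_TEST_STEPS, 5, evaluation_measure='time_and_reward'
    )
    manager.update_aggregator(loss=loss, time_steps=time_steps, reward=rewards)

    # periodically checkpoint the Q-network into the model directory
    if e % SAVE_EVERY == 0:
        agent.model.save_weights(str(saving_path_model / f'epoch_{e}'))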