    # Initialize the loss function
    loss_function = tf.keras.losses.MeanSquaredError()

    # Initialize the optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate)

    # Initialize
    ray.init(log_to_driver=False)
    manager = SampleManager(**kwargs)

    # Where to save your results to: create this directory in advance!
    saving_path = os.getcwd() + "/progress_LunarLander"

    # Initialize buffer
    manager.initilize_buffer(buffer_size)

    # Fill buffer
    manager.store_in_buffer(manager.get_data(total_steps=buffer_size))

    # Initialize progress aggregator
    manager.initialize_aggregator(
        path=saving_path, saving_after=5, aggregator_keys=["loss", "reward", "time"]
    )

    rewards = []

    # Get initial agent
    agent = manager.get_agent()

    print('TRAINING')
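    # --- Hedged sketch, not part of the original example ---------------------
    # One possible gradient step using the loss_function and optimizer created
    # above. `model`, `states`, and `q_targets` are assumed placeholders; the
    # original snippet ends before the training loop, so this only illustrates
    # how the MSE loss and Adam optimizer would typically be applied.
    with tf.GradientTape() as tape:
        q_predictions = model(states)                   # forward pass (assumed model)
        loss = loss_function(q_targets, q_predictions)  # MSE between targets and predictions
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))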
Example #2
        "action_sampling_type": "epsilon_greedy",
        "num_episodes": 20,
        "epsilon": epsilon,
    }

    ray.init(log_to_driver=False)

    manager = SampleManager(**kwargs)
    # where to save your results to: create this directory in advance!
    saving_path = os.getcwd() + "/progress_cartpole"

    # keys for replay buffer -> what you will need for optimization
    optim_keys = ["state", "action", "reward", "state_new", "not_done"]

    # initialize buffer
    manager.initilize_buffer(buffer_size, optim_keys)

    # initialize progress aggregator
    manager.initialize_aggregator(path=saving_path,
                                  saving_after=5,
                                  aggregator_keys=["loss", "time_steps"])

    # initial testing:
    print("test before training: ")
    manager.test(test_steps, test_episodes=10, do_print=True, render=True)

    # get initial agent
    agent = manager.get_agent()

    for e in range(epochs):
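        # --- Hedged sketch, not part of the original example -----------------
        # The snippet ends at the start of the epoch loop. A typical body
        # would first collect fresh experience and push it into the replay
        # buffer before sampling a batch for the Q-learning update. Only
        # get_data and store_in_buffer below appear elsewhere in these
        # examples; the step count of 100 is an arbitrary assumption.
        print(f"collecting experience for epoch {e}..")
        data = manager.get_data(total_steps=100)
        manager.store_in_buffer(data)
        # ...followed (not shown here) by: sample a batch matching the
        # optim_keys above, compute TD targets, apply a gradient step, and
        # report the loss to the aggregator.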
Example #3
        "model_kwargs": model_kwargs,
        "action_sampling_type": "epsilon_greedy",
        "epsilon": EPSILON
    }

    manager = SampleManager(**kwargs)

    # specify where to save results and ensure that the folder exists
    saving_path = Path(os.getcwd() + SAVING_DIRECTORY)
    saving_path.mkdir(parents=True, exist_ok=True)
    saving_path_model = Path(os.getcwd() + SAVING_DIRECTORY + '/model')
    saving_path_model.mkdir(parents=True, exist_ok=True)

    # initialize manager
    optim_keys = ['state', 'action', 'reward', 'state_new', 'not_done']
    manager.initilize_buffer(BUFFER_SIZE, optim_keys)
    aggregator_keys = ['loss', 'time_steps', 'reward']
    manager.initialize_aggregator(saving_path, 5, aggregator_keys)

    # initialize the optimizer
    optimizer = Adam(learning_rate=LEARNING_RATE)

    print('# =============== INITIAL TESTING =============== #')
    manager.test(MAX_TEST_STEPS, 5, evaluation_measure='time_and_reward', do_print=True, render=True)

    # get the initial agent
    agent = manager.get_agent()

    print('# =============== START TRAINING ================ #')
    for e in range(1, EPOCHS+1):
        print(f'# ============== EPOCH {e}/{EPOCHS} ============== #')
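        # Hedged note, not part of the original snippet: a full epoch here
        # would typically gather experience via the manager, sample batches
        # keyed by optim_keys, take gradient steps with the Adam optimizer
        # created above, update the aggregator with the aggregator_keys, and
        # periodically write model checkpoints to saving_path_model.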