Exemplo n.º 1
0
    # Define optimizer parameters
    optimizer_parameters = policy_net.parameters()
else:
    # Optimize the policy network jointly with the encoder, the forward model
    # and the inverse model.
    # Each parameters() result is materialized with list() before joining:
    # assuming these are torch.nn.Module instances (TODO confirm),
    # parameters() returns an iterator, and iterators do not support `+`
    # (it raises TypeError). list() also works if they already return lists.
    optimizer_parameters = (
        list(policy_net.parameters())
        + list(encoder.parameters())
        + list(forward_model.parameters())
        + list(inverse_model.parameters())
    )

# Create the optimizer and the replay memory. Both are always constructed the
# same way; saved state is only layered on top when a previous run is loaded.
optimizer = optim.Adam(params=optimizer_parameters, lr=LEARNING_RATE)
memory = ReplayMemory(REPLAY_MEMORY_SIZE)

# `is not None` is the identity check recommended by PEP 8; `!= None` would
# dispatch to a user-defined __ne__ if RUN_TO_LOAD ever had one.
if RUN_TO_LOAD is not None:
    # Restore the optimizer state from the loaded state dict
    optimizer.load_state_dict(OPTIMIZER_STATE_DICT)

    # Refill the replay memory chunk by chunk, in iteration order
    # NOTE(review): presumably these chunks come from the run being loaded;
    # confirm where REPLAY_MEMORY_CHUNKS is populated.
    for chunk in REPLAY_MEMORY_CHUNKS:
        memory.append_storage_chunk(chunk)

# Set up the run-wide counters and accumulators.
# The frame counter always starts at zero.
# NOTE(review): unlike the values below, total_frames is not seeded from a
# module-level constant -- confirm this is intended when resuming a run.
total_frames = 0

# Episode count, reward totals and losses are seeded from module-level values.
(
    total_episodes,
    total_original_rewards,
    total_shaped_rewards,
    total_losses,
) = (
    FINISHED_EPISODES,
    TOTAL_ORIGINAL_REWARDS,
    TOTAL_SHAPED_REWARDS,
    TOTAL_LOSSES,
)

# Wall-clock timestamp taken at the end of initialization.
total_start_time = time.time()

# Initialize episode variables