# Select GPU when available; every model/tensor below is moved to this device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Current usable device is: ", device)

########################################
# Model hyperparameters
input_size = 4        # Size of state (CartPole observation vector — presumably; confirm against env)
output_size = 2       # Number of discrete actions
batch_size = 128      # minibatch size sampled from replay memory
GAMMA = 0.999         # discount factor (per the usual DQN convention — confirm in training loop)
EPS_START = 0.9       # epsilon-greedy exploration schedule: start value...
EPS_END = 0.05        # ...floor value...
EPS_DECAY = 200       # ...decay constant (semantics depend on the schedule code — verify)
target_update = 10    # presumably episodes between target-network syncs — confirm in training loop

# Create the models: policy network (trained) and target network (frozen copy).
policy_net = DQN(input_size, output_size).to(device)
target_net = DQN(input_size, output_size).to(device)
# Start the target net as an exact copy of the policy net.
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()  # target net is never trained directly

# Set up replay memory (fixed capacity of 10000 transitions)
memory = ReplayMemory(10000)

# Set up optimizer (Adam with library-default learning rate)
optimizer = optim.Adam(policy_net.parameters())

########################################
# Start training
num_episodes = 500
ckpt_dir = "DDDQN_CartPoleV1_obs_checkpoints/"
save_ckpt_interval = 100   # save a checkpoint every this many episodes — presumably; verify in loop
# Turn on pyplot's interactive mode # VERY IMPORTANT because otherwise training stats plot will hault plt.ion() # Create OpenAI gym environment env = gym.make(env_name) if is_unwrapped: env = env.unwrapped # Get device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print("Current usable device is: ", device) # Create the models policy_net = DQN(input_size, output_size).to(device) target_net = DQN(input_size, output_size).to(device) target_net.load_state_dict(policy_net.state_dict()) target_net.eval() # Set up replay memory memory = ReplayMemory(replaybuffer_size) # Set up optimizer - Minimal # optimizer = optim.Adam(policy_net.parameters()) optimizer = optim.SGD(policy_net.parameters(), lr=learning_rate) ################################################################### # Start training # Dictionary for extra training information to save to checkpoints
def load_checkpoint(file_dir, i_episode, input_size, output_size, device='cuda'):
    """Restore networks, optimizer, and extra training info from a checkpoint.

    Loads ``ckpt_eps<i_episode>.pt`` from ``file_dir`` and rebuilds the
    policy/target networks and the (SGD) optimizer from its saved state.

    Args:
        file_dir: Directory containing the checkpoint files.
        i_episode: Episode number identifying which checkpoint to load.
        input_size: State-vector size passed to the DQN constructor.
        output_size: Number of discrete actions passed to the DQN constructor.
        device: Device to place the networks on (default ``'cuda'``).

    Returns:
        Tuple ``(policy_net, target_net, optimizer, checkpoint)`` where
        ``checkpoint`` is the remaining dict of extra saved entries after the
        consumed keys have been removed.
    """
    # map_location remaps saved tensors to the requested device: without it,
    # a checkpoint written on a GPU machine fails to load when device='cpu'.
    checkpoint = torch.load(
        os.path.join(file_dir, "ckpt_eps%d.pt" % i_episode),
        map_location=device,
    )

    policy_net = DQN(input_size, output_size).to(device)
    policy_net.load_state_dict(checkpoint["policy_net"])
    policy_net.train()  # policy net resumes training mode

    target_net = DQN(input_size, output_size).to(device)
    target_net.load_state_dict(checkpoint["target_net"])
    target_net.eval()   # target net stays in inference mode

    # Recreate the optimizer with the saved learning rate, then restore its
    # internal state (momentum buffers, step counters, ...).
    learning_rate = checkpoint["learning_rate"]
    # optimizer = optim.Adam(policy_net.parameters())
    optimizer = optim.SGD(policy_net.parameters(), lr=learning_rate)
    optimizer.load_state_dict(checkpoint["optimizer"])

    # Strip the keys consumed above so only extra training info remains.
    for consumed in ("policy_net", "target_net", "optimizer",
                     "i_episode", "learning_rate"):
        checkpoint.pop(consumed)

    return policy_net, target_net, optimizer, checkpoint