# Training setup: hyperparameters, replay buffer, online/target networks,
# optimizer and the exploration (epsilon) schedule.
num_frames = 1000000
batch_size = 32
gamma = 0.99
record_idx = 10000
replay_initial = 10000

replay_buffer = ReplayBuffer(100000)

# Online network, initialised from the pretrained checkpoint. The checkpoint
# is mapped onto the CPU first so it loads regardless of the device it was
# saved from; the model is moved to the GPU below when USE_CUDA is set.
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load("model_pretrained.pth", map_location='cpu'))

# Second network built with the same arguments.
# NOTE(review): copy_from is defined elsewhere; presumably it copies the
# weights of `model` into `target_model` -- confirm.
target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model.copy_from(model)

if USE_CUDA:
    model = model.cuda()
    target_model = target_model.cuda()
    print("Using cuda")

# Build the optimizer only after any device move, so it is constructed over
# the parameters in the location where they will actually be optimized.
optimizer = optim.Adam(model.parameters(), lr=0.00001)

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000


def epsilon_by_frame(frame_idx):
    """Return the exploration rate for frame number *frame_idx*.

    Computes
    ``epsilon_final + (epsilon_start - epsilon_final) * exp(-frame_idx / epsilon_decay)``
    using the module-level ``epsilon_*`` values at call time. With the values
    above this is ``0.01 + 0.99 * exp(-frame_idx / 30000)``: exactly 1.0 at
    frame 0, about 0.37 after 30000 frames, and within 1e-6 of 0.01 well
    before half of ``num_frames`` has elapsed.
    """
    return epsilon_final + (epsilon_start - epsilon_final) * math.exp(
        -1. * frame_idx / epsilon_decay)


losses = []
all_rewards = []
# NOTE(review): replay_initial is not used in this snippet; presumably it is
# the number of frames gathered in the replay buffer before learning starts
# -- confirm against the training loop.
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

# Online network, initialised from the pretrained checkpoint. The checkpoint
# is mapped onto the CPU first so it loads regardless of the device it was
# saved from; the model is moved to the GPU below when USE_CUDA is set.
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(
    torch.load("model_pretrained.pth", map_location='cpu'))

# Second network built with the same arguments.
# NOTE(review): copy_from is defined elsewhere; presumably it copies the
# weights of `model` into `target_model` -- confirm.
target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model.copy_from(model)

if USE_CUDA:
    model = model.cuda()  # move the online network to the GPU
    target_model = target_model.cuda()
    print("Using cuda")

# Adam over the online network's parameters only. Built after any device
# move, so it is constructed over the parameters in the location where they
# will actually be optimized.
optimizer = optim.Adam(model.parameters(), lr=0.0001)

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000  # decay constant (in frames) of epsilon_by_frame below


def epsilon_by_frame(frame_idx):
    """Return the exploration rate for frame number *frame_idx*.

    Computes
    ``epsilon_final + (epsilon_start - epsilon_final) * exp(-frame_idx / epsilon_decay)``
    using the module-level ``epsilon_*`` values at call time, i.e. an
    exponential decay from ``epsilon_start`` towards ``epsilon_final``.
    """
    return epsilon_final + (epsilon_start - epsilon_final) * math.exp(
        -1. * frame_idx / epsilon_decay)


losses = []
all_rewards = []
episode_reward = 0

state = env.reset()  # initial state
# Evaluation setup: build the wrapped environment, restore a trained network
# in eval mode, and initialise the bookkeeping used by the rest of the script.
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
# Map the checkpoint onto the CPU while loading: without map_location a
# checkpoint saved from a GPU cannot be restored on a CPU-only machine, and
# this script explicitly supports running without CUDA (see USE_CUDA below).
model.load_state_dict(torch.load('trained_model.pth', map_location='cpu'))
model.eval()  # switch the module to evaluation mode

if USE_CUDA:
    model = model.cuda()

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000

losses = []
all_rewards = []
episode_reward = 0
loss_list = []
reward_list = []

state = env.reset()

# 8000 distinct frame indices drawn without replacement from
# [2000, num_frames).
# NOTE(review): presumably the frames at which data is recorded later in the
# script -- confirm against the code that consumes frame_list.
frame_list = random.sample(range(2000, num_frames), 8000)