import torch
import torch.optim as optim

from dqn import QLearner, compute_td_loss, ReplayBuffer

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"  # Atari environment to be played
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000      # total number of frames to learn from
batch_size = 32           # number of samples fed to the model per update
gamma = 0.99              # discount factor for future rewards
record_idx = 10000
replay_initial = 10000    # number of frames collected before learning starts
replay_buffer = ReplayBuffer(100000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load("model_pretrained.pth", map_location='cpu'))  # load the pretrained weights

target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)  # target network
target_model.copy_from(model)

optimizer = optim.Adam(model.parameters(), lr=0.0001)  # Adam optimizer with the chosen learning rate

if USE_CUDA:
    model = model.cuda()              # move the models to the GPU
    target_model = target_model.cuda()
    print("Using cuda")
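# Not part of the original script: a rough sketch of how the objects above are typically tied
# together in the training loop for this assignment. The QLearner.act, ReplayBuffer.push and
# compute_td_loss signatures live in dqn.py and are assumed here, so adjust to match that file.
import math

state = env.reset()
for frame_idx in range(1, num_frames + 1):
    # exponentially decaying exploration rate (same formula as the epsilon_by_frame schedules
    # used in the other scripts in this set)
    epsilon = 0.01 + (1.0 - 0.01) * math.exp(-1. * frame_idx / 30000)

    action = model.act(state, epsilon)                 # assumed epsilon-greedy helper on QLearner
    next_state, reward, done, _ = env.step(action)
    replay_buffer.push(state, action, reward, next_state, done)
    state = env.reset() if done else next_state

    if len(replay_buffer) > replay_initial:
        loss = compute_td_loss(model, target_model, batch_size, gamma, replay_buffer)  # assumed signature
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if frame_idx % 10000 == 0:
        target_model.copy_from(model)                  # re-sync the target network (interval is illustrative)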
import os.path as op

import matplotlib.pyplot as plt
import torch

from dqn import QLearner, compute_td_loss, ReplayBuffer

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load('trained_model.pth'))
model.eval()

if USE_CUDA:
    model = model.cuda()

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000

losses = []
all_rewards = []
episode_reward = 0
loss_list = []
reward_list = []
import math

import torch
import torch.optim as optim

from dqn import QLearner, compute_td_loss, ReplayBuffer

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99
record_idx = 10000
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load("model_pretrained.pth", map_location='cpu'))

target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model.copy_from(model)

optimizer = optim.Adam(model.parameters(), lr=0.00001)

if USE_CUDA:
    model = model.cuda()
    target_model = target_model.cuda()
    print("Using cuda")

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)
import sys

import torch

from dqn import QLearner, compute_td_loss, ReplayBuffer

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load(sys.argv[1], map_location='cpu'))  # path to the trained weights
model.eval()

if USE_CUDA:
    model = model.cuda()
    print("Using cuda")

env.seed(1)
state = env.reset()
done = False
games_won = 0

while not done:
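    # The body of this loop is missing from the original excerpt. The sketch below assumes
    # QLearner exposes an act(state, epsilon) helper (it is used that way in the companion
    # scripts); epsilon = -1 follows the same "no random actions" convention used there.
    action = model.act(state, -1)
    state, reward, done, _ = env.step(action)
    if reward > 0:              # in Pong, a +1 reward means the agent won a point
        games_won += 1

print("games_won:", games_won)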
import math
import sys

import torch
import torch.optim as optim

from dqn import QLearner, compute_td_loss, ReplayBuffer

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1500000
batch_size = 32
gamma = 0.9
replay_initial = 10000
replay_buffer = ReplayBuffer(1000000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
optimizer = optim.Adam(model.parameters(), lr=0.00001)

if USE_CUDA:
    model = model.cuda()

epsilon_start = 0.1
epsilon_final = 0.1
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

losses = []
all_rewards = []
episode_reward = 0

state = env.reset()
env = wrap_pytorch(env)

train_num_frames = 5000000
sample_num_frames = 50000
batch_size = 32
gamma = 0.99
target_update = 50000
epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 1000000
replay_initial = 10000
learning_rate = 1e-5

train_replay_buffer = ReplayBuffer(100000)
analysis_replay_buffer = ReplayBuffer(100000)

policy_model = QLearner(env, train_num_frames, batch_size, gamma, train_replay_buffer)
target_model = QLearner(env, train_num_frames, batch_size, gamma, train_replay_buffer)
target_model.load_state_dict(policy_model.state_dict())
target_model.eval()

optimizer = optim.Adam(policy_model.parameters(), lr=learning_rate)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if USE_CUDA:
    policy_model = policy_model.to(device)
    target_model = target_model.to(device)

epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)
plt.figure(2)
plt.plot([reward[0] for reward in all_rewards],
         [reward[1] for reward in all_rewards])
plt.xlabel('Frame #')
plt.ylabel('Episode Reward')
plt.savefig(f'rewards_lr={lr}.pdf')

USE_CUDA = torch.cuda.is_available()

# Set up the game environment
env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

replay_buffer = ReplayBuffer(replay_buff_size)  # experience replay buffer

# Create the policy model and load the pretrained weights
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load("model_pretrained.pth", map_location='cpu'))
model.eval()

# Create the target model and copy the policy weights into it
target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model.copy_from(model)

# Optimizer for the model's parameters
optimizer = optim.Adam(model.parameters(), lr=lr)

if USE_CUDA:
    model = model.cuda()
    target_model = target_model.cuda()
    print("Using cuda")

# Exponentially decaying epsilon: explore heavily at first, then exploit as frame_idx grows
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)
import math

import torch
import torch.nn.functional as F
import torch.optim as optim

from dqn import QLearner, compute_td_loss, ReplayBuffer

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1400000
batch_size = 32
gamma = 0.99
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
optimizer = optim.Adam(model.parameters(), lr=0.00001)

if USE_CUDA:
    model = model.cuda()

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

losses = []
all_rewards = []
episode_reward = 0

state = env.reset()
import math

import torch
import torch.optim as optim

from dqn import QLearner, compute_td_loss, ReplayBuffer

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000      # total number of frames to learn from
batch_size = 32           # number of samples provided to the model per update
gamma = 0.99
record_idx = 10000
replay_initial = 10000    # number of frames collected before updates begin
replay_buffer = ReplayBuffer(100000)

# QLearner is defined in dqn.py; the pretrained weights come from the .pth file below
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load("model_pretrained.pth", map_location='cpu'))

target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model.copy_from(model)

optimizer = optim.Adam(model.parameters(), lr=0.00001)  # lr is the learning rate

if USE_CUDA:  # move the models to the GPU if one is available
    model = model.cuda()
    target_model = target_model.cuda()
    print("Using cuda")

epsilon_start = 1.0   # initial exploration rate (probability of taking a random action)
epsilon_final = 0.01  # the exploration rate decays towards this value
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)
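# Not part of the original script: a quick check of what the schedule above produces, to make
# the role of epsilon concrete. Early frames act almost entirely at random; by ~100k frames
# the agent is nearly always greedy.
for f in (0, 10000, 30000, 100000):
    print(f, round(epsilon_by_frame(f), 3))
# 0       1.0
# 10000   0.719
# 30000   0.374
# 100000  0.045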
import hashlib
import sys

import torch
import torch.autograd as autograd

from dqn import QLearner, compute_td_loss, ReplayBuffer

USE_CUDA = torch.cuda.is_available()

# wrap tensors in autograd Variables, moving them to the GPU when available
Variable = lambda *args, **kwargs: (autograd.Variable(*args, **kwargs).cuda()
                                    if USE_CUDA else autograd.Variable(*args, **kwargs))

# initialize the game environment
env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

# initialize the learner with the same parameters it was trained with
batch_size = 32
gamma = 0.99
replay_buffer = ReplayBuffer(100000)
model = QLearner(env, 5000000, batch_size, gamma, replay_buffer)

if USE_CUDA:
    model = model.cuda()

losses = []
all_rewards = []
episode_reward = 0

# load the saved model
if len(sys.argv) > 1:
    checkpoint = torch.load(sys.argv[1])
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

# no random actions during this eval stage
epsilon = -1
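# Not part of this script: for reference, a checkpoint in the format expected above
# (a dict with a 'state_dict' key) would be written during training roughly like this.
# The file name is only an example.
torch.save({'state_dict': model.state_dict()}, 'checkpoint.pth')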
env = wrap_pytorch(env)

num_frames = 100000
#num_frames = 20000
batch_size = 32
gamma = 0.99
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

"""loading the saved model"""
device = torch.device("cuda")
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
filename = 'newdqnModel.pt'
model.load_state_dict(torch.load(filename))
model.to(device)
model.eval()

"""choosing 1000 frames randomly!"""
frame_range = 50000
frame_list = set(random.sample(range(1, frame_range), 1000))
vis_feature_matrix = []
vis_rewards = []
vis_actions = []

state = env.reset()
indx = 0
episode_reward = 0
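# Not part of the original excerpt: once vis_feature_matrix, vis_rewards and vis_actions have
# been filled for the sampled frames, a common way to inspect the learned representation is a
# 2-D t-SNE embedding colored by the chosen action. This sketch assumes scikit-learn and
# matplotlib are available and that each row of vis_feature_matrix is a per-frame feature vector.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

features = np.asarray(vis_feature_matrix)
embedded = TSNE(n_components=2).fit_transform(features)

plt.figure()
plt.scatter(embedded[:, 0], embedded[:, 1], c=vis_actions, cmap='tab10', s=5)
plt.colorbar(label='action')
plt.savefig('tsne_features.pdf')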
import math
import pickle as pkl

import torch
import torch.optim as optim

from dqn import QLearner, compute_td_loss, ReplayBuffer

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 2000000
batch_size = 32
gamma = 0.99
replay_initial = 20000
replay_buffer = ReplayBuffer(200000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
optimizer = optim.Adam(model.parameters(), lr=0.00001)

if USE_CUDA:
    model = model.cuda()

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

losses = []
all_rewards = []
episode_reward = 0

state = env.reset()
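# Not part of the original excerpt: a guess at what the pickle import is for, dumping the
# collected training statistics so they can be plotted later. The file names are examples only.
with open('losses.pkl', 'wb') as f:
    pkl.dump(losses, f)
with open('all_rewards.pkl', 'wb') as f:
    pkl.dump(all_rewards, f)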
import math

import torch
import torch.optim as optim

from dqn import QLearner, compute_td_loss, ReplayBuffer

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32           # number of samples fed to the model per update
gamma = 0.99
record_idx = 10000
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
model.load_state_dict(torch.load("modelsave.pth", map_location='cpu'))

target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model.copy_from(model)  # a frozen copy keeps the TD targets stable between syncs

optimizer = optim.Adam(model.parameters(), lr=0.0001)

if USE_CUDA:  # move the models to the GPU if one is available
    model = model.cuda()
    target_model = target_model.cuda()
    print("Using cuda")

epsilon_start = 0.5   # initial exploration rate (probability of a random action)
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)
import math

import torch
import torch.optim as optim

from dqn import QLearner, compute_td_loss, ReplayBuffer

USE_CUDA = torch.cuda.is_available()

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)

num_frames = 1000000
batch_size = 32
gamma = 0.99
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)
t_replay_buffer = ReplayBuffer(100000)

model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
target_model = QLearner(env, num_frames, batch_size, gamma, t_replay_buffer)
target_model.load_state_dict(model.state_dict())

optimizer = optim.Adam(model.parameters(), lr=0.00001)

if USE_CUDA:
    model = model.cuda()
    target_model = target_model.cuda()

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

losses = []
# DISCOUNTED REWARD CONSTANT
gamma = 0.99

# ...
record_idx = 10000

# ...
replay_initial = 10000

# STORES TRANSITIONS (STATE, ACTION, REWARD, NEXT STATE) FROM TRAINING EXPERIENCE;
# MINIBATCHES ARE RANDOMLY SAMPLED FROM IT FOR EACH UPDATE
replay_buffer = ReplayBuffer(100000)

# CONTAINS THE NETWORK THAT APPROXIMATES THE Q-FUNCTION (IN PLACE OF A Q-TABLE)
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)

# LOADS THE SAVED NETWORK WEIGHTS
model.load_state_dict(torch.load(file_name, map_location='cpu'))

# CONTAINS THE TARGET NETWORK THAT APPROXIMATES THE Q-FUNCTION
target_model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)

# DUPLICATE THE POLICY NETWORK INTO THE TARGET (APPROXIMATES Q*)
target_model.copy_from(model)

# INITIALIZE OPTIMIZER, USING THE ADAM METHOD TO UPDATE THE PARAMETERS
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# CUDA (GFX CARD) SETUP
if USE_CUDA:
    model = model.cuda()
    target_model = target_model.cuda()
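# Not part of the original file: a minimal sketch of what a replay buffer like the one
# imported from dqn.py typically looks like. The real ReplayBuffer in dqn.py may differ in
# its details, so treat this purely as an illustration of the data structure.
from collections import deque
import random

import numpy as np


class SimpleReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)   # oldest transitions fall off the front

    def push(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # uniform random minibatch of stored transitions
        batch = random.sample(self.buffer, batch_size)
        state, action, reward, next_state, done = zip(*batch)
        return (np.stack(state), np.array(action), np.array(reward),
                np.stack(next_state), np.array(done))

    def __len__(self):
        return len(self.buffer)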