# --- Training setup: hyperparameters, replay buffers, DQN policy/target nets ---

replay_initial = 10000  # presumably the minimum buffer fill before updates start — confirm against training loop
learning_rate = 1e-5

# Two buffers of equal capacity: one feeds training, the other is (by name)
# reserved for later analysis.
train_replay_buffer = ReplayBuffer(100000)
analysis_replay_buffer = ReplayBuffer(100000)

# Standard DQN pairing: target net starts as an exact copy of the policy net
# (synced via load_state_dict) and is held in eval mode — it is never trained
# directly here.
policy_model = QLearner(env, train_num_frames, batch_size, gamma, train_replay_buffer)
target_model = QLearner(env, train_num_frames, batch_size, gamma, train_replay_buffer)
target_model.load_state_dict(policy_model.state_dict())
target_model.eval()

# Only the policy net's parameters are optimized.
optimizer = optim.Adam(policy_model.parameters(), lr=learning_rate)

# Device falls back to CPU when CUDA is unavailable; the extra USE_CUDA gate
# means models are only moved when that flag (defined elsewhere) is truthy.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if USE_CUDA:
    policy_model = policy_model.to(device)
    target_model = target_model.to(device)

# Epsilon-greedy schedule: exponential decay from epsilon_start toward
# epsilon_final with time constant epsilon_decay, as a function of frame index.
# (epsilon_start / epsilon_final / epsilon_decay are defined elsewhere in the file.)
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)


def play_to_train(num_frames, policy_model, target_model, buffer):
    """Run the DQN training loop for `num_frames` frames.

    Only the bookkeeping initialization is visible in this chunk; the loop
    body continues beyond it.
    """
    losses = []         # per-update loss values
    all_rewards = []    # per-episode returns
    mean_losses = []    # running averages for monitoring/plotting
    mean_rewards = []
    episode_reward = 0  # accumulator for the current episode
    state = env.reset()
    start_training = time.time()  # wall-clock timer for the whole run
# --- Analysis setup: reload a trained model and sample frames for visualization ---

batch_size = 32
gamma = 0.99
replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

# Epsilon schedule constants and helper. NOTE(review): the schedule appears
# unused below — model.act is called with epsilon fixed at 0 (pure greedy).
epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

"""loading the saved model"""
# NOTE(review): device is hard-coded to CUDA here (unlike the guarded
# cuda/cpu fallback used elsewhere in this file) — this raises on a
# CPU-only machine; confirm analysis always runs with a GPU.
device = torch.device("cuda")
model = QLearner(env, num_frames, batch_size, gamma, replay_buffer)
filename = 'newdqnModel.pt'
model.load_state_dict(torch.load(filename))
model.to(device)
model.eval()  # inference only — disables dropout/batch-norm training behavior

"""choosing 1000 frames randomly!"""
# Sample 1000 distinct frame indices from [1, frame_range); stored as a set
# for O(1) membership tests during the playback loop.
frame_range = 50000
frame_list = set(random.sample(range(1, frame_range), 1000))

vis_feature_matrix = []  # features collected at the sampled frames
vis_rewards = []
vis_actions = []

state = env.reset()
indx = 0
episode_reward = 0
# Replay frames with the greedy policy; loop body continues beyond this chunk.
for frame_idx in range(0, frame_range):
    #print(frame_idx)
    action= model.act(state, 0)  # epsilon = 0 → always the argmax-Q action