# ---- Fragment 1: n-step DQN with epsilon-greedy exploration. The fragment
# starts mid-argparse and is truncated inside process_batch; the argparse
# prefix, the imports, and the end of process_batch are reconstructed below,
# with assumptions marked where they are not part of the original.
import argparse

import torch
from torch.optim import Adam
from gym import make
from ptan.agent import DQNAgent, TargetNet
from ptan.actions import EpsilonGreedyActionSelector
from ptan.experience import ExperienceReplayBuffer, ExperienceSourceFirstLast

import common      # assumption: repo-local module providing calc_loss_dqn, EpsilonTracker
import dqn_model   # assumption: repo-local module providing the DQN network
from wrappers import wrap_dqn  # assumption: repo-local wrappers module, as in Fragment 2

# `params` (attribute-style hyperparameters: env_name, epsilon_start, gamma,
# replay_size, learning_rate, ...) and DEFAULT_N_STEPS are defined elsewhere
# in the repo and are not shown in this fragment.

parser = argparse.ArgumentParser()
parser.add_argument("--cuda", default=True, action="store_true",
                    help="Enable cuda")
parser.add_argument("-n", type=int, default=DEFAULT_N_STEPS,
                    help="steps to do on Bellman unroll")
args = parser.parse_args()
device = torch.device("cuda" if args.cuda else "cpu")

env = make(params.env_name)
env = wrap_dqn(env)
env.seed(123)

net = dqn_model.DQN(env.observation_space.shape, env.action_space.n).to(device)
tgt_net = TargetNet(net)
selector = EpsilonGreedyActionSelector(epsilon=params.epsilon_start)
epsilon_tracker = common.EpsilonTracker(selector, params)
agent = DQNAgent(net, selector, device=device)
exp_source = ExperienceSourceFirstLast(env, agent, gamma=params.gamma,
                                       steps_count=args.n)
buffer = ExperienceReplayBuffer(exp_source, buffer_size=params.replay_size)
optimizer = Adam(net.parameters(), lr=params.learning_rate)

def process_batch(engine, batch):
    optimizer.zero_grad()
    # The original fragment is truncated after the first arguments of this
    # call; the rest of the function is a reconstruction of the standard
    # n-step DQN update (bootstrapped value discounted by gamma**n).
    loss = common.calc_loss_dqn(batch, net, tgt_net.target_model,
                                gamma=params.gamma ** args.n, device=device)
    loss.backward()
    optimizer.step()
    epsilon_tracker.frame(engine.state.iteration)  # decay epsilon over frames
    if engine.state.iteration % params.target_net_sync == 0:
        tgt_net.sync()  # copy the online network into the target network
    return {"loss": loss.item(), "epsilon": selector.epsilon}
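# A minimal sketch of how process_batch would be driven, assuming
# pytorch-ignite (suggested by its (engine, batch) signature) and assuming
# params also carries replay_initial and batch_size fields; the
# batch_generator helper here is hypothetical, not part of the fragment.
from ignite.engine import Engine

def batch_generator():
    buffer.populate(params.replay_initial)  # warm up the replay buffer
    while True:
        buffer.populate(1)                  # one env step per training batch
        yield buffer.sample(params.batch_size)

engine = Engine(process_batch)
# Runs until stopped externally (e.g., an event handler that calls
# engine.terminate() once the game is solved).
engine.run(batch_generator())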
# ---- Fragment 2: noisy-net n-step DQN. NoisyDQN explores through learned
# parameter noise, so a plain ArgmaxActionSelector replaces epsilon-greedy.
# The fragment starts mid-argparse and ends just inside the training loop;
# the imports and argparse prefix are reconstructed (module paths for the
# repo-local imports are assumptions), and the duplicated "writer = writer ="
# assignment is fixed.
import argparse

import gym
import torch
from torch import optim
from torch.utils.tensorboard import SummaryWriter
from ptan.agent import DQNAgent, TargetNet
from ptan.actions import ArgmaxActionSelector
from ptan.experience import ExperienceReplayBuffer, ExperienceSourceFirstLast

import wrappers
from common import HYPERPARAMS, RewardTracker, calc_loss_dqn
from model import NoisyDQN  # assumption: NoisyDQN lives in a repo-local model module

parser = argparse.ArgumentParser()
parser.add_argument("-n", default=1, type=int,
                    help='Enter the number of steps to unroll bellman eq')
args = parser.parse_args()

print('Starting...')
params = HYPERPARAMS['pong']
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('Running on Device {}'.format(device))
writer = SummaryWriter(comment="-" + params['run_name']
                               + "-%d-step noisy-net" % args.n)

env = gym.make(params['env_name'])
env = wrappers.wrap_dqn(env)
# print(env.observation_space.shape, env.action_space.n)

net = NoisyDQN(env.observation_space.shape, env.action_space.n).to(device)
target_net = TargetNet(net)
agent = DQNAgent(net, ArgmaxActionSelector(), device)
experience_source = ExperienceSourceFirstLast(env, agent, params['gamma'],
                                              steps_count=args.n)
buffer = ExperienceReplayBuffer(experience_source,
                                buffer_size=params['replay_size'])
optimizer = optim.Adam(net.parameters(), lr=params['learning_rate'])

frame_idx = 0
with RewardTracker(writer, params['stop_reward']) as reward_tracker:
    while True:
        frame_idx += 1
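        # The original fragment is truncated here. What follows is a minimal
        # sketch of the rest of the standard ptan training loop, assuming the
        # usual hyperparameter keys ('replay_initial', 'batch_size',
        # 'target_net_sync') and the calc_loss_dqn helper imported above.
        buffer.populate(1)  # play one env step and store the transition

        new_rewards = experience_source.pop_total_rewards()
        if new_rewards:
            # RewardTracker.reward returns True once the mean reward
            # reaches params['stop_reward']
            if reward_tracker.reward(new_rewards[0], frame_idx):
                break

        if len(buffer) < params['replay_initial']:
            continue  # keep filling the buffer before training starts

        optimizer.zero_grad()
        batch = buffer.sample(params['batch_size'])
        # n-step unroll: the bootstrapped value is discounted by gamma**n
        loss_v = calc_loss_dqn(batch, net, target_net.target_model,
                               gamma=params['gamma'] ** args.n, device=device)
        loss_v.backward()
        optimizer.step()

        if frame_idx % params['target_net_sync'] == 0:
            target_net.sync()  # copy the online network into the target network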