示例#1
0
                        default=True,
                        action="store_true",
                        help="Enable cuda")
    # "-n": integer option, defaulting to DEFAULT_N_STEPS; its value is passed
    # as steps_count to the experience source created below.
    parser.add_argument("-n",
                        type=int,
                        default=DEFAULT_N_STEPS,
                        help="steps to do on Bellman unroll")
    args = parser.parse_args()
    # NOTE(review): this rebinds the name `device` to the result of calling
    # it. If this code sits inside a function body, `device` is a local for
    # the whole function and this call raises UnboundLocalError; at module
    # level it works but shadows the callable afterwards -- confirm the scope.
    # NOTE(review): the option declared just above "-n" combines default=True
    # with action="store_true"; if that option is the cuda flag, args.cuda can
    # never be False and "cpu" is unreachable here -- confirm.
    device = device("cuda" if args.cuda else "cpu")

    # Create the environment named by params.env_name, wrap it with wrap_dqn
    # and seed it with the fixed value 123.
    env = make(params.env_name)
    env = wrap_dqn(env)
    env.seed(123)
    # Build the DQN from the environment's observation shape and action count
    # and move it to the selected device.
    net = dqn_model.DQN(env.observation_space.shape,
                        env.action_space.n).to(device)
    # Wrap the network in TargetNet (presumably a separately held copy used
    # for target values -- verify against TargetNet's definition).
    tgt_net = TargetNet(net)

    # Epsilon-greedy selector starting at params.epsilon_start. EpsilonTracker
    # receives the selector and params (presumably to update epsilon as
    # training progresses -- verify against common.EpsilonTracker).
    selector = EpsilonGreedyActionSelector(epsilon=params.epsilon_start)
    epsilon_tracker = common.EpsilonTracker(selector, params)
    agent = DQNAgent(net, selector, device=device)
    # Experience source over (env, agent) configured with params.gamma and
    # args.n steps; it feeds the replay buffer created right after it.
    exp_source = ExperienceSourceFirstLast(env,
                                           agent,
                                           gamma=params.gamma,
                                           steps_count=args.n)
    buffer = ExperienceReplayBuffer(exp_source, buffer_size=params.replay_size)
    # Adam over the network's parameters with lr=params.learning_rate.
    optimizer = Adam(net.parameters(), lr=params.learning_rate)

    def process_batch(engine, batch):
        optimizer.zero_grad()
        loss = common.calc_loss_dqn(batch,
                                    net,
                        default=1,
                        type=int,
                        help='Enter the number of steps to unroll bellman eq')
    args = parser.parse_args()

    print('Starting...')
    # All hyperparameters for this run come from the 'pong' entry.
    params = HYPERPARAMS['pong']
    # Use CUDA when torch reports it as available, otherwise the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Running on Device {}'.format(device))
    # The writer comment is "-<run_name>-<n>-step noisy-net"; `%` binds
    # tighter than `+`, so only the last literal is formatted with args.n.
    writer = SummaryWriter(comment="-" + params['run_name'] +
                           "-%d-step noisy-net" % args.n)
    # Create the environment named in params and wrap it with wrap_dqn.
    env = gym.make(params['env_name'])
    env = wrappers.wrap_dqn(env)
    # Build the NoisyDQN from the environment's observation shape and action
    # count, move it to the selected device, and wrap it in TargetNet.
    net = NoisyDQN(env.observation_space.shape, env.action_space.n).to(device)
    target_net = TargetNet(net)

    # The agent is given an ArgmaxActionSelector; no epsilon-greedy selector
    # is constructed in this setup.
    agent = DQNAgent(net, ArgmaxActionSelector(), device)

    # Experience source over (env, agent) configured with params['gamma'] and
    # args.n steps; it feeds the replay buffer created right after it.
    experience_source = ExperienceSourceFirstLast(env,
                                                  agent,
                                                  params['gamma'],
                                                  steps_count=args.n)
    buffer = ExperienceReplayBuffer(experience_source,
                                    buffer_size=params['replay_size'])

    # Adam over the network's parameters with lr=params['learning_rate'].
    optimizer = optim.Adam(net.parameters(), lr=params['learning_rate'])
    # Frame counter, starting at zero before the loop that follows.
    frame_idx = 0
    with RewardTracker(writer, params['stop_reward']) as reward_tracker:
        while True:
            frame_idx += 1