Example no. 1
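    # Double DQN setup; `env`, `shape`, `actions`, `device`, `params` and `ALGORITHM`
    # are assumed to be defined in the truncated part of the script above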
    net = model.DDQN(shape, actions).to(device)
    tgt_net = ptan.agent.TargetNet(net)

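    # Epsilon-greedy exploration, annealed from eps_start to eps_final over eps_frames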
    selector = ptan.actions.EpsilonGreedyActionSelector()
    eps_tracker = ptan.actions.EpsilonTracker(selector, params.eps_start,
                                              params.eps_final, params.eps_frames)

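    # DQN agent plus a one-step experience source yielding
    # (state, action, reward, last_state) transitions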
    agent = ptan.agent.DQNAgent(net, selector, device=device)
    exp_src = ptan.experience.ExperienceSourceFirstLast(
        env, agent, params.gamma, steps_count=1)

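    # Replay buffer filled from the experience source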
    buffer = ptan.experience.ExperienceReplayBuffer(
        exp_src, params.buffer_size)

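    # Project helper that presumably tracks the running mean reward against the solve threshold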
    mean_monitor = utils.MeanRewardsMonitor(
        env, net, ALGORITHM, params.solve_rewards)

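    # TensorBoard writer logging into the monitor's run directory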
    writer = SummaryWriter(logdir=mean_monitor.runs_dir,
                           comment=params.frame_stack)

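    # Adam optimizer with the learning rate reduced when the reward metric plateaus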
    optimizer = torch.optim.Adam(net.parameters(), lr=params.lr)
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.75, patience=10_000,
        cooldown=10_000, min_lr=params.min_lr, verbose=True)
    print(net)
    print('*'*10, ' Start Training ',
          env.game, ' {} '.format(device), '*'*10)


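    # Step and episode counters for the training loop that follows (not shown here)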
    frame = 0
    episode = 0
Example no. 2
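    # A2C setup; `args` and `data` are assumed to come from the truncated
    # argument-parsing part of the script above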
    params = data.params[args.env]
    utils.update_params(params, args)

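    # A2C benefits from several parallel actors; enforce a minimum of 8 environments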
    params.n_envs = max(params.n_envs, 8)

    device = 'cuda' if args.cuda else 'cpu'
    envs = utils.createEnvs(params, stack_frames=2)
    shape = envs[0].observation_space.shape
    actions = envs[0].action_space.n
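    # Actor-critic network; the agent applies softmax to turn the policy logits into probabilities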
    net = model.A2CNet(shape, actions)
    net.to(device)
    agent = ptan.agent.ActorCriticAgent(net, device=device, apply_softmax=True)

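    # n-step experience source over the parallel environments,
    # batched by the project's BatchGenerator helper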
    exp_src = ptan.experience.ExperienceSourceFirstLast(
        envs, agent, params.gamma, steps_count=params.steps)
    generator = utils.BatchGenerator(exp_src, params)
    mean_monitor = utils.MeanRewardsMonitor(envs[0], net, 'A2C', params.solve_rewards)

    writer = SummaryWriter(logdir=mean_monitor.runs_dir,
                           comment=params.frame_stack)

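    # Adam optimizer; the plateau LR scheduler below is left commented out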
    optimizer = torch.optim.Adam(net.parameters(), lr=params.lr)
    
    # lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    #     optimizer, mode='max', factor=0.75, patience=20000,
    #     cooldown=20000, verbose=True, min_lr=params.min_lr)

    print('# Parameters: ', utils.count_parameters(net))
    print(net)
    print('*'*10, ' Start Training ',
          envs[0].game, ' {} '.format(device), '*'*10)