Example #1
        writer = SummaryWriter(comment="-ddpg-" + name)
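        # Build the DDPG agent; ou_decay_steps presumably controls how the OU exploration noise is annealed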
        if decay_rate is not None:
            agent = agent_model.AgentDDPG(act_net, device=device, ou_decay_steps=decay_rate)
        else:
            agent = agent_model.AgentDDPG(act_net, device=device)
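        # Single-step (state, action, reward, next_state) transitions feed a replay buffer of size replay_size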
        exp_source = Experience.ExperienceSourceFirstLast(env, agent, gamma=gamma, steps_count=1)
        buffer = Experience.ExperienceReplayBuffer(exp_source, buffer_size=replay_size)
        if args.optimizer and args.optimizer == "RMSprop":
            act_opt = optim.RMSprop(act_net.parameters(), lr=lr_actor)
            crt_opt = optim.RMSprop(crt_net.parameters(), lr=lr_critic)
        else:
            act_opt = optim.Adam(act_net.parameters(), lr=lr_actor)
            crt_opt = optim.Adam(crt_net.parameters(), lr=lr_critic)

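        # Restore actor/critic weights and both optimizers from the checkpoint at ckpt_save_path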
        utils.load_agent_state(act_net, crt_net, [act_opt, crt_opt], path=ckpt_save_path)

        frame_idx = 0
        drl_updates = 0
        best_reward = None
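        # Training loop: add one transition to the buffer per iteration and track finished-episode rewards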
        with utils.RewardTracker(writer) as tracker:
            with utils.TBMeanTracker(writer, batch_size=10) as tb_tracker:
                while True:
                    frame_idx += 1
                    buffer.populate(1)
                    rewards_steps = exp_source.pop_rewards_steps()
                    if rewards_steps:
                        rewards, steps = zip(*rewards_steps)
                        tb_tracker.track("episode_steps", steps[0], frame_idx)
                        mean_reward = tracker.reward(rewards[0], frame_idx)
                        if mean_reward is not None and mean_reward > REWARD_TO_SOLVE:
Example #2
        agent = DQNAgent(net, selector, device=device)

        exp_source = experience.ExperienceSourceFirstLast(
            env, agent, gamma=params['gamma'], steps_count=1)
        buffer = experience.ExperienceReplayBuffer(
            exp_source, buffer_size=params['replay_size'])
        if args.optimizer and args.optimizer == 'RMSprop':
            optimizer = optim.RMSprop(net.parameters(),
                                      lr=params['learning_rate'])
        else:
            optimizer = optim.Adam(net.parameters(),
                                   lr=params['learning_rate'])

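        # Restore network weights from a saved checkpoint; optimizer state is skipped (load_optimizer=False)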
        utils.load_agent_state(net,
                               optimizer,
                               selector,
                               load_optimizer=False,
                               env_name=params['env_name'],
                               path=model_saving_path)

        frame_idx = 0
        drl_updates = 0

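        # Training loop: populate the buffer one frame at a time while annealing epsilon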
        with utils.RewardTracker(writer,
                                 params['stop_reward']) as reward_tracker:
            while True:
                frame_idx += 1
                buffer.populate(1)
                epsilon_tracker.frame(frame_idx)

                new_rewards = exp_source.pop_total_rewards()
                if new_rewards:
Example #3
    agent = DQNAgent(net, selector, device=device)

    exp_source = experience.ExperienceSourceFirstLast(env,
                                                      agent,
                                                      gamma=params['gamma'],
                                                      steps_count=1)
    buffer = experience.ExperienceReplayBuffer(
        exp_source, buffer_size=params['replay_size'])
    optimizer = optim.Adam(
        net.parameters(),
        lr=params['learning_rate'])  # TODO: change to RMSprop

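    # Load a previously trained agent from a hard-coded Boxing checkpoint; optimizer state is not restored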
    utils.load_agent_state(net,
                           optimizer,
                           selector,
                           load_optimizer=False,
                           env_name='boxing',
                           path='./agent_ckpt/agent_ls_dqn_-boxing.pth')

    frame_idx = 0
    drl_updates = 0

    with utils.RewardTracker(writer, params['stop_reward']) as reward_tracker:
        while True:
            frame_idx += 1
            buffer.populate(1)
            epsilon_tracker.frame(frame_idx)

            new_rewards = exp_source.pop_total_rewards()
            if new_rewards:
                if reward_tracker.reward(new_rewards[0], frame_idx,
Example #4
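    # Epsilon-greedy action selection, with a tracker that anneals epsilon as frames accumulate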
    selector = EpsilonGreedyActionSelector(epsilon=params['epsilon_start'])
    epsilon_tracker = utils.EpsilonTracker(selector, params)

    agent = DQNAgent(net, selector, device=device)

    exp_source = experience.ExperienceSourceFirstLast(env,
                                                      agent,
                                                      gamma=params['gamma'],
                                                      steps_count=1)
    buffer = experience.ExperienceReplayBuffer(
        exp_source, buffer_size=params['replay_size'])
    optimizer = optim.Adam(
        net.parameters(),
        lr=params['learning_rate'])  # TODO: change to RMSprop

    utils.load_agent_state(net, optimizer, selector, load_optimizer=False)

    frame_idx = 0
    drl_updates = 0

    with utils.RewardTracker(writer, params['stop_reward']) as reward_tracker:
        while True:
            frame_idx += 1
            buffer.populate(1)
            epsilon_tracker.frame(frame_idx)

            new_rewards = exp_source.pop_total_rewards()
            if new_rewards:
                if reward_tracker.reward(new_rewards[0], frame_idx,
                                         selector.epsilon):
                    if save_for_analysis: