Example #1
def run(config):
    model_path = (Path('./models') / config.env_id / config.model_name /
                  ('run%i' % config.run_num))
    if config.incremental is not None:
        model_path = model_path / 'incremental' / ('model_ep%i.pt' %
                                                   config.incremental)
    else:
        model_path = model_path / 'model.pt'

    # print(config.save_gifs)
    # print(model_path.parent)
    # print(type(model_path.parent))

    if config.save_gifs:
        gif_path = model_path.parent / 'gifs'
        gif_path.mkdir(exist_ok=True)

    maddpg = MADDPG.init_from_save(str(model_path))
    env = make_env(config.env_id, discrete_action=maddpg.discrete_action)
    maddpg.prep_rollouts(device='cpu')
    ifi = 1 / config.fps  # inter-frame interval

    for ep_i in range(config.n_episodes):
        print("Episode %i of %i" % (ep_i + 1, config.n_episodes))
        obs = env.reset()
        if config.save_gifs:
            frames = []
            # print(len(env.render('rgb_array', close=False)))
            # print(type(env.render('rgb_array', close=False)))
            # print(env.render('rgb_array', close=False))
            frames.append(env.render('rgb_array', close=False)[0])
        env.render('human', close=False)
        for t_i in range(config.episode_length):
            calc_start = time.time()
            # rearrange observations to be per agent, and convert to torch Variable
            torch_obs = [
                Variable(torch.Tensor(obs[i]).view(1, -1), requires_grad=False)
                for i in range(maddpg.nagents)
            ]
            # get actions as torch Variables
            torch_actions = maddpg.step(torch_obs, explore=False)
            # convert actions to numpy arrays
            actions = [ac.data.numpy().flatten() for ac in torch_actions]
            obs, rewards, dones, infos = env.step(actions)
            if config.save_gifs:
                frames.append(env.render('rgb_array', close=False)[0])
            calc_end = time.time()
            elapsed = calc_end - calc_start
            if elapsed < ifi:
                time.sleep(ifi - elapsed)
            env.render('human', close=False)
        if config.save_gifs:
            gif_num = 0
            while (gif_path / ('%i_%i.gif' % (gif_num, ep_i))).exists():
                gif_num += 1
            imageio.mimsave(str(gif_path / ('%i_%i.gif' % (gif_num, ep_i))),
                            frames,
                            duration=ifi)

    env.close()
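
A minimal standalone sketch of the frame-rate throttling used above (and repeated in the later examples), assuming only that a wall-clock timestamp was taken before the step; the helper name throttle_frame is illustrative, not from the original project:

import time

def throttle_frame(calc_start, fps):
    """Sleep so that each rendered frame lasts at least 1/fps seconds."""
    ifi = 1.0 / fps                     # inter-frame interval
    elapsed = time.time() - calc_start  # time spent computing this step
    if elapsed < ifi:
        time.sleep(ifi - elapsed)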
Example #2
def run(config):
    """ main entry func """
    config = setup_evaluation(args)
    logger = ExperimentLogger(config.save_dir,
                              log_std_out=True,
                              use_tensorboard=False)

    # load agent from checkpoint
    if config.checkpoint > 0:
        model_path = "checkpoints/model_ep{}.ckpt".format(config.checkpoint)
    else:
        model_path = "model.ckpt"
    model_path = os.path.join(config.restore, model_path)
    maddpg = MADDPG.init_from_save(model_path)
    if config.copy_checkpoint:
        maddpg.save(config.save_dir + "/model.ckpt")

    # make env runner
    env_func = ENV_MAP[config.env]
    env = env_func(config.scenario,
                   benchmark=False,
                   show_visual_range=config.show_visual_range,
                   **config.env_config)

    # evaluate
    rollouts = maddpg_rollouts(maddpg,
                               env,
                               config.n_episodes,
                               config.episode_length,
                               logger=logger,
                               render=True,
                               save_gifs=True,
                               fps=20)

    # save rollouts
    if config.save_dir is not None:
        with open(os.path.join(config.save_dir, "eval_rollouts.pkl"), "wb") as f:
            pickle.dump(rollouts, f)

        if config.save_gifs:
            if config.save_gifs_num < 0:
                gif_num = config.n_episodes
            else:
                gif_num = min(config.save_gifs_num, config.n_episodes)
            imageio.mimsave(os.path.join(config.save_dir, "eval_frames.gif"),
                            rollouts["frames"][:gif_num],
                            duration=1 / 20)  # inter-frame interval matching fps=20 above

    env.close()
Example #3
def run(config):
    model_path = (Path('./models') / config.env_id / config.model_name /
                  ('run%i' % config.run_num))
    if config.incremental is not None:
        model_path = model_path / 'incremental' / ('model_ep%i.pt' %
                                                   config.incremental)
    else:
        model_path = model_path / 'model.pt'

    if config.save_gifs:
        gif_path = (model_path.parent / 'gifs' if not config.mixed_policies
                    else model_path.parent / 'gifs_mixed')
        gif_path.mkdir(exist_ok=True)
    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    if config.mixed_policies:
        maddpg = MADDPG.init_from_directory(
            Path('./models') / config.env_id / config.model_name)
    else:
        maddpg = MADDPG.init_from_save(model_path)
    env = make_env(config.env_id,
                   benchmark=True,
                   discrete_action=maddpg.discrete_action)
    env.world.seed(config.seed)
    maddpg.prep_rollouts(device='cpu')
    ifi = 1 / config.fps  # inter-frame interval
    all_infos = np.empty(
        (config.n_episodes, config.episode_length, maddpg.nagents, 10))
    all_positions = np.zeros(
        (config.n_episodes, config.episode_length, maddpg.nagents, 2))
    for ep_i in range(config.n_episodes):
        print("Episode %i of %i" % (ep_i + 1, config.n_episodes))
        obs = env.reset()
        if config.save_gifs:
            frames = []
            frames.append(env.render('rgb_array')[0])
        env.render('human')
        for t_i in range(config.episode_length):
            calc_start = time.time()
            # rearrange observations to be per agent, and convert to torch Variable
            torch_obs = [
                Variable(torch.Tensor(obs[i]).view(1, -1), requires_grad=False)
                if not obs[i].ndim == 4 else Variable(torch.Tensor(obs[i]),
                                                      requires_grad=False)
                for i in range(maddpg.nagents)
            ]

            all_positions[ep_i, t_i] = env.get_positions()
            # get actions as torch Variables
            torch_actions = maddpg.step(torch_obs, explore=False)
            # convert actions to numpy arrays
            actions = [ac.data.numpy().flatten() for ac in torch_actions]
            obs, rewards, dones, infos = env.step(actions)

            if config.save_gifs:
                frames.append(env.render('rgb_array')[0])
                # frames.append(env.world.viewers[0].render(return_rgb_array = True)) uncomment if local views visible
            calc_end = time.time()
            elapsed = calc_end - calc_start
            if elapsed < ifi:
                time.sleep(ifi - elapsed)
            env.render('human')
            if len(np.array(infos['n']).shape) < 4:
                all_infos[ep_i,
                          t_i, :, :len(infos['n'][-1])] = np.array(infos['n'])

        if config.save_gifs:
            gif_num = 0
            while (gif_path / ('%i_%i.gif' % (gif_num, ep_i))).exists():
                gif_num += 1
            imageio.mimsave(str(gif_path / ('%i_%i.gif' % (gif_num, ep_i))),
                            frames,
                            duration=ifi)

    env.close()

    if config.save_stats:
        stats_path = (model_path.parent / 'stats' if not config.mixed_policies
                      else model_path.parent / 'stats_mixed')
        stats_path.mkdir(exist_ok=True)
        save(f'{stats_path}/all_infos.npy', all_infos)
        save(f'{stats_path}/all_positions.npy', all_positions)
Example #4
def run(config):
    model_path = (Path('./models') / config.env_id / config.model_name /
                  ('run%i' % config.run_num))
    if config.incremental is not None:
        model_path = model_path / 'incremental' / ('model_ep%i.pt' %
                                                   config.incremental)
    else:
        model_path = model_path / 'model.pt'


    print("\n"+str(model_path)+"\n\n\n")

    if config.save_gifs:
        gif_path = model_path.parent / 'gifs'
        gif_path.mkdir(exist_ok=True)

    maddpg = MADDPG.init_from_save(model_path)
    env = make_env(config.env_id, discrete_action=maddpg.discrete_action)
    maddpg.prep_rollouts(device='cpu')
    ifi = 1 / config.fps  # inter-frame interval

    #####################################################################################################
    #                                       START EPISODES                                              #
    #####################################################################################################

    for ep_i in range(config.n_episodes):
        print("Episode %i of %i" % (ep_i + 1, config.n_episodes))
        obs = env.reset()
        # For RNN history buffer
        obs_tminus_0 = copy(obs)
        obs_tminus_1 = copy(obs)
        obs_tminus_2 = copy(obs)

        obs_tminus_3 = copy(obs)
        obs_tminus_4 = copy(obs)
        obs_tminus_5 = copy(obs)

        # TODO: obs_history shape different from main.py, so parameterize it based on "obs"
        # It is different because main.py can run multiple threads, so has an extra dimension
        obs_history = np.empty([3, 108])
        next_obs_history = np.empty([3, 108])

        if config.save_gifs:
            frames = []
            frames.append(env.render('rgb_array')[0])
        env.render('human')

        ##################################################################################################
        #                                       START TIME-STEPS                                         #
        ##################################################################################################

        for t_i in range(config.episode_length):

            # Populate current history for RNN
            for a in range(3):  # env.nagents
                # obs_history[a][:] = np.concatenate((obs_tminus_0[a][:], obs_tminus_1[a][:], obs_tminus_2[a][:]))
                obs_history[a][:] = np.concatenate(
                    (obs_tminus_0[a][:], obs_tminus_1[a][:], obs_tminus_2[a][:],
                     obs_tminus_3[a][:], obs_tminus_4[a][:], obs_tminus_5[a][:]))
                # Now, obs_history holds the last 6 timesteps for each agent

            calc_start = time.time()

            # rearrange observations to be per agent, and convert to torch Variable
            rnn_torch_obs = [
                Variable(torch.Tensor(obs_history[i]).view(1, -1),
                         requires_grad=False)
                for i in range(maddpg.nagents)
            ]
            # get actions as torch Variables
            torch_actions = maddpg.step(rnn_torch_obs, explore=False)
            # convert actions to numpy arrays
            actions = [ac.data.numpy().flatten() for ac in torch_actions]
            next_obs, rewards, dones, infos = env.step(actions)

            # Update histories
            obs_tminus_5 = copy(obs_tminus_4)
            obs_tminus_4 = copy(obs_tminus_3)
            obs_tminus_3 = copy(obs_tminus_2)

            obs_tminus_2 = copy(obs_tminus_1)
            obs_tminus_1 = copy(obs_tminus_0)
            obs_tminus_0 = copy(next_obs)
            # --------------------------------------#

            if config.save_gifs:
                frames.append(env.render('rgb_array')[0])
            calc_end = time.time()
            elapsed = calc_end - calc_start
            if elapsed < ifi:
                time.sleep(ifi - elapsed)
            env.render('human')
        if config.save_gifs:
            gif_num = 0
            while (gif_path / ('%i_%i.gif' % (gif_num, ep_i))).exists():
                gif_num += 1
            imageio.mimsave(str(gif_path / ('%i_%i.gif' % (gif_num, ep_i))),
                            frames, duration=ifi)

    env.close()
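
The obs_tminus_0 … obs_tminus_5 rotation above keeps a six-step observation history for the RNN policy. A minimal sketch of the same bookkeeping with collections.deque, assuming per-agent 1-D observations; the helper names are illustrative, not from the original project:

from collections import deque
import numpy as np

def make_history(obs, history_len=6):
    # one fixed-length deque per agent, pre-filled with the reset observation
    return [deque([obs[a]] * history_len, maxlen=history_len)
            for a in range(len(obs))]

def stacked(history):
    # newest observation first, matching the (t-0, t-1, ..., t-5) order above
    return [np.concatenate(list(buf)) for buf in history]

def push(history, next_obs):
    for a, buf in enumerate(history):
        buf.appendleft(next_obs[a])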
Example #5
def run(config):
    original_model_path = (Path('./models') / config.env_id /
                           config.model_name / ('run%i' % config.run_num))
    # if config.incremental is not None:
    #     model_path = model_path / 'incremental' / ('model_ep%i.pt' %
    #                                                config.incremental)
    # else:
    #     model_path = model_path / 'model.pt'

    if config.save_gifs:
        gif_path = original_model_path.parent / 'gifs'
        gif_path.mkdir(exist_ok=True)

    # Model numbers in folder for stat runs
    rrange = [1, 1001, 2001, 3001, 4001, 5001, 6001, 7001, 8001, 9001]
    stat_run_all_models = []

    for r in rrange:
        print("Model :" + str(r))
        model_path = original_model_path / 'incremental' / ('model_ep%i.pt' %
                                                            r)
        maddpg = MADDPG.init_from_save(model_path)
        env = make_env(config.env_id, discrete_action=maddpg.discrete_action)
        maddpg.prep_rollouts(device='cpu')
        ifi = 1 / config.fps  # inter-frame interval

        stat_return_list = []
        for ep_i in range(config.n_episodes):
            print("Episode %i of %i" % (ep_i + 1, config.n_episodes))
            obs = env.reset()
            if config.save_gifs:
                frames = []
                frames.append(env.render('rgb_array')[0])
            #env.render('human')

            episode_reward = 0
            for t_i in range(config.episode_length):
                calc_start = time.time()
                # rearrange observations to be per agent, and convert to torch Variable
                torch_obs = [
                    Variable(torch.Tensor(obs[i]).view(1, -1),
                             requires_grad=False)
                    for i in range(maddpg.nagents)
                ]
                # get actions as torch Variables
                torch_actions = maddpg.step(torch_obs, explore=False)
                # convert actions to numpy arrays
                actions = [ac.data.numpy().flatten() for ac in torch_actions]
                obs, rewards, dones, infos = env.step(actions)

                # get the global reward
                episode_reward += rewards[0][0]

                if config.save_gifs:
                    frames.append(env.render('rgb_array')[0])
                calc_end = time.time()
                elapsed = calc_end - calc_start
                if elapsed < ifi:
                    time.sleep(ifi - elapsed)
                #env.render('human')
            if config.save_gifs:
                gif_num = 0
                while (gif_path / ('%i_%i.gif' % (gif_num, ep_i))).exists():
                    gif_num += 1
                imageio.mimsave(str(gif_path / ('%i_%i.gif' %
                                                (gif_num, ep_i))),
                                frames,
                                duration=ifi)
            # end of episodes (one-stat-run)
            stat_return_list.append(episode_reward / config.episode_length)
        # end of model
        stat_run_all_models.append(stat_return_list)
        env.close()

    pickling_on = open(str(original_model_path) + "/stat_runs", "wb")
    pkl.dump(stat_run_all_models, pickling_on)
    pickling_on.close()
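
Example #5 pickles stat_run_all_models as one list per checkpoint in rrange, each holding the per-step return of every evaluation episode. A minimal sketch for loading and summarizing that file, assuming the path layout used above (the concrete run directory is illustrative):

import pickle as pkl
import numpy as np
from pathlib import Path

stats_file = Path('./models') / 'env_id' / 'model_name' / 'run1' / 'stat_runs'  # illustrative path
with open(stats_file, 'rb') as f:
    stat_run_all_models = pkl.load(f)

# one row per checkpoint: mean and std of the per-step return across episodes
summary = [(np.mean(runs), np.std(runs)) for runs in stat_run_all_models]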
Example #6
File: main.py  Project: yathartha3/DPP
def run(config):
    model_dir = Path('./models') / config.env_id / config.model_name
    if not model_dir.exists():
        curr_run = 'run1'
    else:
        exst_run_nums = [
            int(str(folder.name).split('run')[1])
            for folder in model_dir.iterdir()
            if str(folder.name).startswith('run')
        ]
        if len(exst_run_nums) == 0:
            curr_run = 'run1'
        else:
            curr_run = 'run%i' % (max(exst_run_nums) + 1)
    run_dir = model_dir / curr_run
    log_dir = run_dir / 'logs'
    os.makedirs(log_dir)
    logger = SummaryWriter(str(log_dir))

    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    if not USE_CUDA:
        torch.set_num_threads(config.n_training_threads)
    env = make_parallel_env(config.env_id, config.n_rollout_threads,
                            config.seed, config.discrete_action)

    ##################### INITIALIZE FROM SAVED? ###########################
    # assumes these two settings live on the config; neither is defined locally
    init_from_saved = getattr(config, 'init_from_saved', False)
    model_path = getattr(config, 'model_path', None)
    if init_from_saved and model_path is not None:
        maddpg = MADDPG.init_from_save(model_path)
        print("Initialized from saved model")
    # -------------------------------------------------------------------- #
    else:
        maddpg = MADDPG.init_from_env(env,
                                      agent_alg=config.agent_alg,
                                      adversary_alg=config.adversary_alg,
                                      tau=config.tau,
                                      lr=config.lr,
                                      hidden_dim=config.hidden_dim)
    # used for learning (updates)
    replay_buffer = ReplayBuffer(
        config.buffer_length, maddpg.nagents,
        [obsp.shape[0] for obsp in env.observation_space], [
            acsp.shape[0] if isinstance(acsp, Box) else acsp.n
            for acsp in env.action_space
        ])

    # This is just to store the global rewards and not for updating the policies
    g_storage_buffer = ReplayBuffer(
        config.buffer_length, maddpg.nagents,
        [obsp.shape[0] for obsp in env.observation_space], [
            acsp.shape[0] if isinstance(acsp, Box) else acsp.n
            for acsp in env.action_space
        ])

    t = 0
    for ep_i in range(0, config.n_episodes, config.n_rollout_threads):
        print(
            "Episodes %i-%i of %i" %
            (ep_i + 1, ep_i + 1 + config.n_rollout_threads, config.n_episodes))
        obs = env.reset()
        # obs.shape = (n_rollout_threads, nagent)(nobs), nobs differs per agent so not tensor
        maddpg.prep_rollouts(device='cpu')

        explr_pct_remaining = max(
            0, config.n_exploration_eps - ep_i) / config.n_exploration_eps
        maddpg.scale_noise(config.final_noise_scale +
                           (config.init_noise_scale -
                            config.final_noise_scale) * explr_pct_remaining)
        maddpg.reset_noise()

        for et_i in range(config.episode_length):
            # rearrange observations to be per agent, and convert to torch Variable
            torch_obs = [
                Variable(torch.Tensor(np.vstack(obs[:, i])),
                         requires_grad=False) for i in range(maddpg.nagents)
            ]
            # get actions as torch Variables
            torch_agent_actions = maddpg.step(torch_obs, explore=True)
            # convert actions to numpy arrays
            agent_actions = [ac.data.numpy() for ac in torch_agent_actions]
            # rearrange actions to be per environment
            actions = [[ac[i] for ac in agent_actions]
                       for i in range(config.n_rollout_threads)]
            next_obs, rewards, dones, infos = env.step(actions, maddpg)
            '''
            Reward Shaping using D++, D.
            The rewards now contain global as well as shaped rewards
            Keep the global for logging, and use the shaped rewards for updates
            '''
            # Choose which reward to use
            use_dpp = True

            # DIFFERENCE REWARDS
            d_rewards = []
            for n in range(maddpg.nagents):
                d_rewards.append([rewards[0][n][1]])
            d_rewards = [d_rewards]
            d_rewards = np.array(d_rewards)

            # GLOBAL REWARDS
            g_rewards = []
            for n in range(maddpg.nagents):
                g_rewards.append([rewards[0][n][0]])
            g_rewards = [g_rewards]
            g_rewards = np.array(g_rewards)

            if use_dpp:
                rewards = d_rewards
            else:
                rewards = g_rewards
            # ----------------------------------------------------------- #
            # Buffer used for updates
            replay_buffer.push(obs, agent_actions, rewards, next_obs, dones)
            # push global rewards into g_replay_buffer for plotting
            g_storage_buffer.push(obs, agent_actions, g_rewards, next_obs,
                                  dones)

            obs = next_obs
            t += config.n_rollout_threads
            if (len(replay_buffer) >= config.batch_size and
                (t % config.steps_per_update) < config.n_rollout_threads):
                if USE_CUDA:
                    maddpg.prep_training(device='gpu')
                else:
                    maddpg.prep_training(device='cpu')
                for u_i in range(config.n_rollout_threads):
                    for a_i in range(maddpg.nagents):
                        sample = replay_buffer.sample(config.batch_size,
                                                      to_gpu=USE_CUDA)
                        maddpg.update(sample, a_i, logger=logger)
                    maddpg.update_all_targets()
                maddpg.prep_rollouts(device='cpu')
        # Take out global reward from g_storage_buffer
        ep_rews = g_storage_buffer.get_average_rewards(
            config.episode_length * config.n_rollout_threads)

        for a_i, a_ep_rew in enumerate(ep_rews):
            logger.add_scalar('agent%i/mean_episode_rewards' % a_i, a_ep_rew,
                              ep_i)

        if ep_i % config.save_interval < config.n_rollout_threads:
            os.makedirs(run_dir / 'incremental', exist_ok=True)
            maddpg.save(run_dir / 'incremental' / ('model_ep%i.pt' %
                                                   (ep_i + 1)))
            maddpg.save(run_dir / 'model.pt')

    maddpg.save(run_dir / 'model.pt')
    env.close()
    logger.export_scalars_to_json(str(log_dir / 'summary.json'))
    logger.close()
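
Example #6 extracts the difference (D++) and global rewards with two explicit per-agent loops. Assuming, as those loops do, that rewards[0][n] holds [global, difference] for agent n, the same extraction can be written as one small helper (name illustrative):

import numpy as np

def split_rewards(rewards, nagents):
    """Return (global, difference) reward arrays shaped (1, nagents, 1)."""
    g_rewards = np.array([[[rewards[0][n][0]] for n in range(nagents)]])
    d_rewards = np.array([[[rewards[0][n][1]] for n in range(nagents)]])
    return g_rewards, d_rewards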
Example #7
def run(config):
    original_model_path = (Path('./models') / config.env_id /
                           config.model_name / ('run%i' % config.run_num))
    # if config.incremental is not None:
    #     model_path = model_path / 'incremental' / ('model_ep%i.pt' %
    #                                                config.incremental)
    # else:
    #     model_path = model_path / 'model.pt'
    #
    # print(model_path)

    ###########################################################################
    #                      FORCE MODEL PATH                                   #
    ###########################################################################
    model_path_list = []
    rrange = [1, 1001, 2001, 3001, 4001, 5001, 6001, 7001, 8001, 9001]

    # FOR EACH MODEL, DO STATISTICAL RUNS
    # for r in rrange:
    #     model_path = model_path / 'incremental' / ('model_ep%i.pt' % r)

    ######################  SAVING STAT RUNS FOR EACH MODEL ###################
    stat_run_all_models = []

    for r in rrange:
        model_path = original_model_path / 'incremental' / ('model_ep%i.pt' %
                                                            r)
        if config.save_gifs:
            gif_path = model_path.parent / 'gifs'
            gif_path.mkdir(exist_ok=True)

        maddpg = MADDPG.init_from_save(model_path)
        env = make_env(config.env_id, discrete_action=maddpg.discrete_action)
        maddpg.prep_rollouts(device='cpu')
        ifi = 1 / config.fps  # inter-frame interval

        #####################################################################################################
        #                             CONFIGURATION FOR STATISTICAL RUNS (EPISODES)
        #####################################################################################################
        #####################################################################################################
        #                                       START EPISODES                                              #
        #####################################################################################################
        stat_return_list = []
        for ep_i in range(config.n_episodes):  # number of stat runs
            print("Episode %i of %i" % (ep_i + 1, config.n_episodes))
            obs = env.reset()
            # For RNN history buffer
            obs_tminus_0 = copy(obs)
            obs_tminus_1 = copy(obs)
            obs_tminus_2 = copy(obs)
            obs_tminus_3 = copy(obs)
            obs_tminus_4 = copy(obs)
            obs_tminus_5 = copy(obs)

            # TODO: obs_history shape different from main.py, so parameterize it based on "obs"
            # It is different because main.py can run multiple threads, so has an extra dimension
            obs_history = np.empty([3, 108])
            next_obs_history = np.empty([3, 108])

            if config.save_gifs:
                frames = []
                frames.append(env.render('rgb_array')[0])
            #env.render('human')

            ##################################################################################################
            #                                       START TIME-STEPS                                         #
            ##################################################################################################
            episode_reward = 0
            for t_i in range(config.episode_length):

                # Populate current history for RNN
                for a in range(3):  # env.nagents
                    #obs_history[a][:] = np.concatenate((obs_tminus_0[a][:], obs_tminus_1[a][:], obs_tminus_2[a][:]))
                    obs_history[a][:] = np.concatenate(
                        (obs_tminus_0[a][:], obs_tminus_1[a][:],
                         obs_tminus_2[a][:], obs_tminus_3[a][:],
                         obs_tminus_4[a][:], obs_tminus_5[a][:]))
                    # Now, obs_history holds the last 6 timesteps for each agent

                calc_start = time.time()

                # rearrange observations to be per agent, and convert to torch Variable
                rnn_torch_obs = [
                    Variable(torch.Tensor(obs_history[i]).view(1, -1),
                             requires_grad=False)
                    for i in range(maddpg.nagents)
                ]
                # get actions as torch Variables
                torch_actions = maddpg.step(rnn_torch_obs, explore=False)
                # convert actions to numpy arrays
                actions = [ac.data.numpy().flatten() for ac in torch_actions]
                next_obs, rewards, dones, infos = env.step(actions)

                # get the global reward
                episode_reward += rewards[0][0]

                # Update histories
                obs_tminus_5 = copy(obs_tminus_4)
                obs_tminus_4 = copy(obs_tminus_3)
                obs_tminus_3 = copy(obs_tminus_2)
                obs_tminus_2 = copy(obs_tminus_1)
                obs_tminus_1 = copy(obs_tminus_0)
                obs_tminus_0 = copy(next_obs)
                # --------------------------------------#

                if config.save_gifs:
                    frames.append(env.render('rgb_array')[0])
                calc_end = time.time()
                elapsed = calc_end - calc_start
                if elapsed < ifi:
                    time.sleep(ifi - elapsed)
                #env.render('human')
                # end of an episode

            if config.save_gifs:
                gif_num = 0
                while (gif_path / ('%i_%i.gif' % (gif_num, ep_i))).exists():
                    gif_num += 1
                imageio.mimsave(str(gif_path / ('%i_%i.gif' %
                                                (gif_num, ep_i))),
                                frames,
                                duration=ifi)
            # end of episodes (one stat-run)
            stat_return_list.append(episode_reward / config.episode_length)
        # end of model
        stat_run_all_models.append(stat_return_list)
        env.close()

    pickling_on = open(str(original_model_path) + "/stat_runs", "wb")
    pkl.dump(stat_run_all_models, pickling_on)
    pickling_on.close()
Example #8
    see_runs = [0]
    wait = 0.05
    ep_len = 50

    for cur_run in see_runs:
        for i in range(4):
            config = Arglist()
            config.load_args(base_path / models_to_compare[cur_model] /
                             ("run" + str(cur_run)))
            env = make_parallel_env(config)
            model_path = base_path / models_to_compare[cur_model] / (
                "run" + str(cur_run)) / "model.pt"
            print(model_path)

            # add comm to action space:
            maddpg = MADDPG.init_from_save(model_path)
            # show some examples:
            obs = env.reset()
            # env.env._render("human", True)
            maddpg.prep_rollouts(device='cpu')

            # eval_model(maddpg, env, ep_len=100, num_steps=500, rollout_threads=1, display=True)
            for step in range(ep_len):
                env.env._render("human", False)
                time.sleep(wait)
                # rearrange observations to be per agent, and convert to torch Variable
                torch_obs = [
                    Variable(torch.Tensor(np.vstack(obs[:, i])),
                             requires_grad=False)
                    for i in range(maddpg.nagents)
                ]
Example #9
with open(config_file, 'r') as f:
    args_dict = yaml.safe_load(f)
    args_dict['n_agents'] = args_dict['n_pursuers']
    # config = yaml.load(f, Loader=loader)
    args = SN(**args_dict)

if args.seed is not False:
    th.manual_seed(args.seed)

env = Env(**args_dict)

if args.seed is not False:
    env.seed(args.seed)

for m in pool_list:
    maddpg = MADDPG.init_from_save(result_folder + '/model/' + m + '.pt',
                                   with_cpu=True)
    maddpg.prep_rollouts(device='cpu')
    with th.no_grad():
        total_reward = 0.
        test_time = 5
        for it in range(test_time):
            l = []
            obs = env.reset()
            l.append(env.render(gui=True))
            obs = np.stack(obs, axis=0)
            obs = th.from_numpy(obs).float()
            print('----------')
            reward_it = 0.
            for t in range(args.test_max_steps):
                obs = obs.type(th.FloatTensor)
                actions = maddpg.step(obs, explore=False)
Example #10
def run(config):
    model_dir = Path('./models') / config.env_id / config.model_name
    if not model_dir.exists():
        curr_run = 'run1'
    else:
        exst_run_nums = [
            int(str(folder.name).split('run')[1])
            for folder in model_dir.iterdir()
            if str(folder.name).startswith('run')
        ]
        if len(exst_run_nums) == 0:
            curr_run = 'run1'
        else:
            curr_run = 'run%i' % (max(exst_run_nums) + 1)
    run_dir = model_dir / curr_run
    log_dir = run_dir / 'logs'
    os.makedirs(log_dir)
    logger = SummaryWriter(str(log_dir))

    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    if not USE_CUDA:
        torch.set_num_threads(config.n_training_threads)
    env = make_parallel_env(config.env_id, config.n_rollout_threads,
                            config.seed, config.discrete_action)

    if config.run_num:
        model_path = model_dir / f'run{config.run_num}'
        maddpg = MADDPG.init_from_save(model_path / 'model.pt')
        models_dir = model_path / 'incremental'
        ext_mods = [
            int(str(folder.name).split('model_ep')[1][:-3])
            for folder in models_dir.iterdir()
            if str(folder.name).startswith('model_ep')
            and str(folder.name).endswith('.pt')
        ]

        ep_st = np.sort(ext_mods)[-1]
    else:
        maddpg = MADDPG.init_from_env(env,
                                      agent_alg=config.agent_alg,
                                      adversary_alg=config.adversary_alg,
                                      tau=config.tau,
                                      lr=config.lr,
                                      hidden_dim=config.hidden_dim,
                                      recurrent=config.recurrent,
                                      convolutional=config.convolutional)
        ep_st = 0

    replay_buffer = ReplayBuffer(
        config.buffer_length, maddpg.nagents,
        [obsp.shape[0]
         for obsp in env.observation_space] if not config.convolutional else
        [obsp.shape for obsp in env.observation_space], [
            acsp.shape[0] if isinstance(acsp, Box) else
            acsp.n if isinstance(acsp, Discrete) else sum(acsp.high -
                                                          acsp.low + 1)
            for acsp in env.action_space
        ])
    t = 0

    intrinsic_modules = create_intrinsic_motivators(config, maddpg.agents, env)

    for ep_i in range(ep_st, config.n_episodes, config.n_rollout_threads):
        print(
            "Episodes %i-%i of %i" %
            (ep_i + 1, ep_i + 1 + config.n_rollout_threads, config.n_episodes))
        obs = env.reset()
        # obs.shape = (n_rollout_threads, nagent)(nobs), nobs differs per agent so not tensor
        maddpg.prep_rollouts(device='cpu')
        [im.prep_rollouts(device='cpu') for im in intrinsic_modules]

        explr_pct_remaining = max(
            0, config.n_exploration_eps - ep_i) / config.n_exploration_eps
        maddpg.scale_noise(config.final_noise_scale +
                           (config.init_noise_scale -
                            config.final_noise_scale) * explr_pct_remaining)
        maddpg.reset_noise()
        for et_i in range(config.episode_length):
            start = time.time()
            # rearrange observations to be per agent, and convert to torch Variable
            torch_obs = [
                Variable(torch.Tensor(np.vstack(obs[:, i])),
                         requires_grad=False) for i in range(maddpg.nagents)
            ]
            # get actions as torch Variables
            torch_agent_actions = maddpg.step(torch_obs, explore=True)
            # convert actions to numpy arrays
            agent_actions = [ac.data.numpy() for ac in torch_agent_actions]
            # rearrange actions to be per environment
            actions = [[ac[i] for ac in agent_actions]
                       for i in range(config.n_rollout_threads)]
            next_obs, rewards, dones, infos = env.step(actions)

            emps = np.sum(np.asarray(
                [im.compute(rewards, next_obs) for im in intrinsic_modules]),
                          axis=0)

            replay_buffer.push(obs, agent_actions, rewards, emps, next_obs,
                               dones)
            obs = next_obs
            t += config.n_rollout_threads
            if (len(replay_buffer) >= config.batch_size and
                (t % config.steps_per_update) < config.n_rollout_threads):
                if USE_CUDA:
                    maddpg.prep_training(device='gpu')
                    [
                        im.prep_training(device='gpu')
                        for im in intrinsic_modules
                    ]
                else:
                    maddpg.prep_training(device='cpu')
                    [
                        im.prep_training(device='cpu')
                        for im in intrinsic_modules
                    ]
                for u_i in range(config.n_rollout_threads):
                    for a_i in range(maddpg.nagents):
                        sample = replay_buffer.sample(config.batch_size,
                                                      to_gpu=USE_CUDA)
                        maddpg.update(sample, a_i, logger=logger)
                        [
                            im.update(sample, logger=logger)
                            for im in intrinsic_modules
                        ]
                    maddpg.update_all_targets()
                maddpg.prep_rollouts(device='cpu')
                [im.prep_rollouts(device='cpu') for im in intrinsic_modules]

                print(
                    f'computation time = {time.time() - start:.3f}s buffer length = {len(replay_buffer)}'
                )
        ep_rews = replay_buffer.get_average_rewards(config.episode_length *
                                                    config.n_rollout_threads)
        for a_i, a_ep_rew in enumerate(ep_rews):
            logger.add_scalars('agent%i/mean_episode_rewards' % a_i,
                               {'rew_loss': a_ep_rew}, ep_i)

        if ep_i % config.save_interval < config.n_rollout_threads:
            os.makedirs(run_dir / 'incremental', exist_ok=True)
            maddpg.save(run_dir / 'incremental' / ('model_ep%i.pt' %
                                                   (ep_i + 1)))
            maddpg.save(run_dir / 'model.pt')

    maddpg.save(run_dir / 'model.pt')
    env.close()
    logger.export_scalars_to_json(str(log_dir / 'summary.json'))
    logger.close()
Example #11
def run(config):
    model_path = (Path('../models') / config.env_id / config.model_name /
                  ('run%i' % config.run_num))
    if config.incremental is not None:
        model_path = model_path / 'incremental' / ('model_ep%i.pt' %
                                                   config.incremental)
    else:
        model_path = model_path / 'model.pt'

    gif_path = (model_path.parent / 'stats' if not config.mixed_policies
                else model_path.parent / 'stats_mixed')
    gif_path.mkdir(exist_ok=True)

    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    if config.mixed_policies:
        maddpg = MADDPG.init_from_directory(
            Path('../models') / config.env_id / config.model_name)
    else:
        maddpg = MADDPG.init_from_save(model_path)
    env = make_env(config.env_id,
                   benchmark=True,
                   discrete_action=maddpg.discrete_action)
    env.seed(config.seed)
    maddpg.prep_rollouts(device='cpu')
    ifi = 1 / config.fps  # inter-frame interval
    all_infos = np.empty(
        (config.n_episodes, config.episode_length, maddpg.nagents, 10))
    n_movable_agents = sum([1 if a.movable else 0 for a in env.agents])
    n_speaking_agents = sum([0 if a.silent else 1 for a in env.agents])
    all_positions = np.zeros((config.n_episodes, config.episode_length,
                              n_movable_agents, env.world.dim_p))
    all_communications = np.zeros((config.n_episodes, config.episode_length,
                                   n_speaking_agents, env.world.dim_c))
    all_actions = np.zeros((config.n_episodes, config.episode_length,
                            len(env.agents), env.world.dim_c))
    obs_space = sum([obsp.shape[0] for obsp in env.observation_space])
    all_obs = np.zeros((config.n_episodes, config.episode_length, obs_space))

    for ep_i in range(config.n_episodes):
        print("Episode %i of %i" % (ep_i + 1, config.n_episodes))
        obs = env.reset()
        # env.agents[1].state.p_pos = np.array([0., 0.])
        for t_i in range(config.episode_length):
            calc_start = time.time()
            # rearrange observations to be per agent, and convert to torch Variable
            torch_obs = [
                Variable(torch.Tensor(obs[i]).view(1, -1), requires_grad=False)
                if not obs[i].ndim == 4 else Variable(torch.Tensor(obs[i]),
                                                      requires_grad=False)
                for i in range(maddpg.nagents)
            ]

            all_positions[ep_i, t_i] = env.get_positions()
            all_communications[ep_i, t_i] = env.get_communications()
            # get actions as torch Variables
            torch_actions = maddpg.step(torch_obs, explore=False)
            # convert actions to numpy arrays
            actions = [ac.data.numpy().flatten() for ac in torch_actions]
            # actions[0] = np.array([0., 0., 0., 0., 0.], dtype=np.float32)
            # actions[0][ep_i] = 1.
            obs, rewards, dones, infos = env.step(actions)

            all_actions[ep_i, t_i, :, :] = actions
            all_obs[ep_i, t_i, :] = np.concatenate(np.asarray(obs))

            calc_end = time.time()
            elapsed = calc_end - calc_start
            if elapsed < ifi:
                time.sleep(ifi - elapsed)
            if len(np.array(infos['n']).shape) < 4:
                all_infos[ep_i,
                          t_i, :, :len(infos['n'][-1])] = np.array(infos['n'])

    env.close()

    if config.save_stats:
        stats_path = (model_path.parent / 'stats' if not config.mixed_policies
                      else model_path.parent / 'stats_mixed')
        stats_path.mkdir(exist_ok=True)
        save(f'{stats_path}/all_infos.npy', all_infos)
        save(f'{stats_path}/all_positions.npy', all_positions)
        save(f'{stats_path}/all_communications.npy', all_communications)
        save(f'{stats_path}/all_actions.npy', all_actions)
        save(f'{stats_path}/all_observations.npy', all_obs)
Example #12
def run(config):
    model_path = (Path('./models') / config.env_id / config.model_name /
                  ('run%i' % config.run_num))
    shape_path = (Path('./models') / config.env_id / config.model_name /
                  ('run%i' % config.run_num) / config.shape_file)
    if config.incremental is not None:
        model_path = model_path / 'incremental' / ('model_ep%i.pt' %
                                                   config.incremental)
    else:
        model_path = model_path / 'model.pt'

    if config.save_gifs:
        gif_path = model_path.parent / 'gifs'
        gif_path.mkdir(exist_ok=True)

    maddpg = MADDPG.init_from_save(model_path)
    #env = make_env(config.env_id, discrete_action=maddpg.discrete_action)
    env = HeavyObjectEnv(num_agents=config.num_agents, shape_file=shape_path)
    maddpg.prep_rollouts(device='cpu')
    ifi = 1 / config.fps  # inter-frame interval

    for ep_i in range(config.n_episodes):
        print("Episode %i of %i" % (ep_i + 1, config.n_episodes))
        obs = env.reset()
        if config.save_gifs:
            frames = []
            frames.append(env.render('rgb_array')[0])
        env.render()
        for t_i in range(config.episode_length):
            calc_start = time.time()

            if t_i == 15:
                env.change_centroid(0.3, 0.3)
                print("change centroid!")

            # rearrange observations to be per agent, and convert to torch Variable
            torch_obs = [Variable(torch.Tensor(obs[i]).view(1, -1),
                                  requires_grad=False)
                         for i in range(maddpg.nagents)]
            # get actions as torch Variables
            torch_actions = maddpg.step(torch_obs, explore=False)
            # convert actions to numpy arrays
            actions_one_hot = [ac.data.numpy().flatten() for ac in torch_actions]
            #print(actions_one_hot)
            actions = np.array([i.tolist().index(1.0) for i in actions_one_hot])
            #print(actions,len(actions))
            #print(env._state,env._last_value)
            print(t_i)

            #for j in actions:
            #    j[1]*=np.pi
            #print(actions,"new")
            obs, rewards, dones, infos = env.step(actions)
            #print(dones)
            if config.save_gifs:
                frames.append(env.render('rgb_array')[0])
            calc_end = time.time()
            elapsed = calc_end - calc_start
            if elapsed < ifi:
                time.sleep(ifi - elapsed)
            env.render()
        if config.save_gifs:
            gif_num = 0
            while (gif_path / ('%i_%i.gif' % (gif_num, ep_i))).exists():
                gif_num += 1
            imageio.mimsave(str(gif_path / ('%i_%i.gif' % (gif_num, ep_i))),
                            frames, duration=ifi)

    env.close()
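
In Example #12, actions_one_hot is decoded with list.index(1.0), which raises ValueError unless the policy output is an exact one-hot vector. An argmax-based decode, equivalent for true one-hot actions and tolerant of soft outputs, is sketched below (helper name illustrative):

import numpy as np

def decode_actions(actions_one_hot):
    # index of the largest entry; identical to index(1.0) for exact one-hot vectors
    return np.array([int(np.argmax(a)) for a in actions_one_hot])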
Example #13
def run(config):
    # model_dir = Path('./models') / config.env_id / config.model_name
    # if not model_dir.exists():
    #     curr_run = 'run1'
    # else:
    #     exst_run_nums = [int(str(folder.name).split('run')[1]) for folder in
    #                      model_dir.iterdir() if
    #                      str(folder.name).startswith('run')]
    #     if len(exst_run_nums) == 0:
    #         curr_run = 'run1'
    #     else:
    #         curr_run = 'run%i' % (max(exst_run_nums) + 1)
    # run_dir = model_dir / curr_run
    # log_dir = run_dir / 'logs'
    # os.makedirs(log_dir)
    # logger = SummaryWriter(str(log_dir))

    # torch.manual_seed(config.seed)
    # np.random.seed(config.seed)
    # if not USE_CUDA:
    #     torch.set_num_threads(config.n_training_threads)

    # transport configuration
    name = 'Materials Transport'
    conf = {
        'n_player': 2,  # number of players
        'board_width': 11,  # board width
        'board_height': 11,  # board height
        'n_cell_type': 5,  # number of cell types
        'materials': 4,  # number of material depots
        'cars': 2,  # number of cars
        'planes': 0,  # number of planes
        'barriers': 12,  # number of fixed obstacles
        'max_step': 50,  # maximum number of steps
        'game_name': name,  # game name
        'K': 5,  # depot material counts are refreshed every K rounds
        'map_path': 'env/map.txt',  # initial map file
        'cell_range': 6,  # value range of each cell dimension (tuple; a single int is promoted to a tuple)  ##?
        'ob_board_width': None,  # board width observed by each agent (tuple); None means same as the real board  ##?
        'ob_board_height': None,  # board height observed by each agent (tuple); None means same as the real board  ##?
        'ob_cell_range': None,  # per-cell value range observed by each agent (2-D tuple); None means same as the real board  ##?
    }

    env = make_parallel_env_transport(config.env_id, conf, config.seed,
                                      config.discrete_action)

    model_path = (Path('./models') / config.env_id / config.model_name /
                  ('run%i' % config.run_num))

    if config.incremental is not None:
        model_path = model_path / 'incremental' / ('model_ep%i.pt' %
                                                   config.incremental)
    else:
        model_path = model_path / 'model.pt'

    maddpg = MADDPG.init_from_save(model_path)
    maddpg.prep_rollouts(device='cpu')
    t = 0

    reward_epi = np.zeros(config.n_episodes)
    for ep_i in range(0, config.n_episodes):

        obs = env.reset()  # TODO: TO CHECK
        '''
        # obs.shape = (n_rollout_threads, nagent)(nobs), nobs differs per agent so not tensor
        maddpg.prep_rollouts(device='cpu')

        explr_pct_remaining = max(0, config.n_exploration_eps - ep_i) / config.n_exploration_eps
        maddpg.scale_noise(config.final_noise_scale + (config.init_noise_scale - config.final_noise_scale) * explr_pct_remaining)
        maddpg.reset_noise()
        '''
        reward_eti = 0
        pygame.init()
        screen = pygame.display.set_mode((440, 440))
        # pygame.display.set_caption(g.game_name)

        clock = pygame.time.Clock()
        for et_i in range(config.episode_length):

            # env.render()
            # rearrange observations to be per agent, and convert to torch Variable
            # print('step', et_i)

            torch_obs = [
                Variable(
                    torch.Tensor(np.vstack(obs[:, i])),  # stack the per-thread observations vertically
                    requires_grad=False) for i in range(maddpg.nagents)
            ]

            # get actions as torch Variables
            torch_agent_actions = maddpg.step(torch_obs, explore=False)
            # convert actions to numpy arrays
            agent_actions = [ac.data.numpy() for ac in torch_agent_actions]
            # rearrange actions to be per environment
            actions = [[ac[i] for ac in agent_actions] for i in range(1)]
            print(actions)
            ############################################
            # add
            # actions = actions.astype(int)
            ############################################
            # add: the first two actions
            joint_action = []
            for i in range(2):
                player = []
                for j in range(1):
                    each = [0] * 11
                    idx = np.random.randint(11)
                    each[idx] = 1
                    player.append(each)
                joint_action.append(player)
            for m in range(2):
                joint_action.append([actions[0][m].astype(int).tolist()])

            next_obs, rewards, dones, infos = env.step(joint_action)
            obs = next_obs

            reward_eti += rewards[0][0]

            pygame.surfarray.blit_array(screen,
                                        env.render().transpose(1, 0, 2))
            pygame.display.flip()
            clock.tick(1)
            fname = "./image/" + str(et_i) + ".png"  # save image
            pygame.image.save(screen, fname)
Example #14
def run(config):
    model_path = (Path('./models') / config.env_id / config.model_name /
                  ('run%i' % config.run_num))
    #model_path = config.path
    if config.incremental is not None:
        model_path = model_path / 'incremental' / ('model_ep%i.pt' %
                                                   config.incremental)
    else:
        model_path = model_path / 'model.pt'

    if config.save_gifs:
        gif_path = model_path.parent / 'gifs'
        gif_path.mkdir(exist_ok=True)
    
    maddpg = MADDPG.init_from_save(model_path)
    env = make_env(config.env_id, config.benchmark, discrete_action=maddpg.discrete_action)
    print(type(env))
    maddpg.prep_rollouts(device='cpu')
    ifi = 1 / config.fps  # inter-frame interval
    if config.save_gifs:
        frames = []
    agent_info = [[[]]]
    reward_info = []
    trajectories = []
    
    for ep_i in range(config.n_episodes):
        print("Episode %i of %i" % (ep_i + 1, config.n_episodes))
        obs = env.reset()
        if config.save_gifs:
            frames.append(env.render('rgb_array')[0])
            env.render('human')
        episode_rewards = np.zeros((config.episode_length, maddpg.nagents))
        current_trajectory = []
        current_entities = []
        if config.store_traj:
            cur_state_ent = env.getStateEntities()
            current_entities.append(cur_state_ent)
            cur_state = env.getState()
            current_trajectory.append(cur_state)
        for t_i in range(config.episode_length):
            calc_start = time.time()
            # rearrange observations to be per agent, and convert to torch Variable
            torch_obs = [Variable(torch.Tensor(obs[i]).view(1, -1),
                                  requires_grad=False)
                         for i in range(maddpg.nagents)]
            # get actions as torch Variables
            torch_actions = maddpg.step(torch_obs, explore=False)
            # convert actions to numpy arrays
            actions = [ac.data.numpy().flatten() for ac in torch_actions]
            obs, rewards, dones, infos = env.step(actions)
            if config.store_traj:
                cur_state = env.getState()
                current_trajectory.append(cur_state)

            if config.benchmark:
                for i, info in enumerate(infos):
                    agent_info[-1][i].append(infos['n'])
            if config.sparse_reward:
                if t_i == 0:
                    total = np.array(rewards)
                if t_i != config.episode_length - 1:
                    total = total + np.array(rewards)
                    rewards = list(np.zeros(len(rewards)))
                else:
                    rewards = list(total)
            episode_rewards[t_i] = rewards
            if config.save_gifs:
                frames.append(env.render('rgb_array')[0])
            calc_end = time.time()
            elapsed = calc_end - calc_start
            if config.save_gifs:
                if elapsed < ifi:
                    time.sleep(ifi - elapsed)
                env.render('human')
        agent_info.append([[]])
        mean_rewards = np.mean(episode_rewards, axis=0)
        reward_info.append(mean_rewards)
        if config.store_traj:
            trajectories.append([current_entities, current_trajectory])
    if config.save_gifs:
        gif_num = 0
        while (gif_path / ('%i.gif' % gif_num)).exists():
            gif_num += 1
        imageio.mimsave(str(gif_path / ('%i.gif' % gif_num)),
                        frames, duration=ifi)
    run_dir = model_path.parent 
    if config.benchmark:
        with open(run_dir / 'eval_info.pkl', 'wb') as fp:
            pickle.dump(agent_info, fp)
    with open(run_dir / 'eval_rew.pkl', 'wb') as fp:
        pickle.dump(reward_info, fp)
    if config.store_traj:
        with open(run_dir / 'static_trajectories_eval.pkl', 'wb') as fp:
            pickle.dump(trajectories, fp)
    env.close()