Example No. 1
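These snippets come from a larger project and omit their imports. The preamble below is an assumption reconstructed from the names used in the examples: SurveillanceEnv, DRQN, StatePlusImageLstmPolicy, train and callback are project-specific, while DQN, find_trainable_variables and tf.ConfigProto point at a stable-baselines / TensorFlow 1.x stack (exact module paths may differ by version).

# Assumed preamble -- not shown on the original page; paths are guesses.
import datetime

import numpy as np
import tensorflow as tf                  # TF 1.x (tf.ConfigProto is used below)
from stable_baselines import DQN         # DQN.load() is used in Example No. 8
from stable_baselines.a2c.utils import find_trainable_variables

# Project-specific names, assumed to live somewhere in the repo:
# from surveillance.env import SurveillanceEnv
# from surveillance.drqn import DRQN
# from surveillance.policies import StatePlusImageLstmPolicy
# from surveillance.training import train, callback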
def main():
    log_dir = '../../drl_for_surveillance_runs/flood/indiv/drqn/pretrained/'
    pretrained_model_name = 'drqn-20_model.pkl'

    env = SurveillanceEnv(nr_agents=2,
                          obs_mode='normal',
                          obs_type='flood',
                          obs_radius=500,
                          world_size=1000,
                          grid_size=100,
                          range_cutpoints=30,
                          angular_cutpoints=40,
                          torus=False,
                          dynamics='aircraft',
                          shared_reward=False,
                          render_dir=log_dir + 'video/')

    # The policy class is restored from the checkpoint itself, so it does not
    # need to be passed to DRQN.load().
    model = DRQN.load('../../pretrained_models/' + pretrained_model_name,
                      env=env)

    model.tensorboard_log = log_dir
    model.checkpoint_path = log_dir + 'models/'
    model.initial_exploration = 0.5

    # `train` and `callback` are defined elsewhere in the project
    # (see the callback sketch below).
    train(model, callback, num_timesteps=int(2e6), log_dir=log_dir)
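The `train` helper and `callback` are not shown on this page. A minimal sketch of what such a callback could look like, assuming it follows the legacy stable-baselines convention callback(locals_, globals_) -> bool, where returning False stops training:

def callback(locals_, globals_):
    # Hypothetical stand-in for the project's real callback.
    model = locals_['self']                 # the algorithm instance
    if model.num_timesteps % 10000 == 0:
        print('timesteps:', model.num_timesteps)
    return True                             # keep training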
Example No. 2
def main():
    log_dir = '../../../../Results/models/wildfire/indiv/drqn/obs_100/'
    saved_model_name = 'checkpoint_model.pkl'

    # Note: this session config is created but never passed to a tf.Session;
    # it is kept here as in the original snippet.
    config = tf.ConfigProto(log_device_placement=True)

    env = SurveillanceEnv(
        nr_agents=2,
        obs_mode='normal',
        obs_type='wildfire',
        obs_radius=100,  # NOTE: smaller radius than the 500 used elsewhere
        world_size=1000,
        grid_size=100,
        range_cutpoints=30,
        angular_cutpoints=40,
        torus=False,
        dynamics='aircraft',
        shared_reward=False,
        render_dir=log_dir + 'video/')

    model = DRQN.load(log_dir + 'models/' + saved_model_name, env=env)
    params = find_trainable_variables("main")

    # Fetch one weight tensor and plot its distribution. Index 6 is a magic
    # index: which layer it selects depends on graph construction order.
    params_val = model.sess.run(
        model.graph._collections['trainable_variables'][6])
    import matplotlib.pyplot as plt
    plt.hist(params_val.flatten(), 40, alpha=0.3)  # 40-bin weight histogram
    plt.show()
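Indexing _collections['trainable_variables'] by a magic number is brittle: the index depends on graph construction order. A safer pattern (TF1 style, matching the tf.ConfigProto usage above) is to list the variables by name first and then fetch the one you want:

with model.graph.as_default():
    # Enumerate every trainable variable with its name and shape.
    for i, var in enumerate(tf.trainable_variables()):
        print(i, var.name, var.shape)
    # Then fetch a specific tensor by the index you just identified, e.g.:
    # weights = model.sess.run(tf.trainable_variables()[6])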
Example No. 3
def main():
    log_dir = '../../drl_for_surveillance_runs/flood/indiv/drqn/v1/'

    env = SurveillanceEnv(nr_agents=2,
                          obs_mode='normal',
                          obs_type='flood',
                          obs_radius=500,
                          world_size=1000,
                          grid_size=100,
                          range_cutpoints=30,
                          angular_cutpoints=40,
                          torus=False,
                          dynamics='aircraft',
                          shared_reward=False,
                          render_dir=log_dir + 'video/')

    policy = StatePlusImageLstmPolicy

    model = DRQN(policy,
                 env,
                 prioritized_replay=False,
                 verbose=1,
                 tensorboard_log=log_dir,
                 batch_size=100,
                 target_network_update_freq=100,
                 learning_starts=15000,
                 train_freq=10,
                 trace_length=20,
                 buffer_size=200000,
                 exploration_fraction=0.7,
                 exploration_final_eps=0.1,
                 checkpoint_path=log_dir + 'models/',
                 checkpoint_freq=10000)

    train(model, callback, num_timesteps=int(3e6), log_dir=log_dir)
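For reference, assuming DRQN follows the stable-baselines DQN convention of a linear epsilon schedule, exploration_fraction=0.7 over 3e6 timesteps means epsilon anneals from 1.0 down to exploration_final_eps=0.1 during the first 2.1e6 steps and stays at 0.1 afterwards:

def epsilon_at(step, total_timesteps=int(3e6),
               exploration_fraction=0.7, final_eps=0.1):
    # Linear anneal over the first exploration_fraction * total_timesteps
    # steps, then hold at final_eps (stable-baselines-style schedule).
    anneal_steps = exploration_fraction * total_timesteps
    frac = min(step / anneal_steps, 1.0)
    return 1.0 + frac * (final_eps - 1.0)

# epsilon_at(0) -> 1.0; epsilon_at(2_100_000) -> 0.1; epsilon_at(3_000_000) -> 0.1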
Example No. 4
def main():
    log_dir = '../../drl_for_surveillance_runs/wildfire/indiv/drqn/v4/'
    saved_model_name = 'checkpoint_model.pkl'

    env = SurveillanceEnv(nr_agents=2,
                          obs_mode='normal',
                          obs_type='wildfire',
                          obs_radius=500,
                          world_size=1000,
                          grid_size=100,
                          range_cutpoints=30,
                          angular_cutpoints=40,
                          torus=False,
                          dynamics='aircraft',
                          shared_reward=False,
                          render_dir=log_dir + 'video/')

    model = DRQN.load(log_dir + 'models/' + saved_model_name, env=env)

    obs = env.reset()
    states = None
    while True:
        actions, states = model.predict(obs, states=states)
        obs, rewards, dones, info = env.step(actions)
        if env.timestep % 100 == 0:
            env.render()
        if dones:  # assumes the env reports a single shared done flag
            obs = env.reset()
            states = None  # reset the recurrent state along with the episode
Example No. 5
def main():
    log_dir = '../../drl_for_surveillance_runs/flood/terrain/'

    env = SurveillanceEnv(nr_agents=2,
                          obs_mode='normal',
                          obs_type='flood',
                          obs_radius=500,
                          world_size=1000,
                          grid_size=100,
                          range_cutpoints=30,
                          angular_cutpoints=40,
                          torus=False,
                          dynamics='aircraft',
                          shared_reward=False,
                          render_dir=log_dir + 'video/')

    # Regenerate the terrain repeatedly, presumably to eyeball successive
    # random worlds; env.render() (as used in the other examples) is added
    # here so each generation is actually visible.
    env.reset()
    while True:
        env.world.reset()
        env.render()
Example No. 6
def main():
    log_dir = '../../drl_for_surveillance_runs/wildfire/indiv/dqn/report/'
    n_agents = 2
    env = SurveillanceEnv(nr_agents=n_agents,
                          obs_mode='normal',
                          obs_type='wildfire',
                          obs_radius=500,
                          world_size=1000,
                          grid_size=100,
                          range_cutpoints=30,
                          angular_cutpoints=40,
                          torus=False,
                          dynamics='aircraft',
                          shared_reward=False,
                          render_dir=log_dir + 'video/')

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        timestep = 0
        while not done:  # run one full episode, then print its reward
            if timestep % 500 == 0:
                time = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S-")
                env.render(render_type='wildfire_state',
                           output_dir=log_dir + 'video/',
                           name_append=time + format(timestep, '04d') + '_')

            action = np.zeros(n_agents)  # every agent takes action 0 (fixed baseline)
            obs, rew, done, _ = env.step(action)
            episode_rew += rew
            timestep += 1
        print("Episode reward", episode_rew)
Example No. 7
def main():
    log_dir = '../../drl_for_surveillance_runs/wildfire/indiv/drqn/test/'

    env = SurveillanceEnv(nr_agents=2,
                          obs_mode='normal',
                          obs_type='wildfire',
                          obs_radius=500,
                          world_size=1000,
                          grid_size=100,
                          range_cutpoints=30,
                          angular_cutpoints=40,
                          torus=False,
                          dynamics='aircraft',
                          shared_reward=False,
                          render_dir=log_dir + 'video/')

    policy = StatePlusImageLstmPolicy

    model = DRQN(policy, env, prioritized_replay=False,
                 verbose=1,
                 tensorboard_log=log_dir,
                 batch_size=100,
                 target_network_update_freq=100,
                 learning_starts=10000,
                 train_freq=10,
                 trace_length=20,
                 h_size=100,
                 learning_rate=5e-4,
                 buffer_size=200000,
                 exploration_fraction=0.7,
                 exploration_final_eps=0.1,
                 checkpoint_path=log_dir + 'models/',
                 checkpoint_freq=10000)

    # Inspect one weight tensor of the freshly initialised network. Index 22
    # is a magic index (see the variable-listing sketch after Example No. 2
    # for a name-based alternative).
    params_val = model.sess.run(
        model.graph._collections['trainable_variables'][22])
    import matplotlib.pyplot as plt
    plt.hist(params_val.flatten(), 40, alpha=0.3)
    plt.show()
Example No. 8
def main():
    log_dir = '../../../models/wildfire/indiv/dqn/'
    saved_model_name = 'checkpoint_model.pkl'
    n_agents = 2
    env = SurveillanceEnv(nr_agents=n_agents,
                          obs_mode='normal',
                          obs_type='wildfire',
                          obs_radius=500,
                          world_size=1000,
                          grid_size=100,
                          range_cutpoints=30,
                          angular_cutpoints=40,
                          torus=False,
                          dynamics='aircraft',
                          shared_reward=False,
                          render_dir=log_dir + 'video/')

    # All tests share the same seed
    env.seed(seed=1)
    np.random.seed(seed=1)

    model = DQN.load(log_dir + 'models/' + saved_model_name, env=env)

    total_episodes = 100

    episode_rewards = []
    for ep in range(total_episodes):

        obs = env.reset()
        states = None
        episode_rewards.append(0.0)

        while True:
            actions, states = model.predict(obs, states=states)
            obs, rewards, dones, info = env.step(actions)

            for i in range(n_agents):
                # Sum of rewards for logging
                episode_rewards[-1] += rewards[i]

            if env.timestep % 500 == 0:
                print("Timestep " + str(env.timestep) + "/" +
                      str(env.timestep_limit))

            algo = model  # alias used by the rendering block below
            time = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S")

            # Render map
            if (algo.env.is_terminal or algo.env.timestep % 500 == 0) \
                    and algo.env.timestep > 0:

                n = 1  # agent to plot
                dim_image = algo.observation_space.dim_image
                obs_array = np.array(obs[n])
                obs_image = np.reshape(obs_array[:dim_image[0] * dim_image[1]],
                                       dim_image,
                                       order='C')

                if algo.env.ax is not None:
                    x = algo.env.agents[n].obs_angles
                    y = algo.env.agents[n].obs_ranges
                    ax1 = algo.env.ax[1]
                    ax1.set_theta_zero_location("S")

                    x_mod = np.append(x, 2 * np.pi)
                    obs_image_append = np.expand_dims(obs_image[:, 0], axis=1)
                    obs_image_mod = np.append(obs_image,
                                              obs_image_append,
                                              axis=1)

                    ax1.pcolormesh(x_mod, y, obs_image_mod)

                render_dir = algo.env.render_dir
                algo.env.render(mode='animate',
                                output_dir=render_dir + time + '-',
                                name_append=format(algo.num_timesteps, '04d') +
                                '_')
                print(time)
                print("Instantaneous rewards:", rewards)

            if dones:
                print("Episode", ep + 1, "reward:", episode_rewards[-1])
                break

        # Overwrite the results file after every episode so partial results
        # survive an interrupted run.
        np.save(log_dir + 'test_' + str(total_episodes) + '_episode_rewards',
                episode_rewards)
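Once the test run finishes, the saved array can be summarized offline. A small sketch (the file name mirrors the np.save call above; np.save appends the .npy extension):

rewards = np.load(log_dir + 'test_100_episode_rewards.npy')
print('episodes   :', len(rewards))
print('mean reward: %.2f (std %.2f, min %.2f, max %.2f)'
      % (rewards.mean(), rewards.std(), rewards.min(), rewards.max()))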