Example #1
def ConvertToCSV(env):
    dir_path = env.sim_params.emission_path
    emission_filename = \
        "{0}-emission.xml".format(env.network.name)
    emission_path = os.path.join(dir_path, emission_filename)
    # convert the emission file into a csv
    emission_to_csv(emission_path)
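The snippet above assumes os and Flow's emission_to_csv helper are already imported in the surrounding module. A minimal, hypothetical invocation might look as follows (my_env stands for an environment whose simulation parameters were created with an emission_path set; the flow.core.util import path is an assumption about where the helper lives):

import os
from flow.core.util import emission_to_csv  # assumed import path for the helper

# my_env is a hypothetical environment created with an emission_path set
ConvertToCSV(my_env)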
Example #2
    def test_emission_to_csv(self):
        # current path
        current_path = os.path.realpath(__file__).rsplit("/", 1)[0]

        # run the emission_to_csv function on a small emission file
        emission_to_csv(current_path + "/test_files/test-emission.xml")

        # import the generated csv file and its headers
        dict1 = []
        filename = current_path + "/test_files/test-emission.csv"
        with open(filename, "r") as infile:
            reader = csv.reader(infile)
            headers = next(reader)
            for row in reader:
                dict1.append(dict())
                for i, key in enumerate(headers):
                    dict1[-1][key] = row[i]

        # check the names of the headers
        expected_headers = \
            ['time', 'CO', 'y', 'CO2', 'electricity', 'type', 'id', 'eclass',
             'waiting', 'NOx', 'fuel', 'HC', 'x', 'route', 'relative_position',
             'noise', 'angle', 'PMx', 'speed', 'edge_id', 'lane_number']

        self.assertCountEqual(headers, expected_headers)

        # check the number of rows of the generated csv file
        # Note that rl vehicles are missing their final (reset) values, which
        # should not be a problem
        self.assertEqual(len(dict1), 104)
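For reference, the manual header/row zipping above is equivalent to using the standard library's csv.DictReader; a minimal sketch of the same read:

import csv

# hypothetical path; in the test above it points at test_files/test-emission.csv
filename = "test_files/test-emission.csv"
with open(filename, "r") as infile:
    reader = csv.DictReader(infile)
    headers = reader.fieldnames
    dict1 = [dict(row) for row in reader]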
Example #3
def visualizer_rllab(args):
    """Visualizer for rllab experiments.

    This function takes args (see function create_parser below for
    more detailed information on what information can be fed to this
    visualizer), and renders the experiment associated with it.
    """
    # extract the flow environment
    data = joblib.load(args.file)
    policy = data['policy']
    env = data['env']

    # FIXME(ev, ak) only one of these should be needed
    # unwrapped_env = env._wrapped_env._wrapped_env.env.unwrapped
    # unwrapped_env = env.wrapped_env.env.env.unwrapped

    # if this doesn't work, try the one above it
    unwrapped_env = env._wrapped_env.env.unwrapped

    # Set sumo to make a video
    sim_params = unwrapped_env.sim_params
    sim_params.emission_path = './test_time_rollout/' if args.gen_emission \
        else None
    if args.no_render:
        sim_params.render = False
    else:
        sim_params.render = True
    unwrapped_env.restart_simulation(
        sim_params=sim_params, render=sim_params.render)

    # Load data into arrays
    rew = []
    for j in range(args.num_rollouts):
        # run a single rollout of the experiment
        path = rollout(env=env, agent=policy)

        # collect the observations and rewards from the rollout
        new_rewards = path['rewards']

        # print the cumulative reward of the most recent rollout
        print('Round {}, return: {}'.format(j, sum(new_rewards)))
        rew.append(sum(new_rewards))

    # print the average cumulative reward across rollouts
    print('Average, std return: {}, {}'.format(np.mean(rew), np.std(rew)))

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(
            unwrapped_env.scenario.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)
Example #4
    def run(self, num_runs, num_steps, rl_actions=None, convert_to_csv=False):
        """
        Runs the given scenario for a set number of runs and a set number of
        steps per run.

        Parameters
        ----------
        num_runs: int
            number of runs the experiment should perform
        num_steps: int
            number of steps to be performed in each run of the experiment
        rl_actions: list or numpy ndarray, optional
            actions to be performed by rl vehicles in the network (if there are
            any)
        convert_to_csv: bool
            Specifies whether to convert the emission file created by sumo into
            a csv file
        """
        if rl_actions is None:
            rl_actions = []

        rets = []
        for i in range(num_runs):
            logging.info("Iter #" + str(i))
            ret = 0
            self.env.reset()
            for j in range(num_steps):
                state, reward, done, _ = self.env.step(rl_actions)
                ret += reward
                if done:
                    break
            rets.append(ret)
            print("Round {0}, return: {1}".format(i, ret))

        print("Average Return", np.mean(rets))
        self.env.terminate()

        if convert_to_csv:
            # collect the location of the emission file
            dir_path = self.env.sumo_params.emission_path
            emission_filename = \
                "{0}-emission.xml".format(self.env.scenario.name)
            emission_path = \
                "{0}/{1}".format(dir_path, emission_filename)

            # convert the emission file into a csv
            emission_to_csv(emission_path)
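A hypothetical call, assuming exp is the experiment object this method belongs to and its environment was built with an emission_path in its simulation parameters:

exp.run(num_runs=1, num_steps=1000, convert_to_csv=True)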
Example #5
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for
    more detailed information on what information can be fed to this
    visualizer), and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # more hacks for old pkl files. TODO: remove eventually
    if not hasattr(sim_params, 'use_ballistic'):
        sim_params.use_ballistic = False

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = False  # will be set to True below
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        if args.render_mode != 'sumo_gui':
            sim_params.render = 'drgb'
            sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
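    # build the checkpoint path, of the form
    # <result_dir>/checkpoint_<num>/checkpoint-<num>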
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if args.render_mode == 'sumo_gui':
        env.sim_params.render = True  # set to True after initializing agent and env

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
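        # pre-allocate zeroed (hidden, cell) LSTM state, one pair per policy in
        # the multiagent case, for use when computing actions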
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    # if restart_instance, don't restart here because env.reset will restart later
    if not sim_params.restart_instance:
        env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []
    #bmil edit
    acc_rollout = []
    power = 0

    for i in range(args.num_rollouts):
        vel = []

        # bmil list for collecting data
        timerange = []
        vel_dict = defaultdict(list)
        rl_acc = []

        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            # speeds = vehicles.get_speed(vehicles.get_ids())
            ids = vehicles.get_ids()
            speeds = vehicles.get_speed(ids)

            # BMIL EDIT FOR COLLECTING DATA OF ACCELERATION AND VELOCITY
            rl = vehicles.get_rl_ids()[0]
            act = vehicles.get_realized_accel(rl)
            rl_acc.append(act or 0)
            timerange.append(vehicles.get_timestep(ids[-1]) / 100000)

            # only include non-empty speeds
            if speeds:
                vel.append(np.mean(speeds))
                # bmil edit
                for veh_id, speed in zip(ids, speeds):
                    vel_dict[veh_id].append(speed)
                if vehicles.get_timestep(ids[0]) >= 100000:
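                    # simplified longitudinal power model, accumulated per step:
                    #   P = M*v*a  (inertia)
                    #     + M*g*Cr*v  (rolling resistance)
                    #     + 0.5*rho*A*Ca*v^3  (aerodynamic drag)
                    # note: veh_id here is whatever vehicle the loop above ended
                    # on, i.e. the last id in `ids`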
                    M = 1200  # mass of average sized vehicle (kg)
                    g = 9.81  # gravitational acceleration (m/s^2)
                    Cr = 0.005  # rolling resistance coefficient
                    Ca = 0.3  # aerodynamic drag coefficient
                    rho = 1.225  # air density (kg/m^3)
                    A = 2.6  # vehicle cross sectional area (m^2)
                    speed = vehicles.get_speed(veh_id)
                    prev_speed = vehicles.get_previous_speed(veh_id)

                    accel = abs(speed - prev_speed) / env.sim_step

                    power += M * speed * accel + M * g * Cr * speed + 0.5 * rho * A * Ca * speed**3

            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                            state[agent_id], state=state_init[agent_id],
                            policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
                #  BMIL EDIT FOR COLLECTING DATA FROM 100000 TO 375000
                #  Steps 0 - 75000 are warm-up and steps 75000 - 100000 are the
                #  stabilization process, so collection starts at 100000
                if vehicles.get_timestep(rl) >= 100000:
                    acc_rollout.append(act)

            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [
                x / y for x, y in zip(final_outflows, final_inflows)
            ]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

        # BMIL EDIT FOR PLOT DATA
        veh = list(vel_dict.keys())
        plt.subplot(3, 1, 1)
        plt.title('Results')
        for v in veh[:-1]:
            plt.plot(timerange, vel_dict[v])
        plt.xlabel('timestep(s)')
        plt.ylabel('speed(m/s)')
        plt.legend(veh[:-1], fontsize=9)
        plt.grid(True)
        # plt.show()

        plt.subplot(3, 1, 2)
        plt.plot(timerange, vel_dict[veh[-1]], color='r')
        plt.xlabel('timestep(s)')
        plt.ylabel('speed(m/s)')
        plt.legend(['lc'] + veh[-1:])
        plt.grid(True)

        plt.subplot(3, 1, 3)
        plt.plot(timerange, rl_acc, color='b')
        plt.xlabel('timestep(s)')
        plt.ylabel('acceleration(m/s^2)')
        plt.grid(True)
        # BASE_DIR = '/home/bmil/BMIL_FLOW_CODE/Graph/'
        # plt.savefig(f'{BASE_DIR}{"__".join(name)}', dpi=400)
        plt.show()

    # BMIL EDIT FOR COMPUTING ACCELERATION's MEAN AND VAR
    mean_acc_rollout = [np.mean(acc_rollout)]
    variance_acc_rollout = [np.var(acc_rollout)]

    print('==== Summary of results ====')
    print("Return:")
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print(rets)
        print('Average, std: {}, {}'.format(np.mean(rets), np.std(rets)))

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {}, {}'.format(np.mean(mean_speed),
                                        np.std(mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {}, {}'.format(np.mean(std_speed), np.std(std_speed)))

    # BMIL Edit FOR PRINT ACCEL's MEAN AND VAR
    print("\nAccel, mean (m/s^2):")
    print(mean_acc_rollout)
    print("\nAccel, var (m/s^2):")
    print(variance_acc_rollout)
    print("\nTotal Power Consumption (kgᐧm^2/s^3):")
    print(power)

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the run
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)

        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)
Example #6
    def run(self, num_runs, num_steps, rl_actions=None, convert_to_csv=False):
        """Run the given scenario for a set number of runs and steps per run.

        Parameters
        ----------
        num_runs : int
            number of runs the experiment should perform
        num_steps : int
            number of steps to be performed in each run of the experiment
        rl_actions : method, optional
            maps states to actions to be performed by the RL agents (if
            there are any)
        convert_to_csv : bool
            Specifies whether to convert the emission file created by sumo
            into a csv file

        Returns
        -------
        info_dict : dict
            contains returns, average speed per step
        """
        info_dict = {}
        if rl_actions is None:

            def rl_actions(*_):
                return None

        rets = []
        mean_rets = []
        ret_lists = []
        vels = []
        mean_vels = []
        std_vels = []
        for i in range(num_runs):
            vel = np.zeros(num_steps)
            logging.info("Iter #" + str(i))
            ret = 0
            ret_list = []
            state = self.env.reset()
            for j in range(num_steps):
                state, reward, done, _ = self.env.step(rl_actions(state))
                vel[j] = np.mean(
                    self.env.k.vehicle.get_speed(self.env.k.vehicle.get_ids()))
                ret += reward
                ret_list.append(reward)
                if done:
                    break
            rets.append(ret)
            vels.append(vel)
            mean_rets.append(np.mean(ret_list))
            ret_lists.append(ret_list)
            mean_vels.append(np.mean(vel))
            std_vels.append(np.std(vel))
            print("Round {0}, return: {1}".format(i, ret))

        info_dict["returns"] = rets
        info_dict["velocities"] = vels
        info_dict["mean_returns"] = mean_rets
        info_dict["per_step_returns"] = ret_lists

        print("Average, std return: {}, {}".format(np.mean(rets),
                                                   np.std(rets)))
        print("Average, std speed: {}, {}".format(np.mean(mean_vels),
                                                  np.std(std_vels)))
        self.env.terminate()

        if convert_to_csv:
            # wait a short period of time to ensure the xml file is readable
            time.sleep(0.1)

            # collect the location of the emission file
            dir_path = self.env.sim_params.emission_path
            emission_filename = \
                "{0}-emission.xml".format(self.env.scenario.name)
            emission_path = os.path.join(dir_path, emission_filename)

            # convert the emission file into a csv
            emission_to_csv(emission_path)

        return info_dict
Example #7
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for
    more detailed information on what information can be fed to this
    visualizer), and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # more hacks for old pkl files. TODO: remove eventually
    if not hasattr(sim_params, 'use_ballistic'):
        sim_params.use_ballistic = False

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = False  # will be set to True below
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        if args.render_mode != 'sumo_gui':
            sim_params.render = 'drgb'
            sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if args.render_mode == 'sumo_gui':
        env.sim_params.render = True  # set to True after initializing agent and env

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    # if restart_instance, don't restart here because env.reset will restart later
    if not sim_params.restart_instance:
        env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []

    state_translation = {
        "inflow": 0,
        "slowing": 1,
        "parking": 2,
        "parked": 3,
        "outflow": 4
    }
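    # state_translation turns the string vehicle state into an integer code so
    # it can be stored in the numeric output arrays built below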

    custom_outputs = {
        k: np.zeros(env_params.horizon * args.num_rollouts)
        for k in ["num_rollout", "t", "speed", "position", "state", "reward"]
    }
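    # each array holds one entry per (rollout, step) pair and is filled at
    # index m = i * env_params.horizon + j inside the rollout loops below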

    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()

        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for j in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            speeds = vehicles.get_speed(vehicles.get_ids())

            # only include non-empty speeds
            if speeds:
                vel.append(np.mean(speeds))

            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                            state[agent_id], state=state_init[agent_id],
                            policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

            #"num_rollout", "t", "speed", "position", "state", "reward"]}
            m = i * env_params.horizon + j
            custom_outputs["num_rollout"][m] = i
            custom_outputs["t"][m] = j
            custom_outputs["reward"][m] = reward

            veh_id = vehicles.get_ids()[0]
            custom_outputs["speed"][m] = vehicles.get_speed(veh_id)
            custom_outputs["position"][m] = vehicles.get_global_position(
                veh_id, env)
            custom_outputs["state"][m] = state_translation.get(
                vehicles.get_state(veh_id), -1)

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [
                x / y for x, y in zip(final_outflows, final_inflows)
            ]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

    print('==== Summary of results ====')
    print("Return:")
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print(rets)
        print('Average, std: {}, {}'.format(np.mean(rets), np.std(rets)))

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {}, {}'.format(np.mean(mean_speed),
                                        np.std(mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {}, {}'.format(np.mean(std_speed), np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the run
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    df = pd.DataFrame(custom_outputs)
    df.to_csv("output.csv")

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)

        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)
Example #8
def visualizer_rllib(args):
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    # config = get_rllib_config(result_dir + '/..')
    # pkl = get_rllib_pkl(result_dir + '/..')
    config = get_rllib_config(result_dir)
    # TODO(ev) backwards compatibility hack
    try:
        pkl = get_rllib_pkl(result_dir)
    except Exception:
        pass

    # check if we have a multiagent scenario but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sumo_params = flow_params['sumo']
    setattr(sumo_params, 'num_clients', 1)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params,
                                           version=0,
                                           render=False)
    register_env(env_name, create_env)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if (args.run and config_run):
        if (args.run != config_run):
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if (args.run):
        agent_cls = get_agent_class(args.run)
    elif (config_run):
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sumo_params.restart_instance = False

    sumo_params.emission_path = './test_time_rollout/'

    # pick your rendering mode
    if args.render_mode == 'sumo-web3d':
        sumo_params.num_clients = 2
        sumo_params.render = False
    elif args.render_mode == 'drgb':
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
    elif args.render_mode == 'sumo-gui':
        sumo_params.render = False
    elif args.render_mode == 'no-render':
        sumo_params.render = False

    if args.save_render:
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
        sumo_params.save_render = True

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params['exp_tag']
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__('flow.scenarios', fromlist=[flow_params['scenario']])
    scenario_class = getattr(module, flow_params['scenario'])

    scenario = scenario_class(name=exp_tag,
                              vehicles=vehicles,
                              net_params=net_params,
                              initial_config=initial_config)

    # Start the environment with the gui turned on and a path for the
    # emission file
    module = __import__('flow.envs', fromlist=[flow_params['env_name']])
    env_class = getattr(module, flow_params['env_name'])
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    env = ModelCatalog.get_preprocessor_as_wrapper(
        env_class(env_params=env_params,
                  sumo_params=sumo_params,
                  scenario=scenario))

    import matplotlib.pyplot as plt
    from matplotlib import cm
    from matplotlib.ticker import LinearLocator, FormatStrFormatter
    fig = plt.figure()
    h = np.linspace(0, 60, 100)
    Deltav = np.linspace(-6, 12, 100)
    Headway, DELTAV = np.meshgrid(h, Deltav)
    # fix v=20m/s
    xn, yn = Headway.shape
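    # sweep the (relative speed, headway) grid at a fixed normalized ego speed,
    # query the trained policy for each observation, and contour the actions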
    geta = np.array(Headway)
    for xk in range(xn):
        for yk in range(yn):
            # input state:
            #   Headway[xk, yk]
            #   DELTAV[xk, yk]
            geta[xk, yk] = agent.compute_action(
                np.array(
                    [3.8 / 30, DELTAV[xk, yk] / 30, Headway[xk, yk] / 260]))
    surf = plt.contourf(DELTAV, Headway, geta, 20, cmap=cm.coolwarm)
    plt.colorbar()
    #C = plt.contour(DELTAV, Headway, geta, 20, colors='black')
    # plt.clabel(C, inline = True, fontsize = 10)
    plt.show()
    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(scenario.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie
Example #9
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for
    more detailed information on what information can be fed to this
    visualizer), and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
        print('NOTE: With render mode {}, an extra instance of the SUMO GUI '
              'will display before the GUI for visualizing the result. Click '
              'the green Play arrow to continue.'.format(args.render_mode))
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [np.zeros(size, np.float32),
                                   np.zeros(size, np.float32)]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    env.restart_simulation(
        sim_params=sim_params, render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                            state[agent_id], state=state_init[agent_id],
                            policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [x / y for x, y in
                                     zip(final_outflows, final_inflows)]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

    print('==== Summary of results ====')
    print("Return:")
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print(rets)
        print('Average, std: {}, {}'.format(
            np.mean(rets), np.std(rets)))

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {}, {}'.format(np.mean(mean_speed), np.std(
        mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {}, {}'.format(np.mean(std_speed), np.std(
        std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the run
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)

        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~')+'/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
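        # stitch the rendered PNG frames into an mp4 with ffmpeg, then copy the
        # result into ~/flow_movies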
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
Example #10
    def run(self, num_runs, num_steps, rl_actions=None, convert_to_csv=False):
        """Run the given scenario for a set number of runs and steps per run.

        Parameters
        ----------
        num_runs : int
            number of runs the experiment should perform
        num_steps : int
            number of steps to be performed in each run of the experiment
        rl_actions : method, optional
            maps states to actions to be performed by the RL agents (if
            there are any)
        convert_to_csv : bool
            Specifies whether to convert the emission file created by sumo
            into a csv file

        Returns
        -------
        info_dict : dict
            contains returns, average speed per step
        """
        # raise an error if convert_to_csv is set to True but no emission
        # file will be generated, to avoid getting an error at the end of the
        # simulation
        if convert_to_csv and self.env.sim_params.emission_path is None:
            raise ValueError(
                'The experiment was run with convert_to_csv set '
                'to True, but no emission file will be generated. If you wish '
                'to generate an emission file, you should set the parameter '
                'emission_path in the simulation parameters (SumoParams or '
                'AimsunParams) to the path of the folder where emissions '
                'output should be generated. If you do not wish to generate '
                'emissions, set the convert_to_csv parameter to False.')

        info_dict = {}
        if rl_actions is None:
            def rl_actions(*_):
                return None

        rets = []
        mean_rets = []
        ret_lists = []
        vels = []
        mean_vels = []
        std_vels = []
        outflows = []
        for i in range(num_runs):
            vel = np.zeros(num_steps)
            logging.info("Iter #" + str(i))
            ret = 0
            ret_list = []
            state = self.env.reset()
            for j in range(num_steps):
                state, reward, done, _ = self.env.step(rl_actions(state))
                vel[j] = np.mean(
                    self.env.k.vehicle.get_speed(self.env.k.vehicle.get_ids()))
                ret += reward
                ret_list.append(reward)

                if done:
                    break
            rets.append(ret)
            vels.append(vel)
            mean_rets.append(np.mean(ret_list))
            ret_lists.append(ret_list)
            mean_vels.append(np.mean(vel))
            std_vels.append(np.std(vel))
            outflows.append(self.env.k.vehicle.get_outflow_rate(int(500)))
            print("Round {0}, return: {1}".format(i, ret))

        info_dict["returns"] = rets
        info_dict["velocities"] = vels
        info_dict["mean_returns"] = mean_rets
        info_dict["per_step_returns"] = ret_lists
        info_dict["mean_outflows"] = np.mean(outflows)

        print("Average, std return: {}, {}".format(
            np.mean(rets), np.std(rets)))
        print("Average, std speed: {}, {}".format(
            np.mean(mean_vels), np.std(mean_vels)))
        self.env.terminate()

        if convert_to_csv:
            # wait a short period of time to ensure the xml file is readable
            time.sleep(0.1)

            # collect the location of the emission file
            dir_path = self.env.sim_params.emission_path
            emission_filename = \
                "{0}-emission.xml".format(self.env.scenario.name)
            emission_path = os.path.join(dir_path, emission_filename)

            # convert the emission file into a csv
            emission_to_csv(emission_path)

            # Delete the .xml version of the emission file.
            os.remove(emission_path)

        return info_dict
Example #11
def visualizer_rllib(args, seed=None):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for
    more detailed information on what information can be fed to this
    visualizer), and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    config['callbacks'] = MyCallbacks
    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)
    #flow_params['env'].additional_params["use_seeds"]=args.use_seeds
    #    print(args.use_seeds)
    seed_tmp = None
    if seed:
        with open(seed, 'rb') as f:
            seed_tmp = pickle.load(f)
        config['seed'] = int(seed_tmp['rllib_seed'])
    elif args.use_seeds:
        with open(args.use_seeds, 'rb') as f:
            seed_tmp = pickle.load(f)
        config['seed'] = int(seed_tmp['rllib_seed'])
    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)
    if seed_tmp:
        #setattr(sim_params, 'seed', seed_tmp['sumo_seed'])
        sim_params.seed = int(int(seed_tmp['sumo_seed']) / 10**6)
        print(sim_params.seed)
    #import IPython
    #IPython.embed()
    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)

    # Merge with `evaluation_config`.
    evaluation_config = copy.deepcopy(config.get("evaluation_config", {}))
    config = merge_dicts(config, evaluation_config)

    if args.run:
        agent_cls = get_trainable_cls(args.run)
    elif config_run:
        agent_cls = get_trainable_cls(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
        print('NOTE: With render mode {}, an extra instance of the SUMO GUI '
              'will display before the GUI for visualizing the result. Click '
              'the green Play arrow to continue.'.format(args.render_mode))
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True
    #if seed is not None:
    #    print(seed)
    #    flow_params["env"].additional_params["use_seeds"] = seed
    #    input()
    #else:
    #    flow_params["env"].additional_params["use_seeds"] = args.use_seeds
    if args.horizon:
        config['horizon'] = args.horizon
        flow_params['env'].horizon = args.horizon

    # Create and register a gym+rllib env
    register_time = time.time()
    create_env, env_name = make_create_env(params=flow_params,
                                           version=0,
                                           seeds_file=seed)
    register_env(env_name, create_env)
    register_time = time.time() - register_time
    print("Register Time:", register_time)
    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = True
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon
    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    create_time = time.time()
    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)
    create_time = time.time() - create_time
    print("Create time:", create_time)
    if multiagent:
        rets = {}
        # map the agent id to its policy
        print(config['multiagent']['policy_mapping_fn'])
        policy_map_fn = config['multiagent']['policy_mapping_fn']  #.func

        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    restart_time = time.time()
    env.restart_simulation(sim_params=sim_params, render=sim_params.render)
    restart_time = time.time() - restart_time
    print("Restart Time:", restart_time)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []

    if PRINT_TO_SCREEN:
        pp = pprint.PrettyPrinter(indent=2)
        print("config ")
        pp.pprint(config)
        print("flow_params ")
        pp.pprint(flow_params)

    if REALTIME_PLOTS:
        # prepare plots
        # You probably won't need this if you're embedding things in a tkinter plot...
        plt.ion()
        fig = plt.figure()
        axA = fig.add_subplot(331)
        axA.set_title("Actions")
        axR = fig.add_subplot(332)
        axR.set_title("Rewards")
        axS = fig.add_subplot(333)
        axS.set_title("States")
        axS0 = fig.add_subplot(334)
        axS0.set_title("S0")
        axS1 = fig.add_subplot(335)
        axS1.set_title("S1")
        axS2 = fig.add_subplot(336)
        axS2.set_title("S2")
        axA_hist = fig.add_subplot(337)
        axA_hist.set_title("Actions")
        axR_hist = fig.add_subplot(338)
        axR_hist.set_title("Rewards")
        axS_hist = fig.add_subplot(339)
        axS_hist.set_title("States")
        axS.set_ylim((-2, 3))
        axA.set_ylim((-5, 5))
        axR.set_ylim((-1, 1))
        initialized_plot = False

    # record for visualization purposes
    actions = []
    rewards = []
    states = []
    times = []
    WARMUP = args.warmup
    run_time = time.time()
    for i in range(args.num_rollouts):
        vel = []
        time_to_exit = 0
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            time_to_exit += 1
            vehicles = env.unwrapped.k.vehicle
            if np.mean(vehicles.get_speed(vehicles.get_ids())) > 0:
                vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            #vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                            state[agent_id], state=state_init[agent_id],
                            policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)

            if SUMMARY_PLOTS:
                # record for visualization purposes
                actions.append(action)
                rewards.append(reward)
                states.append(state)

            if PRINT_TO_SCREEN:
                print("action")
                pp.pprint(action)
                print("reward")
                pp.pprint(reward)
                print("state")
                pp.pprint(state)
                print("after step ")

            if REALTIME_PLOTS:
                # Update plots.
                if not initialized_plot:  # initialize
                    lineA, = axA.plot(
                        [0] * len(action), 'g^'
                    )  # Returns a tuple of line objects, thus the comma
                    lineR, = axR.plot(
                        0, 'bs'
                    )  # Returns a tuple of line objects, thus the comma
                    lineS, = axS.plot(
                        [0] * len(state), 'r+'
                    )  # Returns a tuple of line objects, thus the comma
                    initialized_plot = True
                lineA.set_ydata(action)
                lineR.set_ydata(reward)
                lineS.set_ydata(state)
                fig.canvas.draw()
                fig.canvas.flush_events()

            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward

            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break
            if args.use_delay > 0:
                if vehicles.get_num_arrived() >= args.use_delay:
                    break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(5000)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(5000)
        final_inflows.append(inflow)
        times.append(time_to_exit)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [
                x / y for x, y in zip(final_outflows, final_inflows)
            ]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))
    run_time = time.time() - run_time
    print('==== Summary of results ====')
    print("Run Time: ", run_time)
    print("Return:")
    env.close()
    return_reward = 0
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
            return_reward = np.mean(rew)
    else:
        print(rets)
        print('Average, std: {:.2f}, {:.5f}'.format(np.mean(rets),
                                                    np.std(rets)))
        return_reward = np.mean(rets)

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(mean_speed),
                                                np.std(mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(std_speed),
                                                np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(final_outflows),
                                                np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(final_inflows),
                                                np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the run
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(throughput_efficiency),
                                                np.std(throughput_efficiency)))
    print("Time Delay")
    print(times)
    print("Time for certain number of vehicles to exit {:.2f},{:.5f}".format(
        (np.mean(times)), np.std(times)))

    if args.output:
        np.savetxt(args.output, [
            return_reward, mean_speed, std_speed, final_inflows,
            final_outflows, times
        ])
    if SUMMARY_PLOTS:
        generateHtmlplots(actions, rewards, states)

    # terminate the environment
    env.unwrapped.terminate()
    env.terminate()
    # keep the network name, then delete the env in order to remove the sumo process
    network_name = env.network.name
    del env
    del evaluation_config

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)

        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(network_name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
    return return_reward, mean_speed, final_inflows, final_outflows
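
When args.gen_emission is set, the rollout above leaves a "<network>-emission.csv" file under test_time_rollout/ and prints its location. A short post-processing sketch with pandas follows; the 'id' and 'speed' column names come from SUMO's emission output and the path is only a placeholder for the one printed by the run, so treat both as assumptions rather than guarantees:

import pandas as pd

# hypothetical path; use the location printed above
# ("Generated emission file at ...")
df = pd.read_csv('./test_time_rollout/myexp-emission.csv')
# mean speed per vehicle across the rollout
print(df.groupby('id')['speed'].mean().head())
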
Пример #12
0
            vehicles = env.vehicles
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            ret += reward
            if done:
                break
        rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        mean_speed.append(np.mean(vel))
        print("Round {}, Return: {}".format(i, ret))
    print("Average, std return: {}, {}".format(np.mean(rets), np.std(rets)))
    print("Average, std speed: {}, {}".format(np.mean(mean_speed),
                                              np.std(mean_speed)))
    print("Average, std outflow: {}, {}".format(np.mean(final_outflows),
                                                np.std(final_outflows)))

    # terminate the environment
    env.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = "{0}-emission.xml".format(scenario.name)

        emission_path = \
            "{0}/test_time_rollout/{1}".format(dir_path, emission_filename)

        emission_to_csv(emission_path)
Пример #13
0
def visualizer_rllab(args):
    # extract the flow environment
    data = joblib.load(args.file)
    policy = data['policy']
    env = data['env']

    # FIXME(ev, ak) only one of these should be needed
    # unwrapped_env = env._wrapped_env._wrapped_env.env.unwrapped
    # unwrapped_env = env.wrapped_env.env.env.unwrapped

    # if this doesn't work, try the one above it
    unwrapped_env = env._wrapped_env.env.unwrapped

    # Recreate experiment params
    tot_cars = unwrapped_env.vehicles.num_vehicles
    rl_cars = unwrapped_env.vehicles.num_rl_vehicles
    max_path_length = int(env.horizon)
    flat_obs = env._wrapped_env.observation_space.flat_dim
    obs_vars = unwrapped_env.obs_var_labels or []
    num_obs_var = flat_obs / tot_cars

    # Set sumo to make a video
    sumo_params = unwrapped_env.sumo_params
    sumo_params.emission_path = './test_time_rollout/'
    if args.no_render:
        sumo_params.render = False
    else:
        sumo_params.render = True
    unwrapped_env.restart_sumo(sumo_params=sumo_params,
                               render=sumo_params.render)

    # Load data into arrays
    all_obs = np.zeros((args.num_rollouts, max_path_length, flat_obs))
    all_rewards = np.zeros((args.num_rollouts, max_path_length))
    rew = []
    for j in range(args.num_rollouts):
        # run a single rollout of the experiment
        path = rollout(env=env, agent=policy)

        # collect the observations and rewards from the rollout
        new_obs = path['observations']
        all_obs[j, :new_obs.shape[0], :new_obs.shape[1]] = new_obs
        new_rewards = path['rewards']
        all_rewards[j, :len(new_rewards)] = new_rewards

        # print the cumulative reward of the most recent rollout
        print('Round {}, return: {}'.format(j, sum(new_rewards)))
        rew.append(sum(new_rewards))

    # print the average cumulative reward across rollouts
    print('Average, std return: {}, {}'.format(np.mean(rew), np.std(rew)))

    # ensure that a 'plots' folder exists in the directory, and if not,
    # create one
    if not os.path.exists('plots') and not os.environ.get('TEST_FLAG', 1):
        os.makedirs('plots')

    # create an array of time
    sim_step = unwrapped_env.sumo_params.sim_step
    t = np.arange(max_path_length) * sim_step

    for obs_var_idx in range(int(num_obs_var)):
        if len(obs_vars) < obs_var_idx + 1:
            obs_var = 'Observation {0}'.format(obs_var_idx)
        else:
            obs_var = obs_vars[obs_var_idx]

        # plot mean value for observation for each vehicle across rollouts
        plt.figure()
        for car in range(tot_cars):
            center = np.mean(all_obs[:, :, tot_cars * obs_var_idx + car],
                             axis=0)
            plt.plot(range(max_path_length),
                     center,
                     lw=2.0,
                     label='Veh {}'.format(car))
        plt.ylabel(obs_var, fontsize=15)
        plt.xlabel('time (s)', fontsize=15)
        plt.title('{2}, Autonomous Penetration: {0}/{1}'.format(
            rl_cars, tot_cars, obs_var),
                  fontsize=16)
        plt.legend(loc=0)

        # save the plot in the "plots" directory unless we're testing
        if not os.environ.get('TEST_FLAG', 1):
            plt.savefig('plots/{0}_{1}.png'.format(args.plotname, obs_var),
                        bbox_inches='tight')

        # plot mean values for the observations across all vehicles and all
        # rollouts
        car_mean = np.mean(np.mean(
            all_obs[:, :, tot_cars * obs_var_idx:tot_cars * (obs_var_idx + 1)],
            axis=0),
                           axis=1)
        plt.figure()
        plt.plot(t, car_mean)
        plt.ylabel(obs_var, fontsize=15)
        plt.xlabel('time (s)', fontsize=15)
        plt.title('Mean {2}, Autonomous Penetration: {0}/{1}'.format(
            rl_cars, tot_cars, obs_var),
                  fontsize=16)

        # save the plot in the "plots" directory
        if not os.environ.get('TEST_FLAG', 1):
            plt.savefig('plots/{0}_{1}_mean.png'.format(args.plotname, obs_var),
                        bbox_inches='tight')

    # Make a figure for the mean rewards over the course of the rollout
    mean_reward = np.mean(all_rewards, axis=0)

    plt.figure()
    plt.plot(t, mean_reward, lw=2.0)
    plt.ylabel('reward', fontsize=15)
    plt.xlabel('time (s)', fontsize=15)
    plt.title('Reward, Autonomous Penetration: {0}/{1}'.format(
        rl_cars, tot_cars),
              fontsize=16)

    # save the rewards plot in the "reward_plots" directory
    if not os.environ.get('TEST_FLAG', 1):
        plt.savefig('plots/{0}_reward.png'.format(args.plotname), bbox='tight')

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(
            unwrapped_env.scenario.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)
Пример #14
0
    def run_eval(self,
                 num_runs,
                 num_steps,
                 run,
                 saveLogs,
                 train,
                 rl_actions=None,
                 convert_to_csv=False,
                 load_path=None):
        """
        Run the given scenario for a set number of runs and steps per run.

        Parameters
        ----------
            num_runs: int
                number of runs the experiment should perform
            num_steps: int
                number of steps to be performs in each run of the experiment
            train: bool
            	Define if it is a trainning or evaluating experiment
            run: int
            	The number of the current experiment
            saveLogs: SaveLogs object
            	The instance of the package used to save the logs of the simulation
            rl_actions: method, optional
                maps states to actions to be performed by the RL agents (if
                there are any)
            convert_to_csv: bool
                Specifies whether to convert the emission file created by sumo
                into a csv file
            load_path: string
            	Path to the model that should be loaded into the neural network
            	Default: None
        Returns
        -------
            info_dict: dict
                contains returns, average speed per step
		if rl_actions is None:
			def rl_actions(*_):
				return None
		"""
        #1. Initialize the information variables
        info_dict = {}
        rets = []
        mean_rets = []
        ret_lists = []
        vels = []
        mean_vels = []
        std_vels = []

        performance = []
        collisions = []

        #2. Set the reinforcement learning parameters
        action_set = self.env.getActionSet()
        print('LOAD PATH 	--	run:', load_path)
        time.sleep(2)
        agent = Agent(action_set, train=False, load_path=load_path)
        target_update_counter = 0

        #3. Run the experiment for a set number of simulations(runs)
        for i in range(num_runs):
            #1. initialize the environment
            vel = np.zeros(num_steps)
            logging.info("Iter #" + str(i))
            ret = 0
            ret_list = []
            vehicles = self.env.vehicles
            collision_check = 5

            obs = self.get_screen(self.env.reset())
            self.env.reset_params()
            state = np.stack([obs for _ in range(4)], axis=0)

            #2. Perform one simulation
            for j in range(num_steps):
                print('(episode, step) = ', i, ',', j)

                #1. Select and perform an action (the method rl_action is responsible for selecting the action to be taken)
                action, Q_value = agent.select_action(
                    self.concatenate(state, agent), train=False)
                if Q_value is not None: saveLogs.save_Q_value(Q_value, run)
                obs, reward, done, _ = self.env.step(action_set[action[0]])

                #2. Convert the observation to a pytorch observation
                obs = self.get_screen(obs)
                reward = torch.tensor([reward], device=agent.device)

                #3. Observe new state
                if not (self.env.arrived or self.env.crashed):
                    next_state = []
                    next_state.append(obs)
                    next_state.append(deepcopy(state[0]))
                    next_state.append(deepcopy(state[1]))
                    next_state.append(deepcopy(state[2]))
                else:
                    next_state = None

                #4. Store the transition in memory
                agent.memory.push(self.concatenate(state, agent), action,
                                  self.concatenate(next_state, agent), reward)

                #5. Move to the next state
                state = next_state

                #6. Flow code
                vel[j] = np.mean(vehicles.get_speed(vehicles.get_ids()))
                ret += reward
                ret_list.append(reward)

                #7. Decide if the simulation gets to an end
                if done or self.env.arrived or self.env.crashed:
                    agent.episode_durations.append(j + 1)
                    if self.env.crashed:
                        saveLogs.add_crash()
                        print('Crash')
                        collision_check = 1
                    elif self.env.arrived:
                        saveLogs.add_arrive()
                        print('all vehicles arrived the destination')
                    break

            #3. Store information from the simulation
            saveLogs.add_simulation_time(time=j)
            performance.append(j)
            collisions.append(1 if self.env.crashed else 0)

            #4. flow code
            rets.append(ret)
            vels.append(vel)
            mean_rets.append(np.mean(ret_list))
            ret_lists.append(ret_list)
            mean_vels.append(np.mean(vel))
            std_vels.append(np.std(vel))

            #5. save rewards
            #if i % Config.SAVE_REWARDS_FREQUENCy == 0:
            saveLogs.save_reward(rets, run, i)
            saveLogs.save_average_reward(ret)
            saveLogs.save_collision(collision_check, run)
            saveLogs.save_time(j, run)

        #4. Store the logs of the simulation
        info_dict["returns"] = np.array(rets.copy())
        info_dict["velocities"] = vels
        info_dict["mean_returns"] = mean_rets
        info_dict["per_step_returns"] = ret_lists
        info_dict["performance"] = np.array(performance.copy())
        info_dict["collisions"] = np.array(collisions.copy())

        print("Average, std return: {}, {}".format(np.mean(rets),
                                                   np.std(rets)))
        print("Average, std speed: {}, {}".format(np.mean(mean_vels),
                                                  np.std(std_vels)))
        self.env.terminate()

        if convert_to_csv:
            # collect the location of the emission file
            dir_path = self.env.sumo_params.emission_path
            emission_filename = \
                "{0}-emission.xml".format(self.env.scenario.name)
            emission_path = \
                "{0}/{1}".format(dir_path, emission_filename)

            # convert the emission file into a csv
            emission_to_csv(emission_path)

        return info_dict
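
A minimal sketch of how run_eval above might be invoked. The experiment wrapper (exp), the SaveLogs constructor arguments, and the file paths are hypothetical stand-ins inferred from the argument names, not code from the original project:

# hypothetical objects: `exp` wraps the environment, SaveLogs collects the logs
logs = SaveLogs(output_dir='./logs')
info = exp.run_eval(num_runs=10, num_steps=500, run=0, saveLogs=logs,
                    train=False, convert_to_csv=False,
                    load_path='./checkpoints/policy_net.pt')
print(info["returns"].mean(), info["collisions"].sum())
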
Пример #15
0
def generate_emission_csv(emission_path, emission_name):
    """
    Generates csv from emission xml. The xml path needs to be added manually!
    """
    xml_path = os.path.join(emission_path, (emission_name + ".xml"))
    emission_to_csv(xml_path)
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for
    more detailed information on what information can be fed to this
    visualizer), and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)
    name = result_dir.split("/")[-2:]

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # another hack for old pkl files. TODO: remove eventually
    if not hasattr(sim_params, 'use_ballistic'):
        sim_params.use_ballistic = False

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = False  # will be set to True below
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        if args.render_mode != 'sumo_gui':
            sim_params.render = 'drgb'
            sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True  # FIXME: this does not work

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if args.render_mode == 'sumo_gui':
        env.sim_params.render = True  # set to True after initializing agent and env

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [np.zeros(size, np.float32),
                                   np.zeros(size, np.float32)]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    # if restart_instance, don't restart here because env.reset will restart later
    if not sim_params.restart_instance:
        env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []
    rl_speed = []  # store rl controlled vehicle's speed

    log2_stack = defaultdict(list)  # This dict stores log2 data during rollouts

    if args.evaluate:
        env.unwrapped.env_params.evaluate = True  # workaround: make sure the evaluate flag reaches the unwrapped env

    for i in range(args.num_rollouts):
        vel = []
        vel_dict = defaultdict(list)
        timerange = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            ids = vehicles.get_ids()
            rls = vehicles.get_rl_ids()
            speeds = vehicles.get_speed(ids)

            timerange.append(vehicles.get_timestep(ids[-1]) / 10000)

            # only include non-empty speeds
            if speeds:
                vel.append(np.mean(speeds))
                for veh_id, speed in zip(ids, speeds):
                    vel_dict[veh_id].append(speed)

            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                                state[agent_id], state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [x / y for x, y in
                                     zip(final_outflows, final_inflows)]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

        log2 = env.unwrapped.log2
        for k in log2:
            log2_stack[k].append(log2[k])

        # plot non-rl's speed and rl's speed graph
        if i == args.num_rollouts - 1 and args.render_mode != "no_render":
            veh = list(vel_dict.keys())
            plt.subplot(2, 1, 1)
            plt.title('/'.join(name))
            for v in veh[:-1]:
                plt.plot(timerange, vel_dict[v])
            plt.xlabel('timestep(s)')
            plt.ylabel('speed(m/s)')
            plt.legend(veh[:-1])
            plt.grid(True)
            # plt.show()

            plt.subplot(2, 1, 2)
            plt.plot(timerange, vel_dict[veh[-1]])
            plt.xlabel('timestep(s)')
            plt.ylabel('speed(m/s)')
            plt.legend(veh[-1:])
            plt.grid(True)
            plt.show()

        rl_speed = [np.mean(vel_dict[rl]) for rl in vehicles.get_rl_ids()]

    for k in log2_stack:
        log2_stack[k] = np.mean(log2_stack[k]).round(3)

    # export the log2_stack
    from time import strftime
    # use a separate name so the `time` module (used again below) is not shadowed
    timestamp = strftime('%Y-%m-%d')
    flow_autonomous_home = os.path.expanduser('~/log/')
    with open(flow_autonomous_home + 'log.csv', 'a') as f:
        keys = ['"' + str(k) + '"' for k in log2_stack.keys()]
        values = ['"' + str(v) + '"' for v in log2_stack.values()]
        # f.write('time,name,' + ','.join(keys) + '\n')
        f.write(f'{timestamp},{name[0]},{",".join(values)}\n')

    print('==== Summary of results ====')
    print("Return:")
    print(mean_speed)
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print(rets)
        print('Average, std: {}, {}'.format(
            np.mean(rets), np.std(rets)))

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('')

    # bmil edit
    rls = vehicles.get_rl_ids()
    for i in range(len(rls)):
        print(f'{rls[i]} Speed, mean (m/s): {rl_speed[i]}')

    print('Average, std: {}, {}'.format(np.mean(mean_speed), np.std(
        mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {}, {}'.format(np.mean(std_speed), np.std(
        std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the run
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)

        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)
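
generate_emission_csv at the top of this example only wraps emission_to_csv; a minimal usage sketch follows (the directory and file name are hypothetical, and the .xml file is expected to exist there already):

# converts ./test_time_rollout/myexp-emission.xml into
# ./test_time_rollout/myexp-emission.csv (hypothetical paths)
generate_emission_csv('./test_time_rollout', 'myexp-emission')
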
Пример #17
0
    def run(self, num_runs, rl_actions=None, convert_to_csv=False):
        """Run the given network for a set number of runs.

        Parameters
        ----------
        num_runs : int
            number of runs the experiment should perform
        rl_actions : method, optional
            maps states to actions to be performed by the RL agents (if
            there are any)
        convert_to_csv : bool
            Specifies whether to convert the emission file created by sumo
            into a csv file

        Returns
        -------
        info_dict : dict < str, Any >
            contains returns, average speed per step
        """
        num_steps = self.env.env_params.horizon

        # raise an error if convert_to_csv is set to True but no emission
        # file will be generated, to avoid getting an error at the end of the
        # simulation
        if convert_to_csv and self.env.sim_params.emission_path is None:
            raise ValueError(
                'The experiment was run with convert_to_csv set '
                'to True, but no emission file will be generated. If you wish '
                'to generate an emission file, you should set the parameter '
                'emission_path in the simulation parameters (SumoParams or '
                'AimsunParams) to the path of the folder where emissions '
                'output should be generated. If you do not wish to generate '
                'emissions, set the convert_to_csv parameter to False.')

        # used to store the results of each run
        info_dict = {
            "returns": [],
            "velocities": [],
            "outflows": [],
        }
        info_dict.update({key: [] for key in self.custom_callables.keys()})

        if rl_actions is None:

            def rl_actions(*_):
                return None

        # time profiling information
        t = time.time()
        times = []
        meanSpeeds = []

        for i in range(num_runs):
            ret = 0
            vel = []
            custom_vals = {key: [] for key in self.custom_callables.keys()}
            state = self.env.reset()
            for j in range(num_steps):
                t0 = time.time()
                state, reward, done, _ = self.env.step(rl_actions(state))
                t1 = time.time()
                times.append(1 / (t1 - t0))

                # Compute the velocity speeds and cumulative returns.
                veh_ids = self.env.k.vehicle.get_ids()
                vel.append(np.mean(self.env.k.vehicle.get_speed(veh_ids)))
                ret += reward

                ids = self.env.k.vehicle.get_ids()
                speeds = self.env.k.vehicle.get_speed(ids)
                # Only count speeds of cars on the edges prior to the 'construction site'
                targetSpeeds = []
                for veh_id in ids:
                    edge = self.env.k.vehicle.get_edge(veh_id)
                    if edge == "edge3" or edge == "edge4":
                        speed = self.env.k.vehicle.get_speed(veh_id)
                        # skip obviously invalid speed readings
                        if abs(speed) > 10000:
                            continue
                        targetSpeeds.append(speed)
                if len(targetSpeeds) == 0:
                    meanSpeeds.append(0)
                else:
                    meanSpeeds.append(np.mean(targetSpeeds))

                # Compute the results for the custom callables.
                for (key, lambda_func) in self.custom_callables.items():
                    custom_vals[key].append(lambda_func(self.env))

                if done:
                    break

            # Store the information from the run in info_dict.
            outflow = self.env.k.vehicle.get_outflow_rate(int(500))
            info_dict["returns"].append(ret)
            info_dict["velocities"].append(np.mean(vel))
            info_dict["outflows"].append(outflow)
            for key in custom_vals.keys():
                info_dict[key].append(np.mean(custom_vals[key]))

            print("Round {0}, return: {1}".format(i, ret))

        # Print the averages/std for all variables in the info_dict.
        for key in info_dict.keys():
            print("Average, std {}: {}, {}".format(key,
                                                   np.mean(info_dict[key]),
                                                   np.std(info_dict[key])))

        print("Total time:", time.time() - t)
        print("steps/second:", np.mean(times))
        self.env.terminate()

        if convert_to_csv and self.env.simulator == "traci":
            # wait a short period of time to ensure the xml file is readable
            time.sleep(0.1)

            # collect the location of the emission file
            dir_path = self.env.sim_params.emission_path
            emission_filename = \
                "{0}-emission.xml".format(self.env.network.name)
            emission_path = os.path.join(dir_path, emission_filename)

            # convert the emission file into a csv
            emission_to_csv(emission_path)

            # Delete the .xml version of the emission file.
            os.remove(emission_path)

        print(np.mean(meanSpeeds))
        meanSpeeds = np.asarray(meanSpeeds)
        np.savetxt("meanSpeeds_sim.csv", meanSpeeds, delimiter=",")

        return info_dict
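
The ValueError raised near the top of run makes the requirement explicit: convert_to_csv only works when the simulation actually writes an emission file. A minimal sketch of the corresponding configuration, assuming flow's SumoParams (from flow.core.params) and an Experiment-style wrapper around the environment; the exact constructor arguments are assumptions:

from flow.core.params import SumoParams

# ask SUMO to write its emission xml into this folder
sim_params = SumoParams(sim_step=0.1, render=False,
                        emission_path='./test_time_rollout/')
# ... build flow_params / the environment with sim_params ...
# exp = Experiment(...)                      # hypothetical wrapper
# exp.run(num_runs=1, convert_to_csv=True)   # xml is converted to csv at the end
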
Пример #18
0
    def run_train_eval(self,
                       num_runs,
                       num_steps,
                       run,
                       saveLogs,
                       train,
                       rl_actions=None,
                       convert_to_csv=False,
                       load_path=None):
        """
        Run the given scenario for a set number of runs and steps per run.

        Parameters
        ----------
            num_runs: int
                number of runs the experiment should perform
            num_steps: int
                number of steps to be performs in each run of the experiment
            train: bool
            	Define if it is a trainning or evaluating experiment
            run: int
            	The number of the current experiment
            saveLogs: SaveLogs object
            	The instance of the package used to save the logs of the simulation
            rl_actions: method, optional
                maps states to actions to be performed by the RL agents (if
                there are any)
            convert_to_csv: bool
                Specifies whether to convert the emission file created by sumo
                into a csv file
            load_path: string
            	Path to the model that should be loaded into the neural network
            	Default: None
        Returns
        -------
            info_dict: dict
                contains returns, average speed per step
		if rl_actions is None:
			def rl_actions(*_):
				return None
		"""
        #1. Initialize the information variables
        info_dict = {}
        rets = []
        mean_rets = []
        ret_lists = []
        vels = []
        mean_vels = []
        std_vels = []

        performance = []
        collisions = []
        q_values = []
        losses = []

        #2. Set the reinforcement learning parameters
        action_set = self.env.getActionSet()
        agent = Agent(action_set, train=True, load_path=load_path)
        target_update_counter = 0

        #3. Initialize the variables that decide when to store the best network
        got_it = 0  # How many times the agent reaches the end of the street
        max_ret = -200
        evaluate_counter = Config.EVALUATE_AMMOUNT
        best_net_state_dict = None

        #4. Run the experiment for a set number of simulations(runs)
        train_simul = 0
        total_simul = 0

        while train_simul < num_runs:
            total_simul += 1

            #1. initialize the environment
            vel = np.zeros(num_steps)
            logging.info("Iter #" + str(total_simul))
            ret = 0
            ret_list = []
            vehicles = self.env.vehicles
            collision_check = 0

            obs = self.get_screen(self.env.reset())
            self.env.reset_params()
            state = np.stack([obs for _ in range(4)], axis=0)
            #state = torch.from_numpy(obs).to(agent.device).unsqueeze(0)

            if evaluate_counter == Config.EVALUATE_AMMOUNT:
                train_simul += 1

            #2. Perform one simulation
            for j in range(num_steps):
                print('(episode, step) = ', total_simul, ',', j)

                #1. Select and perform an action (the method rl_action is responsible for selecting the action to be taken)
                if evaluate_counter == Config.EVALUATE_AMMOUNT:
                    print('------------ IS EQUAL')
                    agent.policy_net.train()
                    train = True

                action, Q_value, uncertainty = agent.select_action(
                    self.concatenate(state, agent), train)

                if Q_value is not None and train:
                    saveLogs.save_Q_value(Q_value, run)
                    q_values.append(Q_value)
                    saveLogs.save_uncertainty(uncertainty, run)

                obs, reward, done, _ = self.env.step(action_set[action])

                #2. Convert the observation to a pytorch observation
                obs = self.get_screen(obs)
                reward = torch.tensor([reward], device=agent.device)
                #state = torch.from_numpy(obs).to(agent.device).unsqueeze(0)

                #3. Observe new state
                if not (self.env.arrived or self.env.crashed):
                    next_state = []
                    next_state.append(obs)
                    next_state.append(deepcopy(state[0]))
                    next_state.append(deepcopy(state[1]))
                    next_state.append(deepcopy(state[2]))
                    #next_state = deepcopy(state)
                else:
                    next_state = None

                #4. Store the transition in memory
                if train and evaluate_counter == Config.EVALUATE_AMMOUNT:
                    agent.append_sample(self.concatenate(state, agent), action,
                                        self.concatenate(next_state, agent),
                                        reward)
                    #agent.append_sample(state, action, next_state, reward)

                #5. Move to the next state
                state = next_state

                #6. Flow code
                vel[j] = np.mean(vehicles.get_speed(vehicles.get_ids()))
                ret += reward
                ret_list.append(reward)

                #7. Perform one step of the optimization (on the target network) if in training mode
                if train and evaluate_counter == Config.EVALUATE_AMMOUNT:
                    print('-----ENTERED OPTIMIZATION')
                    loss = agent.optimize_model()
                    saveLogs.save_loss(loss, run)
                    losses.append(loss)

                    agent.policy_net.eval()
                    train = False
                    target_update_counter += 1

                #8. update target network
                if target_update_counter % Config.TARGET_UPDATE == 0:
                    target_update_counter = 0
                    agent.target_net.load_state_dict(
                        agent.policy_net.state_dict())
                    print('update target network ok...')

                #9. Decide if the simulation gets to an end
                if done or self.env.arrived or self.env.crashed:
                    agent.episode_durations.append(j + 1)
                    if self.env.crashed:
                        saveLogs.add_crash()
                        print('Crash')
                        collision_check = 1
                    elif self.env.arrived:
                        saveLogs.add_arrive()
                        print('all vehicles arrived the destination')
                    break

            #3. Decide if the current model of the neural network will be stored
            if self.env.arrived:
                got_it += 1
            else:
                got_it = 0

            print('got_it:', got_it)

            if evaluate_counter == Config.EVALUATE_AMMOUNT:
                print('--------EVALUATE A: ', evaluate_counter)

                #4. Store information from the simulation
                saveLogs.add_simulation_time(time=j)
                performance.append(j)
                collisions.append(1 if self.env.crashed else 0)
                #5. flow code
                rets.append(ret)
                vels.append(vel)
                mean_rets.append(np.mean(ret_list))
                ret_lists.append(ret_list)
                mean_vels.append(np.mean(vel))
                std_vels.append(np.std(vel))
                #6. save rewards
                #if i % Config.SAVE_REWARDS_FREQUENCy == 0:
                saveLogs.save_reward(ret, run, train_simul)
                saveLogs.save_average_reward(ret)
                saveLogs.save_collision(collision_check, run)
                saveLogs.save_time(j, run)
                evaluate_counter = 0
                got_it = 0

            else:
                print('--------EVALUATE B: ', evaluate_counter)
                evaluate_counter += 1

            if evaluate_counter == Config.EVALUATE_AMMOUNT and got_it == Config.EVALUATE_AMMOUNT and ret > max_ret:
                print('------ENTERED TO SAVE')
                max_ret = ret
                saveLogs.save_model(agent.policy_net, agent.optimizer,
                                    10101010, train_simul * j)
                best_net_state_dict = agent.policy_net.state_dict()
                print('got:', got_it)

        #5. Store the logs of the simulation
        #a. save the final model of the neural network
        saveLogs.save_model(agent.policy_net, agent.optimizer, run,
                            train_simul * j)

        #b. store the data statistics of the simulation
        info_dict["returns"] = np.array(rets.copy())
        info_dict["velocities"] = vels
        info_dict["mean_returns"] = mean_rets
        info_dict["per_step_returns"] = ret_lists
        info_dict["performance"] = np.array(performance.copy())
        info_dict["collisions"] = np.array(collisions.copy())
        info_dict["loss"] = np.array(losses.copy())
        info_dict["q_values"] = np.array(q_values.copy())

        print("Average, std return: {}, {}".format(np.mean(rets),
                                                   np.std(rets)))
        print("Average, std speed: {}, {}".format(np.mean(mean_vels),
                                                  np.std(std_vels)))
        self.env.terminate()

        if convert_to_csv:
            # collect the location of the emission file
            dir_path = self.env.sumo_params.emission_path
            emission_filename = \
                "{0}-emission.xml".format(self.env.scenario.name)
            emission_path = \
                "{0}/{1}".format(dir_path, emission_filename)

            # convert the emission file into a csv
            emission_to_csv(emission_path)

        return info_dict
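
run_train_eval above leans on a project-level Config object; a stand-in with the attributes the loop reads could look like the following (the values are placeholders, and the misspelled EVALUATE_AMMOUNT name is kept because the code references it):

class Config:
    # number of evaluation episodes between training episodes (placeholder)
    EVALUATE_AMMOUNT = 5
    # sync the target network every N optimization steps (placeholder)
    TARGET_UPDATE = 10
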
Пример #19
0
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for
    more detailed information on what information can be fed to this
    visualizer), and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True

    # specify emission file path
    dir_path = os.path.dirname(os.path.realpath(__file__))

    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = False  # this will be set to true after creating agent and gym
        print('NOTE: With render mode {}, an extra instance of the SUMO GUI '
              'will display before the GUI for visualizing the result. Click '
              'the green Play arrow to continue.'.format(args.render_mode))
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if args.render_mode == 'sumo_gui':
        env.sim_params.render = True  # set to true after initializing agent and env

    # if restart_instance, don't restart here because env.reset will restart later
    if not sim_params.restart_instance:
        env.restart_simulation(sim_params=sim_params)

    use_lstm = config['model'].get('use_lstm', False)
    if use_lstm:
        state_size = config['model']['lstm_cell_size']
        lstm_state = [np.zeros(state_size), np.zeros(state_size)]
        if multiagent:
            lstm_state = {
                key: deepcopy(lstm_state)
                for key in config['multiagent']['policies'].keys()
            }

    rewards = []
    if multiagent:
        rewards = defaultdict(list)
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []
    for i in range(args.num_rollouts):
        obs = env.reset()
        kv = env.k.vehicle
        rollout_speeds = []
        rollout_reward = 0
        if multiagent:
            rollout_reward = defaultdict(int)
        for _ in range(env_params.horizon):
            rollout_speeds.append(np.mean(kv.get_speed(kv.get_ids())))
            if multiagent:
                action = {}
                for agent_id in obs.keys():
                    if use_lstm:
                        action[agent_id], lstm_state[agent_id], _ = \
                            agent.compute_action(
                                obs[agent_id],
                                state=lstm_state[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            obs[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(obs)
            obs, reward, done, _ = env.step(action)
            if multiagent:
                done = done['__all__']
                for agent_id, agent_reward in reward.items():
                    rollout_reward[policy_map_fn(agent_id)] += agent_reward
            else:
                rollout_reward += reward

            if done:
                break

        if multiagent:
            for agent_id, reward in rollout_reward.items():
                rewards[agent_id].append(reward)
                print('rollout %s, agent %s reward: %.5g' %
                      (i, agent_id, reward))
        else:
            rewards.append(rollout_reward)
            print('rollout %s, reward: %.5g' % (i, rollout_reward))
        mean_speed.append(np.nanmean(rollout_speeds))
        std_speed.append(np.nanstd(rollout_speeds))
        # Compute rate of inflow / outflow in the last 500 steps
        final_outflows.append(kv.get_outflow_rate(500))
        final_inflows.append(kv.get_inflow_rate(500))

    print(
        '\n==== Summary of results: mean (std) [rollout1, rollout2, ...] ====')
    mean, std = np.mean, np.std
    if multiagent:
        for agent_id, agent_rewards in rewards.items():
            print('agent %s rewards: %.4g (%.4g) %s' %
                  (agent_id, mean(agent_rewards), std(agent_rewards),
                   agent_rewards))
    else:
        print('rewards: %.4g (%.4g) %s' %
              (mean(rewards), std(rewards), rewards))

    print('mean speeds (m/s): %.4g (%.4g) %s' %
          (mean(mean_speed), std(mean_speed), mean_speed))
    print('std speeds: %.4g (%.4g) %s' %
          (mean(std_speed), std(std_speed), std_speed))

    print('inflows (veh/hr): %.4g (%.4g) %s' %
          (mean(final_inflows), std(final_inflows), final_inflows))
    print('outflows (veh/hr): %.4g (%.4g) %s' %
          (mean(final_outflows), std(final_outflows), final_outflows))

    # Compute throughput efficiency in the last 500 sec of the run
    throughput = [o / i for o, i in zip(final_outflows, final_inflows)]
    print('throughput efficiency: %.4g (%.4g) %s' %
          (mean(throughput), std(throughput), throughput))

    # terminate the environment
    env.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)

        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
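
The docstring of visualizer_rllib above refers to a create_parser function that is not shown in this example. The sketch below is one hedged guess at how such a parser and entry point could look; the argument names are inferred from the attributes the function reads (result_dir, checkpoint_num, run, num_rollouts, gen_emission, render_mode, horizon, evaluate, save_render), and the real Flow script may define them differently.

import argparse


def create_parser():
    """Hypothetical parser matching the attributes read by visualizer_rllib above."""
    parser = argparse.ArgumentParser(
        description='Replay a trained RLlib policy on a Flow network.')
    parser.add_argument('result_dir', type=str,
                        help='directory containing the results and params.json')
    parser.add_argument('checkpoint_num', type=str,
                        help='checkpoint number to restore')
    parser.add_argument('--run', type=str,
                        help='RLlib algorithm used during training, e.g. PPO')
    parser.add_argument('--num_rollouts', type=int, default=1)
    parser.add_argument('--gen_emission', action='store_true')
    parser.add_argument('--evaluate', action='store_true')
    parser.add_argument('--render_mode', type=str, default='sumo_gui',
                        choices=['sumo_gui', 'sumo_web3d', 'drgb', 'no_render'])
    parser.add_argument('--save_render', action='store_true')
    parser.add_argument('--horizon', type=int, default=None)
    return parser


if __name__ == '__main__':
    visualizer_rllib(create_parser().parse_args())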
Example #20
0
    def run_eval(self,
                 num_runs,
                 num_steps,
                 run,
                 saveLogs,
                 attack,
                 epsilon,
                 rl_actions=None,
                 convert_to_csv=False,
                 load_path=None):

        #1. Initialize the information variables
        info_dict = {}
        rets = []
        mean_rets = []
        ret_lists = []
        vels = []
        mean_vels = []
        std_vels = []

        performance = []
        collisions = []
        losses = []
        q_values = []

        attack_number = 0
        attack_detected = 0

        #2. Set the reinforcement learning parameters
        action_set = self.env.getActionSet()
        agent = Agent(action_set, train=False, load_path=load_path)
        target_update_counter = 0

        #3. Run the experiment for a set number of simulations (runs)
        for i in range(num_runs):
            #1. initialize the environment
            vel = np.zeros(num_steps)
            logging.info("Iter #" + str(i))
            ret = 0
            ret_list = []
            vehicles = self.env.vehicles
            collision_check = 0

            obs = self.get_screen(self.env.reset())
            self.env.reset_params()
            state = np.stack([obs for _ in range(4)], axis=0)

            #2. Perform one simulation
            for j in range(num_steps):
                print('(episode, step) = ', i, ',', j)

                state_conc = self.concatenate(state, agent)

                #0.Attack the images
                is_attack = False
                if attack:
                    random.seed()
                    random_number = random.random()
                    is_attack = False

                    if random_number < Config.ATTACK_PROBABILITY:
                        is_attack = True
                        state_conc = fgsm_attack(state_conc, epsilon, agent,
                                                 i * j, saveLogs)
                        #state = fgsm_attack(state, epsilon, agent, i*j, saveLogs)

                detected, uncertainty, confidence = check_attack(
                    agent, state_conc)

                if is_attack:
                    saveLogs.save_uncertainty_attack(uncertainty, run)
                else:
                    saveLogs.save_uncertainty_no_attack(uncertainty, run)

                detection_information(attack, detected, saveLogs)

                saveLogs.save_uncertainty(uncertainty, run)

                print('detected: ', detected)

                #1. Select and perform an action (the rl_action method is
                #   responsible for selecting the action to be taken)
                #action, Q_value, uncertainty = agent.select_action(state_conc, train=False)
                action, Q_value = agent.select_action(state_conc, train=False)
                print('action, Q-value:', action, Q_value)
                if Q_value is not None:
                    saveLogs.save_Q_value(Q_value, run)
                    q_values.append(Q_value)
                obs, reward, done, _ = self.env.step(action_set[action])

                sc_name2 = os.getcwd() + "/image_simulation/screenshot" + str(
                    i * j) + ".png"
                self.env.traci_connection.gui.screenshot("View #0", sc_name2)

                #2. Convert the observation to a pytorch observation
                obs = self.get_screen(obs)
                reward = torch.tensor([reward], device=agent.device)

                #3. Observe new state
                if not (self.env.arrived or self.env.crashed):
                    next_state = []
                    next_state.append(obs)
                    next_state.append(deepcopy(state[0]))
                    next_state.append(deepcopy(state[1]))
                    next_state.append(deepcopy(state[2]))
                    #next_state = deepcopy(state)
                else:
                    next_state = None

                #4. Store the transition in memory
                agent.append_sample(self.concatenate(state, agent), action,
                                    self.concatenate(next_state, agent),
                                    reward)
                #agent.append_sample(state, action, next_state, reward)

                #5. Move to the next state
                state = next_state

                #6. Flow code
                vel[j] = np.mean(vehicles.get_speed(vehicles.get_ids()))
                ret += reward
                ret_list.append(reward)

                #7. Decide if the simulation gets to an end
                if done or self.env.arrived or self.env.crashed:
                    agent.episode_durations.append(j + 1)
                    if self.env.crashed:
                        saveLogs.add_crash()
                        print('Crash')
                        collision_check = 1
                    elif self.env.arrived:
                        saveLogs.add_arrive()
                        print('all vehicles arrived the destination')
                    break

            #3. Store information from the simulation
            saveLogs.add_simulation_time(time=j)
            performance.append(j)
            collisions.append(1 if self.env.crashed else 0)

            #4. flow code
            rets.append(ret)
            vels.append(vel)
            mean_rets.append(np.mean(ret_list))
            ret_lists.append(ret_list)
            mean_vels.append(np.mean(vel))
            std_vels.append(np.std(vel))

            #5. save rewards
            #if i % Config.SAVE_REWARDS_FREQUENCy == 0:
            saveLogs.save_reward(ret, run, i)
            saveLogs.save_average_reward(ret)
            saveLogs.save_collision(collision_check, run)
            saveLogs.save_time(j, run)

        #4. Store the logs of the simulation
        info_dict["returns"] = np.array(rets.copy())
        info_dict["velocities"] = vels
        info_dict["mean_returns"] = mean_rets
        info_dict["per_step_returns"] = ret_lists
        info_dict["performance"] = np.array(performance.copy())
        info_dict["collisions"] = np.array(collisions.copy())
        info_dict["loss"] = None
        info_dict["q_values"] = np.array(q_values.copy())

        print("Average, std return: {}, {}".format(np.mean(rets),
                                                   np.std(rets)))
        print("Average, std speed: {}, {}".format(np.mean(mean_vels),
                                                  np.std(std_vels)))
        self.env.terminate()

        if convert_to_csv:
            # collect the location of the emission file
            dir_path = self.env.sumo_params.emission_path
            emission_filename = \
                "{0}-emission.xml".format(self.env.scenario.name)
            emission_path = \
                "{0}/{1}".format(dir_path, emission_filename)

            # convert the emission file into a csv
            emission_to_csv(emission_path)

        return info_dict
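
fgsm_attack and check_attack used in run_eval above are project-specific helpers that are not included in this snippet. For orientation only, the block below is a generic PyTorch sketch of the fast gradient sign method such an attack helper typically implements; model, state and action are assumed to be a Q-network module, a batched observation tensor and a chosen-action index tensor, and this is not the project's actual implementation.

import torch


def fgsm_perturb(model, state, action, epsilon):
    """Generic FGSM sketch: push the input along the sign of the loss gradient."""
    state = state.clone().detach().requires_grad_(True)
    q_values = model(state)
    # the attacker tries to lower the Q-value of the action the agent picked
    loss = -q_values.gather(1, action.view(-1, 1)).sum()
    loss.backward()
    return (state + epsilon * state.grad.sign()).detach()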
Example #21
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for
    more detailed information on what information can be fed to this
    visualizer), and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_pkl(result_dir)

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params = flow_params['sim']
    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
        print('NOTE: With render mode {}, an extra instance of the SUMO GUI '
              'will display before the GUI for visualizing the result. Click '
              'the green Play arrow to continue.'.format(args.render_mode))
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    sim_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []

    policy_agent_mapping = default_policy_agent_mapping
    if hasattr(agent, "workers"):
        env = agent.workers.local_worker().env
        multiagent = isinstance(env, MultiAgentEnv)
        if agent.workers.local_worker().multiagent:
            policy_agent_mapping = agent.config["multiagent"][
                "policy_mapping_fn"]

        policy_map = agent.workers.local_worker().policy_map
        state_init = {p: m.get_initial_state() for p, m in policy_map.items()}
        use_lstm = {p: len(s) > 0 for p, s in state_init.items()}
        action_init = {
            p: m.action_space.sample()
            for p, m in policy_map.items()
        }
    else:
        env = gym.make(env_name)
        multiagent = False
        use_lstm = {DEFAULT_POLICY_ID: False}

    steps = 0
    for i in range(args.num_rollouts):
        vel = []
        mapping_cache = {}  # in case policy_agent_mapping is stochastic
        reward_dict = {}
        obs = env.reset()
        agent_states = DefaultMapping(
            lambda agent_id: state_init[mapping_cache[agent_id]])
        prev_actions = DefaultMapping(
            lambda agent_id: action_init[mapping_cache[agent_id]])
        prev_rewards = collections.defaultdict(lambda: 0.)
        done = False
        reward_total = 0.0
        while not done and steps < (env_params.horizon or steps + 1):
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            multi_obs = obs if multiagent else {_DUMMY_AGENT_ID: obs}
            action_dict = {}
            for agent_id, a_obs in multi_obs.items():
                if a_obs is not None:
                    policy_id = mapping_cache.setdefault(
                        agent_id, policy_agent_mapping(agent_id))
                    p_use_lstm = use_lstm[policy_id]
                    if p_use_lstm:
                        a_action, p_state, _ = agent.compute_action(
                            a_obs,
                            state=agent_states[agent_id],
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                        agent_states[agent_id] = p_state
                    else:
                        a_action = agent.compute_action(
                            a_obs,
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                    a_action = _flatten_action(a_action)  # tuple actions
                    action_dict[agent_id] = a_action
                    prev_actions[agent_id] = a_action
            action = action_dict

            action = action if multiagent else action[_DUMMY_AGENT_ID]
            next_obs, reward, done, _ = env.step(action)

            if multiagent:
                for agent_id, r in reward.items():
                    prev_rewards[agent_id] = r
            else:
                prev_rewards[_DUMMY_AGENT_ID] = reward

            if multiagent:
                done = done["__all__"]
                reward_total += sum(reward.values())
            else:
                reward_total += reward
            steps += 1
            obs = next_obs
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [
                x / y for x, y in zip(final_outflows, final_inflows)
            ]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        print("Episode reward", reward_total)

    print('==== Summary of results ====')
    print(mean_speed)
    # if multiagent:
    #     for agent_id, rew in rets.items():
    #         print('For agent', agent_id)
    #         print(rew)
    #         print('Average, std return: {}, {} for agent {}'.format(
    #             np.mean(rew), np.std(rew), agent_id))
    # else:
    #     print(rets)
    #     print('Average, std: {}, {}'.format(
    #         np.mean(rets), np.std(rets)))

    print("\nSpeed, mean (m/s): {}".format(mean_speed))
    print('Average, std: {}, {}'.format(np.mean(mean_speed),
                                        np.std(mean_speed)))
    print("\nSpeed, std (m/s): {}".format(std_speed))
    print('Average, std: {}, {}'.format(np.mean(std_speed), np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr): {}".format(final_outflows))
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))

    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr): {}".format(final_inflows))
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))

    # Compute throughput efficiency in the last 500 sec of the run
    print("Throughput efficiency (outflow/inflow): {}".format(
        throughput_efficiency))
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)

        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.scenario.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
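
DefaultMapping, _flatten_action, _DUMMY_AGENT_ID and default_policy_agent_mapping used in the rollout above come from Ray RLlib's rollout utilities and are not shown here. The least obvious of these is DefaultMapping, which behaves like a defaultdict whose factory receives the missing key, allowing the per-agent state and action caches to be filled lazily; the stand-in below is a sketch assuming that behavior.

import collections


class DefaultMapping(collections.defaultdict):
    """defaultdict variant whose default_factory is called with the missing key."""

    def __missing__(self, key):
        self[key] = value = self.default_factory(key)
        return value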
Example #22
0
def visualizer_rllib(args):
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)
    # TODO(ev) backwards compatibility hack
    try:
        pkl = get_rllib_pkl(result_dir)
    except Exception:
        pass

    # check if we have a multiagent scenario but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = False
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.multiagent_envs'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policy_graphs'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policy_graphs'].keys():
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    final_outflows = []
    mean_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                            state[agent_id], state=state_init[agent_id],
                            policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        mean_speed.append(np.mean(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))
    if multiagent:
        for agent_id, rew in rets.items():
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print('Average, std return: {}, {}'.format(np.mean(rets),
                                                   np.std(rets)))
    print('Average, std speed: {}, {}'.format(np.mean(mean_speed),
                                              np.std(mean_speed)))
    print('Average, std outflow: {}, {}'.format(np.mean(final_outflows),
                                                np.std(final_outflows)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)

        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.scenario.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
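
When use_lstm is enabled, agent.compute_action returns a triple (action, rnn_state, logits) and expects the previous recurrent state through its state keyword, which is why state_init is threaded through the rollout loop above. The block below is a minimal single-agent sketch of that handshake, reusing the agent, env and config objects built in this example (the 10-step horizon is only illustrative).

import numpy as np

cell_size = config['model']['lstm_cell_size']
rnn_state = [np.zeros(cell_size, np.float32), np.zeros(cell_size, np.float32)]

obs = env.reset()
for _ in range(10):
    # the returned recurrent state is fed back in on the next step
    action, rnn_state, _ = agent.compute_action(obs, state=rnn_state)
    obs, reward, done, _ = env.step(action)
    if done:
        break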
Example #23
0
    def run(self, num_runs, num_steps, rl_actions=None, convert_to_csv=False):
        """
        Runs the given scenario for a set number of runs and a set number of
        steps per run.

        Parameters
            num_runs: int
                number of runs the experiment should perform
            num_steps: int
                number of steps to be performed in each run of the experiment
            rl_actions: list or numpy ndarray, optional
                actions to be performed by rl vehicles in the network (if there
                are any)
            convert_to_csv: bool
                Specifies whether to convert the emission file created by sumo
                into a csv file
        Returns
            info_dict: dict
                contains returns, average speed per step
        """
        info_dict = {}
        if rl_actions is None:
            rl_actions = []

        rets = []
        mean_rets = []
        ret_lists = []
        vels = []
        mean_vels = []
        std_vels = []
        for i in range(num_runs):
            vel = np.zeros(num_steps)
            logging.info("Iter #" + str(i))
            ret = 0
            ret_list = []
            vehicles = self.env.vehicles
            self.env.reset()
            for j in range(num_steps):
                state, reward, done, _ = self.env.step(rl_actions)
                vel[j] = np.mean(vehicles.get_speed(vehicles.get_ids()))
                ret += reward
                ret_list.append(reward)
                if done:
                    break
            rets.append(ret)
            vels.append(vel)
            mean_rets.append(np.mean(ret_list))
            ret_lists.append(ret_list)
            mean_vels.append(np.mean(vel))
            std_vels.append(np.std(vel))
            print("Round {0}, return: {1}".format(i, ret))

        info_dict["returns"] = rets
        info_dict["velocities"] = vels
        info_dict["mean_returns"] = mean_rets
        info_dict["per_step_returns"] = ret_lists

        print("Average, std return: {}, {}".format(np.mean(rets),
                                                   np.std(rets)))
        print("Average, std speed: {}, {}".format(np.mean(mean_vels),
                                                  np.std(std_vels)))
        self.env.terminate()

        if convert_to_csv:
            # collect the location of the emission file
            dir_path = self.env.sumo_params.emission_path
            emission_filename = \
                "{0}-emission.xml".format(self.env.scenario.name)
            emission_path = \
                "{0}/{1}".format(dir_path, emission_filename)

            # convert the emission file into a csv
            emission_to_csv(emission_path)

        return info_dict
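
In this older Flow API the emission XML is looked up under self.env.sumo_params.emission_path and named after the scenario, and emission_to_csv writes the CSV next to it with the same base name. A small hedged sketch of locating and reading that CSV afterwards, given the same env object (pandas is used purely for illustration and is not required by the experiment):

import os

import pandas as pd  # illustration only

csv_path = os.path.join(
    env.sumo_params.emission_path,
    "{0}-emission.csv".format(env.scenario.name))
print(pd.read_csv(csv_path).head())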
Example #24
0
    def run(self,
            num_runs,
            num_steps,
            rl_actions=None,
            output_to_terminal=True,
            convert_to_csv=False):
        """Run the given network for a set number of runs and steps per run.

        Parameters
        ----------
        num_runs : int
            number of runs the experiment should perform
        num_steps : int
            number of steps to be performed in each run of the experiment
        rl_actions : method, optional
            maps states to actions to be performed by the RL agents (if
            there are any)
        output_to_terminal : bool
            Specifies whether to print the results of each run to the terminal
        convert_to_csv : bool
            Specifies whether to convert the emission file created by sumo
            into a csv file

        Returns
        -------
        info_dict : dict
            contains returns, average speed per step
        """
        # raise an error if convert_to_csv is set to True but no emission
        # file will be generated, to avoid getting an error at the end of the
        # simulation
        if convert_to_csv and self.env.sim_params.emission_path is None:
            raise ValueError(
                'The experiment was run with convert_to_csv set '
                'to True, but no emission file will be generated. If you wish '
                'to generate an emission file, you should set the parameter '
                'emission_path in the simulation parameters (SumoParams or '
                'AimsunParams) to the path of the folder where emissions '
                'output should be generated. If you do not wish to generate '
                'emissions, set the convert_to_csv parameter to False.')

        info_dict = {}
        if rl_actions is None:

            def rl_actions(*_):
                return None

        # collecting experiment results, ret = return
        # reward
        overall_return_all_runs = []
        mean_return_all_runs = []
        per_step_return_all_runs = []

        # speed
        per_step_speed_all_runs = []
        mean_speed_over_all_runs = []
        std_speed_over_all_runs = []

        # throughput
        inflow_over_all_runs = []
        outflow_over_all_runs = []

        # for each run
        for i in range(num_runs):
            logging.info("Run #" + str(i + 1))
            state = self.env.reset()

            # reward
            overall_return_one_run = 0
            per_step_return_one_run = []

            # speed
            per_step_speed_one_run = np.zeros(num_steps)

            # for each step
            for j in range(num_steps):

                # get the states, rewards, etc
                state, reward, done, _ = self.env.step(rl_actions(state))

                # store the returns
                overall_return_one_run += reward
                per_step_return_one_run.append(reward)

                # store the averaged speed of all vehicles at this step
                per_step_speed_one_run[j] = np.mean(
                    self.env.k.vehicle.get_speed(self.env.k.vehicle.get_ids()))

                if done:
                    break

            # reward
            overall_return_all_runs.append(overall_return_one_run)
            mean_return_all_runs.append(np.mean(per_step_return_one_run))
            per_step_return_all_runs.append(per_step_return_one_run)

            # speed
            per_step_speed_all_runs.append(per_step_speed_one_run)
            mean_speed_over_all_runs.append(np.mean(per_step_speed_one_run))
            std_speed_over_all_runs.append(np.std(per_step_speed_one_run))

            # get the outflows and inflows for the past 500 seconds, if the simulation is less than
            # 500 seconds then this will get all inflows (the number of vehicles entering the network)
            # and outflows (the number of vehicles leaving the network)
            inflow_over_all_runs.append(
                self.env.k.vehicle.get_inflow_rate(int(500)))
            outflow_over_all_runs.append(
                self.env.k.vehicle.get_outflow_rate(int(500)))

            # compute the throughput efficiency
            if np.all(np.array(inflow_over_all_runs) > 1e-5):
                throughput_over_all_runs = [
                    x / y for x, y in zip(outflow_over_all_runs,
                                          inflow_over_all_runs)
                ]
            else:
                throughput_over_all_runs = [0] * len(inflow_over_all_runs)

        info_dict["overall_return_all_runs"] = overall_return_all_runs
        info_dict["mean_return_all_runs"] = mean_return_all_runs
        info_dict["per_step_return_all_runs"] = per_step_return_all_runs
        info_dict["per_step_speed_all_runs"] = per_step_speed_all_runs
        info_dict["mean_ret_all"] = np.mean(overall_return_all_runs)
        info_dict["std_ret_all"] = np.std(overall_return_all_runs)

        info_dict["mean_inflows"] = np.mean(inflow_over_all_runs)
        info_dict["mean_outflows"] = np.mean(outflow_over_all_runs)

        info_dict["max_spd_all"] = np.max(mean_speed_over_all_runs)
        info_dict["min_spd_all"] = np.min(mean_speed_over_all_runs)
        info_dict["mean_spd_all"] = np.mean(mean_speed_over_all_runs)
        info_dict["std_spd_all"] = np.std(mean_speed_over_all_runs)
        info_dict["max_tpt_all"] = np.max(throughput_over_all_runs)
        info_dict["min_tpt_all"] = np.min(throughput_over_all_runs)
        info_dict["mean_tpt_all"] = np.mean(throughput_over_all_runs)
        info_dict["std_tpt_all"] = np.std(throughput_over_all_runs)

        if output_to_terminal:
            print("Round {0} -- Return: {1}".format(i + 1,
                                                    overall_return_one_run))
            print("Return: {} (avg), {} (std)".format(
                info_dict["mean_ret_all"], info_dict["std_ret_all"]))

            print("Speed (m/s): {} (avg), {} (std)".format(
                info_dict["mean_spd_all"], info_dict["std_spd_all"]))

            print("Throughput (veh/hr): {} (avg), {} (std)".format(
                info_dict["mean_tpt_all"], info_dict["std_tpt_all"]))

        self.env.terminate()

        if convert_to_csv:
            # wait a short period of time to ensure the xml file is readable
            time.sleep(0.1)

            # collect the location of the emission file
            dir_path = self.env.sim_params.emission_path
            emission_filename = "{0}-emission.xml".format(
                self.env.network.name)
            emission_path = os.path.join(dir_path, emission_filename)

            # convert the emission file into a csv
            emission_to_csv(emission_path)

            # Delete the .xml version of the emission file.
            os.remove(emission_path)

        return info_dict
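
The guard at the top of this run method rejects convert_to_csv=True unless self.env.sim_params.emission_path was set when the environment was built. A minimal sketch of the expected setup, assuming Flow's SumoParams class (exact constructor arguments can differ between Flow versions):

from flow.core.params import SumoParams

# emission_path tells SUMO where to write <network-name>-emission.xml, which
# run(..., convert_to_csv=True) later converts to a CSV and then deletes.
sim_params = SumoParams(sim_step=0.1, render=False, emission_path='./data/')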
Example #25
0
def visualizer_rllib(args):
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 1

    flow_params = get_flow_params(config)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params,
                                           version=0,
                                           render=False)
    register_env(env_name, create_env)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if (args.run and config_run):
        if (args.run != config_run):
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if (args.run):
        agent_cls = get_agent_class(args.run)
    elif (config_run):
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params['exp_tag']
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__('flow.scenarios', fromlist=[flow_params['scenario']])
    scenario_class = getattr(module, flow_params['scenario'])

    scenario = scenario_class(name=exp_tag,
                              vehicles=vehicles,
                              net_params=net_params,
                              initial_config=initial_config)

    # Start the environment with the gui turned on and a path for the
    # emission file
    module = __import__('flow.envs', fromlist=[flow_params['env_name']])
    env_class = getattr(module, flow_params['env_name'])
    env_params = flow_params['env']
    if args.evaluate:
        env_params.evaluate = True
    sumo_params = flow_params['sumo']
    if args.no_render:
        sumo_params.render = False
    else:
        sumo_params.render = True
    sumo_params.emission_path = './test_time_rollout/'

    env = ModelCatalog.get_preprocessor_as_wrapper(
        env_class(env_params=env_params,
                  sumo_params=sumo_params,
                  scenario=scenario))

    # Run the environment in the presence of the pre-trained RL agent for the
    # requested number of time steps / rollouts
    rets = []
    final_outflows = []
    mean_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.vehicles
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            ret += reward
            if done:
                break
        rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        mean_speed.append(np.mean(vel))
        print('Round {}, Return: {}'.format(i, ret))
    print('Average, std return: {}, {}'.format(np.mean(rets), np.std(rets)))
    print('Average, std speed: {}, {}'.format(np.mean(mean_speed),
                                              np.std(mean_speed)))
    print('Average, std outflow: {}, {}'.format(np.mean(final_outflows),
                                                np.std(final_outflows)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(scenario.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)
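
The two __import__ calls in this example resolve the scenario and environment classes from the strings stored in flow_params. The helper below is a sketch of the same lookup written with importlib; it is an equivalence note rather than part of the original script.

import importlib


def resolve_class(module_path, class_name):
    """Return getattr(import_module(module_path), class_name)."""
    return getattr(importlib.import_module(module_path), class_name)


scenario_class = resolve_class('flow.scenarios', flow_params['scenario'])
env_class = resolve_class('flow.envs', flow_params['env_name'])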
Example #26
0
def visualizer_rllib(args):
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    # config = get_rllib_config(result_dir + '/..')
    # pkl = get_rllib_pkl(result_dir + '/..')
    config = get_rllib_config(result_dir)
    # TODO(ev) backwards compatibility hack
    try:
        pkl = get_rllib_pkl(result_dir)
    except Exception:
        pass

    # check if we have a multiagent scenario but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sumo_params = flow_params['sumo']
    setattr(sumo_params, 'num_clients', 1)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(
        params=flow_params, version=0, render=False)
    register_env(env_name, create_env)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if (args.run and config_run):
        if (args.run != config_run):
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if (args.run):
        agent_cls = get_agent_class(args.run)
    elif (config_run):
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sumo_params.restart_instance = False

    sumo_params.emission_path = './test_time_rollout/'

    # pick your rendering mode
    if args.render_mode == 'sumo-web3d':
        sumo_params.num_clients = 2
        sumo_params.render = False
    elif args.render_mode == 'drgb':
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
    elif args.render_mode == 'sumo-gui':
        sumo_params.render = False
    elif args.render_mode == 'no-render':
        sumo_params.render = False

    if args.save_render:
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
        sumo_params.save_render = True

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params['exp_tag']
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__('flow.scenarios', fromlist=[flow_params['scenario']])
    scenario_class = getattr(module, flow_params['scenario'])

    scenario = scenario_class(
        name=exp_tag,
        vehicles=vehicles,
        net_params=net_params,
        initial_config=initial_config)

    # Start the environment with the gui turned on and a path for the
    # emission file
    module = __import__('flow.envs', fromlist=[flow_params['env_name']])
    env_class = getattr(module, flow_params['env_name'])
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = 6000  # could consider changing this to 6000
        env_params.horizon = 6000

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    env = ModelCatalog.get_preprocessor_as_wrapper(env_class(
        env_params=env_params, sumo_params=sumo_params, scenario=scenario))

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policy_graphs'].keys():
            rets[key] = []
    else:
        rets = []
    final_outflows = []
    mean_speed = []
    for i in range(1):  # originally: range(args.num_rollouts)
        vel = []
        state = env.reset()
        done = False
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.vehicles
            # this is the overall average speed
            vel.append(vehicles.get_speed(vehicles.get_ids())[0])
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    action[agent_id] = agent.compute_action(
                        state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                print(type(state),state)
                action = agent.compute_action(state)
                print(type(action),action)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        # mean_speed.append(np.mean(vel))  # note this line
        print('Round {}, Return: {}'.format(i, ret))
    if multiagent:
        for agent_id, rew in rets.items():
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print('Average, std return: {}, {}'.format(
            np.mean(rets), np.std(rets)))
    print('Average, std speed: {}, {}'.format(
        np.mean(mean_speed), np.std(mean_speed)))
    print('Average, std outflow: {}, {}'.format(
        np.mean(final_outflows), np.std(final_outflows)))
    import matplotlib.pyplot as plt
    plt.figure()
    plt.plot(vel)
    plt.show()
    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(scenario.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie
    '''
Example #27
0
def main(args):
    """Execute multiple training operations."""
    flags = parse_options(args)

    # get the hyperparameters
    env_name, policy, hp, seed = get_hyperparameters_from_dir(flags.dir_name)
    hp['render'] = not flags.no_render  # to visualize the policy

    # create the algorithm object. We will be using the eval environment in
    # this object to perform the rollout.
    alg = OffPolicyRLAlgorithm(policy=policy, env=env_name, **hp)

    # setup the seed value
    if not flags.random_seed:
        random.seed(seed)
        np.random.seed(seed)
        tf.compat.v1.set_random_seed(seed)

    # get the checkpoint number
    if flags.ckpt_num is None:
        filenames = os.listdir(os.path.join(flags.dir_name, "checkpoints"))
        metafiles = [f[:-5] for f in filenames if f[-5:] == ".meta"]
        metanum = [int(f.split("-")[-1]) for f in metafiles]
        ckpt_num = max(metanum)
    else:
        ckpt_num = flags.ckpt_num

    # location to the checkpoint
    ckpt = os.path.join(flags.dir_name, "checkpoints/itr-{}".format(ckpt_num))

    # restore the previous checkpoint
    alg.saver = tf.compat.v1.train.Saver(alg.trainable_vars)
    alg.load(ckpt)

    # some variables that will be needed when replaying the rollout
    policy = alg.policy_tf
    env = alg.sampler.env

    # Perform the evaluation procedure.
    episode_rewards = []

    # Add an emission path to Flow environments.
    if env_name in FLOW_ENV_NAMES:
        sim_params = deepcopy(env.wrapped_env.sim_params)
        sim_params.emission_path = "./flow_results"
        env.wrapped_env.restart_simulation(sim_params,
                                           render=not flags.no_render)

    for episode_num in range(flags.num_rollouts):
        # Run a rollout.
        obs = env.reset()
        total_reward = 0
        while True:
            context = [env.current_context] \
                if hasattr(env, "current_context") else None
            action = policy.get_action(
                np.asarray([obs]),
                context=context,
                apply_noise=False,
                random_actions=False,
            )
            obs, reward, done, _ = env.step(action[0])
            if not flags.no_render:
                env.render()
            total_reward += reward
            if done:
                break

        # Print total returns from a given episode.
        episode_rewards.append(total_reward)
        print("Round {}, return: {}".format(episode_num, total_reward))

    # Print total statistics.
    print("Average, std return: {}, {}".format(np.mean(episode_rewards),
                                               np.std(episode_rewards)))

    if env_name in FLOW_ENV_NAMES:
        # wait a short period of time to ensure the xml file is readable
        time.sleep(0.1)

        # collect the location of the emission file
        dir_path = env.wrapped_env.sim_params.emission_path
        emission_filename = "{0}-emission.xml".format(
            env.wrapped_env.network.name)
        emission_path = os.path.join(dir_path, emission_filename)

        # convert the emission file into a csv
        emission_to_csv(emission_path)

        # Delete the .xml version of the emission file.
        os.remove(emission_path)
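
main above expects the raw command-line argument list and forwards it to parse_options, so a typical entry point would simply pass along sys.argv; this is a hedged sketch, and the real script may wrap it differently.

import sys

if __name__ == "__main__":
    main(sys.argv[1:])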