Example #1
def main():
    env = make_env(config.env, config.seed, -1)
    print('State dimension:', env.observation_space.shape[0])
    print('Action dimension:', env.action_space.shape[0])
    print('Action limits:', env.action_space.low, "~", env.action_space.high)

    memorySampler = Sampler(config)
    safety_layer = SafetyLayer(args, env.observation_space.shape[0],
                               env.action_space.shape[0])

    agent = Agent(env.observation_space.shape[0], env.action_space.shape[0],
                  config)

    # Load pretrained network parameters
    if args.load is not None:
        pretrained_model_path = os.path.join(proj_path, 'save_model',
                                             str(args.load))
        pretrained_model = torch.load(pretrained_model_path,
                                      map_location=device)
        agent.policy.load_state_dict(pretrained_model)

    finished_steps = 0
    finished_episodes = 0
    best_target = 0
    save_index = 0
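    # Pre-train the safety layer on rollouts collected without a policy
    # (sample(None) presumably falls back to random/default actions), then
    # evaluate it once on a fresh batch before the RL agent is re-initialised.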
    print('____________________train safety layer_________________________')
    for i in range(args.safety_layer_epoch):
        for _ in range(args.sample_scale):
            memorySampler.sample(None)
        safety_layer.train(memorySampler.buffer)
        memorySampler.buffer.clear()
    for _ in range(args.sample_scale):
        memorySampler.sample(None)
    safety_layer.evaluate(memorySampler.buffer)
    memorySampler.buffer.clear()
    agent = Agent(env.observation_space.shape[0], env.action_space.shape[0],
                  config)
    print('____________________train agent_________________________')

    for i in range(config.iterations):
        print('________________________iter:', i,
              '_____________________________')
        '''sample stage'''
        start_time = time.time()
        samples = memorySampler.sample(agent.policy)
        sample_time = time.time() - start_time
        '''train stage'''
        start_time = time.time()
        agent.train_model(samples, iter_index=i)
        train_time = time.time() - start_time
        show_agent(writer, config.agent['name'], agent, i)

        if config.agent['name'] in ['cppo', 'ppo', 'sac', 'safe_sac']:
            agent.print_loss()
        print('TRAIN:')
        print('reward: mean', np.mean(memorySampler.result_dict['reward']),
              'max', np.max(memorySampler.result_dict['reward']), 'min',
              np.min(memorySampler.result_dict['reward']), 'std',
              np.std(memorySampler.result_dict['reward']))
        print('cost: mean', np.mean(memorySampler.result_dict['cost']), 'max',
              np.max(memorySampler.result_dict['cost']), 'min',
              np.min(memorySampler.result_dict['cost']), 'std',
              np.std(memorySampler.result_dict['cost']))
        print('done rate:',
              np.mean(memorySampler.result_dict['done']), 'collision rate:',
              np.mean(memorySampler.result_dict['collision']))
        '''for saving model'''
        finished_steps += len(memorySampler.buffer)
        finished_episodes += memorySampler.buffer.num_episode
        target = np.mean(memorySampler.result_dict['reward']) / np.mean(
            memorySampler.result_dict['time'])

        '''evaluation stage'''
        start_time = time.time()
        memorySampler.evel(agent.policy)
        eval_time = time.time() - start_time
        print('EVALUATION:')
        print('reward: mean', np.mean(memorySampler.result_dict['reward']),
              'max', np.max(memorySampler.result_dict['reward']), 'std',
              np.std(memorySampler.result_dict['reward']))
        print('cost: mean', np.mean(memorySampler.result_dict['cost']), 'max',
              np.max(memorySampler.result_dict['cost']), 'std',
              np.std(memorySampler.result_dict['cost']))
        print('done rate:',
              np.mean(memorySampler.result_dict['done']), 'collision rate:',
              np.mean(memorySampler.result_dict['collision']))


        print('USE TIME AND SAMPLES:')
        print('sample time:', sample_time, 'train time:', train_time,
              'eval time:', eval_time, 'steps:',
              memorySampler.buffer.sample_size, 'total steps:', finished_steps,
              'episodes:', memorySampler.buffer.num_episode, 'total episodes:',
              finished_episodes)

        # Save the trained model
        condition = 2  # threshold on target (mean reward / mean episode time)
        # Save checkpoints that meet the condition
        if target > condition:
            ckpt_path = os.path.join(
                save_path, 'i_' + str(i) + '_seed_' + str(args.seed) +
                '_st_' + str(finished_steps) + '_ep_' + str(finished_episodes) +
                '_tar_' + str(round(target, 2)) + '.pt')
            torch.save(agent.policy.state_dict(), ckpt_path)
            if target > best_target:
                if save_index > 0:
                    os.remove(os.path.join(save_path,
                                           'best_model_i_' + str(save_index) + '.pt'))
                best_ckpt_path = os.path.join(save_path,
                                              'best_model_i_' + str(i) + '.pt')
                save_index = i
                torch.save(agent.policy.state_dict(), best_ckpt_path)
                best_target = target
    memorySampler.close()
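
# A minimal sketch of how the argparse flags referenced above (args.load,
# args.seed, args.safety_layer_epoch, args.sample_scale) might be wired up;
# the flag names come from the code, but the defaults here are illustrative
# assumptions, not the original settings.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--load', type=str, default=None,
                        help='checkpoint name under save_model/ used to warm-start the policy')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--safety_layer_epoch', type=int, default=5,
                        help='sample/train rounds for the safety layer')
    parser.add_argument('--sample_scale', type=int, default=10,
                        help='sampler calls per safety-layer round')
    args = parser.parse_args()
    main()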
Example #2
# else:
#     policys[args.algo] = SACActor(env.observation_space.shape[0], env.action_space.shape[0],
#                                   hidden_sizes=(64, 64), )
#
# if not args.algo == 'human':
#     policys[args.algo].load_state_dict(torch.load(args.load_path, map_location=device))


def slope(distances):
    # Difference between consecutive distance samples, i.e. metres travelled
    # per step (one simulation step corresponds to 0.2 s in this environment).
    distances = np.array(distances)
    return distances[1:] - distances[:-1]
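
# Quick sanity check of slope() on made-up values:
#   slope([0.0, 2.0, 5.0, 9.0])  ->  array([2., 3., 4.])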


env = make_env(
    config.env,
    config.seed,
    'test',
)
model = {
    '(a) CPPO-PID-AEB': [
        'cppo',
        '/home/user/repos/CAV/save_model/expsumo_env-v6_2021-01-07/cppo_15-21-35/best_model_i_269.pt'
    ],
    '(b) CPPO-PID': [
        'cppo',
        '/home/user/repos/CAV/save_model/expsumo_env-v6_2021-01-08/cppo_14-05-50/best_model_i_153.pt'
    ],
    '(c) PPO-safe-AEB': [
        'ppo2',
        '/home/user/repos/CAV/save_model/expsumo_env-v6_2021-01-07/ppo2_15-21-43/best_model_i_252.pt'
    ],
Example #3
def evaluation(algo_name, ax):
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    random.seed(config.seed)
    torch.cuda.manual_seed(config.seed)
    if algo_name == '(g) Human':
        config.env['human_model'] = True
    env = make_env(
        config.env,
        config.seed,
        'test',
    )
    # fig, axs = plt.subplots(figsize=(3, 1))
    # segments = np.full((args.episodes, config.env['max_step'], 2), np.nan)
    # dydx_s = np.full((args.episodes, config.env['max_step']), np.nan)
    segments = []
    dydx_s = []
    for i in range(args.episodes):
        results = {}
        results['time'] = 0
        results['speed'] = 0
        results['collision'] = 0
        results['success'] = 0
        results['fuel'] = 0
        print('-----------------', i, '---------------------')
        # env.set_startstep(i)
        obs = env.reset()
        done = False
        speed = 0
        step = 0
        distances = []
        if algo_name == '(g) Human':
            while not done:
                step += 1
                distances.append(
                    env.connect.vehicle.getDistance(env.curr_veh_id))
                obs_next, reward, done, info = env.step()
                speed += info['speed']
                results['collision'] += info['crash']
                results['success'] += info['success']
                results['fuel'] += info['fuel']
                # print(info['speed'])

        else:
            while not done:
                step += 1
                distances.append(
                    env.connect.vehicle.getDistance(env.curr_veh_id))
                obs = torch.Tensor(obs).unsqueeze(0)
                action = policys[algo_name](obs)
                action = action.detach().cpu().numpy()[0]
                obs_next, reward, done, info = env.step(action)
                obs = obs_next

                speed += info['speed']
                results['collision'] += info['crash']
                results['success'] += info['success']
                results['fuel'] += info['fuel']

            if config.env['gui']:
                time.sleep(0.005)
        results['time'] += 0.2 * step
        results['speed'] += speed / step

        dydx = slope(distances)
        # distances = distances[:-1]
        x = np.arange(0 + i * 40, len(distances) + i * 40, 1)
        y = np.array(distances)
        dydx_s.append(dydx)
        segments.append(np.array([x, y]).T.reshape(-1, 2))
        # dydx_s[i, :x.shape[0]] = dydx
        # segments[i, :x.shape[0], :] = np.array([x, y]).T.reshape(-1, 2)
        print(results)
    # dydx_s = dydx_s.T.reshape(-1)
    # dydx_s = dydx_s[~np.isnan(dydx_s)]
    # dydx_s=dydx_s.reshape(-1)
    norm = plt.Normalize(0, 4)
    for i in range(args.episodes):
        segment = np.concatenate([[segments[i][:-1, :]], [segments[i][1:, :]]],
                                 axis=0)
        segment = segment.transpose(1, 0, 2)
        lc = LineCollection(segment, cmap='viridis', norm=norm)
        lc.set_array(dydx_s[i])
        lc.set_linewidth(0.7)
        line = ax.add_collection(lc)
    # lc = LineCollection(segments, cmap='viridis', norm=norm)

    cb = fig.colorbar(
        line,
        ax=ax,
    )
    cb.set_label('Speed (m/0.2 s)')
    x = np.arange(0, len(distances) + i * 40, 1)
    y = np.ones(len(distances) + i * 40)
    plt.fill_between(
        x,
        95 * y,
        110 * y,
        facecolor="gray",  # The fill color
        # color='gray',  # The outline color
        alpha=0.2,
        label="Crosswalk")
    plt.fill_between(
        x,
        140 * y,
        145 * y,
        facecolor="green",  # The fill color
        # color='green',  # The outline color
        alpha=0.2,
        label="End")
    # ax.fill_between(x, 95 * y, 110 * y,
    #                  facecolor="gray",  # The fill color
    #                  # color='gray',  # The outline color
    #                  alpha=0.2, )
    # ax.fill_between(x, 140 * y, 145 * y,
    #                  facecolor="green",  # The fill color
    #                  # color='green',  # The outline color
    #                  alpha=0.2, )

    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Distance (m)')
    plt.legend(loc='lower right')
    # ax.set_title(algo_name)
    env.close()
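
# A minimal driver sketch for evaluation() above (not from the original
# source): it assumes `fig` is a module-level Figure, since evaluation()
# calls fig.colorbar(...), and that `policys` has been populated with one
# loaded policy per name in the `model` mapping from Example #2.
fig, axs = plt.subplots(len(model), 1, figsize=(6, 2.5 * len(model)))
for ax, name in zip(np.atleast_1d(axs), model):
    evaluation(name, ax)
plt.tight_layout()
plt.show()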
Example #4
def evaluation():
    plt.style.use(['science', 'ieee'])
    for i in range(args.episodes):
        print('-----------------', i, '---------------------')
        fig, axes = plt.subplots(2, 1, sharex='col', figsize=(5, 5))
        for key in policys.keys():
            if key == 'Human':
                config.env['human_model'] = True
            else:
                config.env['human_model'] = False
            env = make_env(config.env, seed=i, env_index='test')
            np.random.seed(i)
            torch.manual_seed(i)
            random.seed(i)
            torch.cuda.manual_seed(i)
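            # Re-seed with the episode index so every policy is evaluated on
            # the same traffic scenario within this episode (presumably why
            # the env is rebuilt with seed=i for each policy).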
            print('-----------------', key, '---------------------')
            # env.set_startstep(i)
            for a in range(1):
                obs = env.reset()
                done = False
                step = 0
                distances = []
                speeds = []
                actions = []
                if key == 'Human':
                    while not done:
                        step += 1
                        distances.append(
                            env.connect.vehicle.getDistance(env.curr_veh_id))
                        speeds.append(env.curr_speed)
                        obs_next, reward, done, info = env.step()
                        if config.env['gui']:
                            time.sleep(0.005)
                else:
                    while not done:
                        step += 1
                        distances.append(
                            env.connect.vehicle.getDistance(env.curr_veh_id))
                        speeds.append(env.curr_speed)

                        obs = torch.Tensor(obs).unsqueeze(0)
                        action = policys[key](obs)
                        action = action.detach().cpu().numpy()[0]
                        obs_next, reward, done, info = env.step(action)
                        obs = obs_next
                        if config.env['gui']:
                            time.sleep(0.005)

            env.close()
            x = np.arange(0, len(distances), 1)
            y = np.array(distances)
            y_1 = np.array(speeds)
            # y_2= np.array(actions)
            axes[0].plot(x, y, label=key)
            axes[1].plot(x, y_1, label=key)
            # axes[2].plot(x, y_2, label=key)
        y = np.ones(150)
        axes[0].fill_between(
            np.arange(0, 150, 1),
            95 * y,
            110 * y,
            facecolor="gray",  # The fill color
            # color='gray',  # The outline color
            alpha=0.2,
            label="Crosswalk")
        axes[0].fill_between(
            np.arange(0, 150, 1),
            140 * y,
            145 * y,
            facecolor="green",  # The fill color
            # color='green',  # The outline color
            alpha=0.2,
            label="End")
        plt.legend(loc='lower right')
        axes[0].set_ylabel('Distance (m)')
        axes[1].set_ylabel('Velocity (m/s)')
        # axes[2].set_ylabel(r'Acceleration ($m/s^2$)')
        plt.xlabel('Time (s)')
        # plt.ylabel('Distance(m)')
        plt.legend(loc=8, bbox_to_anchor=(0.4, -0.5), ncol=3)
        plt.show()