Example #1
def super_dqn_train(configs, time_data, sumoCmd):
    from Agent.super_dqn import Trainer
    if configs['model'] == 'base':
        from Env.MultiEnv import GridEnv
    # elif configs['model'] == 'frap':
    #     from Env.FRAP import TL3x3Env # will be added
    side_list = ['u', 'r', 'd', 'l']
    tl_rl_list = list()
    for _, node in enumerate(configs['node_info']):
        if node['id'][-1] not in side_list:
            tl_rl_list.append(node['id'])
    configs['tl_rl_list'] = tl_rl_list
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    # init agent and tensorboard writer
    agent = Trainer(configs)
    writer = SummaryWriter(os.path.join(
        configs['current_path'], 'training_data', time_data))
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        for tl_rl in tl_rl_list:
            traci.trafficlight.setRedYellowGreenState(tl_rl, 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G'*configs['num_lanes'], 'G', 'r'*configs['num_lanes'], 'r'))
        
        before_action = torch.ones((1, len(tl_rl_list), 1), dtype=torch.int)
        env = GridEnv(configs)
        step = 0
        done = False
        # state initialization
        # agent setting
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        state = env.get_state()
        action_distribution = tuple()
        a = time.time()
        
        while step < MAX_STEPS:

            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            # choose the action

            # if the action changed from before_action, insert a yellow phase first
            for idx, tl_rl in enumerate(tl_rl_list):
                if before_action[0][idx] != action[0][idx]:
                    traci.trafficlight.setRedYellowGreenState(
                        tl_rl, 'y' * (3 + configs['num_lanes']) * 4)
            arrived_vehicles += simulation_step(env, 5)
            step += 5
            
            # apply the action to the environment
            env.step(action)

            # run another 20 seconds after applying the action
            arrived_vehicles += simulation_step(env, 20)
            step += 20
            next_state = env.get_state()  # next state

            reward = env.get_reward()  # reward delayed by 20 seconds
            agent.save_replay(state, action, reward, next_state)  # dqn
            agent.update(done)
            state = next_state
            total_reward += reward
            before_action = action
            # after the 20 seconds of green, the yellow phase follows

        agent.update_hyperparams(epoch)  # lr and epsilon update
        if epoch % 2 == 0:
            agent.target_update()  # dqn
        b = time.time()
        traci.close()
        print("time:", b-a)
        epoch += 1
        # once in an epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(epoch,
                                                                       total_reward.sum(), arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(
                configs['file_name']+'_{}_{}'.format(time_data, epoch))

    writer.close()
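Note: simulation_step(env, n) is called in Examples #1 and #2 but is not defined in this listing. A minimal sketch, assuming it simply advances SUMO by n seconds while the environment collects observations and returns the arrived-vehicle count (the same pattern that is inlined in Examples #4 to #6); the repository's actual helper may differ:

import traci

def simulation_step(env, n):
    # hypothetical helper reconstructed from the inlined loops in the later examples
    arrived = 0
    for _ in range(n):
        traci.simulationStep()                           # one simulation second
        env.collect_state()                              # let the env accumulate observations
        arrived += traci.simulation.getArrivedNumber()   # throughput
    return arrived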
Example #2
def dqn_train(configs, time_data, sumoCmd):
    # Environment Setting
    from Agent.dqn import Trainer
    if configs['model'] == 'base':
        from Env.Env import TL3x3Env
    elif configs['model'] == 'frap':
        from Env.FRAP import TL3x3Env
    # EXP_CONFIG Setting
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    tl_rl_list = configs['tl_rl_list']
    epoch = 0
    # init agent and tensorboard writer
    # agent setting
    agent = Trainer(configs)
    writer = SummaryWriter(os.path.join(
        configs['current_path'], 'training_data', time_data))
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    while epoch < NUM_EPOCHS:
        # Epoch Start
        traci.start(sumoCmd)
        step = 0
        action_distribution = tuple()
        # Epoch Start setting
        env = TL3x3Env(configs)
        traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
            'G'*configs['num_lanes'], 'G', 'r'*configs['num_lanes'], 'r'))
        before_action = torch.ones((1, len(tl_rl_list)))
        done = False
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        # state initialization
        state = env.get_state()
        # Time Check
        a = time.time()
        while step < MAX_STEPS:

            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            # choose the action

            # if the action changed from before_action, insert a yellow phase first
            if before_action != action:
                traci.trafficlight.setRedYellowGreenState(
                    tl_rl_list[0], 'y' * 28)
            arrived_vehicles += simulation_step(env, 5)
            step += 5
            
            # apply the action to the environment
            env.step(action)

            # run another 20 seconds after applying the action
            arrived_vehicles += simulation_step(env, 20)
            step += 20
            next_state = env.get_state()  # next state

            reward = env.get_reward()  # reward delayed by 20 seconds
            agent.save_replay(state, action, reward, next_state)  # dqn
            agent.update(done)
            state = next_state
            total_reward += reward
            before_action = action
            # after the 20 seconds of green, the yellow phase follows

        b = time.time()
        traci.close()
        print("time:", b-a)
        epoch += 1
        # update hyper parameter
        agent.update_hyperparams(epoch)  # lr and epsilon update
        if epoch % agent.configs['target_update_period'] == 0:
            agent.target_update()  # dqn
        # once in an epoch update tensorboard
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(epoch,
                                                                       total_reward, arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(
                configs['file_name']+'_{}_{}'.format(time_data, epoch))

    writer.close()
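A possible invocation sketch for dqn_train, assuming only the configuration keys read in this snippet and a standard SUMO command list for traci.start; the identifiers and values below are illustrative placeholders, and the project's Trainer will likely require additional keys:

import time

configs = {
    'model': 'base',
    'num_epochs': 200,
    'max_steps': 3600,
    'tl_rl_list': ['n_1_1'],            # hypothetical traffic-light id
    'num_lanes': 3,
    'current_path': '.',
    'file_name': 'dqn_3x3',
    'target_update_period': 2,
}
time_data = time.strftime('%Y-%m-%d_%H-%M-%S')
sumoCmd = ['sumo', '-c', 'net.sumocfg']  # hypothetical .sumocfg; use 'sumo-gui' to watch the simulation
dqn_train(configs, time_data, sumoCmd)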
Example #3
        val_psnr = 0
        val_ssim = 0
        for i in range(len(valid_hr_imgs)):
            hr = valid_hr_imgs[i]
            lr = valid_lr_imgs[i]
            [lr, hr] = utils.datatype([lr, hr])

            hr_expand = np.expand_dims(hr, axis=0)
            lr_expand = np.expand_dims(lr, axis=0)

            psnr, ssim, sr_expand = sess.run([PSNR, SSIM, tensor_sr], {
                tensor_lr: lr_expand,
                tensor_hr: hr_expand
            })
            sr = np.squeeze(sr_expand)
            utils.update_tensorboard(epoch, writer, i, lr, sr, hr)
            val_psnr += psnr
            val_ssim += ssim

        val_psnr = val_psnr / len(valid_hr_imgs)
        val_ssim = val_ssim / len(valid_hr_imgs)
        if val_psnr > best_psnr:
            best_psnr = val_psnr
            best_epoch = epoch
            print('Saving new best model')

            ## save model
            saver.save(sess, os.path.join(checkpoint_dir, 'model.ckpt'))
        writer.add_scalar('Validate PSNR', val_psnr, epoch)
        writer.add_scalar('Validate SSIM', val_ssim, epoch)
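The PSNR, SSIM and tensor_sr tensors evaluated in this validation fragment are defined outside the excerpt. A minimal sketch of how they could be built in TF 1.x graph mode, assuming pixel values scaled to [0, 1] and using a single convolution as a stand-in for the real super-resolution network (both assumptions, not taken from the source):

import tensorflow as tf

def model(lr_images):
    # stand-in for the actual SR network: one 3x3 convolution that keeps the image shape
    return tf.layers.conv2d(lr_images, filters=3, kernel_size=3, padding='same')

tensor_lr = tf.placeholder(tf.float32, [None, None, None, 3], name='lr')  # low-resolution input
tensor_hr = tf.placeholder(tf.float32, [None, None, None, 3], name='hr')  # high-resolution target
tensor_sr = model(tensor_lr)

# batch-averaged image quality metrics
PSNR = tf.reduce_mean(tf.image.psnr(tensor_sr, tensor_hr, max_val=1.0))
SSIM = tf.reduce_mean(tf.image.ssim(tensor_sr, tensor_hr, max_val=1.0))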
Example #4
def dqn_train(configs, time_data, sumoCmd):
    from Agent.dqn import Trainer
    if configs['model'] == 'base':
        from Env.Env import TL3x3Env
    elif configs['model'] == 'frap':
        from Env.FRAP import TL3x3Env
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    tl_rl_list = configs['tl_rl_list']
    # init agent and tensorboard writer
    writer = SummaryWriter(
        os.path.join(configs['current_path'], 'training_data', time_data))
    agent = Trainer(configs)
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        env = TL3x3Env(configs)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G' * configs['num_lanes'], 'G', 'r' * configs['num_lanes'],
                'r'))
        step = 0
        done = False
        # state initialization
        # agent setting
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        state = env.get_state()
        action_distribution = tuple()
        a = time.time()
        while step < MAX_STEPS:
            '''
            # state = env.get_state(action)  # partial observability is handled inside env
            action=agent.get_action(state)
            env.step(action)
            reward=env.get_reward()
            next_state=env.get_state()
            # if traci.inductionloop.getLastStepVehicleNumber("0") > 0:
            store transition in D (experience replay)
            Sample random minibatch from D
            step += 1
            state=next_state


            set yi
            '''

            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            env.step(action)  # apply the action

            for _ in range(20):  # advance 20 steps with the chosen action
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            next_state = env.get_state()  # next state

            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)

            for _ in range(5):  # five more steps for the yellow phase
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput

            reward = env.get_reward()  # reward delayed by 25 seconds
            agent.save_replay(state, action, reward, next_state)  # dqn
            agent.update(done)
            state = next_state
            total_reward += reward

            # after the 20 seconds of green, the yellow phase follows

        agent.update_hyperparams(epoch)  # lr and epsilon update
        if epoch % 2 == 0:
            agent.target_update()  # dqn
        b = time.time()
        traci.close()
        print("time:", b - a)
        epoch += 1
        # once in an epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(configs['file_name'] +
                               '_{}_{}'.format(time_data, epoch))

    writer.close()
Example #5
def ppo_train(configs, time_data, sumoCmd):
    from Agent.ppo import Trainer
    if configs['model'] == 'base':
        from Env.Env import TL3x3Env
    elif configs['model'] == 'frap':
        from Env.FRAP import TL3x3Env
    tl_rl_list = configs['tl_rl_list']
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    # init agent and tensorboard writer
    agent = Trainer(configs)
    writer = SummaryWriter(
        os.path.join(configs['current_path'], 'training_data', time_data))
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    ppo_update_step = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G' * configs['num_lanes'], 'G', 'r' * configs['num_lanes'],
                'r'))
        env = TL3x3Env(configs)
        # env = GridEnv( configs)
        step = 0
        done = False
        # state initialization
        # agent setting
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        state = env.get_state()
        action_distribution = tuple()
        a = time.time()
        while step < MAX_STEPS:

            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            env.step(action)  # apply the action
            ppo_update_step += 1

            for _ in range(20):  # advance 20 steps with the chosen action
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            next_state = env.get_state()  # next state

            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)

            for _ in range(5):  # five more steps for the yellow phase
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput

            reward = env.get_reward()  # reward delayed by 25 seconds
            agent.memory.rewards.append(reward)
            if step >= MAX_STEPS:
                done = True
            agent.memory.dones.append(done)
            state = next_state
            total_reward += reward
            if ppo_update_step % 400 == 0:
                agent.update()
                agent.update_hyperparams(epoch)  # lr update
                ppo_update_step = 0

        b = time.time()
        traci.close()
        print("time:", b - a)
        epoch += 1
        # once in an epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(configs['file_name'] +
                               '_{}_{}'.format(time_data, epoch))

    writer.close()
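The agent.memory buffer in ppo_train is only appended to here (rewards and dones). A minimal sketch of such a rollout buffer; only .rewards and .dones are visible in the snippet, the remaining lists are the usual PPO bookkeeping and the project's own Memory class may differ:

class Memory:
    def __init__(self):
        self.states = []     # assumed field
        self.actions = []    # assumed field
        self.logprobs = []   # assumed field
        self.rewards = []    # appended in ppo_train
        self.dones = []      # appended in ppo_train

    def clear(self):
        # emptied after each PPO update
        for buf in (self.states, self.actions, self.logprobs, self.rewards, self.dones):
            buf.clear()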
Example #6
def REINFORCE_train(configs, time_data, sumoCmd):
    from Agent.REINFORCE import Trainer
    from Agent.REINFORCE import DEFAULT_CONFIG
    from Env.Env import TL3x3Env
    tl_rl_list = configs['tl_rl_list']
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    # init agent and tensorboard writer
    agent = Trainer(configs)
    writer = SummaryWriter(
        os.path.join(configs['current_path'], 'training_data', time_data))
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G' * configs['num_lanes'], 'G', 'r' * configs['num_lanes'],
                'r'))
        env = TL3x3Env(configs)
        # env = GridEnv( configs)
        step = 0
        done = False
        # state initialization
        # agent setting
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        state = env.get_state()
        while step < MAX_STEPS:

            action = agent.get_action(state)
            env.step(action)  # apply the action
            next_state = env.get_state()

            for _ in range(20):  # advance 20 steps with the chosen action
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            # after the 20 seconds of green, the yellow phase follows
            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)

            for _ in range(5):  # five more steps for the yellow phase
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput

            reward = env.get_reward()
            prob = agent.get_prob()
            agent.put_data((reward, prob[action]))

            state = next_state
            total_reward += reward
            if step >= MAX_STEPS:
                done = True

        agent.update(done)
        agent.update_hyperparams(epoch)  # lr and epsilon update
        traci.close()
        epoch += 1
        # once in an epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))

    writer.close()
def city_dqn_train(configs, time_data, sumoCmd):
    from Agent.super_dqn import Trainer
    if configs['model'] == 'city':
        from Env.CityEnv import CityEnv

    phase_num_matrix = torch.tensor(  # maximum number of phases for each traffic light
        [len(configs['traffic_node_info'][index]['phase_duration']) for _, index in enumerate(configs['traffic_node_info'])])
    # init agent and tensorboard writer
    writer = SummaryWriter(os.path.join(
        configs['current_path'], 'training_data', time_data))
    agent = Trainer(configs)
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    NUM_AGENT = configs['num_agent']
    DEVICE = configs['device']
    TL_RL_LIST = configs['tl_rl_list']
    MAX_PHASES = configs['max_phase_num']
    MAX_STEPS = configs['max_steps']
    OFFSET = torch.tensor(configs['offset'],  # i*10
                          device=DEVICE, dtype=torch.int)
    TL_PERIOD = torch.tensor(
        configs['tl_period'], device=DEVICE, dtype=torch.int)
    epoch = 0
    while epoch < configs['num_epochs']:
        step = 0
        if configs['randomness']:
            tmp_sumoCmd = sumoCmd + ['--scale', str(1.5 + random())]  # 1.5~2.5
        else:
            if configs['network'] == 'dunsan' or 'grid' in configs['network']:
                tmp_sumoCmd = sumoCmd + ['--scale', str(configs['scale'])]
            else:
                tmp_sumoCmd = sumoCmd
        traci.start(tmp_sumoCmd)
        env = CityEnv(configs)
        # Total Initialization
        actions = torch.zeros(
            (NUM_AGENT, configs['action_size']), dtype=torch.int, device=DEVICE)
        # mask matrix: True when a TL period ends
        mask_matrix = torch.zeros(
            (NUM_AGENT), dtype=torch.bool, device=DEVICE)

        # t increases only up to the maximum period
        t_agent = torch.zeros(
            (NUM_AGENT), dtype=torch.int, device=DEVICE)
        t_agent -= OFFSET

        # action duration matrix: collect_state runs when t matches an entry; missing phases are zero-padded
        action_matrix = torch.zeros(
            (NUM_AGENT, MAX_PHASES), dtype=torch.int, device=DEVICE)  # the 3-second yellow has to be accounted for
        action_index_matrix = torch.zeros(
            (NUM_AGENT), dtype=torch.long, device=DEVICE)  # which phase each TL is currently in
        action_update_mask = torch.eq(   # check whether each action needs updating now
            t_agent, action_matrix[0, action_index_matrix]).view(NUM_AGENT)  # the 0 is for indexing

        # reset to 0 once the period maximum is reached (compared against the offset)
        clear_matrix = torch.eq(t_agent % TL_PERIOD, 0)
        t_agent[clear_matrix] = 0
        # if the phase should advance, increment the action index (by tensor slicing)
        action_index_matrix[action_update_mask] += 1
        action_index_matrix[clear_matrix] = 0

        # mask update: set entries to True
        mask_matrix[clear_matrix] = True
        mask_matrix[~clear_matrix] = False

        # state initialization
        state = env.collect_state(
            action_update_mask, action_index_matrix, mask_matrix)
        total_reward = 0

        # agent setting
        arrived_vehicles = 0
        a = time.time()
        while step < MAX_STEPS:
            # choose the action
            actions = agent.get_action(state, mask_matrix)
            if mask_matrix.sum() > 0:
                print(actions.transpose(1, 2))
            # convert to the action format: a matrix of the times at which each TL advances to its next phase
            action_matrix = env.calc_action(
                action_matrix, actions, mask_matrix)
            # values are cumulative

            # apply to the environment
            # applies the actions; traci.simulationStep is called inside
            env.step(
                actions, mask_matrix, action_index_matrix, action_update_mask)

            # advance one second overall; traci stepping happens inside env.step
            step += 1
            t_agent += 1
            # reset to 0 once the period maximum is reached (compared against the offset)
            clear_matrix = torch.eq(t_agent % TL_PERIOD, 0)

            # if the phase should advance, increment the action index (by tensor slicing)
            for idx, _ in enumerate(TL_RL_LIST):
                action_update_mask[idx] = torch.eq(  # the update must be decided from the actual current phase only
                    t_agent[idx], action_matrix[idx, action_index_matrix[idx]].view(-1))  # indexed per agent

            action_index_matrix[action_update_mask] += 1
            # when an agent passes its last phase, reset that agent's action index to 0
            action_index_matrix[clear_matrix] = 0
            
            # mask update: set entries to True
            t_agent[clear_matrix] = 0
            # print(t_agent,action_index_matrix,step,action_update_mask)
            mask_matrix[clear_matrix] = True
            mask_matrix[~clear_matrix] = False

            next_state = env.collect_state(
                action_update_mask, action_index_matrix, mask_matrix)
            # fetch per-agent transitions from env; starts once step >= max_offset + max_period
            if step >= int(torch.max(OFFSET)+torch.max(TL_PERIOD)) and mask_matrix.sum() > 0:
                rep_state, rep_action, rep_reward, rep_next_state = env.get_state(
                    mask_matrix)
                agent.save_replay(rep_state, rep_action, rep_reward,
                                  rep_next_state, mask_matrix)  # dqn
            # update
            agent.update(mask_matrix)

            state = next_state
            # info
            arrived_vehicles += traci.simulation.getArrivedNumber()

        agent.target_update(epoch)
        agent.update_hyperparams(epoch)  # lr and epsilon update
        b = time.time()
        traci.close()
        print("time:", b-a)
        epoch += 1
        # once in an epoch
        print('======== {} epoch/ return: {:.5f} arrived number:{}'.format(epoch,
                                                                           env.cum_reward.sum(), arrived_vehicles))
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        env.test_val = 0
        if epoch % 50 == 0:
            agent.save_weights(
                configs['file_name']+'_{}'.format(epoch))

    writer.close()
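The update_tensorboard helper shared by these training loops is not part of the listing. A minimal sketch with the same signature, logging only quantities the snippets themselves expose; the attribute names checked on env and agent are assumptions, and the project's own version likely records more metrics:

def update_tensorboard(writer, epoch, env, agent, arrived_vehicles):
    writer.add_scalar('episode/arrived_vehicles', arrived_vehicles, epoch)
    if hasattr(env, 'cum_reward'):          # cum_reward appears in city_dqn_train
        writer.add_scalar('episode/return', float(env.cum_reward.sum()), epoch)
    if hasattr(agent, 'epsilon'):           # assumed DQN-style exploration rate
        writer.add_scalar('hyperparams/epsilon', agent.epsilon, epoch)
    writer.flush()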