Example #1
# Module-level imports assumed by these snippets (not shown in the original source):
import os
import time

import torch
import traci
from sumolib import checkBinary
from torch.utils.tensorboard import SummaryWriter

from utils import update_tensorboard

def dqn_train(configs, time_data, sumoCmd):
    # Environment Setting
    from Agent.dqn import Trainer
    if configs['model'] == 'base':
        from Env.Env import TL3x3Env
    elif configs['model'] == 'frap':
        from Env.FRAP import TL3x3Env
    # EXP_CONFIG Setting
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    tl_rl_list = configs['tl_rl_list']
    epoch = 0
    # init agent and tensorboard writer
    # agent setting
    agent = Trainer(configs)
    writer = SummaryWriter(os.path.join(
        configs['current_path'], 'training_data', time_data))
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    while epoch < NUM_EPOCHS:
        # Epoch Start
        traci.start(sumoCmd)
        step = 0
        action_distribution = tuple()
        # Epoch Start setting
        env = TL3x3Env(configs)
        traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
            'G'*configs['num_lanes'], 'G', 'r'*configs['num_lanes'], 'r'))
        before_action = torch.ones((1, len(tl_rl_list)))
        done = False
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        # state initialization
        state = env.get_state()
        # Time Check
        a = time.time()
        while step < MAX_STEPS:

            # choose an action for the current state
            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))

            # if the action changed from before_action, insert a yellow phase first;
            # if it is unchanged, proceed without yellow
            if (before_action != action).any():
                traci.trafficlight.setRedYellowGreenState(
                    tl_rl_list[0], 'y' * 28)
            arrived_vehicles += simulation_step(env, 5)
            step += 5

            # apply the action to the environment
            env.step(action)

            # run 20 more seconds after applying the action
            arrived_vehicles += simulation_step(env, 20)
            step += 20
            next_state = env.get_state()  # next state

            reward = env.get_reward()  # reward delayed by 20 seconds
            agent.save_replay(state, action, reward, next_state)  # store transition (dqn)
            agent.update(done)
            state = next_state
            total_reward += reward
            before_action = action
            # the yellow phase is applied at the top of the next iteration

        b = time.time()
        traci.close()
        print("time:", b-a)
        epoch += 1
        # update hyper parameter
        agent.update_hyperparams(epoch)  # lr and epsilon update
        if epoch % agent.configs['target_update_period'] == 0:
            agent.target_update()  # dqn
        # once in an epoch update tensorboard
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(epoch,
                                                                       total_reward, arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(
                configs['file_name']+'_{}_{}'.format(time_data, epoch))

    writer.close()
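
Example #1 calls a simulation_step(env, n) helper that is not defined in the snippet. Below is a minimal sketch of what it presumably does, reconstructed from the inlined loops in Examples #2-#4; only the name and signature come from the call site, the body is an assumption.

def simulation_step(env, n):
    """Hypothetical helper: advance SUMO by n steps and return the throughput."""
    arrived = 0
    for _ in range(n):
        traci.simulationStep()   # advance the simulation by one step
        env.collect_state()      # let the environment accumulate observations
        arrived += traci.simulation.getArrivedNumber()  # vehicles that arrived this step
    return arrived
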
Example #2
def dqn_train(configs, time_data, sumoCmd):
    from Agent.dqn import Trainer
    if configs['model'] == 'base':
        from Env.Env import TL3x3Env
    elif configs['model'] == 'frap':
        from Env.FRAP import TL3x3Env
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    tl_rl_list = configs['tl_rl_list']
    # init agent and tensorboard writer
    writer = SummaryWriter(
        os.path.join(configs['current_path'], 'training_data', time_data))
    agent = Trainer(configs)
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        env = TL3x3Env(configs)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G' * configs['num_lanes'], 'G', 'r' * configs['num_lanes'],
                'r'))
        step = 0
        done = False
        # state initialization
        # agent setting
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        state = env.get_state()
        action_distribution = tuple()
        a = time.time()
        while step < MAX_STEPS:
            '''
            DQN loop outline:
                # state = env.get_state(action)  # partially handled inside env
                action = agent.get_action(state)
                env.step(action)
                reward = env.get_reward()
                next_state = env.get_state()
                # if traci.inductionloop.getLastStepVehicleNumber("0") > 0:
                store the transition in replay memory D
                sample a random minibatch from D and set the targets y_i
                step += 1
                state = next_state
            '''

            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            env.step(action)  # apply the action

            for _ in range(20):  # hold the green phase for 20 simulation steps
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            next_state = env.get_state()  # next state

            # after the green phase, run a yellow phase
            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)

            for _ in range(5):  # 5 steps of yellow
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput

            reward = env.get_reward()  # reward delayed by 25 steps
            agent.save_replay(state, action, reward, next_state)  # store transition (dqn)
            agent.update(done)
            state = next_state
            total_reward += reward

        agent.update_hyperparams(epoch)  # lr and epsilon update
        if epoch % 2 == 0:
            agent.target_update()  # dqn
        b = time.time()
        traci.close()
        print("time:", b - a)
        epoch += 1
        # once in an epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(configs['file_name'] +
                               '_{}_{}'.format(time_data, epoch))

    writer.close()
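
Every example logs once per epoch through update_tensorboard(writer, epoch, env, agent, arrived_vehicles), imported from utils in Example #5 but not shown here. A minimal sketch of such a logger follows; the scalar tags and the agent.epsilon attribute are assumptions, not the repository's actual implementation.

def update_tensorboard(writer, epoch, env, agent, arrived_vehicles):
    # Hypothetical logger: the real utils.update_tensorboard may record different scalars.
    writer.add_scalar('episode/arrived_vehicles', arrived_vehicles, epoch)
    if hasattr(agent, 'epsilon'):  # epsilon exists only for epsilon-greedy agents
        writer.add_scalar('hyperparameter/epsilon', agent.epsilon, epoch)
    writer.flush()
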
Example #3
def REINFORCE_train(configs, time_data, sumoCmd):
    from Agent.REINFORCE import Trainer
    from Agent.REINFORCE import DEFAULT_CONFIG
    from Env.Env import TL3x3Env
    tl_rl_list = configs['tl_rl_list']
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    # init agent and tensorboard writer
    agent = Trainer(configs)
    writer = SummaryWriter(
        os.path.join(configs['current_path'], 'training_data', time_data))
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G' * configs['num_lanes'], 'G', 'r' * configs['num_lanes'],
                'r'))
        env = TL3x3Env(configs)
        # env = GridEnv( configs)
        step = 0
        done = False
        # state initialization
        # agent setting
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        state = env.get_state()
        while step < MAX_STEPS:

            action = agent.get_action(state)
            env.step(action)  # apply the action
            next_state = env.get_state()

            for _ in range(20):  # hold the green phase for 20 simulation steps
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            # after the green phase, run a yellow phase
            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)

            for _ in range(5):  # 5 steps of yellow
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput

            reward = env.get_reward()
            prob = agent.get_prob()
            agent.put_data((reward, prob[action]))

            state = next_state
            total_reward += reward
            if step >= MAX_STEPS:
                done = True

        agent.update(done)
        agent.update_hyperparams(epoch)  # lr and epsilon update
        traci.close()
        epoch += 1
        # once in an epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))

    writer.close()
Example #4
def ppo_train(configs, time_data, sumoCmd):
    from Agent.ppo import Trainer
    if configs['model'] == 'base':
        from Env.Env import TL3x3Env
    elif configs['model'] == 'frap':
        from Env.FRAP import TL3x3Env
    tl_rl_list = configs['tl_rl_list']
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    # init agent and tensorboard writer
    agent = Trainer(configs)
    writer = SummaryWriter(
        os.path.join(configs['current_path'], 'training_data', time_data))
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    ppo_update_step = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G' * configs['num_lanes'], 'G', 'r' * configs['num_lanes'],
                'r'))
        env = TL3x3Env(configs)
        # env = GridEnv( configs)
        step = 0
        done = False
        # state initialization
        # agent setting
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        state = env.get_state()
        action_distribution = tuple()
        a = time.time()
        while step < MAX_STEPS:

            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            env.step(action)  # apply the action
            ppo_update_step += 1

            for _ in range(20):  # hold the green phase for 20 simulation steps
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            next_state = env.get_state()  # next state

            # after the green phase, run a yellow phase
            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)

            for _ in range(5):  # 5 steps of yellow
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput

            reward = env.get_reward()  # reward delayed by 25 steps
            agent.memory.rewards.append(reward)
            if step >= MAX_STEPS:
                done = True
            agent.memory.dones.append(done)
            state = next_state
            total_reward += reward
            if ppo_update_step % 400 == 0:
                agent.update()
                agent.update_hyperparams(epoch)  # lr update
                ppo_update_step = 0

        b = time.time()
        traci.close()
        print("time:", b - a)
        epoch += 1
        # once in an epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(configs['file_name'] +
                               '_{}_{}'.format(time_data, epoch))

    writer.close()
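
Example #4 appends rewards and done flags to agent.memory, which is defined inside Agent.ppo and not shown here. A minimal sketch of a rollout buffer that would support those calls; the class name and the extra fields are assumptions.

class RolloutMemory:
    """Hypothetical PPO rollout buffer; only .rewards and .dones are touched in the snippet."""

    def __init__(self):
        self.states, self.actions, self.log_probs = [], [], []
        self.rewards, self.dones = [], []

    def clear(self):
        # wipe all stored trajectories after each PPO update
        for buf in (self.states, self.actions, self.log_probs, self.rewards, self.dones):
            buf.clear()
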
Example #5
def test(flags, configs, sumoConfig):
    from Env.Env import TL3x3Env
    from Agent.dqn import Trainer
    from Env.MultiEnv import GridEnv
    from utils import save_params, load_params, update_tensorboard

    # init test setting
    sumoBinary = checkBinary('sumo-gui')
    sumoCmd = [sumoBinary, "-c", sumoConfig]

    # setting the rl list
    tl_rl_list = configs['tl_rl_list']
    MAX_STEPS = configs['max_steps']
    reward = 0
    traci.start(sumoCmd)
    agent = Trainer(configs)
    # setting the replay
    if flags.replay_name is not None:
        agent.load_weights(flags.replay_name)
        configs = load_params(configs, flags.replay_name)
    env = TL3x3Env(configs)
    step = 0
    # state initialization
    state = env.get_state()
    # agent setting
    total_reward = 0
    arrived_vehicles = 0
    action_distribution = tuple()
    with torch.no_grad():
        while step < MAX_STEPS:

            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            env.step(action)  # apply the action
            for _ in range(20):  # hold the green phase for 20 simulation steps
                env.collect_state()
                traci.simulationStep()
                step += 1

            # after the green phase, run a yellow phase
            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)
            for _ in range(5):  # 5 steps of yellow
                traci.simulationStep()
                env.collect_state()
                step += 1

            reward = env.get_reward()
            next_state = env.get_state()
            # agent.save_replay(state, action, reward, next_state)
            state = next_state
            total_reward += reward
            step += 1
            if step == MAX_STEPS:
                done = True
            # agent.update(done)  # no update during testing
            # loss += agent.get_loss()  # total loss
            arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            traci.simulationStep()
            # if step % 200 == 0:

        agent.target_update()
        traci.close()
        print('======== return: {} arrived number:{}'.format(
            total_reward, arrived_vehicles))
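
Example #5 builds sumoCmd with checkBinary('sumo-gui') for visual testing. A minimal sketch of how the training entry points above might be driven, assuming a headless sumo binary and a configs dict containing the keys the snippets read; the file names and values are placeholders, and the real Trainer/Env classes likely need additional keys.

if __name__ == '__main__':
    sumoBinary = checkBinary('sumo')                      # headless SUMO for training
    sumoCmd = [sumoBinary, '-c', 'Net/cross3x3.sumocfg']  # placeholder .sumocfg path
    configs = {
        'model': 'base',                # selects Env.Env.TL3x3Env
        'num_epochs': 100,
        'max_steps': 3600,
        'tl_rl_list': ['n_1_1'],        # placeholder traffic-light id
        'num_lanes': 3,
        'current_path': os.getcwd(),
        'file_name': 'dqn_cross3x3',
        'target_update_period': 2,
    }
    time_data = time.strftime('%m-%d_%H-%M-%S', time.localtime())
    dqn_train(configs, time_data, sumoCmd)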