# Example no. 1
    def __init__(self, configs):
        """Build the single super-Q-network agent.

        Merges *configs* with ``DEFAULT_CONFIG`` (strict merge for
        train/simulate, non-conflicting merge for test), caches the
        hyperparameters used during learning, and constructs the
        main/target ``SuperQNetwork`` pair with its optimizer and
        learning-rate scheduler.
        """
        super().__init__(configs)
        if configs['mode'] == 'train' or configs['mode'] == 'simulate':
            # a fresh run gets its own model directory under training_data
            os.mkdir(
                os.path.join(self.configs['current_path'], 'training_data',
                             self.configs['time_data'], 'model'))
            self.configs = merge_dict(configs, DEFAULT_CONFIG)
        else:  # test
            self.configs = merge_dict_non_conflict(configs, DEFAULT_CONFIG)
        self.num_agent = len(self.configs['tl_rl_list'])
        self.state_space = self.configs['state_space']

        # action space
        # rate action space
        self.rate_action_space = self.configs['rate_action_space']
        # time action space
        self.time_action_space = self.configs['time_action_space']
        self.action_size = self.configs['action_size']
        self.gamma = self.configs['gamma']
        self.epsilon = self.configs['epsilon']
        self.criterion = nn.MSELoss()
        self.lr = self.configs['lr']
        self.lr_decay_rate = self.configs['lr_decay_rate']
        self.epsilon_decay_rate = self.configs['epsilon_decay_rate']
        self.batch_size = self.configs['batch_size']
        self.device = self.configs['device']
        self.running_loss = 0
        # self.writer=writer

        # NN composition
        # must be sized differently per intersection
        self.rate_key_list = list()
        rate_key = None
        for key in self.configs['traffic_node_info'].keys():
            num_phase = self.configs['traffic_node_info'][key]['num_phase']
            # Test-mode configs are reloaded from disk, so rate_action_space
            # is keyed by strings; train/simulate keep the original int keys.
            # BUG FIX: the original assigned rate_key only for 'train' and
            # 'test', leaving it unbound (NameError) in 'simulate' mode even
            # though 'simulate' is handled above; treat every non-test mode
            # like 'train'.
            if self.configs['mode'] == 'test':
                rate_key = str(num_phase)
            else:
                rate_key = num_phase
            self.rate_key_list.append(rate_key)

        # NOTE(review): only the LAST node's rate_key sizes both networks —
        # confirm this is intended when nodes have differing phase counts.
        self.mainSuperQNetwork = SuperQNetwork(
            self.state_space, self.rate_action_space[rate_key],
            self.time_action_space[0], self.configs)
        self.targetSuperQNetwork = SuperQNetwork(
            self.state_space, self.rate_action_space[rate_key],
            self.time_action_space[0], self.configs)
        # hard update, optimizer setting
        self.optimizer = optim.Adadelta(self.mainSuperQNetwork.parameters(),
                                        lr=self.configs['lr'])
        hard_update(self.targetSuperQNetwork, self.mainSuperQNetwork)
        self.lr_scheduler = optim.lr_scheduler.StepLR(
            optimizer=self.optimizer,
            step_size=self.configs['lr_decay_period'],
            gamma=self.configs['lr_decay_rate'])
def train(flags, time_data, configs, sumoConfig):
    """Assemble the SUMO command line, fill training configs, and launch
    city DQN training."""
    # pick the SUMO binary according to the GUI flag
    sumoBinary = checkBinary('sumo-gui' if flags.disp == True else 'sumo')
    sumoCmd = [sumoBinary, "-c", sumoConfig, '--start']

    # training-related configuration
    configs.update({
        'num_agent': len(configs['tl_rl_list']),
        'algorithm': flags.algorithm.lower(),
        'randomness': flags.randomness,
    })
    print("training algorithm: ", configs['algorithm'])
    configs['action_size'] = 2
    # state space is decided in map.py for non-grid networks
    if flags.network.lower() == 'grid':
        configs['state_space'] = 10

    configs['model'] = 'city'
    # imported locally to avoid circular imports at module load time
    from train import city_dqn_train
    from configs import SUPER_DQN_TRAFFIC_CONFIGS
    configs = merge_dict_non_conflict(configs, SUPER_DQN_TRAFFIC_CONFIGS)
    city_dqn_train(configs, time_data, sumoCmd)
def main(args):
    """Entry point: seed RNGs, build the network/configs from CLI flags,
    then dispatch to train / test / simulate.

    Side effects: may create ``data/<mode>`` directories and generate
    SUMO network/route/cfg files via the chosen Network class.
    """
    # fixed seed for reproducibility across random, torch, and numpy
    random_seed = 20000
    random.seed(random_seed)
    torch.manual_seed(random_seed)
    np.random.seed(random_seed)
    flags = parse_args(args)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda and flags.gpu == True else "cpu")
    # device = torch.device('cpu')
    print("Using device: {}".format(device))
    configs = EXP_CONFIGS
    configs['device'] = str(device)
    configs['current_path'] = os.path.dirname(os.path.abspath(__file__))
    configs['mode'] = flags.mode.lower()
    # timestamp identifies this run's output directory and files
    time_data = time.strftime('%m-%d_%H-%M-%S', time.localtime(time.time()))
    configs['time_data'] = str(time_data)
    # ensure data/<mode> exists (only if the data/ root already exists)
    if os.path.exists(os.path.join(os.path.dirname(__file__), 'data')):
        if os.path.exists(
                os.path.join(os.path.dirname(__file__), 'data',
                             configs['mode'])) == False:
            os.mkdir(
                os.path.join(os.path.dirname(__file__), 'data',
                             configs['mode']))
    configs['file_name'] = configs['time_data']
    # check the network
    configs['network'] = flags.network.lower()
    if configs['network'] == 'grid':
        from Network.grid import GridNetwork  # swap here to change networks (revision planned)
        configs['grid_num'] = 5
        configs['scale'] = 1
        if configs['mode'] == 'simulate':
            configs['file_name'] = '{}x{}grid'.format(configs['grid_num'],
                                                      configs['grid_num'])
        elif configs['mode'] == 'test':  # test
            configs['file_name'] = flags.replay_name.lower()
        # Generating Network
        network = GridNetwork(configs, grid_num=configs['grid_num'])
        network.generate_cfg(True, configs['mode'])
        NET_CONFIGS = network.get_configs()
        configs = merge_dict_non_conflict(configs, NET_CONFIGS)

    # Generating Network
    else:  # load from a map file
        print("Load from map file")
        from Network.map import MapNetwork
        # TODO: grid_num should be removed
        configs['grid_num'] = 3
        configs['num_lanes'] = 2
        configs['load_file_name'] = configs['network']
        mapnet = MapNetwork(configs)
        MAP_CONFIGS = mapnet.get_tl_from_xml()

        # copy traffic-light configs parsed from the XML into configs
        for key in MAP_CONFIGS.keys():
            configs[key] = MAP_CONFIGS[key]

        mapnet.gen_net_from_xml()
        mapnet.gen_rou_from_xml()
        mapnet.generate_cfg(True, configs['mode'])
        mapnet._generate_add_xml()
        # per-map scaling factors (stored as strings)
        if configs['network'] == '3x3grid':
            configs['scale'] = str(1)
        if configs['network'] == '5x5grid':
            configs['scale'] = str(1)
        if configs['network'] == '5x5grid_v2':
            configs['scale'] = str(1.5)
        if configs['network'] == 'dunsan':
            configs['scale'] = str(1)
        if configs['network'] == 'dunsan_v2':
            configs['scale'] = str(0.8)
        print("Scale:", configs['scale'])

    # check the environment
    if 'SUMO_HOME' in os.environ:
        tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
        sys.path.append(tools)
    else:
        sys.exit("please declare environment variable 'SUMO_HOME'")

    # check the mode
    if configs['mode'] == 'train':
        # init train setting
        configs['update_type'] = flags.update_type
        sumoConfig = os.path.join(configs['current_path'], 'training_data',
                                  time_data, 'net_data',
                                  configs['file_name'] + '_train.sumocfg')
        train(flags, time_data, configs, sumoConfig)
    elif configs['mode'] == 'test':
        configs['file_name'] = flags.replay_name
        configs['replay_name'] = configs['time_data']
        sumoConfig = os.path.join(configs['current_path'], 'training_data',
                                  time_data, 'net_data',
                                  configs['time_data'] + '_test.sumocfg')
        test(flags, configs, sumoConfig)
    else:  # simulate
        # NOTE(review): 'Net_data' differs in casing from 'net_data' above —
        # confirm this path is correct on case-sensitive filesystems
        sumoConfig = os.path.join(configs['current_path'], 'Net_data',
                                  configs['file_name'] + '_simulate.sumocfg')
        simulate(flags, configs, sumoConfig)
# Example no. 4
    def __init__(self, configs):
        """Build the hierarchical agent: one shared SuperQNetwork feeding a
        per-intersection list of QNetworks.

        Merges *configs* with ``DEFAULT_CONFIG`` (strict merge for
        train/simulate, non-conflicting merge for test), caches learning
        hyperparameters, constructs main/target network pairs, hard-syncs
        the targets, and creates one Adam optimizer per intersection over
        the chained super-network + per-node parameters.
        """
        super().__init__(configs)
        if configs['mode'] == 'train' or configs['mode'] == 'simulate':
            # a fresh run gets its own model directory under training_data
            os.mkdir(
                os.path.join(self.configs['current_path'], 'training_data',
                             self.configs['time_data'], 'model'))
            self.configs = merge_dict(configs, DEFAULT_CONFIG)
        else:  # test
            self.configs = merge_dict_non_conflict(configs, DEFAULT_CONFIG)
        self.num_agent = len(self.configs['tl_rl_list'])
        self.state_space = self.configs['state_space']

        # action space
        # rate action space
        self.rate_action_space = self.configs['rate_action_space']
        # time action space
        self.time_action_space = self.configs['time_action_space']
        self.action_size = self.configs['action_size']
        self.gamma = self.configs['gamma']
        self.epsilon = self.configs['epsilon']
        self.criterion = nn.SmoothL1Loss()
        self.lr = self.configs['lr']
        self.lr_decay_rate = self.configs['lr_decay_rate']
        self.epsilon_decay_rate = self.configs['epsilon_decay_rate']
        self.batch_size = self.configs['batch_size']
        self.device = self.configs['device']
        self.running_loss = 0
        # super-network maps one value per agent to two values per agent
        self.super_output_size = int(self.num_agent * 2)
        self.super_input_size = int(self.num_agent)
        # NN composition
        self.mainSuperQNetwork = SuperQNetwork(self.super_input_size,
                                               self.super_output_size,
                                               self.configs)
        self.targetSuperQNetwork = SuperQNetwork(self.super_input_size,
                                                 self.super_output_size,
                                                 self.configs)
        # must be sized differently per intersection
        self.mainQNetwork = list()
        self.targetQNetwork = list()
        self.rate_key_list = list()
        for i, key in enumerate(self.configs['traffic_node_info'].keys()):
            num_phase = self.configs['traffic_node_info'][key]['num_phase']
            # Test-mode configs are reloaded from disk, so rate_action_space
            # is keyed by strings; train/simulate keep the original int keys.
            # BUG FIX: the original assigned rate_key only for 'train' and
            # 'test', leaving it unbound (NameError) in 'simulate' mode even
            # though 'simulate' is handled above; treat every non-test mode
            # like 'train'.
            if self.configs['mode'] == 'test':
                rate_key = str(num_phase)
            else:
                rate_key = num_phase
            self.rate_key_list.append(rate_key)
            self.mainQNetwork.append(
                QNetwork(self.super_output_size,
                         self.rate_action_space[rate_key],
                         self.time_action_space[i], self.configs))
            self.targetQNetwork.append(
                QNetwork(self.super_output_size,
                         self.rate_action_space[rate_key],
                         self.time_action_space[i], self.configs))

        # hard update, optimizer setting: each per-node optimizer also
        # updates the shared super-network's parameters
        self.optimizer = list()
        hard_update(self.targetSuperQNetwork, self.mainSuperQNetwork)
        for targetQ, mainQ in zip(self.targetQNetwork, self.mainQNetwork):
            hard_update(targetQ, mainQ)
            params = chain(self.mainSuperQNetwork.parameters(),
                           mainQ.parameters())
            self.optimizer.append(optim.Adam(params, lr=self.lr))

        # Network
        print("========SUPER NETWORK==========\n", self.mainSuperQNetwork)
        print("========NETWORK==========\n")
        for i in range(self.num_agent):
            print(self.mainQNetwork[i])