def main():
    # -------------- preparation --------------
    rst_path, sim_path = generatePath(current_time)  # Create a new folder for the experiment
    RL = QLearningTable(list(range(len(green_states))))  # Initialize the Q-learning framework
    feed_path = '{}/results/{}/qtable.csv'.format(WORKSPACE, 'p5i3g0')
    RL.feedQTable(feed_path)  # Helpful when inheriting from a previously trained agent

    # -------------- training --------------
    trainAgent(RL, rst_path, sim_path)

    # -------------- testing --------------
    RL.epsilon = 1  # Epsilon-greedy no longer selects random actions
    fixed, rl, actuated = testAgent('fixed', RL), testAgent('rl', RL), testAgent('actuated', RL)
    plotTestResult(rl, fixed, actuated, sim_path)
    flow_scenarios = ['-50%', '-25%', '0%', '+25%', '+50%']
    pushAgent(flow_scenarios, sim_path, RL)  # Explore the limits of the trained agent

    # -------------- results --------------
    RL.saveQTable('{}/qtable.csv'.format(sim_path))
    RL.plotCumulativeReward(sim_path)  # Plot the cumulative reward
    RL_params = {'lr': RL.alpha, 'gamma': RL.gamma,
                 'e_max': RL.e_greedy_max, 'e_inc': RL.e_greedy_increment}
    writeLog(RL_params, rst_path, sim_path, clean=True)  # Record basic information about the experiment

    # -------------- end --------------
    print('\nALL DONE, check {}'.format(str(current_time)))
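# The driver above warm-starts training via feedQTable and persists the result
# with saveQTable. Below is a minimal sketch of what those two methods might
# look like; this is an assumption, not the actual RL_brain implementation.
# It presumes QLearningTable keeps its table as a pandas DataFrame in
# `self.q_table` (consistent with `RL.q_table = df_q_table` in update_realtime
# further down).
import os
import pandas as pd

def feedQTable(self, path):
    # Load a previously saved Q-table so training resumes instead of starting cold.
    if os.path.exists(path):
        self.q_table = pd.read_csv(path, index_col=0)

def saveQTable(self, path):
    # Persist the learned Q-values; feedQTable can pick this file up later.
    self.q_table.to_csv(path)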
def main():
    trained_number = getLastExperiment('p5i3g0')
    RL = QLearningTable(list(range(len(green_states))))
    trained_path = '{}/results/{}/'.format(WORKSPACE, trained_number)
    qtable_path = trained_path + 'qtable.csv'
    RL.feedQTable(qtable_path)
    RL.epsilon = 1  # Pure greedy: evaluate the trained policy without exploration
    fixed, rl, actuated = testAgent('fixed', RL), testAgent('rl', RL), testAgent('actuated', RL)
    plotTestResult(rl, fixed, actuated, trained_path)
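# getLastExperiment is referenced above but not shown in this section. A purely
# hypothetical sketch of one plausible shape: resolve the most recent
# experiment folder under WORKSPACE/results/ that matches the given tag.
import os

def getLastExperiment(tag):
    results_dir = os.path.join(WORKSPACE, 'results')
    candidates = [d for d in os.listdir(results_dir)
                  if tag in d and os.path.isdir(os.path.join(results_dir, d))]
    # Assumption: the newest folder by modification time is the last experiment.
    return max(candidates, key=lambda d: os.path.getmtime(os.path.join(results_dir, d)))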
def update(self):
    # TODO: start_point & end_point should eventually be supplied as inputs
    for i in range(166, 288):
        np.random.seed(i)
        start_point = np.random.randint(0, 800)
        end_point = np.random.randint(801, 1725)
        RL = QLearningTable(self.actions)
        env = Cross(self.next_state_list, self.action_list, self.distance_list,
                    start_point, end_point, self.cross_info)

        # update block
        time_start = time.time()
        for episode in range(100):
            # Simulated-annealing schedule for the greedy factor (tools.SA)
            T = 1000
            epsilon, T = tools.SA(T, episode, 100, 0.95)
            RL.epsilon = epsilon
            if epsilon > 1:
                print("yes")
            print(epsilon)
            episode_start_time = time.time()
            plt.ion()
            observation = env.start_point
            prior_state = observation
            while True:
                index = RL.choose_action(observation, env, 1)
                observation_, reward, done = env.step(observation, index, prior_state)
                # print("observation_:", observation_, "observation:", observation, "prior_state:", prior_state)
                # Visualization (disabled):
                # plt.clf()
                # plt.scatter(self.x[start_point], self.y[start_point], marker='o', s=100, label='start_point', c='yellow')
                # plt.scatter(self.x[end_point], self.y[end_point], marker='^', s=100, label='end_point', c='yellow')
                # plt.scatter(self.x, self.y, s=15, alpha=0.3, c='green')
                # if observation_ == 'end_point':
                #     plt.scatter(self.x[end_point], self.y[end_point], s=15, c='red')
                # elif observation_ == 'terminal':
                #     plt.scatter(self.x[observation], self.y[observation], s=15, c='yellow')
                # else:
                #     plt.scatter(self.x[observation_], self.y[observation_], s=15, c='red')
                # plt.pause(0.01)
                # plt.ioff()
                q_table = RL.learn(observation, index, reward, observation_, 1)
                # print(q_table.loc[observation_])
                prior_state = observation
                observation = observation_
                # Abort an episode stuck for more than 60 s
                current_time = time.time()
                if current_time - episode_start_time > 60:
                    break
                if done:
                    break
            episode_end_time = time.time()
            print('==========================================')
            print(episode + 1, "th episode is completed, time cost:",
                  episode_end_time - episode_start_time)
            print('==========================================')
            print(q_table)
        time_end = time.time()
        print('all episodes completed, time cost:', time_end - time_start)
        table_dir = os.getcwd() + '/table_' + str(configuration.Omega)
        if not os.path.exists(table_dir):
            os.makedirs(table_dir)
        q_table.to_csv(table_dir + '/' + configuration.CITY + '_' + str(start_point) +
                       '_' + str(end_point) + '_q_table.csv', encoding="utf-8")
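# tools.SA is called above as `epsilon, T = tools.SA(T, episode, 100, 0.95)`
# but is not shown in this section. A purely hypothetical sketch of one
# plausible shape, given that T is reset to 1000 before every call and that
# the caller checks whether epsilon ever exceeds 1: cool the temperature
# geometrically with the episode index and map it to an exploitation
# probability that approaches 1, so late episodes act almost greedily.
def SA(T, episode, total_episodes, decay):
    T = T * (decay ** episode)   # geometric cooling driven by the episode index
    epsilon = 1.0 - T / 1000.0   # higher epsilon => fewer random actions
    return epsilon, T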
        # (tail of testAgent: per-step simulation loop)
        if env.isCheckpoint():
            o, s = env.getCurrentOccasion()
            a = RL.chooseAction(s, o)
            env.prolongTL(a)
        if step % VERIFY_INTERVAL == 0:
            env.calWaitingTime()
        step += 1
        env.conn.simulationStep()
    env.conn.close()
    sys.stdout.flush()
    print(round(np.mean(env.waitingtime), 2), len(env.waitingtime))
    return env.waitingtime


if __name__ == '__main__':
    from RL_brain import QLearningTable
    from global_var import green_states, WORKSPACE

    trained_path = '{}/results/{}/'.format(WORKSPACE, 'p5i3g0')
    qtable_path = trained_path + 'qtable.csv'
    RL = QLearningTable(list(range(len(green_states))))
    RL.feedQTable(qtable_path)
    RL.epsilon = 1  # Greedy evaluation only
    fixed, rl, actuated = testAgent('fixed', RL), testAgent('rl', RL), testAgent('actuated', RL)
    plotTestResult(rl, fixed, actuated, trained_path)
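# Each driver sets RL.epsilon = 1 before evaluation, and the training driver's
# comment says epsilon-greedy then no longer selects random actions, i.e.
# epsilon is the probability of exploiting. A minimal sketch of chooseAction
# under that convention; this is an assumption about RL_brain.QLearningTable,
# not its actual code (checkStateExist is an assumed helper):
import numpy as np

def chooseAction(self, state, occasion):
    self.checkStateExist(state)  # assumed helper: lazily add unseen states
    if np.random.uniform() < self.epsilon:
        # Exploit: break ties among equally valued actions at random.
        row = self.q_table.loc[state]
        return np.random.choice(row[row == row.max()].index)
    # Explore: with epsilon == 1 this branch is never taken.
    return np.random.choice(self.actions)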
def update_realtime(self):
    # Start points known to fail (earlier candidate lists kept for reference):
    # error_point = [256, 512, 768, 3, 5, 778, 138, 779, 655, 786, 789, 793, 155, 34, 675, 420, 293, 424, 169, 428,
    #                301, 173, 431, 49, 306, 182, 439, 701, 189, 65, 322, 199, 456, 457, 461, 725, 599, 345, 732,
    #                734, 351, 98, 485, 742, 104, 490, 620, 750, 240, 753, 626, 116, 380]
    # error_point = [750, 240, 189, 155, 199, 485, 306, 457, 380, 626, 116, 461]
    error_point = [
        512, 5, 138, 779, 280, 155, 34, 675, 420, 424, 301, 430, 306, 439,
        701, 189, 317, 63, 322, 199, 457, 461, 589, 725, 215, 599, 345, 732,
        351, 609, 485, 620, 240, 626, 380
    ]
    error_list = []
    # TODO: start_point & end_point should eventually be supplied as inputs
    delay_df = pd.DataFrame(columns=('s_e', 'start_point', 'end_point',
                                     'transfer', 'queue', 'process'))
    cost_list = []
    # for z in range(10):
    time_start = time.time()
    count = 0
    e_count = 0
    for i in range(166, 288):
        flag = False
        # Same random seed as the first training run, so the O-D pairs match
        np.random.seed(i)
        start_point = np.random.randint(0, 800)
        if start_point in error_point:
            continue
        count += 1
        end_point = np.random.randint(801, 1725)
        print(start_point, '-->', end_point)
        # Load the Q-table already saved locally for this O-D pair
        df_q_table = pd.read_csv(
            os.getcwd() + '/table_' + str(self.omega) + '/' + configuration.CITY +
            '_' + str(start_point) + '_' + str(end_point) + '_q_table.csv',
            encoding="utf-8")
        df_q_table = df_q_table.set_index(['Unnamed: 0'])
        df_q_table = df_q_table[['1', '2', '3', '4']].astype(np.float64)
        RL = QLearningTable(self.actions)
        RL.gamma = configuration.VEHICLE_POWER
        # RL.epsilon = 0.95  # (greedy factor could be fixed instead of annealed)
        # Swap in the pre-trained Q-table
        RL.q_table = df_q_table
        env = Cross_2th(self.next_state_list, self.action_list, self.distance_list,
                        start_point, end_point, self.cross_info, self.tel_list,
                        self.df_tel, self.omega)

        # update block
        index_for = 0        # episode counter, for averaging over episodes
        delay_for_sum = 0    # running sums over episodes
        transfer_for_sum = 0
        queue_for_sum = 0
        process_for_sum = 0
        for episode in range(10):
            # Simulated-annealing schedule for the greedy factor (tools.SA)
            T = 1000
            epsilon, T = tools.SA(T, episode, 10, 0.95)
            RL.epsilon = epsilon
            if epsilon > 1:
                print("yes")
            one_episode_start_time = time.time()
            observation = env.start_point
            prior_state = observation
            index_while = 0       # step counter within the episode
            delay_while_sum = 0   # running sums within the episode
            transfer_while_sum = 0
            queue_while_sum = 0
            process_while_sum = 0
            while True:
                index = RL.choose_action(observation, env, 2)
                observation_, reward, done, tel_delay, transfer_time, queue_time, process_time = \
                    env.step_2th(observation, index, prior_state)
                index_while += 1
                delay_while_sum += tel_delay
                transfer_while_sum += transfer_time
                queue_while_sum += queue_time
                process_while_sum += process_time
                # Bail out if the episode is stuck (e.g. in a local optimum)
                current_time = time.time()
                if current_time - one_episode_start_time > 10:
                    flag = True
                    e_count += 1
                    print('error:', start_point, 'x--x', end_point)
                    break
                # (Same scatter-plot visualization as in update(), disabled.)
                # No learning here: the realtime run only replays the trained table.
                # df_q_table = RL.learn(observation, index, reward, observation_, 2)
                prior_state = observation
                observation = observation_
                if done:
                    break
            delay_while_avg = delay_while_sum / index_while
            transfer_while_avg = transfer_while_sum / index_while
            queue_while_avg = queue_while_sum / index_while
            process_while_avg = process_while_sum / index_while
            index_for += 1
            delay_for_sum += delay_while_avg
            transfer_for_sum += transfer_while_avg
            queue_for_sum += queue_while_avg
            process_for_sum += process_while_avg
            one_episode_end_time = time.time()
            if flag:
                break
        delay_avg = delay_for_sum / index_for
        transfer_avg = transfer_for_sum / index_for
        queue_avg = queue_for_sum / index_for
        process_avg = process_for_sum / index_for
        # DataFrame.append was removed in pandas 2.0; a concat-based
        # alternative is sketched after this function.
        delay_df = delay_df.append(
            {
                's_e': str(start_point) + '_' + str(end_point),
                'start_point': start_point,
                'end_point': end_point,
                'transfer': transfer_avg,
                'queue': queue_avg,
                'process': process_avg
            },
            ignore_index=True)
        dir_path = (os.getcwd() + '/table_realtime_Ω_' + str(self.omega) +
                    '_ts_' + str(configuration.TASK_SIZE) +
                    '_cc_' + str(configuration.CPU_CLOCK) +
                    '_vp_' + str(configuration.VEHICLE_POWER))
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
            os.makedirs(dir_path + '/time_cost/')
        df_q_table.to_csv(dir_path + '/' + configuration.CITY + '_' +
                          str(start_point) + '_' + str(end_point) +
                          '_realtime_q_table.csv', encoding="utf-8")
        delay_df.to_csv(dir_path + '/time_cost/' +
                        'TASK_SIZE_' + str(configuration.TASK_SIZE) +
                        '_CPU_CLOCK_' + str(configuration.CPU_CLOCK) +
                        '_VEHICLE_POWER_' + str(configuration.VEHICLE_POWER) +
                        '_time_cost.csv', encoding="utf-8")
        # Break out of the z loop (disabled along with the z loop itself):
        # if count - e_count == 5 * (z + 1):
        #     break
    time_end = time.time()
    time_cost = time_end - time_start - e_count * 10  # discount the 10 s spent on each aborted run
    c_minus = count - e_count
    print('all runs completed, time cost:', time_cost)
    print('==========================================')
    cost_list.append(time_cost)
    print(cost_list)
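# The per-run summary above uses DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0. Under a modern pandas, the equivalent
# one-row concat looks like this (a sketch with placeholder values, keeping
# the same column names used above):
import pandas as pd

delay_df = pd.DataFrame(columns=('s_e', 'start_point', 'end_point',
                                 'transfer', 'queue', 'process'))
row = {'s_e': '12_901', 'start_point': 12, 'end_point': 901,
       'transfer': 0.1, 'queue': 0.2, 'process': 0.3}  # example values only
# Wrap the dict in a one-row DataFrame and concatenate with a fresh index.
delay_df = pd.concat([delay_df, pd.DataFrame([row])], ignore_index=True)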