Exemplo n.º 1
0
def Insert_db(conn, table_name, result):
    if conn == None:
        conn = get_connection()
    cr = conn.cursor()
    # print('diaoyong')

    # sql = "INSERT INTO SYS.TRAVEL_TIME_HK93TOHK92(HPHM, HPZL_y, JGSJ_x, JGSJ_y, TRAVEL_TIME, DATE_TYPES) VALUES (:1, :2, :3, :4, :5, :6)"
    sql = (
        "INSERT INTO %s(HPHM, TOTAL_NUM, WORK_NUM, HOLIDAY_NUM, START_DATE, END_DATE) VALUES (:1, :2, :3, :4, :5, :6)"
    ) % (table_name)

    # # 如果插入数据库失败,可以取消下面3行注释,看报的错误是什么
    # cr.executemany(sql, result)
    # conn.commit()
    # print('insert successfully!')

    # # 批量运算时用下面的代码
    try:
        cr.executemany(sql, result)
        conn.commit()
        print('insert successfully!')
    except:
        conn.rollback()

    # 关闭游标、关闭数据库连接
    cr.close()
    conn.close()
    return 0
Exemplo n.º 2
0
def plate_match(conn, SSID, CDBH, start_time, end_time):
    # SSID=['HK-92','HK-107','HK-93'] [终点,起点], start_time是一个list,形式为:['2019-7-2 17:00:00', '2019-7-2 16:00:00']
    date_types = holiday(start_time[0][0:10])
    if conn == None:
        conn = get_connection()  # 建立数据库连接

    cr = conn.cursor()  # 生成连接的游标
    # 查询路段下游的车牌和经过时间,使用时从下面4个终点里面选择一个取消注释(注释掉其他3个终点)
    query_plate_low = (
        "SELECT HPHM, HPZL, JGSJ FROM SJCJ_T_CLXX_LS_ERXIAO WHERE SSID='%s' AND CDBH in %s AND JGSJ>=to_date('%s','yyyy-mm-dd hh24:mi:ss') AND JGSJ<=to_date('%s','yyyy-mm-dd hh24:mi:ss')"
    ) % (SSID[0], CDBH[0], start_time[0], end_time[0])

    cr.execute(query_plate_low)
    query_res_low = cr.fetchall()
    dataframe_res_low = pd.DataFrame(list(query_res_low),
                                     columns=['HPHM', 'HPZL', 'JGSJ'])

    # 查询路段上游的车牌和经过时间
    # query_plate_upper = ("SELECT HPHM, HPZL, JGSJ FROM SJCJ_T_CLXX_LS WHERE SSID='%s' AND CDBH in ('1','2','3','4','5','6','10','11','12') AND JGSJ>=to_date('%s','yyyy-mm-dd hh24:mi:ss') AND JGSJ<=to_date('%s','yyyy-mm-dd hh24:mi:ss')")%(SSID[1],start_time[0],end_time[0])
    query_plate_upper = (
        "SELECT HPHM, HPZL, JGSJ FROM SJCJ_T_CLXX_LS_ERXIAO WHERE SSID='%s' AND CDBH in %s AND JGSJ>=to_date('%s','yyyy-mm-dd hh24:mi:ss') AND JGSJ<=to_date('%s','yyyy-mm-dd hh24:mi:ss')"
    ) % (SSID[1], CDBH[1], start_time[0], end_time[0])
    cr.execute(query_plate_upper)
    query_res_upper = cr.fetchall()
    dataframe_res_upper = pd.DataFrame(list(query_res_upper),
                                       columns=['HPHM', 'HPZL', 'JGSJ'])

    # 上下游匹配并计算路段旅行时间,merge之后jgsj自动分成2列:jgsj_x(下游)和jgsj_y(上游)
    merge_ls = pd.merge(dataframe_res_low, dataframe_res_upper, on='HPHM')
    merge_ls = merge_ls.drop(index=(merge_ls.loc[(
        merge_ls['HPHM'] == '车牌')].index)).reset_index()  # 删除hphm列中,值为车牌的行
    merge_ls = merge_ls.drop(
        index=(merge_ls.loc[merge_ls['JGSJ_x'] < merge_ls['JGSJ_y']].index
               )).reset_index()  # 如果下游检测时间小于上游检测时间,说明匹配错误
    merge_ls['JGSJ_x'] = pd.to_datetime(
        merge_ls['JGSJ_x'],
        format='%Y-%m-%d %H:%M:%S')  # 将JGSJ_x列转为datetime标准格式
    merge_ls['JGSJ_y'] = pd.to_datetime(
        merge_ls['JGSJ_y'],
        format='%Y-%m-%d %H:%M:%S')  # 将JGSJ_y列转为datetime标准格式
    merge_ls['travel_time'] = merge_ls['JGSJ_x'].sub(
        merge_ls['JGSJ_y'])  # jgsj_x - jgsj_y,即下游检测时间-上游检测时间
    # merge_ls['travel_time'] = pd.to_numeric(merge_ls['travel_time'].dt.seconds, downcast='integer') # 计算结果从timedelta转为int,以秒为单位
    merge_ls['travel_time'] = (merge_ls['travel_time'] /
                               np.timedelta64(1, 's')).astype(
                                   int)  # 计算结果从timedelta转为int,以秒为单位
    merge_ls = merge_ls.sort_values(by=['JGSJ_y'],
                                    ascending=True)  # 将dataframe按照JGSJ_y的排序
    merge_ls = merge_ls.drop_duplicates(
        'JGSJ_x', 'last', inplace=False)  # 按JGSJ_x,去除该列下面的重复行,删除重复项并保留最后一次出现的项
    merge_ls = merge_ls.drop_duplicates(
        'JGSJ_y', 'last', inplace=False)  # 按JGSJ_y,去除该列下面的重复行,删除重复项并保留最后一次出现的项

    # print(merge_ls['travel_time'].dtypes)
    merge_ls = merge_ls[['HPHM', 'HPZL_y', 'JGSJ_x', 'JGSJ_y',
                         'travel_time']]  # 提取列表中的5列组成新的merge_ls
    merge_ls['date_types'] = date_types  # 新增一列,用于表征日期性质,0:工作日;1:周末;2:节假日

    free(conn, cr)
    return merge_ls
Exemplo n.º 3
0
def Query_Afternoon_Travel_Time(conn, HPHM):
    if conn == None: conn = get_connection()  # 建立数据库连接
    cr = conn.cursor()  # 生成连接的游标

    # 同时查6个表中的平均旅行时间和样本数(晚上放学期间)
    query_sql = ("SELECT AVG(TRAVEL_TIME),COUNT(TRAVEL_TIME) FROM TRAVEL_TIME_HK107TOHK92 WHERE HPHM='%s' AND TO_CHAR(JGSJ_x,'HH24:MI') BETWEEN '16:30' AND '18:00'"
                 "UNION SELECT AVG(TRAVEL_TIME),COUNT(TRAVEL_TIME) FROM TRAVEL_TIME_HK107TOHK93 WHERE HPHM='%s' AND TO_CHAR(JGSJ_x,'HH24:MI') BETWEEN '16:30' AND '18:00'"
                 "UNION SELECT AVG(TRAVEL_TIME),COUNT(TRAVEL_TIME) FROM TRAVEL_TIME_HK92TOHK107 WHERE HPHM='%s' AND TO_CHAR(JGSJ_x,'HH24:MI') BETWEEN '16:30' AND '18:00'"
                 "UNION SELECT AVG(TRAVEL_TIME),COUNT(TRAVEL_TIME) FROM TRAVEL_TIME_HK92TOHK93 WHERE HPHM='%s' AND TO_CHAR(JGSJ_x,'HH24:MI') BETWEEN '16:30' AND '18:00'"
                 "UNION SELECT AVG(TRAVEL_TIME),COUNT(TRAVEL_TIME) FROM TRAVEL_TIME_HK93TOHK92 WHERE HPHM='%s' AND TO_CHAR(JGSJ_x,'HH24:MI') BETWEEN '16:30' AND '18:00'"
                 "UNION SELECT AVG(TRAVEL_TIME),COUNT(TRAVEL_TIME) FROM TRAVEL_TIME_HK93TOHK107 WHERE HPHM='%s' AND TO_CHAR(JGSJ_x,'HH24:MI') BETWEEN '16:30' AND '18:00'")\
                % (HPHM, HPHM, HPHM, HPHM, HPHM, HPHM)

    cr.execute(query_sql)
    query_res = cr.fetchall()
    dataframe_res = pd.DataFrame(list(query_res),
                                 columns=['AVG_T',
                                          'SAM_NUM'])  # 查询结果转为dataframe格式
    dataframe_res = dataframe_res.sort_values(
        by=['SAM_NUM'], ascending=False).reset_index()  # 将查询结果按样本数量降序排列
    dataframe_res = dataframe_res[['AVG_T',
                                   'SAM_NUM']]  # 仅提取'AVG_T', 'SAM_NUM'列
    # print(dataframe_res)
    # # 统计对应车牌在6个子路段上的总样本数时,取消下面代码注释
    # travel_time, sample_numbers = dataframe_res['AVG_T'][0], dataframe_res['SAM_NUM'].sum()    # 将dataframe中的第一行,赋值给travel_time和sample_numbers(样本数是指该车牌在6个子路段上的总共样本数)
    # # 统计用于计算平均旅行时间的样本数(SAM_NUM),对应车牌在6个子路段上的总样本数(TOTAL_NUM)
    travel_time, sample_numbers, total_numbers = dataframe_res[
        'AVG_T'][0], dataframe_res['SAM_NUM'][0], dataframe_res['SAM_NUM'].sum(
        )  # 当需要统计:用于计算平均旅行时间的样本数时取消注释
    return travel_time, sample_numbers, total_numbers
Exemplo n.º 4
0
def Travel_time_query(conn, table_name, car_type, start_time, end_time):
    if conn == None: conn = get_connection()
    cr = conn.cursor()

    query_sql = (
                    "SELECT HPZL_Y, JGSJ_X, TRAVEL_TIME FROM %s WHERE HPZL_Y='%s' AND "
                    "TO_CHAR(JGSJ_X,'YYYY-MM-DD HH24:MI:SS') BETWEEN '%s' AND '%s' ORDER BY JGSJ_X ") % (
                    table_name, car_type, start_time, end_time)
    cr.execute(query_sql)
    query_res = cr.fetchall()
    query_res = pd.DataFrame(data=query_res, columns=['HPZL_Y', 'JGSJ_X', 'TRAVEL_TIME'])

    return query_res
Exemplo n.º 5
0
def Query_ls(conn, SSID, start_time, end_time, cdbh):
    if conn == None: conn = get_connection()  # conn为None时,建立数据库连接
    cr = conn.cursor()  # 建立查询游标
    # 从卡口流水表中查询给定卡口编号、时段和车道组编号的JGSJ
    query_sql = (
        "SELECT JGSJ FROM SJCJ_T_CLXX_LS WHERE SSID = '%s' AND CDBH IN %s AND TO_CHAR(JGSJ,'YYYY-MM-DD HH24:MI:SS') BETWEEN '%s' AND '%s'"
    ) % (SSID, cdbh[0], start_time, end_time)
    cr.execute(query_sql)  # 执行查询
    query_res = cr.fetchall(
    )  # 提取查询结果,赋予变量query_res,查询结果形式:[(结果1),(结果2),(结果3)...]
    query_res = [i[0] for i in query_res]  # 重新组织查询结果,变成[结果1,结果2,...]
    series_res = pd.Series(
        data=query_res,
        dtype='datetime64[ns]')  # 将jgsj列转为Series格式,数据类型指定为datetime64
    result = pd.Series(
        np.ones(len(query_res)),
        index=series_res)  # 将查询到的JGSJ时间列设置为索引,Series的值为1(索引—1辆车)
    ls_query_result = result.sort_index(
        ascending=True)  # 将构建好的Series按索引升序排序(输出结果时间升序)
    return ls_query_result  # 返回流水查询结果
Exemplo n.º 6
0
def Insert_db(conn, table_name, result):
    if conn == None:
        conn = get_connection()
    cr = conn.cursor()
    # print('diaoyong')

    # sql = "INSERT INTO SYS.TRAVEL_TIME_HK93TOHK92(HPHM, HPZL_y, JGSJ_x, JGSJ_y, TRAVEL_TIME, DATE_TYPES) VALUES (:1, :2, :3, :4, :5, :6)"
    sql = (
        "INSERT INTO %s(HPHM, HPZL_y, JGSJ_x, JGSJ_y, TRAVEL_TIME, DATE_TYPES) VALUES (:1, :2, :3, :4, :5, :6)"
    ) % (table_name)

    try:
        cr.executemany(sql, result)
        conn.commit()
        print('insert successfully!')
    except:
        conn.rollback()

    # 关闭游标、关闭数据库连接
    cr.close()
    conn.close()
    return 0
Exemplo n.º 7
0
def Work_day_frequency_vehicles(conn):

    if conn == None: conn = get_connection()  # 建立数据库连接
    cr = conn.cursor()  # 生成连接的游标

    # 查询 HIGH_FRE_VEHICLES表的HPHM、TOTAL_NUM、WORK_NUM列
    query_sql = "SELECT HPHM, TOTAL_NUM, WORK_NUM FROM HIGH_FRE_VEHICLES"

    cr.execute(query_sql)  # 执行查询
    query_res_total = cr.fetchall()  # 查询结果从游标中提取并赋值给变量query_res_total
    df_res_total = pd.DataFrame(list(query_res_total),
                                columns=['HPHM', 'TOTAL_NUM',
                                         'WORK_NUM'])  # 查询结果转化成pd的dataframe
    df_res = df_res_total.groupby('HPHM').sum()  # 按HPHM分组合并,将其他列HPHM相同的值求和
    df_res = df_res.drop(index=(
        df_res.loc[1.0 *
                   (df_res['WORK_NUM'] / df_res['TOTAL_NUM']) < 0.9].index
    ))  # 保留工作日出行频数占总频数的90%以上的车牌
    df_res_c1 = df_res.sort_values(
        by=['TOTAL_NUM'], ascending=False).reset_index()  # 将上面的结果按降序排列

    return df_res_c1
Exemplo n.º 8
0
def get_travel_month(conn, hphm):
    if conn == None: conn = get_connection()
    cr = conn.cursor()  # 生成连接的游标
    # 查询语句,查询的3个表,都可以忽略信号配时的影响(路中卡口)
    query_sql_1 = (
        "SELECT JGSJ_x from TRAVEL_TIME_HK93TOHK107 WHERE HPHM='%s'") % (hphm)
    query_sql_2 = (
        "SELECT JGSJ_x from TRAVEL_TIME_HK92TOHK107 WHERE HPHM='%s'") % (hphm)
    query_sql_3 = (
        "SELECT JGSJ_x from TRAVEL_TIME_HK93TOHK92 WHERE HPHM='%s'") % (hphm)
    cr.execute(query_sql_1)  # 执行查询语句1
    res1 = cr.fetchall()  # 查询结果赋值res1
    cr.execute(query_sql_2)  # 执行查询语句2
    res2 = cr.fetchall()  # 查询结果赋值res2
    cr.execute(query_sql_3)  # 执行查询语句3
    res3 = cr.fetchall()  # 查询结果赋值res3
    res = res1 + res2 + res3  # 查询的3个list合并,结果示例 [(res1),(res2),...]
    free(conn, cr)  # 关闭查询
    lis_res = []  # 提取查询到的旅行时间(datetime格式),并存储至该list
    for ele in res:  # 查询结果合并list中的每个元素(tuple),将它的第一个值添加到上一行的list
        lis_res.append(ele[0])
    month_list = [x.month for x in lis_res]  # 提取lis_res中每个元素的月份信息并存在一个列表中
    return month_list  # 返回结果
Exemplo n.º 9
0
def Query_Morning_Travel_NUM(conn, HPHM):
    # if am == 'am':
    #     JGSJ_x = ('06','07')
    # else: JGSJ_x = ('16','17','18')

    if conn == None: conn = get_connection()  # 建立数据库连接
    cr = conn.cursor()  # 生成连接的游标

    query_sql_1 = (
        "SELECT COUNT(a.JGSJ_x) FROM TRAVEL_TIME_HK93TOHK107 a "
        "WHERE HPHM='%s' AND TO_CHAR(JGSJ_x,'HH24:MI') BETWEEN '06:45' AND '07:20'"
    ) % (HPHM)

    query_sql_2 = (
        "SELECT  COUNT(b.JGSJ_x) FROM TRAVEL_TIME_HK92TOHK107 b "
        "WHERE HPHM='%s' AND TO_CHAR(JGSJ_x,'HH24:MI') BETWEEN '06:45' AND '07:20'"
    ) % (HPHM)
    cr.execute(query_sql_1)
    res_1 = cr.fetchall()
    cr.execute(query_sql_2)
    res_2 = cr.fetchall()
    res = res_1[0][0] + res_2[0][0]
    return res
Exemplo n.º 10
0
def High_frequency_vehicles(conn, start_time):

    if conn == None: conn = get_connection()  # 建立数据库连接
    cr = conn.cursor()  # 生成连接的游标

    # 按日期查询一周内上下学期间车辆的出行次数,并返回出行次数大于等于5次的车牌号及其总体出行次数

    query_time_interval = Week_Period(
        start_time)  # 根据给定的start_time,生成一个列表,包含该周的起始日期和终止日期
    # 查询车牌号码和经过时间记录数(仅上下学期间)
    query_sql = (
        "SELECT HPHM, COUNT(JGSJ) FROM SJCJ_T_CLXX_LS_OCT WHERE SSID='HK-107' AND CDBH IN ('1','2','3','4') "
        "AND TO_CHAR(JGSJ,'HH24') IN ('16','17')"
        " AND JGSJ BETWEEN to_date('%s','yyyy-mm-dd hh24:mi:ss') AND to_date('%s','yyyy-mm-dd hh24:mi:ss') GROUP BY HPHM "
    ) % (query_time_interval[0], query_time_interval[1])

    cr.execute(query_sql)  # 执行查询
    query_res_total = cr.fetchall()  # 查询结果从游标中提取并赋值给变量query_res_total
    dataframe_res_total = pd.DataFrame(list(query_res_total),
                                       columns=['HPHM', 'TOTAL_numbers'
                                                ])  # 查询结果转化成pd的dataframe

    holiday_list = Get_Holidays_during_Aweek(
        start_time
    )  # 根据给定的start_time,生成该周的休息日 eg:['2019-05-011 00:00:00', '2019-05-12 00:00:00']

    holiday_query_list = []  # 将休息日的查询结果转化为dataframe格式,存入该列表
    for i in range(len(holiday_list)):
        query_sql_for_holiday = (
            "SELECT HPHM, COUNT(JGSJ) FROM SJCJ_T_CLXX_LS_OCT WHERE SSID='HK-107' AND CDBH IN ('1','2','3','4') "
            "AND TO_CHAR(JGSJ,'HH24') IN ('16','17')"
            " AND JGSJ BETWEEN to_date('%s','yyyy-mm-dd hh24:mi:ss') AND to_date('%s','yyyy-mm-dd hh24:mi:ss') GROUP BY HPHM "
        ) % (holiday_list[i], Add_serval_days(holiday_list[i], 1)
             )  # 查休息日当天的出行情况
        cr.execute(query_sql_for_holiday)  # 执行查询
        query_res_holiday = cr.fetchall()  # 查询结果提取
        To_dataframe = pd.DataFrame(list(query_res_holiday),
                                    columns=['HPHM',
                                             'HOLIDAY_numbers'])  # 转为dataframe
        holiday_query_list.append(To_dataframe)  # 加入列表holiday_query_list
    # 将休息日的查询结果合成为一个dataframe
    df_holiday = pd.concat(holiday_query_list,
                           ignore_index=True)  # 将所有查询结果合并(上下合并)
    df_holiday = df_holiday.groupby(
        'HPHM').sum().reset_index()  # 按HPHM列的内容进行表内合并,对其他列执行求和的操作

    # 将df_holiday和dataframe_res_total整合
    # df_holiday.rename(columns={'HOLIDAY_numbers': 'holiday_numbers'}, inplace=True)  # 重新命名指定列的列名
    df_holiday_total_tem = pd.concat([dataframe_res_total, df_holiday],
                                     ignore_index=True,
                                     sort=True)  # 上下合并2个表格,空白地方填NaN
    # print(df_holiday_total_tem)
    df_holiday_total = df_holiday_total_tem.groupby(
        'HPHM').sum().reset_index()  # 按HPHM列的内容,进行表内整合,对其他列执行求和的操作
    df_holiday_total['WORK_numbers'] = df_holiday_total['TOTAL_numbers'].sub(
        df_holiday_total['HOLIDAY_numbers']
    )  # TOTAL_numbers列减去HOLIDAY_numbers列,赋值给新增的WORK_numbers列
    df_holiday_total['START_DATE'] = datetime.datetime.strptime(
        query_time_interval[0], '%Y-%m-%d %H:%M:%S')  # 新增一列用于表征计算起始的日期
    df_holiday_total['END_DATE'] = datetime.datetime.strptime(
        query_time_interval[1], '%Y-%m-%d %H:%M:%S')  # 新增一列用于表征统计截止的日期(不包括当天)

    # 通过上述流程,将同一周内出行的车牌及工作日、休息日出行次数统计完成
    # 下面将统计、筛选高频车(一周内出行5次以上,工作日出行3次以上的作为高频车)
    df_holiday_total = df_holiday_total.drop(index=(
        df_holiday_total.loc[df_holiday_total['TOTAL_numbers'] < 5].index))
    df_holiday_total = df_holiday_total.drop(
        index=(df_holiday_total.loc[df_holiday_total['WORK_numbers'] < 3].index
               )).reset_index()  # 删去工作日出行次数小于3次的
    df_holiday_total = df_holiday_total.drop(
        index=(df_holiday_total.loc[df_holiday_total['HPHM'] == '车牌'].index
               )).reset_index()  # 删去车牌识别失败的行
    df_holiday_total = df_holiday_total[[
        'HPHM', 'TOTAL_numbers', 'WORK_numbers', 'HOLIDAY_numbers',
        'START_DATE', 'END_DATE'
    ]]  #提取对应列重构dataframe
    return df_holiday_total
Exemplo n.º 11
0
def plate_match(conn, SSID, CDBH, start_time, end_time):
    date_types = holiday(start_time[0][0:10])
    start_ssid, end_ssid = SSID[0], SSID[1]
    start_cdbh, end_cdbh = CDBH[0], CDBH[1]

    if conn == None:
        conn = get_connection()  # 建立数据库连接

    cr = conn.cursor()  # 生成连接的游标
    # 查询路段上游的车牌和经过时间
    query_plate_upper = (
        "SELECT HPHM, HPZL, JGSJ, CDBH FROM SJCJ_T_CLXX_LS WHERE SSID='%s' AND CDBH in %s AND JGSJ BETWEEN '%s' AND '%s'"
    ) % (start_ssid, start_cdbh, start_time[0], end_time[0])
    cr.execute(query_plate_upper)
    query_res_upper = cr.fetchall()
    dataframe_res_upper = pd.DataFrame(
        list(query_res_upper), columns=['HPHM', 'HPZL', 'JGSJ', 'CDBH'])
    # 查询路段下游的车牌和经过时间
    query_plate_low = (
        "SELECT HPHM, HPZL, JGSJ, CDBH FROM SJCJ_T_CLXX_LS WHERE SSID='%s' AND CDBH in %s AND JGSJ BETWEEN '%s' AND '%s'"
    ) % (end_ssid, end_cdbh, start_time[0], end_time[0])
    cr.execute(query_plate_low)
    query_res_low = cr.fetchall()
    dataframe_res_low = pd.DataFrame(list(query_res_low),
                                     columns=['HPHM', 'HPZL', 'JGSJ', 'CDBH'])

    # 上下游匹配并计算路段旅行时间,merge之后jgsj自动分成2列:jgsj_x(下游)和jgsj_y(上游)
    merge_ls = pd.merge(dataframe_res_low, dataframe_res_upper, on='HPHM')
    # print(merge_ls)
    merge_ls = merge_ls.drop(index=(merge_ls.loc[(
        merge_ls['HPHM'] == '车牌')].index)).reset_index()  # 删除hphm列中,值为车牌的行
    merge_ls = merge_ls.drop(
        index=(merge_ls.loc[merge_ls['JGSJ_x'] < merge_ls['JGSJ_y']].index
               )).reset_index()  # 如果下游检测时间小于上游检测时间,说明匹配错误
    merge_ls['JGSJ_x'] = pd.to_datetime(
        merge_ls['JGSJ_x'],
        format='%Y-%m-%d %H:%M:%S')  # 将JGSJ_x列转为datetime标准格式
    merge_ls['JGSJ_y'] = pd.to_datetime(
        merge_ls['JGSJ_y'],
        format='%Y-%m-%d %H:%M:%S')  # 将JGSJ_y列转为datetime标准格式
    merge_ls['travel_time'] = merge_ls['JGSJ_x'].sub(
        merge_ls['JGSJ_y'])  # jgsj_x - jgsj_y,即下游检测时间-上游检测时间
    # 计算结果从timedelta转为int,以秒为单位
    merge_ls['travel_time'] = (merge_ls['travel_time'] /
                               np.timedelta64(1, 's')).astype(int)
    merge_ls = merge_ls.sort_values(by=['JGSJ_y'],
                                    ascending=True)  # 将dataframe按照JGSJ_y的排序
    merge_ls = merge_ls.drop_duplicates(
        'JGSJ_x', 'last', inplace=False)  # 按JGSJ_x,去除该列下面的重复行,删除重复项并保留最后一次出现的项
    merge_ls = merge_ls.drop_duplicates(
        'JGSJ_y', 'last', inplace=False)  # 按JGSJ_y,去除该列下面的重复行,删除重复项并保留最后一次出现的项

    # print(merge_ls['travel_time'].dtypes)
    merge_ls = merge_ls[[
        'HPHM', 'HPZL_y', 'JGSJ_x', 'JGSJ_y', 'travel_time', 'CDBH_x', 'CDBH_y'
    ]]  # 提取列表中的5列组成新的merge_ls
    merge_ls.rename(columns={
        'JGSJ_x': 'JGSJ_OUT',
        'JGSJ_y': 'JGSJ_IN',
        'CDBH_x': 'CDBH_OUT',
        'CDBH_y': 'CDBH_IN',
        'HPZL_y': 'HPZL'
    },
                    inplace=True)
    merge_ls['date_types'] = date_types  # 新增一列,用于表征日期性质,0:工作日;1:周末;2:节假日
    merge_ls['SSID_IN'] = start_ssid
    merge_ls['SSID_OUT'] = end_ssid
    columns_order = [
        'HPHM', 'HPZL', 'SSID_IN', 'CDBH_IN', 'JGSJ_IN', 'SSID_OUT',
        'CDBH_OUT', 'JGSJ_OUT', 'travel_time', 'date_types'
    ]
    merge_ls = merge_ls[columns_order]

    free(conn, cr)
    return merge_ls