예제 #1
0
            n2 = len(df[df["上报时间"].dt.date == day])  # 当天案件总数
            # 自行处理的案件总数
            n3 = len(df_self)
            # 分类统计
            s = df.groupby("小类名称").count()["上报时间"]
            s["案件总数"] = n1
            s["当天案件总数"] = n2
            s["自行处理案件总数"] = n3
            s["日期"] = day
            s["街道"] = area
            s["原指标"] = gt
            res[j] = s
            j += 1

    res.fillna(0, inplace=True)
    return res.T


if __name__ == "__main__":
    # Alternative input kept for reference:
    #   '../queryResult_2019-09-10_145030_zs341.xlsx'
    source_file = '../queryResult_2019-09-10_145030.npy'
    gt_file = "../source_data/ZS321 - 环境问题指数.xlsx"
    # Single name for the output workbook; it is written below and then
    # handed to the regression step.
    output_file = '../zs321_20190923.xlsx'

    # Convert the raw query result and save the resulting table to Excel.
    df2 = convert_to_new_dataframe(
        source_file, gt_file, write_path='../tmp_zs321')
    df2.to_excel(output_file)

    # Regression step on the workbook that was just written.
    regression_test(output_file, 'zs321')
    # TODO: 增加当天内完成案件的权重(比如自行处理案件因当天完成, 相比第二天完成的案件, 少了一晚上的执行分数)
    w1 = 60  # 自行处理案件总数, 认为w1分钟为完成一个自行处理案件所需的平均时间
    w2 = 0  # 其他案件总数, 不考评
    w3 = 0  # 强制结案总数, 暂不考评(没有好的思路, 且强制结案会同时生成一个新的案件)
    w4 = 0  # 立案耗时总长(分钟), 不考评
    w5 = 1  # 计划内耗时总长(分钟)
    w6 = -1  # 计划外耗时总长(分钟)
    df['新评分'] = (df["自行处理案件总数"] * w1 + df["其他案件总数"] * w2 + df["强制结案总数"] * w3 +
                 df["立案耗时总长(分钟)"] * w4 + df["计划内耗时总长(分钟)"] * w5 +
                 df["计划外耗时总长(分钟)"] * w6) / 1000
    return df


if __name__ == "__main__":
    # Alternative input kept for reference:
    #   '../queryResult_2019-09-10_145030.xlsx'
    source_file = '../queryResult_2019-09-10_145030.npy'
    gt_file = "../source_data/ZS222 - 处置效能指数.xlsx"
    # One name for the output workbook: it is written, re-read by cal_index,
    # overwritten with the result, and finally passed to the regression step.
    df3_file_path = '../zs222_20190923.xlsx'

    # Convert the raw query result and save the resulting table to Excel.
    df2 = convert_to_new_dataframe(source_file,
                                   gt_file,
                                   write_path='../tmp_zs222')
    df2.to_excel(df3_file_path)

    df3 = cal_index(df3_file_path)
    # Remove the 'Unnamed: 0' column before overwriting the workbook.
    # The column is selected by keyword: passing the axis positionally
    # (``drop('Unnamed: 0', 1)``) is deprecated since pandas 1.3 and raises
    # TypeError on pandas 2.x.
    # NOTE(review): assumes cal_index returns a pandas DataFrame - confirm.
    df3 = df3.drop(columns='Unnamed: 0')
    df3.to_excel(df3_file_path)

    # Regression step on the final workbook.
    regression_test(df3_file_path)
            s["按时完成"] = n1a
            s["延期完成"] = n1b
            s["当天案件总数"] = n2
            s["自行处理案件总数"] = n3
            s["日期"] = day
            s["街道"] = area
            s["原指标"] = gt
            lst.append(s)

    res = pd.concat(lst, axis=1, sort=False)
    res.fillna(0, inplace=True)
    res = res.T
    # https://stackoverflow.com/questions/14507794/pandas-how-to-flatten-a-hierarchical-index-in-columns @Andy Hayden
    res.columns = [' '.join(col).strip() for col in res.columns.values]
    res.reset_index(inplace=True, drop=True)
    return res


if __name__ == "__main__":
    # Alternative input kept for reference:
    #   '../queryResult_2019-09-10_145030_zs341.xlsx'
    source_file = '../queryResult_2019-09-10_145030.npy'
    gt_file = "../source_data/ZS341 - 服务需求指数.xlsx"
    # Single name for the output workbook; it is written below and then
    # handed to the regression step.
    output_file = '../zs341_20190923.xlsx'

    # Convert the raw query result and save the resulting table to Excel.
    df2 = convert_to_new_dataframe(
        source_file, gt_file, write_path='../tmp_zs341')
    df2.to_excel(output_file)

    # Regression step on the workbook that was just written.
    regression_test(output_file)