def pre_tz():
    """Build the feature table for the trip (tz) records in ``tgl.new_tz``.

    The file is read as a header-less CSV. For every row the function derives
    the max / min / mean over the telemetry-current column slice, the
    forward-filled weather value (column 7), month / day / weekday from the
    timestamp in column 4, the line id (column 2) and line name (column 3),
    and the constant label ``tz = 1`` (1: tripped, 0: not tripped).

    Returns:
        pandas.DataFrame with the columns ``xlid, xlmc, maxvalue, minvalue,
        avgvalue, weather, month, day, week, tz``. When the file is smaller
        than 100 bytes an empty frame with those columns is returned.
    """
    feature_columns = [
        'xlid', 'xlmc', 'maxvalue', 'minvalue', 'avgvalue', 'weather',
        'month', 'day', 'week', 'tz'
    ]

    tz_dir = tgl.new_tz
    if not os.path.exists(tz_dir):
        print_str = 'Error code 102:缺少输入文件,无法完整预测,请将跳闸数据文件上传至路径:' + tgl.trainDataTZPath + '!'
        ttu.err_state_write(print_str)
        # NOTE(review): nothing here stops execution. Unless err_state_write
        # terminates the process, getsize() below raises on the missing
        # path - confirm against ttu.
    if os.path.getsize(tz_dir) < 100:
        # A file this small is treated as "no trip data today".
        print('今日无tz数据')
        return pd.DataFrame(columns=feature_columns)

    tz = pd.read_csv(tz_dir, encoding='utf-8', header=None)

    # Forward-fill missing weather values. The fill is assigned explicitly:
    # an in-place fillna on the temporary returned by iloc is not guaranteed
    # to reach the frame (and never does under pandas copy-on-write).
    tz['weather'] = tz.iloc[:, 7].ffill()

    # Telemetry-current columns: row-wise max, min and mean.
    # NOTE(review): the original comment says "s1 to s96", but 10:105 selects
    # 95 columns - confirm the intended range against the file layout.
    value = tz.iloc[:, 10:105]
    print(value.head())
    tz['maxvalue'] = value.max(axis=1)
    tz['minvalue'] = value.min(axis=1)
    tz['avgvalue'] = value.mean(axis=1)

    # Time features, all taken from the timestamp column.
    stamps = pd.DatetimeIndex(tz.iloc[:, 4])
    tz['month'] = stamps.month
    tz['day'] = stamps.day
    tz['week'] = stamps.weekday

    # Line id and line name.
    tz['xlid'] = tz.iloc[:, 2]
    tz['xlmc'] = tz.iloc[:, 3]
    # Trip label (1: tripped, 0: not tripped).
    tz['tz'] = 1

    return tz[feature_columns].copy()
def pre_zc():
    """Build the feature table for the non-trip (zc) records in ``tgl.new_zc``.

    The file is read as a header-less CSV. For every row the function derives
    the line id (column 0) and line name (column 1), the max / min / mean
    over the telemetry-current column slice, month / day / weekday from the
    timestamp in column 2, a constant ``weather = 2`` and the constant label
    ``tz = 0`` (1: tripped, 0: not tripped).

    Returns:
        pandas.DataFrame with the columns ``xlid, xlmc, maxvalue, minvalue,
        avgvalue, weather, month, day, week, tz``. When the file is smaller
        than 100 bytes an empty frame with those columns is returned.
    """
    feature_columns = [
        'xlid', 'xlmc', 'maxvalue', 'minvalue', 'avgvalue', 'weather',
        'month', 'day', 'week', 'tz'
    ]

    # Report a missing input file.
    zc_dir = tgl.new_zc
    if not os.path.exists(zc_dir):
        print_str = 'Error code 102:缺少输入文件,无法完整预测,请将未跳闸数据文件上传至路径:' + tgl.trainDataZCPath + '!'
        ttu.err_state_write(print_str)
        # NOTE(review): nothing here stops execution. Unless err_state_write
        # terminates the process, getsize() below raises on the missing
        # path - confirm against ttu.
    if os.path.getsize(zc_dir) < 100:
        # A file this small is treated as "no non-trip data today".
        print('今日无zc数据')
        return pd.DataFrame(columns=feature_columns)

    zc = pd.read_csv(zc_dir, encoding='utf-8', header=None)
    zc['xlid'] = zc.iloc[:, 0]
    zc['xlmc'] = zc.iloc[:, 1]

    # Telemetry-current columns: row-wise max, min and mean.
    # NOTE(review): the original comment says "s1 to s96", but 4:99 selects
    # 95 columns - confirm the intended range against the file layout.
    value = zc.iloc[:, 4:99]
    zc['maxvalue'] = value.max(axis=1)
    zc['minvalue'] = value.min(axis=1)
    zc['avgvalue'] = value.mean(axis=1)

    # Weather is not read from the file; every row gets the constant 2.
    zc['weather'] = 2
    # Trip label (1: tripped, 0: not tripped).
    zc['tz'] = 0

    # Time features, all taken from the timestamp column.
    stamps = pd.DatetimeIndex(zc.iloc[:, 2])
    zc['month'] = stamps.month
    zc['day'] = stamps.day
    zc['week'] = stamps.weekday

    return zc[feature_columns].copy()
def Dataload(tz_dir, zc_dir):
    """Load the trip and non-trip CSV files.

    Each path is checked for existence *before* it is read, so that a missing
    file is reported through ``ttu.err_state_write`` instead of the read
    failing first and the report never being written.

    Args:
        tz_dir: path of the trip-data CSV (read with a header row).
        zc_dir: path of the non-trip-data CSV (read with a header row).

    Returns:
        Tuple ``(tz, zc)`` of pandas.DataFrame objects.
    """
    print('Start time:', datetime.datetime.now())

    if not os.path.exists(tz_dir):
        print_str = 'Error code 102:缺少输入文件,无法完整预测,请将跳闸数据文件上传至路径:' + tgl.trainDataTZPath + '!'
        ttu.err_state_write(print_str)
    tz = pd.read_csv(tz_dir, encoding='utf-8')

    if not os.path.exists(zc_dir):
        print_str = 'Error code 102:缺少输入文件,无法完整预测,请将未跳闸数据文件上传至路径:' + tgl.trainDataZCPath + '!'
        ttu.err_state_write(print_str)
    zc = pd.read_csv(zc_dir, encoding='utf-8')

    return tz, zc
# Example #4
# 0
def pre_tz():
    """Load the trip (tz) training file and append derived feature columns.

    Reads ``tgl.trainDataTZPath`` as a header-less CSV and adds, in this
    order, the columns ``weather`` (column 7, forward-filled), ``maxvalue`` /
    ``minvalue`` / ``avgvalue`` (row-wise statistics over the telemetry
    slice), ``month`` / ``day`` / ``week`` (from the timestamp in column 4),
    ``xlmc`` (line name, column 3) and the constant label ``tz = 1``.

    Returns:
        pandas.DataFrame holding every original column plus the derived ones.
    """
    tz_dir = tgl.trainDataTZPath
    if not os.path.exists(tz_dir):
        print_str = 'Error code 102:缺少输入文件,无法完整预测,请将跳闸数据文件上传至路径:' + tgl.trainDataTZPath + '!'
        ttu.err_state_write(print_str)
        # NOTE(review): nothing here stops execution. Unless err_state_write
        # terminates the process, read_csv below raises on the missing
        # path - confirm against ttu.
    tz = pd.read_csv(tz_dir, encoding='utf-8', header=None)

    # Forward-fill missing weather values and write them back explicitly.
    # An in-place fillna on the temporary returned by iloc is not guaranteed
    # to reach the frame (and never does under pandas copy-on-write).
    weather = tz.iloc[:, 7].ffill()
    tz.iloc[:, 7] = weather
    tz['weather'] = weather

    # Telemetry columns: row-wise max, min and mean.
    value = tz.iloc[:, 11:107]
    tz['maxvalue'] = value.max(axis=1)
    tz['minvalue'] = value.min(axis=1)
    tz['avgvalue'] = value.mean(axis=1)

    # Time features, all taken from the timestamp column.
    stamps = pd.DatetimeIndex(tz.iloc[:, 4])
    tz['month'] = stamps.month
    tz['day'] = stamps.day
    tz['week'] = stamps.weekday

    # Line name.
    tz['xlmc'] = tz.iloc[:, 3]
    # Trip label.
    tz['tz'] = 1
    return tz
# Example #5
# 0
def pre_zc():
    """Load the non-trip (zc) training file and append derived feature columns.

    Reads ``tgl.trainDataZCPath`` as a header-less CSV and adds, in this
    order, the columns ``weather`` (constant 2), ``tz`` (constant label 0),
    ``maxvalue`` / ``minvalue`` / ``avgvalue`` (row-wise statistics over the
    telemetry slice), ``month`` / ``day`` / ``week`` (from the timestamp in
    column 2) and ``xlmc`` (column 1).

    Returns:
        pandas.DataFrame holding every original column plus the derived ones.
    """
    zc_dir = tgl.trainDataZCPath
    if not os.path.exists(zc_dir):
        print_str = 'Error code 102:缺少输入文件,无法完整预测,请将未跳闸数据文件上传至路径:' + tgl.trainDataZCPath + '!'
        ttu.err_state_write(print_str)
        # NOTE(review): nothing here stops execution. Unless err_state_write
        # terminates the process, read_csv below raises on the missing
        # path - confirm against ttu.
    zc = pd.read_csv(zc_dir, encoding='utf-8', header=None)

    # Telemetry columns, sliced before any derived column is appended.
    value = zc.iloc[:, 5:101]

    # Weather is not read from the file; every row gets the constant 2.
    zc['weather'] = 2
    # Trip label.
    zc['tz'] = 0

    # Row-wise max, min and mean of the telemetry slice.
    zc['maxvalue'] = value.max(axis=1)
    zc['minvalue'] = value.min(axis=1)
    zc['avgvalue'] = value.mean(axis=1)

    # Time features, all taken from the timestamp column.
    stamps = pd.DatetimeIndex(zc.iloc[:, 2])
    zc['month'] = stamps.month
    zc['day'] = stamps.day
    zc['week'] = stamps.weekday
    zc['xlmc'] = zc.iloc[:, 1]
    return zc
def predictModel(input_dir, xlid):
    """Predict for each of the next seven days and append the results to CSV.

    One model file per day offset (``Model_<d>_RandomForestRegressor.model``
    under ``tgl.saveModelPath``) is loaded and applied to the features
    ``month, day, week, weather, maxvalue, minvalue, avgvalue`` read from
    ``input_dir``. The base date is taken from ``sys.argv[3]``.

    Args:
        input_dir: path of the merged feature CSV (read with a header row).
        xlid: line id to predict, or ``'-1'`` to predict every line.

    Raises:
        ValueError: if ``xlid`` is not ``'-1'`` and no row carries that id
            (raised after the error has been passed to
            ``ttu.err_state_write``). Previously this path failed with a
            NameError on an unbound variable.

    Side effects:
        Appends rows (no header) with the columns ``time, predict_time, tz,
        xlmc, xlid`` to ``tgl.resultAllPath`` (all lines) or to
        ``tgl.WORK_LIST + tgl.result_list + xlid + tgl.resultPath`` (one line).
    """
    input_ = pd.read_csv(input_dir, encoding='utf-8')

    # Fill missing values with 0.
    input_.fillna(0, inplace=True)
    print(input_.head())
    input_data = input_[[
        'month', 'day', 'week', 'weather', 'maxvalue', 'minvalue', 'avgvalue'
    ]]
    # Normalise ids to stripped strings. astype(str) keeps non-string cells
    # (numeric ids, or the 0 written by fillna above) from crashing on .strip().
    input_['xlid'] = input_['xlid'].astype(str).str.strip()
    print(input_.shape)

    columns = ['time', 'predict_time', 'tz', 'xlmc', 'xlid']
    predict_date = parse(sys.argv[3])

    predict_all = xlid == '-1'
    if not predict_all:
        # The row selection does not depend on the day offset; do it once.
        selected = input_['xlid'] == xlid
        if not selected.any():
            esw_err = 'Error code 103:输入线路id错误或此id数据不存在!请重新输入'
            ttu.err_state_write(esw_err)
            # There is nothing to predict; stop with a clear error.
            raise ValueError(esw_err)
        line_features = input_data[selected]
        line_names = input_[selected].iloc[:, 0]

    # One model per day offset, for the next 7 days.
    for delays in range(7):
        day_no = delays + 1
        end_time = predict_date + datetime.timedelta(days=day_no)
        end_time = end_time.strftime('%Y-%m-%d')

        model_n = 'Model_' + str(day_no) + '_' + 'RandomForestRegressor' + '.model'
        model_dir = tgl.saveModelPath + '/' + model_n

        if predict_all:
            print('预测' + '第' + str(day_no) + '天所有线路...')
            TZpredicts = joblib.load(model_dir)

            prediction = TZpredicts.predict_proba(input_data)
            # NOTE(review): column 0 of predict_proba is the probability of
            # the first entry in the model's classes_; confirm that this is
            # the "tripped" class before reporting it as 'tz'.
            predict1 = prediction[:, 0]
            result = pd.DataFrame({
                'xlmc': input_['xlmc'],
                'xlid': input_['xlid'],
                'tz': predict1,
                'time': tgl.RUN_TIME,
                'predict_time': end_time
            })

            print('所有线路预测成功!!')
            result.to_csv(tgl.resultAllPath,
                          index=False,
                          header=None,
                          columns=columns,
                          mode='a')
            print('result has saved')
        else:
            print('预测' + xlid + '第' + str(day_no) + '天结果')
            TZpredicts = joblib.load(model_dir)

            prediction = TZpredicts.predict_proba(line_features)
            predict1 = prediction[:, 0]
            result = pd.DataFrame({
                'xlmc': line_names,
                'xlid': xlid,
                'tz': predict1,
                'time': tgl.RUN_TIME,
                'predict_time': end_time
            })
            # NOTE(review): the value written is |p - U(0, 1)|, i.e. the model
            # output with uniform random noise subtracted. It is therefore
            # not reproducible, and the all-lines branch does not do this -
            # confirm that this is intended.
            result['tz'] = [
                abs(value - float(random.random())) for value in result['tz']
            ]
            # drop_duplicates returns a new frame; the original call
            # discarded it, so it had no effect.
            result = result.drop_duplicates()

            print(xlid + '预测成功!!')
            result.to_csv(tgl.WORK_LIST + tgl.result_list + xlid +
                          tgl.resultPath,
                          index=False,
                          header=None,
                          columns=columns,
                          mode='a')
# Example #7
# 0
# Script entry point: predict for the line id given in sys.argv[2], provided
# a trained model file already exists.
# NOTE(review): this snippet appears to be cut off - there is no branch for
# the "model missing" case and the final print_str is never used in view.
if __name__ == '__main__':

    # Line to predict (sys.argv[2]).
    # Disabled hard-coded overrides from the original: a sample line name
    # (xlmc) and xlid='-1'.
    xlid = str(sys.argv[2])
    # Case: the model file already exists.
    if os.path.exists(tgl.saveModelPath + tgl.model_name) == True:
        print('模型已经存在,可直接预测')
        # Merge the input data.
        tmm.combine_all_data()

        # Report a missing merged input file.
        if not os.path.exists(tgl.input_dir):
            print_str = 'Error code 103:缺少预测文件,无法完整预测,请将跳闸数据文件上传至路径:' + tgl.input_dir + '!'
            ttu.err_state_write(print_str)
        # Read the merged data.
        # NOTE(review): input_ is only referenced by the disabled logging
        # code below; predictModel re-reads the file from the path.
        input_ = pd.read_csv(tgl.input_dir, encoding='utf-8', header=None)
        # Run the prediction.
        tmm.predictModel(tgl.input_dir, xlid)
        # Record the model run.
        tmm.modelrecord()
        # Disabled in the original: a log_write(...) call that logged either
        # every line name (first column of input_) or the single requested
        # line, together with predict_date and log_dir.
        # Status-table message ("running normally").
        print_str = '运行正常'
# Example #8
# 0
def predictModel(input_dir, xlid):
    """Predict for the day after ``sys.argv[3]`` and write the result to CSV.

    The model at ``tgl.saveModelPath + tgl.model_name`` is applied to the
    features ``month, day, week, weather, maxvalue, minvalue, avgvalue`` read
    from ``input_dir``.

    Args:
        input_dir: path of the merged feature CSV (read with a header row).
        xlid: line id to predict, or ``'-1'`` to predict every line.

    Raises:
        ValueError: if ``xlid`` is not ``'-1'`` and no row carries that id
            (raised after the error has been passed to
            ``ttu.err_state_write``). Previously this path failed with a
            NameError on an unbound variable.

    Side effects:
        Writes (overwriting, no header) the columns ``time, predict_time,
        tzgl, xlmc, xlid`` to ``tgl.resultAllPath`` (all lines) or to
        ``tgl.WORK_LIST + tgl.result_list + xlid + tgl.resultPath`` (one line).
    """
    input_ = pd.read_csv(input_dir, encoding='utf-8')

    # Fill missing values with 0, then pick the model features.
    input_.fillna(0, inplace=True)
    input_data = input_[[
        'month', 'day', 'week', 'weather', 'maxvalue', 'minvalue', 'avgvalue'
    ]]

    print(input_.shape)
    # Normalise ids to stripped strings. astype(str) keeps non-string cells
    # (numeric ids, or the 0 written by fillna above) from crashing on .strip().
    input_['xlid'] = input_['xlid'].astype(str).str.strip()

    # Both branches predict for the day after the date given in sys.argv[3].
    predict_date = parse(sys.argv[3])
    end_time = predict_date + datetime.timedelta(days=+1)
    end_time = end_time.strftime('%Y-%m-%d')
    columns = ['time', 'predict_time', 'tzgl', 'xlmc', 'xlid']

    if xlid == '-1':
        print('预测所有线路...')
        TZpredicts = joblib.load(tgl.saveModelPath + tgl.model_name)

        print(input_data.shape)
        print(input_data.isnull().sum().sum())
        prediction = TZpredicts.predict_proba(input_data)
        print(prediction)
        # NOTE(review): column 0 of predict_proba is the probability of the
        # first entry in the model's classes_; confirm that this is the
        # "tripped" class before reporting it as 'tzgl'.
        predict1 = prediction[:, 0]

        result = pd.DataFrame({
            'xlmc': input_.iloc[:, 0],
            'xlid': input_['xlid'],
            'tzgl': predict1,
            'time': tgl.RUN_TIME,
            'predict_time': end_time
        })
        print('所有线路预测成功!!')
        result.to_csv(tgl.resultAllPath,
                      index=False,
                      header=None,
                      columns=columns)
        print('result has saved')
        return

    print('预测' + xlid)
    TZpredicts = joblib.load(tgl.saveModelPath + tgl.model_name)
    print(xlid + '!')
    print('input:' + str(len(xlid)))
    print(input_.groupby('xlid')[['xlid']].count())

    selected = input_['xlid'] == xlid
    line_features = input_data[selected]
    if line_features.shape[0] == 0:
        print('bc')
        esw_err = 'Error code 103:输入线路名称错误或此线路不存在!请重新输入'
        ttu.err_state_write(esw_err)
        # There is nothing to predict; stop with a clear error.
        raise ValueError(esw_err)

    print('zc')
    prediction = TZpredicts.predict_proba(line_features)
    predict1 = prediction[:, 0]
    result = pd.DataFrame({
        'xlmc': input_[selected].iloc[:, 0],
        'xlid': xlid,
        'tzgl': predict1,
        'time': tgl.RUN_TIME,
        'predict_time': end_time
    })
    print(xlid + '预测成功!!')
    # NOTE(review): the value written is |p - U(0, 1)|, i.e. the model output
    # with uniform random noise subtracted. It is therefore not reproducible,
    # and the all-lines branch does not do this - confirm that this is intended.
    result['tzgl'] = [
        abs(value - float(random.random())) for value in result['tzgl']
    ]
    result.to_csv(tgl.WORK_LIST + tgl.result_list + xlid + tgl.resultPath,
                  index=False,
                  header=None,
                  columns=columns)