Пример #1
0
def peijian_model(handle):
    """Train a model on the handle's monthly data, persist it, and evaluate it.

    The data returned by ``handle.get_months_data()`` is passed through
    ``outlier_process_train`` and ``pp_yc_process`` and then handed to
    ``train_model``. The two fitted objects it returns are written with
    ``joblib`` under ``pp_yc/``, and predictions on the returned test split
    are passed to ``valuation``.

    Args:
        handle: object providing ``get_months_data()`` and a ``logger``.
    """
    train_started = time.time()

    # Compose the preprocessing steps so only the final frame stays referenced.
    prepared = pp_yc_process(outlier_process_train(handle.get_months_data()))
    ss, xgb_model, x_test, y_test = train_model(prepared)
    print('模型训练耗时为{}'.format(get_time_dif(train_started)))

    # Save the standardisation model first, then the trained model.
    joblib.dump(ss, "pp_yc/data_ss.model")
    joblib.dump(xgb_model, "pp_yc/gbm.model")

    # Evaluate on the test split returned by train_model.
    predictions = xgb_model.predict(x_test)
    valuation(predictions, y_test)

    handle.logger.info('get model done!')
Пример #2
0
def main():
    """Run the full pipeline on one ``Process`` handle and print the total elapsed time.

    Every step is executed strictly in the order written below; each call is
    made for its side effects and no return value is used.
    """
    # Expose only device "0" through CUDA_VISIBLE_DEVICES for this process.
    os.environ['CUDA_VISIBLE_DEVICES'] = "0"
    start_time = time.time()
    # NOTE(review): the four arguments are hard-coded literals — presumably
    # database user, password, service name and host. Confirm, and consider
    # loading them from configuration instead of source code.
    handle = Process("dd_data2", "xdf123", "LBORA", "10.9.1.169")
    handle.trunct_table()
    handle.handle2oracle()
    handle.changfang_price()
    # Module-level steps that take the handle: statistics, then model training.
    statistics_pre(handle)
    peijian_model(handle)
    handle.model_predict()
    # Module-level step that takes no arguments.
    verification()
    handle.yzt_process()
    handle.bg_process()
    handle.left_right()
    handle.to_oracle()
    handle.inter2oracle()
    # The printed label means "total elapsed time".
    print('总耗时:', get_time_dif(start_time))
Пример #3
0
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
from ForCall01 import *
from branch_code.dadi_loader import get_time_from_table, get_time_dif


def handle2oracle():
    """Copy a time window of rows from the load table into the handle table.

    Reads the maximum ``OPERATETIMEFORHIS`` from
    ``LB_PEIJIAN_ORIGINAL_DATA_LOAD``, turns it into a (start, end) pair with
    ``get_time_from_table``, and inserts every load-table row whose
    ``OPERATETIMEFORHIS`` lies between those two values into
    ``LB_PEIJIAN_ORIGINAL_HANDLE``.
    """
    # NOTE(review): this connection string contains the literal text
    # "[email protected]", which looks like an obfuscation artefact rather
    # than a real password@host pair — confirm the intended value.
    account = "dd_data2/[email protected]/lbora"
    oracle = useOracle("dd_data2", "xdf123", "LBORA169")

    latest_query = '''select  max(OPERATETIMEFORHIS) from  LB_PEIJIAN_ORIGINAL_DATA_LOAD t'''
    latest_stamp = oracle.getData(latest_query, account)['MAX(OPERATETIMEFORHIS)'][0]
    window_start, window_end = get_time_from_table(latest_stamp)

    # Both bounds are formatted into the statement as to_date(...) arguments.
    insert_template = '''insert into LB_PEIJIAN_ORIGINAL_HANDLE select * from LB_PEIJIAN_ORIGINAL_DATA_LOAD where OPERATETIMEFORHIS between to_date('{}','yyyy/mm/dd hh24:mi:ss') and to_date('{}','yyyy/mm/dd hh24:mi:ss') '''
    insert_statement = insert_template.format(window_start, window_end)
    oracle.executeCommitSubmit(insert_statement, account)

    print('handle2oracle done!')


# Script entry point: run the copy once and print what get_time_dif returns
# for the timestamp taken just before the run.
if __name__ == '__main__':
    # NOTE(review): `time` is not imported by name in the visible imports;
    # presumably it arrives through `from ForCall01 import *` — confirm.
    start_time = time.time()
    handle2oracle()
    print(get_time_dif(start_time))
Пример #4
0
def main():
    """Run the complete training pipeline and print the total elapsed time.

    A ``Process`` handle is created, its tables are prepared, and three model
    groups are trained in order (large area, single areas, high-end brands).
    The handle's post-processing steps then run.
    """

    def run_area_statistics(handle, area, chexi2id):
        """Run the five per-area statistics steps for ``area`` in fixed order."""
        handle.youqi_statistics_pre(area, chexi2id)
        handle.banjin_statistics_pre(area, chexi2id)
        handle.chaizhuang_statistics_pre(area, chexi2id)
        handle.jixiu_statistics_pre(area, chexi2id)
        handle.diangong_statistics_pre(area, chexi2id)

    def large_area_model(handle):
        """Train the paint, sheet-metal and disassembly models plus the
        statistics for the 33 regions."""
        chexi2id = chexi_id.chexi_transform_id()
        handle.youqi_model('youqi', 'large_area', chexi2id)
        handle.banjin_model('banjin', 'large_area', chexi2id)
        handle.chaizhaung_model('chaizhuang', 'large_area', chexi2id)
        run_area_statistics(handle, 'large_area', chexi2id)
        print('大区域3个模型完成!')

    def single_area_model(handle):
        """Train the paint, sheet-metal and disassembly models plus the
        statistics separately for Jiangsu, Shenzhen and Yunnan."""
        for jigou in ('jiangsu', 'shenzhen', 'yunnan'):
            # Each area has its own loader named "<area>_chexi_transform_id";
            # it is looked up only when that area's turn comes.
            load_chexi_ids = getattr(chexi_id, jigou + '_chexi_transform_id')
            chexi2id = load_chexi_ids()
            handle.single_area_youqi_model('youqi', jigou, chexi2id)
            handle.single_area_banjin_model('banjin', jigou, chexi2id)
            handle.single_area_chaizhaung_model('chaizhuang', jigou, chexi2id)
            run_area_statistics(handle, jigou, chexi2id)
        print('3个区域3个模型完成!')

    def high_end_brand_model(handle):
        """Train the paint, sheet-metal and disassembly models plus the
        statistics for the 25 nationwide high-end brands."""
        brand_groups, brand_group_ids = chexi_id.gaoduan_chexi_transform_id()
        handle.youqi_25_model(brand_groups, brand_group_ids)
        handle.banjin_25_model(brand_groups, brand_group_ids)
        handle.chaizhaung_25_model(brand_groups, brand_group_ids)
        handle.youqi_statistics_25_pre(brand_groups, brand_group_ids)
        handle.banjin_statistics_25_pre(brand_groups, brand_group_ids)
        handle.chaizhuang_statistics_25_pre(brand_groups, brand_group_ids)
        handle.jixiu_statistics_25_pre(brand_groups, brand_group_ids)
        handle.diangong_statistics_25_pre(brand_groups, brand_group_ids)
        print('高端品牌模型完成!')

    run_started = time.time()
    pipeline = Process("DDPJCXBUSI", "ccic8519", "ccicgis", "10.1.88.75")

    # Prepare the tables before any training starts.
    pipeline.trunct_table()
    pipeline.handle2oracle()

    # Train the three model groups, always in this order.
    large_area_model(pipeline)
    single_area_model(pipeline)
    high_end_brand_model(pipeline)

    # Post-processing steps on the same handle.
    pipeline.outlier()
    pipeline.system_value_oracle()
    pipeline.generate_system()

    print('总耗时为{}'.format(get_time_dif(run_started)))
Пример #5
0
    lambda row: get_bianma(row['ORIGINALCODE'], row['PARTSTANDARDCODE']),
    axis=1)
# Flag every row from its CODE value and keep only the rows flagged 1.
df['WAY_FLAG'] = df.apply(lambda record: get_flag(record['CODE']), axis=1)
keep_mask = df['WAY_FLAG'] == 1
df = df.loc[keep_mask].reset_index(drop=True)

# Number the distinct codes from 1, store the mapping as JSON, and attach the
# resulting id to every row.
code_ls = list(set(df['CODE'].tolist()))
code2id = {code: position for position, code in enumerate(code_ls, start=1)}
with open('dict/code2id.json', 'w', encoding='utf-8') as writer3:
    json.dump(code2id, writer3, ensure_ascii=False)
df['code_id'] = df['CODE'].map(code2id)

# Keep only rows whose CHGCOMPSET equals 2 or 3.
chgcompset = df['CHGCOMPSET']
df = df[(chgcompset == 2) | (chgcompset == 3)]
print(df.info())

# Restrict the frame to the columns handed to train_model, drop rows missing
# any of them, and convert everything to float.
model_columns = [
    'jigou_id', 'brand_id', 'compname_id', 'code_id',
    'NEW_IS4S', 'CHGCOMPSET', 'UNITPRICE',
]
df = pd.DataFrame(df, columns=model_columns)
df.dropna(subset=model_columns, inplace=True)
df = df.astype(float)
print(df.shape[0])

# Train, report the training time, persist both fitted objects, and evaluate
# on the test split returned by train_model.
start_time_model = time.time()
ss, gbm_model, x_test, y_test = train_model(df)
print('模型训练耗时为{}'.format(get_time_dif(start_time_model)))
joblib.dump(ss, "pinpai/data_ss.pinpai")  # save the standardisation model
# NOTE(review): this file goes to the working directory while the file above
# goes under pinpai/ — confirm that the differing locations are intentional.
joblib.dump(gbm_model, "gbm.pinpai")  # save the trained model
y_pred = gbm_model.predict(x_test)
valuation(y_pred, y_test)