def peijian_model(handle):
    """Train the model on the handle's monthly data and persist the artifacts.

    The data returned by ``handle.get_months_data()`` is passed through
    ``outlier_process_train`` and ``pp_yc_process`` before ``train_model``
    is called. The two trained objects are written with ``joblib`` under
    ``pp_yc/``, then the model is run on the held-out split and the
    predictions are handed to ``valuation``.

    Args:
        handle: Object exposing ``get_months_data()`` and a ``logger``.
    """
    began_at = time.time()

    # Preprocessing runs in the same order as before: outliers first,
    # then the pp_yc step.
    frame = pp_yc_process(outlier_process_train(handle.get_months_data()))

    scaler, model, held_out_x, held_out_y = train_model(frame)
    print('模型训练耗时为{}'.format(get_time_dif(began_at)))

    # Persist the standardization object first, then the fitted model.
    artifacts = (
        (scaler, "pp_yc/data_ss.model"),
        (model, "pp_yc/gbm.model"),
    )
    for artifact, target_path in artifacts:
        joblib.dump(artifact, target_path)

    predictions = model.predict(held_out_x)
    valuation(predictions, held_out_y)
    handle.logger.info('get model done!')
def main():
    """Run the whole pipeline once and print the total elapsed time.

    Stages run strictly in sequence on a single ``Process`` handle:
    table truncation and loading, statistics, model training, prediction,
    verification, and the final post-processing / write-back steps.
    """
    # Expose only CUDA device 0 to anything that uses the GPU.
    os.environ['CUDA_VISIBLE_DEVICES'] = "0"
    began_at = time.time()
    handle = Process("dd_data2", "xdf123", "LBORA", "10.9.1.169")

    # Loading stages; each method is looked up right before it is called.
    for stage in ('trunct_table', 'handle2oracle', 'changfang_price'):
        getattr(handle, stage)()

    statistics_pre(handle)
    peijian_model(handle)
    handle.model_predict()
    verification()

    # Post-processing and write-back stages, in their original order.
    closing_stages = (
        'yzt_process',
        'bg_process',
        'left_right',
        'to_oracle',
        'inter2oracle',
    )
    for stage in closing_stages:
        getattr(handle, stage)()

    print('总耗时:', get_time_dif(began_at))
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copies the most recent window of rows from LB_PEIJIAN_ORIGINAL_DATA_LOAD
# into LB_PEIJIAN_ORIGINAL_HANDLE.

# Imported explicitly (and before the star import, so any `time` that
# ForCall01 exports still takes precedence) because the __main__ guard
# below calls time.time().
import time

from ForCall01 import *
from branch_code.dadi_loader import get_time_from_table, get_time_dif


def handle2oracle():
    """Insert the latest time window of loaded rows into the handle table.

    Reads the maximum ``OPERATETIMEFORHIS`` from
    ``LB_PEIJIAN_ORIGINAL_DATA_LOAD``, derives a start/end pair from it via
    ``get_time_from_table``, and inserts every row whose
    ``OPERATETIMEFORHIS`` lies between those two values into
    ``LB_PEIJIAN_ORIGINAL_HANDLE``. Prints a confirmation when done.
    """
    oracle = useOracle("dd_data2", "xdf123", "LBORA169")
    # NOTE(review): the user/host part of this connect string looks mangled
    # ("[email protected]"); confirm the intended value against the
    # useOracle arguments above before relying on it.
    account = "dd_data2/[email protected]/lbora"

    latest_query = '''select max(OPERATETIMEFORHIS) from LB_PEIJIAN_ORIGINAL_DATA_LOAD t'''
    latest = oracle.getData(latest_query, account)
    # The result column is keyed by the aggregate expression itself.
    latest_stamp = latest['MAX(OPERATETIMEFORHIS)'][0]
    start_time, endtime = get_time_from_table(latest_stamp)

    # Both bounds come from get_time_from_table, not from user input.
    insert_stmt = (
        "insert into LB_PEIJIAN_ORIGINAL_HANDLE "
        "select * from LB_PEIJIAN_ORIGINAL_DATA_LOAD "
        "where OPERATETIMEFORHIS between "
        "to_date('{}','yyyy/mm/dd hh24:mi:ss') and "
        "to_date('{}','yyyy/mm/dd hh24:mi:ss') "
    ).format(start_time, endtime)
    oracle.executeCommitSubmit(insert_stmt, account)
    print('handle2oracle done!')


if __name__ == '__main__':
    start_time = time.time()
    handle2oracle()
    print(get_time_dif(start_time))
def main():
    """Train every model group, then run the closing steps and print timing.

    Three nested helpers cover the large-area, single-area and high-end
    brand groups. They are invoked in that order on one ``Process`` handle,
    followed by ``outlier``, ``system_value_oracle`` and ``generate_system``.
    """
    # Statistics methods on the handle share these name prefixes.
    stat_prefixes = ('youqi', 'banjin', 'chaizhuang', 'jixiu', 'diangong')

    def large_area_model(handle):
        """Train the paint, sheet-metal and disassembly models, plus the
        statistics, for the 33 regions (the 'large_area' group)."""
        chexi2id = chexi_id.chexi_transform_id()

        handle.youqi_model('youqi', 'large_area', chexi2id)
        handle.banjin_model('banjin', 'large_area', chexi2id)
        handle.chaizhaung_model('chaizhuang', 'large_area', chexi2id)

        for prefix in stat_prefixes:
            getattr(handle, prefix + '_statistics_pre')('large_area', chexi2id)
        print('大区域3个模型完成!')

    def single_area_model(handle):
        """Train the paint, sheet-metal and disassembly models, plus the
        statistics, separately for Jiangsu, Shenzhen and Yunnan."""
        # Region key -> name of the chexi_id function that builds its id map.
        # Resolved lazily inside the loop, one region at a time.
        region_loaders = (
            ('jiangsu', 'jiangsu_chexi_transform_id'),
            ('shenzhen', 'shenzhen_chexi_transform_id'),
            ('yunnan', 'yunnan_chexi_transform_id'),
        )
        for jigou, loader_name in region_loaders:
            chexi2id = getattr(chexi_id, loader_name)()

            handle.single_area_youqi_model('youqi', jigou, chexi2id)
            handle.single_area_banjin_model('banjin', jigou, chexi2id)
            handle.single_area_chaizhaung_model('chaizhuang', jigou, chexi2id)

            for prefix in stat_prefixes:
                getattr(handle, prefix + '_statistics_pre')(jigou, chexi2id)
        print('3个区域3个模型完成!')

    def high_end_brand_model(handle):
        """Train the paint, sheet-metal and disassembly models, plus the
        statistics, for the 25 nationwide high-end brands."""
        group_dict, group2id = chexi_id.gaoduan_chexi_transform_id()

        handle.youqi_25_model(group_dict, group2id)
        handle.banjin_25_model(group_dict, group2id)
        handle.chaizhaung_25_model(group_dict, group2id)

        for prefix in stat_prefixes:
            getattr(handle, prefix + '_statistics_25_pre')(group_dict, group2id)
        print('高端品牌模型完成!')

    began_at = time.time()
    handle = Process("DDPJCXBUSI", "ccic8519", "ccicgis", "10.1.88.75")
    handle.trunct_table()
    handle.handle2oracle()

    # Model groups run in a fixed order on the same handle.
    for train_group in (large_area_model, single_area_model, high_end_brand_model):
        train_group(handle)

    handle.outlier()
    handle.system_value_oracle()
    handle.generate_system()
    print('总耗时为{}'.format(get_time_dif(began_at)))
lambda row: get_bianma(row['ORIGINALCODE'], row['PARTSTANDARDCODE']), axis=1) df['WAY_FLAG'] = df.apply(lambda row: get_flag(row['CODE']), axis=1) df = df.loc[df['WAY_FLAG'] == 1].reset_index(drop=True) code_ls = list(set(df['CODE'].tolist())) code2id = {code_ls[i - 1]: i for i in range(1, len(code_ls) + 1)} with open('dict/code2id.json', 'w', encoding='utf-8') as writer3: json.dump(code2id, writer3, ensure_ascii=False) df['code_id'] = df['CODE'].map(code2id) df = df[(df['CHGCOMPSET'] == 2) | (df['CHGCOMPSET'] == 3)] print(df.info()) df = pd.DataFrame(df, columns=[ 'jigou_id', 'brand_id', 'compname_id', 'code_id', 'NEW_IS4S', 'CHGCOMPSET', 'UNITPRICE' ]) df.dropna(subset=[ 'jigou_id', 'brand_id', 'compname_id', 'code_id', 'NEW_IS4S', 'CHGCOMPSET', 'UNITPRICE' ], inplace=True) df = df.astype(float) print(df.shape[0]) start_time_model = time.time() ss, gbm_model, x_test, y_test = train_model(df) print('模型训练耗时为{}'.format(get_time_dif(start_time_model))) joblib.dump(ss, "pinpai/data_ss.pinpai") ## 将标准化模型保存 joblib.dump(gbm_model, "gbm.pinpai") ## 将模型保存 y_pred = gbm_model.predict(x_test) valuation(y_pred, y_test)