def test(data_file=None):
    """Smoke-check the evidence feature extractor against a raw data dump.

    Reads the ``feature_names`` option of the ``prefix_feature`` and
    ``evidence_feature`` sections of ``feature_extract/feature_cfg.conf``,
    then runs ``parse_line`` over every record pair produced by
    ``data_interface.DataWraper(...).wrap_batch_data()`` and checks that the
    number of returned values equals the number of configured evidence
    feature names. Each record's (name, value) pairs are printed.

    Args:
        data_file: Path handed to ``DataWraper`` as ``src``. When omitted,
            the developer's original sample file is used, so existing
            zero-argument calls behave as before.

    Raises:
        AssertionError: If ``parse_line`` returns a different number of
            values than there are configured evidence feature names.
    """
    if data_file is None:
        # Alternative sample used on the Linux dev box:
        # "/home/sun/桌面/account_model/data/2021-05-08#2021-05-08.txt"
        data_file = "C:/Users/sunyyao/Desktop/NanGuo/xgb_model/data/2021-05-12#2021-05-12.txt"

    cnf = configparser.ConfigParser()
    cnf.read("feature_extract/feature_cfg.conf")

    # Feature names are stored as one comma-separated option per section.
    prefix_feature_names = [
        name.strip()
        for name in cnf.get('prefix_feature', 'feature_names').split(',')
    ]
    print("perfix_feature_names", prefix_feature_names)

    feature_names = [
        name.strip()
        for name in cnf.get('evidence_feature', 'feature_names').split(",")
    ]

    data_wraper = data_interface.DataWraper(src=data_file)
    data_gen = data_wraper.wrap_batch_data()
    # Discard the generator's first item (the original notes it is None).
    next(data_gen)

    for main_account_data, spread_account_data in data_gen:
        task_create_time = datetime.datetime.strptime(
            main_account_data["add_time"], TIME_FORMAT
        )
        evidence_feature = parse_line(
            main_account_data,
            spread_account_data,
            task_create_time=task_create_time,
        )
        assert len(evidence_feature) == len(feature_names), \
            "not match,feature_names is:%d but return is :%d" % (
                len(feature_names), len(evidence_feature))
        print(list(zip(feature_names, evidence_feature)))
def main():
    """Extract all feature groups for every record and write them out.

    Iterates the record pairs produced by
    ``data_interface.DataWraper(src=SOURCE_FILE).wrap_batch_data()`` and, for
    each one, runs the intercept, evidence, wxbasic and bg_judge feature
    extractors. When all four return a non-empty result, one line is written
    to the module-level ``writer``: ``main_account_id``, ``account`` and
    ``quality`` followed by the four feature lists in that order, every item
    converted with ``str`` and joined with ``"\\001"``, terminated by a
    newline. Records for which any extractor returns an empty result are
    skipped.

    ``writer`` is closed when the function exits, including when an
    exception interrupts processing; the exception still propagates.
    """
    try:
        data_wraper = data_interface.DataWraper(src=SOURCE_FILE)
        data_gen = data_wraper.wrap_batch_data()
        # Discard the generator's first item (the original notes it is None).
        next(data_gen)

        for main_account_data, spread_account_data in data_gen:
            task_create_time = datetime.datetime.strptime(
                main_account_data["add_time"], TIME_FORMAT
            )

            # Leading identification columns of the output record.
            output = [
                main_account_data["main_account_id"],
                main_account_data["account"],
                main_account_data["quality"],
            ]

            # NOTE(review): `pase_line` is kept exactly as the original
            # spells it; confirm it matches the name defined in
            # intercept_feature.
            intercept_output = intercept_feature.pase_line(
                main_account_data=main_account_data,
                task_create_time=task_create_time,
            )
            evidence_output = evidence_feature.parse_line(
                main_account_data=main_account_data,
                spread_account_data=spread_account_data,
                task_create_time=task_create_time,
            )
            wxbasic_output = wxbasic_feature.parse_line(
                main_account_data=main_account_data,
                spread_account_data=spread_account_data,
                task_create_time=task_create_time,
            )
            bg_judge_output = bg_judge_feature.parse_line(
                main_account_data=main_account_data,
            )

            # Incomplete records are ignored here; alternatively, default
            # values could be filled in instead.
            if not (intercept_output and evidence_output
                    and wxbasic_output and bg_judge_output):
                continue

            output.extend(intercept_output)
            output.extend(evidence_output)
            output.extend(wxbasic_output)
            output.extend(bg_judge_output)

            write_string = "\001".join(str(item) for item in output)
            writer.write(write_string)
            writer.write("\n")
    finally:
        # Release the output handle even if reading or extraction failed.
        writer.close()
traceback.print_exc() return [] if __name__ == "__main__": data_file = "/home/sun/桌面/account_model/data/2021-05-08#2021-05-08.txt" cnf = configparser.ConfigParser() cnf.read("feature_extract/feature_cfg.conf") perfix_feature_names = cnf.get('prefix_feature', 'feature_names') perfix_feature_names = perfix_feature_names.split(',') perfix_feature_names = [x.strip() for x in perfix_feature_names] print("perfix_feature_names", perfix_feature_names) feature_names = cnf.get('intercept_feature', 'feature_names') feature_names = feature_names.split(",") feature_names = [x.strip() for x in feature_names] data_wraper = data_interface.DataWraper(src=data_file) data_gen = data_wraper.wrap_batch_data() # skip None next(data_gen) for main_account_data, _ in data_gen: add_time = main_account_data["add_time"] task_create_time = datetime.datetime.strptime(add_time, TIME_FORMAT) task_create_time = datetime.datetime.strptime(add_time, TIME_FORMAT) interrupt_feature = parse_wxsafe_block_data( msg=main_account_data, task_create_time=task_create_time, key="blockmessage") assert len(interrupt_feature) == len( feature_names ), "not match,feature_names is:%d but return is :%d" % ( len(feature_names), len(interrupt_feature))