示例#1
0
def test(data_file=None):
    """Smoke-check the evidence feature extractor against a sample data file.

    Reads the configured feature names from
    ``feature_extract/feature_cfg.conf``, runs ``parse_line`` on every record
    produced by ``data_interface.DataWraper`` and verifies that the number of
    returned features equals the number of names configured in the
    ``evidence_feature`` section. Each record's ``(name, value)`` pairs are
    printed for manual inspection.

    Args:
        data_file: Path of the data file to read. When ``None`` (the
            default) the sample path that used to be hard-coded here is used,
            so existing ``test()`` calls behave as before.

    Raises:
        AssertionError: If ``parse_line`` returns a different number of
            features than the configuration declares.
    """
    if data_file is None:
        # Another sample file path kept for reference:
        # "/home/sun/桌面/account_model/data/2021-05-08#2021-05-08.txt"
        data_file = "C:/Users/sunyyao/Desktop/NanGuo/xgb_model/data/2021-05-12#2021-05-12.txt"

    cnf = configparser.ConfigParser()
    cnf.read("feature_extract/feature_cfg.conf")

    # Feature names are stored as one comma-separated string per section.
    perfix_feature_names = [
        name.strip()
        for name in cnf.get('prefix_feature', 'feature_names').split(',')
    ]
    print("perfix_feature_names", perfix_feature_names)
    feature_names = [
        name.strip()
        for name in cnf.get('evidence_feature', 'feature_names').split(",")
    ]

    data_wraper = data_interface.DataWraper(src=data_file)
    data_gen = data_wraper.wrap_batch_data()
    # skip None: the first item yielded by the generator is discarded
    next(data_gen)

    for main_account_data, spread_account_data in data_gen:
        add_time = main_account_data["add_time"]
        task_create_time = datetime.datetime.strptime(add_time, TIME_FORMAT)
        evidence_feature = parse_line(
            main_account_data,
            spread_account_data,
            task_create_time=task_create_time,
        )
        assert len(evidence_feature) == len(feature_names), (
            "not match,feature_names is:%d but return is :%d"
            % (len(feature_names), len(evidence_feature))
        )
        print(list(zip(feature_names, evidence_feature)))
def main():
    """Extract every feature group for each record and write one line per record.

    Iterates over the batches produced by ``data_interface.DataWraper`` for
    ``SOURCE_FILE``. For each record the intercept, evidence, wxbasic and
    bg_judge feature extractors are run; when all four return a non-empty
    result, the record's ``main_account_id``, ``account`` and ``quality``
    followed by all feature values are written to the module-level ``writer``
    as a single ``"\\001"``-separated line.

    The writer is closed in a ``finally`` clause so that it is released even
    if reading or feature extraction raises; the exception still propagates.
    """
    try:
        data_wraper = data_interface.DataWraper(src=SOURCE_FILE)
        data_gen = data_wraper.wrap_batch_data()
        # skip None: the first item yielded by the generator is discarded
        next(data_gen)

        for main_account_data, spread_account_data in data_gen:
            main_account_id = main_account_data["main_account_id"]
            wxid = main_account_data["account"]
            quality = main_account_data["quality"]
            add_time = main_account_data["add_time"]
            task_create_time = datetime.datetime.strptime(add_time, TIME_FORMAT)

            intercept_output = intercept_feature.pase_line(
                main_account_data=main_account_data,
                task_create_time=task_create_time,
            )
            evidence_output = evidence_feature.parse_line(
                main_account_data=main_account_data,
                spread_account_data=spread_account_data,
                task_create_time=task_create_time,
            )
            wxbasic_output = wxbasic_feature.parse_line(
                main_account_data=main_account_data,
                spread_account_data=spread_account_data,
                task_create_time=task_create_time,
            )
            bg_judge_output = bg_judge_feature.parse_line(
                main_account_data=main_account_data,
            )

            feature_groups = (
                intercept_output,
                evidence_output,
                wxbasic_output,
                bg_judge_output,
            )
            # Records with any empty feature group are ignored here;
            # alternatively, default values could be filled in instead.
            if not all(feature_groups):
                continue

            # Identification columns first, then the groups in a fixed order.
            output = [main_account_id, wxid, quality]
            for group in feature_groups:
                output.extend(group)

            write_string = "\001".join(str(item) for item in output)
            writer.write(write_string)
            writer.write("\n")
    finally:
        # Always release the output, including when an exception is raised.
        writer.close()
示例#3
0
        traceback.print_exc()
        return []


if __name__ == "__main__":
    # Ad-hoc self-check: run the intercept ("blockmessage") feature parser
    # over a sample data file and verify that it returns exactly as many
    # values as the `intercept_feature` section of the config declares.
    data_file = "/home/sun/桌面/account_model/data/2021-05-08#2021-05-08.txt"
    cnf = configparser.ConfigParser()
    cnf.read("feature_extract/feature_cfg.conf")
    # Feature names are stored as one comma-separated string per section.
    perfix_feature_names = [
        x.strip()
        for x in cnf.get('prefix_feature', 'feature_names').split(',')
    ]
    print("perfix_feature_names", perfix_feature_names)
    feature_names = [
        x.strip()
        for x in cnf.get('intercept_feature', 'feature_names').split(",")
    ]
    data_wraper = data_interface.DataWraper(src=data_file)
    data_gen = data_wraper.wrap_batch_data()
    # skip None: the first item yielded by the generator is discarded
    next(data_gen)
    for main_account_data, _ in data_gen:
        add_time = main_account_data["add_time"]
        # Parsed once per record (the original repeated this assignment).
        task_create_time = datetime.datetime.strptime(add_time, TIME_FORMAT)
        interrupt_feature = parse_wxsafe_block_data(
            msg=main_account_data,
            task_create_time=task_create_time,
            key="blockmessage")
        assert len(interrupt_feature) == len(
            feature_names
        ), "not match,feature_names is:%d but return is :%d" % (
            len(feature_names), len(interrupt_feature))