예제 #1
0
def one_line(line):
    """Convert one tab-separated record into a ``label<TAB>features`` row.

    The record is split on tabs, each kept field is stripped, and at most
    ``len(FEA.fea_number_dict)`` fields are kept.  The label is read from the
    1-based column registered for ``label_name`` in ``FEA.fea_number_dict``.
    Every column (padded with ``0`` up to ``max_len``) whose configured name
    differs from ``cst.label_name`` is passed to the handler selected by the
    text before ``#`` in its ``method``; falsy handler results are dropped.
    The remaining entries are written as space-separated, colon-joined items
    ordered by the integer value of their first element, and
    ``max_feature_id_num + ":0"`` is appended unless the last entry (in
    column order) already starts with ``max_feature_id_num``.

    The body avoids Python-2-only syntax (print statements, tuple parameters,
    indexing the result of ``map``) so it runs unchanged on Python 2 and 3.

    :param line: raw input record with tab-separated fields.
    :return: ``label<TAB>features``.  If the label cannot be extracted the
        row is ``"0"<TAB>max_feature_id_num + ":0"``; if encoding the
        features raises, the row is ``label<TAB>`` with an empty feature part.
    """
    with cst.TimeRecord("initial") as _:
        # Slice first so only the fields that are actually kept get stripped.
        fea = [field.strip()
               for field in line.split("\t")[:len(FEA.fea_number_dict)]]
        try:
            label = str(
                cst.safe_int(fea[FEA.fea_number_dict[label_name] - 1]))
        except Exception as e:
            # Best effort: dump diagnostics and emit a placeholder row
            # instead of aborting the whole run on one bad record.
            print(e)
            print("%s %s %s" % (len(FEA.fea_number_dict), len(fea), line))
            print(FEA.fea_number_dict)
            print(fea)
            return '\t'.join(["0", max_feature_id_num + ":0"])

    try:
        # Dispatch table keyed by the method prefix; built once per record
        # (inside the try so a failure here is handled like any other).
        handlers = {
            "cate": normal,
            "origin": normal,
            "number": normal,
            "none": none,
            "pair": pair
        }

        rs = []
        padded = fea + [0] * (max_len - len(fea))
        for n, fea_value in enumerate(padded, start=1):
            # n is the 1-based column number, fea_value the raw column value.
            fc = FEA.fea_conf[FEA.num_fea_dict[n]]
            if fc.name == cst.label_name:
                continue  # the label column is not a feature
            encoded = handlers[fc.method.split("#")[0]](fc, fea_value, fea)
            if encoded:
                rs.append(encoded)

        pairs = [":".join(str(part) for part in entry)
                 for entry in sorted(rs, key=lambda item: int(item[0]))]
        # NOTE(review): the check looks at the last entry in column order,
        # not at the highest id after sorting -- kept as in the original;
        # confirm that handlers always yield ids in ascending column order.
        if not (rs and max_feature_id_num == rs[-1][0]):
            pairs.append(max_feature_id_num + ":0")
        data_line = " ".join(pairs)
    except Exception as e:
        # Best effort: keep the label and emit an empty feature part.
        print(e)
        return '\t'.join([label, ""])
    return '\t'.join([label, data_line])
예제 #2
0
            return fun_key[fc.method.split("#")[0]](fc, fea_value, fea)

    try:
        rs = filter(
            lambda x: x,
            map(one_fea, enumerate(fea + [0] * (max_len - len(fea)), start=1)))
        if rs and max_feature_id_num == rs[-1][0]:
            data_line = " ".join(
                map(lambda x: ":".join(map(str, x)),
                    sorted(rs, key=lambda x: int(x[0]))))
        else:
            data_line = " ".join(
                map(lambda x: ":".join(map(str, x)),
                    sorted(rs, key=lambda x: int(x[0]))) +
                [max_feature_id_num + ":0"])
    except Exception as e:
        print e
        return '\t'.join([one_lable, ""])
    else:
        return '\t'.join([one_lable, data_line])


import time

# Timestamp taken before the run starts (not read again within this excerpt).
t = time.time()
with cst.TimeRecord("total") as _:
    # Convert every record of `data` with one_line across 32 worker processes.
    pool = mp.Pool(32)
    try:
        # Drop falsy rows before writing.
        rs = filter(lambda x: x, pool.map(one_line, data))
    finally:
        # pool.map has returned (or raised), so no result is still needed:
        # stop the workers and reap them instead of leaking 32 processes.
        pool.terminate()
        pool.join()
    # Write the rows newline-separated, UTF-8 encoded, to `feature_lines`.
    with codecs.open(feature_lines, 'w', 'utf8') as f:
        f.write('\n'.join(rs))