示例#1
0
def one_line(line):

    with cst.TimeRecord("initial") as _:
        fea = map(lambda x: x.strip(),
                  line.split("\t"))[:len(FEA.fea_number_dict)]
        try:
            one_lable = str(
                cst.safe_int(fea[FEA.fea_number_dict[label_name] - 1]))
        except Exception as e:
            print e
            print len(FEA.fea_number_dict), len(fea), line
            print FEA.fea_number_dict
            print fea
            return '\t'.join(["0", max_feature_id_num + ":0"])

    def one_fea((n, fea_value)):
        '''
        1. n,fea_value
        2. fea_name or fea_name_list ,fea_value or fea_value_list

        调用获取v
        :return:
        '''

        fea_name = FEA.num_fea_dict[n]
        fc = FEA.fea_conf[fea_name]
        fun_key = {
            "cate": normal,
            "origin": normal,
            "number": normal,
            "none": none,
            "pair": pair
        }
        if fc.name != cst.label_name:
            return fun_key[fc.method.split("#")[0]](fc, fea_value, fea)

    try:
        rs = filter(
            lambda x: x,
            map(one_fea, enumerate(fea + [0] * (max_len - len(fea)), start=1)))
        if rs and max_feature_id_num == rs[-1][0]:
            data_line = " ".join(
                map(lambda x: ":".join(map(str, x)),
                    sorted(rs, key=lambda x: int(x[0]))))
        else:
            data_line = " ".join(
                map(lambda x: ":".join(map(str, x)),
                    sorted(rs, key=lambda x: int(x[0]))) +
                [max_feature_id_num + ":0"])
    except Exception as e:
        print e
        return '\t'.join([one_lable, ""])
    else:
        return '\t'.join([one_lable, data_line])
def process_origin(conf,value,frac):
    """Return the smallest and largest usable value of a feature.

    The raw values are read from ``FEA.name_values_dict[conf.name]``. Only
    entries accepted by ``cst.isfloat`` are kept; they are stripped, sorted
    numerically and then filtered through ``wash_data``. When
    ``conf.filter_threds`` is set, values lying inside the closed interval
    ``[filter_threds[0], filter_threds[1]]`` are discarded.

    :param conf: feature configuration (``name`` and ``filter_threds`` are read).
    :param value: unused here; kept for the shared ``process_*`` signature.
    :param frac: unused here; kept for the shared ``process_*`` signature.
    :return: two-element list ``[min, max]`` of strings, each rounded to
        three decimals.
    :raises IndexError: when no value survives the filtering.
    """
    raw_values = FEA.name_values_dict[conf.name]
    numeric = sorted([item.strip() for item in raw_values if cst.isfloat(item)],
                     key=float)
    candidates = [item for item in numeric if wash_data(item)]
    if conf.filter_threds:
        # Thresholds are re-read per element, exactly as the filter lambda did.
        candidates = [
            item for item in candidates
            if not float(conf.filter_threds[0]) <= float(item) <= float(conf.filter_threds[1])
        ]
    extremes = [candidates[0], candidates[-1]]
    return [str(float('%0.3f' % float(edge))) for edge in extremes]
def process_number(conf, value,frac):
    """Compute bucket boundaries for a numeric feature.

    The raw values are read from ``FEA.name_values_dict[conf.name]``. Only
    entries accepted by ``cst.isfloat`` are kept; they are stripped, sorted
    numerically and filtered through ``wash_data``. When
    ``conf.filter_threds`` is set, values inside the closed interval
    ``[filter_threds[0], filter_threds[1]]`` are discarded.

    :param conf: feature configuration (``name`` and ``filter_threds`` are read).
    :param value: strategy name; ``"freq"`` selects equal-frequency and
        ``"dis"`` equal-distance splitting. Anything else falls back to
        equal-frequency.
    :param frac: number of buckets; expected to be a positive number.
    :return: sorted list of distinct boundary strings rounded to three
        decimals.
    :raises ValueError: when equal-frequency splitting gets no values.
    :raises IndexError: when equal-distance splitting gets no values.
    """
    raw_values = FEA.name_values_dict[conf.name]
    numeric = sorted([x.strip() for x in raw_values if cst.isfloat(x)],
                     key=float)
    washed = [x for x in numeric if wash_data(x)]
    if conf.filter_threds:
        sorted_v = [
            x for x in washed
            if not float(conf.filter_threds[0]) <= float(x) <= float(conf.filter_threds[1])
        ]
    else:
        sorted_v = washed

    def equal_freq(v,frac):
        """Pick every ``len(v) / frac``-th value (from index 1) plus the last."""
        if not v:
            raise ValueError(
                "no usable values for feature {0}".format(conf.name))
        # With fewer values than buckets the stride would be 0, which
        # range() rejects; fall back to visiting every element instead.
        step = int(len(v) / frac) or 1
        picked = set([v[index] for index in range(1, len(v), step)])
        picked.add(v[-1])
        return sorted(picked, key=float)

    def equal_dis(v,frac):
        """Return ``frac + 1`` evenly spaced points between min and max."""
        low, high = float(v[0]), float(v[-1])
        # Negative lower bounds are clamped to zero.
        low = low if low >= 0 else 0
        points = [str(low + i * (high - low) / frac)
                  for i in range(int(frac) + 1)]
        return sorted(remove_dup(points), key=float)

    # Explicit table instead of a locals() lookup, so renaming a helper
    # cannot silently change which strategy is chosen.
    strategies = {"equal_freq": equal_freq, "equal_dis": equal_dis}
    data_huafen = strategies.get("equal_{key}".format(key=value), equal_freq)
    boundaries = data_huafen(sorted_v,frac)
    rounded = set([str(float('%0.3f' % float(x))) for x in boundaries])
    return sorted(rounded, key=float)
示例#4
0
def one_conf((conf, v)):
    def print_conf(conf, values, is_exists=False):
        ars_list = '#'.join([v.strip() for v in values]).replace(",", "0x32") \
            if conf.method != "none" and conf.name != cst.label_name  else ""

        print ','.join(
            map(str, [
                conf.name, conf.method,
                conf.filter_threds if conf.filter_threds else "", conf.status,
                ars_list if not is_exists else conf.ars
            ]))

        return ','.join(
            map(str, [
                conf.name, conf.method,
                conf.filter_threds if conf.filter_threds else "", conf.status,
                ars_list if not is_exists else conf.ars
            ]))

    if len(conf.ars) > 0:
        print "conf.ars already exists", conf.ars
        return print_conf(conf, [], True)

    key, value, frac = cst.parse_method(
        conf.method)  # number freq 5 key, value, frac
    if key != "none":
        data_method = globals().get("process_{key}".format(**{"key": key}))
        rs = data_method(conf, value, frac, v)
    else:
        rs = []
    return print_conf(conf, rs)
    def origin(self, fea_name, fea_value):
        """Emit the feature value itself when it lies inside the configured range.

        The two entries of ``arrs_list`` on the feature's config are used as
        lower and upper bound. Bounds and the washed value are all rounded to
        three decimals before comparing.

        :param fea_name: name of the feature; key into ``self.fea_conf``.
        :param fea_value: raw value, cleaned with ``cst.wash`` first.
        :return: result of ``self.add_feature(fea_name, value)`` when the
            rounded value is within ``[low, high]``, otherwise None.
        """
        conf = self.fea_conf[fea_name]
        low, high = [float('%0.3f' % float(bound)) for bound in conf.arrs_list]
        rounded = float('%0.3f' % float(cst.wash(fea_value)))
        if not low <= rounded <= high:
            return None
        return self.add_feature(fea_name, rounded)
    def origin(self, fea_name, fea_value):
        """Pass a raw numeric value through if it falls within its bounds.

        ``self.fea_conf[fea_name].arrs_list`` must hold exactly two entries,
        read as lower and upper bound. Everything is normalised to three
        decimals so that the comparison matches the precision the bounds
        were stored with.

        :param fea_name: feature name used to look up the configuration.
        :param fea_value: raw value; passed through ``cst.wash`` before use.
        :return: ``self.add_feature(fea_name, value)`` for in-range values,
            None otherwise.
        """
        def to_three_decimals(number):
            return float('%0.3f' % float(number))

        lower, upper = map(to_three_decimals, self.fea_conf[fea_name].arrs_list)
        cleaned = to_three_decimals(cst.wash(fea_value))
        in_range = lower <= cleaned <= upper
        if in_range:
            return self.add_feature(fea_name, cleaned)
 def read_conf(self):
     """Load the feature configuration file into the lookup dictionaries.

     Each line of ``self.config`` that does not start with ``#`` is split on
     commas and turned into a ``Conf``. The feature id is the 1-based line
     number, so comment lines still consume an id. Afterwards the reverse
     mapping id -> name and an empty value list per feature are created.
     """
     with codecs.open(self.config, 'r', 'utf8') as f:
         for n, l in enumerate(f.readlines(), 1):
             if l.startswith("#"):  # skip comment lines
                 continue
             cf = Conf(*l.split(","))
             method_key = cst.parse_method(cf.method)[0]
             cf.method_arr_list = gen_arr_list_by_method_name(method_key, cf.args)
             self.name_conf_dict[cf.name] = cf
             self.name_id_dict[cf.name] = n

     id_to_name = {}
     for name in self.name_id_dict:
         id_to_name[self.name_id_dict[name]] = name
     self.id_name_dict = id_to_name

     # Start every feature with an empty list of collected values.
     collected = {}
     for name in self.name_id_dict:
         collected[name] = list()
     self.name_values_dict = collected
 def read_conf(self):
     with codecs.open(self.config, 'r', 'utf8') as f:
         for n, l in enumerate(f.readlines()):
             if not l.startswith("#"):  # 过掉注释
                 n += 1
                 print l
                 cf = Conf(*l.split(","))
                 cf.arrs_list = gen_cates(cst.parse_method(cf.method)[0], cf.ars)
                 self.fea_conf[cf.name] = cf
                 self.fea_number_dict[cf.name] = n
     self.num_fea_dict = {self.fea_number_dict[x]: x for x in self.fea_number_dict}
     self.fea_number_value_list = {x: list() for x in self.fea_number_dict}
示例#9
0
def process_origin(conf, value, frac, v):
    """Return the minimum and maximum usable value found in ``v``.

    Entries of ``v`` accepted by ``cst.isfloat`` are stripped, sorted
    numerically and filtered through ``wash_data``. When
    ``conf.filter_threds`` is set, values inside the closed interval
    ``[filter_threds[0], filter_threds[1]]`` are dropped.

    :param conf: feature configuration (only ``filter_threds`` is read).
    :param value: unused here; kept for the shared ``process_*`` signature.
    :param frac: unused here; kept for the shared ``process_*`` signature.
    :param v: iterable of raw string values for the feature.
    :return: ``[min, max]`` as strings rounded to three decimals.
    :raises IndexError: when nothing survives the filtering.
    """
    numeric = [item.strip() for item in v if cst.isfloat(item)]
    numeric.sort(key=float)
    kept = [item for item in numeric if wash_data(item)]

    if conf.filter_threds:
        def outside_thresholds(item):
            return not float(conf.filter_threds[0]) <= float(item) <= float(
                conf.filter_threds[1])

        kept = [item for item in kept if outside_thresholds(item)]

    first, last = kept[0], kept[-1]
    return [str(float('%0.3f' % float(edge))) for edge in (first, last)]
 def read_conf(self):
     with codecs.open(self.config, 'r', 'utf8') as f:
         for n, l in enumerate(f.readlines()):
             if not l.startswith("#"):  # 过掉注释
                 n += 1
                 print l
                 cf = Conf(*l.split(","))
                 cf.arrs_list = gen_cates(
                     cst.parse_method(cf.method)[0], cf.ars)
                 self.fea_conf[cf.name] = cf
                 self.fea_number_dict[cf.name] = n
     self.num_fea_dict = {
         self.fea_number_dict[x]: x
         for x in self.fea_number_dict
     }
     self.fea_number_value_list = {x: list() for x in self.fea_number_dict}
def one_conf(conf):
    """Append the rewritten config record of one feature to the output file.

    Features whose method is ``"none"`` and the label feature are written
    with empty arguments. For all others the method string is parsed with
    ``cst.parse_method`` and the matching module-level ``process_<key>``
    function computes the arguments.

    :param conf: feature configuration to rewrite.
    :return: None.
    """
    def print_conf(conf, values):
        """Append one record to FEAS_RECONSTRUCT_FILE and echo it to stdout."""
        with codecs.open(FEAS_RECONSTRUCT_FILE, 'a', 'utf8') as f:
            threds = "#".join(conf.filter_threds) if conf.filter_threds else ""
            # Commas inside arguments are replaced by the literal token
            # "0x32" so they cannot split the comma-separated record.
            encoded = '#'.join(
                [item.strip() for item in values]).replace(",", "0x32")

            file_fields = [conf.name, conf.method, threds, conf.status, encoded]
            f.write(','.join(map(str, file_fields)) + '\n')

            # NOTE(review): the console echo lists status before the
            # thresholds, while the file record has them the other way
            # round. Kept as found; confirm which order is intended.
            echo_fields = [conf.name, conf.method, conf.status, threds, encoded]
            print(','.join(map(str, echo_fields)))

    if conf.method != "none" and conf.name != cst.LABEL:
        key, value, frac = cst.parse_method(conf.method)
        data_method = globals().get("process_{key}".format(key=key))
        print_conf(conf, data_method(conf, value, frac))
    else:
        print_conf(conf, [])
示例#12
0
def process_number(conf, value, frac, v):
    # v = FEA.fea_number_value_list[conf.name]

    after_sorted = filter(
        lambda x: wash_data(x),
        sorted([x.strip() for x in v if cst.isfloat(x)],
               key=lambda x: float(x)))
    after_filtered = filter(
        lambda x: conf.filter_method(x),
        after_sorted) if conf.filter_threds else after_sorted
    sorted_v = after_filtered

    def equal_freq(v, frac):
        try:
            freq = sorted(list(
                set([v[index] for index in range(1, len(v),
                                                 len(v) / frac)] + [v[-1]])),
                          key=lambda x: float(x))
        except:
            print v, 'v', "frac is {0}".format(frac)

        return freq

    def equal_dis(v, frac):
        l, h = float(v[0]), float(v[-1])
        l = l if l >= 0 else 0
        KEY = frac
        return sorted(remove_dup(
            [str(l + i * (h - l) / KEY) for i in range(int(KEY) + 1)]),
                      key=lambda x: float(x))

    data_huafen = locals().get("equal_{key}".format(**{"key": value}), None)
    if not data_huafen:
        data_huafen = equal_freq
        # print data_huafen(sorted_v)
    return sorted(list(
        set(
            map(lambda x: str(float('%0.3f' % float(x))),
                data_huafen(sorted_v, frac)))),
                  key=lambda x: float(x))
示例#13
0
def one_line(line):

    with cst.TimeRecord("initial") as _:
        fea = map(lambda x: x.strip(), line.split("\t"))[:len(FEA.fea_number_dict)]
        try:
            one_lable = str(cst.safe_int(fea[FEA.fea_number_dict[label_name] - 1]))
        except Exception as e:
            print e
            print len(FEA.fea_number_dict),len(fea),line
            print FEA.fea_number_dict
            print fea
            return '\t'.join(["0", max_feature_id_num + ":0"])

    def one_fea((n, fea_value)):
        '''
        1. n,fea_value
        2. fea_name or fea_name_list ,fea_value or fea_value_list

        调用获取v
        :return:
        '''

        fea_name = FEA.num_fea_dict[n]
        fc = FEA.fea_conf[fea_name]
        fun_key = {"cate": normal, "origin": normal, "number": normal, "none": none, "pair": pair}
        if fc.name != cst.label_name:
            return fun_key[fc.method.split("#")[0]](fc, fea_value, fea)
    try:
        rs = filter(lambda x: x, map(one_fea, enumerate(fea + [0] * (max_len - len(fea)), start=1)))
        if rs and max_feature_id_num == rs[-1][0]:
            data_line = " ".join(map(lambda x: ":".join(map(str, x)), sorted(rs, key=lambda x: int(x[0]))))
        else:
            data_line = " ".join(map(lambda x: ":".join(map(str, x)), sorted(rs, key=lambda x: int(x[0])))
                                 + [max_feature_id_num + ":0"])
    except Exception as e:
        print e
        return '\t'.join([one_lable, ""])
    else:
        return '\t'.join([one_lable, data_line])
def normal(fc, fea_value, fea):
    """Dispatch a feature value to the ``FEA`` method named by its config.

    The method name is the first element returned by
    ``cst.parse_method(fc.method)``.

    :param fc: feature configuration (``method`` and ``name`` are read).
    :param fea_value: raw value of the feature.
    :param fea: full list of column values; unused here, kept so all
        handlers share one signature.
    :return: whatever the selected ``FEA`` method returns.
    """
    method_name = cst.parse_method(fc.method)[0]
    handler = FEA.__getattribute__(method_name)
    return handler(fc.name, fea_value)
 def pre_cal(self):
     """Precompute ``self.method_arr_list`` from the method name and args."""
     method_name = cst.parse_method(self.method)[0]
     self.method_arr_list = gen_arr_list_by_method_name(method_name, self.args)
示例#16
0
def normal(fc, fea_value, fea):
    """Forward ``fea_value`` to the ``FEA`` method selected by ``fc.method``.

    :param fc: feature configuration; ``cst.parse_method(fc.method)[0]``
        names the ``FEA`` method and ``fc.name`` is passed as first argument.
    :param fea_value: raw value of the feature.
    :param fea: full list of column values; not used by this handler.
    :return: result of the selected ``FEA`` method.
    """
    parsed = cst.parse_method(fc.method)
    bound_method = FEA.__getattribute__(parsed[0])
    result = bound_method(fc.name, fea_value)
    return result
示例#17
0
            return fun_key[fc.method.split("#")[0]](fc, fea_value, fea)

    try:
        rs = filter(
            lambda x: x,
            map(one_fea, enumerate(fea + [0] * (max_len - len(fea)), start=1)))
        if rs and max_feature_id_num == rs[-1][0]:
            data_line = " ".join(
                map(lambda x: ":".join(map(str, x)),
                    sorted(rs, key=lambda x: int(x[0]))))
        else:
            data_line = " ".join(
                map(lambda x: ":".join(map(str, x)),
                    sorted(rs, key=lambda x: int(x[0]))) +
                [max_feature_id_num + ":0"])
    except Exception as e:
        print e
        return '\t'.join([one_lable, ""])
    else:
        return '\t'.join([one_lable, data_line])


import time

# Driver: convert every element of `data` with `one_line` in parallel and
# write the non-empty results to `feature_lines`, one per line.

# Start timestamp; not read again in the visible part of the script.
t = time.time()
with cst.TimeRecord("total") as _:
    # 32 worker processes (assumes `mp` is the multiprocessing module - TODO confirm).
    # NOTE(review): the pool is never closed or joined in the visible code.
    pool = mp.Pool(32)
    # Drop falsy results before writing.
    rs = filter(lambda x: x, pool.map(one_line, data))
    with codecs.open(feature_lines, 'w', 'utf8') as f:
        # Newline-separated, no trailing newline after the last record.
        f.write('\n'.join(rs))
示例#18
0
 def pre_cal(self):
     """Precompute ``self.arrs_list`` from the parsed method name and ``self.ars``."""
     method_name = cst.parse_method(self.method)[0]
     self.arrs_list = gen_cates(method_name, self.ars)
 def pre_cal(self):
     """Cache the category list derived from this config's method and args.

     The first element of ``cst.parse_method(self.method)`` and ``self.ars``
     are handed to ``gen_cates``; the result is stored on ``self.arrs_list``.
     """
     parsed = cst.parse_method(self.method)
     categories = gen_cates(parsed[0], self.ars)
     self.arrs_list = categories