예제 #1
0
def combine_freq_patterns(support, normalize, trait="extra"):
    """Merge all frequent-pattern feature CSVs for one support level into one CSV.

    Every ``*.csv`` file in the input directory is loaded with
    ``read_feature`` and its values are concatenated, per user id, into a
    single row that ends with the user's score for ``trait``.

    Args:
        support: Integer support level; it is formatted with ``%d`` into both
            the input directory name and the output file name.
        normalize: When truthy, read from the ``normalized`` sub-directory and
            write to the ``*_norm.csv`` output file.
        trait: Name looked up in ``TRAITS`` to pick the score column; it is
            also used as the header of the last output column.

    Raises:
        ValueError: If ``trait`` is not found by ``TRAITS.index``
            (assuming ``TRAITS`` is a list or tuple).
        KeyError: If a feature file has no entry for one of the user ids
            returned by ``get_trait_scores``.
    """
    if normalize:
        output_fp = os.path.join("result", "feature", "all_freq_pat_support%d_norm.csv" % support)
        input_dir = os.path.join("result", "feature", "freq_pat", "normalized", "support%d" % support)
    else:
        output_fp = os.path.join("result", "feature", "all_freq_pat_support%d.csv" % support)
        input_dir = os.path.join("result", "feature", "freq_pat", "support%d" % support)

    # Resolve the trait column first so an unknown trait fails before any
    # input is read or anything is appended to the output file.
    trait_idx = TRAITS.index(trait)

    labels = ["uid"]
    id_y = get_trait_scores()
    uids = sorted(id_y)
    all_features = {uid: [] for uid in uids}

    # os.listdir returns entries in arbitrary order; sorting keeps the column
    # order stable from run to run.
    for filename in sorted(os.listdir(input_dir)):
        feature, ext = os.path.splitext(filename)
        # Match on the real extension: a substring test would also accept
        # names such as "x.csv.bak" and then derive a wrong label from them.
        if ext != ".csv":
            continue
        # NOTE(review): one header label is added per file, which presumes
        # read_feature yields exactly one value per user id -- confirm.
        labels.append(feature)
        id_feature = read_feature(os.path.join(input_dir, filename))
        for uid in uids:
            all_features[uid].extend(id_feature[uid])

    labels.append(trait)
    # NOTE: append mode is kept from the original, so running this again adds
    # another header and another set of rows to an existing output file.
    with open(output_fp, "a") as fw:
        fw.write(",".join(labels) + "\n")
        for uid in uids:
            # str.join needs strings, so ids, feature values and scores are
            # expected to be strings already.
            row = [uid]
            row.extend(all_features[uid])
            row.append(id_y[uid][trait_idx])
            fw.write(",".join(row) + "\n")
예제 #2
0
def combine_heuristic_fp(fnames, trait="extra"):
    """Join several already-combined feature CSVs column-wise into one CSV.

    Each input file is read from ``result/feature``. Its first column is
    taken as the user id and its last column is dropped (presumably the
    trait label of that file -- confirm against the files' producers). The
    remaining columns are concatenated per user id, and the score returned
    by ``get_trait_scores(trait)`` is written as the final column.

    Args:
        fnames: Iterable of file names located in ``result/feature``.
        trait: Trait name; used in the output file name, passed to
            ``get_trait_scores``, and written as the last header label.

    Raises:
        KeyError: If an input row's id is not among the ids returned by
            ``get_trait_scores(trait)``.
    """
    output_fp = os.path.join("result", "feature", "combined_all_%s.csv" % trait)
    input_dir = os.path.join("result", "feature")
    labels = ["uid"]
    id_y = get_trait_scores(trait)
    uids = sorted(id_y)
    all_features = {uid: [] for uid in uids}

    for fname in fnames:
        # NOTE: the "U" mode flag is kept for the original universal-newline
        # behaviour; it was removed in Python 3.11, so switch to "r" when
        # this module is ported.
        with open(os.path.join(input_dir, fname), "rU") as fr:
            # Header: skip the id column and the trailing label column.
            labels.extend(fr.readline().split(",")[1:-1])
            for line in fr:
                # A blank (e.g. trailing) line would otherwise be looked up
                # as the id "\n" and raise KeyError.
                if not line.strip():
                    continue
                items = line.split(",")
                all_features[items[0]].extend(items[1:-1])

    labels.append(trait)
    # NOTE: append mode is kept from the original, so running this again adds
    # another header and another set of rows to an existing output file.
    with open(output_fp, "a") as fw:
        fw.write(",".join(labels) + "\n")
        for uid in uids:
            # str.join needs strings, so the scores are expected to be
            # strings already.
            row = [uid]
            row.extend(all_features[uid])
            row.append(id_y[uid])
            fw.write(",".join(row) + "\n")
예제 #3
0
def combine_heuristic_features(feature_names, trait="extra"):
    """Combine individual heuristic feature CSVs into one CSV for ``trait``.

    For each name in ``feature_names`` the file ``result/feature/<name>.csv``
    is loaded with ``read_feature`` and its values are concatenated per user
    id. Each output row ends with the score from ``get_trait_scores(trait)``.

    Args:
        feature_names: Iterable of feature file base names. It is traversed
            twice, so it must be re-iterable (e.g. a list). A name containing
            "-" contributes one header label per dash-separated part.
        trait: Trait name; used in the output file name, passed to
            ``get_trait_scores``, and written as the last header label.
    """
    output_fp = os.path.join("result", "feature", "all_heuristic_features_%s.csv" % trait)
    input_dir = os.path.join("result", "feature")

    labels = ["uid"]
    for feature_name in feature_names:
        # split("-") returns a one-element list when there is no dash, so a
        # single extend covers both plain and compound feature names.
        labels.extend(feature_name.split("-"))

    id_y = get_trait_scores(trait)
    uids = sorted(id_y)
    all_features = {uid: [] for uid in uids}
    for feature_name in feature_names:
        id_feature = read_feature(os.path.join(input_dir, "%s.csv" % feature_name))
        for uid in uids:
            # NOTE(review): ids absent from a feature file are skipped, so
            # their rows end up with fewer columns than the header -- confirm
            # that downstream consumers tolerate this.
            if uid in id_feature:
                all_features[uid].extend(id_feature[uid])

    labels.append(trait)
    # NOTE: append mode is kept from the original, so running this again adds
    # another header and another set of rows to an existing output file.
    with open(output_fp, "a") as fw:
        fw.write(",".join(labels) + "\n")
        for uid in uids:
            # str.join needs strings, so feature values and scores are
            # expected to be strings already.
            row = [uid]
            row.extend(all_features[uid])
            row.append(id_y[uid])
            fw.write(",".join(row) + "\n")