示例#1
0
            continue
        
        id = file.split('.')[0][-2:]
        
#         if id in REMOVE_SUBJECTS:
#             continue
    
        print '----------'
        print 'id: ' + id

        seqs = get_seqs(id)
        #print seqs
        #print len(seqs)
        result = get_len_var(seqs)
        print result
        
        id_feature[id] = result
        
    return id_feature


if __name__ == '__main__':
    # Script entry point: compute the id -> feature mapping with
    # get_feature() and pass it straight to write_feature_to_csv under the
    # feature name 'len_var'.
    write_feature_to_csv(get_feature(), 'len_var')
    
    
示例#2
0
from util import write_feature_to_csv

def get_feature():
    """Read the grades CSV and return a mapping of subject id to grade.

    The first line of the file is treated as a header and skipped. For each
    remaining non-blank line, the key is the first column with its leading
    character removed and the value is the fourth column parsed as a float.

    Returns:
        dict: id string -> float grade.

    Raises:
        IOError/OSError: if the grades file cannot be opened.
        IndexError: if a data line has fewer than four columns.
        ValueError: if the fourth column is not a valid float.
    """
    id_feature = {}
    # Plain 'r' instead of 'rU': the 'U' flag is rejected by Python 3.11+,
    # and text mode already normalises newlines on Python 3. The with-block
    # guarantees the handle is closed even if parsing fails.
    with open(r'dataset\education\grades.csv', 'r') as fr:
        next(fr, None)  # discard the header row (no-op on an empty file)
        for line in fr:
            if not line.strip():
                # Tolerate blank/trailing lines instead of raising IndexError.
                continue
            items = line.split(',')
            subject_id = items[0][1:]
            id_feature[subject_id] = float(items[3])
    return id_feature


if __name__ == '__main__':
    # Script entry point: load the id -> grade mapping and pass it to
    # write_feature_to_csv under the feature name 'grade'.
    grades_by_id = get_feature()
    write_feature_to_csv(grades_by_id, 'grade', False)
    
            
        id_feature[id] = result
        
    return id_feature

if __name__ == '__main__':
    # Script entry point. Only one run is active at a time: the live code
    # below computes the feature via get_feature_start_var() and writes it
    # under the name 'start_time_var'. The commented-out calls are
    # alternative runs and ad-hoc checks kept for reference; none of them
    # execute.
#     id_feature = get_feature(get_start_var)
#     write_feature_to_csv(id_feature, 'start_time_var')
    
#     id_feature = get_feature(get_start_var_offcampus)
#     write_feature_to_csv(id_feature, 'start_time_var_offcampus')
    
#     id_feature = get_feature(get_end_var)
#     write_feature_to_csv(id_feature, 'end_time_var')
    
#     id_feature = get_feature(get_end_var_offcampus)
#     write_feature_to_csv(id_feature, 'end_time_var_offcampus')
    
    
#     id = '01'
#     in_loc_duration = get_in_loc_duration(id)
#     get_end_time_test(in_loc_duration, id)

    id_feature = get_feature_start_var()
    write_feature_to_csv(id_feature, 'start_time_var')
    
#     id_feature = get_feature_end_var()
#     write_feature_to_csv(id_feature, 'end_time_var')

    
    
def get_freq_pattern(min_support, typed, normalize, max_pattern_len=6):
    """Mine frequent patterns over all subjects and write one feature per pattern.

    Steps:
      1. For every ``*.csv`` file in ``wifi_dir`` (excluding names ending in
         ``datetime.csv``), take the two characters before the first ``.``
         as the subject id and load that subject's sequences via
         ``get_seqs``. When ``typed`` is truthy the sequences are first
         converted with ``to_loc_type``.
      2. Call ``gsp`` once per pattern length from 1 to ``max_pattern_len``,
         feeding each level the previous level's result, and collect all
         returned patterns.
      3. For each subject and each pattern, count how many of the subject's
         sequences contain the pattern as a contiguous run of whole tokens.
      4. Write one feature per pattern (named ``"fp_" + ";".join(pattern)``)
         through ``write_feature_to_csv``.

    Args:
        min_support: Support threshold forwarded to ``gsp``; also formatted
            with ``%d`` into the output directory name.
        typed: If truthy, mine over ``to_loc_type`` output and write under
            ``freq_pat/typed/...``. Takes precedence over ``normalize``.
        normalize: If truthy (and ``typed`` is falsy), write under
            ``freq_pat/normalized/...`` and pass ``True`` as the last
            argument of ``write_feature_to_csv``.
        max_pattern_len: Largest pattern length to mine. Defaults to 6, the
            value that was previously hard-coded.

    Returns:
        None. Results are printed and written via ``write_feature_to_csv``.
    """
    ### get seqs
    subject_ids = []
    all_seqs = []  # all seqs of all subjects
    seqs_by_subject = []  # one entry per subject, aligned with subject_ids
    for fname in os.listdir(wifi_dir):
        if not fname.endswith(".csv") or fname.endswith("datetime.csv"):
            continue
        subject_id = fname.split(".")[0][-2:]
        seqs = get_seqs(subject_id)
        if typed:
            seqs = to_loc_type(seqs)
        subject_ids.append(subject_id)
        all_seqs.extend(seqs)
        seqs_by_subject.append(seqs)

    ### get freq_patterns from all_seqs
    freq_patterns = []
    prev_level_freq = []
    for level in range(1, max_pattern_len + 1):
        freq = gsp(all_seqs, level, prev_level_freq, min_support, typed)
        freq_patterns.extend(freq)
        prev_level_freq = freq
    pp.pprint(freq_patterns)
    # Single-argument call form prints identically on Python 2 and 3.
    print(len(freq_patterns))

    ### compute frequency of freq_patterns for each subject
    count = np.zeros((len(subject_ids), len(freq_patterns)))  # n x m matrix
    # Wrap both pattern and sequence in delimiters so only whole tokens match:
    # a bare substring test would let pattern token "a" match inside "ab".
    # The pattern keys are loop-invariant, so build them once.
    pattern_keys = ["," + ",".join(pat) + "," for pat in freq_patterns]
    for i, subject_seqs in enumerate(seqs_by_subject):
        for seq in subject_seqs:
            seq_key = "," + ",".join(seq) + ","
            for j, pattern_key in enumerate(pattern_keys):
                if pattern_key in seq_key:
                    count[i, j] += 1
    print(np.sum(count, axis=0))

    ### use frequency as feature, write to csv
    support_dir = "support%d" % min_support
    if typed:
        out_dir = os.path.join("freq_pat", "typed", support_dir)
    elif normalize:
        out_dir = os.path.join("freq_pat", "normalized", support_dir)
    else:
        out_dir = os.path.join("freq_pat", support_dir)
    # The last argument is True only in the normalize branch, as before;
    # typed output is never written with that flag set.
    normalize_flag = bool(normalize) and not typed

    for j, pat in enumerate(freq_patterns):
        id_feature = dict(zip(subject_ids, count[:, j]))
        feature_name = "fp_" + ";".join(pat)
        write_feature_to_csv(id_feature, feature_name, out_dir, normalize_flag)