def get_feature():
    """Compute one feature value per subject from the files in ``wifi_dir``.

    Every ``*.csv`` entry in ``wifi_dir`` whose name does not end with
    ``datetime.csv`` is treated as one subject's file.  The subject id is
    the last two characters of the file name up to its first dot.  For
    each subject the sequences returned by ``get_seqs`` are passed to
    ``get_len_var`` and the result is stored under the subject id.

    Returns:
        dict mapping subject id (str) to the value returned by
        ``get_len_var`` for that subject.
    """
    features = {}
    for filename in os.listdir(wifi_dir):
        is_subject_csv = (
            filename.endswith('.csv')
            and not filename.endswith('datetime.csv')
        )
        if not is_subject_csv:
            continue

        subject_id = filename.split('.')[0][-2:]
        # NOTE: filtering on REMOVE_SUBJECTS is disabled here (the check
        # was commented out in the original code).

        print('----------')
        print('id: ' + subject_id)

        stats = get_len_var(get_seqs(subject_id))
        print(stats)
        features[subject_id] = stats
    return features
def _count_pattern_matches(seqs_by_subject, freq_patterns):
    """Count, per subject, how many sequences contain each pattern.

    A pattern matches a sequence when its items appear as a contiguous
    run of *whole* items in the sequence.  Both sides are joined with
    commas and wrapped in leading/trailing commas so that a pattern such
    as ``["1", "2"]`` cannot match inside ``["11", "23"]`` (plain
    substring matching on ``"1,2"`` vs ``"11,23"`` would).

    Args:
        seqs_by_subject: list with one entry per subject; each entry is
            an iterable of sequences, each sequence an iterable of str.
        freq_patterns: list of patterns, each an iterable of str.

    Returns:
        ``numpy`` array of shape ``(len(seqs_by_subject),
        len(freq_patterns))``; entry ``[i, j]`` is the number of
        sequences of subject ``i`` that contain pattern ``j``.
    """
    count = np.zeros((len(seqs_by_subject), len(freq_patterns)))
    # Patterns do not change across sequences, so build their keys once.
    pattern_keys = ["," + ",".join(pat) + "," for pat in freq_patterns]
    for i, subject_seqs in enumerate(seqs_by_subject):
        for seq in subject_seqs:
            seq_key = "," + ",".join(seq) + ","
            for j, pattern_key in enumerate(pattern_keys):
                if pattern_key in seq_key:
                    count[i, j] += 1
    return count


def get_freq_pattern(min_support, typed, normalize, max_pattern_len=6):
    """Mine frequent patterns over all subjects and write per-subject counts.

    Steps:
      1. Load the sequences of every subject found in ``wifi_dir`` (each
         ``*.csv`` file not ending in ``datetime.csv``); when ``typed``
         is true the sequences are first converted with ``to_loc_type``.
      2. Call ``gsp`` for levels ``1..max_pattern_len``, feeding each
         level's result into the next, and collect all returned patterns.
      3. For every subject, count how many of its sequences contain each
         pattern (whole-item, contiguous match).
      4. Write one feature per pattern via ``write_feature_to_csv``.

    Args:
        min_support: support threshold forwarded to ``gsp``; also used in
            the output directory name (``support%d``).
        typed: if true, mine over ``to_loc_type`` sequences and write to
            the ``freq_pat/typed`` directory.
        normalize: if true (and ``typed`` is false), write to
            ``freq_pat/normalized`` and pass ``True`` as the last
            argument of ``write_feature_to_csv``.  Ignored when ``typed``
            is true, matching the original branch order.
        max_pattern_len: highest level passed to ``gsp`` (default 6, the
            value previously hard-coded).

    Returns:
        None.  Results are printed and written through
        ``write_feature_to_csv``.
    """
    # --- collect sequences ------------------------------------------------
    ids = []
    all_seqs = []          # sequences of all subjects, flattened
    seqs_by_subject = []   # one list of sequences per subject, aligned with ids
    for filename in os.listdir(wifi_dir):
        if not filename.endswith(".csv") or filename.endswith("datetime.csv"):
            continue
        subject_id = filename.split(".")[0][-2:]
        ids.append(subject_id)
        seqs = get_seqs(subject_id)
        if typed:
            seqs = to_loc_type(seqs)
        all_seqs.extend(seqs)
        seqs_by_subject.append(seqs)

    # --- mine frequent patterns level by level ----------------------------
    freq_patterns = []
    prev_level_freq = []
    for level in range(1, max_pattern_len + 1):
        freq = gsp(all_seqs, level, prev_level_freq, min_support, typed)
        freq_patterns.extend(freq)
        prev_level_freq = freq

    pp.pprint(freq_patterns)
    print(len(freq_patterns))

    # --- per-subject frequency of every pattern (n subjects x m patterns) --
    count = _count_pattern_matches(seqs_by_subject, freq_patterns)
    print(np.sum(count, axis=0))

    # --- write each pattern's counts as one feature -----------------------
    support_dir = "support%d" % min_support
    if typed:
        out_dir = os.path.join("freq_pat", "typed", support_dir)
        last_arg = False
    elif normalize:
        out_dir = os.path.join("freq_pat", "normalized", support_dir)
        last_arg = True
    else:
        out_dir = os.path.join("freq_pat", support_dir)
        last_arg = False

    for j, pat in enumerate(freq_patterns):
        id_feature = dict(zip(ids, count[:, j]))
        feature_name = "fp_" + ";".join(pat)
        write_feature_to_csv(id_feature, feature_name, out_dir, last_arg)