def write_wifi_features_for_knn():
    """Write one wifi-feature CSV per label for the KNN experiments.

    For every i in 1..15 the label name is ``LABELS[i-1]`` and the target
    values come from the id -> y mapping returned by ``get_y(i)``.  The file
    ``<cur_dir>/data/matrix_data/for_knn/wifi_features/<label>.csv`` receives
    a header row ``subject_id,edit_dist,start_time_var,end_time_var,<label>``
    followed by one row per entry of the module-level ``ids``.  The last
    column is left empty when an id has no entry in the target mapping.

    Files are opened in append mode, so running this again adds another
    header and another set of rows to any file that already exists.
    """
    features = ['edit_dist', 'start_time_var', 'end_time_var']
    # The feature tables do not depend on the label index, so they are
    # loaded once instead of once per label.
    all_features = {feature: read_feature(feature) for feature in features}
    header = ['subject_id'] + features

    for i in range(1, 16):
        # get_y also returns a label, but the file name and the header
        # column use the entry from LABELS instead.
        id_y, _ = get_y(i)
        label = LABELS[i - 1]
        output_fp = os.path.join(cur_dir, 'data', 'matrix_data', 'for_knn',
                                 'wifi_features', label + '.csv')

        # The context manager closes the file even if a lookup or write fails.
        with open(output_fp, 'a') as fw:
            fw.write(','.join(header + [label]) + '\n')

            for subject_id in ids:
                row = [subject_id]
                row.extend(all_features[feature][subject_id]
                           for feature in features)
                row.append(str(id_y[subject_id]) if subject_id in id_y else '')
                fw.write(','.join(row) + '\n')
def plot(result):
    """Scatter-plot ``result`` against the target values from ``get_y(3)``.

    The y coordinates are looked up in the id -> y mapping returned by
    ``get_y(3)``, one per entry of ``WIFI_ID_HOME`` and in that order; each
    entry is normalised to the string form of its integer value before the
    lookup.  The figure is displayed with ``plt.show()``.
    """
    targets_by_id, _ = get_y(3)
    targets = [targets_by_id[str(int(raw_id))] for raw_id in WIFI_ID_HOME]

    plt.scatter(result, targets)
    plt.show()
# Example #3
# 0
def write_histogram_to_csv():
    """Append location-frequency histogram rows to one CSV per label.

    For every i in 1..15 the label name is ``LABELS[i-1]`` and the target
    values come from the id -> y mapping returned by ``get_y(i)``.  Rows are
    appended to
    ``<cur_dir>/data/matrix_data/for_knn/freq_histogram/<label>.csv``.

    Each ``.csv`` file in ``<cur_dir>/data/gps_osm`` is one candidate subject;
    the subject id is the last two characters of the file name before its
    first dot.  A subject is skipped when ``get_complete_days`` yields fewer
    than 30 entries or when the id has no entry in the target mapping.
    Otherwise the first ``NUM_DAYS`` entries are passed through
    ``get_loc_freq``, ``merge_homes`` and ``get_all_loc_freq``; the resulting
    values, ordered by key, are written as one comma-separated row with the
    target value as the last column.  No header row is written.

    Progress (label index, label name, subject id) is printed to stdout.
    """
    addr_dir = os.path.join(cur_dir, 'data', 'gps_osm')

    for i in range(1, 16):
        # Single-argument print calls produce the same output on Python 2
        # and Python 3.
        print(i)
        # get_y also returns a label, but the file name uses LABELS instead.
        id_y, _ = get_y(i)
        label = LABELS[i - 1]
        print(label)
        output_fp = os.path.join(cur_dir, 'data', 'matrix_data', 'for_knn',
                                 'freq_histogram', label + '.csv')

        # The context manager closes the file even if a helper raises.
        with open(output_fp, 'a') as fw:
            # NOTE(review): nothing computed per file below depends on i
            # except the id_y lookup, so the histograms are recomputed for
            # every label; caching them per subject looks possible but would
            # change the order of the progress output.
            for file_name in os.listdir(addr_dir):
                if not file_name.endswith('.csv'):
                    continue
                fp = os.path.join(addr_dir, file_name)
                subject_id = file_name.split('.')[0][-2:]

                change_dt = get_change_date(fp)
                complete, by_complete_dates = get_complete_days(fp, change_dt)
                if len(complete) < 30:
                    continue

                print('')
                print('subject id: ' + subject_id)

                if subject_id not in id_y:
                    continue

                # Use the first NUM_DAYS entries (not a random sample).
                sample_days = complete[:NUM_DAYS]

                loc_freq = get_loc_freq(sample_days, by_complete_dates)
                loc_freq = merge_homes(loc_freq, subject_id)
                all_loc_freq = get_all_loc_freq(loc_freq)

                # Order by key so the columns line up across subjects.
                ordered = sorted(all_loc_freq.items(),
                                 key=lambda item: item[0])

                row = [str(freq) for _, freq in ordered]
                row.append(str(id_y[subject_id]))
                fw.write(','.join(row) + '\n')