def main_process():
    """Predict a zone for each (MAC, 10-second bucket) and write RESULT_CSV.

    Steps:
      1. Load the classifier and its zone<->int mapping from ``get_model()``.
      2. Read every ``*csv.gz`` file under ``SRC_FOLDER``.  The 7-digit
         placement id found in the file name selects the router (through
         ``PLC_ROUTER_DICT``) whose column the file's signal readings fill.
      3. For every observed (mac, bucket) build a vector of signal strengths,
         one entry per router in ``ROUTERS``; routers without a reading keep
         the default of -100.
      4. Predict the zone for each vector, sort the rows by (mac, time) and
         write them to ``RESULT_CSV``.

    Input columns used: 0 = timestamp (seconds), 3 = MAC address,
    6 = signal strength.

    A file that cannot be read or parsed is reported on stdout (the exception
    followed by the file name) and the remaining files are still processed.
    A file name without a 7-digit id, or an id missing from
    ``PLC_ROUTER_DICT``, still raises.

    Returns:
        None.  The result is the CSV file written to ``RESULT_CSV``.
    """
    KNN, zone_int_dict = get_model()
    # no duplicate value, so reverse this dictionary
    int_zone_dict = dict((v, k) for k, v in zone_int_dict.items())
    file_list = glob.glob(SRC_FOLDER + '/*csv.gz')

    # mac_sn_dict : { (mac_address, time_accurate_to_10_seconds):
    #                 [signal_strength according to the routers] }
    mac_sn_dict = {}
    for f in file_list:
        placement_name = re.findall(r"\d{7}", f)[0]
        router_bssid = PLC_ROUTER_DICT[placement_name]
        # The router is fixed per file, so resolve its column once here; a
        # file whose router is not tracked can never contribute a reading.
        if router_bssid not in ROUTERS:
            continue
        index = ROUTERS.index(router_bssid)
        try:
            with gzip.open(f, 'rb') as fp:
                reader = csv.reader(fp)
                # jump first row (header)
                next(reader, None)
                for row in reader:
                    mac = str(row[3]).lower()
                    # Floor the timestamp to a 10-second bucket; `//` keeps
                    # this an integer floor regardless of division semantics.
                    bucket = int(row[0]) // 10 * 10
                    sn = int(row[6])
                    tmp_key = (mac, bucket)
                    if tmp_key not in mac_sn_dict:
                        mac_sn_dict[tmp_key] = [-100] * ROUTERS_NUM
                    mac_sn_dict[tmp_key][index] = sn
        except Exception as e:
            # Best effort: report the broken file and carry on with the rest.
            print(e)
            print(f)

    vender_checker = get_vendor_checker()
    router_macs = set(ROUTERS)
    data_row_cache = []

    for (mac, bucket), strengths in mac_sn_dict.items():
        # The routers themselves are not devices to be located.
        if mac in router_macs:
            continue
        vender = vender_checker.get_manuf(mac)
        # transfer to local time (fixed UTC+8 offset)
        local_time = datetime.datetime.utcfromtimestamp(bucket + 8 * 60 * 60)
        x_data = numpy.array([[int(v) for v in strengths]])
        predict_result = int_zone_dict[int(KNN.predict(x_data)[0])]
        data_row_cache.append(
            [mac, local_time] + strengths + [predict_result, vender])

    print('sort the result')
    data_row_cache.sort(key=lambda r: (r[0], r[1]))

    # Open the output only once the rows are ready, and close it reliably.
    with open(RESULT_CSV, 'w') as fout:
        writer = csv.writer(fout)
        # Every data row ends with (zone, vendor), so the header names both.
        writer.writerow(['mac', 'time'] + ROUTERS + ['zone', 'vendor'])
        writer.writerows(data_row_cache)
    print('finished')
예제 #2
0
from wifi_util.download_csv import download_csv
from wifi_tracking_training import number_to_char,get_model

# --- Download settings (all passed straight to download_csv) ---------------
# Local root the files are stored under; generate_csv reads from
# LOCAL_DIR + LOCATION_ID + '/' + DATE_TIME + '/'.
LOCAL_DIR = '../data/'
REMOTE_DIR = 'data/combined/wifi/'
LOCATION_ID = '39'
placements = [
    '8100109',
    '8100110',
    '8100111',
    '8100112',
]
BUCKET = 'percolata-data'
DATE_TIME = '2015-12-14'
# NOTE(review): units of these two offsets are not visible here; the "* 4"
# suggests quarter-hour slots -- confirm against download_csv.
curr_start = 16 * 4
next_end = 3 * 4

# --- Router settings -------------------------------------------------------
# BSSIDs of the routers; the list length is used as ROUTERS_NUM.
ROUTERS = [
    '2c:5d:93:3b:40:59',
    'f2:14:24:07:bf:90',
    'f0:99:bf:07:24:14',
    '2c:5d:93:3b:40:58',
    'c0:a0:bb:c5:61:56',
]
# SSID -> BSSID lookup.
SSID_MAP = {
    'PPS Guest Network': 'f2:14:24:07:bf:90',
    'BaySensors': 'c0:a0:bb:c5:61:56',
    'Percolata': '2c:5d:93:3b:40:58',
    'PPS Network': 'f0:99:bf:07:24:14',
    'Field Architecture': '0e:18:d6:53:bf:90',
}

# Output file name.
RESULT_CSV = 'result.csv'
# Model obtained once, at import time, from wifi_tracking_training.get_model().
# NOTE(review): the return shape of get_model() is not visible from here --
# confirm whether this is the classifier itself or a tuple needing unpacking.
KNN = get_model()

def download_data():
    """Call ``download_csv`` with the module-level download settings.

    Takes no arguments and returns nothing; every value handed to
    ``download_csv`` is one of the constants defined at the top of the module.
    """
    download_csv(
        LOCATION_ID,
        placements,
        DATE_TIME,
        BUCKET,
        curr_start,
        next_end,
        LOCAL_DIR,
        REMOTE_DIR,
    )

def generate_csv():
    # mac_sn_dict : { (mac_address,time_accurate_to_10_seconds):[signal_strength according to the routers] }
    mac_sn_dict = {}
    fl = os.listdir(LOCAL_DIR+str(LOCATION_ID)+'/'+DATE_TIME+'/')
    ROUTERS_NUM = len(ROUTERS)
    for f in fl:
        try:
            with gzip.open(LOCAL_DIR+str(LOCATION_ID)+'/'+DATE_TIME+'/'+f,'rb') as fp:
                reader = csv.reader(fp)
                for row in reader:
                    # jump first row