Пример #1
0
# Script: load per-device RSS text files from raw_data/devices into raw.db.
# NOTE(review): this excerpt looks truncated -- within the visible lines `dic`
# is filled but never stored, and `p_id` is never incremented. Confirm against
# the full file before relying on the comments below.

# In-memory lookup tables built while scanning the files:
#   map_mac : MAC string  -> m_id (assigned in order of first appearance)
#   map_pt  : point label -> p_id
map_mac = {}
map_pt = {}

db = DB('raw.db')
# map_mac : ( m_id | mac )
db.new_table('map_mac', ('m_id integer','mac text'))        
# map_pt : ( p_id | pt | x | y )
db.new_table('map_pt', ('p_id integer','pt text','x real','y real'))
# manifest : (key, value)
db.new_table('manifest', ('key text','value text'))

# Next ids to hand out for a previously unseen MAC / point label.
m_id = 0
p_id = 0
for device in os.listdir('raw_data/devices'):
    # Record ids restart for every device; each device gets its own table.
    r_id = 0
    # Register the per-device table name in the manifest.
    db.insert(('device','rss_'+device), 'manifest')
    #rss_device : ( r_id | p_id | entry )
    db.new_table('rss_'+device, ('r_id integer','p_id integer','entry text'))
    for txt in os.listdir('raw_data/devices/'+device):
        # NOTE(review): the file object is never closed explicitly.
        for line in open('raw_data/devices/'+device+'/'+txt):
            r_id += 1
            # m_id -> rss reading for this single line.
            dic = {}
            # Line format: "<pt>\t<mac>@<rss> <mac>@<rss> ..."
            pt,entry = line.strip().split('\t')
            for item in entry.split(' '):
                mac,rss = item.split('@')
                # Assign a fresh m_id the first time a MAC is seen.
                if not mac in map_mac:
                    map_mac[mac] = m_id
                    m_id += 1
                dic[map_mac[mac]] = rss
            # Assign a p_id the first time a point label is seen.
            if not pt in map_pt:
                map_pt[pt] = p_id
Пример #2
0
def prepare(dataset, split_policy='default', test_pts=None, min_rate=500):
    """Import one dataset into raw.db and split its RSS records into
    train.db and test.db.

    Reads ``<datapath>/<dataset>/rss/*`` (one record per line, formatted as
    ``<pt>\\t<mac>@<rss> <mac>@<rss> ...``), ``map/coord.txt``
    (``<pt>\\t<x> <y>``) and ``map/relation.txt``
    (``<src>\\t<dest>\\t<dist>\\t<dir>``), then writes the ``<dataset>_mac``,
    ``_pnt``, ``_rel``, ``_man``, ``_rss`` and ``meta`` tables.

    Args:
        dataset: name of the dataset directory under ``datapath``; also used
            as the prefix of every table name.
        split_policy: how RSS records are distributed:
            ``None``      -- every record goes to train.db, test.db stays empty;
            ``'default'`` -- random split, roughly 70% train / 30% test
                             (uses the module-level ``random`` generator,
                             which is not seeded here);
            ``'specify'`` -- records whose p_id is in ``test_pts`` go to
                             test.db, all others to train.db.
        test_pts: iterable of p_id values, only used with ``'specify'``.
            NOTE(review): values are compared with column 1 of the rows
            returned by ``db.queryall`` -- confirm whether those come back
            as ints or strings.
        min_rate: a MAC is kept as a feature only if it occurs strictly more
            than this many times across all RSS files.

    Raises:
        ValueError: if ``split_policy`` is not one of the values above.
        KeyError: if a point referenced in relation.txt or an RSS line is not
            a file name in the rss directory, or has no entry in coord.txt.
    """
    # Reject an unknown policy up front instead of silently producing empty
    # train/test RSS tables after all the work has been done.
    if split_policy not in (None, 'default', 'specify'):
        raise ValueError('unknown split_policy: %r' % (split_policy,))
    test_pts = frozenset(test_pts or ())

    rss_dir = join(datapath, dataset, 'rss')
    map_dir = join(datapath, dataset, 'map')
    rss_files = os.listdir(rss_dir)

    #-------------------------------read data----------------------------
    ratedic = {}    # mac -> number of occurrences over all RSS files
    map_pt = {}     # rss file name -> p_id (file name minus first 2 chars)
    for txt in rss_files:
        map_pt[txt] = txt[2:]
        with open(join(rss_dir, txt)) as rss_file:
            for line in rss_file:
                _pt, entry = line.strip().split('\t')
                for item in entry.split(' '):
                    mac, _rss = item.split('@')
                    ratedic[mac] = ratedic.get(mac, 0) + 1

    # Keep only frequently seen MACs and number them consecutively.
    mac_lst = [mac for mac, rate in ratedic.items() if rate > min_rate]
    map_mac = dict((mac, i) for i, mac in enumerate(mac_lst))

    map_coord = {}  # pt -> (x, y), kept as the strings read from the file
    with open(join(map_dir, 'coord.txt')) as coord_file:
        for line in coord_file:
            pt, xy = line.strip().split('\t')
            x, y = xy.split(' ')
            map_coord[pt] = (x, y)

    relation_lst = []
    with open(join(map_dir, 'relation.txt')) as relation_file:
        for line in relation_file:
            src, dest, dist, dir_ = line.strip().split('\t')
            relation_lst.append((map_pt[src], map_pt[dest], dist, dir_))

    #-------------------------------store data----------------------------
    db = DB(join(dbpath, 'raw.db'))
    tbl_mac = '%s_mac' % dataset
    tbl_pt = '%s_pnt' % dataset
    tbl_relation = '%s_rel' % dataset
    tbl_manifest = '%s_man' % dataset
    tbl_rss = '%s_rss' % dataset
    rss_columns = ('r_id integer', 'p_id integer', 'entry text')

    # [dataset]_mac : ( m_id | mac )
    db.new_table(tbl_mac, ('m_id integer', 'mac text'))
    db.insertmany([(m_id, mac) for mac, m_id in map_mac.items()], tbl_mac)
    # [dataset]_pnt : ( p_id | pt | x | y )
    db.new_table(tbl_pt, ('p_id integer', 'pt text', 'x real', 'y real'))
    db.insertmany([(p_id, pt) + map_coord[pt] for pt, p_id in map_pt.items()],
                  tbl_pt)
    # [dataset]_rel : ( src_pt | dest_pt | distance | directions )
    db.new_table(tbl_relation, ('src_pt integer', 'dest_pt integer',
                                'distance real', 'directions text'))
    db.insertmany(relation_lst, tbl_relation)
    # [dataset]_man : ( key | value )
    db.new_table(tbl_manifest, ('key text', 'value text'))
    db.insert(('n_feature', len(map_mac)), tbl_manifest)
    db.insert(('n_point', len(map_pt)), tbl_manifest)
    db.insert(('dataset', tbl_rss), tbl_manifest)

    # [dataset]_rss : ( r_id | p_id | entry )
    # entry is a JSON object mapping m_id -> rss for the retained MACs only.
    db.new_table(tbl_rss, rss_columns)
    r_id = 0
    for txt in rss_files:
        with open(join(rss_dir, txt)) as rss_file:
            for line in rss_file:
                r_id += 1
                pt, entry = line.strip().split('\t')
                dic = {}
                for item in entry.split(' '):
                    mac, rss = item.split('@')
                    if mac in map_mac:  # drop MACs filtered out by min_rate
                        dic[map_mac[mac]] = rss
                db.insert((r_id, map_pt[pt], json.dumps(dic)), tbl_rss)

    db.new_table('meta', ('key text', 'value text'))
    db.insert(('current', dataset), 'meta')

    db.commit()

    #-------------------------------split data----------------------------
    db_train = DB(join(dbpath, 'train.db'))
    db_test = DB(join(dbpath, 'test.db'))
    db_train.new_table(tbl_rss, rss_columns)
    db_test.new_table(tbl_rss, rss_columns)
    if split_policy is None:
        # no test data
        db_train.clone_from(db, tbl_rss)
    elif split_policy == 'default':
        # 70% train data & 30% test data
        for record in db.queryall(tbl_rss):
            if random.random() > 0.3:
                db_train.insert(record, tbl_rss)
            else:
                db_test.insert(record, tbl_rss)
    else:
        # 'specify': the caller chose which points are held out for testing
        for record in db.queryall(tbl_rss):
            if record[1] in test_pts:
                db_test.insert(record, tbl_rss)
            else:
                db_train.insert(record, tbl_rss)

    # Both output databases carry a full copy of the lookup/metadata tables.
    shared_tables = (tbl_manifest, tbl_mac, tbl_pt, tbl_relation, 'meta')
    for target in (db_train, db_test):
        for table in shared_tables:
            target.clone_from(db, table)

    db_train.commit()
    db_test.commit()