예제 #1
0
def loadRawdata(rawfile=None, updbmode=1):
    """
    Load a rawdata csv file into the *algo* tables of every DB in DB_OFFLINE.

    rawfile: rawdata csv file.
    updbmode: update db mode: 1-all, 2-incr.

    Init *algo* tables with rawdata csv(16 columns) -- SLOW if csv is big,
        try offline.doClusterAll(rawdata) -> db.loadClusteredData() instead.
    1) db.initTables(): init db tables if update all the db data.
    2) db.updateIndexes(): update tables indexes, drop old idxs if only update db incrementally.
    3) offline.doClusterIncr(): incremental clustering.

    The csv file handle and the DB connection are always released, even when
    one of the steps raises.
    """
    # Tables are dropped & re-created only in mode 1; in that case
    # updateIndexes() is called with doflush=False, otherwise with True.
    init_all = (updbmode == 1)
    doflush = not init_all
    for dbip in DB_OFFLINE:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        try:
            if init_all:
                # Create WPP tables.
                wppdb.initTables(doDrop=True)
            # Update indexs.
            wppdb.updateIndexes(doflush)
            # Load csv clustered data into DB tables.
            fd_csv = open(rawfile)
            try:
                n_inserts = doClusterIncr(fd_csv=fd_csv, wppdb=wppdb)
            finally:
                fd_csv.close()
            print('Added: [%s] clusters, [%s] FPs' %
                  (n_inserts['n_newcids'], n_inserts['n_newfps']))
            # Init ver_uprecs in |wpp_uprecsver| if it's empty.
            if wppdb.getRawdataVersion() is None:
                wppdb.setRawdataVersion('0')
        finally:
            wppdb.close()
예제 #2
0
def test_fixPosWLAN():
    """WLAN positioning test case 1-31

    For fake WLAN scan IDs 1..31, getWLAN(id) supplies the scan and the
    result of fixPosWLAN() must equal the recorded reference position
    ([lat, lon, err], or [] when no position is expected).
    The DB connection is closed even when an assertion fails.
    """
    # $ for i in $(seq 31); do python wpp/location.py -f $i; done
    # $ egrep -A1 'final|NO cluster' wpp.log |grep -v 'Sele' |grep -v final |grep -v  '\-\-' | \
    # sed  's/^<2011.*$/\[\]/g' |sed 's/\(.*\)/\1,/g'
    poss_ok = [ [], [], [], [], [], [], [],
                [39.912616, 116.3521475, 50],
                [39.912782, 116.352266, 50],
                [39.91257075, 116.35363975, 122.54288388709156],
                [39.912613571428572, 116.35301342857143, 108.37042404018443],
                [39.91257075, 116.35363975, 122.54288388709156],
                [39.91245, 116.352029, 50],
                [39.91257199999999, 116.35131033333333, 96.876048832124482],
                [39.910843, 116.352233, 50],
                [39.912782, 116.352266, 50],
                [39.912506666666665, 116.34972933333334, 50],
                [39.896571000000002, 116.347176, 100],
                [], [],
                [39.894749695652173, 116.34846693478261, 504.49033670393203],
                [39.903174187499999, 116.3043408125, 205.26326958257582],
                [39.911346999999999, 116.367709, 50],
                [39.905437777777777, 116.30197872222223, 71.863904363421156],
                [39.898307000000003, 116.367233, 50],
                [39.896256999999999, 116.345404, 50],
                [39.866599000000001, 116.33084275, 140.93180577820135],
                [39.898285222222221, 116.37795088888889, 50],
                [39.907567142857147, 116.3518077142857, 82.052322921173257],
                [39.906203714285709, 116.31805528571428, 50],
                [39.907556527131781, 116.35137625581396, 50],]
    dbsvr = dbsvrs[DB_ONLINE]
    wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
    try:
        for i, pos_ok in enumerate(poss_ok):
            # Fake scan IDs are 1-based, list indexes are 0-based.
            len_visAPs, wifis = getWLAN(i+1)
            pos_test = fixPosWLAN(len_visAPs, wifis, wppdb, True)
            assert_equal(pos_ok, pos_test)
    finally:
        wppdb.close()
예제 #3
0
def crawlAreaLocData():
    """
    Resolve the area location of raw FPs that have no area info yet.

    For every DB in DB_OFFLINE and every FP returned by wppdb.getCrawlFPs():
    1) try areaLocation(laccid); if OK, set flag area_ok = 1 and go on with the next FP.
    2) else try googleAreaLocation(latlon) and store area_try + 1 for the FP.
       - geoaddr is None: terminate the process with exit code 0 (OVER_QUERY_LIMIT).
       - geoaddr is otherwise empty: go on with the next FP.
    3) insert area location (laccid~geoaddr) into |wpp_cellarea| via addAreaLocation();
       if that fails, remember laccid in fail_history so it is skipped from then on,
       else set flag area_ok = 1.

    The DB connection is closed when the crawl of a DB ends, including the
    sys.exit() path.
    """
    fail_history = {}
    for dbip in DB_OFFLINE:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        try:
            # select config.CRAWL_LIMIT raw fps which haven't tried for google area location.
            for fp in wppdb.getCrawlFPs():
                laccid = '%s-%s' % (fp[8], fp[9])
                if laccid in fail_history: continue
                fp_time = fp[2]
                print('%s %s' % (laccid, fp_time))
                # try areaLocation(laccid)
                if wppdb.areaLocation(laccid):
                    # area_ok = 1 & quit.
                    wppdb.setUprecsAreaStatus(status=1, time=fp_time)
                    continue
                print(fp)
                # try google area location.
                geoaddr = googleAreaLocation(latlon=(fp[11], fp[12]))
                # area_try += 1, recorded before the result is inspected.
                wppdb.setUprecAreaTry(area_try=fp[18]+1, time=fp_time)
                if geoaddr is None:
                    sys.exit(0)  # OVER_QUERY_LIMIT.
                if not geoaddr:
                    continue
                # insert area location info(laccid~geoaddr) into |wpp_cellarea|.
                # till now, area_location: 'laccid,area_code,province>city>district'.
                area_location = wppdb.addAreaLocation(laccid=laccid, geoaddr=geoaddr)
                if not area_location:
                    # laccid cannot be in fail_history here: such FPs were skipped above.
                    fail_history[laccid] = geoaddr
                    print('Failed to add area location: [%s] for cell[%s]' %
                          (geoaddr[-1].encode('utf8'), laccid))
                    continue
                # area_ok = 1 & quit.
                wppdb.setUprecsAreaStatus(status=1, time=fp_time)
                print(area_location.encode('utf8'))  # encode('utf8') for crontab.
        finally:
            wppdb.close()
예제 #4
0
def main():
    """
    Command-line entry point: fix the current position from a WLAN scan.

    Options:
      -f N, --fake=N   fake WLAN scan ID passed to getWLAN() (default 0).
      -h, --help       print usage and exit with code 0.
      -v, --verbose    passed on to fixPosWLAN().

    Exits with code 99 on option errors or when no position could be fixed.
    """
    import getopt
    try:
        # NO backward compatibility for file handling, so the relevant
        # methods(os,pprint)/parameters(addr_book,XXXPATH)
        # imported from standard or 3rd-party modules can be avoided.
        # "fake=" (with '=') so the long option takes an argument like -f does.
        opts, args = getopt.getopt(sys.argv[1:], "f:hv",
                                   ["fake=", "help", "verbose"])
    except getopt.GetoptError:
        print('Error: getopt!\n')
        usage(); sys.exit(99)

    # Program terminated when NO argument followed!
    #if not opts: usage(); sys.exit(0)

    # vars init.
    verbose = False; wlanfake = 0

    for o, a in opts:
        if o in ("-f", "--fake"):
            # isdigit() only accepts non-negative integers.
            if not a.isdigit():
                print('\nIllegal fake WLAN scan ID: %s!' % a)
                usage(); sys.exit(99)
            wlanfake = int(a)
        elif o in ("-h", "--help"):
            usage(); sys.exit(0)
        elif o in ("-v", "--verbose"):
            verbose = True
        else:
            print('Parameter NOT supported: %s' % o)
            usage(); sys.exit(99)

    # Get WLAN scanning results.
    len_visAPs, wifis = getWLAN(wlanfake)

    # Fix current position.
    dbsvr = dbsvrs[DB_ONLINE]
    wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
    try:
        posresult = fixPosWLAN(len_visAPs, wifis, wppdb, verbose)
    finally:
        wppdb.close()
    if not posresult: sys.exit(99)
    wpplog.debug('final posfix/poserr: \n%s' % posresult)
예제 #5
0
def updateAlgoData():
    """
    Update from raw data into FPs directly used by location.fixPosWLAN() from WppDB(wpp_clusterid, wpp_cfps).
    1) Retrieve latest incremental rawdata(csv) from remote FTP server(hosted by FPP).
    2) Decompress bzip2, import CSV into wpp_uprecsinfo with its ver_uprecs, Update ver_uprecs in wpp_uprecsver.
    3) Incr clustering inserted rawdata for direct algo use.

    Runs once per DB in DB_OFFLINE. A csv parse error terminates the process
    via sys.exit(); a failed insert is recorded in the local `alerts` dict and
    the next bzip2 file is processed. The bzip2 file handle and the DB
    connection are always closed.
    """
    tab_rd = 'wpp_uprecsinfo'
    for dbip in DB_OFFLINE:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        try:
            ver_wpp = wppdb.getRawdataVersion()
            # Sync rawdata into wpp_uprecsinfo from remote FTP server.
            print('Probing rawdata version > [%s]' % ver_wpp)
            vers_fpp, localbzs = syncFtpUprecs(FTPCFG, ver_wpp)
            if not vers_fpp:
                print('Not found!')
                continue
            print('Found new vers: %s' % vers_fpp)
            # Handle each bzip2 file.
            alerts = {'vers': [], 'details': ''}
            for bzfile in localbzs:
                # Filter out the ver_uprecs info from the name of each bzip file.
                ver_bzfile = bzfile.split('_')[-1].split('.')[0]
                # Update ver_uprecs in wpp_uprecsver to ver_bzfile.
                wppdb.setRawdataVersion(ver_bzfile)
                print('%s\nUpdate ver_uprecs -> [%s]' % ('-'*40, ver_bzfile))
                # Decompress bzip2.
                sys.stdout.write('Decompress & append rawdata ... ')
                bzfd = BZ2File(bzfile)
                csvdat = csv.reader(bzfd)
                try:
                    indat = np_array(list(csvdat))
                except csv.Error as e:
                    sys.exit('\n\nERROR: %s, line %d: %s!\n' % (bzfile, csvdat.line_num, e))
                finally:
                    bzfd.close()
                # Append ver_uprecs(auto-incr),area_ok(0),area_try(0) to raw 16-col fp.
                # NOTE(review): an empty csv yields 1-d arrays here, on which
                # np_append(..., axis=1) presumably fails -- confirm whether
                # empty rawdata files can occur.
                append_info = np_array([[ver_bzfile, 0, 0]] * len(indat))
                indat_withvers = np_append(indat, append_info, axis=1).tolist()
                print('Done')
                # Import csv into wpp_uprecsinfo.
                try:
                    sys.stdout.write('Import rawdata: ')
                    wppdb.insertMany(table_name=tab_rd, indat=indat_withvers, verb=True)
                except Exception as e:
                    # File/line of this handler (current frame), kept for the alert text.
                    _lineno = sys._getframe().f_lineno
                    _file = sys._getframe().f_code.co_filename
                    alerts['details'] += '\n[ver:%s][%s:%s]: %s' % \
                            (ver_bzfile, _file, _lineno, str(e).replace('\n', ' '))
                    alerts['vers'].append(ver_bzfile)
                    print('ERROR: Insert Rawdata Failed!')
                    continue
        finally:
            wppdb.close()
예제 #6
0
def fixPos(posreq=None, has_google=False, mc=None):
    """
    Answer one positioning request and return the formatted response string.

    posreq: request document handed to xmlparser(); its child nodes are
        looked up by tag: PosLevel, CellInfo, WLANIdentifier, WLANMatcher, Time.
    has_google: when True, googleLocation() is tried as a fallback.
    mc: passed through unchanged to googleLocation().

    PosLevel 'Area' does area location only, 'Hybrid' does area and point
    location, anything else (or a missing node) is handled as 'Point'.
    Point location tries WLAN first (fixPosWLAN), then the cell
    (wppdb.laccidLocation), then optionally Google; usable Google results
    are imported back into the DB.

    Returns POS_RESP_FULL / POS_RESP_AREA / POS_RESP_PT filled in according to
    the position level. errcode/errinfo are '100'/'OK' on success and stay at
    '102'/'AccuTooBad' otherwise.
    """
    xmlnodes = xmlparser(posreq).getchildren()
    # Parameters default vals init.
    lat, lon, ee = 39.9055, 116.3914, 5000 
    errinfo = 'AccuTooBad'; errcode = '102'
    # logic control switch init.
    pos_area = pos_pt = False # Default *PosLevel* is Point if not specified.
    # WppDB connection init.
    dbsvr = dbsvrs[DB_ONLINE] 
    wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
    # lambda func init.
    # f(tag) -> list of attribute dicts of all child nodes with that tag ([] if none).
    f = lambda x : [ node.attrib for node in xmlnodes if node.tag == x ] 
    plevel = f('PosLevel')
    # Area location related parameters interpretation & default vals init.
    plevel = plevel[0]['val'] if plevel else 'Point' 
    acode = addr = ''
    if plevel == 'Hybrid': pos_area = pos_pt = True
    elif plevel == 'Area': pos_area = True
    else: 
        pos_pt = True 
        plevel = 'Point' # PosLevel default *Point*.
    if pos_area: # Area location.
        cell = f('CellInfo')
        if cell: 
            laccid = '%s-%s' % (cell[0]['lac'], cell[0]['cid'])
            acode_addr = wppdb.areaLocation(laccid)
            if acode_addr: 
                acode, addr = acode_addr 
                errinfo='OK'; errcode='100'
            # Coordinates are blanked once a cell is present; point location
            # below refills them when it succeeds.
            lat = lon = ee = ''
    if pos_pt: # Point location, which returns 3d coordinates.
        macs = f('WLANIdentifier'); rsss = f('WLANMatcher'); need_google = False; 
        if macs and rsss:
            macs = macs[0]['val'].split('|') 
            rsss = rsss[0]['val'].split('|')
            # Keep at most CLUSTERKEYSIZE APs.
            INTERSET = min(CLUSTERKEYSIZE, len(macs)) 
            # NOTE(review): rsss holds strings at this point; if argsort is
            # numpy's, the ordering is lexicographic rather than numeric -- confirm.
            idxs_max = argsort(rsss)[:INTERSET]
            macsrsss = vstack((macs, rsss))[:,idxs_max]
            wlanloc = fixPosWLAN(INTERSET, macsrsss, wppdb, DEBUG_ALGO)
            if not wlanloc: 
                need_google = True
        else: wlanloc = []
        if not wlanloc: 
            # WLAN location failed or no WLAN info: fall back to cell location.
            # `cell`/`laccid` were already set above when pos_area is on.
            if not pos_area: cell = f('CellInfo')
            if cell:
                if not pos_area: 
                    laccid = '%s-%s' % (cell[0]['lac'], cell[0]['cid'])
                celloc = wppdb.laccidLocation(laccid)
                if not celloc: 
                    need_google = True 
                    wpplog.error('Cell location FAILED!')
                elif celloc[2] > GOOG_ERR_LIMIT: 
                    need_google = False  # googleLocation err too big for wlanloc.
                else: pass
            else: celloc = []
        # celloc is only bound when wlanloc is falsy; `or` short-circuits otherwise.
        loc = wlanloc or celloc
        if loc: 
            lat, lon, ee = loc
            errinfo = 'OK'; errcode = '100'
        # TODO: make googleLocation async job when wlanloc fails & celloc succeeds.
        # Try Google location, when wifi location failed && wifi info exists.
        if need_google and has_google: 
            # NOTE(review): cell can be [] here (WLAN fix failed and the request
            # has no CellInfo node), in which case cell[0] raises IndexError.
            loc_google = googleLocation(macs=macs, rsss=rsss, cellinfo=cell[0], mc=mc) 
            if loc_google:
                lat1, lon1, h, ee_goog = loc_google 
                if not loc:
                    lat, lon, ee = lat1, lon1, ee_goog
                    errinfo = 'OK'; errcode = '100'
                # wifi location import. TODO: make google loc import job async when it's *succeeded*.
                if macs and ee_goog <= GOOG_ERR_LIMIT:
                    t = f('Time')
                    t = t[0]['val'] if t else ''
                    # Build a one-line csv FP; ','*9 leaves the columns between
                    # the time and the latitude empty.
                    fp = '1000, 1000101, %s%s%s, %s, %s, %s, %s' % \
                            (t,','*9,lat1, lon1, h, '|'.join(macs), '|'.join(rsss))
                    n = doClusterIncr(fd_csv=StringIO(fp), wppdb=wppdb, verb=False)
                    if n['n_newfps'] == 1: 
                        wpplog.info('Added 1 WLAN FP from Google')
                    else: 
                        wpplog.error('Failed to add FP from Google!')
                # Cell location import.
                if cell and not celloc:
                    if ee_goog <= GOOG_ERR_LIMIT: 
                        # Item assignment: presumably loc_google is a mutable
                        # list -- TODO confirm against googleLocation().
                        loc_google[-1] = 500
                    wppdb.addCellLocation(laccid=laccid, loc=loc_google)
                    wpplog.info('Added 1 Cell FP from Google')
            else: wpplog.error('Google location FAILED!')
    wppdb.close()
    if plevel == 'Hybrid': posresp = POS_RESP_FULL % (errcode, errinfo, lat, lon, ee, plevel, acode, addr)
    elif plevel == 'Area': posresp = POS_RESP_AREA % (errcode, errinfo, plevel, acode, addr)
    else: posresp = POS_RESP_PT % (errcode, errinfo, lat, lon, ee, plevel)

    return posresp
예제 #7
0
def main():
    """
    Command-line entry point of the offline tools.

    Options (jobs run in this order after all options are parsed):
      -r FILE, --rawdata=FILE   loadRawdata(FILE, mode).
      -m N, --mode=N            update db mode for -r: 1-all(default), 2-incr.
      -u, --updatedb            updateAlgoData().
      -a, --areacrawl           crawlAreaLocData().
      -c N, --cluster=N         cluster the raw data file given as the first
                                positional argument: 1-All, 2-Incr.
      -s, --scan                collectFPs().
      -f N, --floor=N           sets the module global `floor`.
      -v, --verbose             sets the module globals `verbose` and `pp`.
      -h, --help                print usage and exit with code 0.

    -i/-k/-n/-t (and their long forms) are still accepted by getopt but have
    no handler, so they are rejected as unsupported parameters.
    Exits with code 99 on any option error.
    """
    import getopt
    try:
        # "cluster=" and "rawdata=" carry '=' so the long options take an
        # argument exactly like their short forms -c and -r.
        opts, args = getopt.getopt(sys.argv[1:], "ac:f:hi:k:m:nr:st:uv",
            ["areacrawl","cluster=","floor=","help","spid=","kml=","mode=","no-dump",
             "rawdata=","scan","to-rmp=","updatedb","verbose"])
    except getopt.GetoptError:
        usage()
        sys.exit(99)

    if not opts: usage(); sys.exit(0)

    # global vars init.
    crawl_area=False; updatedb=False; doLoadRawdata=False; scan=False
    rawfile=None; docluster=False; updbmode=1
    global verbose,pp,floor,nodump
    verbose=False; pp=None; nodump=False; floor=False

    for o,a in opts:
        if o in ("-a", "--areacrawl"):
            crawl_area = True
        elif o in ("-c", "--cluster"):
            if not a.isdigit():
                print('\ncluster type: %s should be an INTEGER!' % str(a))
                usage(); sys.exit(99)
            # 1-All; 2-Incr.
            cluster_type = int(a)
            docluster = True
            # The raw data file is the first positional argument, wherever
            # the -c option itself sits on the command line.
            if not args:
                print('\ncluster: raw data file argument is MISSING!')
                usage(); sys.exit(99)
            rmpfile = args[0]
            if not os.path.isfile(rmpfile):
                print('Raw data file NOT exist: %s!' % rmpfile)
                sys.exit(99)
        elif o in ("-m", "--mode"):
            if not a.isdigit():
                print('\nmode: %s should be an INTEGER!' % str(a))
                usage(); sys.exit(99)
            updbmode = int(a)
            if not (1 <= updbmode <= 2):
                print('\nError: updatedb mode: (%d) NOT supported yet!' % updbmode)
                usage(); sys.exit(99)
        elif o in ("-r", "--rawdata"):
            if not os.path.isfile(a):
                print('Rawdata file NOT exist: %s' % a)
                sys.exit(99)
            doLoadRawdata = True
            rawfile = a
        elif o in ("-s", "--scan"):
            scan = True
        elif o in ("-f", "--floor"):
            if not a.isdigit():
                print('\nfloor: %s should be an INTEGER!\n' % str(a))
                usage(); sys.exit(99)
            floor = int(a)
        elif o in ("-u", "--updatedb"):
            updatedb = True
        elif o in ("-v", "--verbose"):
            verbose = True
            pp = PrettyPrinter(indent=2)
        elif o in ("-h", "--help"):
            usage(); sys.exit(0)
        else:
            print('Parameter NOT supported: %s' % o)
            usage(); sys.exit(99)

    if doLoadRawdata:
        loadRawdata(rawfile, updbmode)

    # Update Algorithm related data.
    if updatedb:
        updateAlgoData()

    if crawl_area:
        crawlAreaLocData()

    # Ordinary fingerprints clustering.
    if docluster:
        if cluster_type == 1:
            fd_csv = open(rmpfile)
            try:
                doClusterAll(fd_csv)
            finally:
                fd_csv.close()
        elif cluster_type == 2:
            for dbip in DB_OFFLINE:
                dbsvr = dbsvrs[dbip]
                wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
                try:
                    fd_csv = open(rmpfile)
                    try:
                        n_inserts = doClusterIncr(fd_csv=fd_csv, wppdb=wppdb)
                    finally:
                        fd_csv.close()
                    print('Added: [%s] clusters, [%s] FPs' %
                          (n_inserts['n_newcids'], n_inserts['n_newfps']))
                finally:
                    wppdb.close()
        else: sys.exit('Unsupported cluster type code: %s!' % cluster_type)

    # WLAN scan for FP raw data collection.
    if scan:
        collectFPs()
예제 #8
0
def main():
    """
    Command-line entry point of the offline tools.

    Options (jobs run in this order after all options are parsed):
      -r FILE, --rawdata=FILE   loadRawdata(FILE, mode).
      -m N, --mode=N            update db mode for -r: 1-all(default), 2-incr.
      -u, --updatedb            updateAlgoData().
      -a, --areacrawl           crawlAreaLocData().
      -c N, --cluster=N         cluster the raw data file given as the first
                                positional argument: 1-All, 2-Incr.
      -s, --scan                collectFPs().
      -f N, --floor=N           sets the module global `floor`.
      -v, --verbose             sets the module globals `verbose` and `pp`.
      -h, --help                print usage and exit with code 0.

    -i/-k/-n/-t (and their long forms) are still accepted by getopt but have
    no handler, so they are rejected as unsupported parameters.
    Exits with code 99 on any option error.
    """
    import getopt
    try:
        # "cluster=" and "rawdata=" carry '=' so the long options take an
        # argument exactly like their short forms -c and -r.
        opts, args = getopt.getopt(sys.argv[1:], "ac:f:hi:k:m:nr:st:uv", [
            "areacrawl", "cluster=", "floor=", "help", "spid=", "kml=", "mode=",
            "no-dump", "rawdata=", "scan", "to-rmp=", "updatedb", "verbose"
        ])
    except getopt.GetoptError:
        usage()
        sys.exit(99)

    if not opts:
        usage()
        sys.exit(0)

    # global vars init.
    crawl_area = False
    updatedb = False
    doLoadRawdata = False
    scan = False
    rawfile = None
    docluster = False
    updbmode = 1
    global verbose, pp, floor, nodump
    verbose = False
    pp = None
    nodump = False
    floor = False

    for o, a in opts:
        if o in ("-a", "--areacrawl"):
            crawl_area = True
        elif o in ("-c", "--cluster"):
            if not a.isdigit():
                print('\ncluster type: %s should be an INTEGER!' % str(a))
                usage()
                sys.exit(99)
            # 1-All; 2-Incr.
            cluster_type = int(a)
            docluster = True
            # The raw data file is the first positional argument, wherever
            # the -c option itself sits on the command line.
            if not args:
                print('\ncluster: raw data file argument is MISSING!')
                usage()
                sys.exit(99)
            rmpfile = args[0]
            if not os.path.isfile(rmpfile):
                print('Raw data file NOT exist: %s!' % rmpfile)
                sys.exit(99)
        elif o in ("-m", "--mode"):
            if not a.isdigit():
                print('\nmode: %s should be an INTEGER!' % str(a))
                usage()
                sys.exit(99)
            updbmode = int(a)
            if not (1 <= updbmode <= 2):
                print('\nError: updatedb mode: (%d) NOT supported yet!' % updbmode)
                usage()
                sys.exit(99)
        elif o in ("-r", "--rawdata"):
            if not os.path.isfile(a):
                print('Rawdata file NOT exist: %s' % a)
                sys.exit(99)
            doLoadRawdata = True
            rawfile = a
        elif o in ("-s", "--scan"):
            scan = True
        elif o in ("-f", "--floor"):
            if not a.isdigit():
                print('\nfloor: %s should be an INTEGER!\n' % str(a))
                usage()
                sys.exit(99)
            floor = int(a)
        elif o in ("-u", "--updatedb"):
            updatedb = True
        elif o in ("-v", "--verbose"):
            verbose = True
            pp = PrettyPrinter(indent=2)
        elif o in ("-h", "--help"):
            usage()
            sys.exit(0)
        else:
            print('Parameter NOT supported: %s' % o)
            usage()
            sys.exit(99)

    if doLoadRawdata:
        loadRawdata(rawfile, updbmode)

    # Update Algorithm related data.
    if updatedb:
        updateAlgoData()

    if crawl_area:
        crawlAreaLocData()

    # Ordinary fingerprints clustering.
    if docluster:
        if cluster_type == 1:
            fd_csv = open(rmpfile)
            try:
                doClusterAll(fd_csv)
            finally:
                fd_csv.close()
        elif cluster_type == 2:
            for dbip in DB_OFFLINE:
                dbsvr = dbsvrs[dbip]
                wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
                try:
                    fd_csv = open(rmpfile)
                    try:
                        n_inserts = doClusterIncr(fd_csv=fd_csv, wppdb=wppdb)
                    finally:
                        fd_csv.close()
                    print('Added: [%s] clusters, [%s] FPs' % (
                        n_inserts['n_newcids'], n_inserts['n_newfps']))
                finally:
                    wppdb.close()
        else:
            sys.exit('Unsupported cluster type code: %s!' % cluster_type)

    # WLAN scan for FP raw data collection.
    if scan:
        collectFPs()
예제 #9
0
def loadRawdata(rawfile=None, updbmode=1):
    """
    Load a rawdata csv file into the *algo* tables of every DB in DB_OFFLINE.

    rawfile: rawdata csv file.
    updbmode: update db mode: 1-all, 2-incr.

    Init *algo* tables with rawdata csv(16 columns) -- SLOW if csv is big,
        try offline.doClusterAll(rawdata) -> db.loadClusteredData() instead.
    1) db.initTables(): init db tables if update all the db data.
    2) db.updateIndexes(): update tables indexes, drop old idxs if only update db incrementally.
    3) offline.doClusterIncr(): incremental clustering.

    The csv file handle and the DB connection are always released, even when
    one of the steps raises.
    """
    # Tables are dropped & re-created only in mode 1; in that case
    # updateIndexes() is called with doflush=False, otherwise with True.
    init_all = (updbmode == 1)
    doflush = not init_all
    for dbip in DB_OFFLINE:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        try:
            if init_all:
                # Create WPP tables.
                wppdb.initTables(doDrop=True)
            # Update indexs.
            wppdb.updateIndexes(doflush)
            # Load csv clustered data into DB tables.
            fd_csv = open(rawfile)
            try:
                n_inserts = doClusterIncr(fd_csv=fd_csv, wppdb=wppdb)
            finally:
                fd_csv.close()
            print('Added: [%s] clusters, [%s] FPs' % (n_inserts['n_newcids'],
                                                      n_inserts['n_newfps']))
            # Init ver_uprecs in |wpp_uprecsver| if it's empty.
            if wppdb.getRawdataVersion() is None:
                wppdb.setRawdataVersion('0')
        finally:
            wppdb.close()
예제 #10
0
def crawlAreaLocData():
    """
    Resolve the area location of raw FPs that have no area info yet.

    For every DB in DB_OFFLINE and every FP returned by wppdb.getCrawlFPs():
    1) try areaLocation(laccid); if OK, set flag area_ok = 1 and go on with the next FP.
    2) else try googleAreaLocation(latlon) and store area_try + 1 for the FP.
       - geoaddr is None: terminate the process with exit code 0 (OVER_QUERY_LIMIT).
       - geoaddr is otherwise empty: go on with the next FP.
    3) insert area location (laccid~geoaddr) into |wpp_cellarea| via addAreaLocation();
       if that fails, remember laccid in fail_history so it is skipped from then on,
       else set flag area_ok = 1.

    The DB connection is closed when the crawl of a DB ends, including the
    sys.exit() path.
    """
    fail_history = {}
    for dbip in DB_OFFLINE:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        try:
            # select config.CRAWL_LIMIT raw fps which haven't tried for google area location.
            for fp in wppdb.getCrawlFPs():
                laccid = '%s-%s' % (fp[8], fp[9])
                if laccid in fail_history:
                    continue
                fp_time = fp[2]
                print('%s %s' % (laccid, fp_time))
                # try areaLocation(laccid)
                if wppdb.areaLocation(laccid):
                    # area_ok = 1 & quit.
                    wppdb.setUprecsAreaStatus(status=1, time=fp_time)
                    continue
                print(fp)
                # try google area location.
                geoaddr = googleAreaLocation(latlon=(fp[11], fp[12]))
                # area_try += 1, recorded before the result is inspected.
                wppdb.setUprecAreaTry(area_try=fp[18] + 1, time=fp_time)
                if geoaddr is None:
                    sys.exit(0)  # OVER_QUERY_LIMIT.
                if not geoaddr:
                    continue
                # insert area location info(laccid~geoaddr) into |wpp_cellarea|.
                # till now, area_location: 'laccid,area_code,province>city>district'.
                area_location = wppdb.addAreaLocation(laccid=laccid,
                                                      geoaddr=geoaddr)
                if not area_location:
                    # laccid cannot be in fail_history here: such FPs were skipped above.
                    fail_history[laccid] = geoaddr
                    print('Failed to add area location: [%s] for cell[%s]' %
                          (geoaddr[-1].encode('utf8'), laccid))
                    continue
                # area_ok = 1 & quit.
                wppdb.setUprecsAreaStatus(status=1, time=fp_time)
                # encode('utf8') for crontab.
                print(area_location.encode('utf8'))
        finally:
            wppdb.close()
예제 #11
0
def updateAlgoData():
    """
    Update from raw data into FPs directly used by location.fixPosWLAN() from WppDB(wpp_clusterid, wpp_cfps).
    1) Retrieve latest incremental rawdata(csv) from remote FTP server(hosted by FPP).
    2) Decompress bzip2, import CSV into wpp_uprecsinfo with its ver_uprecs, Update ver_uprecs in wpp_uprecsver.
    3) Incr clustering inserted rawdata for direct algo use.

    Runs once per DB in DB_OFFLINE. For each bzip2 file returned by
    syncFtpUprecs(): a csv parse error terminates the process via sys.exit();
    a failed insert is recorded in the local `alerts` dict and the file is
    skipped; a file without any located FP (lat/lon non-zero) is skipped
    before clustering.
    """
    dbips = DB_OFFLINE
    for dbip in dbips:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        ver_wpp = wppdb.getRawdataVersion()
        # Sync rawdata into wpp_uprecsinfo from remote FTP server.
        print 'Probing rawdata version > [%s]' % ver_wpp
        vers_fpp, localbzs = syncFtpUprecs(FTPCFG, ver_wpp)
        if not vers_fpp:
            print 'Not found!'
            continue
        else:
            print 'Found new vers: %s' % vers_fpp
        # Handle each bzip2 file.
        # alerts: versions whose import failed, plus the collected error details.
        alerts = {'vers': [], 'details': ''}
        tab_rd = 'wpp_uprecsinfo'
        for bzfile in localbzs:
            # Filter out the ver_uprecs info from the name of each bzip file.
            # (text between the last '_' and the following '.')
            ver_bzfile = bzfile.split('_')[-1].split('.')[0]
            # Update ver_uprecs in wpp_uprecsver to ver_bzfile.
            # NOTE(review): the version is stored before the import below has
            # succeeded -- confirm this ordering is intended.
            wppdb.setRawdataVersion(ver_bzfile)
            print '%s\nUpdate ver_uprecs -> [%s]' % ('-' * 40, ver_bzfile)
            # Decompress bzip2.
            sys.stdout.write('Decompress & append rawdata ... ')
            # NOTE(review): the BZ2File handle is never closed explicitly.
            csvdat = csv.reader(BZ2File(bzfile))
            try:
                indat = np_array([line for line in csvdat])
            except csv.Error, e:
                sys.exit('\n\nERROR: %s, line %d: %s!\n' %
                         (bzfile, csvdat.line_num, e))
            # Append ver_uprecs(auto-incr),area_ok(0),area_try(0) to raw 16-col fp.
            append_info = np_array([[ver_bzfile, 0, 0]
                                    for i in xrange(len(indat))])
            indat_withvers = np_append(indat, append_info, axis=1).tolist()
            print 'Done'
            # Import csv into wpp_uprecsinfo.
            try:
                sys.stdout.write('Import rawdata: ')
                wppdb.insertMany(table_name=tab_rd,
                                 indat=indat_withvers,
                                 verb=True)
            except Exception, e:
                # File/line of this handler (the current frame), not of the
                # statement that raised inside insertMany().
                _lineno = sys._getframe().f_lineno
                _file = sys._getframe().f_code.co_filename
                alerts['details'] += '\n[ver:%s][%s:%s]: %s' % \
                        (ver_bzfile, _file, _lineno, str(e).replace('\n', ' '))
                alerts['vers'].append(ver_bzfile)
                print 'ERROR: Insert Rawdata Failed!'
                continue
            # Incr clustering.
            # file described by fd_csv contains all *location enabled* rawdata from wpp_uprecsinfo.
            # NOTE(review): ver_bzfile comes from the bzip file name and is
            # interpolated into the SQL text as-is -- confirm file names are trusted.
            strWhere = 'WHERE lat!=0 and lon!=0 and ver_uprecs=%s' % ver_bzfile
            cols_ignored = 3  # 3 status cols to be ignored during clustering: ver_uprecs,area_ok,area_try.
            # All table fields except the trailing status columns.
            cols_select = ','.join(wppdb.tbl_field[tab_rd][:-cols_ignored])
            sql = wppdb.sqls['SQL_SELECT'] % (cols_select, '%s %s' %
                                              (tab_rd, strWhere))
            rdata_loc = wppdb.execute(sql=sql, fetch_one=False)
            if not rdata_loc: continue  # NO FPs has location info.
            # Re-serialize the selected rows as csv text (one FP per line) so
            # doClusterIncr() can read them from an in-memory file object.
            str_rdata_loc = '\n'.join(
                [','.join([str(col) for col in fp]) for fp in rdata_loc])
            fd_csv = StringIO(str_rdata_loc)
            print 'FPs for Incr clustering selected & ready'
            n_inserts = doClusterIncr(fd_csv=fd_csv, wppdb=wppdb, verb=False)
            print 'AlgoData added: [%s] clusters, [%s] FPs' % (
                n_inserts['n_newcids'], n_inserts['n_newfps'])