def loadRawdata(rawfile=None, updbmode=1):
    """ Init *algo* tables with a rawdata csv file (16 columns).

    rawfile: rawdata csv file.
    updbmode: update db mode: 1-all, 2-incr.

    SLOW if the csv is big; consider offline.doClusterAll(rawdata) ->
    db.loadClusteredData() instead.
    1) db.initTables(): init db tables when updating all the db data.
    2) db.updateIndexes(): update table indexes; old idxs are dropped only
       when updating the db incrementally.
    3) offline.doClusterIncr(): incremental clustering.
    """
    dbips = DB_OFFLINE
    doflush = True
    for dbip in dbips:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        if updbmode == 1:
            # Create WPP tables from scratch.
            wppdb.initTables(doDrop=True)
            doflush = False
        # Update indexes.
        wppdb.updateIndexes(doflush)
        # Load csv clustered data into DB tables.
        n_inserts = doClusterIncr(fd_csv=file(rawfile), wppdb=wppdb)
        print 'Added: [%s] clusters, [%s] FPs' % \
            (n_inserts['n_newcids'], n_inserts['n_newfps'])
        # Init ver_uprecs in |wpp_uprecsver| if it's empty.
        if wppdb.getRawdataVersion() is None:
            wppdb.setRawdataVersion('0')
        wppdb.close()
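# Usage sketch for loadRawdata() -- hypothetical wiring, not a documented CLI;
# it assumes the module-level DB_OFFLINE/dbsvrs config is in place and that
# the path points at a 16-column rawdata csv:
#
#   loadRawdata(rawfile='/path/to/rawdata.csv', updbmode=1)  # rebuild all tables.
#   loadRawdata(rawfile='/path/to/rawdata.csv', updbmode=2)  # keep tables, flush idxs.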
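# updateAlgoData() below derives ver_uprecs from each synced bzip2 file name.
# A minimal, self-contained sketch of that parsing -- the 'uprecs_<ver>.csv.bz2'
# pattern is an assumption inferred from the split logic, not a documented
# contract:
def _demo_ver_from_bzfile(bzfile='/tmp/uprecs_1024.csv.bz2'):
    """ >>> _demo_ver_from_bzfile('uprecs_7.csv.bz2')
    '7'
    """
    return bzfile.split('_')[-1].split('.')[0]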
def updateAlgoData():
    """ Update the FPs used directly by location.fixPosWLAN(), i.e. the
    WppDB tables (wpp_clusterid, wpp_cfps), from raw data.
    1) Retrieve the latest incremental rawdata (csv) from the remote FTP
       server (hosted by FPP).
    2) Decompress bzip2, import the csv into wpp_uprecsinfo with its
       ver_uprecs, and update ver_uprecs in wpp_uprecsver.
    3) Incrementally cluster the inserted rawdata for direct algo use.
    """
    dbips = DB_OFFLINE
    for dbip in dbips:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        ver_wpp = wppdb.getRawdataVersion()
        # Sync rawdata into wpp_uprecsinfo from the remote FTP server.
        print 'Probing rawdata version > [%s]' % ver_wpp
        vers_fpp, localbzs = syncFtpUprecs(FTPCFG, ver_wpp)
        if not vers_fpp:
            print 'Not found!'
            continue
        else:
            print 'Found new vers: %s' % vers_fpp
        # Handle each bzip2 file.
        alerts = {'vers': [], 'details': ''}
        tab_rd = 'wpp_uprecsinfo'
        for bzfile in localbzs:
            # Parse the ver_uprecs info out of the name of each bzip file.
            ver_bzfile = bzfile.split('_')[-1].split('.')[0]
            # Update ver_uprecs in wpp_uprecsver to ver_bzfile.
            wppdb.setRawdataVersion(ver_bzfile)
            print '%s\nUpdate ver_uprecs -> [%s]' % ('-' * 40, ver_bzfile)
            # Decompress bzip2.
            sys.stdout.write('Decompress & append rawdata ... ')
            csvdat = csv.reader(BZ2File(bzfile))
            try:
                indat = np_array([ line for line in csvdat ])
            except csv.Error, e:
                sys.exit('\n\nERROR: %s, line %d: %s!\n' %
                         (bzfile, csvdat.line_num, e))
            # Append ver_uprecs(auto-incr), area_ok(0), area_try(0) to each
            # raw 16-col fp.
            append_info = np_array([ [ver_bzfile, 0, 0] for i in xrange(len(indat)) ])
            indat_withvers = np_append(indat, append_info, axis=1).tolist()
            print 'Done'
            # Import the csv into wpp_uprecsinfo.
            try:
                sys.stdout.write('Import rawdata: ')
                wppdb.insertMany(table_name=tab_rd, indat=indat_withvers, verb=True)
            except Exception, e:
                _lineno = sys._getframe().f_lineno
                _file = sys._getframe().f_code.co_filename
                alerts['details'] += '\n[ver:%s][%s:%s]: %s' % \
                    (ver_bzfile, _file, _lineno, str(e).replace('\n', ' '))
                alerts['vers'].append(ver_bzfile)
                print 'ERROR: Insert Rawdata Failed!'
                continue
            # Incr clustering: the file behind fd_csv contains all *location
            # enabled* rawdata from wpp_uprecsinfo.
            strWhere = 'WHERE lat!=0 and lon!=0 and ver_uprecs=%s' % ver_bzfile
            # 3 status cols to be ignored during clustering:
            # ver_uprecs, area_ok, area_try.
            cols_ignored = 3
            cols_select = ','.join(wppdb.tbl_field[tab_rd][:-cols_ignored])
            sql = wppdb.sqls['SQL_SELECT'] % (cols_select, '%s %s' % (tab_rd, strWhere))
            rdata_loc = wppdb.execute(sql=sql, fetch_one=False)
            if not rdata_loc:
                continue  # No FPs have location info.
            str_rdata_loc = '\n'.join(
                [ ','.join([ str(col) for col in fp ]) for fp in rdata_loc ])
            fd_csv = StringIO(str_rdata_loc)
            print 'FPs for incr clustering selected & ready'
            n_inserts = doClusterIncr(fd_csv=fd_csv, wppdb=wppdb, verb=False)
            print 'AlgoData added: [%s] clusters, [%s] FPs' % \
                (n_inserts['n_newcids'], n_inserts['n_newfps'])
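# Self-contained sketch of the bz2 -> numpy -> StringIO data path used by
# updateAlgoData() above; the 3-column demo rows stand in for the real
# 16-column fingerprints, and the file name/data are illustrative only.
def _demo_bz2_csv_path(ver='42'):
    import csv
    from bz2 import BZ2File
    from cStringIO import StringIO
    from numpy import array as np_array, append as np_append
    bz = '/tmp/uprecs_demo.csv.bz2'
    # Write a tiny bzip2-compressed csv, like the files the FTP sync delivers.
    f = BZ2File(bz, 'w')
    f.write('1,2,3\n4,5,6\n')
    f.close()
    # Decompress & read, then append the ver_uprecs/area_ok/area_try cols.
    indat = np_array([ line for line in csv.reader(BZ2File(bz)) ])
    append_info = np_array([ [ver, 0, 0] for i in xrange(len(indat)) ])
    rows = np_append(indat, append_info, axis=1).tolist()
    # Re-serialize to csv text and wrap in a file-like object, ready to be
    # handed to doClusterIncr(fd_csv=...).
    return StringIO('\n'.join(','.join(row) for row in rows))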