Example #1
def loadRawdata(rawfile=None, updbmode=1):
    """
    rawfile: rawdata csv file.
    updbmode: update db mode: 1-all, 2-incr.

    Init *algo* tables with rawdata csv(16 columns) -- SLOW if csv is big, 
        try offline.doClusterAll(rawdata) -> db.loadClusteredData() instead.
    1) db.initTables(): init db tables if update all the db data.
    2) db.updateIndexes(): update tables indexes, drop old idxs if only update db incrementally.
    3) offline.doClusterIncr(): incremental clustering.
    """
    dbips = DB_OFFLINE
    doflush = True
    for dbip in dbips:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        if updbmode == 1:
            # Create WPP tables.
            wppdb.initTables(doDrop=True)
            doflush = False
        # Update indexes.
        wppdb.updateIndexes(doflush)
        # Load csv clustered data into DB tables.
        n_inserts = doClusterIncr(fd_csv=open(rawfile), wppdb=wppdb)
        print 'Added: [%s] clusters, [%s] FPs' % (n_inserts['n_newcids'],
                                                  n_inserts['n_newfps'])
        # Init ver_uprecs in |wpp_uprecsver| if it's empty.
        if wppdb.getRawdataVersion() is None: 
            wppdb.setRawdataVersion('0')
        wppdb.close()
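
A minimal driver for the function above might look like the sketch below. It is illustrative only: it assumes the surrounding module already provides DB_OFFLINE, dbsvrs, WppDB, and doClusterIncr as used above, and the CSV file name is a hypothetical placeholder.

# Hypothetical usage sketch -- 'rawdata.csv' is assumed to be a
# 16-column raw-data file as described in the docstring.
if __name__ == '__main__':
    loadRawdata(rawfile='rawdata.csv', updbmode=1)  # rebuild all tables
    loadRawdata(rawfile='rawdata.csv', updbmode=2)  # incremental update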
Example #2
def updateAlgoData():
    """
    Update from raw data into FPs directly used by location.fixPosWLAN() from WppDB(wpp_clusterid, wpp_cfps).
    1) Retrieve latest incremental rawdata(csv) from remote FTP server(hosted by FPP).
    2) Decompress bzip2, import CSV into wpp_uprecsinfo with its ver_uprecs, Update ver_uprecs in wpp_uprecsver.
    3) Incr clustering inserted rawdata for direct algo use.
    """
    dbips = DB_OFFLINE
    for dbip in dbips:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        ver_wpp = wppdb.getRawdataVersion()
        # Sync rawdata into wpp_uprecsinfo from remote FTP server.
        print 'Probing rawdata version > [%s]' % ver_wpp
        vers_fpp, localbzs = syncFtpUprecs(FTPCFG, ver_wpp)
        if not vers_fpp:
            print 'Not found!'
            continue
        else:
            print 'Found new vers: %s' % vers_fpp
        # Handle each bzip2 file.
        alerts = {'vers': [], 'details': ''}
        tab_rd = 'wpp_uprecsinfo'
        for bzfile in localbzs:
            # Extract the ver_uprecs version from the name of each bzip2 file.
            ver_bzfile = bzfile.split('_')[-1].split('.')[0]
            # Update ver_uprecs in wpp_uprecsver to ver_bzfile.
            wppdb.setRawdataVersion(ver_bzfile)
            print '%s\nUpdate ver_uprecs -> [%s]' % ('-' * 40, ver_bzfile)
            # Decompress bzip2.
            sys.stdout.write('Decompress & append rawdata ... ')
            csvdat = csv.reader(BZ2File(bzfile))
            try:
                indat = np_array([line for line in csvdat])
            except csv.Error, e:
                sys.exit('\n\nERROR: %s, line %d: %s!\n' %
                         (bzfile, csvdat.line_num, e))
            # Append ver_uprecs, area_ok(0), area_try(0) status columns to each raw 16-column FP.
            append_info = np_array([[ver_bzfile, 0, 0]
                                    for i in xrange(len(indat))])
            indat_withvers = np_append(indat, append_info, axis=1).tolist()
            print 'Done'
            # Import csv into wpp_uprecsinfo.
            try:
                sys.stdout.write('Import rawdata: ')
                wppdb.insertMany(table_name=tab_rd,
                                 indat=indat_withvers,
                                 verb=True)
            except Exception, e:
                _lineno = sys._getframe().f_lineno
                _file = sys._getframe().f_code.co_filename
                alerts['details'] += '\n[ver:%s][%s:%s]: %s' % \
                        (ver_bzfile, _file, _lineno, str(e).replace('\n', ' '))
                alerts['vers'].append(ver_bzfile)
                print 'ERROR: Insert Rawdata Failed!'
                continue
            # Incremental clustering.
            # fd_csv (built below) holds all *location-enabled* rawdata rows
            # from wpp_uprecsinfo for this version.
            strWhere = 'WHERE lat!=0 and lon!=0 and ver_uprecs=%s' % ver_bzfile
            cols_ignored = 3  # 3 status cols to be ignored during clustering: ver_uprecs,area_ok,area_try.
            cols_select = ','.join(wppdb.tbl_field[tab_rd][:-cols_ignored])
            sql = wppdb.sqls['SQL_SELECT'] % (cols_select, '%s %s' %
                                              (tab_rd, strWhere))
            rdata_loc = wppdb.execute(sql=sql, fetch_one=False)
            if not rdata_loc: continue  # No FPs have location info.
            str_rdata_loc = '\n'.join(
                [','.join([str(col) for col in fp]) for fp in rdata_loc])
            fd_csv = StringIO(str_rdata_loc)
            print 'FPs for Incr clustering selected & ready'
            n_inserts = doClusterIncr(fd_csv=fd_csv, wppdb=wppdb, verb=False)
            print 'AlgoData added: [%s] clusters, [%s] FPs' % (
                n_inserts['n_newcids'], n_inserts['n_newfps'])
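
The per-file steps above (version parsing, status-column append, CSV rebuild) can be exercised in isolation. A self-contained sketch follows; the file name and 16-column layout are illustrative assumptions, and np_array/np_append in the original are assumed to alias numpy.array/numpy.append.

# Self-contained sketch of the per-file steps above (Python 2 or 3).
# The '<prefix>_<ver>.csv.bz2' name pattern is an assumption inferred from
# the split('_')[-1].split('.')[0] parsing in updateAlgoData().
import numpy as np

bzfile = 'uprecs_incr_42.csv.bz2'                  # hypothetical file name
ver_bzfile = bzfile.split('_')[-1].split('.')[0]   # -> '42'

# Two fake 16-column FP rows, as strings (csv.reader yields strings).
indat = np.array([['c%02d' % i for i in range(16)] for _ in range(2)])
# Append ver_uprecs, area_ok(0), area_try(0) -> 19 columns per row.
append_info = np.array([[ver_bzfile, 0, 0] for _ in range(len(indat))])
indat_withvers = np.append(indat, append_info, axis=1).tolist()
assert len(indat_withvers[0]) == 16 + 3

# Rebuild the CSV text that doClusterIncr() would consume via StringIO.
str_rdata_loc = '\n'.join([','.join([str(col) for col in fp])
                           for fp in indat_withvers])
assert str_rdata_loc.count('\n') == 1   # 2 rows -> 1 newline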