示例#1
0
def crawlAreaLocData():
    """
    Resolve an area location for crawled fingerprints of every offline DB.

    For each DB listed in DB_OFFLINE, the records returned by
    WppDB.getCrawlFPs() are processed one by one:
    1) build the cell key 'fp[8]-fp[9]' (laccid); cells whose insertion
       already failed during this run are skipped.
    2) if wppdb.areaLocation(laccid) succeeds, set area status to 1 and move
       on to the next record.
    3) otherwise query googleAreaLocation(latlon=(fp[11], fp[12])) and store
       area_try = fp[18] + 1 for the record, whatever the query returned.
    4) a None answer terminates the process via sys.exit(0) (the original
       code labels this case OVER_QUERY_LIMIT); any other falsy answer just
       skips the record.
    5) otherwise insert (laccid, geoaddr) through wppdb.addAreaLocation();
       on failure remember the cell in the per-run failure cache and report
       it, on success set area status to 1 and print the stored location.
    """
    # laccid -> geoaddr for cells that could not be inserted; shared across
    # all DBs handled in this call so the same cell is not retried.
    fail_history = {}
    for dbip in DB_OFFLINE:
        svr_conf = dbsvrs[dbip]
        wppdb = WppDB(dsn=svr_conf['dsn'], dbtype=svr_conf['dbtype'])
        for fp in wppdb.getCrawlFPs():
            laccid = '%s-%s' % (fp[8], fp[9])
            if laccid in fail_history:
                continue
            rec_time = fp[2]
            print('%s %s' % (laccid, rec_time))

            # Cell already has an area location: flag the record and go on.
            if wppdb.areaLocation(laccid):
                wppdb.setUprecsAreaStatus(status=1, time=rec_time)
                continue

            print(fp)
            geoaddr = googleAreaLocation(latlon=(fp[11], fp[12]))
            # The attempt counter is bumped before the answer is inspected.
            wppdb.setUprecAreaTry(area_try=fp[18] + 1, time=rec_time)

            if geoaddr is None:
                sys.exit(0)  # OVER_QUERY_LIMIT.
            if not geoaddr:
                continue

            # On success area_location reads
            # 'laccid,area_code,province>city>district'.
            area_location = wppdb.addAreaLocation(laccid=laccid,
                                                  geoaddr=geoaddr)
            if not area_location:
                # The guard at the top of the loop guarantees this cell is
                # not cached yet.
                fail_history[laccid] = geoaddr
                print('Failed to add area location: [%s] for cell[%s]' %
                      (geoaddr[-1].encode('utf8'), laccid))
                continue

            wppdb.setUprecsAreaStatus(status=1, time=rec_time)
            # encode('utf8') so the output is safe when run from crontab.
            print(area_location.encode('utf8'))
示例#2
0
def crawlAreaLocData():
    """
    Attach an area location to crawled fingerprint records.

    Walks every DB named in DB_OFFLINE and, for each record returned by
    getCrawlFPs():
      * derives the cell key '<fp[8]>-<fp[9]>' and skips it if inserting that
        cell already failed earlier in this run;
      * when areaLocation(cell) already succeeds, only the status flag is set
        to 1;
      * otherwise asks googleAreaLocation() with latlon=(fp[11], fp[12]),
        writes area_try = fp[18] + 1, and then
          - exits the process with status 0 if the answer is None
            (OVER_QUERY_LIMIT per the original comment),
          - skips the record on any other falsy answer,
          - else inserts the location with addAreaLocation(); a failed insert
            is cached and reported, a successful one sets the status flag to
            1 and prints the stored location.
    """
    failed_cells = {}  # cell key -> geoaddr of inserts that failed this run
    for host in DB_OFFLINE:
        conf = dbsvrs[host]
        db = WppDB(dsn=conf['dsn'], dbtype=conf['dbtype'])
        pending = db.getCrawlFPs()
        for rec in pending:
            cell_key = '%s-%s' % (rec[8], rec[9])
            if cell_key in failed_cells:
                continue
            stamp = rec[2]
            print('%s %s' % (cell_key, stamp))

            # Stays None when the cell was already known to the DB.
            new_location = None
            if not db.areaLocation(cell_key):
                print(rec)
                geoaddr = googleAreaLocation(latlon=(rec[11], rec[12]))
                # Counter is updated regardless of the geocoder's answer.
                db.setUprecAreaTry(area_try=rec[18] + 1, time=stamp)
                if geoaddr is None:
                    sys.exit(0)  # OVER_QUERY_LIMIT.
                if not geoaddr:
                    continue
                # Expected form: 'laccid,area_code,province>city>district'.
                new_location = db.addAreaLocation(laccid=cell_key,
                                                  geoaddr=geoaddr)
                if not new_location:
                    failed_cells[cell_key] = geoaddr
                    failure_msg = 'Failed to add area location: [%s] for cell[%s]' % \
                                  (geoaddr[-1].encode('utf8'), cell_key)
                    print(failure_msg)
                    continue

            # Reached both for known cells and for freshly inserted ones.
            db.setUprecsAreaStatus(status=1, time=stamp)
            if new_location:
                # encode('utf8') keeps crontab output printable.
                print(new_location.encode('utf8'))