def crawlAreaLocData():
    """Crawl area locations for fingerprints that do not have one yet.

    For every database listed in DB_OFFLINE, the fingerprints returned by
    ``wppdb.getCrawlFPs()`` are processed one by one:

    1) Build the cell key ``laccid`` from fp[8] and fp[9]; skip cells whose
       area insertion already failed during this run.
    2) If ``wppdb.areaLocation(laccid)`` already knows the cell, mark the
       record with area status 1 and move on.
    3) Otherwise query ``googleAreaLocation`` with (fp[11], fp[12]) and
       store area_try = fp[18] + 1 for the record.
    4) A ``None`` answer terminates the process via ``sys.exit(0)``; any
       other falsy answer just moves on to the next fingerprint.
    5) A truthy answer is handed to ``wppdb.addAreaLocation``; on success
       the record gets area status 1, on failure the cell is remembered in
       the per-run failure map and a message is printed.

    NOTE(review): per the original notes getCrawlFPs() is limited by
    config.CRAWL_LIMIT and geoaddr is [province, city, district] -- neither
    is visible from this function; confirm against WppDB/googleAreaLocation.
    """
    # Cells (laccid -> geoaddr) whose area insertion failed in this run.
    failed_cells = {}
    for db_host in DB_OFFLINE:
        svr_cfg = dbsvrs[db_host]
        wppdb = WppDB(dsn=svr_cfg['dsn'], dbtype=svr_cfg['dbtype'])
        for fp in wppdb.getCrawlFPs():
            laccid = '%s-%s' % (fp[8], fp[9])
            if laccid in failed_cells:
                continue
            rec_time = fp[2]
            # Single-argument print(...) is still a Python 2 print statement.
            print('%s %s' % (laccid, rec_time))

            # Cell already has an area location: flag the record and go on.
            if wppdb.areaLocation(laccid):
                wppdb.setUprecsAreaStatus(status=1, time=rec_time)
                continue

            print(fp)
            # Ask the geocoder, then record the attempt whatever the outcome.
            geoaddr = googleAreaLocation(latlon=(fp[11], fp[12]))
            wppdb.setUprecAreaTry(area_try=fp[18] + 1, time=rec_time)

            if geoaddr is None:
                sys.exit(0)  # OVER_QUERY_LIMIT (per the original comment).
            if not geoaddr:
                continue

            # On success area_location reads, per the original comment,
            # 'laccid,area_code,province>city>district'.
            area_location = wppdb.addAreaLocation(laccid=laccid,
                                                  geoaddr=geoaddr)
            if not area_location:
                failed_cells.setdefault(laccid, geoaddr)
                print('Failed to add area location: [%s] for cell[%s]' %
                      (geoaddr[-1].encode('utf8'), laccid))
                continue

            wppdb.setUprecsAreaStatus(status=1, time=rec_time)
            print(area_location.encode('utf8'))  # encode('utf8') for crontab.
def _crawlAreaLocForFP(wppdb, fp, fail_history):
    """Try to resolve the area location for one fingerprint record.

    wppdb is the WppDB connection the record came from, fp the raw record
    (indexed positionally) and fail_history the dict of cells whose area
    insertion already failed in this run; it is updated in place.
    """
    cell_key = '%s-%s' % (fp[8], fp[9])
    if cell_key in fail_history:
        return
    stamp = fp[2]
    # Single-argument print(...) is still a Python 2 print statement.
    print('%s %s' % (cell_key, stamp))

    if wppdb.areaLocation(cell_key):
        # Known cell: area_ok = 1, nothing more to do for this record.
        wppdb.setUprecsAreaStatus(status=1, time=stamp)
        return

    print(fp)
    # Unknown cell: query the geocoder with (fp[11], fp[12]) and bump
    # area_try for the record regardless of the answer.
    coords = (fp[11], fp[12])
    geoaddr = googleAreaLocation(latlon=coords)
    wppdb.setUprecAreaTry(area_try=fp[18] + 1, time=stamp)

    if not geoaddr:
        if geoaddr is None:
            sys.exit(0)  # OVER_QUERY_LIMIT (per the original comment).
        return

    # Insert the area info for this cell; per the original comment the
    # result reads 'laccid,area_code,province>city>district'.
    area_location = wppdb.addAreaLocation(laccid=cell_key, geoaddr=geoaddr)
    if area_location:
        wppdb.setUprecsAreaStatus(status=1, time=stamp)
        print(area_location.encode('utf8'))  # encode('utf8') for crontab.
        return

    # Remember the failure so later records of the same cell are skipped.
    if cell_key not in fail_history:
        fail_history[cell_key] = geoaddr
    print('Failed to add area location: [%s] for cell[%s]' %
          (geoaddr[-1].encode('utf8'), cell_key))


def crawlAreaLocData():
    """Resolve area locations for crawled fingerprints on every offline DB.

    Walks the servers named in DB_OFFLINE, opens a WppDB connection for each
    and feeds every record from ``getCrawlFPs()`` through these steps:

    1) Skip the record if its cell (fp[8]-fp[9]) already failed this run.
    2) If ``areaLocation(laccid)`` succeeds, set area status 1 and stop.
    3) Otherwise call ``googleAreaLocation`` and store area_try + 1; a
       ``None`` result exits the process with status 0, any other falsy
       result leaves the record for a later run.
    4) With a truthy result, call ``addAreaLocation``; on success set area
       status 1, on failure note the cell in the shared failure map.

    The failure map is shared across all databases visited in one call.
    """
    fail_history = {}
    for dbip in DB_OFFLINE:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        # NOTE(review): original comment says this selects config.CRAWL_LIMIT
        # raw fps not yet tried for google area location -- confirm in WppDB.
        for fp in wppdb.getCrawlFPs():
            _crawlAreaLocForFP(wppdb, fp, fail_history)