def do_work(paras):
    """Download one venue's meta data and store it for one crawl user.

    Args:
        paras: indexable sequence; ``paras[0]`` is the venue id and
            ``paras[1]`` is the crawl user id.
    """
    venue_id, crawl_user_id = paras[0], paras[1]

    # The download runs before the drop, so an exception raised while
    # fetching means the drop/add/save calls below are never reached.
    meta_data = download_meta_data(venue_id)

    # Drop then re-add before saving (presumably recreates an empty
    # per-user table -- confirm against the helpers' definitions).
    drop_table_venue_meta(crawl_user_id)
    add_table_venue_meta(crawl_user_id)
    save_venue_meta(meta_data, crawl_user_id)
def main():
    """Search Foursquare for venues near New York City and crawl each one.

    Calls the three ``add_table_*`` helpers, runs a single venue search,
    prints the raw response, and then, for every venue in the response,
    grabs its meta data, photos and tips with the corresponding crawler
    classes.  A progress line is printed before each venue is processed.
    """
    add_table_venue_meta()
    add_table_venue_photo_4sq()
    add_table_venue_tips()

    client = foursquare.Foursquare(
        config.foursquare_client_id,
        client_secret=config.foursquare_client_secret)
    all_plazas = client.venues.search(
        params={
            'near': 'New York City',
            'limit': 50,
            'intent': 'browse',
            'radius': 5000,
            # NOTE(review): presumably the Foursquare "Plaza" category,
            # judging by the variable name -- confirm.
            'categoryId': '4bf58dd8d48988d164941735',
        })
    # Single-argument print(...) behaves the same as the print statement
    # on Python 2 and also parses on Python 3.
    print(all_plazas)

    # Progress must be measured against the venue list itself;
    # len(all_plazas) would only count the response's top-level keys.
    venues = all_plazas['venues']
    total = len(venues)

    # enumerate supplies the counter that previously stayed at 0 for
    # every venue; it is 1-based so the last line reads "total/total".
    for cnt, v in enumerate(venues, 1):
        # Pause between venues (presumably to stay under the API rate
        # limit -- confirm).
        time.sleep(10)
        venue_id = v['id']
        print('progress: %d/%d -> %s' % (cnt, total, v['name']))

        crawler = VenueMetaCrawler()
        crawler.grab_meta_data(venue_id)
        crawler = VenuePhotoCrawlerFoursquare()
        crawler.grab_photo(venue_id)
        crawler = VenueTipsCrawler()
        crawler.grab_tip(venue_id)
def __init__(self):
    """Set up job bookkeeping and start the background consumer thread.

    Initialises an empty job mapping and a job-id counter starting at 0,
    calls the venue-meta and venue-photo ``add_table_*`` helpers, creates
    the ``downloading`` event (initially unset), and launches
    ``self.infinit_consume`` on a daemon thread.
    """
    # Job registry and the id that will be handed to the next job.
    self.jobs = {}
    self.next_job_id = 0

    add_table_venue_meta()
    add_table_venue_photo_4sq()

    self.downloading = threading.Event()

    # Daemon thread: it does not keep the interpreter alive at exit.
    consumer = Thread(target=self.infinit_consume)
    consumer.setDaemon(True)
    consumer.start()
def main():
    """Search Foursquare for venues near New York City and crawl each one.

    Calls the three ``add_table_*`` helpers, runs a single venue search,
    prints the raw response, and then, for every venue in the response,
    grabs its meta data, photos and tips with the corresponding crawler
    classes.  A progress line is printed before each venue is processed.
    """
    add_table_venue_meta()
    add_table_venue_photo_4sq()
    add_table_venue_tips()

    client = foursquare.Foursquare(
        config.foursquare_client_id,
        client_secret=config.foursquare_client_secret)
    all_plazas = client.venues.search(
        params={
            'near': 'New York City',
            'limit': 50,
            'intent': 'browse',
            'radius': 5000,
            # NOTE(review): presumably the Foursquare "Plaza" category,
            # judging by the variable name -- confirm.
            'categoryId': '4bf58dd8d48988d164941735',
        })
    # Single-argument print(...) behaves the same as the print statement
    # on Python 2 and also parses on Python 3.
    print(all_plazas)

    # Progress must be measured against the venue list itself;
    # len(all_plazas) would only count the response's top-level keys.
    venues = all_plazas['venues']
    total = len(venues)

    # enumerate supplies the counter that previously stayed at 0 for
    # every venue; it is 1-based so the last line reads "total/total".
    for cnt, v in enumerate(venues, 1):
        # Pause between venues (presumably to stay under the API rate
        # limit -- confirm).
        time.sleep(10)
        venue_id = v['id']
        print('progress: %d/%d -> %s' % (cnt, total, v['name']))

        crawler = VenueMetaCrawler()
        crawler.grab_meta_data(venue_id)
        crawler = VenuePhotoCrawlerFoursquare()
        crawler.grab_photo(venue_id)
        crawler = VenueTipsCrawler()
        crawler.grab_tip(venue_id)