def main():
    """Run ``do_work`` over every distinct venue id in ``plazas_instaphoto``.

    The ids are read from the ``foursquare_venue_id`` column (aliased to
    ``id``) and handed to ``do_multithread_job`` together with the value
    ``20`` (presumably the worker-thread count -- confirm against
    ``do_multithread_job``) and the log path ``./log/dump_pics.log``.
    """
    query = """select distinct(foursquare_venue_id) as id from plazas_instaphoto"""
    db_cursor = connect_to_mysql()
    db_cursor.execute(query)

    # Rows are indexed by column name, so only the aliased ``id`` is kept.
    venue_ids = [row['id'] for row in db_cursor.fetchall()]

    do_multithread_job(do_work, venue_ids, 20, './log/dump_pics.log')
def main():
    """Run ``do_work`` over at most the first ten ids read from ``plazas``.

    All rows are fetched, but only the leading ten ids are collected
    before the batch is passed to ``do_multithread_job`` (with ``10`` as
    its third argument -- presumably the worker-thread count -- and
    ``do_multi.log`` as the log path).
    """
    max_ids = 10

    db_cursor = connect_to_mysql()
    db_cursor.execute("select id from plazas")

    selected = []
    # Count from 1 so the loop stops right after the tenth id is stored.
    for position, row in enumerate(db_cursor.fetchall(), 1):
        selected.append(row['id'])
        if position >= max_ids:
            break

    do_multithread_job(do_work, selected, 10, 'do_multi.log')
def main():
    """Queue Instagram scan jobs for the last ten days in 3-minute windows.

    Calls ``add_table_region_photos()`` first, then builds one job per
    3-minute window, walking backwards from the current Unix time until
    ten days ago (inclusive).  Each job is the tuple
    ``(sw_ne[0], sw_ne[1], (window_start, window_end), client)`` and all
    jobs share a single ``InstagramAPI`` client.  The jobs are run through
    ``do_multithread_job(do_func, jobs, 5, ...)``; the ``5`` is presumably
    the worker-thread count -- confirm against ``do_multithread_job``.
    """
    add_table_region_photos()

    window = 3 * 60            # length of one scan window, in seconds
    lookback = 10 * 3600 * 24  # how far back to scan, in seconds
    now = int(time.time())

    #sw_ne = (40.773012,-73.9863145)
    # NOTE(review): presumably a (latitude, longitude) pair -- confirm with do_func.
    sw_ne = (40.75953, -73.9863145)

    client = InstagramAPI(client_id=config.instagram_client_id,
                          client_secret=config.instagram_client_secret)

    # Window end times run now, now-180, ... down to now-lookback inclusive;
    # the "- 1" on the stop value keeps that last boundary in the range.
    jobs = [
        (sw_ne[0], sw_ne[1], (window_end - window, window_end), client)
        for window_end in range(now, now - lookback - 1, -window)
    ]

    do_multithread_job(do_func, jobs, 5, './test_multi_insta_scan.log')
def main():
    """Scan the last ten days of Instagram activity in 3-minute slices.

    After ``add_table_region_photos()`` has run, one job tuple
    ``(sw_ne[0], sw_ne[1], (slice_start, slice_end), client)`` is created
    for every 3-minute slice between now and ten days ago (both ends
    included), newest first.  Every job reuses the same ``InstagramAPI``
    client.  The batch is executed by
    ``do_multithread_job(do_func, jobs, 5, ...)``; the ``5`` is presumably
    the number of worker threads -- confirm against ``do_multithread_job``.
    """
    add_table_region_photos()
    cur_time = int(time.time())

    #sw_ne = (40.773012,-73.9863145)
    # NOTE(review): looks like a (latitude, longitude) pair -- confirm with do_func.
    sw_ne = (40.75953, -73.9863145)

    slice_seconds = 3 * 60
    oldest_allowed = cur_time - 10 * 3600 * 24

    client = InstagramAPI(
        client_id=config.instagram_client_id,
        client_secret=config.instagram_client_secret)

    jobs = []
    slice_end = cur_time
    # Walk backwards one slice at a time until the ten-day boundary is passed.
    while slice_end >= oldest_allowed:
        jobs.append(
            (sw_ne[0], sw_ne[1], (slice_end - slice_seconds, slice_end), client))
        slice_end -= slice_seconds

    do_multithread_job(do_func, jobs, 5, './test_multi_insta_scan.log')
def submit(self, downloading_locker):
    """Crawl every venue of this job, then dump the result tables to CSV.

    Four crawl stages (Instagram, Foursquare metadata, tips, photos) are
    run one after another through ``do_multithread_job`` over
    ``(venue_id, job_id)`` pairs built from ``self.venue_ids``.  Afterwards
    ``self.status`` is set to ``'Finished'``, ``downloading_locker.clear()``
    is called, and the four per-job tables are exported with
    ``SELECT ... INTO OUTFILE`` into ``/db/www/places-crawl/data/``.

    Every export is best-effort: a failure to connect or to execute is
    printed (with the underlying error) and the remaining exports still
    run.

    Args:
        downloading_locker: object whose ``clear()`` is called once the
            crawl stages are done (presumably a ``threading.Event`` --
            confirm against the caller).

    Returns:
        The string ``"job done"``.
    """
    job_id = str(self.job_id)
    paras = [(venue_id, self.job_id) for venue_id in self.venue_ids]

    # NOTE(review): the Instagram log name has no underscore before "insta",
    # unlike the other three; kept as-is because the path is runtime behavior.
    crawl_stages = (
        (instagram_do_work, './log/log_' + job_id + 'insta.txt'),
        (foursquare_do_work, './log/log_' + job_id + '_meta.txt'),
        (foursquare_tips_do_work, './log/log_' + job_id + '_tips.txt'),
        (foursquare_photos_do_work, './log/log_' + job_id + '_photos.txt'),
    )
    for worker, log_path in crawl_stages:
        do_multithread_job(worker, paras, 10, log_path)

    # Status is flipped and the locker cleared before the CSV export starts.
    self.status = 'Finished'
    downloading_locker.clear()

    # (table name prefix, CSV file name prefix, message printed on failure)
    exports = (
        ('venue_meta', 'metadata_user_',
         'OUTPUT TO DB FOLDER ERROR'),
        ('venue_photo_instagram', 'instagram_user_',
         'OUTPUT TO INSTAGRAM FOLDER ERROR'),
        ('venue_tips', 'tips_user_',
         'OUTPUT TO FOURSQUARE_TIPS FOLDER ERROR'),
        ('venue_photo_4sq', 'foursquare_photo_user_',
         'OUTPUT TO FOURSQUARE_PHOTO FOLDER ERROR'),
    )
    for table_prefix, file_prefix, error_message in exports:
        # Table names cannot be bound as query parameters, so the job id is
        # concatenated; it comes from this object, not from request input.
        sql = ("SELECT * from " + table_prefix + job_id +
               " INTO OUTFILE '/db/www/places-crawl/data/" +
               file_prefix + job_id + ".csv'"
               " FIELDS TERMINATED BY ',' ENCLOSED BY '\"'"
               " LINES TERMINATED BY '\n' ")
        try:
            # Connecting inside the try keeps all four exports equally
            # best-effort: one failed connection no longer aborts the rest.
            cursor = connect_to_mysql()
            cursor.execute(sql)
        except Exception as exc:
            # Single-argument print works under both Python 2 and 3.
            print('%s: %s' % (error_message, exc))

    return "job done"
def main():
    """Run ``do_work`` over every id in ``plazas_nyc`` and print the ids.

    The distinct ``foursquare_venue_id`` values of ``plazas_instaphoto``
    are still loaded into a set, but the filter that used it is commented
    out, so every ``plazas_nyc`` id is processed.  The ids go to
    ``do_multithread_job`` with ``5`` as its third argument (presumably the
    worker-thread count -- confirm) and ``./log/download_instagram.log``.
    """
    seen_cursor = connect_to_mysql()
    seen_cursor.execute(""" select distinct(foursquare_venue_id) as id from plazas_instaphoto """)
    # Currently unused: see the disabled filter below.
    already_in = set(row['id'] for row in seen_cursor.fetchall())

    plaza_cursor = connect_to_mysql()
    plaza_cursor.execute("select id from plazas_nyc")
    # Disabled filter from the original: if r['id'] not in already_in:
    ids = [row['id'] for row in plaza_cursor.fetchall()]

    do_multithread_job(do_work, ids, 5, './log/download_instagram.log')
    # Single-argument parenthesised print gives the same output on Python 2.
    print(ids)
def main():
    """Download Instagram data for all ``plazas_nyc`` ids, then print them.

    Two queries are issued on separate connections: the first collects the
    distinct ``foursquare_venue_id`` values already in ``plazas_instaphoto``
    (kept in ``already_in``, which nothing reads while the skip-filter stays
    commented out); the second reads every ``id`` from ``plazas_nyc``.  All
    of those ids are passed to ``do_multithread_job`` with ``do_work``, the
    value ``5`` (presumably the worker-thread count -- confirm) and the log
    path ``./log/download_instagram.log``.
    """
    already_in = set()
    known_sql = """ select distinct(foursquare_venue_id) as id from plazas_instaphoto """
    cursor = connect_to_mysql()
    cursor.execute(known_sql)
    already_in.update(record['id'] for record in cursor.fetchall())

    cursor = connect_to_mysql()
    cursor.execute("select id from plazas_nyc")
    ids = []
    for record in cursor.fetchall():
        #if r['id'] not in already_in:
        ids.append(record['id'])

    do_multithread_job(do_work, ids, 5, './log/download_instagram.log')
    # One parenthesised argument prints identically under Python 2 and 3.
    print(ids)