示例#1
0
def main():
    """Fetch the distinct venue ids and hand them to the multithreaded runner.

    Selects every distinct ``foursquare_venue_id`` from ``plazas_instaphoto``
    and passes the resulting list to ``do_multithread_job`` together with
    ``do_work``, the value 20 and the path ``./log/dump_pics.log``.
    """
    query = """select distinct(foursquare_venue_id) as id from plazas_instaphoto"""
    db_cursor = connect_to_mysql()
    db_cursor.execute(query)
    # Rows are indexed by column name, so the cursor presumably yields
    # dict-like rows -- TODO confirm against connect_to_mysql.
    venue_ids = [row['id'] for row in db_cursor.fetchall()]
    do_multithread_job(do_work, venue_ids, 20, './log/dump_pics.log')
示例#2
0
def main():
    """Run ``do_work`` over at most the first ten ids read from ``plazas``.

    The id list is passed to ``do_multithread_job`` together with the value
    10 and the path ``do_multi.log``.
    """
    query = "select id from plazas"
    db_cursor = connect_to_mysql()
    db_cursor.execute(query)

    max_ids = 10
    plaza_ids = []
    for row in db_cursor.fetchall():
        plaza_ids.append(row['id'])
        # Stop immediately after the cap is reached so no further rows are
        # pulled from the result set.
        if len(plaza_ids) >= max_ids:
            break
    do_multithread_job(do_work, plaza_ids, 10, 'do_multi.log')
示例#3
0
def main():
    """Queue one scan job per three-minute window over the last ten days.

    Calls ``add_table_region_photos()`` first, then builds a job tuple
    ``(point[0], point[1], (window_start, window_end), client)`` for every
    window whose end time steps back from "now" in three-minute increments
    until it is ten days old, and hands the list to ``do_multithread_job``
    with ``do_func``, the value 5 and the path
    ``./test_multi_insta_scan.log``.
    """
    add_table_region_photos()
    now = int(time.time())
    # Presumably a (latitude, longitude) pair -- TODO confirm with do_func.
    # A previous value for the first coordinate was 40.773012.
    point = (40.75953, -73.9863145)
    window_seconds = 3 * 60
    lookback_seconds = 10 * 3600 * 24
    # A single client instance is shared by every job tuple.
    client = InstagramAPI(client_id=config.instagram_client_id,
                          client_secret=config.instagram_client_secret)
    # The stop value is one past the oldest allowed end time so that an end
    # time exactly ten days back is still included.
    jobs = [
        (point[0], point[1], (window_end - window_seconds, window_end), client)
        for window_end in range(now, now - lookback_seconds - 1, -window_seconds)
    ]
    do_multithread_job(do_func, jobs, 5, './test_multi_insta_scan.log')
示例#4
0
def main():
    """Queue one scan job per three-minute window over the last ten days.

    Calls ``add_table_region_photos()`` first, then builds a job tuple
    ``(point[0], point[1], (window_start, window_end), client)`` for every
    window whose end time steps back from "now" in three-minute increments
    until it is ten days old, and hands the list to ``do_multithread_job``
    with ``do_func``, the value 5 and the path
    ``./test_multi_insta_scan.log``.
    """
    add_table_region_photos()
    now = int(time.time())
    # Presumably a (latitude, longitude) pair -- TODO confirm with do_func.
    # A previous value for the first coordinate was 40.773012.
    point = (40.75953, -73.9863145)
    window_seconds = 3 * 60
    lookback_seconds = 10 * 3600 * 24
    # A single client instance is shared by every job tuple.
    client = InstagramAPI(
        client_id=config.instagram_client_id,
        client_secret=config.instagram_client_secret)
    # The stop value is one past the oldest allowed end time so that an end
    # time exactly ten days back is still included.
    jobs = [
        (point[0], point[1], (window_end - window_seconds, window_end), client)
        for window_end in range(now, now - lookback_seconds - 1, -window_seconds)
    ]
    do_multithread_job(do_func, jobs, 5, './test_multi_insta_scan.log')
示例#5
0
文件: job.py 项目: oeddyo/DataBagel
    def submit(self, downloading_locker):
        """Run the per-venue jobs, mark the job finished and export four tables.

        Steps, in order:

        1. Pair every id in ``self.venue_ids`` with ``self.job_id`` and run
           the four worker functions over those pairs via
           ``do_multithread_job``.
        2. Set ``self.status`` to ``'Finished'`` and call
           ``downloading_locker.clear()``.
        3. For each of the four job-specific tables issue a
           ``SELECT * ... INTO OUTFILE`` statement writing a CSV file under
           ``/db/www/places-crawl/data/``.

        Each export is best-effort: a failure to connect or to execute is
        reported on stdout (with the exception appended) and the remaining
        exports are still attempted.

        Args:
            downloading_locker: object whose ``clear()`` method is called
                once the jobs have run (presumably a ``threading.Event`` --
                TODO confirm with the caller).

        Returns:
            The string ``"job done"``.
        """
        job_suffix = str(self.job_id)
        paras = [(venue_id, self.job_id) for venue_id in self.venue_ids]

        # (worker, log-file suffix) pairs, run sequentially in this order.
        # NOTE(review): the first suffix lacks the leading underscore the
        # other three have; it is kept as-is because the path is runtime
        # behavior other tooling may depend on.
        job_steps = (
            (instagram_do_work, 'insta.txt'),
            (foursquare_do_work, '_meta.txt'),
            (foursquare_tips_do_work, '_tips.txt'),
            (foursquare_photos_do_work, '_photos.txt'),
        )
        for worker, log_suffix in job_steps:
            do_multithread_job(worker, paras, 10,
                               './log/log_' + job_suffix + log_suffix)

        self.status = 'Finished'
        downloading_locker.clear()

        # Statement text shared by every export; the two placeholders are
        # the job-specific table name and the output file name.
        export_sql = (
            "SELECT * from %s\n"
            "        INTO OUTFILE '/db/www/places-crawl/data/%s'\n"
            "        FIELDS TERMINATED BY ','ENCLOSED BY '\"'LINES TERMINATED BY '\n'\n"
            "        "
        )
        # (table-name prefix, file-name prefix, message printed on failure)
        exports = (
            ("venue_meta", "metadata_user_",
             'OUTPUT TO DB FOLDER ERROR'),
            ("venue_photo_instagram", "instagram_user_",
             "OUTPUT TO INSTAGRAM FOLDER ERROR"),
            ("venue_tips", "tips_user_",
             "OUTPUT TO FOURSQUARE_TIPS FOLDER ERROR"),
            ("venue_photo_4sq", "foursquare_photo_user_",
             "OUTPUT TO FOURSQUARE_PHOTO FOLDER ERROR"),
        )
        for table_prefix, file_prefix, error_message in exports:
            file_name = file_prefix + job_suffix + ".csv"
            sql = export_sql % (table_prefix + job_suffix, file_name)
            try:
                # A fresh cursor per export, obtained inside the try so a
                # connection failure is handled the same way for all four.
                cursor = connect_to_mysql()
                cursor.execute(sql)
            except Exception as exc:
                # Keep going: one failed export must not block the others.
                # Single-argument print() behaves the same as the print
                # statement under Python 2.
                print('%s: %r' % (error_message, exc))
        return "job done"
def main():
    """Run ``do_work`` over every id in ``plazas_nyc`` and print the id list.

    The distinct venue ids already present in ``plazas_instaphoto`` are also
    loaded, but the filter that would use them is disabled, so every
    ``plazas_nyc`` id is processed.
    """
    query = """ select distinct(foursquare_venue_id) as id from plazas_instaphoto """
    db_cursor = connect_to_mysql()
    db_cursor.execute(query)
    # Currently unused: see the disabled filter below.
    already_in = set(row['id'] for row in db_cursor.fetchall())

    query = "select id from plazas_nyc"
    db_cursor = connect_to_mysql()
    db_cursor.execute(query)

    # Disabled filter: "if r['id'] not in already_in" would skip ids that
    # were loaded above.
    ids = [row['id'] for row in db_cursor.fetchall()]
    do_multithread_job(do_work, ids, 5, './log/download_instagram.log')
    # Single-argument print() behaves the same as the print statement under
    # Python 2.
    print(ids)
示例#7
0
def main():
    """Run ``do_work`` over every id in ``plazas_nyc`` and print the id list.

    The distinct venue ids already present in ``plazas_instaphoto`` are also
    loaded, but the filter that would use them is disabled, so every
    ``plazas_nyc`` id is processed.
    """
    query = """ select distinct(foursquare_venue_id) as id from plazas_instaphoto """
    db_cursor = connect_to_mysql()
    db_cursor.execute(query)
    # Currently unused: see the disabled filter below.
    already_in = set(row['id'] for row in db_cursor.fetchall())

    query = "select id from plazas_nyc"
    db_cursor = connect_to_mysql()
    db_cursor.execute(query)

    # Disabled filter: "if r['id'] not in already_in" would skip ids that
    # were loaded above.
    ids = [row['id'] for row in db_cursor.fetchall()]
    do_multithread_job(do_work, ids, 5, './log/download_instagram.log')
    # Single-argument print() behaves the same as the print statement under
    # Python 2.
    print(ids)