import os
import sys
import datetime
import urllib
import simplejson
from xml.dom.minidom import parseString, Document

from flickrapi import FlickrAPI

import gconfig  # project-local config: provides metadatadir and outputdir


class FlickrQuery():
    def __init__(self, flickrapikey, flickrSecret):
        self.api_key = flickrapikey
        self.fapi = FlickrAPI(flickrapikey, flickrSecret)

    def searchbyid(self, eventid):
        # Ask the EventMedia SPARQL endpoint for the photos that illustrate
        # the given LODE event, and return their media locator URLs.
        photolist = []
        query = """\
prefix lode: <http://linkedevents.org/ontology/>
prefix dc: <http://purl.org/dc/elements/1.1/>
prefix ma: <http://www.w3.org/ns/ma-ont#>
SELECT ?event ?eventTitle ?URI
WHERE {
    ?event dc:title ?eventTitle.
    ?photo lode:illustrate ?event.
    ?photo ma:locator ?URI.
    FILTER (?event = <http://data.linkedevents.org/event/eventURI>).
}
"""
        query = query.replace('eventURI', eventid)
        searchbase = 'http://eventmedia.eurecom.fr/sparql'
        params = urllib.urlencode({"format": "application/sparql-results+json",
                                   "query": query})
        f = urllib.urlopen(searchbase + '?' + params)
        results = simplejson.load(f)
        try:
            results = results['results']['bindings']
        except KeyError:
            return []
        for result in results:
            photolist.append(result['URI']['value'])
        return photolist

    def searchbytitle(self, title, time, eventid):
        # Free-text search on the event title over a five-day window starting
        # at midnight of the event day. Results are paged 250 at a time; the
        # raw XML of each page is cached under gconfig.metadatadir.
        photolist = []
        t1 = time
        t2 = t1 + datetime.timedelta(days=5)
        starttime = t1 - datetime.timedelta(hours=t1.hour)  # midnight of day t1
        bReturn = 1
        idx = 1
        while bReturn == 1:
            try:
                rsp = self.fapi.photos_search(
                    api_key=self.api_key,
                    ispublic="1",
                    media="photos",
                    per_page="250",
                    page=str(idx),
                    min_taken_date=str(starttime),
                    max_taken_date=str(t2),
                    text=title.encode('utf-8'),
                    extras='date_upload, date_taken, owner_name, geo, tags, machine_tags, url_m')
                idx = idx + 1
                self.fapi.testFailure(rsp)
                total_images = rsp.photos[0]['total']
                null_test = int(total_images)
            except:
                null_test = 0
                print sys.exc_info()[0]
                print sys.exc_info()[1]
                print 'Exception encountered while querying title for images\n'
                print type(title), type(title.encode('utf-8'))
            if null_test == 0:
                break
            # Keep paging while the pages fetched so far could still be full.
            if null_test >= 250 * (idx - 1):
                bReturn = 1
            else:
                bReturn = 0
            tmpdir = os.path.join(gconfig.metadatadir, 'querybytitle')
            if not os.path.exists(tmpdir):
                os.makedirs(tmpdir)
            metadata = os.path.join(tmpdir, '%s_%d.xml' % (eventid, idx - 1))
            data = parseString(rsp.xml)
            if not os.path.exists(metadata):
                f = open(metadata, 'w')
                f.write(data.toprettyxml(encoding='UTF-8'))
                f.close()
            for p in data.getElementsByTagName('photo'):
                url = p.getAttribute('url_m')
                if url.find('.jpg') > 0:
                    photolist.append(url)
        return photolist

    def searchbygeo(self, lat, lng, time, eventid):
        # Geo search around (lat, lng) with a fixed 0.7 km radius, over a
        # three-day window starting at midnight of the event day.
        photolist = []
        t1 = time
        t2 = t1 + datetime.timedelta(days=3)
        starttime = t1 - datetime.timedelta(hours=t1.hour)  # midnight of day t1
        bReturn = 1
        idx = 1
        while bReturn == 1:
            try:
                rsp = self.fapi.photos_search(
                    api_key=self.api_key,
                    ispublic="1",
                    media="photos",
                    per_page="250",
                    page=str(idx),
                    min_taken_date=str(starttime),
                    max_taken_date=str(t2),
                    lat=str(lat),
                    lon=str(lng),
                    radius='0.7',
                    accuracy='12',
                    extras='date_upload, date_taken, owner_name, geo, tags, machine_tags, url_m')
                idx = idx + 1
                self.fapi.testFailure(rsp)
                total_images = rsp.photos[0]['total']
                null_test = int(total_images)
            except:
                null_test = 0
                print sys.exc_info()[0]
                print sys.exc_info()[1]
                print 'Exception encountered while querying for images\n'
            if null_test == 0:
                break
            if null_test >= 250 * (idx - 1):
                bReturn = 1
            else:
                bReturn = 0
            tmpdir = os.path.join(gconfig.metadatadir, 'querybygeo')
            if not os.path.exists(tmpdir):
                os.makedirs(tmpdir)
            metadata = os.path.join(tmpdir, '%s_%d.xml' % (eventid, idx - 1))
            data = parseString(rsp.xml)
            if not os.path.exists(metadata):
                f = open(metadata, 'w')
                f.write(data.toprettyxml(encoding='UTF-8'))
                f.close()
            for p in data.getElementsByTagName('photo'):
                url = p.getAttribute('url_m')
                if url.find('.jpg') > 0:
                    photolist.append(url)
        return photolist

    def searchbygeoRadius(self, lat, lng, r, stime, etime, eventid):
        # Same as searchbygeo, but with caller-supplied radius and time range.
        photolist = []
        bReturn = 1
        idx = 1
        while bReturn == 1:
            try:
                rsp = self.fapi.photos_search(
                    api_key=self.api_key,
                    ispublic="1",
                    media="photos",
                    per_page="250",
                    page=str(idx),
                    min_taken_date=str(stime),
                    max_taken_date=str(etime),
                    lat=str(lat),
                    lon=str(lng),
                    radius=str(r),
                    accuracy='12',
                    extras='date_upload, date_taken, owner_name, geo, tags, machine_tags, url_m')
                idx = idx + 1
                self.fapi.testFailure(rsp)
                total_images = rsp.photos[0]['total']
                null_test = int(total_images)
            except:
                null_test = 0
                print sys.exc_info()[0]
                print sys.exc_info()[1]
                print 'Exception encountered while querying for images\n'
            if null_test == 0:
                break
            if null_test >= 250 * (idx - 1):
                bReturn = 1
            else:
                bReturn = 0
            tmpdir = os.path.join(gconfig.metadatadir, 'querybygeo')
            if not os.path.exists(tmpdir):
                os.makedirs(tmpdir)
            metadata = os.path.join(tmpdir, '%s_%d.xml' % (eventid, idx - 1))
            data = parseString(rsp.xml)
            if not os.path.exists(metadata):
                f = open(metadata, 'w')
                f.write(data.toprettyxml(encoding='UTF-8'))
                f.close()
            for p in data.getElementsByTagName('photo'):
                url = p.getAttribute('url_m')
                if url.find('.jpg') > 0:
                    photolist.append(url)
        return photolist

    def outputlist(self, urls, id, fname):
        # Write one backslash-separated "id\filename" line per photo URL.
        fw = open(fname, 'w')
        for url in urls:
            name = url.split('/')[-1]
            fw.write('%s\\%s\n' % (id, name))
        fw.close()

    def geturlbyid(self, ids, urls):
        # Map photo ids back to their URLs, preserving the order of `ids`.
        photos = {}
        results = []
        for p in urls:
            t = p.split('/')[-1].replace('.jpg', '')
            photos[t] = p
        for idx in ids:
            if idx in photos:
                results.append(photos[idx])
        return results

    def OutputList(self, listname, lst):
        # Render the photo URLs as an HTML table, eight images per row.
        html = '\n<table align="center" border="1" cellspacing="1" cellpadding="3" width=800><H2>query by %s</H2><tr>' % listname
        num = 0
        for img_file in lst:
            html += '\n<td align="center" valign=top width=30><IMG SRC="%s" width=160 border=1 /></td>' % img_file
            num = num + 1
            if (num % 8) == 0:
                html += '</tr><tr>'  # start a new row every eight images
        html += '</tr></table>'
        return html

    def OutputJson(self, lst):
        tmp = []
        for img_file in lst:
            tmp.append({'photo': img_file})
        mydict = {}
        mydict['photos'] = tmp
        mydict['number'] = str(len(tmp))
        return mydict

    def OutputHtml(self, id, idlist, titlelist, geolist, refinelist):
        f = open(gconfig.outputdir + '/' + '%s.html' % id, 'w')
        f.write('<html><head><title> Media illustration </title></head>\n')
        f.write('<body BGCOLOR="#FFFFFF"><center><H1>enriching for Event %s</H1><HR HSIZE="50%%"/>\n' % id)
        f.write(self.OutputList("machine tag", idlist))
        f.write(self.OutputList("Geo tag", geolist))
        f.write(self.OutputList("title + pruning + refine", refinelist))
        f.close()

    def OutputXML(self, id, idlist, titlelist, geolist, refinelist):
        # Emit the photo sets as XML; the geo, title, and refined sets are
        # deduplicated against the machine-tag (ID) set before writing.
        fname = gconfig.outputdir + '/' + '%s.xml' % id
        setid = set(idlist)
        setgeo = set(geolist) - setid
        settitle = set(titlelist) - setid
        setrefine = set(refinelist) - setid
        sets = [setid, setgeo, settitle, setrefine]
        tmpinfo = ["query by ID", "query by Geo - ID",
                   "query by Title - ID", "Pruning and Refine"]
        doc = Document()
        query = doc.createElement("query")
        query.setAttribute("id", id)
        doc.appendChild(query)
        results = doc.createElement("PhotoSets")
        for tmpset, info in zip(sets, tmpinfo):
            photoset = doc.createElement("photoset")
            photoset.setAttribute('query', info)
            photoset.setAttribute('photoNum', str(len(tmpset)))
            for photo in tmpset:
                ph = doc.createElement("photo")
                ph.setAttribute('url', photo)
                photoset.appendChild(ph)
            results.appendChild(photoset)
        query.appendChild(results)
        f = open(fname, "w")
        f.write(doc.toprettyxml(encoding='UTF-8'))
        f.close()
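
# A minimal usage sketch for FlickrQuery. The key/secret strings, event id,
# and event details below are placeholders (assumptions, not real data), and
# gconfig.metadatadir / gconfig.outputdir must point at writable directories
# as in the rest of this module.
if __name__ == '__main__':
    fq = FlickrQuery('YOUR_API_KEY', 'YOUR_SECRET')  # placeholder credentials
    event_id = '1234'                                # hypothetical event id
    when = datetime.datetime(2011, 6, 18)            # hypothetical event date

    by_id = fq.searchbyid(event_id)
    by_title = fq.searchbytitle(u'Sonar Festival', when, event_id)
    by_geo = fq.searchbygeo(41.37, 2.15, when, event_id)

    # Render the three result sets as an HTML gallery and an XML report.
    fq.OutputHtml(event_id, by_id, by_title, by_geo, by_title)
    fq.OutputXML(event_id, by_id, by_title, by_geo, by_title)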
upper_bound = mintime + timeskip * 20  # upper bound of the upper time limit
maxtime = .95 * lower_bound + .05 * upper_bound

print '\nBinary search on time range upper bound'
print 'Lower bound is ' + str(datetime.fromtimestamp(lower_bound))
print 'Upper bound is ' + str(datetime.fromtimestamp(upper_bound))

keep_going = 6  # search stops after a fixed number of iterations
while keep_going > 0 and maxtime < endtime:
    try:
        rsp = fapi.photos_search(api_key=flickrAPIKey,
                                 ispublic="1",
                                 media="photos",
                                 per_page="250",
                                 page="1",
                                 has_geo="1",
                                 #bbox="-180, -90, 180, 90",
                                 text=query_string,
                                 accuracy="6",  #6 is region level. most things seem 10 or better.
                                 min_upload_date=str(mintime),
                                 max_upload_date=str(maxtime))
                                 ##min_taken_date=str(datetime.fromtimestamp(mintime)),
                                 ##max_taken_date=str(datetime.fromtimestamp(maxtime)))
        # we want to catch these failures somehow and keep going
        time.sleep(1)
        fapi.testFailure(rsp)
        total_images = rsp.photos[0]['total']
        null_test = int(total_images)    # make sure this won't crash later on for some reason
        null_test = float(total_images)
        print '\nnumimgs: ' + total_images
        print 'mintime: ' + str(mintime) + ' maxtime: ' + str(maxtime) + ' timeskip: ' + str(maxtime - mintime)
counter = -1
while pagenum <= num_visit_pages:
    #for pagenum in range(1, num_visit_pages + 1):  #page one is searched twice
    print(' page number ' + str(pagenum))
    try:
        print("PAGE")
        print(pagenum)
        # WARNING: THIS QUERY HAS TO MATCH THE SEARCH QUERY!!!!
        rsp = fapi.photos_search(api_key=flickrAPIKey,
                                 ispublic="1",
                                 media="photos",
                                 per_page="250",
                                 page=str(pagenum),
                                 has_geo="0",
                                 text=query_string,
                                 #extras="tags, original_format, license, geo, date_taken, date_upload, o_dims, views",
                                 #accuracy="6",  #6 is region level.
                                 min_upload_date=str(1121832000),   #mintime
                                 max_upload_date=str(1192165200))   #maxtime
        #rsp = fapi.photos_search(api_key=flickrAPIKey,
        #                         ispublic="1",
        #                         media="photos",
        #                         per_page="250",
        #                         page='0',  #str(pagenum),
        #                         sort="interestingness-desc",
        #                         has_geo="0",
        #                         #bbox="-180, -90, 180, 90",
        #                         text=query_string,
        #                         #accuracy="6",  #6 is region level. most things seem 10 or better.
def run_flickr_query_general(query_args, max_photos=1000,
                             startDate="1/1/2010", endDate="31/12/2011"):
    # 30 second timeout on sockets before they throw an exception. I've been
    # having trouble with urllib.urlopen hanging in the flickr API; that shows
    # up as exceptions.IOError. The timeout needs to be fairly long because
    # the flickr servers can be slow to respond to big searches.
    socket.setdefaulttimeout(30)

    ###########################################################################
    # Modify this section to reflect your data and specific search
    ###########################################################################
    # flickr auth information: change these to your flickr api key and secret
    fapi = FlickrAPI(flickrAPIKey, flickrSecret)

    total_images_queried = 0

    # number of seconds to skip per query
    timeskip = 8 * 604800  # eight weeks (604800 s = one week)

    starttime = convertDate(startDate)
    endtime = convertDate(endDate)
    maxtime = endtime
    mintime = endtime - 10 * timeskip
    timeskip = min(timeskip, endtime - mintime)

    print 'Start time: ' + str(datetime.fromtimestamp(starttime))
    print 'End time: ' + str(datetime.fromtimestamp(endtime))

    # this is the desired number of photos in each block
    desired_photos = min(250, max_photos)

    total_image_num = 0
    results = {}

    # Unlike run_flickr_query below, this version walks backwards in time from
    # endtime towards starttime, adjusting mintime until each block holds
    # roughly desired_photos images. The upper bound of each block (maxtime)
    # is well defined; the lower bound is not, so it is binary-searched.
    while starttime < mintime:
        lower_bound = mintime - 20 * timeskip  # lower bound OF the lower time limit
        upper_bound = maxtime                  # upper bound of the lower time limit
        lower_bound = max(lower_bound, starttime)
        mintime = 0.05 * lower_bound + 0.95 * upper_bound

        if total_image_num > max_photos:
            print 'Number of photos %d > %d limit.' % (total_image_num, max_photos)
            break

        keep_going = 6  # search stops after a fixed number of iterations
        while keep_going > 0 and starttime < mintime:
            try:
                rsp = fapi.photos_search(api_key=flickrAPIKey,
                                         ispublic="1",
                                         media="photos",
                                         per_page="250",
                                         page="1",
                                         min_upload_date=str(mintime),
                                         max_upload_date=str(maxtime),
                                         **query_args)
                # we want to catch these failures somehow and keep going
                os_time.sleep(1)
                fapi.testFailure(rsp)
                total_images = rsp.photos[0]['total']
                if total_images == '':
                    total_images = '0'
                null_test = int(total_images)    # make sure this won't crash later on for some reason
                null_test = float(total_images)

                if int(total_images) > desired_photos:
                    # too many photos in block: increase mintime towards maxtime
                    lower_bound = mintime
                    mintime = (upper_bound + mintime) / 2  # midpoint between current value and upper bound
                if int(total_images) < desired_photos:
                    # too few photos in block: decrease mintime to widen the window
                    upper_bound = mintime
                    mintime = (lower_bound + mintime) / 2
                if int(total_images) > 0:
                    # only count the iteration if we're not in a degenerate case
                    keep_going = keep_going - 1
                else:
                    upper_bound = upper_bound + timeskip
            except KeyboardInterrupt:
                print 'Keyboard exception while querying for images, exiting\n'
                raise
            # NOTE: unlike run_flickr_query below, other exceptions are not
            # caught here and will propagate to the caller.
        # end of binary search

        print 'mintime: ' + str(datetime.fromtimestamp(mintime)) \
            + ' maxtime: ' + str(datetime.fromtimestamp(maxtime)) \
            + ' numimgs: ' + total_images

        i = getattr(rsp, 'photos', None)
        if i:
            print 'numimgs: ' + total_images
            current_image_num = 1
            num = int(rsp.photos[0]['pages'])
            print 'total pages: ' + str(num)
            # only visit 16 pages max, to try and avoid the dreaded duplicate
            # bug: 16 pages = 4000 images, which should be duplicate safe, and
            # the most interesting pictures will be taken anyway
            num_visit_pages = min(16, num)
            print 'visiting only ' + str(num_visit_pages) + ' pages ( up to ' + str(num_visit_pages * 250) + ' images)'
            total_images_queried = total_images_queried + min(num_visit_pages * 250, int(total_images))

            pagenum = 1
            while pagenum <= num_visit_pages:
                print ' page number ' + str(pagenum)
                try:
                    rsp = fapi.photos_search(api_key=flickrAPIKey,
                                             ispublic="1",
                                             media="photos",
                                             per_page="250",
                                             page=str(pagenum),
                                             sort="interestingness-desc",
                                             extras="tags, original_format, license, geo, date_taken, date_upload, o_dims, views",
                                             min_upload_date=str(mintime),
                                             max_upload_date=str(maxtime),
                                             **query_args)
                    os_time.sleep(1)
                    fapi.testFailure(rsp)
                except KeyboardInterrupt:
                    print 'Keyboard exception while querying for images, exiting\n'
                    raise
                else:
                    # collect the metadata of every photo on this page
                    k = getattr(rsp, 'photos', None)
                    if k:
                        m = getattr(rsp.photos[0], 'photo', None)
                        if m:
                            for b in rsp.photos[0].photo:
                                if b != None:
                                    photo_id = b['id']
                                    photo_data = {}
                                    photo_data['id'] = b['id']
                                    photo_data['secret'] = b['secret']
                                    photo_data['server'] = b['server']
                                    photo_data['farm'] = b['farm']
                                    photo_data['owner'] = b['owner']
                                    photo_data['title'] = b['title']
                                    photo_data['originalsecret'] = b['originalsecret']
                                    photo_data['originalformat'] = b['originalformat']
                                    photo_data['o_height'] = b['o_height']
                                    photo_data['o_width'] = b['o_width']
                                    photo_data['datetaken'] = b['datetaken'].encode("ascii", "replace")
                                    photo_data['dateupload'] = b['dateupload'].encode("ascii", "replace")
                                    photo_data['tags'] = b['tags'].encode("ascii", "replace")
                                    photo_data['license'] = b['license'].encode("ascii", "replace")
                                    photo_data['latitude'] = b['latitude'].encode("ascii", "replace")
                                    photo_data['longitude'] = b['longitude'].encode("ascii", "replace")
                                    photo_data['accuracy'] = b['accuracy'].encode("ascii", "replace")
                                    photo_data['views'] = b['views']
                                    photo_data['interestingness'] = (current_image_num, total_images)
                                    results[photo_id] = photo_data
                                    current_image_num = current_image_num + 1
                                    total_image_num = total_image_num + 1
                    # only increment the page counter on success; on an
                    # exception the same page is retried
                    pagenum = pagenum + 1

            # this block is indented such that it will only run if there are
            # no exceptions in the original query. That means if there are
            # exceptions, mintime won't be advanced and it will try again.
            timeskip = maxtime - mintime  # used for initializing next binary search
            maxtime = mintime

    return results
def run_flickr_query(query_string, max_photos=1000,
                     startDate="1/1/2010", endDate="31/12/2011"):
    # NOTE: startDate/endDate are currently unused; the time range below is
    # hardcoded.
    # 30 second timeout on sockets before they throw an exception. I've been
    # having trouble with urllib.urlopen hanging in the flickr API; that shows
    # up as exceptions.IOError. The timeout needs to be fairly long because
    # the flickr servers can be slow to respond to big searches.
    socket.setdefaulttimeout(30)

    ###########################################################################
    # Modify this section to reflect your data and specific search
    ###########################################################################
    # flickr auth information: change these to your flickr api key and secret
    fapi = FlickrAPI(flickrAPIKey, flickrSecret)

    print '\n\nquery_string is ' + query_string
    total_images_queried = 0

    # number of seconds to skip per query
    #timeskip = 62899200  #two years
    timeskip = 604800     #one week
    #timeskip = 172800    #two days
    #timeskip = 86400     #one day
    #timeskip = 3600      #one hour
    #timeskip = 2257      #for resuming previous query

    #mintime = 1121832000  #from im2gps
    #mintime = 1167407788  #resume crash england
    #mintime = 1177828976  #resume crash japan
    #mintime = 1187753798  #resume crash greece
    #mintime = 1171416400  #resume crash WashingtonDC
    mintime = 1287878400   #10/24/2010
    maxtime = mintime + timeskip
    #endtime = 1192165200  #10/12/2007, at the end of im2gps queries
    endtime = 1351100325   #10/24/2012

    # this is the desired number of photos in each block
    desired_photos = 250

    print datetime.fromtimestamp(mintime)
    print datetime.fromtimestamp(endtime)

    total_image_num = 0
    while maxtime < endtime:
        # new approach: adjust maxtime until we get the desired number of
        # images within a block. We need to track upper and lower bounds; the
        # lower bound is well defined (mintime), but the upper bound is not.
        # We can't search all the way from endtime.
        lower_bound = mintime + 900  # lower bound OF the upper time limit; must be at least 15 minutes or zero results
        upper_bound = mintime + timeskip * 20  # upper bound of the upper time limit
        maxtime = .95 * lower_bound + .05 * upper_bound

        print '\nBinary search on time range upper bound'
        print 'Lower bound is ' + str(datetime.fromtimestamp(lower_bound))
        print 'Upper bound is ' + str(datetime.fromtimestamp(upper_bound))

        if total_image_num > max_photos:
            print 'Number of photos %d > %d limit.' % (total_image_num, max_photos)
            break

        keep_going = 6  # search stops after a fixed number of iterations
        while keep_going > 0 and maxtime < endtime:
            try:
                rsp = fapi.photos_search(api_key=flickrAPIKey,
                                         ispublic="1",
                                         media="photos",
                                         per_page="250",
                                         page="1",
                                         #has_geo="1",
                                         #bbox="-180, -90, 180, 90",
                                         text=query_string,
                                         #accuracy="6",  #6 is region level. most things seem 10 or better.
                                         min_upload_date=str(mintime),
                                         max_upload_date=str(maxtime))
                                         ##min_taken_date=str(datetime.fromtimestamp(mintime)),
                                         ##max_taken_date=str(datetime.fromtimestamp(maxtime)))
                # we want to catch these failures somehow and keep going
                time.sleep(1)
                fapi.testFailure(rsp)
                total_images = rsp.photos[0]['total']
                null_test = int(total_images)    # make sure this won't crash later on for some reason
                null_test = float(total_images)
                print '\nnumimgs: ' + total_images
                print 'mintime: ' + str(mintime) + ' maxtime: ' + str(maxtime) + ' timeskip: ' + str(maxtime - mintime)

                if int(total_images) > desired_photos:
                    print 'too many photos in block, reducing maxtime'
                    upper_bound = maxtime
                    maxtime = (lower_bound + maxtime) / 2  # midpoint between current value and lower bound
                if int(total_images) < desired_photos:
                    print 'too few photos in block, increasing maxtime'
                    lower_bound = maxtime
                    maxtime = (upper_bound + maxtime) / 2
                print 'Lower bound is ' + str(datetime.fromtimestamp(lower_bound))
                print 'Upper bound is ' + str(datetime.fromtimestamp(upper_bound))

                if int(total_images) > 0:
                    # only count the iteration if we're not in a degenerate case
                    keep_going = keep_going - 1
                else:
                    upper_bound = upper_bound + timeskip
            except KeyboardInterrupt:
                print 'Keyboard exception while querying for images, exiting\n'
                raise
            except:
                print sys.exc_info()[0]
                #print type(inst)  # the exception instance
                #print inst.args   # arguments stored in .args
                #print inst        # __str__ allows args to be printed directly
                print 'Exception encountered while querying for images\n'
        # end of binary search

        print 'finished binary search'
        s = '\nmintime: ' + str(mintime) + ' maxtime: ' + str(maxtime)
        print s
        out_file.write(s + '\n')

        i = getattr(rsp, 'photos', None)
        if i:
            s = 'numimgs: ' + total_images
            print s
            out_file.write(s + '\n')
            current_image_num = 1
            num = int(rsp.photos[0]['pages'])
            s = 'total pages: ' + str(num)
            print s
            out_file.write(s + '\n')
            # only visit 16 pages max, to try and avoid the dreaded duplicate
            # bug: 16 pages = 4000 images, which should be duplicate safe, and
            # the most interesting pictures will be taken anyway
            num_visit_pages = min(16, num)
            s = 'visiting only ' + str(num_visit_pages) + ' pages ( up to ' + str(num_visit_pages * 250) + ' images)'
            print s
            out_file.write(s + '\n')
            total_images_queried = total_images_queried + min(num_visit_pages * 250, int(total_images))

            pagenum = 1
            while pagenum <= num_visit_pages:
                print ' page number ' + str(pagenum)
                try:
                    rsp = fapi.photos_search(api_key=flickrAPIKey,
                                             ispublic="1",
                                             media="photos",
                                             per_page="250",
                                             page=str(pagenum),
                                             sort="interestingness-desc",
                                             #has_geo="1",
                                             #bbox="-180, -90, 180, 90",
                                             text=query_string,
                                             #accuracy="6",  #6 is region level. most things seem 10 or better.
                                             extras="tags, original_format, license, geo, date_taken, date_upload, o_dims, views",
                                             min_upload_date=str(mintime),
                                             max_upload_date=str(maxtime))
                                             ##min_taken_date=str(datetime.fromtimestamp(mintime)),
                                             ##max_taken_date=str(datetime.fromtimestamp(maxtime)))
                    time.sleep(1)
                    fapi.testFailure(rsp)
                except KeyboardInterrupt:
                    print 'Keyboard exception while querying for images, exiting\n'
                    raise
                except:
                    print sys.exc_info()[0]
                    print 'Exception encountered while querying for images\n'
                else:
                    # and write out the metadata of every photo on this page
                    k = getattr(rsp, 'photos', None)
                    if k:
                        m = getattr(rsp.photos[0], 'photo', None)
                        if m:
                            for b in rsp.photos[0].photo:
                                if b != None:
                                    out_file.write('photo: ' + b['id'] + ' ' + b['secret'] + ' ' + b['server'] + ' ' + b['farm'] + '\n')
                                    out_file.write('owner: ' + b['owner'] + '\n')
                                    out_file.write('title: ' + b['title'].encode("ascii", "replace") + '\n')
                                    out_file.write('originalsecret: ' + b['originalsecret'] + '\n')
                                    out_file.write('originalformat: ' + b['originalformat'] + '\n')
                                    out_file.write('o_height: ' + b['o_height'] + '\n')
                                    out_file.write('o_width: ' + b['o_width'] + '\n')
                                    out_file.write('datetaken: ' + b['datetaken'].encode("ascii", "replace") + '\n')
                                    out_file.write('dateupload: ' + b['dateupload'].encode("ascii", "replace") + '\n')
                                    out_file.write('tags: ' + b['tags'].encode("ascii", "replace") + '\n')
                                    out_file.write('license: ' + b['license'].encode("ascii", "replace") + '\n')
                                    out_file.write('latitude: ' + b['latitude'].encode("ascii", "replace") + '\n')
                                    out_file.write('longitude: ' + b['longitude'].encode("ascii", "replace") + '\n')
                                    out_file.write('accuracy: ' + b['accuracy'].encode("ascii", "replace") + '\n')
                                    out_file.write('views: ' + b['views'] + '\n')
                                    out_file.write('interestingness: ' + str(current_image_num) + ' out of ' + str(total_images) + '\n')
                                    out_file.write('\n')
                                    current_image_num = current_image_num + 1
                                    total_image_num = total_image_num + 1
                    # only increment the page counter on success; on an
                    # exception the same page is retried
                    pagenum = pagenum + 1

            # this block is indented such that it will only run if there are
            # no exceptions in the original query. That means if there are
            # exceptions, mintime won't be incremented and it will try again.
            timeskip = maxtime - mintime  # used for initializing next binary search
            mintime = maxtime

    out_file.write('Total images queried: ' + str(total_images_queried) + '\n')
    out_file.close()
out_file = open(output_filename, 'w')
fapi = FlickrAPI(flickrAPIKey, flickrSecret)

# Simple paged query: fetch the first four result pages (250 photos each) for
# query_string and log basic metadata for every photo.
for i in range(1, 5):
    page_nu = str(i)
    try:
        rsp = fapi.photos_search(api_key=flickrAPIKey,
                                 ispublic="1",
                                 media="photos",
                                 per_page="250",  # seems to be the max
                                 page=page_nu,
                                 text=query_string)
        time.sleep(1)
        fapi.testFailure(rsp)
        total_images = rsp.photos[0]['total']
        for b in rsp.photos[0].photo:
            if b != None:
                out_file.write('photo: ' + b['id'] + ' ' + b['secret'] + ' ' + b['server'] + '\n')
                out_file.write('owner: ' + b['owner'] + '\n')
                out_file.write('title: ' + b['title'].encode("ascii", "replace") + '\n')
                out_file.write('tags: ' + b['tags'].encode("ascii", "replace") + '\n')
                out_file.write('\n')
    except:
        # keep going on a failed page; the next page may still succeed
        print sys.exc_info()[0]
        print 'Exception encountered while querying for images\n'

out_file.close()
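
# The query scripts above share module-level setup that is not shown in this
# dump. What follows is a minimal sketch of it, under stated assumptions:
# flickrAPIKey/flickrSecret are your own credentials (placeholders below);
# os_time is assumed to be an alias of the standard time module, since os
# itself has no sleep() and run_flickr_query_general calls os_time.sleep(1);
# convertDate is assumed to turn the "d/m/Y" strings used in the default
# arguments into unix timestamps, matching the datetime.fromtimestamp calls
# above; and query_string / output_filename are the globals expected by the
# paged script above.
import sys
import socket
import time
import time as os_time  # assumed alias used by run_flickr_query_general
from datetime import datetime

from flickrapi import FlickrAPI  # the Python 2 flickrapi binding used throughout

flickrAPIKey = 'YOUR_API_KEY'  # placeholder
flickrSecret = 'YOUR_SECRET'   # placeholder

query_string = 'eiffel tower'            # placeholder search text
output_filename = 'flickr_metadata.txt'  # placeholder output path

def convertDate(datestr):
    # assumed helper: "31/12/2011" (day/month/year) -> unix timestamp
    day, month, year = [int(x) for x in datestr.split('/')]
    return int(time.mktime(datetime(year, month, day).timetuple()))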