def run(self):
    """Worker loop: take image jobs from ``self.queue`` and download them.

    Each queue item is unpacked as an ``(imageUrl, imagePath)`` pair and
    handed to ``retry(urllib.urlretrieve, 3)``.  ``task_done()`` is called
    for every item taken, whether or not the download succeeded, so the
    queue's unfinished-task count always goes back down.

    A download that still raises is reported with a traceback on stderr and
    the loop moves on to the next item.  Previously the exception escaped
    the loop and ended this worker, so queued items could be left with
    nobody to process them.

    This method never returns.
    """
    # Local import: the file's top-level import block is not visible here.
    import traceback

    while True:
        imageUrl, imagePath = self.queue.get()
        try:
            # NOTE(review): presumably retry(f, 3) re-invokes f up to three
            # times on failure -- confirm against retry's definition.
            retry(urllib.urlretrieve, 3)(imageUrl, imagePath)
        except Exception:
            # Keep the worker alive; one bad URL must not stop the others.
            traceback.print_exc()
        finally:
            self.queue.task_done()
def harvest(url):
    """Download one response page from *url*, process its records, and
    return the token for the next page.

    The response body is parsed with ``minidom``.  If it contains an
    ``error`` element, nothing is saved or processed and an exception is
    raised.  Otherwise the raw body is passed to ``save()``, every
    ``record`` element is passed to ``handleRecord()``, and ``queue.join()``
    is called before returning.

    Args:
        url: The URL to fetch.

    Returns:
        A ``(strToken, countRecords)`` tuple.  ``strToken`` is the text of
        the first ``resumptionToken`` element, or ``None`` when the response
        has no such element.  ``countRecords`` is the number of ``record``
        elements in the response.

    Raises:
        Exception: If the response contains an ``error`` element.  The
            message is ``"<code attribute>: <element text>"``.
    """
    # NOTE(review): a second definition of harvest with the same body
    # appears in this source; if both live in one module the later one
    # wins -- consider removing one.
    print("downloading: " + url)
    data = retry(urllib2.urlopen, 3)(url)
    try:
        # Cache the whole body: the file-like object is not seekable, and the
        # raw text is needed both for parsing and for save().
        cached = data.read()
    finally:
        # Release the connection even if reading fails.
        data.close()

    dom = minidom.parseString(cached)

    # check for error
    errors = dom.getElementsByTagName('error')
    if errors:
        errType = errors[0].getAttribute('code')
        # Pass the error element itself, the same way getText() is called
        # for the resumption token below (the NodeList was passed before).
        desc = getText(errors[0])
        raise Exception(errType + ": " + desc)

    save(cached)

    records = dom.getElementsByTagName('record')
    countRecords = len(records)
    for record in records:
        handleRecord(record)
    # NOTE(review): presumably handleRecord() enqueues work for the worker
    # threads and this waits for it to finish -- confirm.
    queue.join()

    nodelist = dom.getElementsByTagName('resumptionToken')
    if not nodelist:
        return None, countRecords
    return getText(nodelist[0]), countRecords
def harvest(url):
    """Download one response page from *url*, process its records, and
    return the token for the next page.

    The response body is parsed with ``minidom``.  If it contains an
    ``error`` element, nothing is saved or processed and an exception is
    raised.  Otherwise the raw body is passed to ``save()``, every
    ``record`` element is passed to ``handleRecord()``, and ``queue.join()``
    is called before returning.

    Args:
        url: The URL to fetch.

    Returns:
        A ``(strToken, countRecords)`` tuple.  ``strToken`` is the text of
        the first ``resumptionToken`` element, or ``None`` when the response
        has no such element.  ``countRecords`` is the number of ``record``
        elements in the response.

    Raises:
        Exception: If the response contains an ``error`` element.  The
            message is ``"<code attribute>: <element text>"``.
    """
    # NOTE(review): an earlier definition of harvest with the same body
    # appears in this source; if both live in one module this later one
    # replaces it -- consider removing one.
    print("downloading: " + url)
    data = retry(urllib2.urlopen, 3)(url)
    try:
        # Cache the whole body: the file-like object is not seekable, and the
        # raw text is needed both for parsing and for save().
        cached = data.read()
    finally:
        # Release the connection even if reading fails.
        data.close()

    dom = minidom.parseString(cached)

    # check for error
    errors = dom.getElementsByTagName('error')
    if errors:
        errType = errors[0].getAttribute('code')
        # Pass the error element itself, the same way getText() is called
        # for the resumption token below (the NodeList was passed before).
        desc = getText(errors[0])
        raise Exception(errType + ": " + desc)

    save(cached)

    records = dom.getElementsByTagName('record')
    countRecords = len(records)
    for record in records:
        handleRecord(record)
    # NOTE(review): presumably handleRecord() enqueues work for the worker
    # threads and this waits for it to finish -- confirm.
    queue.join()

    nodelist = dom.getElementsByTagName('resumptionToken')
    if not nodelist:
        return None, countRecords
    return getText(nodelist[0]), countRecords