Пример #1
0
    def run(self):
        """Download queued images forever, one queue item at a time.

        Each item taken from ``self.queue`` is unpacked as an
        ``(imageUrl, imagePath)`` pair and handed to
        ``urllib.urlretrieve`` through ``retry(..., 3)`` (presumably up to
        three attempts -- confirm against the ``retry`` helper).

        The loop never returns.  A download that still fails is reported on
        stderr and skipped so that the remaining items keep being consumed.
        """
        import traceback

        while True:
            imageUrl, imagePath = self.queue.get()

            try:
                retry(urllib.urlretrieve, 3)(imageUrl, imagePath)
            except Exception:
                # Report the failure but keep looping: letting the exception
                # escape would end this method and leave the rest of the
                # queue without this consumer.
                traceback.print_exc()
            finally:
                # Always balance the get() above, success or failure, so a
                # join() on the queue can complete.
                self.queue.task_done()
Пример #2
0
  def run(self):
    """Download queued images forever, one queue item at a time.

    Each item taken from ``self.queue`` is unpacked as an
    ``(imageUrl, imagePath)`` pair and handed to ``urllib.urlretrieve``
    through ``retry(..., 3)`` (presumably up to three attempts -- confirm
    against the ``retry`` helper).

    The loop never returns.  A download that still fails is reported on
    stderr and skipped so that the remaining items keep being consumed.
    """
    import traceback

    while True:
      imageUrl, imagePath = self.queue.get()

      try:
        retry(urllib.urlretrieve, 3)(imageUrl, imagePath)
      except Exception:
        # Report the failure but keep looping: letting the exception escape
        # would end this method and leave the rest of the queue without
        # this consumer.
        traceback.print_exc()
      finally:
        # Always balance the get() above, success or failure, so a join()
        # on the queue can complete.
        self.queue.task_done()
Пример #3
0
def harvest(url):
    """Fetch one XML response from *url* and process every record in it.

    The response body is parsed with ``minidom``.  If it contains an
    ``<error>`` element, an ``Exception`` is raised whose message is the
    element's ``code`` attribute followed by the text ``getText`` extracts.
    Otherwise the raw body is passed to ``save``, each ``<record>`` element
    is passed to ``handleRecord``, and the call blocks on ``queue.join()``
    before returning.

    Returns a ``(token, countRecords)`` tuple.  ``token`` is whatever
    ``getText`` yields for the first ``<resumptionToken>`` element, or
    ``None`` when the response has no such element; ``countRecords`` is the
    number of ``<record>`` elements found.
    """
    # Parenthesised form prints the same text and parses on Python 2 and 3.
    print("downloading: " + url)
    data = retry(urllib2.urlopen, 3)(url)

    # Cache the whole body: the file-like response is not seekable, and the
    # same text is needed for both parsing and save().  A single read()
    # replaces the line-by-line "+=" loop, and the response is closed even
    # if reading fails so the connection is not leaked.
    try:
        cached = data.read()
    finally:
        data.close()

    dom = minidom.parseString(cached)

    # check for error
    error = dom.getElementsByTagName('error')
    if error:
        errType = error[0].getAttribute('code')
        # NOTE(review): getText is given the whole NodeList here but a single
        # node for the resumption token further down -- confirm which form
        # the helper expects.
        desc = getText(error)
        raise Exception(errType + ": " + desc)

    save(cached)
    records = dom.getElementsByTagName('record')
    countRecords = len(records)

    for record in records:
        handleRecord(record)

    # Block until every task placed on the module-level queue has been
    # marked done (presumably handleRecord enqueues work -- verify).
    queue.join()

    nodelist = dom.getElementsByTagName('resumptionToken')
    if not nodelist:
        return None, countRecords

    return getText(nodelist[0]), countRecords
Пример #4
0
def harvest(url):
  """Fetch one XML response from *url* and process every record in it.

  The response body is parsed with ``minidom``.  If it contains an
  ``<error>`` element, an ``Exception`` is raised whose message is the
  element's ``code`` attribute followed by the text ``getText`` extracts.
  Otherwise the raw body is passed to ``save``, each ``<record>`` element is
  passed to ``handleRecord``, and the call blocks on ``queue.join()`` before
  returning.

  Returns a ``(token, countRecords)`` tuple.  ``token`` is whatever
  ``getText`` yields for the first ``<resumptionToken>`` element, or ``None``
  when the response has no such element; ``countRecords`` is the number of
  ``<record>`` elements found.
  """
  # Parenthesised form prints the same text and parses on Python 2 and 3.
  print("downloading: " + url)
  data = retry(urllib2.urlopen, 3)(url)

  # Cache the whole body: the file-like response is not seekable, and the
  # same text is needed for both parsing and save().  A single read()
  # replaces the line-by-line "+=" loop, and the response is closed even if
  # reading fails so the connection is not leaked.
  try:
    cached = data.read()
  finally:
    data.close()

  dom = minidom.parseString(cached)

  # check for error
  error = dom.getElementsByTagName('error')
  if error:
    errType = error[0].getAttribute('code')
    # NOTE(review): getText is given the whole NodeList here but a single
    # node for the resumption token further down -- confirm which form the
    # helper expects.
    desc = getText(error)
    raise Exception(errType + ": " + desc)

  save(cached)
  records = dom.getElementsByTagName('record')
  countRecords = len(records)

  for record in records:
    handleRecord(record)

  # Block until every task placed on the module-level queue has been marked
  # done (presumably handleRecord enqueues work -- verify).
  queue.join()

  nodelist = dom.getElementsByTagName('resumptionToken')
  if not nodelist:
    return None, countRecords

  return getText(nodelist[0]), countRecords