Example #1
 def _(self):
     self.num_batch_done = 0
     while True:
         try:
             # resume the underlying fetch, skipping the batches already yielded
             for items in islice(f(self), self.num_batch_done, None):
                 self.num_batch_done += 1
                 yield items
             if self.num_retry > 0:
                 logger.info(
                     "Fetch retry %d success for url %s, num_batch %d ",
                     self.num_retry, self.url, self.num_batch_done)
             break
         except Exception as e:
             self.num_retry += 1
             msg = "Fetch failed for url %s, tried %d/%d times. Exception: %s. " % (
                 self.url, self.num_retry, MAX_RETRY, e)
             fail_fast = False
             emsg = str(e)
             # only "Connection refused" is treated as transient; anything else fails fast
             if not any(s in emsg for s in ["Connection refused"]):
                 # ["many open file", "404"]
                 fail_fast = True
                 msg += "no need to retry."
             if fail_fast or self.num_retry >= MAX_RETRY:
                 logger.warning(msg)
                 from dpark.schedule import FetchFailed
                 raise FetchFailed(self.uri, self.sid, self.mid, self.rid)
             else:
                 sleep_time = RETRY_INTERVALS[self.num_retry - 1]
                 msg += "sleep %d secs" % (sleep_time, )
                 logger.debug(msg)
                 time.sleep(sleep_time)
Example #2
 def _(self):
     self.num_batch_done = 0
     while True:
         try:
             for items in islice(f(self), self.num_batch_done, None):
                 self.num_batch_done += 1
                 yield items
             if self.num_retry > 0:
                 logger.info(
                     "Fetch retry %d success for url %s, num_batch %d ",
                     self.num_retry, self.url, self.num_batch_done)
             break
         except Exception as e:
             logger.exception("Fetch Fail")
             self.num_retry += 1
             msg = "Fetch failed for url %s, tried %d/%d times. Exception: %r. " % (
                 self.url, self.num_retry, MAX_RETRY, e)
             fail_fast = False
             # "too many open files" will not be cured by retrying, so give up at once
             if isinstance(e, IOError) and "many open file" in str(e):
                 fail_fast = True
             if fail_fast or self.num_retry >= MAX_RETRY:
                 msg += "GIVE UP!"
                 logger.warning(msg)
                 from dpark.schedule import FetchFailed
                 raise FetchFailed(self.uri, self.sid, self.mid, self.rid)
             else:
                 sleep_time = 2**self.num_retry * 0.5  # exponential backoff: 1.0, 2.0, 4.0, ...
                 msg += "sleep %d secs" % (sleep_time, )
                 logger.warning(msg)
                 time.sleep(sleep_time)
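
Examples #1 and #2 rely on the same trick: the generator counts the batches it has already yielded and, after an exception, restarts the underlying fetch and uses itertools.islice to skip past those batches, so the consumer never sees duplicates. Below is a minimal, self-contained sketch of that pattern; the names resumable, fetch_batches, max_retry and base_delay are illustrative and not part of dpark.

    import time
    from itertools import islice

    def resumable(fetch_batches, max_retry=3, base_delay=0.5):
        """Yield items from fetch_batches(), restarting it on failure
        and skipping the items that were already delivered."""
        done = 0
        retry = 0
        while True:
            try:
                # restart the source and fast-forward past completed items
                for item in islice(fetch_batches(), done, None):
                    done += 1
                    yield item
                return
            except Exception:
                retry += 1
                if retry >= max_retry:
                    raise
                time.sleep(base_delay * 2 ** retry)  # exponential backoff

Note that on each retry the new iterator still produces the skipped items; islice merely discards them. The dpark examples behave the same way: already-delivered batches are re-fetched and dropped rather than replayed to the consumer.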
Example #3
 def on_fail(self, e):
     self.num_retry += 1
     msg = "Fetch failed for url %s, %d/%d. exception: %s. " % (
         self.url, self.num_retry, self.max_retry, e)
     fail_fast = False
     if isinstance(e, IOError) and "many open file" in str(e):
         fail_fast = True
     if fail_fast or self.num_retry >= self.max_retry:
         msg += "GIVE UP!"
         logger.warning(msg)
         from dpark.schedule import FetchFailed
         raise FetchFailed(self.uri, self.sid, self.mid, self.rid)
     else:
         logger.debug(msg)
         time.sleep(2**self.num_retry * 0.1)
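
Example #3 factors the failure handling into a separate on_fail method that either sleeps (transient error, retries left) or raises FetchFailed (fail-fast error or retries exhausted). The sketch below shows how such a handler is typically driven from a retry loop; the Fetcher class, fetch_once callable, and the give-up behaviour (re-raising the original error instead of FetchFailed) are assumptions for illustration only.

    import time

    class Fetcher:
        def __init__(self, fetch_once, max_retry=3):
            self.fetch_once = fetch_once     # assumed single-attempt fetch callable
            self.max_retry = max_retry
            self.num_retry = 0

        def on_fail(self, e):
            self.num_retry += 1
            if self.num_retry >= self.max_retry:
                raise e                      # out of retries: propagate the failure
            time.sleep(2 ** self.num_retry * 0.1)  # exponential backoff, as in Example #3

        def fetch(self):
            while True:
                try:
                    return self.fetch_once()
                except Exception as e:
                    # either sleeps and loops again, or re-raises and ends the loop
                    self.on_fail(e)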
Example #4
    def fetch_one(self, uri, shuffleId, part, reduceId):
        if uri == LocalFileShuffle.getServerUri():
            # urllib can open local file
            url = 'file://' + LocalFileShuffle.getOutputFile(
                shuffleId, part, reduceId)
        else:
            url = "%s/%d/%d/%d" % (uri, shuffleId, part, reduceId)
        logger.debug("fetch %s", url)

        tries = 2
        while True:
            try:
                f = urllib.request.urlopen(url)
                if f.code == 404:
                    f.close()
                    raise IOError("not found")

                # block layout: 1-byte flag (b'm' = marshal, b'p' = pickle),
                # a 4-byte length covering the whole block, then the compressed body
                d = f.read()
                flag = d[:1]
                length, = struct.unpack("I", d[1:5])
                if length != len(d):
                    raise ValueError(
                        "length not match: expected %d, but got %d" %
                        (length, len(d)))
                d = decompress(d[5:])
                f.close()
                if flag == b'm':
                    d = marshal.loads(d)
                elif flag == b'p':
                    d = six.moves.cPickle.loads(d)
                else:
                    raise ValueError("invalid flag")
                return d
            except Exception as e:
                logger.debug(
                    "Fetch failed for shuffle %d,"
                    " reduce %d, %d, %s, %s, try again", shuffleId, reduceId,
                    part, url, e)
                tries -= 1
                if not tries:
                    logger.warning(
                        "Fetch failed for shuffle %d,"
                        " reduce %d, %d, %s, %s", shuffleId, reduceId, part,
                        url, e)
                    from dpark.schedule import FetchFailed
                    raise FetchFailed(uri, shuffleId, part, reduceId)
                time.sleep(2**(2 - tries) * 0.1)
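
Example #4 expects each fetched block to consist of a 1-byte flag (b'm' for marshal, b'p' for pickle), a 4-byte length that counts the whole block, and a compressed serialized body. The round-trip below is only a sketch of that framing: it assumes zlib for compress/decompress and marshal for the body, which may not match the serializers dpark actually uses.

    import marshal
    import struct
    import zlib

    def pack_block(obj, flag=b'm'):
        # 1-byte flag + 4-byte total length + compressed body
        body = zlib.compress(marshal.dumps(obj))
        total = 1 + 4 + len(body)            # length counts the whole block
        return flag + struct.pack("I", total) + body

    def unpack_block(d):
        flag = d[:1]
        length, = struct.unpack("I", d[1:5])
        if length != len(d):
            raise ValueError("length not match")
        body = zlib.decompress(d[5:])
        return marshal.loads(body) if flag == b'm' else body

    assert unpack_block(pack_block([1, 2, 3])) == [1, 2, 3]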