def run(self, attempId):
    # Shuffle-map task body: serialize each output bucket and write it to the
    # local shuffle file for this (shuffleId, partition, bucket) triple.
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    for i, bucket in self._prepare_shuffle(self.rdd):
        try:
            # marshal is faster than pickle but only handles builtin types;
            # fall back to cPickle when the bucket is not marshalable.
            if marshalable(bucket):
                flag, d = 'm', marshal.dumps(bucket)
            else:
                flag, d = 'p', cPickle.dumps(bucket, -1)
        except ValueError:
            # marshal can still raise ValueError at dump time (e.g. deeply
            # nested data); pickle is the safe fallback.
            flag, d = 'p', cPickle.dumps(bucket, -1)
        cd = compress(d)
        for tried in range(1, 4):
            try:
                # len(cd) * tried: ask for progressively more free space on
                # each retry so a different (less full) disk may be chosen.
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                with atomic_file(path, bufsize=1024 * 4096) as f:
                    # header: 1-byte codec flag + 4-byte total length,
                    # then the compressed payload.
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                break
            except IOError, e:
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
        else:
            # All retries failed: Python 2 bare raise re-raises the most
            # recently handled exception (the last IOError).
            raise
def run(self, attempId):
    # Shuffle-map task body: serialize each bucket produced by the rdd and
    # publish it via write-to-temp-then-rename so readers never observe a
    # partially written file.
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    for i, bucket in self.rdd._prepare_shuffle(self.split, self.partitioner,
                                               self.aggregator):
        try:
            # marshal is faster but limited to builtin types; fall back to
            # cPickle otherwise.
            if marshalable(bucket):
                flag, d = 'm', marshal.dumps(bucket)
            else:
                flag, d = 'p', cPickle.dumps(bucket, -1)
        except ValueError:
            # marshal may still fail at dump time; pickle is the safe fallback
            flag, d = 'p', cPickle.dumps(bucket, -1)
        cd = compress(d)
        for tried in range(1, 4):
            try:
                # request more free space on every retry so another disk can
                # be picked when the first choice is full
                path = LocalFileShuffle.getOutputFile(self.shuffleId,
                                                      self.partition, i,
                                                      len(cd) * tried)
                # host+pid suffix keeps concurrent writers from colliding
                tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                f = open(tpath, 'wb', 1024*4096)
                # header: 1-byte codec flag + 4-byte total length, then payload
                f.write(flag + struct.pack("I", 5 + len(cd)))
                f.write(cd)
                f.close()
                os.rename(tpath, path)  # atomic publish
                break
            except IOError, e:
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
                # NOTE(review): tpath may be unbound here if getOutputFile
                # itself raised on the first attempt — confirm
                try:
                    os.remove(tpath)  # best-effort cleanup of the temp file
                except OSError:
                    pass
        else:
            # every retry failed; Python 2 bare raise re-raises the last
            # handled IOError
            raise
def run(self, attempId):
    # Shuffle-map task body: serialize each bucket produced by the rdd and
    # publish it with temp-file + rename so readers never see partial data.
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    for i, bucket in self.rdd._prepare_shuffle(self.split, self.partitioner,
                                               self.aggregator):
        try:
            # marshal is faster but only supports builtin types; fall back
            # to cPickle otherwise.
            if marshalable(bucket):
                flag, d = 'm', marshal.dumps(bucket)
            else:
                flag, d = 'p', cPickle.dumps(bucket, -1)
        except ValueError:
            # marshal may still fail at dump time; pickle is the fallback
            flag, d = 'p', cPickle.dumps(bucket, -1)
        cd = compress(d)
        for tried in range(1, 4):
            try:
                # ask for more free space on each retry so a different
                # (less full) disk may be selected
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                # host+pid suffix keeps concurrent writers from colliding
                tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                f = open(tpath, 'wb', 1024 * 4096)
                # header: 1-byte codec flag + 4-byte total length, then payload
                f.write(flag + struct.pack("I", 5 + len(cd)))
                f.write(cd)
                f.close()
                os.rename(tpath, path)  # atomic publish
                break
            except IOError, e:
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
                # NOTE(review): tpath may be unbound here if getOutputFile
                # itself raised on the first attempt — confirm
                try:
                    os.remove(tpath)  # best-effort temp-file cleanup
                except OSError:
                    pass
        else:
            # all retries failed; Python 2 bare raise re-raises the last
            # handled IOError
            raise
def run_without_sorted(self, it):
    """Write each shuffle bucket to a local file without sorting.

    For every ``(bucket_index, bucket)`` pair in *it*, serialize the bucket
    (marshal when possible, pickle otherwise), compress it and write it
    atomically to the local shuffle output file, retrying up to 3 times.

    Returns the URI of the local shuffle server so reducers can fetch
    the files.  Raises the last IOError if all write attempts fail.
    """
    for i, bucket in it:
        try:
            # marshal is faster but only handles builtin types
            if marshalable(bucket):
                flag, d = b'm', marshal.dumps(bucket)
            else:
                flag, d = b'p', six.moves.cPickle.dumps(bucket, -1)
        except ValueError:
            # marshal can still fail at dump time; pickle is the fallback
            flag, d = b'p', six.moves.cPickle.dumps(bucket, -1)
        cd = compress(d)
        env.task_stats.bytes_shuffle_write += len(cd)
        last_exc = None
        for tried in range(1, 4):
            try:
                # len(cd) * tried: request more free space on each retry so
                # a different (less full) disk may be chosen
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                with atomic_file(path, bufsize=1024 * 4096) as f:
                    # header: 1-byte codec flag + 4-byte total length,
                    # then the compressed payload
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                break
            except IOError as e:
                # keep a reference: Python 3 unbinds `e` when this except
                # clause exits, so `raise e` in the for-else was a NameError
                last_exc = e
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
        else:
            raise last_exc
    return LocalFileShuffle.getServerUri()
def run_task(task_data): try: gc.disable() task, ntry = cPickle.loads(decompress(task_data)) setproctitle('dpark worker %s: run task %s' % (Script, task)) Accumulator.clear() result = task.run(ntry) accUpdate = Accumulator.values() if marshalable(result): flag, data = 0, marshal.dumps(result) else: flag, data = 1, cPickle.dumps(result, -1) data = compress(data) if len(data) > TASK_RESULT_LIMIT: path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data)) f = open(path, 'w') f.write(data) f.close() data = '/'.join([LocalFileShuffle.getServerUri()] + path.split('/')[-3:]) flag += 2 return mesos_pb2.TASK_FINISHED, cPickle.dumps((Success(), (flag, data), accUpdate), -1) except FetchFailed, e: return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1)
def run_task(task_data):
    # Worker entry point: deserialize one task, run it, and encode the
    # outcome as a (state, [reason, message,] payload) tuple for the
    # scheduler.  GC is disabled during the run and restored on exit.
    try:
        gc.disable()  # avoid GC pauses while the task runs
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        # ru_maxrss is in KiB on Linux; store bytes
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()
        # flag 0 = marshal codec, 1 = pickle; tells the reader how to decode
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                # marshal can fail even when marshalable() said yes
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            # result too large to inline: spill to the shuffle dir and
            # return a fetch URI instead
            path = LocalFileShuffle.getOutputFile(0, task.id, ttid.task_try,
                                                  len(data))
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2  # +2 marks "data is a URI"
        return TaskState.finished, cPickle.dumps(
            ((flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return TaskState.failed, TaskEndReason.fetch_failed, str(
            e), cPickle.dumps(e)
    except Exception as e:
        import traceback
        msg = traceback.format_exc()
        ename = e.__class__.__name__
        # these exception types indicate a user-code bug, not a transient
        # failure, so they are reported as FATAL (no point retrying)
        fatal_exceptions = (DparkUserFatalError, ArithmeticError,
                            ValueError, LookupError, SyntaxError, TypeError,
                            AssertionError)
        prefix = "FATAL" if isinstance(e, fatal_exceptions) else "FAILED"
        return TaskState.failed, '{}_EXCEPTION_{}'.format(
            prefix, ename), msg, cPickle.dumps(e)
    finally:
        gc.collect()
        gc.enable()
def run(self, attempId):
    # Shuffle-map task: hash-partition the rdd's (k, v) pairs into one
    # combiner bucket per output split, then persist each bucket with
    # temp-file + rename.
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    numOutputSplits = self.partitioner.numPartitions
    # hoist hot-loop lookups to locals
    getPartition = self.partitioner.getPartition
    mergeValue = self.aggregator.mergeValue
    createCombiner = self.aggregator.createCombiner
    buckets = [{} for i in range(numOutputSplits)]
    for k, v in self.rdd.iterator(self.split):
        bucketId = getPartition(k)
        bucket = buckets[bucketId]
        r = bucket.get(k, None)
        if r is not None:
            bucket[k] = mergeValue(r, v)
        else:
            bucket[k] = createCombiner(v)
    for i in range(numOutputSplits):
        try:
            # marshal is faster but only handles builtin types
            if marshalable(buckets[i]):
                flag, d = 'm', marshal.dumps(buckets[i])
            else:
                flag, d = 'p', cPickle.dumps(buckets[i], -1)
        except ValueError:
            # marshal may still fail at dump time; pickle is the fallback
            flag, d = 'p', cPickle.dumps(buckets[i], -1)
        cd = compress(d)
        for tried in range(1, 4):
            try:
                # request more free space on each retry so a different
                # (less full) disk may be chosen
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i, len(cd) * tried)
                # host+pid suffix keeps concurrent writers from colliding
                tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                f = open(tpath, 'wb', 1024 * 4096)
                # header: 1-byte codec flag + 4-byte total length, then payload
                f.write(flag + struct.pack("I", 5 + len(cd)))
                f.write(cd)
                f.close()
                os.rename(tpath, path)  # atomic publish
                break
            except IOError, e:
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
                try:
                    os.remove(tpath)  # best-effort temp-file cleanup
                except OSError:
                    pass
        else:
            # all retries failed; Python 2 bare raise re-raises the last
            # handled IOError
            raise
def run_task(task_data):
    """Execute one serialized task and report its outcome.

    Decodes *task_data*, runs the task, and returns either
    ``(TaskState.finished, payload)`` or a ``(TaskState.failed, reason,
    message, pickled_exception)`` tuple.  GC is suspended for the duration
    of the run and restored afterwards.
    """
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        # ru_maxrss is reported in KiB; keep the stat in bytes
        usage = resource.getrusage(resource.RUSAGE_SELF)
        env.task_stats.bytes_max_rss = usage.ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()
        # pick the codec: flag 0 = marshal (fast path), flag 1 = pickle
        if not marshalable(result):
            flag, data = 1, cPickle.dumps(result, -1)
        else:
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            # too large to inline — spill to disk and hand back a fetch URI
            spill = LocalFileShuffle.getOutputFile(
                0, task.id, ttid.task_try, len(data))
            with open(spill, 'wb') as out:
                out.write(data)
            tail = spill.split('/')[-3:]
            data = '/'.join([LocalFileShuffle.getServerUri()] + tail)
            flag += 2  # +2 marks "data is a URI"
        payload = cPickle.dumps(((flag, data), accUpdate, env.task_stats), -1)
        return TaskState.finished, payload
    except FetchFailed as e:
        return (TaskState.failed, TaskEndReason.fetch_failed, str(e),
                cPickle.dumps(e))
    except Exception as e:
        import traceback
        msg = traceback.format_exc()
        ename = e.__class__.__name__
        # user-code bugs are reported as FATAL, everything else as FAILED
        fatal_exceptions = (DparkUserFatalError, ArithmeticError, ValueError,
                            LookupError, SyntaxError, TypeError,
                            AssertionError)
        prefix = "FATAL" if isinstance(e, fatal_exceptions) else "FAILED"
        return (TaskState.failed, '{}_EXCEPTION_{}'.format(prefix, ename),
                msg, cPickle.dumps(e))
    finally:
        gc.collect()
        gc.enable()
def run_task(task_data):
    # Worker entry point: run one task and report 'TASK_FINISHED' or
    # 'TASK_FAILED' with a pickled (reason, result, accumulators, stats)
    # payload.  GC is disabled during the run and restored on exit.
    try:
        gc.disable()
        task, task_try_id = loads(decompress(task_data))
        ttid = TTID(task_try_id)
        Accumulator.clear()
        result = task.run(ttid.ttid)
        # ru_maxrss is in KiB on Linux; store bytes
        env.task_stats.bytes_max_rss = resource.getrusage(
            resource.RUSAGE_SELF).ru_maxrss * 1024
        accUpdate = Accumulator.values()
        MutableDict.flush()
        # flag 0 = marshal codec, 1 = pickle
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception:
                # marshal can fail even when marshalable() said yes
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            # spill oversized result to disk and ship a fetch URI instead
            path = LocalFileShuffle.getOutputFile(0, task.id, ttid.task_try,
                                                  len(data))
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join([LocalFileShuffle.getServerUri()] +
                            path.split('/')[-3:])
            flag += 2  # +2 marks "data is a URI"
        return 'TASK_FINISHED', cPickle.dumps(
            (Success(), (flag, data), accUpdate, env.task_stats), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', cPickle.dumps((e, None, None, None), -1)
    except:
        # boundary catch-all: report any other error as OtherFailure with
        # the full traceback text
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', cPickle.dumps(
            (OtherFailure(msg), None, None, None), -1)
    finally:
        gc.collect()
        gc.enable()
def run_with_sorted(self, it):
    """Sort each shuffle bucket by key and stream it to a local file.

    For every ``(bucket_index, bucket)`` pair in *it*, the bucket's items
    are sorted by key and written through a batched serializer into an
    atomically-published file, retrying up to 3 times.

    Returns the URI of the local shuffle server.  Raises the last IOError
    if all write attempts for a bucket fail.
    """
    serializer = GroupByAutoBatchedSerializer(
    ) if self.iter_values else AutoBatchedSerializer()
    for i, bucket in it:
        last_exc = None
        for tried in range(1, 4):
            try:
                path = LocalFileShuffle.getOutputFile(
                    self.shuffleId, self.partition, i)
                with atomic_file(path, bufsize=1024 * 4096) as f:
                    # sort by key so reducers receive ordered runs
                    items = sorted(bucket.items(), key=lambda x: x[0])
                    serializer.dump_stream(items, f)
                    env.task_stats.bytes_shuffle_write += f.tell()
                break
            except IOError as e:
                # keep a reference: Python 3 unbinds `e` when this except
                # clause exits, so `raise e` in the for-else was a NameError
                last_exc = e
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
        else:
            raise last_exc
    return LocalFileShuffle.getServerUri()
def run(self, attempId):
    # Shuffle-map task: hash-partition the rdd's (k, v) pairs into one
    # combiner bucket per output split, then persist each bucket with
    # temp-file + rename.
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    numOutputSplits = self.partitioner.numPartitions
    # hoist hot-loop lookups to locals
    getPartition = self.partitioner.getPartition
    mergeValue = self.aggregator.mergeValue
    createCombiner = self.aggregator.createCombiner
    buckets = [{} for i in range(numOutputSplits)]
    for k, v in self.rdd.iterator(self.split):
        bucketId = getPartition(k)
        bucket = buckets[bucketId]
        r = bucket.get(k, None)
        if r is not None:
            bucket[k] = mergeValue(r, v)
        else:
            bucket[k] = createCombiner(v)
    for i in range(numOutputSplits):
        try:
            # marshal is faster but only handles builtin types
            if marshalable(buckets[i]):
                flag, d = 'm', marshal.dumps(buckets[i])
            else:
                flag, d = 'p', cPickle.dumps(buckets[i], -1)
        except ValueError:
            # marshal may still fail at dump time; pickle is the fallback
            flag, d = 'p', cPickle.dumps(buckets[i], -1)
        cd = compress(d)
        for tried in range(1, 4):
            try:
                # request more free space on each retry so a different
                # (less full) disk may be chosen
                path = LocalFileShuffle.getOutputFile(self.shuffleId,
                                                      self.partition, i,
                                                      len(cd) * tried)
                # host+pid suffix keeps concurrent writers from colliding
                tpath = path + ".%s.%s" % (socket.gethostname(), os.getpid())
                f = open(tpath, 'wb', 1024*4096)
                # header: 1-byte codec flag + 4-byte total length, then payload
                f.write(flag + struct.pack("I", 5 + len(cd)))
                f.write(cd)
                f.close()
                os.rename(tpath, path)  # atomic publish
                break
            except IOError, e:
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
                try:
                    os.remove(tpath)  # best-effort temp-file cleanup
                except OSError:
                    pass
        else:
            # all retries failed; Python 2 bare raise re-raises the last
            # handled IOError
            raise
def run_task(task_data):
    # Worker entry point: run one task and serialize the outcome for the
    # scheduler as ('TASK_FINISHED'/'TASK_FAILED', pickled payload).
    # Always re-enables GC and closes moosefs handles on exit.
    try:
        gc.disable()  # avoid GC pauses while the task runs
        task, ntry = loads(decompress(task_data))
        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()
        MutableDict.flush()
        # flag 0 = marshal codec, 1 = pickle
        if marshalable(result):
            try:
                flag, data = 0, marshal.dumps(result)
            except Exception as e:
                # marshal can fail even when marshalable() said yes
                flag, data = 1, cPickle.dumps(result, -1)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)
        if len(data) > TASK_RESULT_LIMIT:
            # oversized result: spill to disk and return a fetch URI
            path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data))
            # 'wb': `data` is compressed bytes; text mode would raise
            # TypeError on Python 3
            f = open(path, 'wb')
            f.write(data)
            f.close()
            data = '/'.join(
                [LocalFileShuffle.getServerUri()] + path.split('/')[-3:]
            )
            flag += 2  # +2 marks "data is a URI"
        return 'TASK_FINISHED', cPickle.dumps(
            (Success(), (flag, data), accUpdate), -1)
    except FetchFailed as e:
        return 'TASK_FAILED', cPickle.dumps((e, None, None), -1)
    except:
        # boundary catch-all: report any other error as OtherFailure with
        # the full traceback text
        import traceback
        msg = traceback.format_exc()
        return 'TASK_FAILED', cPickle.dumps(
            (OtherFailure(msg), None, None), -1)
    finally:
        close_mfs()
        gc.collect()
        gc.enable()
def run(self, attempId):
    # Shuffle-map task body: serialize each output bucket and write it via
    # atomic_file so readers never observe a partially written file.
    logger.debug("shuffling %d of %s", self.partition, self.rdd)
    for i, bucket in self._prepare_shuffle(self.rdd):
        try:
            # marshal is faster but only handles builtin types; fall back
            # to cPickle otherwise.
            if marshalable(bucket):
                flag, d = 'm', marshal.dumps(bucket)
            else:
                flag, d = 'p', cPickle.dumps(bucket, -1)
        except ValueError:
            # marshal may still fail at dump time; pickle is the fallback
            flag, d = 'p', cPickle.dumps(bucket, -1)
        cd = compress(d)
        for tried in range(1, 4):
            try:
                # request more free space on each retry so a different
                # (less full) disk may be chosen
                path = LocalFileShuffle.getOutputFile(self.shuffleId,
                                                      self.partition, i,
                                                      len(cd) * tried)
                with atomic_file(path, bufsize=1024*4096) as f:
                    # header: 1-byte codec flag + 4-byte total length,
                    # then the compressed payload
                    f.write(flag + struct.pack("I", 5 + len(cd)))
                    f.write(cd)
                break
            except IOError, e:
                logger.warning("write %s failed: %s, try again (%d)",
                               path, e, tried)
        else:
            # all retries failed; Python 2 bare raise re-raises the last
            # handled IOError
            raise
result = task.run(ntry) accUpdate = Accumulator.values() MutableDict.flush() if marshalable(result): try: flag, data = 0, marshal.dumps(result) except Exception, e: flag, data = 1, cPickle.dumps(result, -1) else: flag, data = 1, cPickle.dumps(result, -1) data = compress(data) if len(data) > TASK_RESULT_LIMIT: path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data)) f = open(path, 'w') f.write(data) f.close() data = '/'.join([LocalFileShuffle.getServerUri()] + path.split('/')[-3:]) flag += 2 return mesos_pb2.TASK_FINISHED, cPickle.dumps( (Success(), (flag, data), accUpdate), -1) except FetchFailed, e: return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1) except: import traceback msg = traceback.format_exc() return mesos_pb2.TASK_FAILED, cPickle.dumps(
def _get_path(self, i, size):
    """Return the local shuffle output path for reduce bucket *i*.

    Delegates to LocalFileShuffle, passing *size* as the number of bytes
    of free space the chosen location must provide.
    """
    shuffle_id = self.shuffle_id
    map_id = self.map_id
    return LocalFileShuffle.getOutputFile(shuffle_id, map_id, i, size)
Accumulator.clear() result = task.run(ntry) accUpdate = Accumulator.values() if marshalable(result): try: flag, data = 0, marshal.dumps(result) except Exception, e: flag, data = 1, cPickle.dumps(result, -1) else: flag, data = 1, cPickle.dumps(result, -1) data = compress(data) if len(data) > TASK_RESULT_LIMIT: path = LocalFileShuffle.getOutputFile(0, ntry, task.id, len(data)) f = open(path, 'w') f.write(data) f.close() data = '/'.join([LocalFileShuffle.getServerUri()] + path.split('/')[-3:]) flag += 2 return mesos_pb2.TASK_FINISHED, cPickle.dumps((Success(), (flag, data), accUpdate), -1) except FetchFailed, e: return mesos_pb2.TASK_FAILED, cPickle.dumps((e, None, None), -1) except : import traceback msg = traceback.format_exc() return mesos_pb2.TASK_FAILED, cPickle.dumps((OtherFailure(msg), None, None), -1) finally: setproctitle('dpark worker: idle')