Exemplo n.º 1
0
 def initialize(cls, isMaster):
     """Select usable shuffle directories from WORKDIR and record the
     file-server URI (defaulting to a file:// URI on the first dir)."""
     workdirs = env.get('WORKDIR')
     usable = []
     for d in workdirs:
         if os.path.exists(os.path.dirname(d)):
             usable.append(d)
     cls.shuffleDir = usable
     if not cls.shuffleDir:
         # No usable workdir: leave the shuffle service unconfigured.
         return
     default_uri = 'file://' + cls.shuffleDir[0]
     cls.serverUri = env.get('SERVER_URI', default_uri)
     logger.debug("shuffle dir: %s", cls.shuffleDir)
Exemplo n.º 2
0
 def initialize(cls, isMaster):
     """Collect shuffle directories whose parent directory exists and
     pick up the server URI from the environment."""
     cls.shuffleDir = list(
         filter(lambda d: os.path.exists(os.path.dirname(d)),
                env.get('WORKDIR')))
     if not cls.shuffleDir:
         return
     cls.serverUri = env.get('SERVER_URI')
     logger.debug("shuffle dir: %s", cls.shuffleDir)
Exemplo n.º 3
0
Arquivo: cache.py Projeto: npc7/dpark
    def __init__(self, isMaster):
        """Cache tracker: the master runs the tracker server and registers
        its address; workers set up a local disk cache and read the
        address back from the environment."""
        LocalCacheTracker.__init__(self, isMaster)
        if isMaster:
            self.server = CacheTrackerServer(self.locs)
            self.server.start()
            addr = self.server.addr
            env.register('CacheTrackerAddr', addr)
        else:
            cachedir = os.path.join(env.get('WORKDIR')[0], 'cache')
            self.cache = DiskCache(self, cachedir)
            addr = env.get('CacheTrackerAddr')

        # Both roles talk to the tracker through the same client type.
        self.client = CacheTrackerClient(addr)
Exemplo n.º 4
0
Arquivo: cache.py Projeto: npc7/dpark
    def __init__(self, isMaster):
        """Set up cache tracking: master serves locations over RPC and
        publishes its address; workers cache to disk under the primary
        workdir and connect to the published address."""
        LocalCacheTracker.__init__(self, isMaster)
        if isMaster:
            self.server = CacheTrackerServer(self.locs)
            self.server.start()
            addr = self.server.addr
            env.register("CacheTrackerAddr", addr)
        else:
            cachedir = os.path.join(env.get("WORKDIR")[0], "cache")
            self.cache = DiskCache(self, cachedir)
            addr = env.get("CacheTrackerAddr")

        self.client = CacheTrackerClient(addr)
Exemplo n.º 5
0
    def start(self):
        """Start the broadcast client; idempotent.

        Launches the download manager and wires up the shared state it
        exports (uuid->file and uuid->map dicts, download condition).
        """
        if self._started:
            return

        self._started = True
        start_download_manager()
        # Addresses are published into env by the guide/download managers.
        self.guide_addr = env.get(GUIDE_ADDR)
        self.download_addr = env.get(DOWNLOAD_ADDR)
        self.cache = Cache()
        self.ctx = zmq.Context()
        # State shared with the download manager process.
        self.shared_uuid_fn_dict = _download_manager.shared_uuid_fn_dict
        self.shared_uuid_map_dict = _download_manager.shared_uuid_map_dict
        self.download_cond = _download_manager.download_cond
Exemplo n.º 6
0
    def get(self, key):
        """Return the cached value for ``key`` ((rdd_id, index)).

        Tries local disk first, then fetches over HTTP from a peer whose
        location the tracker knows. Returns None when unavailable; a 404
        from the peer also removes the stale location from the tracker.
        """
        p = self.get_path(key)
        if os.path.exists(p):
            return self.load(open(p, 'rb'))

        # load from other node
        if not env.get('SERVER_URI'):
            return
        rdd_id, index = key
        locs = self.tracker.getCacheUri(rdd_id, index)
        if not locs:
            return

        # Use the last listed location.
        serve_uri = locs[-1]
        uri = '%s/cache/%s' % (serve_uri, os.path.basename(p))
        try:
            f = urllib.request.urlopen(uri)
        except IOError:
            # Connection-level failure: log and propagate to the caller.
            logger.warning('urlopen cache uri %s failed', uri)
            raise
        if f.code == 404:
            logger.warning('load from cache %s failed', uri)
            self.tracker.removeHost(rdd_id, index, serve_uri)
            f.close()
            return
        return self.load(f)
Exemplo n.º 7
0
 def __init__(self):
     """Set up the disk-backed cache under the primary workdir."""
     workdir = env.get("WORKDIR")[0]
     self.cache = DiskCache(self, os.path.join(workdir, "cache"))
     self.client = env.trackerClient
     self.rdds = {}
     if env.isMaster:
         # Only the master holds the authoritative location map.
         self.locs = env.trackerServer.locs
Exemplo n.º 8
0
    def _get_path(self):
        """Return the directory used for mutable_dict chunk files.

        Uses the primary workdir when it exists or has >= 66% free
        blocks; otherwise creates the directory in a secondary workdir
        and symlinks the primary path to it.

        Raises RuntimeError when no workdir is configured or usable.
        """
        dirs = env.get('WORKDIR')
        if not dirs:
            raise RuntimeError('No available workdir')

        path = os.path.join(dirs[0], 'mutable_dict')
        if os.path.exists(path):
            return path

        st = os.statvfs(dirs[0])
        ratio = st.f_bfree * 1.0 / st.f_blocks
        if ratio >= 0.66:
            mkdir_p(path)
            return path

        for d in dirs[1:]:
            p = os.path.join(d, 'mutable_dict')
            try:
                os.makedirs(p)
                os.symlink(p, path)
            except OSError:
                # Fix: the original returned `path` unconditionally after
                # the first iteration, even when this spill dir could not
                # be created. Return only if the path actually exists
                # (e.g. set up by an earlier run); otherwise try the next.
                if os.path.exists(path):
                    return path
                continue
            return path

        raise RuntimeError('Cannot find suitable workdir')
Exemplo n.º 9
0
def run_task(task, aid):
    """Run a task and return a (mesos state, pickled payload) pair.

    The payload is (task.id, Success/OtherFailure, (flag, data),
    accumulator updates). flag: 0 = marshal-encoded, 1 = pickled;
    +2 = data spilled to a file and `data` holds the path instead.
    """
    try:
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        result = task.run(aid)
        accUpdate = Accumulator.values()
        try:
            flag, data = 0, marshal.dumps(result)
        except ValueError:
            # Not everything is marshalable; fall back to pickle.
            flag, data = 1, cPickle.dumps(result)

        if len(data) > TASK_RESULT_LIMIT:
            # Result too large to inline: spill to the workdir and send
            # the path instead.
            workdir = env.get('WORKDIR')
            path = os.path.join(workdir, str(task.id) + '.result')
            with open(path, 'w') as f:
                f.write(data)
            data = path
            flag += 2

        setproctitle('dpark worker: idle')
        return mesos_pb2.TASK_FINISHED, cPickle.dumps(
            (task.id, Success(), (flag, data), accUpdate), -1)
    except Exception:
        # Fix: `except Exception, e` is Python-2-only syntax and the
        # bound name was never used.
        import traceback
        msg = traceback.format_exc()
        setproctitle('dpark worker: idle')
        return mesos_pb2.TASK_FAILED, cPickle.dumps(
            (task.id, OtherFailure(msg), None, None), -1)
Exemplo n.º 10
0
 def __init__(self):
     """Set up the disk cache; mirror the server's location map when a
     tracker server is running in this process."""
     base = env.get('WORKDIR')[0]
     self.cache = DiskCache(self, os.path.join(base, 'cache'))
     self.client = env.trackerClient
     self.rdds = {}
     if env.trackerServer is not None:
         self.locs = env.trackerServer.locs
Exemplo n.º 11
0
    def getOutputFile(cls, shuffle_id, input_id, output_id, datasize=0):
        """
            Return the path of a shuffle output file.

            datasize < 0: disk first
            datasize > 0: memfirst
            datasize = 0: read only, use link
        """
        shuffleDir = env.get('WORKDIR')
        path = os.path.join(shuffleDir[0], str(shuffle_id), str(input_id))
        mkdir_p(path)
        p = os.path.join(path, str(output_id))
        if datasize != 0 and len(shuffleDir) > 1:
            use_disk = datasize < 0
            if datasize > 0:
                # Spill when the primary dir is low on space: less than
                # max(datasize, 1 GiB) free, or under 66% free blocks.
                st = os.statvfs(path)
                free = st.f_bfree * st.f_bsize
                ratio = st.f_bfree * 1.0 / st.f_blocks
                use_disk = free < max(datasize, 1 << 30) or ratio < 0.66

            if use_disk:
                d2 = os.path.join(
                    random.choice(shuffleDir[1:]),
                    str(shuffle_id), str(input_id))
                mkdir_p(d2)
                p2 = os.path.join(d2, str(output_id))
                # Point the primary path at the spill file via a symlink
                # so readers can keep resolving `p`.
                if os.path.exists(p):
                    os.remove(p)
                os.symlink(p2, p)
                if os.path.islink(p2):
                    os.unlink(p2)  # p == p2
                return p2
        return p
Exemplo n.º 12
0
    def get(self, key):
        """Load the cached value for ``key`` ((rdd_id, index)) from local
        disk, or fetch it over HTTP from a peer known to the tracker.

        Returns None when no copy can be found; stale peer locations
        (404 responses) are removed from the tracker.
        """
        p = self.get_path(key)
        if os.path.exists(p):
            return self.load(open(p, 'rb'))

        # load from other node
        if not env.get('SERVER_URI'):
            return
        rdd_id, index = key
        locs = self.tracker.getCacheUri(rdd_id, index)
        if not locs:
            return

        serve_uri = locs[-1]
        uri = '%s/cache/%s' % (serve_uri, os.path.basename(p))
        try:
            f = urllib.request.urlopen(uri)
        except IOError:
            logger.warning('urlopen cache uri %s failed', uri)
            raise
        if f.code == 404:
            logger.warning('load from cache %s failed', uri)
            self.tracker.removeHost(rdd_id, index, serve_uri)
            f.close()
            return
        return self.load(f)
Exemplo n.º 13
0
 def __init__(self):
     """Initialize local cache-tracker state with a disk-backed cache."""
     root = env.get('WORKDIR')[0]
     cachedir = os.path.join(root, 'cache')
     self.cache = DiskCache(self, cachedir)
     self.client = env.trackerClient
     self.rdds = {}
     if env.isMaster:
         self.locs = env.trackerServer.locs
Exemplo n.º 14
0
    def initialize(cls, is_master):
        """Initialize TreeBroadcast: the master answers guide lookups over
        a REP socket; workers just learn the tracker address from env.

        NOTE(review): the socket is created and bound even on workers,
        where `run` never starts — confirm it is closed elsewhere.
        """
        Broadcast.initialize(is_master)
        sock = env.ctx.socket(zmq.REP)
        sock.setsockopt(zmq.LINGER, 0)
        port = sock.bind_to_random_port("tcp://0.0.0.0")
        cls.tracker_addr = 'tcp://%s:%d' % (cls.host, port)

        def run():
            # Serve uuid -> (guide_addr, num_blocks) lookups; an empty
            # uuid doubles as the shutdown message.
            logger.debug("TreeBroadcast tracker started at %s", 
                    cls.tracker_addr)
            while True:
                uuid = sock.recv_pyobj()
                obj = cls.guides.get(uuid, '')
                sock.send_pyobj(obj and (obj.guide_addr, len(obj.blocks)))
                if not uuid:
                    break
            sock.close()
            logger.debug("TreeBroadcast tracker stopped")

        if is_master:
            cls.tracker_thread = spawn(run)
            env.register('TreeBroadcastTrackerAddr', cls.tracker_addr)
        else:
            cls.tracker_addr = env.get('TreeBroadcastTrackerAddr')

        logger.debug("TreeBroadcast initialized")
Exemplo n.º 15
0
def run_task(task_data):
    """Decode, run one task, and return (mesos state, pickled payload).

    flag: 0 = marshal-encoded, 1 = pickled; +2 = compressed result was
    written to a file served by the local file-shuffle HTTP server and
    `data` holds its URL.

    NOTE(review): gc is disabled here and never re-enabled in this
    function — presumably handled by the caller; confirm.
    """
    try:
        gc.disable()
        task, ntry = cPickle.loads(decompress(task_data))
        setproctitle('dpark worker %s: run task %s' % (Script, task))

        Accumulator.clear()
        result = task.run(ntry)
        accUpdate = Accumulator.values()

        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            workdir = env.get('WORKDIR')
            name = 'task_%s_%s.result' % (task.id, ntry)
            path = os.path.join(workdir, name)
            # Context manager guarantees the file is closed even if the
            # write fails (original used bare open/close).
            with open(path, 'w') as f:
                f.write(data)
            data = LocalFileShuffle.getServerUri() + '/' + name
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps((task.id, Success(), (flag, data), accUpdate), -1)
    except Exception:
        # Fix: `except Exception, e` is Python-2-only syntax and the
        # bound name was never used.
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps((task.id, OtherFailure(msg), None, None), -1)
Exemplo n.º 16
0
def run_task(task, aid):
    """Run `task` and report (mesos state, pickled result tuple).

    flag: 0 = marshal-encoded, 1 = pickled; +2 = result written to a
    file in the DFS-backed workdir, with `data` holding the path.
    """
    try:
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        result = task.run(aid)
        accUpdate = Accumulator.values()
        try:
            flag, data = 0, marshal.dumps(result)
        except ValueError:
            # Unmarshalable results fall back to pickle.
            flag, data = 1, cPickle.dumps(result)

        if len(data) > TASK_RESULT_LIMIT and env.dfs:
            workdir = env.get('WORKDIR')
            path = os.path.join(workdir, str(task.id)+'.result')
            with open(path, 'w') as f:
                f.write(data)
            data = path
            flag += 2

        setproctitle('dpark worker: idle')
        return mesos_pb2.TASK_FINISHED, cPickle.dumps((task.id, Success(), (flag, data), accUpdate), -1)
    except Exception:
        # Fix: `except Exception, e` is Python-2-only syntax and the
        # bound name was never used.
        import traceback
        msg = traceback.format_exc()
        setproctitle('dpark worker: idle')
        return mesos_pb2.TASK_FAILED, cPickle.dumps((task.id, OtherFailure(msg), None, None), -1)
Exemplo n.º 17
0
    def _get_path(self):
        """Return the directory for mutable_dict chunk files.

        Uses the primary workdir when it exists or has >= 66% free
        blocks; otherwise creates the directory in a secondary workdir
        and symlinks the primary path to it.

        Raises RuntimeError when no workdir is configured or usable.
        """
        dirs = env.get('WORKDIR')
        if not dirs:
            raise RuntimeError('No available workdir')

        path = os.path.join(dirs[0], 'mutable_dict')
        if os.path.exists(path):
            return path

        st = os.statvfs(dirs[0])
        ratio = st.f_bfree * 1.0 / st.f_blocks
        if ratio >= 0.66:
            mkdir_p(path)
            return path

        for d in dirs[1:]:
            p = os.path.join(d, 'mutable_dict')
            try:
                os.makedirs(p)
                os.symlink(p, path)
            except OSError:
                # Fix: the original returned `path` unconditionally after
                # the first iteration even when this spill dir failed.
                # Return only if the path exists (e.g. from an earlier
                # run); otherwise try the next candidate.
                if os.path.exists(path):
                    return path
                continue
            return path

        raise RuntimeError('Cannot find suitable workdir')
Exemplo n.º 18
0
    def __init__(self, items):
        """Sort `items` by key and spill them to a gzipped temp file.

        Records are length-prefixed (unsigned int) marshal blobs when
        possible; if marshal fails, the file is rewound and rewritten
        with cPickle. `self.loads` records the matching decoder.
        """
        self.bufsize = 4096 * 1024
        self.buf = None
        self.offset = 0
        dirs = env.get('WORKDIR')
        # Prefer a secondary (spill) workdir when one is configured.
        self.path = path = os.path.join(
            random.choice(dirs[1:]) if dirs[1:] else dirs[0],
            'shuffle-%s.tmp.gz' % uuid.uuid4().hex)

        with atomic_file(path, bufsize=self.bufsize) as f:
            f = gzip.GzipFile(fileobj=f)
            items = sorted(items, key=lambda k_v: k_v[0])
            try:
                for i in items:
                    s = marshal.dumps(i)
                    f.write(struct.pack("I", len(s)))
                    f.write(s)
                self.loads = marshal.loads
            except Exception:
                # NOTE(review): GzipFile.rewind() is documented for read
                # mode only — confirm this write-mode rewind behaves as
                # intended when marshal fails partway through.
                f.rewind()
                for i in items:
                    s = six.moves.cPickle.dumps(i)
                    f.write(struct.pack("I", len(s)))
                    f.write(s)
                self.loads = six.moves.cPickle.loads
            f.close()
Exemplo n.º 19
0
    def initialize(cls, is_master):
        """Set up the TreeBroadcast tracker.

        The master spawns a REP-socket loop answering
        uuid -> (guide_addr, num_blocks) queries; workers just read the
        tracker address from env.

        NOTE(review): the socket is bound on workers too, where the
        serving loop never runs — confirm it is cleaned up elsewhere.
        """
        Broadcast.initialize(is_master)
        sock = env.ctx.socket(zmq.REP)
        sock.setsockopt(zmq.LINGER, 0)
        port = sock.bind_to_random_port("tcp://0.0.0.0")
        cls.tracker_addr = 'tcp://%s:%d' % (cls.host, port)

        def run():
            logger.debug("TreeBroadcast tracker started at %s",
                         cls.tracker_addr)
            while True:
                uuid = sock.recv_pyobj()
                obj = cls.guides.get(uuid, '')
                sock.send_pyobj(obj and (obj.guide_addr, len(obj.blocks)))
                # An empty uuid acts as the shutdown message.
                if not uuid:
                    break
            sock.close()
            logger.debug("TreeBroadcast tracker stopped")

        if is_master:
            cls.tracker_thread = spawn(run)
            env.register('TreeBroadcastTrackerAddr', cls.tracker_addr)
        else:
            cls.tracker_addr = env.get('TreeBroadcastTrackerAddr')

        logger.debug("TreeBroadcast initialized")
Exemplo n.º 20
0
def run_task(task, ntry):
    """Run `task` (attempt `ntry`) and return (mesos state, payload).

    flag: 0 = marshal-encoded, 1 = pickled; +2 = compressed result was
    written to a workdir file served over HTTP, with `data` holding its
    URL.

    NOTE(review): gc is disabled and never re-enabled here — presumably
    the worker process is short-lived; confirm.
    """
    try:
        setproctitle('dpark worker %s: run task %s' % (Script, task))
        Accumulator.clear()
        gc.disable()
        result = task.run(ntry)
        accUpdate = Accumulator.values()

        if marshalable(result):
            flag, data = 0, marshal.dumps(result)
        else:
            flag, data = 1, cPickle.dumps(result, -1)
        data = compress(data)

        if len(data) > TASK_RESULT_LIMIT:
            workdir = env.get('WORKDIR')
            name = 'task_%s_%s.result' % (task.id, ntry)
            path = os.path.join(workdir, name)
            # Context manager closes the file even if the write fails
            # (original used bare open/close).
            with open(path, 'w') as f:
                f.write(data)
            data = LocalFileShuffle.getServerUri() + '/' + name
            flag += 2

        return mesos_pb2.TASK_FINISHED, cPickle.dumps(
            (task.id, Success(), (flag, data), accUpdate), -1)
    except Exception:
        # Fix: `except Exception, e` is Python-2-only syntax and the
        # bound name was never used.
        import traceback
        msg = traceback.format_exc()
        return mesos_pb2.TASK_FAILED, cPickle.dumps(
            (task.id, OtherFailure(msg), None, None), -1)
Exemplo n.º 21
0
    def getOutputFile(cls, shuffle_id, input_id, output_id, datasize=0):
        """
            Return the path of a shuffle output file.

            datasize < 0: disk first
            datasize > 0: memfirst
            datasize = 0: read only, use link
        """
        shuffleDir = env.get('WORKDIR')
        path = os.path.join(shuffleDir[0], str(shuffle_id), str(input_id))
        mkdir_p(path)
        p = os.path.join(path, str(output_id))
        if datasize != 0 and len(shuffleDir) > 1:
            use_disk = datasize < 0
            if datasize > 0:
                # When writing, spill if the primary dir has less than
                # max(datasize, 1 GiB) free or under 66% free blocks.
                st = os.statvfs(path)
                free = st.f_bfree * st.f_bsize
                ratio = st.f_bfree * 1.0 / st.f_blocks
                use_disk = free < max(datasize, 1 << 30) or ratio < 0.66

            if use_disk:
                d2 = os.path.join(random.choice(shuffleDir[1:]),
                                  str(shuffle_id), str(input_id))
                mkdir_p(d2)
                p2 = os.path.join(d2, str(output_id))
                # Replace the primary path with a symlink to the spill
                # file so readers can keep resolving `p`.
                if os.path.exists(p):
                    os.remove(p)
                os.symlink(p2, p)
                if os.path.islink(p2):
                    os.unlink(p2)  # p == p2
                return p2
        return p
Exemplo n.º 22
0
    def start(self, is_master):
        """Start the broadcast guide on the master, or look up the guide
        address on a worker."""
        if not is_master:
            self.guide_addr = env.get('BroadcastGuideAddr')
        else:
            self.guides = {}
            self.guide_addr, self.guide_thread = self.start_guide()
            env.register('BroadcastGuideAddr', self.guide_addr)
        logger.debug("broadcast started: %s", self.guide_addr)
Exemplo n.º 23
0
    def start(self, is_master):
        """Bring up broadcast support for this process.

        Master: create the guide registry, start the guide server and
        publish its address. Worker: read the published address.
        """
        if is_master:
            self.guides = {}
            guide = self.start_guide()
            self.guide_addr, self.guide_thread = guide
            env.register('BroadcastGuideAddr', self.guide_addr)
        else:
            self.guide_addr = env.get('BroadcastGuideAddr')
        logger.debug("broadcast started: %s", self.guide_addr)
Exemplo n.º 24
0
 def __init__(self, isMaster):
     """Wire up the map-output tracker client (and server on the master)."""
     LocalMapOutputTracker.__init__(self, isMaster)
     if not isMaster:
         addr = env.get('MapOutputTrackerAddr')
     else:
         self.server = MapOutputTrackerServer(self.serverUris)
         self.server.start()
         addr = self.server.addr
         env.register('MapOutputTrackerAddr', addr)
     self.client = MapOutputTrackerClient(addr)
     logger.debug("MapOutputTracker started")
Exemplo n.º 25
0
    def _get_path(self):
        # Return (or create) the mutable_dict dir under the primary workdir.
        # NOTE(review): this snippet appears truncated by the scrape — it
        # never returns `path`; the original presumably continues beyond
        # this excerpt. Also `except OSError, e:` is Python-2-only syntax.
        dirs = env.get('WORKDIR')
        if not dirs:
            raise Exception('No available workdir')

        path = os.path.join(dirs[0], 'mutable_dict')
        if not os.path.exists(path):
            try:
                os.mkdir(path)
            except OSError, e:
                # Best-effort: a concurrent creator may have won the race.
                pass
Exemplo n.º 26
0
 def __init__(self, isMaster):
     # Common setup first; then the master starts the tracker server and
     # publishes its address, while workers look the address up from env.
     LocalMapOutputTracker.__init__(self, isMaster)
     if isMaster:
         self.server = MapOutputTrackerServer(self.serverUris)
         self.server.start()
         addr = self.server.addr
         env.register('MapOutputTrackerAddr', addr)
     else:
         addr = env.get('MapOutputTrackerAddr')
     self.client = MapOutputTrackerClient(addr)
     logger.debug("MapOutputTracker started")
Exemplo n.º 27
0
 def getOrCompute(self, rdd, split):
     """Return the cached partition, computing and registering it on a miss."""
     key = (rdd.id, split.index)
     cached = self.cache.get(key)
     if cached is not None:
         logger.debug("Found partition in cache! %s", key)
         return cached

     logger.debug("partition not in cache, %s", key)
     result = self.cache.put(key, rdd.compute(split), is_iterator=True)
     uri = env.get('SERVER_URI')
     if uri:
         # Advertise this node as a source for the freshly cached partition.
         self.addHost(rdd.id, split.index, uri)
     return result
Exemplo n.º 28
0
    def _flush(self):
        """Persist pending updates as a new generation of chunk files,
        publish their URLs via the tracker, and prune chunks more than
        one generation old.

        Raises Exception when the target disk is low on space and
        RuntimeError on a chunk-filename collision.
        """
        if not self.updated:
            return

        updated_keys = {}
        path = self._get_path()
        uri = env.get('SERVER_URI')
        server_uri = '%s/%s' % (uri, os.path.basename(path))

        st = os.statvfs(path)
        ratio = st.f_bfree * 1.0 / st.f_blocks
        if ratio < 0.66:
            raise Exception('Insufficient disk space')

        # Group pending updates by chunk key so each chunk is rewritten once.
        for k, v in self.updated.items():
            key = self._get_key(k)
            if key in updated_keys:
                updated_keys[key][k] = v
            else:
                updated_keys[key] = {k: v}

        # Fix: uuid4().get_hex() is Python-2-only; .hex works on 2 and 3.
        uid = uuid.uuid4().hex
        for key, updated in updated_keys.items():
            # Merge updates over the published chunk; None deletes an entry.
            new = self._fetch_missing(key)
            for k, v in updated.items():
                if v is None:
                    new.pop(k)
                else:
                    new[k] = v

            filename = '%s_%s_%s' % (key, self.generation, uid)
            fn = os.path.join(path, filename)
            if os.path.exists(fn):
                raise RuntimeError('conflict uuid for mutable_dict')

            url = '%s/%s' % (server_uri, filename)
            # Write to a temp file, then rename for atomic publication.
            with open(fn + '.tmp', 'wb+') as f:
                data = compress(cPickle.dumps(new))
                f.write(struct.pack('<I', len(data) + 4) + data)

            os.rename(fn + '.tmp', fn)
            env.trackerClient.call(AddItemMessage('mutable_dict_new:%s' % key, url))

            # Prune chunk files more than one generation old. (Renamed the
            # loop variable: the original reused `f`, shadowing the file
            # handle name above.)
            for old in glob.glob(os.path.join(path, '%s_*' % key)):
                if int(old.split('_')[-2]) < self.generation - 1:
                    try:
                        os.remove(old)
                    except OSError:
                        # Fix: `except OSError, e` is Python-2-only syntax
                        # and the bound name was never used.
                        pass
Exemplo n.º 29
0
    def _flush(self):
        """Write pending updates as a new generation of chunk files into
        an exported workdir and publish their URLs via the tracker."""
        if not self.updated:
            return

        updated_keys = {}
        dirname = "mutable_dict"
        tmppath = env.workdir.alloc_tmp_dir(dirname)
        path = env.workdir.export(tmppath, dirname)
        uri = env.get('SERVER_URI')
        server_uri = '%s/%s' % (uri, os.path.basename(path))

        # Group pending updates by chunk key.
        for k, v in self.updated.items():
            key = self._get_key(k)
            if key in updated_keys:
                updated_keys[key][k] = v
            else:
                updated_keys[key] = {k: v}

        # NOTE(review): uuid4().get_hex() is Python-2-only (.hex on py3).
        uid = uuid_pkg.uuid4().get_hex()
        for key, updated in updated_keys.items():
            # Merge updates over the published chunk; None deletes an entry.
            new = self._fetch_missing(key)
            for k, v in updated.items():
                if v is None:
                    new.pop(k)
                else:
                    new[k] = v

            filename = '%s_%s_%s' % (key, self.generation, uid)
            fn = os.path.join(path, filename)
            if os.path.exists(fn):
                raise RuntimeError('conflict uuid for mutable_dict')

            url = '%s/%s' % (server_uri, filename)
            with atomic_file(fn) as f:
                data = compress(cPickle.dumps(new))
                f.write(struct.pack('<I', len(data) + 4) + data)

            env.trackerClient.call(
                AddItemMessage('mutable_dict_new:%s' % key, url))

            # NOTE(review): the glob pattern '%s-*' % self.uuid does not
            # match the '%s_%s_%s' filenames written above — confirm which
            # files this cleanup is meant to remove.
            files = glob.glob(os.path.join(path, '%s-*' % self.uuid))
            for f in files:
                if int(f.split('_')[-2]) < self.generation - 1:
                    try:
                        os.remove(f)
                    except OSError:
                        pass

        self.updated.clear()
        self.data = LRUDict(self.cacheLimit)
Exemplo n.º 30
0
    def _flush(self):
        """Flush in-memory updates to disk as generation-stamped chunk
        files and announce them through the tracker client."""
        if not self.updated:
            return

        updated_keys = {}
        dirname = "mutable_dict"
        tmppath = env.workdir.alloc_tmp_dir(dirname)
        path = env.workdir.export(tmppath, dirname)
        uri = env.get('SERVER_URI')
        server_uri = '%s/%s' % (uri, os.path.basename(path))

        # Bucket the pending updates per chunk key.
        for k, v in self.updated.items():
            key = self._get_key(k)
            if key in updated_keys:
                updated_keys[key][k] = v
            else:
                updated_keys[key] = {k: v}

        # NOTE(review): get_hex() exists only on Python 2 (.hex on py3).
        uid = uuid_pkg.uuid4().get_hex()
        for key, updated in updated_keys.items():
            # Start from the currently published contents of this chunk;
            # a None value marks a deletion.
            new = self._fetch_missing(key)
            for k, v in updated.items():
                if v is None:
                    new.pop(k)
                else:
                    new[k] = v

            filename = '%s_%s_%s' % (key, self.generation, uid)
            fn = os.path.join(path, filename)
            if os.path.exists(fn):
                raise RuntimeError('conflict uuid for mutable_dict')

            url = '%s/%s' % (server_uri, filename)
            with atomic_file(fn) as f:
                data = compress(cPickle.dumps(new))
                f.write(struct.pack('<I', len(data) + 4) + data)

            env.trackerClient.call(AddItemMessage('mutable_dict_new:%s' % key, url))

            # NOTE(review): pattern '%s-*' % self.uuid looks inconsistent
            # with the underscore-delimited filenames written above.
            files = glob.glob(os.path.join(path, '%s-*' % self.uuid))
            for f in files:
                if int(f.split('_')[-2]) < self.generation - 1:
                    try:
                        os.remove(f)
                    except OSError:
                        pass

        self.updated.clear()
        self.data = LRUDict(self.cacheLimit)
Exemplo n.º 31
0
    def start(self):
        """Start the download manager once: allocate shared state, the
        ZMQ context and the block server, and publish the server address.
        """
        if self._started:
            return

        # Manager-backed dicts are shared with child worker processes.
        self.manager = manager = Manager()
        self.shared_uuid_fn_dict = manager.dict()
        self.shared_uuid_map_dict = manager.dict()
        self.shared_master_blocks = manager.dict()
        self.download_cond = Condition()

        self._started = True
        self.ctx = zmq.Context()
        self.host = socket.gethostname()
        # Lazily start the guide manager if nobody has yet.
        if GUIDE_ADDR not in env.environ:
            start_guide_manager()

        self.guide_addr = env.get(GUIDE_ADDR)
        self.random_inst = random.SystemRandom()
        self.server_addr, self.server_thread = self.start_server()
        self.uuid_state_dict = {}
        self.uuid_map_dict = {}
        self.work_dirs = env.get('WORKDIR')
        self.master_broadcast_blocks = {}
        env.register(DOWNLOAD_ADDR, self.server_addr)
Exemplo n.º 32
0
    def start(self):
        """Idempotent startup of the download manager.

        Creates multiprocessing-shared dicts, the ZMQ context and the
        block-serving thread, then registers the server address in env.
        """
        if self._started:
            return

        self.manager = manager = Manager()
        self.shared_uuid_fn_dict = manager.dict()
        self.shared_uuid_map_dict = manager.dict()
        self.shared_master_blocks = manager.dict()
        self.download_cond = Condition()

        self._started = True
        self.ctx = zmq.Context()
        self.host = socket.gethostname()
        # Start the guide manager only if no address is registered yet.
        if GUIDE_ADDR not in env.environ:
            start_guide_manager()

        self.guide_addr = env.get(GUIDE_ADDR)
        self.random_inst = random.SystemRandom()
        self.server_addr, self.server_thread = self.start_server()
        self.uuid_state_dict = {}
        self.uuid_map_dict = {}
        self.work_dirs = env.get('WORKDIR')
        self.master_broadcast_blocks = {}
        env.register(DOWNLOAD_ADDR, self.server_addr)
Exemplo n.º 33
0
    def getOrCompute(self, rdd, split):
        """Yield the partition's rows, from cache when present; otherwise
        compute the split, cache it, and register this node as a source.
        """
        key = (rdd.id, split.index)
        cachedVal = self.cache.get(key)
        if cachedVal is not None:
            logger.debug("Found partition in cache! %s", key)
            for i in cachedVal:
                yield i

        else:
            logger.debug("partition not in cache, %s", key)
            for i in self.cache.put(key, rdd.compute(split), is_iterator=True):
                yield i

            # Runs only once the consumer has exhausted this generator.
            serve_uri = env.get('SERVER_URI')
            if serve_uri:
                self.addHost(rdd.id, split.index, serve_uri)
Exemplo n.º 34
0
    def __init__(self, isMaster):
        # Master keeps an in-memory cache; workers use a keyspace over
        # the shared mmap-backed cache.
        LocalCacheTracker.__init__(self, isMaster)
        if isMaster:
            self.cache = Cache()
        else:
            self.cache = LocalCache(mmapCache).newKeySpace()

        # Master also runs the tracker server and publishes its address;
        # workers read the address back from the environment.
        if isMaster:
            self.server = CacheTrackerServer(self.locs)
            self.server.start()
            addr = self.server.addr
            env.register('CacheTrackerAddr', addr)
        else:
            addr = env.get('CacheTrackerAddr')

        self.client = CacheTrackerClient(addr)
Exemplo n.º 35
0
    def getOrCompute(self, rdd, split):
        """Generator over the partition's rows; computes and caches the
        split on a miss, then registers this node as a serving host."""
        key = (rdd.id, split.index)
        cachedVal = self.cache.get(key)
        if cachedVal is not None:
            logger.debug("Found partition in cache! %s", key)
            for i in cachedVal:
                yield i

        else:
            logger.debug("partition not in cache, %s", key)
            for i in self.cache.put(key, rdd.compute(split), is_iterator=True):
                yield i

            # Executed only after the consumer exhausts this generator.
            serve_uri = env.get("SERVER_URI")
            if serve_uri:
                self.addHost(rdd.id, split.index, serve_uri)
Exemplo n.º 36
0
    def get(self, key):
        """Load the cached value for ``key`` ((rdd_id, index)) from local
        disk, or fetch it from a peer whose location the tracker knows.

        Returns None when no copy can be found; 404 responses remove the
        stale location from the tracker.
        """
        p = self.get_path(key)
        if os.path.exists(p):
            return self.load(open(p, "rb"))

        # load from other node
        if not env.get("SERVER_URI"):
            return
        rdd_id, index = key
        locs = self.tracker.getCacheUri(rdd_id, index)
        if not locs:
            return

        serve_uri = locs[-1]
        uri = "%s/cache/%s" % (serve_uri, os.path.basename(p))
        try:
            f = urllib.urlopen(uri)
        except IOError:
            # Consistency fix: the sibling implementations log connection
            # failures before propagating them; this one silently raised.
            logger.warning("urlopen cache uri %s failed", uri)
            raise
        if f.code == 404:
            logger.warning("load from cache %s failed", uri)
            self.tracker.removeHost(rdd_id, index, serve_uri)
            f.close()
            return
        return self.load(f)
Exemplo n.º 37
0
 def getOutputFile(cls, shuffle_id, input_id, output_id, datasize=0):
     """Return the shuffle output path, spilling to a random secondary
     workdir when the primary disk is low on space."""
     shuffleDir = env.get('WORKDIR')
     path = os.path.join(shuffleDir[0], str(shuffle_id), str(input_id))
     mkdir_p(path)
     p = os.path.join(path, str(output_id))
     if datasize > 0 and len(shuffleDir) > 1:
         # datasize > 0 means its writing
         st = os.statvfs(path)
         free = st.f_bfree * st.f_bsize
         ratio = st.f_bfree * 1.0 / st.f_blocks
         if free < max(datasize, 1 << 30) or ratio < 0.66:
             # Low on space: write to a secondary dir and leave a symlink
             # at the primary location so readers still resolve `p`.
             d2 = os.path.join(random.choice(shuffleDir[1:]),
                               str(shuffle_id), str(input_id))
             mkdir_p(d2)
             p2 = os.path.join(d2, str(output_id))
             if os.path.exists(p):
                 os.remove(p)
             os.symlink(p2, p)
             if os.path.islink(p2):
                 os.unlink(p2)  # p == p2
             return p2
     return p
Exemplo n.º 38
0
 def getServerUri(cls):
     """Return the file server URI registered in the environment."""
     uri = env.get('SERVER_URI')
     return uri
Exemplo n.º 39
0
 def get_tmp(cls):
     """Return a fresh temp-file path in a (preferably secondary) workdir."""
     dirs = env.get('WORKDIR')
     spill_dirs = dirs[1:]
     if spill_dirs:
         target = random.choice(spill_dirs)
     else:
         target = dirs[0]
     mkdir_p(target)
     name = 'shuffle-%s.tmp' % uuid.uuid4().hex
     return os.path.join(target, name)
Exemplo n.º 40
0
 def get_tmp(cls):
     """Pick a workdir (random secondary if any, else the primary) and
     return a unique shuffle temp-file path inside it."""
     dirs = env.get('WORKDIR')
     chosen = dirs[0] if not dirs[1:] else random.choice(dirs[1:])
     mkdir_p(chosen)
     return os.path.join(chosen, 'shuffle-%s.tmp' % uuid.uuid4().hex)
Exemplo n.º 41
0
 def getServerUri(cls):
     """Expose the SERVER_URI value from the shared environment."""
     return env.get('SERVER_URI')
Exemplo n.º 42
0
    def getExecutorInfo(self, framework_id):
        """Build the Mesos ExecutorInfo for this framework.

        Configures the executor command, UID/GID environment variables,
        an optional Docker container (mounting passwd/group read-only,
        MooseFS mounts, dpark workdirs, and any user-supplied volumes),
        resource reservations, and the pickled bootstrap data.
        """
        info = Dict()
        info.framework_id.value = framework_id
        info.command.value = '%s %s' % (
            sys.executable,
            os.path.abspath(
                os.path.join(os.path.dirname(__file__), 'executor.py')))
        info.executor_id.value = env.get('DPARK_ID', 'default')
        info.command.environment.variables = variables = []

        v = Dict()
        variables.append(v)
        v.name = 'UID'
        v.value = str(os.getuid())

        v = Dict()
        variables.append(v)
        v.name = 'GID'
        v.value = str(os.getgid())

        container_image = self._get_container_image()
        if container_image:
            info.container.type = 'DOCKER'
            info.container.docker.image = container_image
            info.container.docker.parameters = parameters = []
            p = Dict()
            p.key = 'memory-swap'
            p.value = '-1'
            parameters.append(p)

            info.container.volumes = volumes = []
            # passwd/group read-only so in-container UIDs resolve.
            for path in ['/etc/passwd', '/etc/group']:
                v = Dict()
                volumes.append(v)
                v.host_path = v.container_path = path
                v.mode = 'RO'

            for path in conf.MOOSEFS_MOUNT_POINTS:
                v = Dict()
                volumes.append(v)
                v.host_path = v.container_path = path
                v.mode = 'RW'

            for path in conf.DPARK_WORK_DIR.split(','):
                v = Dict()
                volumes.append(v)
                v.host_path = v.container_path = path
                v.mode = 'RW'

            def _mount_volume(_volumes, _host_path, _container_path, _mode):
                # Append one volume entry; the host path is optional.
                _v = Dict()
                _volumes.append(_v)
                _v.container_path = _container_path
                _v.mode = _mode
                if _host_path:
                    _v.host_path = _host_path

            if self.options.volumes:
                # Accept host:container:mode, host:container, or container.
                for volume in self.options.volumes.split(','):
                    fields = volume.split(':')
                    if len(fields) == 3:
                        host_path, container_path, mode = fields
                        mode = mode.upper()
                        assert mode in ('RO', 'RW')
                    elif len(fields) == 2:
                        host_path, container_path = fields
                        mode = 'RW'
                    elif len(fields) == 1:
                        container_path, = fields
                        host_path = ''
                        mode = 'RW'
                    else:
                        # Fix: the volume string was passed as a second
                        # positional argument (logger-style) instead of
                        # being %-formatted into the message.
                        raise Exception('cannot parse volume %s' % volume)
                    _mount_volume(volumes, host_path, container_path, mode)

        info.resources = resources = []

        mem = Dict()
        resources.append(mem)
        mem.name = 'mem'
        mem.type = 'SCALAR'
        mem.scalar.value = EXECUTOR_MEMORY

        cpus = Dict()
        resources.append(cpus)
        cpus.name = 'cpus'
        cpus.type = 'SCALAR'
        cpus.scalar.value = EXECUTOR_CPUS

        Script = os.path.realpath(sys.argv[0])
        info.name = Script

        # Bootstrap data unpacked by executor.py on the worker.
        info.data = encode_data(
            marshal.dumps((Script, os.getcwd(), sys.path, dict(os.environ),
                           self.task_per_node, self.out_logger.addr,
                           self.err_logger.addr, self.logLevel, self.color,
                           env.environ)))
        assert len(info.data) < (50 << 20), \
            'Info data too large: %s' % (len(info.data),)
        return info