Example #1
    def start(self):
        if self.started:
            return
        self.started = True
        logger.debug("start env in %s", os.getpid())
        for d in self.workdir:
            util.mkdir_p(d)

        if 'TRACKER_ADDR' not in self.environ:
            from dpark.tracker import TrackerServer
            trackerServer = self.trackerServer = TrackerServer()
            self.trackerServer.start()
            self.register('TRACKER_ADDR', trackerServer.addr)

        from dpark.tracker import TrackerClient
        addr = self.get('TRACKER_ADDR')
        self.trackerClient = TrackerClient(addr)

        from dpark.cache import CacheTracker
        self.cacheTracker = CacheTracker()

        from dpark.shuffle import MapOutputTracker
        self.mapOutputTracker = MapOutputTracker()
        from dpark.shuffle import ParallelShuffleFetcher
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import start_guide_manager, GUIDE_ADDR
        if GUIDE_ADDR not in self.environ:
            start_guide_manager()

        logger.debug("env started")
Example #2
File: env.py Project: npc7/dpark
    def start(self, isMaster, environ={}, isLocal=False):
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        self.isMaster = isMaster
        self.isLocal = isLocal
        if isMaster:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(",")
            if isLocal:
                root = roots[0]  # for local mode
                if not os.path.exists(root):
                    os.mkdir(root, 0777)
                    os.chmod(root, 0777)  # because of umask

            name = "%s-%s-%d" % (time.strftime("%Y%m%d-%H%M%S"), socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(root, name) for root in roots]
            for d in self.workdir:
                if not os.path.exists(d):
                    try:
                        os.makedirs(d)
                    except OSError:
                        pass
            self.environ["WORKDIR"] = self.workdir
            self.environ["COMPRESS"] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ["COMPRESS"] != util.COMPRESS:
                raise Exception("no %s available" % self.environ["COMPRESS"])

        self.ctx = zmq.Context()

        from dpark.cache import CacheTracker, LocalCacheTracker

        if isLocal:
            self.cacheTracker = LocalCacheTracker(isMaster)
        else:
            self.cacheTracker = CacheTracker(isMaster)

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker, LocalMapOutputTracker

        LocalFileShuffle.initialize(isMaster)
        if isLocal:
            self.mapOutputTracker = LocalMapOutputTracker(isMaster)
        else:
            self.mapOutputTracker = MapOutputTracker(isMaster)
        from dpark.shuffle import SimpleShuffleFetcher, ParallelShuffleFetcher

        # self.shuffleFetcher = SimpleShuffleFetcher()
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import TheBroadcast

        TheBroadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")
Example #3
    def start(self, isMaster, environ={}):
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        self.isMaster = isMaster
        if isMaster:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(',')
            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                                 socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(root, name) for root in roots]
            for d in self.workdir:
                if not os.path.exists(d):
                    try:
                        os.makedirs(d)
                    except OSError:
                        pass
            self.environ['SERVER_URI'] = 'file://' + self.workdir[0]
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        from dpark.tracker import TrackerServer, TrackerClient
        if isMaster:
            self.trackerServer = TrackerServer()
            self.trackerServer.start()
            addr = self.trackerServer.addr
            env.register('TrackerAddr', addr)
        else:
            addr = env.get('TrackerAddr')

        self.trackerClient = TrackerClient(addr)

        from dpark.cache import CacheTracker
        self.cacheTracker = CacheTracker()

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        self.mapOutputTracker = MapOutputTracker()
        from dpark.shuffle import SimpleShuffleFetcher, ParallelShuffleFetcher
        #self.shuffleFetcher = SimpleShuffleFetcher()
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import start_manager
        start_manager(isMaster)

        self.started = True
        logger.debug("env started")
Example #4
File: env.py Project: npc7/dpark
    def start(self, isMaster, environ={}, isLocal=False):
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(),
                isMaster, environ)
        self.isMaster = isMaster
        self.isLocal = isLocal
        if isMaster:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(',')
            if isLocal:
                root = roots[0] # for local mode 
                if not os.path.exists(root):
                    os.mkdir(root, 0777)
                    os.chmod(root, 0777) # because of umask

            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(root, name) for root in roots]
            for d in self.workdir:
                if not os.path.exists(d):
                    try:
                        os.makedirs(d)
                    except OSError:
                        pass
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        from dpark.cache import CacheTracker, LocalCacheTracker
        if isLocal:
            self.cacheTracker = LocalCacheTracker(isMaster)
        else:
            self.cacheTracker = CacheTracker(isMaster)

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker, LocalMapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        if isLocal:
            self.mapOutputTracker = LocalMapOutputTracker(isMaster)
        else:
            self.mapOutputTracker = MapOutputTracker(isMaster)
        from dpark.shuffle import SimpleShuffleFetcher, ParallelShuffleFetcher
        #self.shuffleFetcher = SimpleShuffleFetcher()
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import TheBroadcast
        TheBroadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")
Example #5
    def start(self, isMaster, environ={}):
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        self.isMaster = isMaster
        if isMaster:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(',')
            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                                 socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(root, name) for root in roots]
            try:
                for d in self.workdir:
                    util.mkdir_p(d)
            except OSError as e:
                if environ.get('is_local', False):
                    raise e

            self.environ['SERVER_URI'] = 'file://' + self.workdir[0]
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        from dpark.tracker import TrackerServer, TrackerClient
        if isMaster:
            self.trackerServer = TrackerServer()
            self.trackerServer.start()
            addr = self.trackerServer.addr
            env.register('TrackerAddr', addr)
        else:
            addr = env.get('TrackerAddr')

        self.trackerClient = TrackerClient(addr)

        from dpark.cache import CacheTracker
        self.cacheTracker = CacheTracker()

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        self.mapOutputTracker = MapOutputTracker()
        from dpark.shuffle import ParallelShuffleFetcher
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import start_manager
        start_manager(isMaster)

        self.started = True
        logger.debug("env started")
Example #6
    def start_slave(self):
        """Called after env is updated."""
        if self.slave_started:
            return
        self.slave_started = True

        self._start_common()

        compress = self.get(self.COMPRESS)
        if compress != utils.COMPRESS:
            raise Exception("no %s available" % compress)

        # init clients
        from dpark.tracker import TrackerClient
        self.trackerClient = TrackerClient(self.get(self.TRACKER_ADDR))

        from dpark.cache import CacheTracker
        self.cacheTracker = CacheTracker()

        self.workdir.init(env.get(self.DPARK_ID))
        logger.debug("env started")
Example #7
File: env.py Project: XuYong/dpark
class DparkEnv:
    environ = {}
    @classmethod
    def register(cls, name, value):
        cls.environ[name] = value
    @classmethod
    def get(cls, name, default=None):
        return cls.environ.get(name, default)

    def __init__(self):
        self.started = False

    def start(self, isMaster, environ={}, isLocal=False):
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(),
                isMaster, environ)
        self.isMaster = isMaster
        self.isLocal = isLocal
        if isMaster:
            roots = conf.DPARK_WORK_DIR.split(',')
            if isLocal:
                root = roots[0] # for local mode 
                if not os.path.exists(root):
                    os.mkdir(root, 0777)
                    os.chmod(root, 0777) # because of umask

            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(root, name) for root in roots]
            for d in self.workdir:
                if not os.path.exists(d):
                    try:
                        os.makedirs(d)
                    except OSError:
                        pass
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        from dpark.cache import CacheTracker, LocalCacheTracker
        if isLocal:
            self.cacheTracker = LocalCacheTracker(isMaster)
        else:
            self.cacheTracker = CacheTracker(isMaster)

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker, LocalMapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        if isLocal:
            self.mapOutputTracker = LocalMapOutputTracker(isMaster)
        else:
            self.mapOutputTracker = MapOutputTracker(isMaster)
        from dpark.shuffle import SimpleShuffleFetcher, ParallelShuffleFetcher
        #self.shuffleFetcher = SimpleShuffleFetcher()
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import TheBroadcast
        TheBroadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")

    def stop(self):
        if not getattr(self, 'started', False):
            return
        logger.debug("stop env in %s", os.getpid())
        self.shuffleFetcher.stop()
        self.cacheTracker.stop()
        self.mapOutputTracker.stop()
        from dpark.broadcast import TheBroadcast
        TheBroadcast.shutdown()
       
        logger.debug("cleaning workdir ...")
        for d in self.workdir:
            shutil.rmtree(d, True)
        logger.debug("done.")

        self.started = False
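
A hedged usage sketch for this variant's API (argument names are taken from the snippet above; in dpark these calls are normally made by the scheduler and executor rather than by user code):

env = DparkEnv()
env.start(isMaster=True, isLocal=True)   # local master: creates workdirs, caches, trackers
try:
    pass  # run jobs here
finally:
    env.stop()                           # stops fetchers/trackers and removes the workdirs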
Example #8
class DparkEnv:
    environ = {}

    @classmethod
    def register(cls, name, value):
        cls.environ[name] = value

    @classmethod
    def get(cls, name, default=None):
        return cls.environ.get(name, default)

    def __init__(self):
        self.started = False

    def start(self, isMaster, environ={}):
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        self.isMaster = isMaster
        if isMaster:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(',')
            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                                 socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(root, name) for root in roots]
            try:
                for d in self.workdir:
                    util.mkdir_p(d)
            except OSError as e:
                if environ.get('is_local', False):
                    raise e

            self.environ['SERVER_URI'] = 'file://' + self.workdir[0]
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        from dpark.tracker import TrackerServer, TrackerClient
        if isMaster:
            self.trackerServer = TrackerServer()
            self.trackerServer.start()
            addr = self.trackerServer.addr
            env.register('TrackerAddr', addr)
        else:
            addr = env.get('TrackerAddr')

        self.trackerClient = TrackerClient(addr)

        from dpark.cache import CacheTracker
        self.cacheTracker = CacheTracker()

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        self.mapOutputTracker = MapOutputTracker()
        from dpark.shuffle import ParallelShuffleFetcher
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import start_manager
        start_manager(isMaster)

        self.started = True
        logger.debug("env started")

    def stop(self):
        if not getattr(self, 'started', False):
            return
        logger.debug("stop env in %s", os.getpid())
        self.shuffleFetcher.stop()
        self.cacheTracker.stop()
        self.mapOutputTracker.stop()
        if self.isMaster:
            self.trackerServer.stop()
        from dpark.broadcast import stop_manager
        stop_manager()

        logger.debug("cleaning workdir ...")
        for d in self.workdir:
            shutil.rmtree(d, True)
        logger.debug("done.")

        self.started = False
Example #9
File: env.py Project: 0x55aa/dpark
class DparkEnv:
    environ = {}

    @classmethod
    def register(cls, name, value):
        cls.environ[name] = value

    @classmethod
    def get(cls, name, default=None):
        return cls.environ.get(name, default)

    def __init__(self):
        self.started = False

    def start(self, isMaster, environ={}):
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        self.isMaster = isMaster
        if isMaster:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(',')
            name = '%s-%s' % (socket.gethostname(), uuid.uuid4())
            self.workdir = [os.path.join(root, name) for root in roots]
            try:
                for d in self.workdir:
                    util.mkdir_p(d)
            except OSError as e:
                if environ.get('is_local', False):
                    raise e

            self.environ['SERVER_URI'] = 'file://' + self.workdir[0]
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        from dpark.tracker import TrackerServer, TrackerClient
        if isMaster:
            self.trackerServer = TrackerServer()
            self.trackerServer.start()
            addr = self.trackerServer.addr
            env.register('TrackerAddr', addr)
        else:
            addr = env.get('TrackerAddr')

        self.trackerClient = TrackerClient(addr)

        from dpark.cache import CacheTracker
        self.cacheTracker = CacheTracker()

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        self.mapOutputTracker = MapOutputTracker()
        from dpark.shuffle import ParallelShuffleFetcher
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import start_manager
        start_manager(isMaster)

        self.started = True
        logger.debug("env started")

    def stop(self):
        if not getattr(self, 'started', False):
            return
        logger.debug("stop env in %s", os.getpid())
        self.shuffleFetcher.stop()
        self.cacheTracker.stop()
        self.mapOutputTracker.stop()
        if self.isMaster:
            self.trackerServer.stop()
        from dpark.broadcast import stop_manager
        stop_manager()

        logger.debug("cleaning workdir ...")
        for d in self.workdir:
            shutil.rmtree(d, True)
        logger.debug("done.")

        self.started = False
Example #10
class DparkEnv:
    environ = {}

    @classmethod
    def register(cls, name, value):
        cls.environ[name] = value

    @classmethod
    def get(cls, name, default=None):
        return cls.environ.get(name, default)

    def __init__(self):
        self.started = False

    def start(self, isMaster, environ={}, isLocal=False):
        if self.started:
            return
        logger.debug("start env in %s: %s %s", os.getpid(), isMaster, environ)
        self.isMaster = isMaster
        self.isLocal = isLocal
        if isMaster:
            roots = conf.DPARK_WORK_DIR.split(',')
            if isLocal:
                root = roots[0]  # for local mode
                if not os.path.exists(root):
                    os.mkdir(root, 0777)
                    os.chmod(root, 0777)  # because of umask

            name = '%s-%s-%d' % (time.strftime("%Y%m%d-%H%M%S"),
                                 socket.gethostname(), os.getpid())
            self.workdir = [os.path.join(root, name) for root in roots]
            for d in self.workdir:
                if not os.path.exists(d):
                    try:
                        os.makedirs(d)
                    except OSError:
                        pass
            self.environ['WORKDIR'] = self.workdir
            self.environ['COMPRESS'] = util.COMPRESS
        else:
            self.environ.update(environ)
            if self.environ['COMPRESS'] != util.COMPRESS:
                raise Exception("no %s available" % self.environ['COMPRESS'])

        self.ctx = zmq.Context()

        from dpark.cache import CacheTracker, LocalCacheTracker
        if isLocal:
            self.cacheTracker = LocalCacheTracker(isMaster)
        else:
            self.cacheTracker = CacheTracker(isMaster)

        from dpark.shuffle import LocalFileShuffle, MapOutputTracker, LocalMapOutputTracker
        LocalFileShuffle.initialize(isMaster)
        if isLocal:
            self.mapOutputTracker = LocalMapOutputTracker(isMaster)
        else:
            self.mapOutputTracker = MapOutputTracker(isMaster)
        from dpark.shuffle import SimpleShuffleFetcher, ParallelShuffleFetcher
        #self.shuffleFetcher = SimpleShuffleFetcher()
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import TheBroadcast
        TheBroadcast.initialize(isMaster)

        self.started = True
        logger.debug("env started")

    def stop(self):
        if not getattr(self, 'started', False):
            return
        logger.debug("stop env in %s", os.getpid())
        self.shuffleFetcher.stop()
        self.cacheTracker.stop()
        self.mapOutputTracker.stop()
        from dpark.broadcast import TheBroadcast
        TheBroadcast.shutdown()

        logger.debug("cleaning workdir ...")
        for d in self.workdir:
            shutil.rmtree(d, True)
        logger.debug("done.")

        self.started = False
Example #11
class DparkEnv:
    environ = {}
    trackerServer = None

    @classmethod
    def register(cls, name, value):
        cls.environ[name] = value

    @classmethod
    def get(cls, name, default=None):
        return cls.environ.get(name, default)

    def __init__(self):
        self.started = False
        self.task_stats = TaskStats()
        name = self.get('DPARK_ID')
        if name is None:
            name = '%s-%s' % (socket.gethostname(), uuid.uuid4())
            self.register('DPARK_ID', name)

        self.workdir = self.get('WORKDIR')
        if self.workdir is None:
            roots = conf.DPARK_WORK_DIR
            if isinstance(roots, str):
                roots = roots.split(',')

            if not roots:
                logger.warning('Cannot get WORKDIR, use temp dir instead.')
                roots = [tempfile.gettempdir()]

            self.workdir = [os.path.join(root, name) for root in roots]
            self.register('WORKDIR', self.workdir)

        if 'SERVER_URI' not in self.environ:
            self.register('SERVER_URI', 'file://' + self.workdir[0])

        compress = self.get('COMPRESS')
        if compress is None:
            self.register('COMPRESS', util.COMPRESS)
            compress = self.get('COMPRESS')

        if compress != util.COMPRESS:
            raise Exception("no %s available" % compress)

    def start(self):
        if self.started:
            return
        self.started = True
        logger.debug("start env in %s", os.getpid())
        for d in self.workdir:
            util.mkdir_p(d)

        if 'TRACKER_ADDR' not in self.environ:
            from dpark.tracker import TrackerServer
            trackerServer = self.trackerServer = TrackerServer()
            self.trackerServer.start()
            self.register('TRACKER_ADDR', trackerServer.addr)

        from dpark.tracker import TrackerClient
        addr = self.get('TRACKER_ADDR')
        self.trackerClient = TrackerClient(addr)

        from dpark.cache import CacheTracker
        self.cacheTracker = CacheTracker()

        from dpark.shuffle import MapOutputTracker
        self.mapOutputTracker = MapOutputTracker()
        from dpark.shuffle import ParallelShuffleFetcher
        self.shuffleFetcher = ParallelShuffleFetcher(2)

        from dpark.broadcast import start_guide_manager, GUIDE_ADDR
        if GUIDE_ADDR not in self.environ:
            start_guide_manager()

        logger.debug("env started")

    def stop(self):
        if not getattr(self, 'started', False):
            return
        self.started = False
        logger.debug("stop env in %s", os.getpid())
        self.trackerClient.stop()
        self.shuffleFetcher.stop()
        self.cacheTracker.stop()
        self.mapOutputTracker.stop()
        if self.trackerServer is not None:
            self.trackerServer.stop()
            self.environ.pop('TRACKER_ADDR', None)

        from dpark.broadcast import stop_manager
        stop_manager()

        logger.debug("cleaning workdir ...")
        for d in self.workdir:
            shutil.rmtree(d, True)
        logger.debug("done.")
Example #12
class DparkEnv(object):

    SERVER_URI = "SERVER_URI"
    COMPRESS = "COMPRESS"
    TRACKER_ADDR = "TRACKER_ADDR"
    DPARK_ID = "DPARK_ID"

    def __init__(self):
        self.environ = {}
        self.task_stats = TaskStats()
        self.workdir = WorkDir()

        self.master_started = False
        self.slave_started = False

        # master
        self.trackerServer = None
        self.cacheTrackerServer = None

        # slave
        #   trackerServer clients
        self.trackerClient = None
        self.cacheTracker = None
        #   threads
        self.meminfo = MemoryChecker()
        self.shuffleFetcher = None

    def register(self, name, value):
        self.environ[name] = value

    def get(self, name, default=None):
        return self.environ.get(name, default)

    @property
    def server_uri(self):
        return self.environ[self.SERVER_URI]

    def start_master(self):
        if self.master_started:
            return
        self.master_started = True
        logger.debug("start master env in pid %s", os.getpid())

        self._start_common()
        self.register(self.COMPRESS, utils.COMPRESS)

        dpark_id = '{}-{}'.format(socket.gethostname(), uuid_pkg.uuid4())
        logger.info("dpark_id = %s", dpark_id)
        self.register(self.DPARK_ID, dpark_id)

        self.workdir.init(dpark_id)

        self.register(self.SERVER_URI, 'file://' + self.workdir.main)  # overwritten when the executor starts

        # start central servers in the scheduler
        from dpark.tracker import TrackerServer
        self.trackerServer = TrackerServer()
        self.trackerServer.start()
        self.register(self.TRACKER_ADDR, self.trackerServer.addr)

        from dpark.cache import CacheTrackerServer
        self.cacheTrackerServer = CacheTrackerServer()

        from dpark.broadcast import start_guide_manager
        start_guide_manager()

        self._start_common()

        logger.debug("master env started")

    def _start_common(self):
        if not self.shuffleFetcher:
            from dpark.shuffle import ParallelShuffleFetcher
            self.shuffleFetcher = ParallelShuffleFetcher(2)  # started lazily; also used in the scheduler (rdd.iterator)

    def start_slave(self):
        """Called after env is updated."""
        if self.slave_started:
            return
        self.slave_started = True

        self._start_common()

        compress = self.get(self.COMPRESS)
        if compress != utils.COMPRESS:
            raise Exception("no %s available" % compress)

        # init clients
        from dpark.tracker import TrackerClient
        self.trackerClient = TrackerClient(self.get(self.TRACKER_ADDR))

        from dpark.cache import CacheTracker
        self.cacheTracker = CacheTracker()

        self.workdir.init(env.get(self.DPARK_ID))
        logger.debug("env started")

    def stop(self):
        logger.debug("stop env in pid %s", os.getpid())

        was_started = self.master_started or self.slave_started

        if self.master_started:
            if self.trackerServer is not None:
                self.trackerServer.stop()
                self.environ.pop(self.TRACKER_ADDR, None)
            self.master_started = False

        if self.slave_started:
            self.trackerClient.stop()
            self.cacheTracker.stop()
            self.slave_started = False

        # use the flag captured above: the started flags were just cleared,
        # so re-testing them here would never stop the shuffle fetcher
        if was_started and self.shuffleFetcher:
            self.shuffleFetcher.stop()

        from dpark.broadcast import stop_manager
        stop_manager()

        logger.debug("cleaning workdir ...")
        self.workdir.clean_up()
        logger.debug("env stopped.")