示例#1
0
文件: master.py 项目: laocheng/cola
    def stop_job(self, job_name):
        """Run the stop and clear stages for a job, then unregister it.

        The 'stop_job' stage is run across the workers of the job's master
        first, followed by the 'clear_job' stage. Afterwards the job is
        removed from ``self.job_tracker`` and a debug line is logged.

        :param job_name: name under which the job is known to the tracker.
        """
        master = self.job_tracker.get_job_master(job_name)

        # The tuple order is the execution order: 'stop_job' before 'clear_job'.
        for stage_name in ('stop_job', 'clear_job'):
            Stage(master.workers, stage_name).barrier(True, job_name)

        self.job_tracker.remove_job(job_name)
        self.logger.debug('stop job: %s' % job_name)
示例#2
0
文件: master.py 项目: Andelfin/cola
    def run_job(self, job_name, unzip=False,
                wait_for_workers=False):
        """Register a job, ship its archive to the workers and start it.

        Steps, in order: optionally wait for at least one worker, optionally
        unzip the job, import its description, create and register a
        ``JobMaster``, send ``<zip_dir>/<job_name>.zip`` to every worker of
        that master, then run the 'prepare' and 'run_job' stages.

        :param job_name: name of the job; also the directory name under
            ``self.job_dir`` and the stem of the zip file.
        :param unzip: when true, ``self._unzip(job_name)`` is called first.
        :param wait_for_workers: when true, block until
            ``self.worker_tracker.workers`` is non-empty.
        :return: always ``None``; failures of the stages are only logged.
        """
        if wait_for_workers:
            # Re-check for workers every 3 seconds. ``self.stopped`` is
            # presumably a threading.Event -- confirm. A truthy result from
            # wait() aborts the whole call.
            while not self.stopped.is_set():
                if len(self.worker_tracker.workers) > 0:
                    break
                if self.stopped.wait(3):
                    return

        if unzip:
            self._unzip(job_name)

        desc = import_job_desc(os.path.join(self.job_dir, job_name))
        master = JobMaster(self.ctx, job_name, desc,
                           self.worker_tracker.workers.keys())
        master.init()
        self.job_tracker.register_job(job_name, master)
        self._register_runned_job(job_name, desc)

        # Every worker of this job gets the job's zip before any stage runs.
        archive = os.path.join(self.zip_dir, job_name+'.zip')
        for worker in master.workers:
            FileTransportClient(worker, archive).send_file()

        self.logger.debug(
            'entering the master prepare stage, job id: %s' % job_name)
        self.logger.debug(
            'job available workers: %s' % master.workers)
        prepare_stage = Stage(master.workers, 'prepare', logger=self.logger)
        if not prepare_stage.barrier(True, job_name):
            # A failed prepare stage means the run stage is never attempted.
            self.logger.error("prepare for running failed")
            return

        self.logger.debug(
            'entering the master run_job stage, job id: %s' % job_name)
        run_stage = Stage(master.workers, 'run_job', logger=self.logger)
        if not run_stage.barrier(True, job_name):
            self.logger.error("run job failed, job id: %s" % job_name)
示例#3
0
文件: master.py 项目: laocheng/cola
    def pack_job_error(self, job_name):
        """Bundle a job's error zip files into a single archive.

        Runs the 'pack_job_error' stage across the workers of the job's
        master. Every entry of ``self.zip_dir`` whose name ends with
        ``'<job_name>_errors.zip'`` is then moved into a fresh temporary
        directory, which is handed to ``ZipHandler.compress`` together with
        the target path ``<working_dir>/errors/<job_name>_errors.zip``.
        The temporary directory is always removed afterwards.

        :param job_name: name of the job whose error files are collected.
        :return: path of the target archive.
        :raises OSError: if the ``errors`` directory cannot be created.
        """
        job_master = self.job_tracker.get_job_master(job_name)
        stage = Stage(job_master.workers, 'pack_job_error')
        stage.barrier(True, job_name)

        error_dir = os.path.join(self.working_dir, 'errors')
        # Create first and tolerate "already there": an exists() check
        # followed by makedirs() fails if another caller creates the
        # directory in between.
        try:
            os.makedirs(error_dir)
        except OSError:
            if not os.path.isdir(error_dir):
                raise

        # The same file name serves as the target's basename and as the
        # suffix that selects the files to collect from zip_dir.
        suffix = '%s_errors.zip' % job_name
        error_filename = os.path.join(error_dir, suffix)

        temp_dir = tempfile.mkdtemp()
        try:
            for name in os.listdir(self.zip_dir):
                if name.endswith(suffix):
                    shutil.move(os.path.join(self.zip_dir, name), temp_dir)
            # Presumably writes the contents of temp_dir into error_filename;
            # confirm against ZipHandler.
            ZipHandler.compress(error_filename, temp_dir)
        finally:
            shutil.rmtree(temp_dir)

        return error_filename
示例#4
0
文件: master.py 项目: awai0707/cola
 def pack_job_error(self, job_name):
     """Gather the error zips of a job and compress them into one file.

     After running the 'pack_job_error' stage across the job master's
     workers, each file in ``self.zip_dir`` whose name ends with
     ``'<job_name>_errors.zip'`` is moved to a temporary directory. That
     directory and the path ``<working_dir>/errors/<job_name>_errors.zip``
     are passed to ``ZipHandler.compress``; the temporary directory is
     deleted whether or not compression succeeds.

     :param job_name: name of the job to pack errors for.
     :return: path of the resulting archive.
     :raises OSError: if the ``errors`` directory cannot be created.
     """
     job_master = self.job_tracker.get_job_master(job_name)
     stage = Stage(job_master.workers, 'pack_job_error')
     stage.barrier(True, job_name)

     error_dir = os.path.join(self.working_dir, 'errors')
     # Attempt the creation and accept an existing directory, instead of
     # checking exists() first, so a concurrent creator cannot make
     # makedirs() fail.
     try:
         os.makedirs(error_dir)
     except OSError:
         if not os.path.isdir(error_dir):
             raise

     # One name, two uses: basename of the result and match suffix below.
     suffix = '%s_errors.zip' % job_name
     error_filename = os.path.join(error_dir, suffix)

     temp_dir = tempfile.mkdtemp()
     try:
         for name in os.listdir(self.zip_dir):
             if name.endswith(suffix):
                 shutil.move(os.path.join(self.zip_dir, name), temp_dir)
         # NOTE(review): assumed to zip temp_dir into error_filename.
         ZipHandler.compress(error_filename, temp_dir)
     finally:
         shutil.rmtree(temp_dir)

     return error_filename
示例#5
0
文件: master.py 项目: awai0707/cola
    def stop_job(self, job_name):
        """Stop and clear a job on its workers, then forget the job.

        Looks up the job's master, runs the 'stop_job' stage followed by the
        'clear_job' stage across that master's workers, removes the job from
        ``self.job_tracker`` and writes a debug log entry.

        :param job_name: name of the job to stop.
        """
        owner = self.job_tracker.get_job_master(job_name)

        # Each stage is built only after the previous one has finished, so
        # the worker list is read afresh for the second stage.
        Stage(owner.workers, 'stop_job').barrier(True, job_name)
        Stage(owner.workers, 'clear_job').barrier(True, job_name)

        self.job_tracker.remove_job(job_name)

        message = 'stop job: %s' % job_name
        self.logger.debug(message)
示例#6
0
    def run_job(self, job_name, unzip=False,
                wait_for_workers=False):
        """Set up a job on the master and launch it on the workers.

        Optionally waits for a worker and unzips the job, then imports the
        job description, builds and registers a ``JobMaster``, sends the
        job's zip file to each of its workers and finally runs the 'prepare'
        and 'run_job' stages. The results of the stage barriers are not
        inspected.

        :param job_name: job name; used for the job directory, the zip file
            name and the tracker registration.
        :param unzip: call ``self._unzip(job_name)`` before importing.
        :param wait_for_workers: block until at least one worker is tracked.
        """
        if wait_for_workers:
            # Loop while no stop was requested and no worker is known. Each
            # wait lasts up to 3 seconds, and a truthy wait result ends the
            # call without running anything.
            while (not self.stopped.is_set()
                   and len(self.worker_tracker.workers) == 0):
                if self.stopped.wait(3):
                    return

        if unzip:
            self._unzip(job_name)

        description = import_job_desc(os.path.join(self.job_dir, job_name))
        coordinator = JobMaster(self.ctx, job_name, description,
                                self.worker_tracker.workers.keys())
        coordinator.init()
        self.job_tracker.register_job(job_name, coordinator)
        self._register_runned_job(job_name, description)

        # Push the job's zip to every worker before the stages start.
        package = os.path.join(self.zip_dir, job_name+'.zip')
        for target in coordinator.workers:
            FileTransportClient(target, package).send_file()

        self.logger.debug(
            'entering the master prepare stage, job id: %s' % job_name)
        self.logger.debug(
            'job available workers: %s' % coordinator.workers)
        Stage(coordinator.workers, 'prepare').barrier(True, job_name)

        self.logger.debug(
            'entering the master run_job stage, job id: %s' % job_name)
        Stage(coordinator.workers, 'run_job').barrier(True, job_name)
示例#7
0
文件: master.py 项目: laocheng/cola
 def _shutdown_all_workers(self):
     """Run the 'shutdown' stage across every worker in the tracker."""
     worker_keys = self.worker_tracker.workers.keys()
     Stage(worker_keys, 'shutdown').barrier(True)
示例#8
0
文件: master.py 项目: awai0707/cola
 def _shutdown_all_workers(self):
     """Issue a 'shutdown' stage barrier to all currently tracked workers.

     The targets are the keys of ``self.worker_tracker.workers`` at the
     time of the call.
     """
     Stage(self.worker_tracker.workers.keys(), 'shutdown').barrier(True)