Example #1
    def __init__(self,
                 exe,
                 copy_exe=True,
                 setup_script=None,
                 filename='jobs.condor',
                 out_dir='logs', out_file='$(cluster).$(process).out',
                 err_dir='logs', err_file='$(cluster).$(process).err',
                 log_dir='logs', log_file='$(cluster).$(process).log',
                 cpus=1, memory='100MB', disk='100MB',
                 certificate=False,
                 transfer_hdfs_input=True,
                 share_exe_setup=True,
                 common_input_files=None,
                 hdfs_store=None,
                 dag_mode=False,
                 other_args=None):
        super(JobSet, self).__init__()
        self.exe = exe
        self.copy_exe = copy_exe
        self.setup_script = setup_script
        # Check output filenames are not rubbish
        for f in [filename, out_file, err_file, log_file]:
            check_good_filename(f)
        self.filename = os.path.abspath(filename)
        self.out_dir = os.path.realpath(str(out_dir))
        self.out_file = str(out_file)
        self.err_dir = os.path.realpath(str(err_dir))
        self.err_file = str(err_file)
        self.log_dir = os.path.realpath(str(log_dir))
        self.log_file = str(log_file)
        self.cpus = int(cpus) if int(cpus) >= 1 else 1
        self.memory = str(memory)
        self.disk = str(disk)
        self.certificate = certificate
        self.transfer_hdfs_input = transfer_hdfs_input
        self.share_exe_setup = share_exe_setup
        # can't use the X[:] or [] idiom, as the slice is evaluated first (so it breaks on None)
        if not common_input_files:
            common_input_files = []
        self.common_input_files = common_input_files[:]
        self.common_input_file_mirrors = []  # To hold FileMirror obj
        if hdfs_store is None:
            raise IOError('Need to specify hdfs_store')
        self.hdfs_store = hdfs_store
        # self.dag_mode = dag_mode
        self.job_template = os.path.join(os.path.dirname(__file__), 'templates', 'job.condor')
        self.other_job_args = other_args
        # Holds all Job objects this JobSet manages; key is the Job name.
        self.jobs = OrderedDict()

        # Setup directories
        # ---------------------------------------------------------------------
        for d in [self.out_dir, self.err_dir, self.log_dir, self.hdfs_store]:
            if d:
                check_dir_create(d)

        self.setup_common_input_file_mirrors(self.hdfs_store)
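For context, a minimal usage sketch of the constructor above. The import path and all argument values are illustrative assumptions, not taken from the source; only the parameter names and the requirement that hdfs_store be given are confirmed by the code.

# Hypothetical usage; the import path and values below are assumptions.
from htcondenser import JobSet

job_set = JobSet(
    exe='run_analysis.sh',               # hypothetical executable
    setup_script='setup.sh',             # hypothetical setup script
    filename='analysis_jobs.condor',
    memory='2GB', disk='1GB',
    hdfs_store='/hdfs/user/alice/jobs',  # required - IOError if omitted
)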
Example #2
    def write(self):
        """Write DAG to file and causes all Jobs to write their HTCondor submit files."""
        dag_contents = self.generate_dag_contents()
        log.info('Writing DAG to %s', self.dag_filename)
        check_dir_create(os.path.dirname(self.dag_filename))
        with open(self.dag_filename, 'w') as dfile:
            dfile.write(dag_contents)

        # Write job files for each JobSet
        for manager in self.get_jobsets():
            manager.write(dag_mode=True)
Example #3
    def write(self):
        """Write DAG to file and causes all Jobs to write their HTCondor submit files."""
        dag_contents = self.generate_dag_contents()
        log.info("Writing DAG to %s", self.dag_filename)
        check_dir_create(os.path.dirname(os.path.realpath(self.dag_filename)))
        with open(self.dag_filename, "w") as dfile:
            dfile.write(dag_contents)

        # Write job files for each JobSet
        for manager in self.get_jobsets():
            manager.write(dag_mode=True)
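Note that this variant differs from Example #2 only by wrapping the filename in os.path.realpath, so a DAG filename with no directory component resolves to its real parent directory instead of the empty string. Both variants rely on a check_dir_create helper that is not shown in these examples; here is a minimal sketch of what it plausibly does (an assumption about its behaviour, not the library's actual code):

import os


def check_dir_create(directory):
    """Create `directory` (and any parents) if it does not already exist.

    Sketch of the helper used throughout these examples; the real
    implementation may differ (e.g. extra validation or permissions).
    """
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)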
Example #4
    def write(self, dag_mode):
        """Write jobs to HTCondor job file."""

        with open(self.job_template) as tfile:
            template = tfile.read()

        file_contents = self.generate_file_contents(template, dag_mode)

        log.info('Writing HTCondor job file to %s', self.filename)
        check_dir_create(os.path.dirname(os.path.realpath(self.filename)))
        with open(self.filename, 'w') as jfile:
            jfile.write(file_contents)
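The submit-file template read from templates/job.condor is not shown in the source. For orientation, here is an illustrative HTCondor submit file of the kind the filled-in template might produce, using the defaults from the constructor in Example #1 (the real template's contents are an assumption):

# Illustrative only - the actual template is not shown in the source.
executable     = run_analysis.sh
output         = logs/$(cluster).$(process).out
error          = logs/$(cluster).$(process).err
log            = logs/$(cluster).$(process).log
request_cpus   = 1
request_memory = 100MB
request_disk   = 100MB
queue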
Example #5
    def transfer_to_hdfs(self):
        """Transfer files across to HDFS.

        Auto-creates HDFS mirror dir if it doesn't exist, but only if
        there is at least one file to transfer.

        Will not transfer exe or setup script if manager.share_exe_setup is True.
        That is left for the manager to do.
        """
        # Skip the exe & setup script - the JobSet should handle those itself.
        files_to_transfer = []
        for ifile in self.input_file_mirrors:
            if ((ifile.original == ifile.hdfs) or (self.manager.share_exe_setup and
                    ifile.original in [self.manager.exe, self.manager.setup_script])):
                continue
            files_to_transfer.append(ifile)

        if files_to_transfer:
            check_dir_create(self.hdfs_mirror_dir)

        for ifile in files_to_transfer:
            log.info('Copying %s -->> %s', ifile.original, ifile.hdfs)
            cp_hdfs(ifile.original, ifile.hdfs)
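The loop above only assumes each mirror exposes original and hdfs attributes (the comment in Example #1 calls these FileMirror objects). A minimal stand-in illustrating that contract - a sketch, not the library's actual class:

from collections import namedtuple

# Stand-in for the FileMirror objects referenced above; the real class
# may carry more fields (e.g. a worker-node destination path).
FileMirror = namedtuple('FileMirror', ['original', 'hdfs'])

mirror = FileMirror(original='/users/alice/data.txt',   # hypothetical path
                    hdfs='/hdfs/store/alice/data.txt')  # hypothetical path
# transfer_to_hdfs() skips mirrors whose original already lives on HDFS,
# i.e. those with mirror.original == mirror.hdfs.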
Example #6
    def __init__(self,
                 exe,
                 copy_exe=True,
                 setup_script=None,
                 filename='jobs.condor',
                 out_dir='logs', out_file='$(cluster).$(process).out',
                 err_dir='logs', err_file='$(cluster).$(process).err',
                 log_dir='logs', log_file='$(cluster).$(process).log',
                 cpus=1, memory='100MB', disk='100MB',
                 certificate=False,
                 transfer_hdfs_input=True,
                 share_exe_setup=True,
                 common_input_files=None,
                 hdfs_store=None,
                 dag_mode=False,
                 other_args=None):
        super(JobSet, self).__init__()
        self.exe = exe
        self.copy_exe = copy_exe
        self.setup_script = setup_script
        self.filename = filename
        self.out_dir = os.path.realpath(str(out_dir))
        self.out_file = str(out_file)
        self.err_dir = os.path.realpath(str(err_dir))
        self.err_file = str(err_file)
        self.log_dir = os.path.realpath(str(log_dir))
        self.log_file = str(log_file)
        self.cpus = int(cpus) if int(cpus) >= 1 else 1
        self.memory = str(memory)
        self.disk = str(disk)
        self.certificate = certificate
        self.transfer_hdfs_input = transfer_hdfs_input
        self.share_exe_setup = share_exe_setup
        # can't use the X[:] or [] idiom, as the slice is evaluated first (so it breaks on None)
        if not common_input_files:
            common_input_files = []
        self.common_input_files = common_input_files[:]
        self.common_input_file_mirrors = []  # To hold FileMirror obj
        if hdfs_store is None:
            raise IOError('Need to specify hdfs_store')
        self.hdfs_store = hdfs_store
        # self.dag_mode = dag_mode
        self.job_template = os.path.join(os.path.dirname(__file__), 'templates', 'job.condor')
        self.other_job_args = other_args
        # Holds all Job objects this JobSet manages; key is the Job name.
        self.jobs = OrderedDict()

        # Setup directories
        # ---------------------------------------------------------------------
        for d in [self.out_dir, self.err_dir, self.log_dir, self.hdfs_store]:
            if d:
                log.info('Making directory %s', d)
                check_dir_create(d)

        # Check output filenames are not blank
        # ---------------------------------------------------------------------
        bad_filenames = ['', '.']
        for f in [self.out_file, self.err_file, self.log_file]:
            if f in bad_filenames:
                raise OSError('Bad output filename')

        # Setup mirrors for any common input files
        # ---------------------------------------------------------------------
        self.setup_common_input_file_mirrors(self.hdfs_store)
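Unlike Example #1, this variant inlines the blank-filename check instead of calling a check_good_filename helper. A sketch of what that helper might look like, generalising the inline check above (the exact validation rules are an assumption):

def check_good_filename(filename):
    """Raise OSError if `filename` is unusable as an output filename.

    Sketch only: mirrors the inline check above ('' and '.' are bad);
    the real helper may enforce additional rules.
    """
    if str(filename) in ('', '.'):
        raise OSError('Bad output filename: %s' % filename)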