Пример #1
0
    def filelist(self, sample, working_dir):
        """Write the per-sample bookkeeping files into working_dir.

        Copies the sample's json (or touches an empty cs.json when the
        sample has none), works out how the input files are grouped into
        jobs, and writes the cs_* files listed in files_to_write below.

        Splitting:
          * sample.split_by == 'events': every job gets the full file list
            (a single group) and the job count is
            int_ceil(nevents, sample.events_per), where nevents is
            sample.total_events if positive, else sample.nevents_orig.
          * otherwise: files are chunked abs(sample.files_per) at a time,
            after truncating to sample.total_files when that is positive.
            A negative files_per means "trust the computed/explicit njobs"
            instead of the number of non-empty groups.

        self._njobs, when not None, overrides the job count but must not
        exceed it.

        Returns the number of jobs.

        Raises AssertionError if the event counts are both non-positive
        when splitting by events, or if self._njobs exceeds the computed
        job count.
        """
        # JMTBAD are there performance problems by not matching the json to the files per job?
        json_fn = os.path.join(working_dir, 'cs.json')
        if hasattr(sample, 'json') and sample.json:
            shutil.copy2(sample.json, json_fn)
        else:
            touch(json_fn)

        filenames = sample.filenames
        if not self.is_cmsRun:
            filenames = self.normalize_fns(filenames)

        if sample.split_by == 'events':
            per = sample.events_per
            assert sample.nevents_orig > 0 or sample.total_events > 0
            nevents = sample.total_events if sample.total_events > 0 else sample.nevents_orig
            njobs = int_ceil(nevents, per)
            # Single group: the same file list is handed to every job.
            fn_groups = [filenames]
        else:
            use_njobs = sample.files_per < 0
            per = abs(sample.files_per)
            if sample.total_files > 0:
                filenames = filenames[:sample.total_files]
            njobs = getattr(sample, 'njobs', int_ceil(len(filenames), per))
            # Drop empty slices so an oversized njobs does not yield empty groups.
            fn_groups = [
                x for x in (filenames[i * per:(i + 1) * per]
                            for i in xrange(njobs)) if x
            ]
            if not use_njobs:
                njobs = len(fn_groups)  # let it fail downward
        if self._njobs is not None:
            assert self._njobs <= njobs
            njobs = self._njobs

        # The grouped file list is pickled, compressed and base64-encoded so
        # it can be substituted into the filelist template as plain text.
        encoded_filelist = base64.b64encode(
            zlib.compress(pickle.dumps(fn_groups, -1)))

        files_to_write = [
            ('cs_outputfiles', self.output_files),
            ('cs_stageoutfiles', self.stageout_files),
            ('cs_filelist.py',
             self.filelist_py_template.replace('__FILELIST__',
                                               encoded_filelist)),
            ('cs_njobs', str(njobs)),
            ('cs_jobmap', '\n'.join(str(i) for i in xrange(njobs)) +
             '\n'),  # will be more complicated for resubmits
            ('cs_primaryds', sample.primary_dataset),
            ('cs_samplename', sample.name),
            # Timestamp is offset by nsubmits seconds (presumably so each
            # submitted sample gets a distinct value -- confirm).
            ('cs_timestamp',
             (self.timestamp +
              timedelta(seconds=self.nsubmits)).strftime('%y%m%d_%H%M%S')),
        ]

        for fn, content in files_to_write:
            # Close each file explicitly so the content is flushed to disk
            # before anything downstream reads it.
            with open(os.path.join(working_dir, fn), 'wt') as f:
                f.write(content)

        return njobs
Пример #2
0
    def submit(self, sample):
        self.nsubmits += 1
        print 'submit', self.batch_name, sample.name,

        if self.dataset:
            try:
                sample.set_curr_dataset(self.dataset)
            except KeyError:
                print "\033[1m warning: \033[0m sample %s not submitted, doesn't have dataset %s" % (
                    sample.name, self.dataset)
                return

        if sample.split_by == 'events' and not sample.is_mc:
            print "\033[1m warning: \033[0m sample %s not submitted because can't split by events on data sample"
            return

        working_dir = os.path.join(self.batch_dir, 'condor_%s' % sample.name)
        if os.path.exists(working_dir):
            print "\033[1m warning: \033[0m sample %s not submitted, working dir %s already exists" % (
                sample.name, working_dir)
            return

        os.mkdir(working_dir)
        touch(os.path.join(working_dir, 'cs_dir'))
        open(os.path.join(working_dir, 'cs_ex'), 'wt').write(self.ex_str)

        njobs = self.filelist(sample, working_dir)
        pset_fn = self.pset(sample, working_dir)

        jdl_fn = os.path.join(working_dir, 'cs_submit.jdl')
        open(jdl_fn,
             'wt').write(self.jdl_template.replace('__NJOBS__', str(njobs)))

        if not self.testing:
            self._submit(working_dir, njobs)
        else:
            print 'in testing mode, not submitting anything.'
            if pset_fn:
                diff_out, diff_ret = popen('diff -uN %s %s' %
                                           (self.pset_template_fn, pset_fn),
                                           return_exit_code=True)
                if diff_ret != 0:
                    print '.py diff:\n---------'
                    print diff_out
                    raw_input('continue?')
                    print
    def filelist(self, sample, working_dir):
        """Write the per-sample bookkeeping files into working_dir.

        Copies the sample's json (or touches an empty cs.json when the
        sample has none), works out how the input files are grouped into
        jobs, and writes the cs_* files listed in files_to_write below.

        Splitting:
          * sample.split_by == 'events': every job gets the full file list
            (a single group) and the job count is
            int_ceil(sample.nevents_orig, sample.events_per).
          * otherwise: files are chunked abs(sample.files_per) at a time.
            A negative files_per means "trust the computed/explicit njobs"
            instead of the number of non-empty groups.

        self._njobs, when not None, overrides the job count but must not
        exceed it.

        Returns the number of jobs.

        Raises AssertionError if sample.nevents_orig is not positive when
        splitting by events, or if self._njobs exceeds the computed job
        count.
        """
        # JMTBAD are there performance problems by not matching the json to the files per job?
        json_fn = os.path.join(working_dir, 'cs.json')
        if hasattr(sample, 'json') and sample.json:
            shutil.copy2(sample.json, json_fn)
        else:
            touch(json_fn)

        filenames = sample.filenames
        if not self.is_cmsRun:
            filenames = self.normalize_fns(filenames)

        if sample.split_by == 'events':
            per = sample.events_per
            assert sample.nevents_orig > 0
            njobs = int_ceil(sample.nevents_orig, per)
            # Single group: the same file list is handed to every job.
            fn_groups = [filenames]
        else:
            use_njobs = sample.files_per < 0
            per = abs(sample.files_per)
            njobs = getattr(sample, 'njobs', int_ceil(len(filenames), per))
            # Drop empty slices so an oversized njobs does not yield empty groups.
            fn_groups = [x for x in (filenames[i*per:(i+1)*per] for i in xrange(njobs)) if x]
            if not use_njobs:
                njobs = len(fn_groups) # let it fail downward
        if self._njobs is not None:
            assert self._njobs <= njobs
            njobs = self._njobs

        # The grouped file list is pickled, compressed and base64-encoded so
        # it can be substituted into the filelist template as plain text.
        encoded_filelist = base64.b64encode(zlib.compress(pickle.dumps(fn_groups, -1)))

        files_to_write = [
            ('cs_outputfiles',   self.output_files),
            ('cs_stageoutfiles', self.stageout_files),
            ('cs_filelist.py',   self.filelist_py_template.replace('__FILELIST__', encoded_filelist)),
            ('cs_njobs',         str(njobs)),
            ('cs_jobmap',        '\n'.join(str(i) for i in xrange(njobs)) + '\n'), # will be more complicated for resubmits
            ('cs_primaryds',     sample.primary_dataset),
            ('cs_samplename',    sample.name),
            # Timestamp is offset by nsubmits seconds (presumably so each
            # submitted sample gets a distinct value -- confirm).
            ('cs_timestamp',     (self.timestamp + timedelta(seconds=self.nsubmits)).strftime('%y%m%d_%H%M%S')),
            ]

        for fn, content in files_to_write:
            # Close each file explicitly so the content is flushed to disk
            # before anything downstream reads it.
            with open(os.path.join(working_dir, fn), 'wt') as f:
                f.write(content)

        return njobs
    def submit(self, sample):
        self.nsubmits += 1
        print 'batch', self.batch_name, 'sample', sample.name, 

        if self.dataset:
            try:
                sample.set_curr_dataset(self.dataset)
            except KeyError:
                print "\033[1m warning: \033[0m sample %s not submitted, doesn't have dataset %s" % (sample.name, self.dataset)
                return

        if sample.split_by == 'events' and not sample.is_mc:
            print "\033[1m warning: \033[0m sample %s not submitted because can't split by events on data sample"
            return

        working_dir = os.path.join(self.batch_dir, 'condor_%s' % sample.name)
        if os.path.exists(working_dir):
            print "\033[1m warning: \033[0m sample %s not submitted, working dir %s already exists" % (sample.name, working_dir)
            return

        os.mkdir(working_dir)
        touch(os.path.join(working_dir, 'cs_dir'))
        open(os.path.join(working_dir, 'cs_ex'), 'wt').write(self.ex_str)

        njobs = self.filelist(sample, working_dir)
        pset_fn = self.pset(sample, working_dir)

        jdl_fn = os.path.join(working_dir, 'cs_submit.jdl')
        open(jdl_fn, 'wt').write(self.jdl_template.replace('__NJOBS__', str(njobs)))

        if not self.testing:
            self._submit(working_dir, njobs)
        else:
            print 'in testing mode, not submitting anything.'
            if pset_fn:
                diff_out, diff_ret = popen('diff -uN %s %s' % (self.pset_template_fn, pset_fn), return_exit_code=True)
                if diff_ret != 0:
                    print '.py diff:\n---------'
                    print diff_out
                    raw_input('continue?')
                    print
Пример #5
0
def set_cs_done(wd):
    """Touch the path that cs_done_fn(wd) yields and return touch()'s result."""
    done_path = cs_done_fn(wd)
    return touch(done_path)