def __create_ntuple_layer(self):
    LOG_STEM = 'ntuple_job.$(cluster).$(process)'
    jobs = []
    run_config = self.__config
    input_files = run_config['files']
    # in test mode, only process the first input file
    if self.__variables['test']:
        input_files = [input_files[0]]

    # one HTCondor JobSet shared by all ntuple production jobs
    job_set = htc.JobSet(
        exe=self.__run_script,
        copy_exe=True,
        setup_script=self.__setup_script,
        filename=os.path.join(
            self.__job_dir, 'ntuple_production.condor'),
        out_dir=self.__job_log_dir,
        out_file=LOG_STEM + '.out',
        err_dir=self.__job_log_dir,
        err_file=LOG_STEM + '.err',
        log_dir=self.__job_log_dir,
        log_file=LOG_STEM + '.log',
        share_exe_setup=True,
        common_input_files=self.__input_files,
        transfer_hdfs_input=False,
        hdfs_store=run_config['outLFNDirBase'] + '/tmp',
        certificate=self.REQUIRE_GRID_CERT,
        cpus=1,
        memory='1500MB'
    )

    parameters = 'files={files} output_file={output_file} {params}'
    if run_config['lumiMask']:
        parameters += ' json_url={0}'.format(run_config['lumiMask'])

    # pick the per-dataset splitting, falling back to the default
    n_files_per_group = SPLITTING_BY_FILE['DEFAULT']
    for name, value in SPLITTING_BY_FILE.items():
        if name in run_config['inputDataset']:
            n_files_per_group = value

    # one job per chunk of input files
    grouped_files = make_even_chunks(input_files, n_files_per_group)
    for i, f in enumerate(grouped_files):
        output_file = '{dataset}_ntuple_{job_number}.root'.format(
            dataset=run_config['outputDatasetTag'], job_number=i)
        args = parameters.format(
            files=','.join(f), output_file=output_file,
            params=run_config['pyCfgParams']
        )
        rel_out_dir = os.path.relpath(RESULTDIR, NTPROOT)
        rel_log_dir = os.path.relpath(LOGDIR, NTPROOT)
        rel_out_file = os.path.join(rel_out_dir, output_file)
        rel_log_file = os.path.join(rel_log_dir, 'ntp.log')
        job = htc.Job(
            name='ntuple_job_{0}'.format(i),
            args=args,
            output_files=[rel_out_file, rel_log_file])
        job_set.add_job(job)
        jobs.append(job)

    return jobs
def test_make_chunks(self):
    from ntp.utils import make_even_chunks
    l = list(range(100))
    size_of_chunk = 9
    n_groups = int(len(l) / size_of_chunk)
    chunks = list(make_even_chunks(l, size_of_chunk))
    self.assertEqual(len(chunks), n_groups)
    for c in chunks:
        self.assertGreaterEqual(len(c), size_of_chunk)
def __create_ntuple_layer(self):
    jobs = []
    run_config = self.__config
    input_files = run_config['files']
    if self.__variables['test']:
        input_files = [input_files[0]]

    job_set = htc.JobSet(
        exe=self.__run_script,
        copy_exe=True,
        setup_script=self.__setup_script,
        filename=os.path.join(self.__job_dir, 'ntuple_production.condor'),
        out_dir=self.__job_log_dir,
        out_file=LOG_STEM + '.out',
        err_dir=self.__job_log_dir,
        err_file=LOG_STEM + '.err',
        log_dir=self.__job_log_dir,
        log_file=LOG_STEM + '.log',
        share_exe_setup=True,
        common_input_files=self.__input_files,
        transfer_hdfs_input=False,
        hdfs_store=run_config['outLFNDirBase'] + '/tmp',
        certificate=self.REQUIRE_GRID_CERT,
        cpus=1,
        memory='1500MB')

    parameters = 'files={files} output_file={output_file} {params}'
    if run_config['lumiMask']:
        parameters += ' json_url={0}'.format(run_config['lumiMask'])

    n_files_per_group = SPLITTING_BY_FILE['DEFAULT']
    for name, value in SPLITTING_BY_FILE.items():
        if name in run_config['inputDataset']:
            n_files_per_group = value

    grouped_files = make_even_chunks(input_files, n_files_per_group)
    for i, f in enumerate(grouped_files):
        output_file = '{dataset}_ntuple_{job_number}.root'.format(
            dataset=run_config['outputDatasetTag'], job_number=i)
        args = parameters.format(files=','.join(f),
                                 output_file=output_file,
                                 params=run_config['pyCfgParams'])
        rel_out_dir = os.path.relpath(RESULTDIR, NTPROOT)
        rel_log_dir = os.path.relpath(LOGDIR, NTPROOT)
        rel_out_file = os.path.join(rel_out_dir, output_file)
        rel_log_file = os.path.join(rel_log_dir, 'ntp.log')
        job = htc.Job(name='ntuple_job_{0}'.format(i),
                      args=args,
                      output_files=[rel_out_file, rel_log_file])
        job_set.add_job(job)
        jobs.append(job)

    return jobs
def create_job_layer(self, input_files, mode):
    jobs = []
    self.__root_output_files = []
    config = self.__config
    # in test mode, only process the first input file
    if self.__variables['test']:
        input_files = [input_files[0]]
    self.__config['input_files'] = input_files
    hdfs_store = config['outputDir']

    job_set = htc.JobSet(
        exe=self.__run_script,
        copy_exe=True,
        setup_script=self.__setup_script,
        filename=os.path.join(self.__job_dir, '{0}.condor'.format(PREFIX)),
        out_dir=self.__job_log_dir,
        out_file=OUT_FILE,
        err_dir=self.__job_log_dir,
        err_file=ERR_FILE,
        log_dir=self.__job_log_dir,
        log_file=LOG_FILE,
        share_exe_setup=True,
        common_input_files=self.__input_files,
        transfer_hdfs_input=True,
        hdfs_store=hdfs_store,
        certificate=self.REQUIRE_GRID_CERT,
        cpus=1,
        memory='900MB',
        # avoid machines whose names start with "sm" or "bs"
        other_args={
            'Requirements':
                '( !stringListMember(substr(Target.Machine,0,2),"sm,bs") )'
        },
    )

    parameters = 'files={files} output_file_suffix={suffix} mode={mode}'
    parameters += ' dataset={dataset}'

    # pick the per-dataset splitting, falling back to the analysis default
    dataset = config['parameters']['dataset']
    n_files_per_group = N_FILES_PER_ANALYSIS_JOB
    for name, value in SPLITTING_BY_FILE.items():
        if name in dataset:
            n_files_per_group = value

    grouped_files = make_even_chunks(
        input_files, size_of_chunk=n_files_per_group)
    for i, f in enumerate(grouped_files):
        suffix = 'atOutput_{job_number}.root'.format(
            dataset=dataset, mode=mode, job_number=i
        )
        args = parameters.format(
            files=','.join(f),
            suffix=suffix,
            mode=mode,
            dataset=dataset,
        )
        output_file = '_'.join([dataset, mode, suffix])
        rel_out_dir = os.path.relpath(RESULTDIR, NTPROOT)
        rel_log_dir = os.path.relpath(LOGDIR, NTPROOT)
        rel_out_file = os.path.join(rel_out_dir, output_file)
        rel_log_file = os.path.join(rel_log_dir, 'ntp.log')
        job = htc.Job(
            name='{0}_{1}_job_{2}'.format(PREFIX, mode, i),
            args=args,
            output_files=[rel_out_file, rel_log_file])
        job_set.add_job(job)
        jobs.append(job)

    return jobs
def test_make_chunks_one_element(self):
    from ntp.utils import make_even_chunks
    l = [5]
    chunks = list(make_even_chunks(l, 50))
    self.assertEqual(len(chunks), 1)
def create_job_layer(self, input_files, mode):
    jobs = []
    self.__root_output_files = []
    config = self.__config
    # in test mode, only process the first input file
    if self.__variables['test']:
        input_files = [input_files[0]]
    self.__config['input_files'] = input_files
    hdfs_store = config['outputDir']

    job_set = htc.JobSet(
        exe=self.__run_script,
        copy_exe=True,
        setup_script=self.__setup_script,
        filename=os.path.join(self.__job_dir, '{0}.condor'.format(PREFIX)),
        out_dir=self.__job_log_dir,
        out_file=OUT_FILE,
        err_dir=self.__job_log_dir,
        err_file=ERR_FILE,
        log_dir=self.__job_log_dir,
        log_file=LOG_FILE,
        share_exe_setup=True,
        common_input_files=self.__input_files,
        transfer_hdfs_input=False,
        hdfs_store=hdfs_store,
        certificate=self.REQUIRE_GRID_CERT,
        cpus=1,
        memory='1500MB',
    )
    # hard-coded condor job template
    job_set.job_template = '/storage/ec6821/NTupleProd/new/NTupleProduction/job.condor'

    parameters = 'files={files} output_file_suffix={suffix} mode={mode}'
    parameters += ' dataset={dataset}'

    n_files_per_group = N_FILES_PER_ANALYSIS_JOB
    grouped_files = make_even_chunks(
        input_files, size_of_chunk=n_files_per_group)
    dataset = config['parameters']['dataset']
    for i, f in enumerate(grouped_files):
        suffix = 'atOutput_{job_number}.root'.format(
            dataset=dataset, mode=mode, job_number=i
        )
        args = parameters.format(
            files=','.join(f),
            suffix=suffix,
            mode=mode,
            dataset=dataset,
        )
        output_file = '_'.join([dataset, mode, suffix])
        rel_out_dir = os.path.relpath(RESULTDIR, NTPROOT)
        rel_log_dir = os.path.relpath(LOGDIR, NTPROOT)
        rel_out_file = os.path.join(rel_out_dir, output_file)
        rel_log_file = os.path.join(rel_log_dir, 'ntp.log')
        job = htc.Job(
            name='{0}_{1}_job_{2}'.format(PREFIX, mode, i),
            args=args,
            output_files=[rel_out_file, rel_log_file])
        job_set.add_job(job)
        jobs.append(job)

    return jobs
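# Note: the actual ntp.utils.make_even_chunks implementation is not shown in
# this section. The sketch below is a hypothetical implementation, written
# only to illustrate the behaviour the two tests above rely on: the input is
# split into len(sequence) // size_of_chunk contiguous groups (at least one),
# so every group holds at least size_of_chunk elements unless the whole
# sequence is shorter than a single chunk.
def make_even_chunks(sequence, size_of_chunk):
    """Yield the sequence in roughly equal-sized, contiguous chunks."""
    n_chunks = max(1, len(sequence) // size_of_chunk)
    # distribute the remainder over the first chunks so sizes differ by at
    # most one element
    base, remainder = divmod(len(sequence), n_chunks)
    start = 0
    for i in range(n_chunks):
        end = start + base + (1 if i < remainder else 0)
        yield sequence[start:end]
        start = end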