def submit_job(self):
    """Submit the merge job array for this interface to the cluster engine.

    Writes two config files (the list of per-job merge lists and the list of
    temporary output files), submits one array job running Merge.sh over them,
    and then — if a child merge stage exists — recursively submits the child
    and a final cleanup job that removes this stage's temporary files.

    Returns True on success, False if already submitted or if any
    submission step fails.
    """
    # Guard: each merge interface may only be submitted once.
    if self.__submitted: return False
    # One text file per array task: each line in job_array points to a
    # file listing the ROOT files that task has to merge.
    job_array = WriteList( self.merge_lists(), "%s/%s.txt" % (self.engine().config_dir(), id_generator(31)))
    # Matching list of output file names, one per array task.
    final_merge_name = WriteList( self.temporary_files(), "%s/%s.txt" % (self.engine().config_dir(), id_generator(30)))
    if not self.engine().submit_array( script="ClusterSubmission/Merge.sh",
                                       sub_job=self.job_name(),
                                       mem=self.engine().merge_mem(),
                                       env_vars=[ ("JobConfigList", job_array),
                                                  ("OutFileList", final_merge_name),
                                                  # ROOT version shipped to the worker node via ALRB
                                                  ("ALRB_rootVersion", ROOTVERSION), ],
                                       hold_jobs=self.hold_jobs(),
                                       run_time=self.engine().merge_time(),
                                       array_size=len(self.merge_lists())): return False
    self.__submitted = True
    # No further merge stage -> done.
    if not self.child(): return True
    # Submit the next merge stage; it consumes this stage's temporary files.
    if not self.child().submit_job(): return False
    # Clean this stage's temporary files once the child stage has finished.
    return self.engine().submit_clean_job( hold_jobs=[self.engine().subjob_name(self.child().job_name())],
                                           to_clean=self.temporary_files(),
                                           sub_job=self.job_name())
def submit_move_job(self, hold_jobs=None, to_move=None, destination="", source_dir="", sub_job=""):
    """Submit a job executing Move.sh that moves files to *destination*.

    Parameters
    ----------
    hold_jobs:  jobs this move job has to wait for (default: none).
    to_move:    explicit list of files/directories to move; written into a
                config file consumed by the job script.
    destination: target directory -- mandatory.
    source_dir: alternatively to *to_move*, an existing config file / source
                directory handed to the job as-is.
    sub_job:    optional suffix appended to the job name ("Move-<sub_job>").

    Returns True if the job was submitted, False on invalid arguments or
    submission failure.
    """
    # Avoid the mutable-default-argument pitfall: fresh lists per call.
    hold_jobs = hold_jobs if hold_jobs is not None else []
    to_move = to_move if to_move is not None else []
    move_cfg = ""
    if len(to_move) > 0:
        move_cfg = "%s/Move_%s.txt" % (self.config_dir(), id_generator(35))
        WriteList(to_move, move_cfg)
    elif len(source_dir) > 0:
        move_cfg = source_dir
    else:
        logging.error("<submit_move_job> Nothing to move")
        return False
    if len(destination) == 0:
        logging.error("<submit_move_job> No destination where to move provided")
        return False
    return self.submit_job(script="ClusterSubmission/Move.sh",
                           mem=100,
                           env_vars=[
                               ("DestinationDir", destination),
                               ("FromDir", move_cfg),
                           ],
                           hold_jobs=hold_jobs,
                           sub_job="Move%s%s" % ("" if len(sub_job) == 0 else "-", sub_job),
                           run_time="01:00:00")
def write_ship_file(self, env_vars):
    """Write an executable bash snippet exporting the given environment.

    The snippet contains one ``export VAR='VAL'`` line per pair from
    *env_vars* plus the engine-wide common variables, lands in the config
    directory under a random name, and is made user-executable (0700).
    Returns the path of the written file.
    """
    export_lines = ["#!/bin/bash"]
    for var_name, var_value in env_vars + self.common_env_vars():
        export_lines.append("export %s='%s'" % (var_name, var_value))
    target_path = "%s/%s.sh" % (self.config_dir(), id_generator(74))
    ship_file_name = WriteList(export_lines, target_path)
    os.system("chmod 0700 %s" % (ship_file_name))
    return ship_file_name
def submit_copy_job(self,
                    hold_jobs=None,
                    to_copy=None,  ### Give particular files to copy
                    destination="",
                    source_dir="",  ### Optional
                    sub_job=""):
    """Submit a job executing Copy.sh that copies files to *destination*.

    Mirrors submit_move_job: either *to_copy* (written to a config file) or
    *source_dir* must be given, plus a non-empty *destination*.
    Returns True if the job was submitted, False otherwise.
    """
    # Avoid the mutable-default-argument pitfall: fresh lists per call.
    hold_jobs = hold_jobs if hold_jobs is not None else []
    to_copy = to_copy if to_copy is not None else []
    copy_cfg = ""
    if len(to_copy) > 0:
        copy_cfg = "%s/Copy_%s.txt" % (self.config_dir(), id_generator(35))
        WriteList(to_copy, copy_cfg)
    elif len(source_dir) > 0:
        copy_cfg = source_dir
    else:
        # Use logging like the sibling submit_move_job instead of bare print.
        logging.error("<submit_copy_job> Nothing to copy")
        return False
    if len(destination) == 0:
        logging.error("<submit_copy_job> Where to copy everything?")
        return False
    return self.submit_job(script="ClusterSubmission/Copy.sh",
                           mem=100,
                           env_vars=[
                               ("DestinationDir", destination),
                               ("FromDir", copy_cfg),
                           ],
                           hold_jobs=hold_jobs,
                           sub_job="Copy%s%s" % ("" if len(sub_job) == 0 else "-", sub_job),
                           run_time="01:00:00")
def downloadDataSets(InputDatasets, Destination, RSE="", use_singularity=False):
    """Download the given rucio datasets into *Destination* in parallel.

    Apparently rucio does no longer work in combination with AthAnalysis,
    so optionally each download is executed from inside a singularity
    container (a small per-dataset wrapper script is generated and bind-
    mounted into the image).

    Parameters
    ----------
    InputDatasets:   iterable of rucio dataset identifiers.
    Destination:     local download directory (created if missing).
    RSE:             optional rucio storage element passed via ``--rse``.
    use_singularity: run each download inside a singularity container.
    """
    Cmds = []
    image_to_choose = setupBatchSubmitArgParser().get_default("SingularityImage")
    home_dir = setupBatchSubmitArgParser().get_default("BaseFolder") + "/TMP/.singularity/"
    CreateDirectory(Destination, False)
    if use_singularity: CreateDirectory(home_dir, False)
    to_clean = []
    # Hoist the RSE option once instead of rebuilding it per dataset.
    rse_opt = "" if len(RSE) == 0 else "--rse %s" % (RSE)
    for DS in InputDatasets:
        # Build the download command exactly once per dataset; previously the
        # same string was assembled three times and could silently diverge.
        download_cmd = "rucio download %s --ndownloader 32 %s --dir %s" % (DS, rse_opt, Destination)
        if not use_singularity:
            Cmds += [download_cmd]
        else:
            singularity_dir = home_dir + "/" + id_generator(21)
            to_clean += [singularity_dir]
            singularity_script = WriteList([
                "#!/bin/bash",
                "export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase",
                "source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh",
                "lsetup rucio",
                "echo '%s'" % (download_cmd),
                download_cmd,
            ], "%s/to_exec.sh" % (singularity_dir))
            os.system("chmod 0777 " + singularity_script)
            Cmds += [
                "singularity exec --cleanenv -H %s:/alrb -B %s:/srv %s/%s %s" %
                (singularity_dir, Destination, SINGULARITY_DIR, image_to_choose, singularity_script)
            ]
    ExecuteCommands(ListOfCmds=Cmds, MaxCurrent=8)
    # Remove the per-dataset scratch directories again.
    for c in to_clean:
        os.system("rm -rf %s" % (c))
def submit_clean_job(self, hold_jobs=None, to_clean=None, sub_job=""):
    """Submit a job executing Clean.sh that deletes the listed paths.

    Parameters
    ----------
    hold_jobs: jobs the cleanup has to wait for (default: none).
    to_clean:  files/directories to delete; written to a config file that
               the job script reads via the ``ToClean`` environment variable.
    sub_job:   optional suffix appended to the job name ("Clean-<sub_job>").
    """
    # Avoid the mutable-default-argument pitfall: fresh lists per call.
    hold_jobs = hold_jobs if hold_jobs is not None else []
    to_clean = to_clean if to_clean is not None else []
    clean_cfg = "%s/Clean_%s.txt" % (self.config_dir(), id_generator(35))
    WriteList(to_clean, clean_cfg)
    return self.submit_job(script="ClusterSubmission/Clean.sh",
                           mem=100,
                           env_vars=[("ToClean", clean_cfg)],
                           hold_jobs=hold_jobs,
                           sub_job="Clean%s%s" % ("" if len(sub_job) == 0 else "-", sub_job),
                           run_time="01:00:00")
def link_to_copy_area(self, config_file):
    """Copy *config_file* into the engine's config area under a random name.

    The file is resolved via ResolvePath; its ending is preserved while the
    basename is replaced by a random identifier. Returns the new path, or
    None if the file could not be resolved.
    """
    resolved = ResolvePath(config_file)
    if not resolved:
        return None
    ### Create the directory
    CreateDirectory(self.config_dir(), False)
    ### Keep the ending of the file but rename it to a random thing
    file_ending = config_file[config_file.rfind(".") + 1:]
    copied_path = "%s/%s.%s" % (self.config_dir(), id_generator(45), file_ending)
    os.system("cp %s %s" % (resolved, copied_path))
    return copied_path
def __assemble_merge_list(self, files_to_merge):
    """Partition the input files into merge-list config files.

    The inputs are optionally shuffled, then split into consecutive chunks
    of at most ``__files_per_job`` entries; each chunk is written to its own
    randomly named text file in the engine's config dir. Returns the list of
    written file paths (one per merge sub-job).
    """
    inputs = list(files_to_merge)
    if self.__shuffle_files:
        shuffle(inputs)
    chunk_size = self.__files_per_job
    merge_lists = []
    # Slicing yields exactly the same chunk boundaries as the former
    # index/modulo bookkeeping, including the shorter final chunk.
    for start in range(0, len(inputs), chunk_size):
        list_name = "%s/%s.txt" % (self.engine().config_dir(), id_generator(85))
        WriteList(inputs[start:start + chunk_size], list_name)
        merge_lists.append(list_name)
    return merge_lists
def __setup_prw_helper(self, config_files=None):
    """Build a PileupHelper from the given PRW config files.

    Only ROOT files among *config_files* are loaded. The helper must end up
    with exactly one full-sim PRW period; otherwise the process is aborted,
    since downstream code evidently relies on a unique period.
    """
    # Avoid the mutable-default-argument pitfall: fresh list per call.
    config_files = config_files if config_files is not None else []
    prw_helper = ROOT.XAMPP.PileupHelper(id_generator(24))
    prw_config_files = ROOT.std.vector(str)()
    for f in config_files:
        if IsROOTFile(f):
            prw_config_files.push_back(f)
    prw_helper.loadPRWperiod_fullsim(prw_config_files)
    if len(prw_helper.getPRWperiods_fullsim()) != 1:
        # Use logging like the rest of the module instead of a bare print.
        logging.warning("More than one period...")
        exit(1)
    return prw_helper
def __init__(self, thread_name="", subthread=-1, thread_engine=None, dependencies=None, script_exec=""):
    """Worker thread wrapping one submitted sub-job.

    Parameters
    ----------
    thread_name:   logical name of the job this thread executes.
    subthread:     array index of the sub-job (-1 if not part of an array).
    thread_engine: owning engine; provides tmp_dir() for scratch space.
    dependencies:  threads/jobs that must finish first (copied defensively).
    script_exec:   path of the script this thread will execute.
    """
    threading.Thread.__init__(self)
    self.__engine = thread_engine
    self.__name = thread_name
    self.__sub_num = subthread
    self.__isSuccess = False
    self.__started = False
    # Avoid the mutable-default-argument pitfall and copy defensively so
    # later mutations by the caller do not leak into this thread.
    self.__dependencies = [d for d in dependencies] if dependencies is not None else []
    self.__script_to_exe = script_exec
    # Private scratch directory, wiped and recreated for every thread.
    self.__tmp_dir = "%s/%s" % (thread_engine.tmp_dir(), id_generator(50))
    CreateDirectory(self.__tmp_dir, True)
    self.__env_vars = [
        ("LOCAL_TASK_ID", "%d" % (self.thread_number())),
        ("TMPDIR", self.__tmp_dir),
    ]
def pack_environment(self, env_vars, script):
    """Bundle *script* with its environment into a single sourceable file.

    The script is copied into the config area and the environment variables
    are dumped into a ship file. When singularity is in use, the pair is
    wrapped once more: the ship file then carries the container image,
    the inner script and the inner ship file, and the executed script
    becomes the generic Singularity.sh launcher. Returns the path of the
    final env script, or False if the script could not be copied.
    """
    staged_script = self.link_to_copy_area(script)
    if not staged_script:
        return False
    staged_ship = self.write_ship_file(env_vars)
    if self.run_singularity():
        staged_ship = self.write_ship_file([
            ("CONTAINER_SCRIPT", staged_script),
            ("CONTAINER_IMAGE", self.singularity_container()),
            ("CONTAINER_SHIPING_FILE", staged_ship),
        ])
        staged_script = self.link_to_copy_area(ResolvePath("ClusterSubmission/Singularity.sh"))
    wrapper_lines = [
        "#!/bin/bash",
        "source %s" % (staged_ship),
        "source %s" % (staged_script),
    ]
    env_script = WriteList(wrapper_lines, "%s/EnvScript_%s.sh" % (self.config_dir(), id_generator(50)))
    os.system("chmod 0700 %s" % (env_script))
    return env_script
def __init__(self, outFileName="", files_to_merge=[], hold_jobs=[], cluster_engine=None, files_per_job=5, final_split=1, shuffle_files=True):
    """Merge-interface constructor: plan how *files_to_merge* become ROOT outputs.

    outFileName:    basename of the final merged ROOT file(s).
    files_to_merge: input ROOT files, chunked into per-job merge lists.
    hold_jobs:      jobs the merge has to wait for (copied defensively).
    cluster_engine: engine providing tmp/out directories and submission.
    files_per_job:  inputs merged per sub-job (forced to at least 2,
                    otherwise a "merge" of one file would loop forever).
    final_split:    number of final output files to aim for.
    shuffle_files:  randomize input order before chunking.
    """
    self.__out_name = outFileName
    self.__shuffle_files = shuffle_files
    self.__cluster_engine = cluster_engine
    self.__hold_jobs = [h for h in hold_jobs]
    self.__files_per_job = files_per_job if files_per_job > 1 else 2
    # Chunk the inputs now; everything below depends on the list count.
    self.__merge_lists = self.__assemble_merge_list(files_to_merge)
    self.__tmp_out_files = []
    self.__child_job = None
    self.__parent_job = None
    self.__submitted = False
    if len(self.__merge_lists) > final_split:
        # More merge jobs than requested outputs: write intermediate files
        # to tmp and chain a child merge stage to combine them further.
        self.__tmp_out_files = [
            "%s/%s.root" % (self.engine().tmp_dir(), id_generator(100))
            for d in range(len(self.__merge_lists))
        ]
        self.__child_job = self.create_merge_interface(final_split=final_split)
        self.__child_job.set_parent(self)
    elif final_split == 1 or len(self.__merge_lists) == 1:
        # Single final output written directly to the out directory.
        CreateDirectory(self.engine().out_dir(), False)
        self.__tmp_out_files = [
            "%s/%s.root" % (self.engine().out_dir(), self.outFileName())
        ]
    else:
        # Several final outputs: enumerate them as <name>_1.root, ...
        CreateDirectory(self.engine().out_dir(), False)
        self.__tmp_out_files = [
            "%s/%s_%d.root" % (self.engine().out_dir(), self.outFileName(), i + 1)
            for i in range(min(final_split, len(self.__merge_lists)))
        ]
def submit_build_job(self):
    """Submit the build job (or a dummy placeholder) ahead of the payload jobs.

    If a real build is requested, Build.sh is submitted with the configured
    memory/cores/time; otherwise a short dummy job is submitted so that
    dependent jobs still have something to hold on. Marks the build as
    submitted and locks the working area on success.
    """
    if self.__submitted_build:
        # Use logging like the rest of the module instead of a bare print.
        logging.error("<submit_build_job>: Build job is already submitted")
        return False
    if not self.submit_hook(): return False
    if self.send_build_job() and not self.submit_job(
            script="ClusterSubmission/Build.sh",
            sub_job="Build",
            mem=self.__buildMem,
            env_vars=[("CleanOut", self.out_dir()),
                      ("CleanLogs", self.log_dir()),
                      ("CleanTmp", self.tmp_dir()),
                      ("nCoresToUse", self.__buildCores),
                      ("COPYAREA", self.build_dir())],
            run_time=self.__buildTime,
            hold_jobs=self.__holdBuild):
        return False
    elif not self.send_build_job():
        if not CreateDirectory(self.log_dir(), False) or not CreateDirectory(self.out_dir(), False):
            return False
        Dummy_Job = "%s/%s.sh" % (self.config_dir(), id_generator(35))
        # The echoed message now matches the actual sleep duration (it used
        # to claim 60 seconds while sleeping 120).
        WriteList([
            "#!/bin/bash",
            "echo \"I'm a dummy build job. Will wait 120 seconds until everything is scheduled\"",
            "sleep 120"
        ], Dummy_Job)
        if not self.submit_job(script=Dummy_Job,
                               sub_job="Build",
                               mem=100,
                               env_vars=[],
                               run_time="00:05:00",
                               hold_jobs=self.__holdBuild):
            return False
    self.__submitted_build = True
    self.lock_area()
    return True
def submit_build_job(self):
    """Submit the build job (or a dummy placeholder) ahead of the payload jobs.

    Refactored variant using the get_build_* accessors. Returns True if the
    build is (or already was) submitted, False on any failure.
    """
    if self.check_submitted_build():
        logging.warning("<submit_build_job>: Build job is already submitted")
        return True
    if not self.submit_hook(): return False
    ### Few cluster engines go crazy if the log files of the own jobs are deleted
    ### Make sure that the build job deletes the log dir before submission
    if not CreateDirectory(self.log_dir(), True): return False
    if self.send_build_job() and not self.submit_job(
            script="ClusterSubmission/Build.sh",
            sub_job="Build",
            mem=self.get_build_mem(),
            n_cores=self.get_build_cores(),
            env_vars=[("CleanOut", self.out_dir()),
                      ("CleanTmp", self.tmp_dir()),
                      ("nCoresToUse", self.get_build_cores()),
                      ("COPYAREA", self.build_dir())],
            run_time=self.get_build_time(),
            hold_jobs=self.get_build_hold_jobs()):
        return False
    elif not self.send_build_job():
        if not CreateDirectory(self.out_dir(), False): return False
        Dummy_Job = WriteList([
            "#!/bin/bash",
            "echo \"I'm a dummy build job. Will wait 15 seconds until everything is scheduled\"",
            "sleep 15"
        ], "%s/%s.sh" % (self.config_dir(), id_generator(35)))
        # Hold on the accessor like the real build branch above; the old
        # mangled attribute self.__holdBuild was a leftover from the
        # pre-refactoring variant of this method.
        if not self.submit_job(script=Dummy_Job,
                               sub_job="Build",
                               mem=100,
                               run_time="00:05:00",
                               hold_jobs=self.get_build_hold_jobs()):
            return False
    self.__submitted_build = True
    self.lock_area()
    return True
def _write_submission_file(self, sub_job, exec_script, env_vars=None, mem=1, run_time='00:00:01', nproc=1, arraylength=-1):
    """Write an HTCondor .sub file for one (possibly array) job.

    Packs *env_vars* and *exec_script* into a self-contained wrapper, builds
    the submission description (logs, memory, cpus, runtime attributes for
    CERN/DESY clouds) and registers an HTCondorJob entry in the internal
    dependency list. Returns True on success, False on invalid input or a
    duplicate job name.
    """
    # Avoid the mutable-default-argument pitfall: fresh list per call.
    env_vars = env_vars if env_vars is not None else []
    self.set_cluster_control_module("ClusterSubmission/ClusterControlHTCONDOR.sh")
    if not exec_script:
        logging.error("<_write_submission_file> No exec_script was given!")
        return False
    if mem < 0:
        logging.error("<_write_submission_file> No memory requirement for the job was specified.")
        return False
    job_name = self.subjob_name(sub_job)
    # Job names must be unique; any() avoids materializing the filtered list.
    if any(x.getJobName() == job_name for x in self.__job_dependency_dict):
        logging.error("The job %s has already been defined. Please ensure unique job names" % (job_name))
        return False
    log_string = "%s/%s%s" % (self.log_dir(),
                              sub_job if len(sub_job) else job_name,
                              "_$(CONDOR_TASK_ID)" if arraylength > 0 else "")
    exec_script = self.pack_environment(env_vars, exec_script)
    submission_content = [
        "universe = vanilla",
        "executable = %s" % (exec_script),
        "output = %s.out" % (log_string),
        "error = %s.err" % (log_string),
        "log = %s.log" % (log_string),
        "notification = Error",
        "notify_user = %s" % (self.mail_user()),
        "request_memory = %d" % (mem),
        "on_exit_remove = (ExitBySignal == False) && (ExitCode == 0)",
        "request_cpus = %d" % (nproc),
        #### Extra attributes
        "+MaxRuntime = %d" % (TimeToSeconds(run_time)),  ### CERN cloud
        "+RequestRuntime = %d" % (TimeToSeconds(run_time)),  ### DESY cloud
    ]
    # Only emit the accounting-group attribute when one is configured; the
    # old code appended an empty string, i.e. a blank line in the .sub file.
    if self.accountinggroup():
        submission_content += ["+MyProject = %s" % (self.accountinggroup())]
    if arraylength > 0:
        submission_content += ["environment = CONDOR_TASK_ID=$(CONDOR_TASK_ID)"]
    submission_content += ["queue"]
    self.__job_dependency_dict += [
        HTCondorJob(job_name=job_name,
                    submission_file=WriteList(submission_content, "%s/%s.sub" % (self.config_dir(), id_generator(25))),
                    arraylength=arraylength,
                    engine=self,
                    abbreviation=self.__assign_abb_letter())
    ]
    self.__submitted_jobs += 1 if arraylength <= 1 else arraylength
    return True