# NOTE: these snippets come from several scripts of the same package and rely on
# shared helpers (msg, verbose_msg, warning_msg, fatal_msg, run_cmd,
# run_in_parallel, bcolors, ...) plus PyROOT. Typical imports (the helper module
# name is an assumption, adjust to the actual package layout):
import os
from itertools import islice
from ROOT import TFile


def build_list_of_files(file_list):
    # Check that the runlist does not have duplicates
    if len(file_list) != len(set(file_list)):
        fatal_msg("Runlist has duplicated entries, fix runlist!")
    not_readable = []
    for i in file_list:  # Check that input files can be opened
        f = TFile(i.strip(), "READ")
        if not f.IsOpen():
            verbose_msg("Cannot open AOD file:", i, color=bcolors.WARNING)
            not_readable.append(i)
    if len(not_readable) > 0:
        warning_msg(len(not_readable), "files cannot be read and will be skipped")
        for i in not_readable:
            file_list.remove(i)
    files_per_batch = []
    iter_file_list = iter(file_list)
    for i in range(0, len(file_list)):
        sub_set = list(islice(iter_file_list, batch_size))
        if len(sub_set) <= 0:
            continue
        files_per_batch.append(sub_set)
    run_list = []
    if len(files_per_batch) > 0:
        for i, lines in enumerate(files_per_batch):
            p = os.path.join(out_path, f"{i}")
            if not os.path.isdir(p):
                os.makedirs(p)
            run_list.append(os.path.join(p, f"ListForRun5Analysis.{i}.txt"))
            with open(run_list[-1], "w") as f:
                for j in lines:
                    f.write(j.strip() + "\n")
    msg("Number of runs:", len(run_list))
    return run_list
def download_objects(input_file="t.root",
                     out_path="/tmp/ccdbtest2/",
                     host="http://ccdb-test.cern.ch:8080",
                     overwrite=False):
    msg("Downloading CCDB objects from input file", input_file)
    out_path = os.path.normpath(out_path)
    f = TFile(input_file, "READ")
    lk = f.GetListOfKeys()
    obj_done = []
    for i in lk:
        name = i.GetName()
        cycle = i.GetCycle()
        if name in obj_done:
            continue
        obj_done.append(name)
        obj = f.Get(f"{name};{cycle}")
        name = name.replace("--", "/")
        limits = [int(obj.GetPointY(j)) for j in range(obj.GetN())]
        verbose_msg(name, len(limits),
                    "First", limits[0], convert_timestamp(limits[0]),
                    "Last", limits[-1], convert_timestamp(limits[-1]))
        for j in limits:
            get_ccdb_obj(name, j,
                         out_path=out_path,
                         host=host,
                         show=False,
                         verbose=True,
                         tag=True,
                         overwrite_preexisting=overwrite)
    f.Close()
    for i in obj_downloaded:
        msg("Downloaded", obj_downloaded[i], i)
def main(paths_to_check, host):
    if 0:  # Iterative search
        # Initializing timestamp objects
        for i in paths_to_check:
            list_ccdb_object(i)
        # Performing iterative search
        iterative_search()
    else:  # Use the CCDB API
        for i in paths_to_check:
            i = i.strip()
            if i == "":
                continue
            if "." in i:  # It's a file!
                verbose_msg("Using", i, "as input file")
                with open(i, "r") as f:
                    for j in f:
                        j = j.strip()
                        if j == "":
                            continue
                        useapi(ccdb_path=j, host=host)
            else:
                useapi(ccdb_path=i, host=host)
    print_timestamps()
    # Saving to disk
    write_timestamps(entry_name="Created:")
def build_list_of_files(file_list):
    verbose_msg("Building list of files from", file_list)
    # Check that the runlist does not have duplicates
    unique_file_list = set(file_list)
    if len(file_list) != len(unique_file_list):
        fatal_msg("Runlist has duplicated entries, fix runlist!",
                  len(unique_file_list), "unique files, while got",
                  len(file_list), "files")
    file_status = {"Does not exist": [],
                   "Cannot be open": [],
                   "Was recovered": [],
                   "Is Ok": []}
    if check_input_file_integrity:
        # Check that input files can be opened
        for i in file_list:
            verbose_msg("Checking that TFile", i.strip(), "can be processed")
            # Append to the status list instead of overwriting it
            file_status[is_root_file_sane(i)].append(i)
    recovered_files = file_status["Was recovered"]
    not_readable = []
    for i in file_status:
        if i == "Is Ok":
            continue
        not_readable += file_status[i]
    if len(recovered_files) > 0:
        msg("Recovered", len(recovered_files), "files:\n")
    if len(not_readable) > 0:
        warning_msg(len(not_readable), "over", len(file_list),
                    "files cannot be read and will be skipped")
        for i in not_readable:
            if i not in file_list:
                warning_msg("did not find file to remove", f"'{i}'")
                continue
            file_list.remove(i)
    files_per_batch = []
    iter_file_list = iter(file_list)
    for i in range(0, len(file_list)):
        sub_set = list(islice(iter_file_list, batch_size))
        if len(sub_set) <= 0:
            continue
        files_per_batch.append(sub_set)
    run_list = []
    if len(files_per_batch) > 0:
        for i, lines in enumerate(files_per_batch):
            p = os.path.join(out_path, f"{i}")
            if not os.path.isdir(p):
                os.makedirs(p)
            run_list.append(os.path.join(p, f"ListForRun5Analysis.{i}.txt"))
            with open(run_list[-1], "w") as f:
                for j in lines:
                    f.write(j.strip() + "\n")
    msg("Number of runs:", len(run_list))
    return run_list
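# Hedged, self-contained sketch (not part of the original scripts): the batching
# above relies on `islice` consuming a shared iterator, so each call takes the
# next `batch_size` items until the iterator is exhausted.
from itertools import islice

def batch_files(items, batch_size=4):
    """Split `items` into consecutive chunks of at most `batch_size` entries."""
    it = iter(items)
    chunks = []
    while True:
        chunk = list(islice(it, batch_size))
        if not chunk:
            break
        chunks.append(chunk)
    return chunks

# Example: batch_files(["a.root", "b.root", "c.root"], 2)
# -> [["a.root", "b.root"], ["c.root"]]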
def run_o2_analysis(tmp_script_name, remove_tmp_script=False):
    verbose_msg("> starting run with", tmp_script_name)
    cmd = f"bash {tmp_script_name}"
    run_cmd(cmd)
    if remove_tmp_script:
        os.remove(tmp_script_name)
    verbose_msg("< end run with", tmp_script_name)
def copylist(fname="",
             jobs=InputArgument(1, "Number of parallel jobs to use", ["--njobs", "-j"], int)):
    """Takes a text file and downloads the listed files from the grid"""
    if jobs is None:
        jobs = 1
    verbose_msg("Copying files from list", fname, "with", jobs, "jobs")
    fname = path.normpath(fname)
    if not path.isfile(fname):
        warning_msg("Input file not provided! Aborting")
        return
    sofar = copied(fname, "So far")
    f = open(fname, "r")
    Group = []
    for line in f:
        if "%" in line:
            msg("Character % encountered! Aborting")
            break
        if "#" in line:
            msg("Character # encountered! Skipping")
            continue
        line = "./" + line
        if jobs == 1:
            copyfile(line)
        else:
            Group.append(line)
    if jobs > 1:
        msg("Copying list in parallel with", jobs, "jobs")
        run_in_parallel(processes=jobs,
                        job_runner=copyfile,
                        job_arguments=Group,
                        job_message="Downloading files",
                        linearize_single_core=True)
    copied(fname, extra_msg="In recent run", last_time=sofar)
def copied(fname="", extra_msg="", last_time=None, check_root_files=True):
    """Checks how many files of a text list were correctly copied from the grid to the PC"""
    verbose_msg("Checking how many files were copied from list", fname)
    fname = fname.strip()
    f = open(fname, "r")
    n_to_copy = 0
    n_copied = 0
    not_sane = []
    for line in f:
        if "%" in line:
            break
        if "#" in line:
            continue
        line = path.normpath("./" + line.strip())
        n_to_copy += 1
        if path.isfile(line):
            n_copied += 1
            if check_root_files:
                if not check_root_file(line):
                    msg(f"'{line}' downloaded but with issues", color=bcolors.WARNING)
                    not_sane.append(line)
        else:
            msg(f"'{line}' yet to download", color=bcolors.OKBLUE)
    if last_time is not None:
        n_copied -= last_time[1]
    msg(extra_msg,
        "downloaded {}/{}, {:.1f}%".format(n_copied, n_to_copy,
                                           100 * float(n_copied) / float(n_to_copy)),
        f" -- copied {n_copied} files more, in total copied {last_time[1] + n_copied} files"
        if last_time is not None else "",
        f"{len(not_sane)} are not OK" if len(not_sane) > 0 else "")
    return n_to_copy, n_copied
def inspect(name, tree_name):
    tree_name = f"{name}/{tree_name}"
    t = input_file.Get(tree_name)
    if not t:
        warning_msg("Did not get tree", tree_name)
        return -1
    if verbose:
        input_file.Get(name).ls()
    verbose_msg(tree_name, t.GetEntries())
    return t.GetEntries()
def write_instructions(instructions, n=1, check_status=False):
    verbose_msg("--\t", instructions.strip())
    tmp_script.write(f"{instructions}" + "".join(["\n"] * n))
    if check_status:
        tmp_script.write("\nReturnValue=$?\n")
        tmp_script.write("if [[ $ReturnValue != 0 ]]; then\n")
        tmp_script.write("    echo \"Encountered error with command: '")
        tmp_script.write(instructions.replace("\"", "\\\"").strip())
        tmp_script.write("'\"\n")
        tmp_script.write("    exit $ReturnValue\n")
        tmp_script.write("fi\n\n")
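# For reference, a command written with `check_status=True` expands to a bash
# stanza of this shape in the generated script (illustrative rendering):
#
#   <command>
#   ReturnValue=$?
#   if [[ $ReturnValue != 0 ]]; then
#       echo "Encountered error with command: '<command>'"
#       exit $ReturnValue
#   fi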
def main(input_file_name="/tmp/AO2D.root", verbose=False):
    global bad_files
    verbose_msg("Checking file", input_file_name)
    input_file = TFile(input_file_name, "READ")
    if verbose:
        input_file.ls()
    list_of_keys = input_file.GetListOfKeys()

    def inspect(name, tree_name):
        tree_name = f"{name}/{tree_name}"
        t = input_file.Get(tree_name)
        if not t:
            warning_msg("Did not get tree", tree_name)
            return -1
        if verbose:
            input_file.Get(name).ls()
        verbose_msg(tree_name, t.GetEntries())
        return t.GetEntries()

    for df_index, i in enumerate(list_of_keys):
        if i.GetName() == "metaData":
            continue

        def add_bad():
            bad_files.setdefault(input_file_name, []).append(i.GetName())

        dictionary_of_counts = {"O2bc": None,
                                "O2collision": None,
                                "O2track": None,
                                "O2trackcov": None,
                                "O2trackextra": None}
        for j in dictionary_of_counts:
            dictionary_of_counts[j] = inspect(i.GetName(), j)
            if dictionary_of_counts[j] < 0:
                add_bad()

        def must_be_same(*args):
            counts = []
            names = []
            for k in args:
                counts.append(dictionary_of_counts[k])
                names.append(k)
            if len(set(counts)) != 1:
                add_bad()
                warning_msg("Did not get equal counts for", ", ".join(names),
                            counts, "in DF", df_index, "/", len(list_of_keys),
                            ":", i.GetName())

        must_be_same("O2track", "O2trackcov", "O2trackextra")
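# Hedged usage sketch: scan a set of AODs and report which data frames had
# inconsistent or missing tables. The directory is hypothetical; `bad_files`
# is the module-level dictionary filled by `main` above.
# import glob
# bad_files = {}
# for aod in glob.glob("/tmp/AODs/*.root"):
#     main(aod, verbose=False)
# for fname, dfs in bad_files.items():
#     print(fname, "has issues in", dfs)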
def is_root_file_sane(file_name_to_check):
    file_name_to_check = file_name_to_check.strip()
    if not os.path.isfile(file_name_to_check):
        warning_msg("File", file_name_to_check, "does not exist")
        return "Does not exist"
    file_to_check = TFile(file_name_to_check, "READ")
    if not file_to_check.IsOpen():
        warning_msg("Cannot open AOD file:", file_name_to_check)
        return "Cannot be open"
    elif file_to_check.TestBit(TFile.kRecovered):
        verbose_msg(file_name_to_check, "was a recovered file")
        return "Was recovered"
    else:
        verbose_msg(file_name_to_check, "is OK")
        return "Is Ok"
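# Hedged usage sketch: the four status strings returned above act as a small
# enum, so callers can filter or bucket files by status. File names are
# illustrative only.
# candidates = ["AODRun5.0.root", "AODRun5.1.root"]
# good = [f for f in candidates if is_root_file_sane(f) == "Is Ok"]
# by_status = {}
# for f in candidates:
#     by_status.setdefault(is_root_file_sane(f), []).append(f)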
def proceed(handle_exit=True):
    msg(f"Downloading '{toget}'", color=bcolors.OKGREEN)
    print_now()
    if Version == 0:
        cpycmd = "alien_cp -v {} file:{}".format(toget, todir)
    else:
        cpycmd = "alien_cp -v {} file://{}".format(toget, todir)
    verbose_msg("Running command", cpycmd)
    if handle_exit:
        try:
            run_cmd(cpycmd)
        except KeyboardInterrupt:
            return False
    else:
        run_cmd(cpycmd)
    return True
def opt(entry, require=True):
    try:
        o = parser.get(config_entry, entry)
        b = ['yes', 'no', 'on', 'off', 'true', 'false']
        for i in b:
            if o.lower() == i:
                o = parser.getboolean(config_entry, entry)
                break
        verbose_msg("Got option", entry, "=", f"'{o}'")
        running_options[entry] = o
        return o
    except Exception:
        if require:
            fatal_msg("Missing entry", f"'{entry}'",
                      "in configuration file", f"'{configuration_file}'")
        return None
def run_o2_analysis(tmp_script_name,
                    remove_tmp_script=False,
                    explore_bad_files=False,
                    time_it=True):
    global number_of_runs
    verbose_msg("> starting run with", tmp_script_name)
    cmd = f"bash {tmp_script_name}"
    if do_bash_script:
        with open("parallelbash.sh", "a") as fout:
            with open("parallelbash.sh", "r") as fin:
                lastline = fin.readlines()[-1]
                if lastline.startswith("#"):
                    lastline = int(lastline.strip("#"))
                else:
                    lastline = 0
            fout.write(f"echo Running {lastline}\n")
            fout.write(f"{cmd} &\n")
            lastline += 1
            if lastline % (bash_parallel_jobs + 1) == 0:
                fout.write("wait\n")
            fout.write(f"\n#{lastline}\n")
        return
    if explore_bad_files:
        if run_cmd(cmd, check_status=True, throw_fatal=False, time_it=time_it) == False:
            list_name = os.listdir(os.path.dirname(tmp_script_name))
            for i in list_name:
                if "ListForRun5Analysis" in i:
                    list_name = i
                    break
            if not isinstance(list_name, list):
                with open(os.path.join(os.path.dirname(tmp_script_name), list_name)) as f:
                    list_name = []
                    for i in f:
                        list_name.append(i)
            warning_msg("Issue when running", tmp_script_name, "with", list_name)
    else:
        run_cmd(cmd, log_file=f"{tmp_script_name}.log", time_it=time_it)
    if remove_tmp_script:
        os.remove(tmp_script_name)
    verbose_msg("< end run with", tmp_script_name)
    return tmp_script_name
def set_config(config_file, config, value):
    config = config.strip()
    value = value.strip()
    config_string = f"{config} {value}"
    run_cmd("sed -i -e \"" f"s/{config} .*$/{config_string}" "\" " + config_file)
    # Checking that the file has the correct configuration
    with open(config_file) as f:
        has_it = False
        config_string = config_string.replace("\\", "").strip("/")
        for lineno, line in enumerate(f):
            if line.strip() == config_string:
                verbose_msg(f"Found config string '{config_string}'",
                            f"at line #{lineno} '{line.strip()}'")
                has_it = True
                break
        if not has_it:
            fatal_msg("Configuration file", config_file,
                      f"does not have config string '{config_string}'")
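# Illustrative expansion (hypothetical values): a call such as
#   set_config("propagate.tcl", "set barrel_Bz", "2e\-1/")
# runs roughly
#   sed -i -e "s/set barrel_Bz .*$/set barrel_Bz 2e\-1/" propagate.tcl
# i.e. the `value` argument is expected to carry the closing '/' of the sed
# substitution (and any escaping), as the call sites further below do.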
def check_root_file(file_name):
    if not file_name.endswith(".root"):
        warning_msg("Testing a non-ROOT file:", file_name)
        return True
    if not path.isfile(file_name):
        warning_msg("Testing a non-existing file:", file_name)
        return True
    try:
        f = TFile(file_name, "READ")
        if f.TestBit(TFile.kRecovered):
            msg("File", file_name, "was recovered", color=bcolors.WARNING)
            return False
        if not f.IsOpen():
            msg("File", file_name, "is not open", color=bcolors.WARNING)
            return False
    except OSError:
        msg("Issue when checking file", file_name, color=bcolors.WARNING)
        return False
    verbose_msg(file_name, "is ok and has size",
                os.path.getsize(file_name) * 1e-6, "MB")
    return True
def useapi(ccdb_path, host):
    global timestamps
    objectlist = get_ccdb_api(host).list(ccdb_path, False, "text/plain")
    bunch_objects = []
    starting_sequence = "ID: "
    for i in objectlist.split("\n"):
        if starting_sequence in i:
            bunch_objects.append("")
        if len(bunch_objects) <= 0:
            warning_msg("Skipping", i, "because found no object there")
            continue
        bunch_objects[-1] += f"{i}\n"
    verbose_msg("Found", len(bunch_objects), "objects in path", ccdb_path)
    for counter, i in enumerate(bunch_objects):
        if 0:
            print(f"Object #{counter}/{len(bunch_objects)-1}")
            print(i)
        t = {}
        for j in i.split("\n"):
            save_fields(j, fields_of_interest=t)
        timestamps.setdefault(ccdb_path, []).append(t)
def iterative_search(maximum_found_objects=2000,
                     max_search_iterations=-20,
                     minimum_timestamp=1615197295100,
                     delta_timestamp=1 * 1000):
    """delta_timestamp is in milliseconds"""
    for i in timestamps:
        verbose_msg("Iteratively searching for", i, "with",
                    max_search_iterations, "iterations")
        delta = delta_timestamp
        iterations = 0
        while True:
            iterations += 1
            if max_search_iterations > 0 and iterations > max_search_iterations:
                msg("Max search iterations reached for", i,
                    f"({iterations} > {max_search_iterations})")
                break
            last_timestamp = timestamps[i][-1]["Valid-From:"]
            if last_timestamp - delta < minimum_timestamp:
                msg("Found old enough", i,
                    f"({last_timestamp} < {minimum_timestamp})")
                break
            listing_status = list_ccdb_object(i, timestamp=last_timestamp - delta)
            if listing_status == 0:
                verbose_msg("++ Found an object",
                            (last_timestamp - timestamps[i][-1]["Valid-From:"]) * 0.001,
                            "seconds younger with delta", delta, "ms")
                delta = delta_timestamp
            else:
                delta += delta_timestamp
            if maximum_found_objects > 0 and len(timestamps[i]) >= maximum_found_objects:
                msg("Found enough", i, f"({maximum_found_objects})")
                break
    print_timestamps()
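# The helpers above call convert_timestamp(...) on CCDB timestamps, which are in
# milliseconds since the epoch (cf. delta_timestamp above). A minimal sketch of
# such a helper, assuming the real implementation may differ:
import datetime

def convert_timestamp(ts_ms):
    """Render a millisecond timestamp as human-readable local time."""
    return datetime.datetime.fromtimestamp(ts_ms / 1000.0).strftime("%d %B %Y %H:%M:%S")

# convert_timestamp(1615197295100) -> e.g. '08 March 2021 ...' (local time)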
def process_run(run_number):
    processing_time = time.time()
    verbose_msg("> starting run", run_number)
    run_cmd(f"bash runner{run_number}.sh")
    aod_name = f"AODRun5.{run_number}.root"
    if not os.path.isfile(aod_name):
        msg(f"++ something went wrong for run {run_number}, no output AOD file {aod_name} found.",
            f"Please check: 'AODRun5.{run_number}.log'",
            color=bcolors.FAIL)
    verbose_msg("< complete run", run_number)
    processing_time = time.time() - processing_time
    verbose_msg(f"-- took {processing_time} seconds --", color=bcolors.BOKGREEN)
def merge_aod(in_path="",
              out_path="./",
              input_file="AO2D.root",
              must_have="ctf",
              bunch_size=50,
              skip_already_existing=True):
    in_path = os.path.normpath(in_path)
    out_path = os.path.normpath(out_path)
    file_list = []
    for root, dirs, files in os.walk(in_path):
        for file in files:
            if file == input_file:
                to_merge = os.path.abspath(os.path.join(root, file))
                print(to_merge)
                if must_have is not None and must_have in to_merge:
                    file_list.append(to_merge)
    verbose_msg("Found", len(file_list), "files called", input_file)
    # Divide the list into bunches
    file_list = [file_list[i:i + bunch_size]
                 for i in range(0, len(file_list), bunch_size)]
    for i in enumerate(file_list):
        bunch_bytes = 0  # accumulated input size (renamed so it no longer shadows bunch_size)
        with open("inputfile.txt", "w") as f:
            for j in i[1]:
                f.write(f"{j}\n")
                bunch_bytes += os.path.getsize(j)
        out_aod = os.path.join(out_path, f"AO2D_{i[0]}.root")
        verbose_msg("Merging bunch of", len(i[1]), "files, i.e.",
                    bunch_bytes * 1e-6, "MB")
        if skip_already_existing and os.path.isfile(out_aod):
            verbose_msg(out_aod, "already existing, skipping")
            continue
        tmp_aod = os.path.join(out_path, "MergedAOD.root")
        run_cmd(f"o2-aod-merger --input inputfile.txt --output {tmp_aod} --skip-non-existing-files",
                comment=f"Merging AODs into {out_aod}")
        os.rename(tmp_aod, out_aod)
        merged_size = os.path.getsize(out_aod)
        msg("Produced a merged file of", merged_size * 1e-6, "MB from",
            bunch_bytes * 1e-6, "MB, compression:", merged_size / bunch_bytes)
def do_copy(in_file, out_file=None, in_path=None):
    """Function to copy files"""
    in_file = os.path.normpath(in_file)  # Normalize path
    if out_file is None:
        # If left unconfigured, use the same name but put it in the current path
        out_file = os.path.basename(in_file)
    out_file = os.path.normpath(out_file)  # Normalize path
    if in_path is not None:
        in_file = os.path.join(in_path, in_file)
    in_file = os.path.expanduser(os.path.expandvars(in_file))
    if avoid_file_copy:
        if os.path.isfile(out_file) or (in_file == out_file):
            verbose_msg("Skipping copy of", in_file, "to", out_file,
                        "because of --avoid-config-copy")
        else:
            verbose_msg("Linking", in_file, "to", out_file,
                        "because of --avoid-config-copy")
            os.symlink(in_file, out_file)
        return
    verbose_msg("Copying", in_file, "to", out_file)
    shutil.copy2(in_file, out_file)
def list_ccdb_object(ccdb_path,
                     timestamp=1950004155840,
                     host="http://ccdb-test.cern.ch:8080",
                     tmp_file="out.txt",
                     # fields_to_save=["Valid-Until:", "Valid-From:", "Created:"],
                     fields_to_save=["Valid-From:", "Created:"],
                     time_it=False):
    global timestamps
    listing_calls[ccdb_path] = listing_calls.setdefault(ccdb_path, 0) + 1
    verbose_msg("Listing MO", ccdb_path, "for timestamp", timestamp,
                convert_timestamp(timestamp), "in host", host,
                "iteration #", listing_calls[ccdb_path])
    run_cmd(f"curl -i -L {host}/{ccdb_path}/{timestamp} --output {tmp_file} 2> /dev/null"
            f" && cat {tmp_file} | head -n 20 > {tmp_file}2",
            check_status=False,
            time_it=time_it)
    os.rename(f"{tmp_file}2", tmp_file)
    t = {}
    with open(tmp_file) as f:
        for i in f:
            i = i.strip()
            # Checking that the request was OK
            if "HTTP" in i:
                if i == "HTTP/1.1 404":
                    verbose_msg("Did not find object")
                    return
            for j in fields_to_save:
                if j in i:
                    i = i.split(" ")
                    t[i[0]] = int(i[1])
    single_line = " ---> "
    for i in fields_to_save:
        single_line += f"{i} {t[i]} {convert_timestamp(t[i])} +++ "
    verbose_msg(single_line)
    timestamps.setdefault(ccdb_path, []).append(t)
    return 0
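# Hedged sketch: the listing above shells out to `curl -i -L` and greps the
# response headers. The same fields can be read with the standard library alone;
# that the CCDB instance exposes them exactly like this is an assumption based
# on the header parsing above.
import urllib.request

def fetch_ccdb_header_fields(ccdb_path, timestamp,
                             host="http://ccdb-test.cern.ch:8080",
                             fields=("Valid-From", "Created")):
    """Return selected response headers for a CCDB object as a dict."""
    with urllib.request.urlopen(f"{host}/{ccdb_path}/{timestamp}") as resp:
        return {k: resp.headers.get(k) for k in fields}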
def get_ccdb_obj(ccdb_path,
                 timestamp,
                 out_path,
                 host,
                 show,
                 tag=False,
                 overwrite_preexisting=True,
                 use_o2_api=True,
                 check_metadata=True,
                 interesting_metadata=["ObjectType", "PassName", "PeriodName",
                                       "RunNumber", "Valid-From", "Valid-Until", ""]):
    """
    Gets the CCDB object from 'ccdb_path' and 'timestamp' and downloads it into 'out_path'.
    If 'tag' is True then the filename is renamed after the timestamp.
    """
    def check_rootfile(fname):
        try:
            f = TFile(fname, "READ")
            if f.TestBit(TFile.kRecovered):
                warning_msg("File", fname, "was recovered")
                return False
            elif not f.IsOpen():
                warning_msg("File", fname, "is not open")
                return False
        except OSError:
            warning_msg("Issue when checking file", fname)
            return False
        return True

    verbose_msg("Getting obj", host, ccdb_path, "with timestamp",
                timestamp, convert_timestamp(timestamp))
    out_name = "snapshot.root"
    if tag:
        out_name = f"snapshot_{timestamp}.root"
    out_path = os.path.normpath(out_path)
    fullname = os.path.join(out_path, ccdb_path, out_name)
    if os.path.isfile(fullname) and not overwrite_preexisting:
        if check_rootfile(fullname):
            msg("File", fullname, "already existing, not overwriting")
            return
    if use_o2_api:
        api = get_ccdb_api(host)
        if timestamp == -1:
            timestamp = o2.ccdb.getCurrentTimestamp()
        metadata = std.map('string,string')()
        api.retrieveBlob(ccdb_path, out_path, metadata, timestamp)
        if tag:
            os.rename(os.path.join(out_path, ccdb_path, "snapshot.root"), fullname)
    else:
        cmd = f"o2-ccdb-downloadccdbfile --host {host} --path {ccdb_path} --dest {out_path} --timestamp {timestamp}"
        cmd += f" -o {out_name}"
        print(cmd)
        subprocess.run(cmd.split())
    if not os.path.isfile(fullname):
        raise ValueError("File", fullname, "not found")
    if not check_rootfile(fullname):
        raise ValueError("File", fullname, "is not Ok")
    if check_metadata:
        f = TFile(fullname, "READ")
        meta = f.Get("ccdb_meta")
        verbose_msg("Metadata")
        m_d = {"Valid-From": None, "Valid-Until": None}
        for i in meta:
            if i[0] in m_d:
                m_d[i[0]] = int(i[1])
            if interesting_metadata[0] != "" and i[0] not in interesting_metadata:
                continue
            if i[0] in m_d:
                verbose_msg(i, convert_timestamp(int(i[1])))
            else:
                verbose_msg(i)
        if timestamp < m_d["Valid-From"] or timestamp > m_d["Valid-Until"]:
            warning_msg("Timestamp asked is outside of the validity window", timestamp, m_d)

        def print_info(entry):
            print("Object", entry, meta[entry])

        print_info("Last-Modified")
        if show:
            obj = f.Get("ccdb_object")
            obj.Draw()
            time_box = TPaveText(.01, .9, 0.3, 0.99, "NDC")
            time_box.AddText(ccdb_path)
            time_box.AddText(f"timestamp {timestamp}")
            time_box.AddText(f"{convert_timestamp(timestamp)}")
            time_box.Draw()
            gPad.Update()
            input("Press enter to continue")
    return fullname
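# Hedged usage sketch (path and timestamp are illustrative): fetch one object
# into /tmp/ccdb, tagging the snapshot file with its timestamp.
# get_ccdb_obj("qc/TOF/MO/TaskRaw/hDiagnostic", 1615197295100,
#              out_path="/tmp/ccdb", host="http://ccdb-test.cern.ch:8080",
#              show=False, tag=True, overwrite_preexisting=False)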
def listfiles(Path=None,
              What=InputArgument("AO2D.root", "Name of the file to look for", "-w"),
              MakeXML=False,
              MustHave=InputArgument(None, "String that must be in good files path", ["-m"], nargs="+"),
              MustHaveCount=InputArgument(1, "How many times the MustHave string must be present", ["-nm"], thistype=int),
              MustNotHave=InputArgument(None, "String that must not be in good files path", ["-M"]),
              MustNotHaveCount=InputArgument(1, "How many times the MustNotHave string must be present", ["-NM"], thistype=int),
              SubDirs="",
              User=None,
              MainPath=""):
    """
    Lists the content of the path given in input and optionally writes it to file.
    Can also format the output as XML so as to run on the grid; this is done if
    the output filename has the .xml extension.
    """
    verbose_msg("Listing files", What, "in path", Path)
    if Path is None or Path == "":
        raise ValueError("Passed empty path", Path)
    if User is None:
        User = getpass.getuser()
    msg("Getting user:", User)
    verbose_msg("Using path:", Path)
    PathToScan = path.join(MainPath, User[0], User, Path)
    if What is None:
        for i in run_cmd("alien_ls {}".format(PathToScan), check_status=False):
            print(i)
        return
    bashCommand = "alien_find "
    if MakeXML:  # Printing name of output list
        bashCommand += " -x collection "
    bashCommand += "{} {} ".format(PathToScan, What)
    bashCommand = bashCommand.strip()
    verbose_msg("This is the list of found files:")
    list_of_found_files = run_cmd(bashCommand,
                                  print_output=False,
                                  check_status=False).split("\n")
    FilterList = []
    for i in list_of_found_files:
        if not MakeXML and What not in i:
            continue
        if MustHave is not None:
            hasit = True
            if not isinstance(MustHave, list):
                raise ValueError("MustHave is not a list!", MustHave)
            for e in MustHave:
                if e not in i:
                    hasit = False
                if i.count(e) < MustHaveCount:
                    hasit = False
            if not hasit:
                msg(f"Discarding line '{i}' as it doesn't have '{MustHave}' {MustHaveCount} times",
                    color=bcolors.OKBLUE)
                continue
        if MustNotHave and MustNotHave in i:
            if i.count(MustNotHave) >= MustNotHaveCount:
                msg(f"Discarding line '{i}' as it has '{MustNotHave}' {MustNotHaveCount} times",
                    color=bcolors.OKBLUE)
                continue
        if SubDirs:
            istrip = i.replace(PathToScan, "").strip().strip("/")
            verbose_msg(istrip)
            istrip = istrip.split("/")
            istrip = istrip[:-1]
            verbose_msg("here:", istrip, len(istrip))
            if len(istrip) != int(SubDirs):
                continue
        FilterList.append(i)
    msg(f"Found {len(FilterList)} files responding to all criteria")
    return FilterList
def set_o2_analysis(o2_analyses=["o2-analysis-hf-task-d0 --pipeline qa-tracking-kine:4,qa-tracking-resolution:4"],
                    o2_arguments="--shm-segment-size 16000000000 --readers 4 --configuration json://$PWD/dpl-config_std.json",
                    input_file="listfiles.txt",
                    tag="QA",
                    output_files=["AnalysisResults.root",
                                  "AnalysisResults_trees.root",
                                  "QAResults.root"],
                    dpl_configuration_file=None):
    """
    Function to prepare everything you need for your O2 analysis, from the output
    folder to the script containing the O2 workflow.
    The output can be found in the same directory as the input data.
    """
    # Defining log file
    log_file = f"log_{tag.lower()}.log"
    verbose_msg("Configuring the tasks with O2", color=bcolors.BOKBLUE)
    # Creating output directory
    output_path = os.path.dirname(os.path.abspath(input_file))
    # Checking input file
    verbose_msg("Using", input_file, "as input file")
    if not input_file.endswith(".root"):
        input_file = f"@{os.path.join(os.getcwd(), input_file)}"
    # Creating the script to run O2
    tmp_script_name = os.path.join(output_path, f"tmpscript_{tag.lower()}.sh")
    with open(tmp_script_name, "w") as tmp_script:
        verbose_msg("Writing o2 instructions to", f"'{tmp_script_name}'")

        def write_instructions(instructions, n=1, check_status=False):
            verbose_msg("--\t", instructions.strip())
            tmp_script.write(f"{instructions}" + "".join(["\n"] * n))
            if check_status:
                tmp_script.write("\nReturnValue=$?\n")
                tmp_script.write("if [[ $ReturnValue != 0 ]]; then\n")
                tmp_script.write("    echo \"Encountered error with command: '")
                tmp_script.write(instructions.replace("\"", "\\\"").strip())
                tmp_script.write("'\"\n")
                tmp_script.write("    exit $ReturnValue\n")
                tmp_script.write("fi\n")

        write_instructions("#!/bin/bash", n=2)
        write_instructions(f"cd {output_path}", n=2)  # Move to run dir
        write_instructions("pwd", n=2)  # Print run dir
        for i in output_files:  # Removing old output
            write_instructions(f"rm -v {i} 2>&1")
            i = i.replace(".root", f"_{tag}")
            write_instructions(f"rm -v {i}.root 2>&1")
        write_instructions("\n")
        o2_workflow = ""
        for i in o2_analyses:
            line = f"{i} {o2_arguments}"
            if i == o2_analyses[0]:
                line += f" --aod-file {input_file}"
            if dpl_configuration_file is not None:
                line += f" --configuration json://{dpl_configuration_file}"
            if len(o2_analyses) > 1 and i != o2_analyses[-1]:
                line = f"{line} | \\\n \t"
            o2_workflow += line
        log_line = "echo \"Running: \n \t" + o2_workflow.replace("\t", "") + "\""
        log_line += f" > {log_file}"
        write_instructions(log_line, n=2)
        write_instructions(o2_workflow + f" >> {log_file} \n \t", check_status=True)
        write_instructions("\n")
        write_instructions(f"if grep -q \"\[ERROR\]\" {log_file}; then echo \": got some errors in '{log_file}'\" && exit 1; fi")
        write_instructions(f"if grep -q \"\[FATAL\]\" {log_file}; then echo \": got some fatals in '{log_file}'\" && exit 1; fi")
        write_instructions("\n")
        for i in output_files:  # Renaming output with tag
            r = i.replace(".root", f"_{tag}.root")
            write_instructions(f"mv {i} {r} 2>&1")
        write_instructions("\n")
        write_instructions("exit 0")
    return tmp_script_name
def main(mode,
         input_file,
         out_path,
         out_tag="",
         batch_size=4,
         n_max_files=100,
         dpl_configuration_file=None,
         njobs=1,
         merge_output=True,
         merge_only=False,
         shm_mem_size=16000000000,
         rate_lim=1000000000,
         readers=1,
         avoid_overwriting_merge=False,
         clean_localhost_after_running=True,
         extra_arguments="",
         resume_previous_analysis=False,
         check_input_file_integrity=True,
         analysis_timeout=None,
         linearize_single_core=True):
    if do_bash_script:
        njobs = 1
        linearize_single_core = True
    if len(input_file) == 1:
        input_file = input_file[0]
    else:
        input_file = input_file[0:n_max_files]
    if not merge_only:
        msg("Running", f"'{mode}'", "analysis on", f"'{input_file}'", color=bcolors.BOKBLUE)
        msg("Maximum", n_max_files, "files with batch size", batch_size,
            "and", njobs, "jobs" if njobs > 1 else "job", color=bcolors.BOKBLUE)
    else:
        msg("Merging output of", f"'{mode}'", "analysis", color=bcolors.BOKBLUE)
    if analysis_timeout is not None:
        msg("Using analysis timeout of", analysis_timeout, "seconds", color=bcolors.BOKBLUE)
        analysis_timeout = f"--time-limit {analysis_timeout}"
    else:
        analysis_timeout = ""
    o2_arguments = f"-b --shm-segment-size {shm_mem_size} --aod-memory-rate-limit {rate_lim} --readers {readers} {analysis_timeout}"
    o2_arguments += extra_arguments
    if mode not in analyses:
        raise ValueError("Did not find analyses matching mode", mode,
                         ", please choose in", ", ".join(analyses.keys()))
    an = analyses[mode]
    tag = mode + out_tag
    # Build input file list
    input_file_list = []

    def is_root_file_sane(file_name_to_check):
        file_name_to_check = file_name_to_check.strip()
        if not os.path.isfile(file_name_to_check):
            warning_msg("File", file_name_to_check, "does not exist")
            return "Does not exist"
        file_to_check = TFile(file_name_to_check, "READ")
        if not file_to_check.IsOpen():
            warning_msg("Cannot open AOD file:", file_name_to_check)
            return "Cannot be open"
        elif file_to_check.TestBit(TFile.kRecovered):
            verbose_msg(file_name_to_check, "was a recovered file")
            return "Was recovered"
        else:
            verbose_msg(file_name_to_check, "is OK")
            return "Is Ok"

    def build_list_of_files(file_list):
        verbose_msg("Building list of files from", file_list)
        # Check that the runlist does not have duplicates
        unique_file_list = set(file_list)
        if len(file_list) != len(unique_file_list):
            fatal_msg("Runlist has duplicated entries, fix runlist!",
                      len(unique_file_list), "unique files, while got",
                      len(file_list), "files")
        file_status = {"Does not exist": [],
                       "Cannot be open": [],
                       "Was recovered": [],
                       "Is Ok": []}
        if check_input_file_integrity:
            # Check that input files can be opened
            for i in file_list:
                verbose_msg("Checking that TFile", i.strip(), "can be processed")
                # Append to the status list instead of overwriting it
                file_status[is_root_file_sane(i)].append(i)
        recovered_files = file_status["Was recovered"]
        not_readable = []
        for i in file_status:
            if i == "Is Ok":
                continue
            not_readable += file_status[i]
        if len(recovered_files) > 0:
            msg("Recovered", len(recovered_files), "files:\n")
        if len(not_readable) > 0:
            warning_msg(len(not_readable), "over", len(file_list),
                        "files cannot be read and will be skipped")
            for i in not_readable:
                if i not in file_list:
                    warning_msg("did not find file to remove", f"'{i}'")
                    continue
                file_list.remove(i)
        files_per_batch = []
        iter_file_list = iter(file_list)
        for i in range(0, len(file_list)):
            sub_set = list(islice(iter_file_list, batch_size))
            if len(sub_set) <= 0:
                continue
            files_per_batch.append(sub_set)
        run_list = []
        if len(files_per_batch) > 0:
            for i, lines in enumerate(files_per_batch):
                p = os.path.join(out_path, f"{i}")
                if not os.path.isdir(p):
                    os.makedirs(p)
                run_list.append(os.path.join(p, f"ListForRun5Analysis.{i}.txt"))
                with open(run_list[-1], "w") as f:
                    for j in lines:
                        f.write(j.strip() + "\n")
        msg("Number of runs:", len(run_list))
        return run_list

    if type(input_file) is list:
        input_file = [os.path.join(os.getcwd(), i) for i in input_file]
        input_file_list = build_list_of_files(input_file)
    elif not input_file.endswith(".root"):
        with open(input_file, "r") as f:
            lines = f.readlines()
            msg("Building input list from", len(lines), "inputs, limiting to", n_max_files)
            if len(lines) > n_max_files:
                lines = lines[0:n_max_files]
            lines = [os.path.join(os.path.dirname(os.path.abspath(input_file)), i)
                     for i in lines]
            input_file_list = build_list_of_files(lines)
    else:
        input_file_list = [os.path.join(os.getcwd(), input_file)]
    if dpl_configuration_file is not None:
        dpl_configuration_file = os.path.join(os.getcwd(), dpl_configuration_file)
    run_list = []
    for i, j in enumerate(input_file_list):
        run_list.append(set_o2_analysis(an,
                                        o2_arguments=o2_arguments,
                                        input_file=j,
                                        tag=tag,
                                        dpl_configuration_file=dpl_configuration_file,
                                        resume_previous_analysis=resume_previous_analysis,
                                        write_runner_script=not merge_only))
    if not merge_only:
        if do_bash_script:
            with open("parallelbash.sh", "w") as f:
                f.write("#!/bin/bash\n\n")
                f.write("echo \"Start running\"\n\n")
                f.write("date\n\n")
                f.write("""function trap_ctrlc (){
    # perform cleanup here
    echo "Ctrl-C caught...performing clean up"
    exit 2
}\n\n""")
                f.write("""trap "trap_ctrlc" 2\n""")
        run_in_parallel(processes=njobs,
                        job_runner=run_o2_analysis,
                        job_arguments=run_list,
                        job_message=f"Running analysis, it's {datetime.datetime.now()}",
                        linearize_single_core=linearize_single_core)
        if do_bash_script:
            with open("parallelbash.sh", "a") as f:
                f.write("wait\n\n")
                f.write("date\n\n")
            msg("Now run the bash script with `bash parallelbash.sh`")
            return
    if clean_localhost_after_running:
        run_cmd("find /tmp/ -maxdepth 1 -name localhost* -user $(whoami) | xargs rm -v 2>&1",
                check_status=False)
    if (merge_output or merge_only) and len(run_list) > 1:
        files_to_merge = []
        for i in input_file_list:
            p = os.path.dirname(os.path.abspath(i))
            for j in os.listdir(p):
                if j.endswith(f"_{tag}.root"):
                    files_to_merge.append(os.path.join(p, j))
        if len(files_to_merge) == 0:
            warning_msg("Did not find any file to merge for tag", tag)
            return
        files_per_type = {}  # List of files to be merged per type
        # List of files to be merged per type that are not declared sane
        non_sane_files_per_type = {}
        for i in files_to_merge:
            fn = os.path.basename(i)  # defined before use in both branches
            if is_root_file_sane(i) != "Is Ok":
                non_sane_files_per_type.setdefault(fn, []).append(i)
                warning_msg("Result file", i, "is not sane")
                continue
            files_per_type.setdefault(fn, []).append(i)
        for i in non_sane_files_per_type:
            warning_msg("Non sane files for type", i)
            for j in non_sane_files_per_type[i]:
                msg(j)
        merged_files = []
        for i in files_per_type:
            merged_file = os.path.join(out_path, i)
            if avoid_overwriting_merge and os.path.isfile(merged_file):
                warning_msg("file", merged_file,
                            "is already found, remove it before merging;"
                            " you can use the --mergeonly flag to avoid running the analysis again")
                continue
            merged_files.append(merged_file)
            merge_file_list = os.path.join(os.path.dirname(os.path.abspath(merged_file)),
                                           "tomerge_" + "".join(i.split(".")[:-1]) + ".txt")
            verbose_msg("List of files to be merged:", merge_file_list)
            with open(merge_file_list, "w") as fmerge:
                for j in files_per_type[i]:
                    fmerge.write(j + "\n")
            if len(files_per_type[i]) > len(run_list):
                fatal_msg("Trying to merge too many files of type", i,
                          "for tag", tag, ":", len(files_per_type[i]),
                          "vs", len(run_list), "runs")
            msg("Merging", len(files_per_type[i]), "files to", merged_file)
            run_cmd(f"hadd -j {njobs} -f {merged_file} `cat {merge_file_list}`",
                    log_file=merge_file_list.replace(".txt", ".log"),
                    time_it=True,
                    comment=f"Merging to {merged_file}")
        if len(merged_files) == 0:
            warning_msg("Merged no files")
        else:
            msg("Merging completed, merged:", *merged_files, color=bcolors.BOKGREEN)
def set_o2_analysis(o2_analyses=["o2-analysis-hf-task-d0 --pipeline qa-tracking-kine:4,qa-tracking-resolution:4"],
                    o2_arguments="--shm-segment-size 16000000000 --readers 4 --configuration json://$PWD/dpl-config_std.json",
                    input_file="listfiles.txt",
                    tag="QA",
                    output_files=["AnalysisResults.root",
                                  "AnalysisResults_trees.root",
                                  "QAResults.root"],
                    dpl_configuration_file=None,
                    resume_previous_analysis=False,
                    write_runner_script=True):
    """
    Function to prepare everything you need for your O2 analysis, from the output
    folder to the script containing the O2 workflow.
    The output can be found in the same directory as the input data.
    """
    # Creating output directory
    output_path = os.path.dirname(os.path.abspath(input_file))
    # Creating the script to run O2
    tmp_script_name = os.path.join(output_path, f"tmpscript_{tag.lower()}.sh")
    if not write_runner_script:
        # Returning early in case write_runner_script is False
        return tmp_script_name
    # Defining log file
    log_file = f"log_{tag.lower()}.log"
    verbose_msg("Configuring the tasks with O2", color=bcolors.BOKBLUE)
    # Checking input file
    verbose_msg("Using", input_file, "as input file")
    if not input_file.endswith(".root"):
        input_file = f"@{os.path.join(os.getcwd(), input_file)}"
    # Writing instructions to runner script
    with open(tmp_script_name, "w") as tmp_script:
        verbose_msg("Writing o2 instructions to", f"'{tmp_script_name}'")

        def write_instructions(instructions, n=1, check_status=False):
            verbose_msg("--\t", instructions.strip())
            tmp_script.write(f"{instructions}" + "".join(["\n"] * n))
            if check_status:
                tmp_script.write("\nReturnValue=$?\n")
                tmp_script.write("if [[ $ReturnValue != 0 ]]; then\n")
                tmp_script.write("    echo \"Encountered error with command: '")
                tmp_script.write(instructions.replace("\"", "\\\"").strip())
                tmp_script.write("'\"\n")
                tmp_script.write("    exit $ReturnValue\n")
                tmp_script.write("fi\n\n")

        write_instructions("#!/bin/bash", n=2)
        # Move to run dir
        write_instructions(f"cd {output_path} || exit 1", n=2)
        # Print run dir
        write_instructions("pwd", n=2)
        write_instructions("echo Running \"$0\"", n=2)

        def get_tagged_output_file(output_file_name):
            return output_file_name.replace(".root", f"_{tag}.root")

        for i in output_files:
            # Removing old output
            write_instructions(f"[ -f {i} ] && rm -v {i} 2>&1")
            i = get_tagged_output_file(i)
            if resume_previous_analysis:
                write_instructions(f"[ -f {i} ] && echo 'file {i} already present, continuing' && exit 0")
            else:
                write_instructions(f"[ -f {i} ] && rm -v {i} 2>&1")
        write_instructions("")
        o2_workflow = ""
        for i in o2_analyses:
            line = f"{i} {o2_arguments}"
            if i == o2_analyses[0]:
                line += f" --aod-file {input_file}"
            if dpl_configuration_file is not None:
                line += f" --configuration json://{os.path.normpath(dpl_configuration_file)}"
            if len(o2_analyses) > 1 and i != o2_analyses[-1]:
                line = f"{line} | \\\n \t"
            if line.count("configuration") > 1:
                fatal_msg("Cannot have more than one configuration")
            o2_workflow += line
        write_instructions(f"O2Workflow=\"{o2_workflow}\"", n=2)
        write_instructions("if [[ -z \"${1}\" ]]; then", n=2)
        write_instructions("    echo \"Running: \n \t ${O2Workflow}\"" f" > {log_file}")
        write_instructions("    eval \"${O2Workflow}\"" f" >> {log_file}", check_status=True)
        write_instructions("else")
        write_instructions("    eval \"${O2Workflow}\"")
        write_instructions("fi")
        # Print run dir
        write_instructions("pwd")
        for i in ["ERROR", "FATAL", "crash"]:
            write_instructions(f"if grep -q \"\[{i}\]\" {log_file}; then echo \": got some {i}s in '{log_file}'\" && exit 1; fi")
        write_instructions("")
        for i in output_files:
            # Renaming output with tag
            write_instructions(f"[ -f {i} ] && mv {i} {get_tagged_output_file(i)} 2>&1")
        write_instructions("date", n=2)
        write_instructions("echo Completed \"$0\"", n=2)
        write_instructions("\nexit 0")
    return tmp_script_name
def main(input_files,
         do_merge=True,
         sanity_file=None,
         max_bunch_size=200,
         out_path="./",
         over_write_lists=False,
         jobs=1):
    msg("Merging to", out_path, "with maximum input size", max_bunch_size)
    out_path = os.path.normpath(out_path)
    if not os.path.exists(out_path):
        warning_msg("Output path", out_path, "does not exist")
        ans = input("Create it? (Y/[N])")
        if ans == "Y":
            os.makedirs(out_path)
        else:
            msg("Exit")
            return
    sane_files = None
    if sanity_file is not None:
        msg("Using sanity file", sanity_file)
        sane_files = []
        with open(sanity_file, "r") as f:
            for i in f:
                sane_files.append(os.path.abspath(os.path.normpath(i.strip())))
    size_of_files = {}
    for i in input_files:
        i = os.path.normpath(i.strip())
        if sane_files is not None and os.path.abspath(i) not in sane_files:
            msg("Skipping", i, "because it is not in the sanity file")
            continue
        size_of_files[i] = os.path.getsize(i) * 1e-6
    bunched_files = [[]]
    bunched_sizes = []
    bunch_size = []
    for i in size_of_files:
        verbose_msg("Checking file", i, "of size", size_of_files[i], "MB")
        if sum(bunch_size) > max_bunch_size:
            verbose_msg("Bunch size", sum(bunch_size), "MB reached the limit of",
                        max_bunch_size, "MB with", len(bunch_size),
                        "files, preparing next bunch!")
            bunched_files.append([])
            bunched_sizes.append(sum(bunch_size))
            bunch_size = []
        bunch_size.append(size_of_files[i])
        bunched_files[-1].append(i)
    bunched_sizes.append(sum(bunch_size))
    verbose_msg("Got", len(bunched_files), "bunches")
    for i, j in enumerate(bunched_files):
        verbose_msg(f"{i})", bunched_sizes[i], "MB, with", len(j), j)
    msg("Preparing", len(bunched_files), "bunched lists")
    bunched_aod_names.clear()
    for i, j in enumerate(bunched_files):
        fn = f"aod_merge_list_bunch{i}.txt"
        verbose_msg("Writing bunch", i, "to", fn)
        if not over_write_lists:
            if os.path.isfile(fn):
                fatal_msg(fn, "already present, remove it first")
        with open(fn, "w") as f:
            for k in j:
                f.write(k + "\n")
        if do_merge:
            out_aod = os.path.join(out_path, f"AO2D_Merge_{i}.root")
            if os.path.isfile(out_aod):
                fatal_msg(out_aod, "already present")
            bunched_aod_names[fn] = {"out_aod": out_aod,
                                     "file_index": i,
                                     "total_files": len(bunched_files),
                                     "input_size": bunched_sizes[i]}
    run_in_parallel(jobs,
                    run_merge,
                    list(bunched_aod_names.keys()),
                    job_message="Running AOD merging",
                    linearize_single_core=True)
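# Hedged, self-contained sketch of the greedy size-based bunching used above:
# files accumulate into the current bunch until the running sum exceeds the cap,
# so a single bunch may overshoot `max_bunch_size` by one file.
def bunch_by_size(size_of_files, max_bunch_size=200):
    """Group file names into bunches whose sizes (in MB) stay near the cap."""
    bunches = [[]]
    current = []
    for name, size in size_of_files.items():
        if sum(current) > max_bunch_size:
            bunches.append([])
            current = []
        current.append(size)
        bunches[-1].append(name)
    return bunches

# bunch_by_size({"a.root": 150, "b.root": 100, "c.root": 30})
# -> [["a.root", "b.root"], ["c.root"]]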
def main(mode,
         input_file,
         out_path,
         out_tag="",
         batch_size=4,
         n_max_files=100,
         dpl_configuration_file=None,
         njobs=1,
         merge_output=True,
         merge_only=False,
         shm_mem_size=16000000000,
         rate_lim=1000000000,
         readers=1,
         avoid_overwriting_merge=False,
         clean_localhost_after_running=True,
         extra_arguments=""):
    if len(input_file) == 1:
        input_file = input_file[0]
    else:
        input_file = input_file[0:n_max_files]
    if not merge_only:
        msg("Running", f"'{mode}'", "analysis on", f"'{input_file}'", color=bcolors.BOKBLUE)
        msg("Maximum", n_max_files, "files with batch size", batch_size,
            "and", njobs, "jobs" if njobs > 1 else "job", color=bcolors.BOKBLUE)
    else:
        msg("Merging output of", f"'{mode}'", "analysis", color=bcolors.BOKBLUE)
    o2_arguments = f"-b --shm-segment-size {shm_mem_size} --aod-memory-rate-limit {rate_lim} --readers {readers}"
    o2_arguments += extra_arguments
    if mode not in analyses:
        raise ValueError("Did not find analyses matching mode", mode,
                         ", please choose in", ", ".join(analyses.keys()))
    an = analyses[mode]
    tag = mode + out_tag
    # Build input file list
    input_file_list = []

    def build_list_of_files(file_list):
        # Check that the runlist does not have duplicates
        if len(file_list) != len(set(file_list)):
            fatal_msg("Runlist has duplicated entries, fix runlist!")
        not_readable = []
        for i in file_list:  # Check that input files can be opened
            f = TFile(i.strip(), "READ")
            if not f.IsOpen():
                verbose_msg("Cannot open AOD file:", i, color=bcolors.WARNING)
                not_readable.append(i)
        if len(not_readable) > 0:
            warning_msg(len(not_readable), "files cannot be read and will be skipped")
            for i in not_readable:
                file_list.remove(i)
        files_per_batch = []
        iter_file_list = iter(file_list)
        for i in range(0, len(file_list)):
            sub_set = list(islice(iter_file_list, batch_size))
            if len(sub_set) <= 0:
                continue
            files_per_batch.append(sub_set)
        run_list = []
        if len(files_per_batch) > 0:
            for i, lines in enumerate(files_per_batch):
                p = os.path.join(out_path, f"{i}")
                if not os.path.isdir(p):
                    os.makedirs(p)
                run_list.append(os.path.join(p, f"ListForRun5Analysis.{i}.txt"))
                with open(run_list[-1], "w") as f:
                    for j in lines:
                        f.write(j.strip() + "\n")
        msg("Number of runs:", len(run_list))
        return run_list

    if type(input_file) is list:
        input_file = [os.path.join(os.getcwd(), i) for i in input_file]
        input_file_list = build_list_of_files(input_file)
    elif not input_file.endswith(".root"):
        with open(input_file, "r") as f:
            lines = f.readlines()
            msg("Building input list from", len(lines), "inputs, limiting to", n_max_files)
            if len(lines) > n_max_files:
                lines = lines[0:n_max_files]
            input_file_list = build_list_of_files(lines)
    else:
        input_file_list = [os.path.join(os.getcwd(), input_file)]
    if dpl_configuration_file is not None:
        dpl_configuration_file = os.path.join(os.getcwd(), dpl_configuration_file)
    run_list = []
    for i, j in enumerate(input_file_list):
        run_list.append(set_o2_analysis(an,
                                        o2_arguments=o2_arguments,
                                        input_file=j,
                                        tag=tag,
                                        dpl_configuration_file=dpl_configuration_file))
    if not merge_only:
        run_in_parallel(processes=njobs,
                        job_runner=run_o2_analysis,
                        job_arguments=run_list,
                        job_message="Running analysis")
    if clean_localhost_after_running:
        run_cmd("find /tmp/ -maxdepth 1 -name localhost* -user $(whoami) | xargs rm -v")
    if (merge_output or merge_only) and len(run_list) > 1:
        files_to_merge = []
        for i in input_file_list:
            p = os.path.dirname(os.path.abspath(i))
            for j in os.listdir(p):
                if j.endswith(f"_{tag}.root"):
                    files_to_merge.append(os.path.join(p, j))
        if len(files_to_merge) == 0:
            warning_msg("Did not find any file to merge for tag", tag)
            return
        if len(files_to_merge) > len(run_list):
            fatal_msg("Trying to merge too many files!", tag)
        msg("Merging", len(files_to_merge), "results", color=bcolors.BOKBLUE)
        files_per_type = {}  # List of files to be merged per type
        for i in files_to_merge:
            fn = os.path.basename(i)
            files_per_type.setdefault(fn, []).append(i)
        merged_files = []
        for i in files_per_type:
            merged_file = os.path.join(out_path, i)
            if avoid_overwriting_merge and os.path.isfile(merged_file):
                warning_msg("file", merged_file,
                            "is already found, remove it before merging;"
                            " you can use the --mergeonly flag to avoid running the analysis again")
                continue
            merged_files.append(merged_file)
            merge_file_list = os.path.join(os.path.dirname(os.path.abspath(merged_file)),
                                           "tomerge_" + "".join(i.split(".")[:-1]) + ".txt")
            verbose_msg("List of files to be merged:", merge_file_list)
            with open(merge_file_list, "w") as fmerge:
                for j in files_per_type[i]:
                    fmerge.write(j + "\n")
            run_cmd(f"hadd -j {njobs} -f {merged_file} `cat {merge_file_list}`",
                    log_file=merge_file_list.replace(".txt", ".log"))
        if len(merged_files) == 0:
            warning_msg("Merged no files")
        else:
            msg("Merging completed, merged:", *merged_files, color=bcolors.BOKGREEN)
def main(configuration_file, config_entry, njobs, nruns, nevents, qa,
         output_path, clean_delphes_files, create_luts, turn_off_vertexing,
         append_production, use_nuclei, avoid_file_copy, debug_aod, tof_mismatch):
    arguments = locals()  # List of arguments to put into the log
    parser = configparser.RawConfigParser()
    parser.read(configuration_file)
    if config_entry not in parser.keys():
        k = list(parser.keys())
        k.sort()
        fatal_msg(f"Did not find configuration entry '{config_entry}' in config file",
                  configuration_file + "\n\t Available entries:\n\t\t" + "\n\t\t".join(k))
    run_cmd("./clean.sh > /dev/null 2>&1", check_status=False)
    # Dictionary of fetched options
    running_options = {}
    for i in arguments:
        running_options["ARG " + i] = arguments[i]

    def opt(entry, require=True):
        try:
            o = parser.get(config_entry, entry)
            b = ['yes', 'no', 'on', 'off', 'true', 'false']
            for i in b:
                if o.lower() == i:
                    o = parser.getboolean(config_entry, entry)
                    break
            verbose_msg("Got option", entry, "=", f"'{o}'")
            running_options[entry] = o
            return o
        except Exception:
            if require:
                fatal_msg("Missing entry", f"'{entry}'",
                          "in configuration file", f"'{configuration_file}'")
            return None

    # Config from the config file
    # simulation configuration
    if output_path is None:
        output_path = ""
    output_path = os.path.join(os.getcwd(), output_path)
    msg("Output will be found in", f"'{output_path}'")
    if not os.path.isdir(output_path):
        msg("Creating output path")
        os.makedirs(output_path)
    if not os.path.isdir(output_path):
        raise RuntimeError("Cannot find output path", output_path)
    # detector configuration
    bField = opt("bField")
    sigmaT = opt("sigmaT")
    sigmaT0 = opt("sigmaT0")
    tof_radius = opt("tof_radius")
    rich_radius = opt("rich_radius")
    rich_index = opt("rich_index")
    forward_rich_index = opt("forward_rich_index")
    minimum_track_radius = opt("minimum_track_radius")
    etaMax = opt("etamax")
    barrel_half_length = opt("barrel_half_length")

    # copy relevant files in the working directory
    def do_copy(in_file, out_file=None, in_path=None):
        """Function to copy files"""
        in_file = os.path.normpath(in_file)  # Normalize path
        if out_file is None:
            # If left unconfigured, use the same name but put it in the current path
            out_file = os.path.basename(in_file)
        out_file = os.path.normpath(out_file)  # Normalize path
        if in_path is not None:
            in_file = os.path.join(in_path, in_file)
        in_file = os.path.expanduser(os.path.expandvars(in_file))
        if avoid_file_copy:
            if os.path.isfile(out_file) or (in_file == out_file):
                verbose_msg("Skipping copy of", in_file, "to", out_file,
                            "because of --avoid-config-copy")
            else:
                verbose_msg("Copying", in_file, "to", out_file,
                            "because of --avoid-config-copy")
                run_cmd(f"cp {in_file} {out_file}", comment="Copying files without python")
            return
        verbose_msg("Copying", in_file, "to", out_file)
        shutil.copy2(in_file, out_file)

    # Fetching the propagation card
    do_copy(opt("propagate_card"), "propagate.tcl", in_path=opt("card_path"))
    lut_path = opt("lut_path")
    lut_tag = opt("lut_tag")
    lut_tag = f"rmin{int(float(minimum_track_radius))}.{lut_tag}"
    lut_particles = ["el", "mu", "pi", "ka", "pr"]
    if use_nuclei:
        lut_particles += ["de", "tr", "he3"]
    if create_luts:  # Creating LUTs
        verbose_msg("Creating LUTs")
        lut_path = os.path.join(lut_path, "create_luts.sh")
        run_cmd(f"{lut_path} -p {lut_path} -t {lut_tag} -B {float(bField)*0.1} -R {minimum_track_radius} -P \"0 1 2 3 4 5 6\" -j 1 -F 2>&1",
                f"Creating the lookup tables with tag {lut_tag} from {lut_path} script")
    else:  # Fetching LUTs
        verbose_msg(f"Fetching LUTs with tag {lut_tag} from path {lut_path}")
        for i in lut_particles:
            lut_bg = "{}kG".format(bField).replace(".", "")
            do_copy(f"lutCovm.{i}.{lut_bg}.{lut_tag}.dat",
                    f"lutCovm.{i}.dat",
                    in_path=lut_path)
    # Checking that we actually have LUTs
    for i in lut_particles:
        i = f"lutCovm.{i}.dat"
        if not os.path.isfile(i):
            fatal_msg("Did not find LUT file", i)
    custom_gen = opt("custom_gen", require=False)
    if custom_gen is None:
        # Checking that the generators are defined
        generators = opt("generators", require=False)
        if generators is None:
            fatal_msg("Did not find any generator configuration corresponding to the entry",
                      config_entry, "in your configuration file", configuration_file)
        generators = generators.split(" ")
        for i in generators:
            do_copy(i)
        msg("Using pythia with configuration", generators)
    else:
        def check_duplicate(option_name):
            if f" {option_name}" in custom_gen:
                fatal_msg(f"Remove '{option_name}' from", custom_gen,
                          "as it will be automatically set")

        for i in ["--output", "-o", "--nevents", "-n"]:
            check_duplicate(i)
        if "INPUT_FILES" in custom_gen:
            input_hepmc_files = custom_gen.replace("INPUT_FILES", "").strip().split(" ")
            input_hepmc_file_list = []
            for i in input_hepmc_files:
                input_hepmc_file_list += glob.glob(os.path.normpath(i))
            if len(input_hepmc_file_list) >= nruns:
                input_hepmc_file_list = input_hepmc_file_list[0:nruns]
            else:
                nruns = len(input_hepmc_file_list)
            if len(input_hepmc_file_list) <= 0:
                fatal_msg("Did not find any input file matching to the request:", custom_gen)
            custom_gen = "INPUT_FILES " + " ".join(input_hepmc_file_list)
            msg("Using", len(input_hepmc_file_list),
                "input HepMC file" + ("" if len(input_hepmc_file_list) == 1 else "s"),
                input_hepmc_file_list)
        else:
            msg("Using custom generator", custom_gen)
    # Printing configuration
    msg(" --- running createO2tables.py", color=bcolors.HEADER)
    msg("  n. jobs        =", njobs)
    msg("  n. runs        =", nruns)
    msg("  events per run =", nevents)
    msg("  tot. events    =", "{:.0e}".format(nevents * nruns))
    msg("  LUT path       =", f"'{lut_path}'")
    msg(" --- with detector configuration", color=bcolors.HEADER)
    msg("  B field            =", bField, "[kG]")
    msg("  Barrel radius      =", minimum_track_radius, "[cm]")
    msg("  Barrel half length =", barrel_half_length, "[cm]")
    if create_luts:
        msg("  Minimum track radius =", minimum_track_radius, "[cm]")
    msg("  LUT    =", lut_tag)
    msg("  etaMax =", etaMax)
    msg(" --- with TOF configuration", color=bcolors.HEADER)
    msg("  sigmaT     =", sigmaT, "[ns]")
    msg("  sigmaT0    =", sigmaT0, "[ns]")
    msg("  tof_radius =", tof_radius, "[cm]")
    msg(" --- with RICH configuration", color=bcolors.HEADER)
    msg("  rich_radius =", rich_radius, "[cm]")
    msg("  rich_index  =", rich_index)
    msg(" --- with Forward RICH configuration", color=bcolors.HEADER)
    msg("  forward_rich_index =", forward_rich_index)
    aod_path = opt("aod_path")
    do_copy("createO2tables.h", in_path=aod_path)
    do_copy("createO2tables.C", in_path=aod_path)
    do_copy("muonAccEffPID.root", in_path=aod_path)
    if qa:
        do_copy("diagnostic_tools/dpl-config_std.json")

    def set_config(config_file, config, value):
        config = config.strip()
        value = value.strip()
        config_string = f"{config} {value}"
        run_cmd("sed -i -e \"" f"s/{config} .*$/{config_string}" "\" " + config_file)
        # Checking that the file has the correct configuration
        with open(config_file) as f:
            has_it = False
            config_string = config_string.replace("\\", "").strip("/")
            for lineno, line in enumerate(f):
                if line.strip() == config_string:
                    verbose_msg(f"Found config string '{config_string}'",
                                f"at line #{lineno} '{line.strip()}'")
                    has_it = True
                    break
            if not has_it:
                fatal_msg("Configuration file", config_file,
                          f"does not have config string '{config_string}'")

    # set magnetic field
    set_config("propagate.tcl", "set barrel_Bz", f"{bField}" "e\-1/")
    set_config("createO2tables.C", "const double Bz = ", f"{bField}" "e\-1\;/")
    if turn_off_vertexing:
        set_config("createO2tables.C", "constexpr bool do_vertexing = ", "false\;/")
    else:
        # Check that the geometry files for the vertexing are there
        if not os.path.isfile("o2sim_grp.root") or not os.path.isfile("o2sim_geometry.root"):
            run_cmd("mkdir tmpo2sim && cd tmpo2sim && "
                    "o2-sim -m PIPE ITS MFT -g boxgen -n 1 -j 1 --configKeyValues 'BoxGun.number=1' && "
                    "cp o2sim_grp.root .. && cp o2sim_geometry.root .. && "
                    "cd .. && rm -r tmpo2sim")
    if use_nuclei:
        set_config("createO2tables.C", "constexpr bool enable_nuclei = ", "true\;/")
    if debug_aod:
        set_config("createO2tables.C", "constexpr bool debug_qa = ", "true\;/")
    if tof_mismatch:
        if tof_mismatch not in [1, 2]:
            fatal_msg("tof_mismatch", tof_mismatch, "is not 1 or 2")
        set_config("createO2tables.C", "constexpr int tof_mismatch = ", f"{tof_mismatch}\;/")
    if qa:
        set_config("dpl-config_std.json", "\\\"d_bz\\\":", "\\\"" f"{bField}" "\\\"\,/")
    # set barrel_radius
    set_config("propagate.tcl", "set barrel_Radius", f"{minimum_track_radius}" "e\-2/")
    # set barrel_half_length
    set_config("propagate.tcl", "set barrel_HalfLength", f"{barrel_half_length}" "e\-2/")
    # set tof_radius
    set_config("createO2tables.C", "constexpr double tof_radius =", f"{tof_radius}" "\;/")
    # set tof_length
    set_config("createO2tables.C", "const double tof_length =", f"{barrel_half_length}" "\;/")
    # set rich_radius
    set_config("createO2tables.C", "constexpr double rich_radius =", f"{rich_radius}" "\;/")
    # set rich_index
    set_config("createO2tables.C", "const double rich_index =", f"{rich_index}" "\;/")
    # set forward_rich_index
    set_config("createO2tables.C", "const double forward_rich_index =", f"{forward_rich_index}" "\;/")
    # set acceptance
    set_config("propagate.tcl", "set barrel_Acceptance",
               "\{ 0.0 + 1.0 * fabs(eta) < " f"{etaMax}" " \}/")
    # set time resolution
    set_config("propagate.tcl", "set barrel_TimeResolution", f"{sigmaT}" "e\-9/")
    set_config("createO2tables.C", "const double tof_sigmat =", f"{sigmaT}" "\;/")
    set_config("createO2tables.C", "const double tof_sigmat0 =", f"{sigmaT0}" "\;/")
    run_list = range(nruns)
    if append_production:
        if output_path is None:
            fatal_msg("Output path is not defined, cannot append")
        last_preexisting_aod = [each for each in os.listdir(output_path)
                                if each.endswith('.root') and "AODRun5" in each]
        if len(last_preexisting_aod) == 0:
            fatal_msg("Appending to a non existing production")
        last_preexisting_aod = sorted([int(each.replace("AODRun5.", "").replace(".root", ""))
                                       for each in last_preexisting_aod])[-1] + 1
        msg(f" Appending to production with {last_preexisting_aod} AODs",
            color=bcolors.BWARNING)
        run_list = range(last_preexisting_aod, last_preexisting_aod + nruns)

    def configure_run(run_number):
        # Create executable that runs Generation, Delphes and analysis
        runner_file = f"runner{run_number}.sh"
        with open(runner_file, "w") as f_run:

            def write_to_runner(line, log_file=None, check_status=False):
                """Writes commands to the runner script"""
                log_line = ""
                if log_file is not None:
                    log_line = f" &> {log_file} 2>&1"
                line += log_line
                line += "\n"
                f_run.write(line)
                if check_status:
                    f_run.write("\nReturnValue=$?\n")
                    f_run.write("if [[ $ReturnValue != 0 ]]; then\n")
                    f_run.write("    echo \"Encountered error with command: '")
                    line = line.replace(log_line, "")
                    f_run.write(line.replace("\"", "\\\"").strip())
                    f_run.write("'\"\n")
                    if log_file is not None:
                        f_run.write("    echo \"Check log: '")
                        f_run.write(log_file.strip() + "'\"\n")
                    f_run.write("    exit $ReturnValue\n")
                    f_run.write("fi\n")

            def copy_and_link(file_name):
                """In the runner, copies a file to the output path (if different
                from the current one) and links it back to the current path"""
                if os.path.normpath(output_path) != os.getcwd():
                    write_to_runner(f"mv {file_name} {output_path} \n")
                    write_to_runner(f"ln -s {os.path.join(output_path, file_name)} . \n")

            write_to_runner("#!/usr/bin/env bash\n")
            delphes_file = f"delphes.{run_number}.root"
            delphes_log_file = delphes_file.replace(".root", ".log")
            hepmc_file = None
            mc_seed = random.randint(1, 800000000)
            if custom_gen:  # Using HEPMC
                hepmc_file = f"hepmcfile.{run_number}.hepmc"
                if "INPUT_FILES" in custom_gen:
                    input_hepmc_file = custom_gen.replace("INPUT_FILES", "").strip().split(" ")
                    input_hepmc_file = input_hepmc_file[run_number]
                    write_to_runner(f"ln -s {input_hepmc_file} {hepmc_file} \n")
                else:
                    gen_log_file = f"gen.{run_number}.log"
                    custom_gen_option = f" --output {hepmc_file} --nevents {nevents} --seed {mc_seed}"
                    write_to_runner(custom_gen + custom_gen_option,
                                    log_file=gen_log_file,
                                    check_status=True)
                write_to_runner(f"DelphesHepMC propagate.tcl {delphes_file} {hepmc_file}",
                                log_file=delphes_log_file,
                                check_status=True)
            else:  # Using DelphesPythia
                # copy generator configuration
                generator_cfg = f"generator.{run_number}.cfg"
                generator_orig = generators[0].split("/")[-1]
                do_copy(generator_orig, generator_cfg)
                # Adjust configuration file
                with open(generator_cfg, "a") as f_cfg:
                    # number of events and random seed
                    f_cfg.write("\n\n\n#### Additional part ###\n\n\n\n")
                    f_cfg.write(f"Main:numberOfEvents {nevents}\n")
                    f_cfg.write("Random:setSeed = on\n")
                    f_cfg.write(f"Random:seed = {mc_seed}\n")
                    # collision time spread [mm/c]
                    f_cfg.write("Beams:allowVertexSpread on \n")
                    f_cfg.write("Beams:sigmaTime 60.\n")
                    for i in generators[1:]:
                        with open(i.split("/")[-1], "r") as f_append:
                            f_cfg.write(f_append.read())
                write_to_runner(f"DelphesPythia8 propagate.tcl {generator_cfg} {delphes_file}",
                                log_file=delphes_log_file,
                                check_status=True)
            aod_file = f"AODRun5.{run_number}.root"
            aod_log_file = aod_file.replace(".root", ".log")
            write_to_runner(f"root -l -b -q 'createO2tables.C+(\"{delphes_file}\", \"tmp_{aod_file}\", 0)'",
                            log_file=aod_log_file,
                            check_status=True)
            # Check that there were no O2 errors
            write_to_runner(f"if grep -q \"\[ERROR\]\" {aod_log_file}; then echo \": got some errors in '{aod_log_file}'\" && echo \"Found some ERROR in this log\" >> {aod_log_file}; fi")
            write_to_runner(f"if grep -q \"\[FATAL\]\" {aod_log_file}; then echo \": got some fatals in '{aod_log_file}'\" && echo \"Found some FATAL in this log\" >> {aod_log_file} && exit 1; fi")
            # Rename the temporary AODs to standard AODs
            write_to_runner(f"mv tmp_{aod_file} {aod_file}", check_status=True)
            if not clean_delphes_files:
                copy_and_link(delphes_file)
                if hepmc_file is not None:
                    copy_and_link(hepmc_file)
            copy_and_link(aod_file)
            if clean_delphes_files:
                write_to_runner(f"rm {delphes_file}")
                write_to_runner(f"rm {generator_cfg}")
                if hepmc_file is not None:
                    write_to_runner(f"rm {hepmc_file}")
            write_to_runner("exit 0\n")

    # Configuring all the runs
    for i in run_list:
        configure_run(i)
    # Compiling the table creator macro once for all
    run_cmd("root -l -b -q 'createO2tables.C+(\"\")' > /dev/null 2>&1",
            comment="to compile the table creator only once, before running")
    if not os.path.isfile("createO2tables_C.so"):
        run_cmd("root -l -b -q 'createO2tables.C+(\"\")'",
                comment="to compile with full log")
        fatal_msg("'createO2tables.C' did not compile!")
    total_processing_time = time.time()
    msg(" --- start processing the runs ", color=bcolors.HEADER)
    run_in_parallel(processes=njobs,
                    job_runner=process_run,
                    job_arguments=run_list,
                    job_message="Running production")
    # merge runs when all done
    msg(" --- all runs are processed, so long", color=bcolors.HEADER)
    total_processing_time = time.time() - total_processing_time
    msg(f"-- took {total_processing_time} seconds in total --", color=bcolors.BOKGREEN)
    # Writing the list of produced AODs
    output_list_file = "listfiles.txt"
    with open(output_list_file, "w") as listfiles:
        for i in os.listdir("."):
            if "AODRun5." in i and i.endswith(".root"):
                listfiles.write(f"{os.getcwd()}/{i}\n")
    # Writing summary of production
    summaryfile = "summary.txt"
    with open(summaryfile, "w") as f:
        f.write("\n## Summary of last run ##\n")
        now = datetime.now()
        dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
        f.write(f"Finished at {dt_string}\n")
        f.write(f"Took {total_processing_time} seconds\n")

        def write_config(entry, prefix=""):
            # replace (not strip) the prefix: str.strip("ARG ") would also eat
            # trailing 'a', 'r', 'g' characters from the entry name
            f.write(prefix + entry.replace("ARG ", "", 1) + f" = {running_options[entry]}\n")

        f.write("\n## Configuration ##\n")
        for i in running_options:
            if "ARG" in i:
                write_config(i, prefix=" - ")
        f.write("\n## Options ##\n")
        for i in running_options:
            if "ARG" not in i:
                write_config(i, prefix=" * ")
        output_size = sum(os.path.getsize(os.path.join(output_path, each))
                          for each in os.listdir(output_path)
                          if os.path.isfile(os.path.join(output_path, each)))
        f.write("\n## Size of the output ##\n")
        f.write(f" - {output_size} bytes\n")
        f.write(f" - {output_size/1e6} MB\n")
        f.write(f" - {output_size/1e9} GB\n")
    run_cmd("echo >> " + summaryfile)
    run_cmd("echo + DelphesO2 Version + >> " + summaryfile)
    run_cmd("git rev-parse HEAD >> " + summaryfile, check_status=False)
    if os.path.normpath(output_path) != os.getcwd():
        if append_production:
            s = os.path.join(output_path, summaryfile)
            run_cmd(f"echo '' >> {s}")
            run_cmd(f"echo ' **' >> {s}")
            run_cmd(f"echo 'Appended production' >> {s}")
            run_cmd(f"echo ' **' >> {s}")
            run_cmd(f"echo '' >> {s}")
            run_cmd(f"cat {summaryfile} >> {s}")
        else:
            run_cmd(f"mv {summaryfile} {output_path}")
            run_cmd(f"ln -s {os.path.join(output_path, summaryfile)} ./")
    if qa:
        msg(" --- running test analysis", color=bcolors.HEADER)
        run_cmd(f"./diagnostic_tools/doanalysis.py TrackQA RICH TOF -i {output_list_file} -M 25 -B 25")
    if tof_mismatch == 1:  # TOF mismatch in create mode
        run_cmd(f"hadd -j {njobs} -f tofMM.root tof_mismatch_template_DF_*.root && rm tof_mismatch_template_DF_*.root")