def copylist(fname="", jobs=InputArgument(1, "Number of parallel jobs to use", ["--njobs", "-j"], int)): """Takes a text file and downloads the files from grid""" if jobs is None: jobs = 1 verbose_msg("Copying files from list", fname, "with", jobs, "jobs") fname = path.normpath(fname) if not path.isfile(fname): warning_msg("Input file not provided! Aborting") return sofar = copied(fname, "So far") f = open(fname, "r") Group = [] for line in f: if "%" in line: msg("Character % encountered! Aborting") break if "#" in line: msg("Character # encountered! Skipping") continue line = "./" + line if jobs == 1: copyfile(line) else: Group.append(line) if jobs > 1: msg("Copying list in parallel with", jobs, "jobs") run_in_parallel(processes=jobs, job_runner=copyfile, job_arguments=Group, job_message="Downloading files", linearize_single_core=True) copied(fname, extra_msg="In recent run", last_time=sofar)
def main():
    parser = DownloaderArgumentParser()
    parser.add_argument('--name', metavar='PAT[,PAT...]',
                        help='download only models whose names match at least one of the specified patterns')
    parser.add_argument('--list', type=Path, metavar='FILE.LST',
                        help='download only models whose names match at least one of the patterns in the specified file')
    parser.add_argument('--all', action='store_true', help='download all available models')
    parser.add_argument('--print_all', action='store_true', help='print all available models')
    parser.add_argument('--precisions', metavar='PREC[,PREC...]',
                        help='download only models with the specified precisions (actual for DLDT networks)')
    parser.add_argument('-o', '--output_dir', type=Path, metavar='DIR', default=Path.cwd(),
                        help='path where to save models')
    parser.add_argument('--cache_dir', type=Path, metavar='DIR',
                        help='directory to use as a cache for downloaded files')
    parser.add_argument('--num_attempts', type=positive_int_arg, metavar='N', default=1,
                        help='attempt each download up to N times')
    parser.add_argument('--progress_format', choices=('text', 'json'), default='text',
                        help='which format to use for progress reporting')
    # unlike Model Converter, -jauto is not supported here, because CPU count has no
    # relation to the optimal number of concurrent downloads
    parser.add_argument('-j', '--jobs', type=positive_int_arg, metavar='N', default=1,
                        help='how many downloads to perform concurrently')

    args = parser.parse_args()

    def make_reporter(context):
        return common.Reporter(context,
                               enable_human_output=args.progress_format == 'text',
                               enable_json_output=args.progress_format == 'json')

    reporter = make_reporter(common.DirectOutputContext())

    cache = NullCache() if args.cache_dir is None else DirCache(args.cache_dir)
    models = common.load_models_from_args(parser, args)

    failed_models = set()

    if args.precisions is None:
        requested_precisions = common.KNOWN_PRECISIONS
    else:
        requested_precisions = set(args.precisions.split(','))
        unknown_precisions = requested_precisions - common.KNOWN_PRECISIONS
        if unknown_precisions:
            sys.exit('Unknown precisions specified: {}.'.format(
                ', '.join(sorted(unknown_precisions))))

    reporter.print_group_heading('Downloading models')
    with contextlib.ExitStack() as exit_stack:
        session_factory = ThreadSessionFactory(exit_stack)
        if args.jobs == 1:
            results = [download_model(reporter, args, cache, session_factory,
                                      requested_precisions, model)
                       for model in models]
        else:
            results = common.run_in_parallel(args.jobs,
                                             lambda context, model: download_model(
                                                 make_reporter(context), args, cache,
                                                 session_factory, requested_precisions, model),
                                             models)

    failed_models = {model.name for model, successful in zip(models, results) if not successful}

    reporter.print_group_heading('Post-processing')
    for model in models:
        if model.name in failed_models or not model.postprocessing:
            continue

        reporter.emit_event('model_postprocessing_begin', model=model.name)
        output = args.output_dir / model.subdirectory
        for postproc in model.postprocessing:
            postproc.apply(reporter, output)
        reporter.emit_event('model_postprocessing_end', model=model.name)

    if failed_models:
        reporter.print('FAILED:')
        for failed_model_name in failed_models:
            reporter.print(failed_model_name)
        sys.exit(1)

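# `positive_int_arg`, used above as the argparse `type` for --num_attempts and --jobs, is
# defined elsewhere in the downloader. A plausible sketch, assuming it simply rejects
# non-positive or non-integer values, is:
import argparse


def positive_int_arg(value_str):
    """argparse type: accept a strictly positive integer."""
    try:
        value = int(value_str)
        if value > 0:
            return value
    except ValueError:
        pass
    raise argparse.ArgumentTypeError(f'must be a positive integer (got {value_str!r})')
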
def main(mode, input_file, out_path, out_tag="", batch_size=4, n_max_files=100, dpl_configuration_file=None, njobs=1, merge_output=True, merge_only=False, shm_mem_size=16000000000, rate_lim=1000000000, readers=1, avoid_overwriting_merge=False, clean_localhost_after_running=True, extra_arguments="", resume_previous_analysis=False, check_input_file_integrity=True, analysis_timeout=None, linearize_single_core=True): if do_bash_script: njobs = 1 linearize_single_core = True if len(input_file) == 1: input_file = input_file[0] else: input_file = input_file[0:n_max_files] if not merge_only: msg("Running", f"'{mode}'", "analysis on", f"'{input_file}'", color=bcolors.BOKBLUE) msg("Maximum", n_max_files, "files with batch size", batch_size, "and", njobs, "jobs" if njobs > 1 else "job", color=bcolors.BOKBLUE) else: msg("Merging output of", f"'{mode}'", "analysis", color=bcolors.BOKBLUE) if analysis_timeout is not None: msg("Using analysis timeout of", analysis_timeout, "seconds", color=bcolors.BOKBLUE) analysis_timeout = f"--time-limit {analysis_timeout}" else: analysis_timeout = "" o2_arguments = f"-b --shm-segment-size {shm_mem_size} --aod-memory-rate-limit {rate_lim} --readers {readers} {analysis_timeout}" o2_arguments += extra_arguments if mode not in analyses: raise ValueError("Did not find analyses matching mode", mode, ", please choose in", ", ".join(analyses.keys())) an = analyses[mode] tag = mode + out_tag # Build input file list input_file_list = [] def is_root_file_sane(file_name_to_check): file_name_to_check = file_name_to_check.strip() if not os.path.isfile(file_name_to_check): warning_msg("File", file_name_to_check, "does not exist") return "Does not exist" file_to_check = TFile(file_name_to_check, "READ") if not file_to_check.IsOpen(): warning_msg("Cannot open AOD file:", file_name_to_check) return "Cannot be open" elif file_to_check.TestBit(TFile.kRecovered): verbose_msg(file_name_to_check, "was a recovered file") return "Was recovered" else: verbose_msg(file_name_to_check, "is OK") return "Is Ok" def build_list_of_files(file_list): verbose_msg("Building list of files from", file_list) # Check that runlist does not have duplicates unique_file_list = set(file_list) if len(file_list) != len(unique_file_list): # for i in file_list fatal_msg("Runlist has duplicated entries, fix runlist!", len(unique_file_list), "unique files, while got", len(file_list), "files") file_status = { "Does not exist": [], "Cannot be open": [], "Was recovered": [], "Is Ok": [] } if check_input_file_integrity: # Check that input files can be open for i in file_list: verbose_msg("Checking that TFile", i.strip(), "can be processed") file_status[is_root_file_sane(i)] = i recovered_files = file_status["Was recovered"] not_readable = [] for i in file_status: if i == "Is Ok": continue not_readable += file_status[i] if len(recovered_files) > 0: msg( "Recovered", len(recovered_files), "files:\n", ) if len(not_readable) > 0: warning_msg(len(not_readable), "over", len(file_list), "files cannot be read and will be skipped") for i in not_readable: if i not in file_list: warning_msg("did not find file to remove", f"'{i}'") file_list.remove(i) files_per_batch = [] iter_file_list = iter(file_list) for i in range(0, len(file_list)): sub_set = list(islice(iter_file_list, batch_size)) if len(sub_set) <= 0: continue files_per_batch.append(sub_set) run_list = [] if len(files_per_batch) > 0: for i, lines in enumerate(files_per_batch): p = os.path.join(out_path, f"{i}") if not os.path.isdir(p): os.makedirs(p) 
run_list.append(os.path.join(p, f"ListForRun5Analysis.{i}.txt")) with open(run_list[-1], "w") as f: for j in lines: f.write(j.strip() + "\n") msg("Number of runs:", len(run_list)) return run_list if type(input_file) is list: input_file = [os.path.join(os.getcwd(), i) for i in input_file] input_file_list = build_list_of_files(input_file) elif not input_file.endswith(".root"): with open(input_file, "r") as f: lines = f.readlines() msg("Building input list from", len(lines), "inputs, limiting to", n_max_files) if len(lines) > n_max_files: lines = lines[0:n_max_files] lines = [ os.path.join(os.path.dirname(os.path.abspath(input_file)), i) for i in lines ] input_file_list = build_list_of_files(lines) else: input_file_list = [os.path.join(os.getcwd(), input_file)] if dpl_configuration_file is not None: dpl_configuration_file = os.path.join(os.getcwd(), dpl_configuration_file) run_list = [] for i, j in enumerate(input_file_list): run_list.append( set_o2_analysis(an, o2_arguments=o2_arguments, input_file=j, tag=tag, dpl_configuration_file=dpl_configuration_file, resume_previous_analysis=resume_previous_analysis, write_runner_script=not merge_only)) if not merge_only: if do_bash_script: with open("parallelbash.sh", "w") as f: f.write(f"#!/bin/bash\n\n") f.write(f"echo \"Start running\"\n\n") f.write(f"date\n\n") f.write("""function trap_ctrlc (){ # perform cleanup here echo "Ctrl-C caught...performing clean up" exit 2 }\n\n""") f.write("""trap "trap_ctrlc" 2\n""") run_in_parallel( processes=njobs, job_runner=run_o2_analysis, job_arguments=run_list, job_message=f"Running analysis, it's {datetime.datetime.now()}", linearize_single_core=linearize_single_core) if do_bash_script: with open("parallelbash.sh", "a") as f: f.write(f"wait\n\n") f.write(f"date\n\n") msg("Now run bash script `bash parallelbash.sh`") return if clean_localhost_after_running: run_cmd( "find /tmp/ -maxdepth 1 -name localhost* -user $(whoami) | xargs rm -v 2>&1", check_status=False) if (merge_output or merge_only) and len(run_list) > 1: files_to_merge = [] for i in input_file_list: p = os.path.dirname(os.path.abspath(i)) for j in os.listdir(p): if j.endswith(f"_{tag}.root"): files_to_merge.append(os.path.join(p, j)) if len(files_to_merge) == 0: warning_msg("Did not find any file to merge for tag", tag) return files_per_type = {} # List of files to be merged per type # List of files to be merged per type that are not declared sane non_sane_files_per_type = {} for i in files_to_merge: if is_root_file_sane(i) != "Is Ok": non_sane_files_per_type[fn].setdefault(fn, []).append(i) warning_msg("Result file", i, "is not sane") continue fn = os.path.basename(i) files_per_type.setdefault(fn, []) files_per_type[fn].append(i) for i in non_sane_files_per_type: warning_msg("Non sane files for type", i) for j in non_sane_files_per_type[i]: msg(j) merged_files = [] for i in files_per_type: merged_file = os.path.join(out_path, i) if avoid_overwriting_merge and os.path.isfile(merged_file): warning_msg( "file", merged_file, "is already found, remove it before merging, you can use the --mergeonly flag to avoid running the analysis again" ) continue merged_files.append(merged_file) merge_file_list = os.path.join( os.path.dirname(os.path.abspath(merged_file)), "tomerge_" + "".join(i.split(".")[:-1]) + ".txt") verbose_msg("List of files to be merged:", merge_file_list) with open(merge_file_list, "w") as fmerge: for j in files_per_type[i]: fmerge.write(j + "\n") if len(files_per_type[i]) > len(run_list): fatal_msg("Trying to merge too many files of type", 
i, "for tag", tag, ":", len(files_per_type[i]), "vs", len(run_list), "runs") msg("Merging", len(files_per_type[i]), "files to", merged_file) run_cmd( f"hadd -j {njobs} -f {merged_file} `cat {merge_file_list}`", log_file=merge_file_list.replace(".txt", ".log"), time_it=True, comment=f"Merging to {merged_file}") if len(merged_files) == 0: warning_msg("Merged no files") else: msg("Merging completed, merged:", *merged_files, color=bcolors.BOKGREEN)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--download_dir', type=Path, metavar='DIR', default=Path.cwd(),
                        help='root of the directory tree with downloaded model files')
    parser.add_argument('-o', '--output_dir', type=Path, metavar='DIR',
                        help='root of the directory tree to place converted files into')
    parser.add_argument('--name', metavar='PAT[,PAT...]',
                        help='convert only models whose names match at least one of the specified patterns')
    parser.add_argument('--list', type=Path, metavar='FILE.LST',
                        help='convert only models whose names match at least one of the patterns in the specified file')
    parser.add_argument('--all', action='store_true', help='convert all available models')
    parser.add_argument('--print_all', action='store_true', help='print all available models')
    parser.add_argument('--precisions', metavar='PREC[,PREC...]',
                        help='run only conversions that produce models with the specified precisions')
    parser.add_argument('-p', '--python', type=Path, metavar='PYTHON', default=sys.executable,
                        help='Python executable to run Model Optimizer with')
    parser.add_argument('--mo', type=Path, metavar='MO.PY',
                        help='Model Optimizer entry point script')
    parser.add_argument('--add_mo_arg', dest='extra_mo_args', metavar='ARG', action='append',
                        help='Extra argument to pass to Model Optimizer')
    parser.add_argument('--dry_run', action='store_true',
                        help='Print the conversion commands without running them')
    parser.add_argument('-j', '--jobs', type=num_jobs_arg, default=1,
                        help='number of conversions to run concurrently')

    # aliases for backwards compatibility
    parser.add_argument('--add-mo-arg', dest='extra_mo_args', action='append', help=argparse.SUPPRESS)
    parser.add_argument('--dry-run', action='store_true', help=argparse.SUPPRESS)

    args = parser.parse_args()

    mo_path = args.mo
    if mo_path is None:
        try:
            mo_path = Path(os.environ['INTEL_OPENVINO_DIR']) / 'deployment_tools/model_optimizer/mo.py'
        except KeyError:
            sys.exit('Unable to locate Model Optimizer. '
                     + 'Use --mo or run setupvars.sh/setupvars.bat from the OpenVINO toolkit.')

    extra_mo_args = args.extra_mo_args or []

    if args.precisions is None:
        requested_precisions = common.KNOWN_PRECISIONS
    else:
        requested_precisions = set(args.precisions.split(','))
        unknown_precisions = requested_precisions - common.KNOWN_PRECISIONS
        if unknown_precisions:
            sys.exit('Unknown precisions specified: {}.'.format(
                ', '.join(sorted(unknown_precisions))))

    models = common.load_models_from_args(parser, args)

    output_dir = args.download_dir if args.output_dir is None else args.output_dir

    def convert(reporter, model):
        if model.mo_args is None:
            reporter.print_section_heading('Skipping {} (no conversions defined)', model.name)
            reporter.print()
            return True

        model_precisions = requested_precisions & model.precisions
        if not model_precisions:
            reporter.print_section_heading('Skipping {} (all conversions skipped)', model.name)
            reporter.print()
            return True

        model_format = model.framework

        if model.conversion_to_onnx_args:
            if not convert_to_onnx(reporter, model, output_dir, args):
                return False
            model_format = 'onnx'

        expanded_mo_args = [
            string.Template(arg).substitute(dl_dir=args.download_dir / model.subdirectory,
                                            mo_dir=mo_path.parent,
                                            conv_dir=output_dir / model.subdirectory,
                                            config_dir=common.MODEL_ROOT / model.subdirectory)
            for arg in model.mo_args]

        for model_precision in sorted(model_precisions):
            mo_cmd = [str(args.python), '--', str(mo_path),
                      '--framework={}'.format(model_format),
                      '--data_type={}'.format(model_precision),
                      '--output_dir={}'.format(output_dir / model.subdirectory / model_precision),
                      '--model_name={}'.format(model.name),
                      *expanded_mo_args, *extra_mo_args]

            reporter.print_section_heading('{}Converting {} to IR ({})',
                                           '(DRY RUN) ' if args.dry_run else '',
                                           model.name, model_precision)

            reporter.print('Conversion command: {}', common.command_string(mo_cmd))

            if not args.dry_run:
                reporter.print(flush=True)
                if not reporter.job_context.subprocess(mo_cmd):
                    return False

            reporter.print()

        return True

    reporter = common.Reporter(common.DirectOutputContext())

    if args.jobs == 1 or args.dry_run:
        results = [convert(reporter, model) for model in models]
    else:
        results = common.run_in_parallel(args.jobs,
                                         lambda context, model: convert(common.Reporter(context), model),
                                         models)

    failed_models = [model.name for model, successful in zip(models, results) if not successful]

    if failed_models:
        reporter.print('FAILED:')
        for failed_model_name in failed_models:
            reporter.print(failed_model_name)
        sys.exit(1)

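# `num_jobs_arg`, used as the argparse `type` for -j/--jobs in the converter, differs from the
# downloader's `positive_int_arg` in that it also accepts 'auto' (see the "-jauto" remark in the
# downloader above). A hedged sketch, assuming 'auto' maps to the CPU count, is:
import argparse
import os


def num_jobs_arg(value_str):
    """argparse type: a positive integer, or 'auto' for one job per CPU."""
    if value_str == 'auto':
        return os.cpu_count() or 1
    try:
        value = int(value_str)
        if value > 0:
            return value
    except ValueError:
        pass
    raise argparse.ArgumentTypeError(f'must be a positive integer or "auto" (got {value_str!r})')
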
def main(input_files, do_merge=True, sanity_file=None, max_bunch_size=200,
         out_path="./", over_write_lists=False, jobs=1):
    msg("Merging to", out_path, "with maximum input size", max_bunch_size)
    out_path = os.path.normpath(out_path)
    if not os.path.exists(out_path):
        warning_msg("Output path", out_path, "does not exist")
        ans = input("Create it? (Y/[N])")
        if ans == "Y":
            os.makedirs(out_path)
        else:
            msg("Exit")
            return
    sane_files = None
    if sanity_file is not None:
        msg("Using sanity file", sanity_file)
        sane_files = []
        with open(sanity_file, "r") as f:
            for i in f:
                sane_files.append(os.path.abspath(os.path.normpath(i.strip())))
    size_of_files = {}
    for i in input_files:
        i = os.path.normpath(i.strip())
        if sane_files is not None and os.path.abspath(i) not in sane_files:
            msg("Skipping", i, "because not in sanity file")
            continue
        size_of_files[i] = os.path.getsize(i) * 1e-6
    bunched_files = [[]]
    bunched_sizes = []
    bunch_size = []
    for i in size_of_files:
        verbose_msg("Checking file", i, "of size", size_of_files[i], "MB")
        if sum(bunch_size) > max_bunch_size:
            verbose_msg("Bunch size", sum(bunch_size), "reached limit with",
                        len(bunch_size), "files", max_bunch_size, "MB",
                        "preparing next bunch!")
            bunched_files.append([])
            bunched_sizes.append(sum(bunch_size))
            bunch_size = []
        bunch_size.append(size_of_files[i])
        bunched_files[-1].append(i)
    bunched_sizes.append(sum(bunch_size))
    verbose_msg("Got", len(bunched_files), "bunches")
    for i, j in enumerate(bunched_files):
        verbose_msg(f"{i})", bunched_sizes[i], "MB, with", len(j), j)
    msg("Preparing", len(bunched_files), "bunched lists")
    bunched_aod_names.clear()
    for i, j in enumerate(bunched_files):
        fn = f"aod_merge_list_bunch{i}.txt"
        verbose_msg("Writing bunch", i, "to", fn)
        if not over_write_lists:
            if os.path.isfile(fn):
                fatal_msg(fn, "already present, remove it first")
        with open(fn, "w") as f:
            for k in j:
                f.write(k + "\n")
        if do_merge:
            out_aod = os.path.join(out_path, f"AO2D_Merge_{i}.root")
            if os.path.isfile(out_aod):
                fatal_msg(out_aod, "already present")
            bunched_aod_names[fn] = {"out_aod": out_aod,
                                     "file_index": i,
                                     "total_files": len(bunched_files),
                                     "input_size": bunched_sizes[i]}
    run_in_parallel(jobs, run_merge, list(bunched_aod_names.keys()),
                    job_message="Running AOD merging",
                    linearize_single_core=True)

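# `run_merge` and the module-level `bunched_aod_names` dictionary are defined elsewhere in this
# script. A minimal, hypothetical sketch of a worker that consumes one bunch list and merges it
# with ROOT's `hadd`, under that assumption, could be:
import subprocess

bunched_aod_names = {}


def run_merge(bunch_list_file):
    """Merge every AOD listed in bunch_list_file into its preassigned output file."""
    entry = bunched_aod_names[bunch_list_file]
    out_aod = entry["out_aod"]
    with open(bunch_list_file) as f:
        inputs = [line.strip() for line in f if line.strip()]
    # hadd is the ROOT command-line merger; -f overwrites the output if already present
    subprocess.run(["hadd", "-f", out_aod, *inputs], check=True)
    return out_aod
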
def main(mode, input_file, out_path, out_tag="", batch_size=4, n_max_files=100,
         dpl_configuration_file=None, njobs=1, merge_output=True, merge_only=False,
         shm_mem_size=16000000000, rate_lim=1000000000, readers=1,
         avoid_overwriting_merge=False, clean_localhost_after_running=True,
         extra_arguments=""):
    if len(input_file) == 1:
        input_file = input_file[0]
    else:
        input_file = input_file[0:n_max_files]
    if not merge_only:
        msg("Running", f"'{mode}'", "analysis on", f"'{input_file}'", color=bcolors.BOKBLUE)
        msg("Maximum", n_max_files, "files with batch size", batch_size,
            "and", njobs, "jobs" if njobs > 1 else "job", color=bcolors.BOKBLUE)
    else:
        msg("Merging output of", f"'{mode}'", "analysis", color=bcolors.BOKBLUE)
    o2_arguments = (f"-b --shm-segment-size {shm_mem_size} "
                    f"--aod-memory-rate-limit {rate_lim} --readers {readers}")
    o2_arguments += extra_arguments
    if mode not in analyses:
        raise ValueError("Did not find analyses matching mode", mode,
                         ", please choose in", ", ".join(analyses.keys()))
    an = analyses[mode]
    tag = mode + out_tag

    # Build input file list
    input_file_list = []

    def build_list_of_files(file_list):
        # Check that the runlist does not have duplicates
        if len(file_list) != len(set(file_list)):
            fatal_msg("Runlist has duplicated entries, fix runlist!")
        not_readable = []
        for i in file_list:
            # Check that input files can be opened
            f = TFile(i.strip(), "READ")
            if not f.IsOpen():
                verbose_msg("Cannot open AOD file:", i, color=bcolors.WARNING)
                not_readable.append(i)
        if len(not_readable) > 0:
            warning_msg(len(not_readable), "files cannot be read and will be skipped")
            for i in not_readable:
                file_list.remove(i)
        files_per_batch = []
        iter_file_list = iter(file_list)
        for i in range(0, len(file_list)):
            sub_set = list(islice(iter_file_list, batch_size))
            if len(sub_set) <= 0:
                continue
            files_per_batch.append(sub_set)
        run_list = []
        if len(files_per_batch) > 0:
            for i, lines in enumerate(files_per_batch):
                p = os.path.join(out_path, f"{i}")
                if not os.path.isdir(p):
                    os.makedirs(p)
                run_list.append(os.path.join(p, f"ListForRun5Analysis.{i}.txt"))
                with open(run_list[-1], "w") as f:
                    for j in lines:
                        f.write(j.strip() + "\n")
        msg("Number of runs:", len(run_list))
        return run_list

    if type(input_file) is list:
        input_file = [os.path.join(os.getcwd(), i) for i in input_file]
        input_file_list = build_list_of_files(input_file)
    elif not input_file.endswith(".root"):
        with open(input_file, "r") as f:
            lines = f.readlines()
            msg("Building input list from", len(lines), "inputs, limiting to", n_max_files)
            if len(lines) > n_max_files:
                lines = lines[0:n_max_files]
            input_file_list = build_list_of_files(lines)
    else:
        input_file_list = [os.path.join(os.getcwd(), input_file)]
    if dpl_configuration_file is not None:
        dpl_configuration_file = os.path.join(os.getcwd(), dpl_configuration_file)

    run_list = []
    for i, j in enumerate(input_file_list):
        run_list.append(set_o2_analysis(an,
                                        o2_arguments=o2_arguments,
                                        input_file=j,
                                        tag=tag,
                                        dpl_configuration_file=dpl_configuration_file))
    if not merge_only:
        run_in_parallel(processes=njobs, job_runner=run_o2_analysis,
                        job_arguments=run_list, job_message="Running analysis")
    if clean_localhost_after_running:
        run_cmd("find /tmp/ -maxdepth 1 -name localhost* -user $(whoami) | xargs rm -v")
    if (merge_output or merge_only) and len(run_list) > 1:
        files_to_merge = []
        for i in input_file_list:
            p = os.path.dirname(os.path.abspath(i))
            for j in os.listdir(p):
                if j.endswith(f"_{tag}.root"):
                    files_to_merge.append(os.path.join(p, j))
        if len(files_to_merge) == 0:
            warning_msg("Did not find any file to merge for tag", tag)
            return
        if len(files_to_merge) > len(run_list):
            fatal_msg("Trying to merge too many files!", tag)
        msg("Merging", len(files_to_merge), "results", color=bcolors.BOKBLUE)
        files_per_type = {}  # List of files to be merged per type
        for i in files_to_merge:
            fn = os.path.basename(i)
            files_per_type.setdefault(fn, [])
            files_per_type[fn].append(i)
        merged_files = []
        for i in files_per_type:
            merged_file = os.path.join(out_path, i)
            if avoid_overwriting_merge and os.path.isfile(merged_file):
                warning_msg("file", merged_file,
                            "is already found, remove it before merging, you can use "
                            "the --mergeonly flag to avoid running the analysis again")
                continue
            merged_files.append(merged_file)
            merge_file_list = os.path.join(os.path.dirname(os.path.abspath(merged_file)),
                                           "tomerge_" + "".join(i.split(".")[:-1]) + ".txt")
            verbose_msg("List of files to be merged:", merge_file_list)
            with open(merge_file_list, "w") as fmerge:
                for j in files_per_type[i]:
                    fmerge.write(j + "\n")
            run_cmd(f"hadd -j {njobs} -f {merged_file} `cat {merge_file_list}`",
                    log_file=merge_file_list.replace(".txt", ".log"))
        if len(merged_files) == 0:
            warning_msg("Merged no files")
        else:
            msg("Merging completed, merged:", *merged_files, color=bcolors.BOKGREEN)

def main(configuration_file, config_entry, njobs, nruns, nevents, qa, output_path,
         clean_delphes_files, create_luts, turn_off_vertexing, append_production,
         use_nuclei, avoid_file_copy, debug_aod, tof_mismatch):
    arguments = locals()  # List of arguments to put into the log
    parser = configparser.RawConfigParser()
    parser.read(configuration_file)
    if config_entry not in parser.keys():
        k = list(parser.keys())
        k.sort()
        fatal_msg(f"Did not find configuration entry '{config_entry}' in config file",
                  configuration_file + "\n\t Available entries:\n\t\t" + "\n\t\t".join(k))
    run_cmd("./clean.sh > /dev/null 2>&1", check_status=False)

    # Dictionary of fetched options
    running_options = {}
    for i in arguments:
        running_options["ARG " + i] = arguments[i]

    def opt(entry, require=True):
        try:
            o = parser.get(config_entry, entry)
            b = ['yes', 'no', 'on', 'off', 'true', 'false']
            for i in b:
                if o.lower() == i:
                    o = parser.getboolean(config_entry, entry)
                    break
            verbose_msg("Got option", entry, "=", f"'{o}'")
            running_options[entry] = o
            return o
        except:
            if require:
                fatal_msg("Missing entry", f"'{entry}'",
                          "in configuration file", f"'{configuration_file}'")
            return None

    # Config from the config file
    # simulation configuration
    if output_path is None:
        output_path = ""
    output_path = os.path.join(os.getcwd(), output_path)
    msg("Output will be found in", f"'{output_path}'")
    if not os.path.isdir(output_path):
        msg("Creating output path")
        os.makedirs(output_path)
        if not os.path.isdir(output_path):
            raise RuntimeError("Cannot find output path", output_path)

    # detector configuration
    bField = opt("bField")
    sigmaT = opt("sigmaT")
    sigmaT0 = opt("sigmaT0")
    tof_radius = opt("tof_radius")
    rich_radius = opt("rich_radius")
    rich_index = opt("rich_index")
    forward_rich_index = opt("forward_rich_index")
    minimum_track_radius = opt("minimum_track_radius")
    etaMax = opt("etamax")
    barrel_half_length = opt("barrel_half_length")

    # copy relevant files to the working directory
    def do_copy(in_file, out_file=None, in_path=None):
        """Function to copy files"""
        in_file = os.path.normpath(in_file)  # Normalize path
        if out_file is None:
            # If left unconfigured use the same name but put in the current path
            out_file = os.path.basename(in_file)
        out_file = os.path.normpath(out_file)  # Normalize path
        if in_path is not None:
            in_file = os.path.join(in_path, in_file)
        in_file = os.path.expanduser(os.path.expandvars(in_file))
        if avoid_file_copy:
            if os.path.isfile(out_file) or (in_file == out_file):
                verbose_msg("Skipping copy of", in_file, "to", out_file,
                            "because of --avoid-config-copy")
            else:
                verbose_msg("Copying", in_file, "to", out_file,
                            "because of --avoid-config-copy")
                run_cmd(f"cp {in_file} {out_file}",
                        comment="Copying files without python")
            return
        verbose_msg("Copying", in_file, "to", out_file)
        shutil.copy2(in_file, out_file)

    # Fetching the propagation card
    do_copy(opt("propagate_card"), "propagate.tcl", in_path=opt("card_path"))
    lut_path = opt("lut_path")
    lut_tag = opt("lut_tag")
    lut_tag = f"rmin{int(float(minimum_track_radius))}.{lut_tag}"
    lut_particles = ["el", "mu", "pi", "ka", "pr"]
    if use_nuclei:
        lut_particles += ["de", "tr", "he3"]
    if create_luts:
        # Creating LUTs
        verbose_msg("Creating LUTs")
        lut_path = os.path.join(lut_path, "create_luts.sh")
        run_cmd(f"{lut_path} -p {lut_path} -t {lut_tag} -B {float(bField)*0.1} "
                f"-R {minimum_track_radius} -P \"0 1 2 3 4 5 6\" -j 1 -F 2>&1",
                f"Creating the lookup tables with tag {lut_tag} from {lut_path} script")
    else:
        # Fetching LUTs
        verbose_msg(f"Fetching LUTs with tag {lut_tag} from path {lut_path}")
        for i in lut_particles:
            lut_bg = "{}kG".format(bField).replace(".", "")
            do_copy(f"lutCovm.{i}.{lut_bg}.{lut_tag}.dat",
                    f"lutCovm.{i}.dat", in_path=lut_path)
    # Checking that we actually have LUTs
    for i in lut_particles:
        i = f"lutCovm.{i}.dat"
        if not os.path.isfile(i):
            fatal_msg("Did not find LUT file", i)

    custom_gen = opt("custom_gen", require=False)
    if custom_gen is None:
        # Checking that the generators are defined
        generators = opt("generators", require=False)
        if generators is None:
            fatal_msg("Did not find any generator configuration corresponding to the entry",
                      config_entry, "in your configuration file", configuration_file)
        generators = generators.split(" ")
        for i in generators:
            do_copy(i)
        msg("Using pythia with configuration", generators)
    else:
        def check_duplicate(option_name):
            if f" {option_name}" in custom_gen:
                fatal_msg(f"Remove '{option_name}' from", custom_gen,
                          "as it will be automatically set")

        for i in ["--output", "-o", "--nevents", "-n"]:
            check_duplicate(i)
        if "INPUT_FILES" in custom_gen:
            input_hepmc_files = custom_gen.replace("INPUT_FILES", "").strip().split(" ")
            input_hepmc_file_list = []
            for i in input_hepmc_files:
                input_hepmc_file_list += glob.glob(os.path.normpath(i))
            if len(input_hepmc_file_list) >= nruns:
                input_hepmc_file_list = input_hepmc_file_list[0:nruns]
            else:
                nruns = len(input_hepmc_file_list)
            if len(input_hepmc_file_list) <= 0:
                fatal_msg("Did not find any input file matching to the request:", custom_gen)
            custom_gen = "INPUT_FILES " + " ".join(input_hepmc_file_list)
            msg("Using", len(input_hepmc_file_list),
                "input HepMC file" + ("" if len(input_hepmc_file_list) == 1 else "s"),
                input_hepmc_file_list)
        else:
            msg("Using custom generator", custom_gen)

    # Printing configuration
    msg(" --- running createO2tables.py", color=bcolors.HEADER)
    msg(" n. jobs =", njobs)
    msg(" n. runs =", nruns)
    msg(" events per run =", nevents)
    msg(" tot. events =", "{:.0e}".format(nevents * nruns))
    msg(" LUT path =", f"'{lut_path}'")
    msg(" --- with detector configuration", color=bcolors.HEADER)
    msg(" B field =", bField, "[kG]")
    msg(" Barrel radius =", minimum_track_radius, "[cm]")
    msg(" Barrel half length =", barrel_half_length, "[cm]")
    if create_luts:
        msg(" Minimum track radius =", minimum_track_radius, "[cm]")
    msg(" LUT =", lut_tag)
    msg(" etaMax =", etaMax)
    msg(" --- with TOF configuration", color=bcolors.HEADER)
    msg(" sigmaT =", sigmaT, "[ns]")
    msg(" sigmaT0 =", sigmaT0, "[ns]")
    msg(" tof_radius =", tof_radius, "[cm]")
    msg(" --- with RICH configuration", color=bcolors.HEADER)
    msg(" rich_radius =", rich_radius, "[cm]")
    msg(" rich_index =", rich_index)
    msg(" --- with Forward RICH configuration", color=bcolors.HEADER)
    msg(" forward_rich_index =", forward_rich_index)

    aod_path = opt("aod_path")
    do_copy("createO2tables.h", in_path=aod_path)
    do_copy("createO2tables.C", in_path=aod_path)
    do_copy("muonAccEffPID.root", in_path=aod_path)
    if qa:
        do_copy("diagnostic_tools/dpl-config_std.json")

    def set_config(config_file, config, value):
        config = config.strip()
        value = value.strip()
        config_string = f"{config} {value}"
        run_cmd("sed -i -e \"" f"s/{config} .*$/{config_string}" "\" " + config_file)
        # Checking that the file has the correct configuration
        with open(config_file) as f:
            has_it = False
            config_string = config_string.replace("\\", "").strip("/")
            for lineno, line in enumerate(f):
                if line.strip() == config_string:
                    verbose_msg(f"Found config string '{config_string}'",
                                f"at line #{lineno} '{line.strip()}'")
                    has_it = True
                    break
            if not has_it:
                fatal_msg("Configuration file", config_file,
                          f"does not have config string '{config_string}'")

    # set magnetic field
    set_config("propagate.tcl", "set barrel_Bz", f"{bField}" "e\-1/")
    set_config("createO2tables.C", "const double Bz = ", f"{bField}" "e\-1\;/")
    if turn_off_vertexing:
        set_config("createO2tables.C", "constexpr bool do_vertexing = ", "false\;/")
    else:
        # Check that the geometry file for the vertexing is there
        if not os.path.isfile("o2sim_grp.root") or not os.path.isfile("o2sim_geometry.root"):
            run_cmd("mkdir tmpo2sim && cd tmpo2sim && "
                    "o2-sim -m PIPE ITS MFT -g boxgen -n 1 -j 1 --configKeyValues 'BoxGun.number=1' && "
                    "cp o2sim_grp.root .. && cp o2sim_geometry.root .. && "
                    "cd .. && rm -r tmpo2sim")
    if use_nuclei:
        set_config("createO2tables.C", "constexpr bool enable_nuclei = ", "true\;/")
    if debug_aod:
        set_config("createO2tables.C", "constexpr bool debug_qa = ", "true\;/")
    if tof_mismatch:
        if tof_mismatch not in [1, 2]:
            fatal_msg("tof_mismatch", tof_mismatch, "is not 1 or 2")
        set_config("createO2tables.C", "constexpr int tof_mismatch = ", f"{tof_mismatch}\;/")
    if qa:
        set_config("dpl-config_std.json", "\\\"d_bz\\\":", "\\\"" f"{bField}" "\\\"\,/")
    # set barrel_radius
    set_config("propagate.tcl", "set barrel_Radius", f"{minimum_track_radius}" "e\-2/")
    # set barrel_half_length
    set_config("propagate.tcl", "set barrel_HalfLength", f"{barrel_half_length}" "e\-2/")
    # set tof_radius
    set_config("createO2tables.C", "constexpr double tof_radius =", f"{tof_radius}" "\;/")
    # set tof_length
    set_config("createO2tables.C", "const double tof_length =", f"{barrel_half_length}" "\;/")
    # set rich_radius
    set_config("createO2tables.C", "constexpr double rich_radius =", f"{rich_radius}" "\;/")
    # set rich_index
    set_config("createO2tables.C", "const double rich_index =", f"{rich_index}" "\;/")
    # set forward_rich_index
    set_config("createO2tables.C", "const double forward_rich_index =", f"{forward_rich_index}" "\;/")
    # set acceptance
    set_config("propagate.tcl", "set barrel_Acceptance",
               "\{ 0.0 + 1.0 * fabs(eta) < " f"{etaMax}" " \}/")
    # set time resolution
    set_config("propagate.tcl", "set barrel_TimeResolution", f"{sigmaT}" "e\-9/")
    set_config("createO2tables.C", "const double tof_sigmat =", f"{sigmaT}" "\;/")
    set_config("createO2tables.C", "const double tof_sigmat0 =", f"{sigmaT0}" "\;/")

    run_list = range(nruns)
    if append_production:
        if output_path is None:
            fatal_msg("Output path is not defined, cannot append")
        last_preexisting_aod = [each for each in os.listdir(output_path)
                                if each.endswith('.root') and "AODRun5" in each]
        if len(last_preexisting_aod) == 0:
            fatal_msg("Appending to a non existing production")
        last_preexisting_aod = sorted([int(each.replace("AODRun5.", "").replace(".root", ""))
                                       for each in last_preexisting_aod])[-1] + 1
        msg(f" Appending to production with {last_preexisting_aod} AODs",
            color=bcolors.BWARNING)
        run_list = range(last_preexisting_aod, last_preexisting_aod + nruns)

    def configure_run(run_number):
        # Create executable that runs Generation, Delphes and analysis
        runner_file = f"runner{run_number}.sh"
        with open(runner_file, "w") as f_run:

            def write_to_runner(line, log_file=None, check_status=False):
                """ Writes commands to runner """
                log_line = ""
                if log_file is not None:
                    log_line = f" &> {log_file} 2>&1"
                line += log_line
                line += "\n"
                f_run.write(line)
                if check_status:
                    f_run.write("\nReturnValue=$?\n")
                    f_run.write("if [[ $ReturnValue != 0 ]]; then\n")
                    f_run.write("  echo \"Encountered error with command: '")
                    line = line.replace(log_line, "")
                    f_run.write(line.replace("\"", "\\\"").strip())
                    f_run.write("'\"\n")
                    if log_file is not None:
                        f_run.write("  echo \"Check log: '")
                        f_run.write(log_file.strip() + "'\"\n")
                    f_run.write("  exit $ReturnValue\n")
                    f_run.write("fi\n")

            def copy_and_link(file_name):
                """ In runner, copies file to output path (if different from current) and links it to current """
                if os.path.normpath(output_path) != os.getcwd():
                    write_to_runner(f"mv {file_name} {output_path} \n")
                    write_to_runner(f"ln -s {os.path.join(output_path, file_name)} . \n")

            write_to_runner("#!/usr/bin/env bash\n")
            delphes_file = f"delphes.{run_number}.root"
            delphes_log_file = delphes_file.replace(".root", ".log")
            hepmc_file = None
            mc_seed = random.randint(1, 800000000)
            if custom_gen:  # Using HEPMC
                hepmc_file = f"hepmcfile.{run_number}.hepmc"
                if "INPUT_FILES" in custom_gen:
                    input_hepmc_file = custom_gen.replace("INPUT_FILES", "").strip().split(" ")
                    input_hepmc_file = input_hepmc_file[run_number]
                    write_to_runner(f"ln -s {input_hepmc_file} {hepmc_file} \n")
                else:
                    gen_log_file = f"gen.{run_number}.log"
                    custom_gen_option = f" --output {hepmc_file} --nevents {nevents} --seed {mc_seed}"
                    write_to_runner(custom_gen + custom_gen_option,
                                    log_file=gen_log_file, check_status=True)
                write_to_runner(f"DelphesHepMC propagate.tcl {delphes_file} {hepmc_file}",
                                log_file=delphes_log_file, check_status=True)
            else:  # Using DelphesPythia
                # copy generator configuration
                generator_cfg = f"generator.{run_number}.cfg"
                generator_orig = generators[0].split("/")[-1]
                do_copy(generator_orig, generator_cfg)
                # Adjust configuration file
                with open(generator_cfg, "a") as f_cfg:
                    # number of events and random seed
                    f_cfg.write("\n\n\n#### Additional part ###\n\n\n\n")
                    f_cfg.write(f"Main:numberOfEvents {nevents}\n")
                    f_cfg.write("Random:setSeed = on\n")
                    f_cfg.write(f"Random:seed = {mc_seed}\n")
                    # collision time spread [mm/c]
                    f_cfg.write("Beams:allowVertexSpread on \n")
                    f_cfg.write("Beams:sigmaTime 60.\n")
                    for i in generators[1:]:
                        with open(i.split("/")[-1], "r") as f_append:
                            f_cfg.write(f_append.read())
                write_to_runner(f"DelphesPythia8 propagate.tcl {generator_cfg} {delphes_file}",
                                log_file=delphes_log_file, check_status=True)
            aod_file = f"AODRun5.{run_number}.root"
            aod_log_file = aod_file.replace(".root", ".log")
            write_to_runner(f"root -l -b -q 'createO2tables.C+(\"{delphes_file}\", \"tmp_{aod_file}\", 0)'",
                            log_file=aod_log_file, check_status=True)
            # Check that there were no O2 errors
            write_to_runner(f"if grep -q \"\[ERROR\]\" {aod_log_file}; then "
                            f"echo \": got some errors in '{aod_log_file}'\" && "
                            f"echo \"Found some ERROR in this log\" >> {aod_log_file}; fi")
            write_to_runner(f"if grep -q \"\[FATAL\]\" {aod_log_file}; then "
                            f"echo \": got some fatals in '{aod_log_file}'\" && "
                            f"echo \"Found some FATAL in this log\" >> {aod_log_file} && exit 1; fi")
            # Rename the temporary AODs to standard AODs
            write_to_runner(f"mv tmp_{aod_file} {aod_file}", check_status=True)
            if not clean_delphes_files:
                copy_and_link(delphes_file)
                if hepmc_file is not None:
                    copy_and_link(hepmc_file)
            copy_and_link(aod_file)
            if clean_delphes_files:
                write_to_runner(f"rm {delphes_file}")
                if not custom_gen:
                    # generator_cfg exists only for the DelphesPythia8 path
                    write_to_runner(f"rm {generator_cfg}")
                if hepmc_file is not None:
                    write_to_runner(f"rm {hepmc_file}")
            write_to_runner("exit 0\n")

    # Configuring all the runs
    for i in run_list:
        configure_run(i)

    # Compiling the table creator macro once for all
    run_cmd("root -l -b -q 'createO2tables.C+(\"\")' > /dev/null 2>&1",
            comment="to compile the table creator only once, before running")
    if not os.path.isfile("createO2tables_C.so"):
        run_cmd("root -l -b -q 'createO2tables.C+(\"\")'",
                comment="to compile with full log")
        fatal_msg("'createO2tables.C' did not compile!")

    total_processing_time = time.time()
    msg(" --- start processing the runs ", color=bcolors.HEADER)
    run_in_parallel(processes=njobs, job_runner=process_run,
                    job_arguments=run_list, job_message="Running production")
    # merge runs when all done
    msg(" --- all runs are processed, so long", color=bcolors.HEADER)
    total_processing_time = time.time() - total_processing_time
    msg(f"-- took {total_processing_time} seconds in total --", color=bcolors.BOKGREEN)

    # Writing the list of produced AODs
    output_list_file = "listfiles.txt"
    with open(output_list_file, "w") as listfiles:
        for i in os.listdir("."):
            if "AODRun5." in i and i.endswith(".root"):
                listfiles.write(f"{os.getcwd()}/{i}\n")

    # Writing summary of production
    summaryfile = "summary.txt"
    with open(summaryfile, "w") as f:
        f.write("\n## Summary of last run ##\n")
        now = datetime.now()
        dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
        f.write(f"Finished at {dt_string}\n")
        f.write(f"Took {total_processing_time} seconds\n")

        def write_config(entry, prefix=""):
            f.write(prefix + entry.strip("ARG ") + f" = {running_options[entry]}\n")

        f.write("\n## Configuration ##\n")
        for i in running_options:
            if "ARG" in i:
                write_config(i, prefix=" - ")
        f.write("\n## Options ##\n")
        for i in running_options:
            if "ARG" not in i:
                write_config(i, prefix=" * ")
        output_size = sum(os.path.getsize(os.path.join(output_path, each))
                          for each in os.listdir(output_path)
                          if os.path.isfile(os.path.join(output_path, each)))
        f.write("\n## Size of the output ##\n")
        f.write(f" - {output_size} bytes\n")
        f.write(f" - {output_size/1e6} MB\n")
        f.write(f" - {output_size/1e9} GB\n")
    run_cmd("echo >> " + summaryfile)
    run_cmd("echo + DelphesO2 Version + >> " + summaryfile)
    run_cmd("git rev-parse HEAD >> " + summaryfile, check_status=False)
    if os.path.normpath(output_path) != os.getcwd():
        if append_production:
            s = os.path.join(output_path, summaryfile)
            run_cmd(f"echo '' >> {s}")
            run_cmd(f"echo ' **' >> {s}")
            run_cmd(f"echo 'Appended production' >> {s}")
            run_cmd(f"echo ' **' >> {s}")
            run_cmd(f"echo '' >> {s}")
            run_cmd(f"cat {summaryfile} >> {s}")
        else:
            run_cmd(f"mv {summaryfile} {output_path}")
            run_cmd(f"ln -s {os.path.join(output_path, summaryfile)} ./")
    if qa:
        msg(" --- running test analysis", color=bcolors.HEADER)
        run_cmd(f"./diagnostic_tools/doanalysis.py TrackQA RICH TOF -i {output_list_file} -M 25 -B 25")
    if tof_mismatch == 1:
        # TOF mismatch in create mode
        run_cmd(f"hadd -j {njobs} -f tofMM.root tof_mismatch_template_DF_*.root && "
                "rm tof_mismatch_template_DF_*.root")

input_files = []
for i in args.input_files:
    i = os.path.normpath(i)
    if i.endswith(".root"):
        input_files.append(i)
    elif i.endswith(".txt"):
        with open(i, "r") as f:
            for j in f:
                j = j.strip()
                input_files.append(os.path.join(os.path.abspath(os.path.dirname(i)),
                                                os.path.normpath(j)))
run_in_parallel(args.njobs, main, input_files, "Checking file",
                linearize_single_core=True)
if len(bad_files) > 0:
    warning_msg("There were", len(bad_files), "bad files")
    for i in bad_files:
        msg(i)
if args.output is not None:
    msg("Writing good files to", args.output)
    with open(args.output, "w") as f:
        for i in input_files:
            if i not in bad_files:
                f.write(i + "\n")

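# The `main` worker and the `bad_files` list that the driver fragment above relies on are
# defined earlier in the checker script. A minimal, hypothetical sketch of such a worker,
# assuming it flags files that cannot be opened cleanly as a ROOT TFile (and that the jobs
# run in the same process, as with linearize_single_core=True), could be:
from ROOT import TFile

bad_files = []


def main(file_name):
    """Open one ROOT file and record it as bad if it cannot be read."""
    f = TFile(file_name, "READ")
    if not f.IsOpen() or f.IsZombie() or f.TestBit(TFile.kRecovered):
        bad_files.append(file_name)
    else:
        f.Close()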