Example #1
def copylist(fname="",
             jobs=InputArgument(1, "Number of parallel jobs to use",
                                ["--njobs", "-j"], int)):
    """Takes a text file and downloads the files from grid"""
    if jobs is None:
        jobs = 1
    verbose_msg("Copying files from list", fname, "with", jobs, "jobs")
    fname = path.normpath(fname)
    if not path.isfile(fname):
        warning_msg("Input file not provided! Aborting")
        return
    sofar = copied(fname, "So far")
    Group = []
    with open(fname, "r") as f:
        for line in f:
            if "%" in line:
                msg("Character % encountered! Aborting")
                break
            if "#" in line:
                msg("Character # encountered! Skipping")
                continue
            line = "./" + line.strip()
            if jobs == 1:
                copyfile(line)
            else:
                Group.append(line)
    if jobs > 1:
        msg("Copying list in parallel with", jobs, "jobs")
        run_in_parallel(processes=jobs,
                        job_runner=copyfile,
                        job_arguments=Group,
                        job_message="Downloading files",
                        linearize_single_core=True)
    copied(fname, extra_msg="In recent run", last_time=sofar)
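
A minimal invocation sketch, assuming copylist and its helpers (copyfile, run_in_parallel, the *_msg loggers) are importable from the surrounding module; the list file name below is hypothetical.

# Hypothetical usage: download everything listed in grid_files.txt with 4 parallel jobs.
copylist("grid_files.txt", jobs=4)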
Example #2
    def build_list_of_files(file_list):
        verbose_msg("Building list of files from", file_list)
        # Check that runlist does not have duplicates
        unique_file_list = set(file_list)
        if len(file_list) != len(unique_file_list):
            fatal_msg("Runlist has duplicated entries, fix runlist!",
                      len(unique_file_list), "unique files, while got",
                      len(file_list), "files")
        file_status = {
            "Does not exist": [],
            "Cannot be open": [],
            "Was recovered": [],
            "Is Ok": []
        }
        if check_input_file_integrity:  # Check that input files can be open
            for i in file_list:
                verbose_msg("Checking that TFile", i.strip(),
                            "can be processed")
                file_status[is_root_file_sane(i)].append(i)
        recovered_files = file_status["Was recovered"]
        not_readable = []
        for i in file_status:
            if i == "Is Ok":
                continue
            not_readable += file_status[i]
        if len(recovered_files) > 0:
            msg("Recovered", len(recovered_files), "files:\n",
                "\n".join(recovered_files))
        if len(not_readable) > 0:
            warning_msg(len(not_readable), "over", len(file_list),
                        "files cannot be read and will be skipped")
            for i in not_readable:
                if i not in file_list:
                    warning_msg("did not find file to remove", f"'{i}'")
                file_list.remove(i)

        files_per_batch = []
        iter_file_list = iter(file_list)
        while True:
            sub_set = list(islice(iter_file_list, batch_size))
            if not sub_set:
                break
            files_per_batch.append(sub_set)
        run_list = []
        if len(files_per_batch) > 0:
            for i, lines in enumerate(files_per_batch):
                p = os.path.join(out_path, f"{i}")
                if not os.path.isdir(p):
                    os.makedirs(p)
                run_list.append(os.path.join(p,
                                             f"ListForRun5Analysis.{i}.txt"))
                with open(run_list[-1], "w") as f:
                    for j in lines:
                        f.write(j.strip() + "\n")
        msg("Number of runs:", len(run_list))
        return run_list
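
The islice-based chunking used above is easier to verify in isolation; here is a self-contained, runnable sketch of the same idiom (plain Python, no project helpers needed):

from itertools import islice

def chunk(iterable, size):
    # Yield successive fixed-size chunks, as build_list_of_files does per batch.
    it = iter(iterable)
    while True:
        sub_set = list(islice(it, size))
        if not sub_set:
            return
        yield sub_set

print(list(chunk(range(7), 3)))  # [[0, 1, 2], [3, 4, 5], [6]]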
Example #3
    def build_list_of_files(file_list):
        # Check that runlist does not have duplicates
        if len(file_list) != len(set(file_list)):
            fatal_msg("Runlist has duplicated entries, fix runlist!")
        not_readable = []
        for i in file_list:  # Check that input files can be open
            f = TFile(i.strip(), "READ")
            if not f.IsOpen():
                verbose_msg("Cannot open AOD file:", i, color=bcolors.WARNING)
                not_readable.append(i)
        if len(not_readable) > 0:
            warning_msg(len(not_readable),
                        "files cannot be read and will be skipped")
            for i in not_readable:
                file_list.remove(i)

        files_per_batch = []
        iter_file_list = iter(file_list)
        while True:
            sub_set = list(islice(iter_file_list, batch_size))
            if not sub_set:
                break
            files_per_batch.append(sub_set)
        run_list = []
        if len(files_per_batch) > 0:
            for i, lines in enumerate(files_per_batch):
                p = os.path.join(out_path, f"{i}")
                if not os.path.isdir(p):
                    os.makedirs(p)
                run_list.append(os.path.join(
                    p, f"ListForRun5Analysis.{i}.txt"))
                with open(run_list[-1], "w") as f:
                    for j in lines:
                        f.write(j.strip() + "\n")
        msg("Number of runs:", len(run_list))
        return run_list
Example #4
 def inspect(name, tree_name):
     tree_name = f"{name}/{tree_name}"
     t = input_file.Get(tree_name)
     if not t:
         warning_msg("Did not get tree", tree_name)
         return -1
     if verbose:
         input_file.Get(name).ls()
     verbose_msg(tree_name, t.GetEntries())
     return t.GetEntries()
Example #5
 def must_be_same(*args):
     counts = [dictionary_of_counts[k] for k in args]
     names = list(args)
     if len(set(counts)) != 1:
         add_bad()
         warning_msg("Did not get equal counts for",
                     ", ".join(names), counts, "in DF", df_index, "/",
                     len(list_of_keys), ":", i.GetName())
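
For reference, a tiny standalone illustration of the set-based "all values equal" test that must_be_same relies on:

counts = [42, 42, 41]  # hypothetical per-table counts
if len(set(counts)) != 1:  # more than one distinct value means a mismatch
    print("counts differ:", counts)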
Example #6
def main(input_files, args=None):
    if not isinstance(input_files, list):
        input_files = [input_files]
    if len(input_files) <= 0:
        warning_msg("Passed no input, use: --input_files")
        return
    if args.command == "listfiles":
        for i in input_files:
            list_of_files = []
            if os.path.isfile(i):
                paths_to_list = []
                with open(i) as fsecondary:
                    for j in fsecondary:
                        j = j.strip().strip(",")
                        if j == "":
                            continue
                        for k in j.split(","):
                            paths_to_list.append(k)
                for j in paths_to_list:
                    list_of_files += listfiles(
                        Path=j,
                        What=args.what,
                        MustHave=args.musthave,
                        MustHaveCount=args.musthavecount,
                        MustNotHaveCount=args.mustnothavecount,
                        MustNotHave=args.mustnothave)
            else:
                list_of_files = listfiles(
                    Path=i,
                    What=args.what,
                    MustHave=args.musthave,
                    MustHaveCount=args.musthavecount,
                    MustNotHaveCount=args.mustnothavecount,
                    MustNotHave=args.mustnothave)
            append = args.append
            do_write_files = args.outfile
            if len(list_of_files) > 0 and do_write_files:
                writefiles(list_of_files,
                           do_write_files,
                           append=(i == list_of_files[0]) or append)
    elif args.command == "copyfile":
        for i in input_files:
            copyfile(i)
    elif args.command == "copylist":
        for i in input_files:
            copylist(i, jobs=args.jobs)
    elif args.command == "copied":
        for i in input_files:
            print(copied(i))
    elif args.command == "merge_aod":
        for i in input_files:
            merge_aod(i, input_file=args.what)
    else:
        warning_msg("Did not do anything")
Example #7
 def is_root_file_sane(file_name_to_check):
     file_name_to_check = file_name_to_check.strip()
     if not os.path.isfile(file_name_to_check):
         warning_msg("File", file_name_to_check, "does not exist")
         return "Does not exist"
     file_to_check = TFile(file_name_to_check, "READ")
     if not file_to_check.IsOpen():
         warning_msg("Cannot open AOD file:", file_name_to_check)
         return "Cannot be open"
     elif file_to_check.TestBit(TFile.kRecovered):
         verbose_msg(file_name_to_check, "was a recovered file")
         return "Was recovered"
     else:
         verbose_msg(file_name_to_check, "is OK")
         return "Is Ok"
Example #8
def run_o2_analysis(tmp_script_name,
                    remove_tmp_script=False,
                    explore_bad_files=False,
                    time_it=True):
    global number_of_runs
    verbose_msg("> starting run with", tmp_script_name)
    cmd = f"bash {tmp_script_name}"
    if do_bash_script:
        with open("parallelbash.sh", "a") as fout:
            with open("parallelbash.sh", "r") as fin:
                lastline = fin.readlines()[-1]
                if lastline.startswith("#"):
                    lastline = int(lastline.strip("#"))
                else:
                    lastline = 0
                fout.write(f"echo Running {lastline}\n")
                fout.write(f"{cmd} &\n")
                lastline += 1
                if lastline % (bash_parallel_jobs + 1) == 0:
                    fout.write(f"wait\n")
                fout.write(f"\n#{lastline}\n")

        return

    if explore_bad_files:
        if run_cmd(cmd, check_status=True, throw_fatal=False,
                   time_it=time_it) is False:
            list_name = os.listdir(os.path.dirname(tmp_script_name))
            for i in list_name:
                if "ListForRun5Analysis" in i:
                    list_name = i
                    break
            if not isinstance(list_name, list):
                with open(
                        os.path.join(os.path.dirname(tmp_script_name),
                                     list_name)) as f:
                    list_name = []
                    for i in f:
                        list_name.append(i)
            warning_msg("Issue when running", tmp_script_name, "with",
                        list_name)
    else:
        run_cmd(cmd, log_file=f"{tmp_script_name}.log", time_it=time_it)
    if remove_tmp_script:
        os.remove(tmp_script_name)
    verbose_msg("< end run with", tmp_script_name)
    return tmp_script_name
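
The append-a-job-then-wait bash generation above is easier to follow in isolation; a self-contained sketch with hypothetical script names:

jobs = [f"bash tmp_script_{n}.sh" for n in range(4)]  # hypothetical job commands
parallel_jobs = 2
with open("demo_parallel.sh", "w") as fout:
    fout.write("#!/bin/bash\n")
    for n, cmd in enumerate(jobs, start=1):
        fout.write(f"{cmd} &\n")  # launch the job in the background
        if n % parallel_jobs == 0:
            fout.write("wait\n")  # block until the current batch finishes
    fout.write("wait\n")  # catch any trailing jobs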
Example #9
            def get_the_daughters():
                idaughters = []
                if d0 > -1 and d1 > -1:
                    for j in range(d0, d1 + 1):
                        entry = numpy.where(npy["part_index"] == j)[0]
                        if len(entry) > 1:
                            raise ValueError("Entry size is too high!")
                        if len(entry) == 0:
                            raise ValueError("Entry size is too low!")
                        entry = entry[0]
                        if 0:  # legacy flat fMother0/fMother1 columns (disabled)
                            d_m0 = npy["fMother0"][entry]
                            d_m1 = npy["fMother1"][entry]
                        else:
                            d_m0 = npy["fIndexArray_Mothers"][entry][0]
                            d_m1 = npy["fIndexArray_Mothers"][entry][
                                int(npy["fIndexArray_Mothers_size"][entry]) -
                                1]

                        if d_m0 != part_index and d_m1 != part_index:
                            if not continue_on_inconsistency:
                                raise ValueError("Daughter", j,
                                                 "has a different mother!",
                                                 "d_m0", d_m0, "d_m1", d_m1,
                                                 "w.r.t.", part_index)
                            else:
                                warning_msg("Daughter", j,
                                            "has a different mother!", "d_m0",
                                            d_m0, "d_m1", d_m1, "w.r.t.",
                                            part_index)
                        if d_m0 == d_m1 and 0:  # disabled sanity check
                            raise ValueError("Daughter has same mother!", d_m0,
                                             d_m1)
                        idaughters.append(entry)
                if len(idaughters) == 0:
                    warning_msg("Found no daughters")
                    return idaughters
                # Checking that indices are increasing
                if sorted(idaughters) != idaughters:
                    raise ValueError("Daughters are not in order!")
                # Checking that indices have no holes
                if idaughters != [*range(idaughters[0], idaughters[-1] + 1)]:
                    raise ValueError("Daughters have hole in indices!",
                                     idaughters)
                return idaughters
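
The two index checks at the end of get_the_daughters, shown standalone:

idx = [4, 5, 6]  # hypothetical daughter indices
assert sorted(idx) == idx                    # indices are increasing
assert idx == [*range(idx[0], idx[-1] + 1)]  # and contiguous, no holes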
Example #10
 def check_momentum(daughters):
     d_p = daughters_pxpypz(daughters)
     if d_p is None:
         return
     m_p = [px, py, pz]
     m_p_d = {0: "Px", 1: "Py", 2: "Pz"}
     momentum_format = "(px={:.5f}, py={:.5f}, pz={:.5f})"
     for axis, mother_p in enumerate(m_p):
         if abs(mother_p - d_p[axis]) > 0.001:
             e_msg = [
                 "Non-closure in", m_p_d[axis], "=",
                 momentum_format.format(*d_p)
             ]
             if not continue_on_inconsistency:
                 raise ValueError(*e_msg)
             else:
                 warning_msg(*e_msg)
                 warning_msg("           mother =",
                             momentum_format.format(*m_p))
Example #11
def check_root_file(file_name):
    if not file_name.endswith(".root"):
        warning_msg("Testing a non root file:", file_name)
        return True
    if not path.isfile(file_name):
        warning_msg("Testing a non existing file:", file_name)
        return True
    try:
        f = TFile(file_name, "READ")
        if f.TestBit(TFile.kRecovered):
            msg("File", file_name, "was recovered", color=bcolors.WARNING)
            return False
        if not f.IsOpen():
            msg("File", file_name, "is not open", color=bcolors.WARNING)
            return False
    except OSError:
        msg("Issue when checking file", file_name, color=bcolors.WARNING)
        return False
    verbose_msg(file_name, "is ok and has size",
                os.path.getsize(file_name) * 1e-6, "MB")
    return True
Example #12
def useapi(ccdb_path, host):
    global timestamps
    objectlist = get_ccdb_api(host).list(ccdb_path, False, "text/plain")
    bunch_objects = []
    starting_sequence = "ID: "
    for i in objectlist.split("\n"):
        if starting_sequence in i:
            bunch_objects.append("")
        if len(bunch_objects) <= 0:
            warning_msg("Skipping", i, "because found no object there")
            continue
        bunch_objects[-1] += f"{i}\n"
    verbose_msg("Found", len(bunch_objects), "object in path", ccdb_path)
    for counter, i in enumerate(bunch_objects):
        if 0:  # debug printout, disabled
            print(f"Object #{counter}/{len(bunch_objects)-1}")
            print(i)
        t = {}
        for j in i.split("\n"):
            save_fields(j, fields_of_interest=t)
        # print(t)
        timestamps.setdefault(ccdb_path, []).append(t)
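
The sentinel-based grouping above (one record per "ID: " line) in a standalone, runnable form with a fake listing:

listing = "header\nID: 1\npath: a\nID: 2\npath: b"  # hypothetical CCDB listing
records = []
for line in listing.split("\n"):
    if "ID: " in line:
        records.append("")  # start a new record at each sentinel
    if len(records) <= 0:
        continue  # text before the first "ID: " belongs to no record
    records[-1] += f"{line}\n"
print(len(records))  # prints 2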
Example #13
 def check_rootfile(fname):
     try:
         f = TFile(fname, "READ")
         if f.TestBit(TFile.kRecovered):
             warning_msg("File", fname, "was recovered")
             return False
         elif not f.IsOpen():
             warning_msg("File", fname, "is not open")
             return False
     except OSError:
         warning_msg("Issue when checking file", fname)
         return False
     return True
Example #14
    def print_evt(event_filter=">= 0"):
        pdg_db = TDatabasePDG()
        ev_df = df.Filter(f"fIndexMcCollisions {event_filter}")
        npy = ev_df.AsNumpy()
        print()
        lastmother = 0
        for i, part_index in enumerate(npy["part_index"]):
            ev = npy["fIndexMcCollisions"][i]
            count("events", ev)
            if 0:  # legacy flat mother/daughter columns (disabled)
                m0 = npy["fMother0"][i]
                m1 = npy["fMother1"][i]
                d0 = npy["fDaughter0"][i]
                d1 = npy["fDaughter1"][i]
            else:
                m_arr = npy["fIndexArray_Mothers"][i]
                d_arr = npy["fIndexSlice_Daughters"][i]
                m_size = npy["fIndexArray_Mothers_size"][i]
                # print(m_size)
                # print("Mothers", m_arr)
                # print("Daughters", d_arr)

                if len(m_arr) == 0:
                    m0 = -1
                    m1 = -1
                else:
                    m0 = m_arr[0]
                    m1 = m_arr[int(m_size) - 1]
                d0 = d_arr[0]
                d1 = d_arr[1]
                # print(d_arr)
            pdg = npy["fPdgCode"][i]
            px = npy["fPx"][i]
            py = npy["fPy"][i]
            pz = npy["fPz"][i]
            eta = npy["eta"][i]
            is_ps = bool(npy["isPhysicalPrimary"][i])
            is_pt = bool(npy["isProducedByTransport"][i])
            process = npy["fStatusCode"][i]

            def getpname(pdg_code):
                p = pdg_db.GetParticle(int(pdg_code))
                if p:
                    p = p.GetName()
                else:
                    p = "Undef"
                return p

            part = getpname(pdg)
            summary_line = f"  ({part_index}) ev {ev} m0 {m0} m1 {m1}, d0 {d0} d1 {d1}, pdg {pdg} '{part}', physical primary {is_ps}, in transport {is_pt}, process {process}"
            if (abs(pdg) not in [21, 2101, 2103, 2203, 1, 2, 3, 4, 5]
                    and m0 > -1):
                if lastmother != m0 and count("mothers", m0):
                    raise ValueError("Duplicate mothers for ", summary_line)
            lastmother = m0
            if d1 > -1 and d0 > d1:
                if not continue_on_inconsistency:
                    raise ValueError("d0 > d1:", summary_line)
                else:
                    warning_msg("d0 > d1 for", part_index)

            def get_the_daughters():
                idaughters = []
                if d0 > -1 and d1 > -1:
                    for j in range(d0, d1 + 1):
                        entry = numpy.where(npy["part_index"] == j)[0]
                        if len(entry) > 1:
                            raise ValueError("Entry size is too high!")
                        if len(entry) == 0:
                            raise ValueError("Entry size is too low!")
                        entry = entry[0]
                        if 0:  # legacy flat fMother0/fMother1 columns (disabled)
                            d_m0 = npy["fMother0"][entry]
                            d_m1 = npy["fMother1"][entry]
                        else:
                            d_m0 = npy["fIndexArray_Mothers"][entry][0]
                            d_m1 = npy["fIndexArray_Mothers"][entry][
                                int(npy["fIndexArray_Mothers_size"][entry]) -
                                1]

                        if d_m0 != part_index and d_m1 != part_index:
                            if not continue_on_inconsistency:
                                raise ValueError("Daughter", j,
                                                 "has a different mother!",
                                                 "d_m0", d_m0, "d_m1", d_m1,
                                                 "w.r.t.", part_index)
                            else:
                                warning_msg("Daughter", j,
                                            "has a different mother!", "d_m0",
                                            d_m0, "d_m1", d_m1, "w.r.t.",
                                            part_index)
                        if d_m0 == d_m1 and 0:  # disabled sanity check
                            raise ValueError("Daughter has same mother!", d_m0,
                                             d_m1)
                        idaughters.append(entry)
                if len(idaughters) == 0:
                    warning_msg("Found no daughters")
                    return idaughters
                # Checking that indices are increasing
                if sorted(idaughters) != idaughters:
                    raise ValueError("Daughters are not in order!")
                # Checking that indices have no holes
                if idaughters != [*range(idaughters[0], idaughters[-1] + 1)]:
                    raise ValueError("Daughters have hole in indices!",
                                     idaughters)
                return idaughters

            def daughters_pxpypz(daughters):
                d_px = 0
                d_py = 0
                d_pz = 0
                if len(daughters) == 0:
                    return None
                for j in daughters:
                    d_px += npy["fPx"][j]
                    d_py += npy["fPy"][j]
                    d_pz += npy["fPz"][j]
                return d_px, d_py, d_pz

            def daughters_pdg(daughters):
                d_pdgs = []
                for j in daughters:
                    d_pdgs.append(npy["fPdgCode"][j])
                return d_pdgs

            def check_momentum(daughters):
                d_p = daughters_pxpypz(daughters)
                if d_p is None:
                    return
                m_p = [px, py, pz]
                m_p_d = {0: "Px", 1: "Py", 2: "Pz"}
                momentum_format = "(px={:.5f}, py={:.5f}, pz={:.5f})"
                for axis, mother_p in enumerate(m_p):
                    if abs(mother_p - d_p[axis]) > 0.001:
                        e_msg = [
                            "Non-closure in", m_p_d[axis], "=",
                            momentum_format.format(*d_p)
                        ]
                        if not continue_on_inconsistency:
                            raise ValueError(*e_msg)
                        else:
                            warning_msg(*e_msg)
                            warning_msg("           mother =",
                                        momentum_format.format(*m_p))

            def is_decay_channel(desired_pdg_codes,
                                 daughters,
                                 fill_counter=True,
                                 min_prongs=0,
                                 max_prongs=10):
                d_pdgs = daughters_pdg(daughters)
                if min_prongs <= len(daughters) <= max_prongs:
                    print(pdg, part, "decaying in", len(d_pdgs), "particles")
                    for i, j in enumerate(d_pdgs):
                        if 0:  # legacy flat mother columns (disabled)
                            this_m0 = npy["fMother0"][daughters[i]]
                            this_m1 = npy["fMother1"][daughters[i]]
                        else:
                            this_m0 = npy["fIndexArray_Mothers"][
                                daughters[i]][0]
                            this_m1 = npy["fIndexArray_Mothers"][daughters[i]][
                                int(npy["fIndexArray_Mothers_size"][
                                    daughters[i]]) - 1]

                        print(" >", j, getpname(j), "index", daughters[i],
                              npy["part_index"][daughters[i]], "m0", this_m0,
                              "m1", this_m1, " -> physical primary",
                              npy["isPhysicalPrimary"][daughters[i]])
                if desired_pdg_codes is not None:
                    for i in desired_pdg_codes:
                        if i not in d_pdgs:
                            return False
                if fill_counter:
                    count(
                        f"{bcolors.BOKGREEN} {pdg} {part} {bcolors.ENDC} in {d_pdgs}",
                        part_index)
                return True

            extra = []
            if m0 < 0 and m1 < 0 and d0 < 0 and d1 < 0:  # no mothers, no daughters
                extra.append("Sterile")
            if d1 < 0 and d1 != d0:
                extra.append(bcolors.BWARNING + "Problematic" + bcolors.ENDC)
            if pdg in pdg_of_interest:
                extra.append(", px={:.3f} py={:.2f} pz={:.2f}".format(
                    px, py, pz))
                extra.append(", eta={:.4f}".format(eta))
                extra.append(bcolors.BOKGREEN + "PDG of interest" +
                             bcolors.ENDC)
            extra = " ".join(extra)
            extra = extra.strip()

            count(part, part_index)
            if verbose or pdg in pdg_of_interest:
                print(summary_line, extra)
            if pdg in pdg_of_interest:
                daughters = get_the_daughters()
                check_momentum(daughters)
                is_decay_channel(None, daughters=daughters, fill_counter=True)
Example #15
def get_ccdb_obj(ccdb_path,
                 timestamp,
                 out_path,
                 host,
                 show,
                 tag=False,
                 overwrite_preexisting=True,
                 use_o2_api=True,
                 check_metadata=True,
                 interesting_metadata=[
                     "ObjectType", "PassName", "PeriodName", "RunNumber",
                     "Valid-From", "Valid-Until", ""
                 ]):
    """
    Gets the ccdb object from 'ccdb_path' and 'timestamp' and downloads it into 'out_path'
    If 'tag' is True then the filename will be renamed after the timestamp.
    """
    def check_rootfile(fname):
        try:
            f = TFile(fname, "READ")
            if f.TestBit(TFile.kRecovered):
                warning_msg("File", fname, "was recovered")
                return False
            elif not f.IsOpen():
                warning_msg("File", fname, "is not open")
                return False
        except OSError:
            warning_msg("Issue when checking file", fname)
            return False
        return True

    verbose_msg("Getting obj", host, ccdb_path, "with timestamp", timestamp,
                convert_timestamp(timestamp))
    out_name = "snapshot.root"
    if tag:
        out_name = f"snapshot_{timestamp}.root"
    out_path = os.path.normpath(out_path)
    fullname = os.path.join(out_path, ccdb_path, out_name)
    if os.path.isfile(fullname) and not overwrite_preexisting:
        if check_rootfile(fullname):
            msg("File", fullname, "already existing, not overwriting")
            return
    if use_o2_api:
        api = get_ccdb_api(host)
        if timestamp == -1:
            timestamp = o2.ccdb.getCurrentTimestamp()
        metadata = std.map('string,string')()
        api.retrieveBlob(ccdb_path, out_path, metadata, timestamp)
        if tag:
            os.rename(os.path.join(out_path, ccdb_path, "snapshot.root"),
                      fullname)
    else:
        cmd = f"o2-ccdb-downloadccdbfile --host {host} --path {ccdb_path} --dest {out_path} --timestamp {timestamp}"
        cmd += f" -o {out_name}"
        print(cmd)
        subprocess.run(cmd.split())
    if not os.path.isfile(fullname):
        raise ValueError("File", fullname, "not found")
    if not check_rootfile(fullname):
        raise ValueError("File", fullname, "is not Ok")
    if check_metadata:
        f = TFile(os.path.join(fullname), "READ")
        meta = f.Get("ccdb_meta")
        verbose_msg("Metadata")
        m_d = {"Valid-From": None, "Valid-Until": None}
        for i in meta:
            if i[0] in m_d:
                m_d[i[0]] = int(i[1])
            if (interesting_metadata[0] != ""
                    and i[0] not in interesting_metadata):
                continue
            if i[0] in m_d:
                verbose_msg(i, convert_timestamp(int(i[1])))
            else:
                verbose_msg(i)
        if timestamp < m_d["Valid-From"] or timestamp > m_d["Valid-Until"]:
            warning_msg("Timestamp asked is outside of window", timestamp, m_d)

        def print_info(entry):
            print("Object", entry, meta[entry])

        print_info("Last-Modified")
        if show:
            obj = f.Get("ccdb_object")
            obj.Draw()
            time_box = TPaveText(.01, .9, 0.3, 0.99, "NDC")
            time_box.AddText(ccdb_path)
            time_box.AddText(f"timestamp {timestamp}")
            time_box.AddText(f"{convert_timestamp(timestamp)}")
            time_box.Draw()
            gPad.Update()
            input("Press enter to continue")
            # obj.Print("ALL")
    return fullname
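
A hedged invocation sketch, assuming an O2/PyROOT environment where the CCDB API is reachable; the path and host below are illustrative assumptions, not values taken from this codebase:

snapshot = get_ccdb_obj(ccdb_path="Some/CCDB/Path",  # hypothetical object path
                        timestamp=-1,  # -1 is resolved to the current timestamp above
                        out_path="/tmp/ccdb",
                        host="http://alice-ccdb.cern.ch",  # assumed host
                        show=False)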
Example #16
    input_files = []
    for i in args.input_files:
        i = os.path.normpath(i)
        if i.endswith(".root"):
            input_files.append(i)
        elif i.endswith(".txt"):
            with open(i, "r") as f:
                for j in f:
                    j = j.strip()
                    input_files.append(
                        os.path.join(os.path.abspath(os.path.dirname(i)),
                                     os.path.normpath(j)))

    run_in_parallel(args.njobs,
                    main,
                    input_files,
                    "Checking file",
                    linearize_single_core=True)
    if len(bad_files) > 0:
        warning_msg("There were", len(bad_files), "bad files")
        for i in bad_files:
            msg(i)

    if args.output is not None:
        msg("Writing good files to", args.output)
        with open(args.output, "w") as f:
            for i in input_files:
                if i not in bad_files:
                    f.write(i + "\n")
Example #17
def main(mode,
         input_file,
         out_path,
         out_tag="",
         batch_size=4,
         n_max_files=100,
         dpl_configuration_file=None,
         njobs=1,
         merge_output=True,
         merge_only=False,
         shm_mem_size=16000000000,
         rate_lim=1000000000,
         readers=1,
         avoid_overwriting_merge=False,
         clean_localhost_after_running=True,
         extra_arguments=""):
    if len(input_file) == 1:
        input_file = input_file[0]
    else:
        input_file = input_file[0:n_max_files]
    if not merge_only:
        msg("Running",
            f"'{mode}'",
            "analysis on",
            f"'{input_file}'",
            color=bcolors.BOKBLUE)
        msg("Maximum",
            n_max_files,
            "files with batch size",
            batch_size,
            "and",
            njobs,
            "jobs" if njobs > 1 else "job",
            color=bcolors.BOKBLUE)
    else:
        msg("Merging output of",
            f"'{mode}'",
            "analysis",
            color=bcolors.BOKBLUE)
    o2_arguments = f"-b --shm-segment-size {shm_mem_size} --aod-memory-rate-limit {rate_lim} --readers {readers}"
    o2_arguments += " " + extra_arguments  # keep a separator before any extra flags
    if mode not in analyses:
        raise ValueError("Did not find analyses matching mode", mode,
                         ", please choose in", ", ".join(analyses.keys()))
    an = analyses[mode]
    tag = mode + out_tag
    # Build input file list
    input_file_list = []

    def build_list_of_files(file_list):
        # Check that runlist does not have duplicates
        if len(file_list) != len(set(file_list)):
            fatal_msg("Runlist has duplicated entries, fix runlist!")
        not_readable = []
        for i in file_list:  # Check that input files can be open
            f = TFile(i.strip(), "READ")
            if not f.IsOpen():
                verbose_msg("Cannot open AOD file:", i, color=bcolors.WARNING)
                not_readable.append(i)
        if len(not_readable) > 0:
            warning_msg(len(not_readable),
                        "files cannot be read and will be skipped")
            for i in not_readable:
                file_list.remove(i)

        files_per_batch = []
        iter_file_list = iter(file_list)
        while True:
            sub_set = list(islice(iter_file_list, batch_size))
            if not sub_set:
                break
            files_per_batch.append(sub_set)
        run_list = []
        if len(files_per_batch) > 0:
            for i, lines in enumerate(files_per_batch):
                p = os.path.join(out_path, f"{i}")
                if not os.path.isdir(p):
                    os.makedirs(p)
                run_list.append(os.path.join(p,
                                             f"ListForRun5Analysis.{i}.txt"))
                with open(run_list[-1], "w") as f:
                    for j in lines:
                        f.write(j.strip() + "\n")
        msg("Number of runs:", len(run_list))
        return run_list

    if isinstance(input_file, list):
        input_file = [os.path.join(os.getcwd(), i) for i in input_file]
        input_file_list = build_list_of_files(input_file)
    elif not input_file.endswith(".root"):
        with open(input_file, "r") as f:
            lines = f.readlines()
            msg("Building input list from", len(lines), "inputs, limiting to",
                n_max_files)
            if len(lines) > n_max_files:
                lines = lines[0:n_max_files]
            input_file_list = build_list_of_files(lines)
    else:
        input_file_list = [os.path.join(os.getcwd(), input_file)]

    if dpl_configuration_file is not None:
        dpl_configuration_file = os.path.join(os.getcwd(),
                                              dpl_configuration_file)

    run_list = []
    for i, j in enumerate(input_file_list):
        run_list.append(
            set_o2_analysis(an,
                            o2_arguments=o2_arguments,
                            input_file=j,
                            tag=tag,
                            dpl_configuration_file=dpl_configuration_file))
    if not merge_only:
        run_in_parallel(processes=njobs,
                        job_runner=run_o2_analysis,
                        job_arguments=run_list,
                        job_message="Running analysis")
        if clean_localhost_after_running:
            run_cmd(
                "find /tmp/ -maxdepth 1 -name localhost* -user $(whoami) | xargs rm -v"
            )

    if (merge_output or merge_only) and len(run_list) > 1:
        files_to_merge = []
        for i in input_file_list:
            p = os.path.dirname(os.path.abspath(i))
            for j in os.listdir(p):
                if j.endswith(f"_{tag}.root"):
                    files_to_merge.append(os.path.join(p, j))
        if len(files_to_merge) == 0:
            warning_msg("Did not find any file to merge for tag", tag)
            return
        if len(files_to_merge) > len(run_list):
            fatal_msg("Trying to merge too many files!", tag)
        msg("Merging", len(files_to_merge), "results", color=bcolors.BOKBLUE)
        files_per_type = {}  # List of files to be merged per type
        for i in files_to_merge:
            fn = os.path.basename(i)
            files_per_type.setdefault(fn, [])
            files_per_type[fn].append(i)
        merged_files = []
        for i in files_per_type:
            merged_file = os.path.join(out_path, i)
            if avoid_overwriting_merge and os.path.isfile(merged_file):
                warning_msg(
                    "file", merged_file,
                    "is already found, remove it before merging, you can use the --mergeonly flag to avoid running the analysis again"
                )
                continue
            merged_files.append(merged_file)
            merge_file_list = os.path.join(
                os.path.dirname(os.path.abspath(merged_file)),
                "tomerge_" + "".join(i.split(".")[:-1]) + ".txt")
            verbose_msg("List of files to be merged:", merge_file_list)
            with open(merge_file_list, "w") as fmerge:
                for j in files_per_type[i]:
                    fmerge.write(j + "\n")
            run_cmd(
                f"hadd -j {njobs} -f {merged_file} `cat {merge_file_list}`",
                log_file=merge_file_list.replace(".txt", ".log"))
        if len(merged_files) == 0:
            warning_msg("Merged no files")
        else:
            msg("Merging completed, merged:",
                *merged_files,
                color=bcolors.BOKGREEN)
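
The merge step groups result files by base name so that each output type is merged separately; a standalone sketch of that grouping with hypothetical paths:

import os

files_to_merge = ["/out/0/AnalysisResults_trackqa.root",
                  "/out/1/AnalysisResults_trackqa.root"]  # hypothetical result files
files_per_type = {}
for path in files_to_merge:
    # one list of inputs per output file name
    files_per_type.setdefault(os.path.basename(path), []).append(path)
print(files_per_type)  # {'AnalysisResults_trackqa.root': [both paths]}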
Example #18
def main(mode,
         input_file,
         out_path,
         out_tag="",
         batch_size=4,
         n_max_files=100,
         dpl_configuration_file=None,
         njobs=1,
         merge_output=True,
         merge_only=False,
         shm_mem_size=16000000000,
         rate_lim=1000000000,
         readers=1,
         avoid_overwriting_merge=False,
         clean_localhost_after_running=True,
         extra_arguments="",
         resume_previous_analysis=False,
         check_input_file_integrity=True,
         analysis_timeout=None,
         linearize_single_core=True):
    if do_bash_script:
        njobs = 1
        linearize_single_core = True

    if len(input_file) == 1:
        input_file = input_file[0]
    else:
        input_file = input_file[0:n_max_files]
    if not merge_only:
        msg("Running",
            f"'{mode}'",
            "analysis on",
            f"'{input_file}'",
            color=bcolors.BOKBLUE)
        msg("Maximum",
            n_max_files,
            "files with batch size",
            batch_size,
            "and",
            njobs,
            "jobs" if njobs > 1 else "job",
            color=bcolors.BOKBLUE)
    else:
        msg("Merging output of",
            f"'{mode}'",
            "analysis",
            color=bcolors.BOKBLUE)
    if analysis_timeout is not None:
        msg("Using analysis timeout of",
            analysis_timeout,
            "seconds",
            color=bcolors.BOKBLUE)
        analysis_timeout = f"--time-limit {analysis_timeout}"
    else:
        analysis_timeout = ""

    o2_arguments = f"-b --shm-segment-size {shm_mem_size} --aod-memory-rate-limit {rate_lim} --readers {readers} {analysis_timeout}"
    o2_arguments += " " + extra_arguments  # keep a separator before any extra flags
    if mode not in analyses:
        raise ValueError("Did not find analyses matching mode", mode,
                         ", please choose in", ", ".join(analyses.keys()))
    an = analyses[mode]
    tag = mode + out_tag
    # Build input file list
    input_file_list = []

    def is_root_file_sane(file_name_to_check):
        file_name_to_check = file_name_to_check.strip()
        if not os.path.isfile(file_name_to_check):
            warning_msg("File", file_name_to_check, "does not exist")
            return "Does not exist"
        file_to_check = TFile(file_name_to_check, "READ")
        if not file_to_check.IsOpen():
            warning_msg("Cannot open AOD file:", file_name_to_check)
            return "Cannot be open"
        elif file_to_check.TestBit(TFile.kRecovered):
            verbose_msg(file_name_to_check, "was a recovered file")
            return "Was recovered"
        else:
            verbose_msg(file_name_to_check, "is OK")
            return "Is Ok"

    def build_list_of_files(file_list):
        verbose_msg("Building list of files from", file_list)
        # Check that runlist does not have duplicates
        unique_file_list = set(file_list)
        if len(file_list) != len(unique_file_list):
            fatal_msg("Runlist has duplicated entries, fix runlist!",
                      len(unique_file_list), "unique files, while got",
                      len(file_list), "files")
        file_status = {
            "Does not exist": [],
            "Cannot be open": [],
            "Was recovered": [],
            "Is Ok": []
        }
        if check_input_file_integrity:  # Check that input files can be open
            for i in file_list:
                verbose_msg("Checking that TFile", i.strip(),
                            "can be processed")
                file_status[is_root_file_sane(i)].append(i)
        recovered_files = file_status["Was recovered"]
        not_readable = []
        for i in file_status:
            if i == "Is Ok":
                continue
            not_readable += file_status[i]
        if len(recovered_files) > 0:
            msg("Recovered", len(recovered_files), "files:\n",
                "\n".join(recovered_files))
        if len(not_readable) > 0:
            warning_msg(len(not_readable), "over", len(file_list),
                        "files cannot be read and will be skipped")
            for i in not_readable:
                if i not in file_list:
                    warning_msg("did not find file to remove", f"'{i}'")
                file_list.remove(i)

        files_per_batch = []
        iter_file_list = iter(file_list)
        while True:
            sub_set = list(islice(iter_file_list, batch_size))
            if not sub_set:
                break
            files_per_batch.append(sub_set)
        run_list = []
        if len(files_per_batch) > 0:
            for i, lines in enumerate(files_per_batch):
                p = os.path.join(out_path, f"{i}")
                if not os.path.isdir(p):
                    os.makedirs(p)
                run_list.append(os.path.join(p,
                                             f"ListForRun5Analysis.{i}.txt"))
                with open(run_list[-1], "w") as f:
                    for j in lines:
                        f.write(j.strip() + "\n")
        msg("Number of runs:", len(run_list))
        return run_list

    if isinstance(input_file, list):
        input_file = [os.path.join(os.getcwd(), i) for i in input_file]
        input_file_list = build_list_of_files(input_file)
    elif not input_file.endswith(".root"):
        with open(input_file, "r") as f:
            lines = f.readlines()
            msg("Building input list from", len(lines), "inputs, limiting to",
                n_max_files)
            if len(lines) > n_max_files:
                lines = lines[0:n_max_files]
            lines = [
                os.path.join(os.path.dirname(os.path.abspath(input_file)),
                             i.strip()) for i in lines
            ]
            input_file_list = build_list_of_files(lines)
    else:
        input_file_list = [os.path.join(os.getcwd(), input_file)]

    if dpl_configuration_file is not None:
        dpl_configuration_file = os.path.join(os.getcwd(),
                                              dpl_configuration_file)

    run_list = []
    for i, j in enumerate(input_file_list):
        run_list.append(
            set_o2_analysis(an,
                            o2_arguments=o2_arguments,
                            input_file=j,
                            tag=tag,
                            dpl_configuration_file=dpl_configuration_file,
                            resume_previous_analysis=resume_previous_analysis,
                            write_runner_script=not merge_only))
    if not merge_only:
        if do_bash_script:
            with open("parallelbash.sh", "w") as f:
                f.write(f"#!/bin/bash\n\n")
                f.write(f"echo \"Start running\"\n\n")
                f.write(f"date\n\n")
                f.write("""function trap_ctrlc (){
                            # perform cleanup here
                            echo "Ctrl-C caught...performing clean up"
                            exit 2
                        }\n\n""")
                f.write("""trap "trap_ctrlc" 2\n""")

        run_in_parallel(
            processes=njobs,
            job_runner=run_o2_analysis,
            job_arguments=run_list,
            job_message=f"Running analysis, it's {datetime.datetime.now()}",
            linearize_single_core=linearize_single_core)
        if do_bash_script:
            with open("parallelbash.sh", "a") as f:
                f.write(f"wait\n\n")
                f.write(f"date\n\n")
            msg("Now run bash script `bash parallelbash.sh`")
            return
        if clean_localhost_after_running:
            run_cmd(
                "find /tmp/ -maxdepth 1 -name localhost* -user $(whoami) | xargs rm -v 2>&1",
                check_status=False)

    if (merge_output or merge_only) and len(run_list) > 1:
        files_to_merge = []
        for i in input_file_list:
            p = os.path.dirname(os.path.abspath(i))
            for j in os.listdir(p):
                if j.endswith(f"_{tag}.root"):
                    files_to_merge.append(os.path.join(p, j))
        if len(files_to_merge) == 0:
            warning_msg("Did not find any file to merge for tag", tag)
            return
        files_per_type = {}  # List of files to be merged per type
        # List of files to be merged per type that are not declared sane
        non_sane_files_per_type = {}
        for i in files_to_merge:
            if is_root_file_sane(i) != "Is Ok":
                non_sane_files_per_type.setdefault(
                    os.path.basename(i), []).append(i)
                warning_msg("Result file", i, "is not sane")
                continue
            fn = os.path.basename(i)
            files_per_type.setdefault(fn, [])
            files_per_type[fn].append(i)
        for i in non_sane_files_per_type:
            warning_msg("Non sane files for type", i)
            for j in non_sane_files_per_type[i]:
                msg(j)
        merged_files = []
        for i in files_per_type:
            merged_file = os.path.join(out_path, i)
            if avoid_overwriting_merge and os.path.isfile(merged_file):
                warning_msg(
                    "file", merged_file,
                    "is already found, remove it before merging, you can use the --mergeonly flag to avoid running the analysis again"
                )
                continue
            merged_files.append(merged_file)
            merge_file_list = os.path.join(
                os.path.dirname(os.path.abspath(merged_file)),
                "tomerge_" + "".join(i.split(".")[:-1]) + ".txt")
            verbose_msg("List of files to be merged:", merge_file_list)
            with open(merge_file_list, "w") as fmerge:
                for j in files_per_type[i]:
                    fmerge.write(j + "\n")
            if len(files_per_type[i]) > len(run_list):
                fatal_msg("Trying to merge too many files of type", i,
                          "for tag", tag, ":", len(files_per_type[i]), "vs",
                          len(run_list), "runs")
            msg("Merging", len(files_per_type[i]), "files to", merged_file)
            run_cmd(
                f"hadd -j {njobs} -f {merged_file} `cat {merge_file_list}`",
                log_file=merge_file_list.replace(".txt", ".log"),
                time_it=True,
                comment=f"Merging to {merged_file}")
        if len(merged_files) == 0:
            warning_msg("Merged no files")
        else:
            msg("Merging completed, merged:",
                *merged_files,
                color=bcolors.BOKGREEN)
Example #19
def main(input_files,
         do_merge=True,
         sanity_file=None,
         max_bunch_size=200,
         out_path="./",
         over_write_lists=False,
         jobs=1):
    msg("Merging to", out_path, "with maximum input size", max_bunch_size)
    out_path = os.path.normpath(out_path)
    if not os.path.exists(out_path):
        warning_msg("Output path", out_path, "does not exist")
        ans = input("Create it? (Y/[N])")
        if ans == "Y":
            os.makedirs(out_path)
        else:
            msg("Exit")
            return
    sane_files = None
    if sanity_file is not None:
        msg("Using sanity file", sanity_file)
        sane_files = []
        with open(sanity_file, "r") as f:
            for i in f:
                sane_files.append(os.path.abspath(os.path.normpath(i.strip())))
    size_of_files = {}
    for i in input_files:
        i = os.path.normpath(i.strip())
        if sane_files is not None and os.path.abspath(i) not in sane_files:
            msg("Skipping", i, "because not in sanity file")
            continue
        size_of_files[i] = os.path.getsize(i) * 1e-6
    bunched_files = [[]]
    bunched_sizes = []
    bunch_size = []
    for i in size_of_files:
        verbose_msg("Checking file", i, "of size", size_of_files[i], "MB")
        if sum(bunch_size) > max_bunch_size:
            verbose_msg("Bunch size", sum(bunch_size), "reached limit with",
                        len(bunch_size), "files", max_bunch_size, "MB",
                        "preparing next bunch!")
            bunched_files.append([])
            bunched_sizes.append(sum(bunch_size))
            bunch_size = []
        bunch_size.append(size_of_files[i])
        bunched_files[-1].append(i)
    bunched_sizes.append(sum(bunch_size))
    verbose_msg("Got", len(bunched_files), "bunches")
    for i, j in enumerate(bunched_files):
        verbose_msg(f"{i})", bunched_sizes[i], "MB, with", len(j), j)

    msg("Preparing", len(bunched_files), "bunched lists")
    bunched_aod_names.clear()
    for i, j in enumerate(bunched_files):
        fn = f"aod_merge_list_bunch{i}.txt"
        verbose_msg("Writing bunch", i, "to", fn)
        if not over_write_lists:
            if os.path.isfile(fn):
                fatal_msg(fn, "already present, remove it first")
        with open(fn, "w") as f:
            for k in j:
                f.write(k + "\n")
        if do_merge:
            out_aod = os.path.join(out_path, f"AO2D_Merge_{i}.root")
            if os.path.isfile(out_aod):
                fatal_msg(out_aod, "already present")
            bunched_aod_names[fn] = {
                "out_aod": out_aod,
                "file_index": i,
                "total_files": len(bunched_files),
                "input_size": bunched_sizes[i]
            }

    run_in_parallel(jobs,
                    run_merge,
                    list(bunched_aod_names.keys()),
                    job_message="Running AOD merging",
                    linearize_single_core=True)
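
The greedy size-based bunching at the heart of this function, as a self-contained sketch with made-up sizes; note that, as in the original, a bunch can exceed the limit by one file because the size check runs before appending:

size_of_files = {"a.root": 120.0, "b.root": 90.0,
                 "c.root": 150.0, "d.root": 30.0}  # sizes in MB, hypothetical
max_bunch_size = 200.0
bunched_files = [[]]
bunch_size = []
for name, size in size_of_files.items():
    if sum(bunch_size) > max_bunch_size:  # limit passed: close this bunch
        bunched_files.append([])
        bunch_size = []
    bunch_size.append(size)
    bunched_files[-1].append(name)
print(bunched_files)  # [['a.root', 'b.root'], ['c.root', 'd.root']]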