def get_inglob(outdir):
    """Build a glob matching every root file sharing outdir's run number.

    The glob lives in outdir's parent directory and is sanity-checked with
    glob_and_check before being returned.
    """
    parent = path.dirname(outdir)
    run = get_run(outdir)
    pattern = path.join(parent, f"*{run}*.root")
    glob_and_check(pattern)
    return pattern
def check_telescope_files(rootdir=None, globstr1=None, globstr2=None,
                          replacer=("_M1_", "_M2_"), force=False):
    """Check that the files behind globstr1 and globstr2 match pairwise.

    Basenames are normalized by replacing replacer[0] with replacer[1] so the
    two telescopes' filenames become directly comparable (the replacement is a
    no-op on the side that already carries replacer[1]).

    Parameters
    ----------
    rootdir : str or None
        If given, globstr1/globstr2 are joined onto it first.
    globstr1, globstr2 : str or list
        Glob strings (resolved via glob_and_check) or ready-made file lists.
    replacer : tuple of (str, str)
        Substring pair used for basename normalization.
    force : bool
        False: abort (AssertionError) when the normalized sets differ.
        True: skip the check and return the union of all filenames.
        (Implements the behavior the original TODO comment described;
        the parameter was previously accepted but unused.)

    Returns
    -------
    list or None
        Sorted union of filenames when force is True, otherwise None.
    """
    if rootdir is not None:
        globstr1 = path.join(rootdir, globstr1)
        globstr2 = path.join(rootdir, globstr2)
    files1 = glob_and_check(globstr1) if isinstance(globstr1, str) else globstr1
    files2 = glob_and_check(globstr2) if isinstance(globstr2, str) else globstr2
    bnames1 = {path.basename(fname.replace(replacer[0], replacer[1]))
               for fname in files1}
    bnames2 = {path.basename(fname.replace(replacer[0], replacer[1]))
               for fname in files2}
    if force:
        # force=True: return the union of filenames instead of aborting.
        return sorted(set(files1) | set(files2))
    assert bnames1 == bnames2, \
        (f"Telescope files not equal, set difference: len files1 {len(files1)} len files2 {len(files2)}\n\n"
         f"dirs: {path.dirname(files1[0])}, {bnames1.symmetric_difference(bnames2)}")
def process_melibea(superstar, nnenergies, outdir, njobs, mode):
    """Run run_melibea over each (superstar, nnenergy) file pair in parallel.

    superstar : glob string for the superstar root files (must be .root).
    nnenergies : glob string or list of nn-energy files; only consumed when
        mode contains "nn" or "second" (must NOT be .root files), otherwise
        the superstar files themselves are paired with each other.
    outdir : output directory, prepared via setup_dirs.
    njobs : number of joblib workers.
    mode : processing-mode string, forwarded to run_melibea.
    """
    ssfiles = glob_and_check(superstar)
    if "nn" in mode or "second" in mode:
        if isinstance(nnenergies, str):
            nnfiles = glob_and_check(nnenergies)
        else:
            nnfiles = sorted(nnenergies)
        assert np.all(
            [".root" not in fname for fname in nnfiles]
        ), f"nnenergy files are not supposed to be rootfiles, aborting. nnglob: {nnenergies}"
        assert_filenames(ssfiles, nnfiles)
    else:
        nnfiles = ssfiles
    assert np.all(
        [".root" in fname for fname in ssfiles]
    ), f"superstar files are supposed to be rootfiles, aborting. superstarglob: {superstar}"
    # NOTE(review): the equalized lists below are only used in the length
    # assertion — the zip further down pairs the ORIGINAL ssfiles/nnfiles.
    # Looks like the zip was meant to use newssfiles/newnnfiles; confirm.
    newssfiles, newnnfiles = make_fnames_equal(ssfiles, nnfiles)
    assert len(newnnfiles) >= len(
        ssfiles), "Not all ssfiles get processed, something is wrong"
    setup_dirs(outdir)
    filezip = zip(ssfiles, nnfiles)
    Parallel(n_jobs=njobs)\
        (delayed(run_melibea)(ssfile, nnfile, outdir, mode)
         for ssfile, nnfile in filezip)
def check_globs(glob1, glob2):
    """Assert that two globs (or two file lists) cover the same set of runs.

    glob1, glob2 : str glob patterns (resolved via glob_and_check) or lists
        of filenames. Run numbers are extracted with get_run and compared
        order-insensitively.

    Raises
    ------
    TypeError
        If glob1 is neither a str nor a list (the original fell through to a
        NameError on undefined locals).
    AssertionError
        If the sorted run lists differ.
    """
    if isinstance(glob1, str):
        fnames1 = glob_and_check(glob1)
        fnames2 = glob_and_check(glob2)
    elif isinstance(glob1, list):
        fnames1 = glob1
        fnames2 = glob2
    else:
        raise TypeError(
            f"glob1 must be a str glob or a list of filenames, got {type(glob1).__name__}")
    runs1 = sorted(get_run(fname) for fname in fnames1)
    runs2 = sorted(get_run(fname) for fname in fnames2)
    assert np.array_equal(runs1, runs2), "runs are not equal, aborting."
def convert_multiple_dirs(rootdir, mergecolsonly, parallelprocessing):
    """Descends into multiple dirs and converts their root files to csv.
    Dirnames need to be formatted like: YYYY_MM_DD or YYYY_DD_MM"""
    # only works for data as of now, not for mc
    # BUG FIX: the old pattern used regex escapes (r"20\d\d_\d\d_\d\d/"),
    # which glob() does not interpret; use glob character ranges instead.
    subdirs = glob_and_check(
        path.join(rootdir, "20[0-9][0-9]_[0-9][0-9]_[0-9][0-9]/"))
    # NOTE(review): njobs is a free name here (module-level?) — confirm it is
    # defined; consider promoting it to a parameter.
    # NOTE(review): convert_to_csv also expects globstr1/globstr2/star — this
    # call looks incomplete (see the original TODO about None handling in
    # call_c_converter).
    Parallel(n_jobs=njobs)(delayed(convert_to_csv)(
        rootdirectory, mergecolsonly, parallelprocessing)
        for rootdirectory in subdirs)
    # BUG FIX: path.join(subdir, "/*" + ...) returned just "/*..." because
    # the second component was absolute, silently discarding subdir.
    subdirGlobs = sorted(
        [path.join(subdir, "*" + MERGECOLENDING) for subdir in subdirs])
    filenames = [glob_and_check(subdirGlob) for subdirGlob in subdirGlobs]
    outFilename = merge_csv_files(filenames)
    return outFilename
def merge_wrapper(processdir, basedir, starglob, superstarglob, calibrootglob, njobs=2, invert=False):
    """extracts the mergecols from the _S_ root (superstarglob) files and merges the energies

    processdir, basedir : base directories for the star/superstar and calib
        files respectively.
    starglob, superstarglob, calibrootglob : glob strings; each must contain
        a directory component.
    njobs : >1 splits the work by date and runs single_remove_events in
        parallel; otherwise a single EventRemover pass is used.
    invert : forwarded to single_remove_events.
    """
    for glob in [starglob, superstarglob, calibrootglob]:
        assert path.dirname(glob), \
            f"Glob : {glob} should be/contain a subdirectory"
    superstarGlobNew = get_glob_strings(superstarglob)
    calibrootGlob1, calibrootGlob2 = get_glob_strings(calibrootglob)
    superstardir = get_dir_from_glob(processdir, superstarglob)
    calibdir = get_dir_from_glob(basedir, calibrootglob)
    starglob = processdir + starglob
    # The converter() extraction steps were disabled upstream; the pipeline
    # is currently short-circuited to these hard-coded globs.
    tofiltercalibglob = "./csv/*.csv"
    ssmcolfnames = glob_and_check("./superstar/mergecols/*.csv")
    yecho("Removing events.")
    if njobs > 1:
        splitcalib = split_by_dates(tofiltercalibglob)
        splitstar = split_by_dates(starglob)
        splitss = split_by_dates(ssmcolfnames)
        # needs filename output
        assert len(splitcalib) == len(splitstar) == len(
            splitss
        ), "only works the first time when no calibfiles got moved, for everything else this needs a new function with more logic"
        Parallel(n_jobs=njobs)\
            (delayed(single_remove_events)(calibglob, starglob, ssglob, njobs, invert)
             for calibglob, starglob, ssglob in zip(splitcalib, splitstar, splitss))
        # NOTE(review): this branch never collects filteredFiles, so the final
        # yecho below raises NameError for njobs > 1 — needs filename output
        # from single_remove_events (see the assert comment above).
    else:
        # BUG FIX: the original passed the undefined name `calibmcolfnames`
        # here (guaranteed NameError); the calib mergecol glob is presumably
        # what was meant — TODO confirm against the disabled converter() calls.
        check_telescope_files(rootdir=None, globstr1=ssmcolfnames,
                              globstr2=tofiltercalibglob,
                              replacer=("_Y_", "_I_"))
        remover = EventRemover(tofiltercalibglob=tofiltercalibglob,
                               starglob=starglob,
                               superstarmcolglob=ssmcolfnames)
        remover.remove_events()
        filteredFiles = remover.outfilenames
    # BUG FIX: this message contained a raw line break inside a plain string
    # literal (a SyntaxError); joined onto one line.
    yecho("Removed events that get thrown out during image cleaning and superstar processing and wrote the merged runs to:")
    yecho(f"{path.basename(filteredFiles[0])}")
def split_by_dates(glob):
    """Split a glob (or list) of filenames into per-date glob strings.

    glob : str glob (resolved via glob_and_check) or a list of filenames.
    Non-MC files (not starting with "GA_") are grouped by the leading
    date token of the basename; MC files are grouped by the first five
    digits of the run number (field 4, or field 3 for "_S_"-less names).
    Returns the sorted list of reconstructed per-group glob strings.
    """
    if isinstance(glob, str):
        fullnames = glob_and_check(glob)
    else:
        fullnames = glob
    dirnames = [path.dirname(f) for f in fullnames]
    fnames = [path.basename(f) for f in fullnames]
    if not fnames[0].startswith("GA_"):
        # data files: date is the first underscore-separated token
        dates = set([fname.split("_")[0] for fname in fnames])
    else:
        # example GA_M2_za05to35_8_1740969_Y_wr.root
        # first 5 numbers of run for splitting
        if "_S_" not in fnames[0]:
            dates = set(["*" + f.split("_")[4][:5] for f in fnames])
        # example GA_za05to35_8_1740969_Y_wr.root
        else:
            dates = set(["*" + f.split("_")[3][:5] for f in fnames])
    dates = sorted(list(dates))
    identifiers = [get_identifier(fname) for fname in fnames]
    # NOTE(review): `dates` is deduplicated (one entry per group) while
    # `identifiers` has one entry per FILE, so zip silently truncates to the
    # shorter list. This only lines up if every file shares one identifier —
    # confirm whether zipping dates with per-file identifiers is intended.
    newglobs = [
        date + identifier for date, identifier in zip(dates, identifiers)
    ]
    newglobs = [
        path.join(dname, glob) for dname, glob in zip(dirnames, newglobs)
    ]
    return sorted(newglobs)
def main(glob):
    """Resolve `glob` and run print_linenumber on every match, warning first
    when the filenames do not look like HDF files."""
    fnames = glob_and_check(glob)
    # The files pass as "HDF-like" if any one of these markers appears in
    # every single filename.
    looks_like_hdf = any(
        np.all([marker in fname for fname in fnames])
        for marker in ("h5", "hdf5", "hdf")
    )
    if not looks_like_hdf:
        print(
            "files are not ending with .hdf or something like that, this should fail soon."
        )
    for fname in fnames:
        print_linenumber(fname)
def generate_imputed_luts(basedir, superstar, lutfnames):
    """Impute LUT energies for the superstar files.

    Runs melibea in "nn" mode with the energies taken from the lutfnames
    text files, then extracts the lutenergies from the freshly imputed root
    files in <basedir>/tmp and returns their filenames, sorted.
    """
    tmpdir = path.join(basedir, "tmp")
    os.makedirs(tmpdir, exist_ok=True)
    process_melibea(superstar=superstar,
                    nnenergies=lutfnames,
                    outdir=tmpdir,
                    njobs=10,
                    mode="nn")
    imputedluts = path.join(tmpdir, "*_Q_*.root")
    # sanity-check that melibea actually produced the imputed root files
    glob_and_check(imputedluts)
    basepattern = path.basename(imputedluts)
    lutenergies = converter(tmpdir,
                            globstr1=basepattern,
                            globstr2=basepattern,
                            multidir=False,
                            njobs=8,
                            mergecolsonly=True,
                            parallelprocessing=True)
    check_globs(lutenergies, lutfnames)
    return sorted(lutenergies)
def convert_to_csv(rootdir, globstr1, globstr2, mergecolsonly, parallelprocessing, star, njobs=1):
    """Converts the rootfiles that are found in the globstrings to csv and
    puts them in MERGECOLDIR. If there are multiple csvfiles it merges them
    with prefix MERGEDCSVPREFIX. Returns the name of the merged csv file.

    rootdir : directory containing the root files (dirs prepared via setup_dirs).
    globstr1, globstr2 : glob strings (or lists) for the two telescopes.
    mergecolsonly : extract only the merge columns when True.
    parallelprocessing, star, njobs : forwarded to call_c_converter.

    Returns a de-duplicated list of output file/dir names.
    """
    setup_dirs(rootdir)
    # maybe implement parallel processing here so the filenames in the
    # globstring get split into subarrays and each processed separately
    if (star is False) and (mergecolsonly is False):
        check_telescope_files(rootdir, globstr1, globstr2)
    elif (star is True) and (mergecolsonly is False):
        check_telescope_files(rootdir, globstr1, globstr2, replacer=("_Y_", "_I_"))
    call_c_converter(rootdir, globstr1, globstr2, mergecolsonly,
                     parallelprocessing, star, njobs)
    if mergecolsonly is False:
        # BUG FIX: the dirname was previously assigned as a bare string, so
        # list(set(...)) at the bottom exploded it into single characters;
        # wrap it in a list instead.
        outFilenames = [path.dirname(globstr1)]
    elif star is False:
        newdir = path.join(rootdir, MERGECOLDIR)
        newglobstr1 = get_new_globstr(globstr1)
        newglobstr2 = get_new_globstr(globstr2)
        check_telescope_files(newdir, newglobstr1, newglobstr2)
        mergecolFilenames1 = glob_and_check(path.join(newdir, newglobstr1))
        mergecolFilenames2 = glob_and_check(path.join(newdir, newglobstr2))
        mergecolFilenames1.extend(mergecolFilenames2)
        outFilenames = mergecolFilenames1
    else:
        # BUG FIX: star=True with mergecolsonly=True previously fell through
        # to a NameError on outFilenames; fail explicitly instead.
        raise NotImplementedError(
            "convert_to_csv: mergecolsonly with star=True is not supported")
    return list(set(outFilenames))
def call_c_converter(rootdir, globstr1, globstr2, mergecolsonly, parallelprocessing, star, njobs=1):
    """Function that encapsulates the call to the root macro that converts
    the root files to csv — either one call over the raw glob strings, or
    one call per matched file pair in parallel."""
    if isinstance(globstr1, str):
        filenames1 = glob_and_check(path.join(rootdir, globstr1))
        filenames2 = glob_and_check(path.join(rootdir, globstr2))
    elif isinstance(globstr1, list):
        filenames1, filenames2 = globstr1, globstr2
    if parallelprocessing is False:
        # serial: a single invocation over the glob strings themselves
        fcall(rootdir, globstr1, globstr2, mergecolsonly,
              parallelprocessing, star)
    else:
        # parallel: one invocation per (file1, file2) pair
        jobs = (delayed(fcall)(rootdir, first, second, mergecolsonly,
                               parallelprocessing, star)
                for first, second in zip(filenames1, filenames2))
        Parallel(n_jobs=njobs)(jobs)
def get_file_shape(subglob):
    """Return the shape of the "data" dataset of the first file matching subglob.

    NOTE(review): `rootdir` is a free name here (module global or enclosing
    scope) — confirm it is defined where this function is used.
    """
    fname = glob_and_check(path.join(rootdir, subglob))[0]
    # BUG FIX: open read-only via a context manager; the original never
    # closed the HDF5 handle (and relied on h5py's version-dependent
    # default mode).
    with h5.File(fname, "r") as h5file:
        return h5file["data"].shape
def split_into_telescopes(fnames):
    """Normalize `fnames` to a sorted list of filenames.

    fnames : glob string (resolved via glob_and_check and sorted) or an
        already-materialized list (passed through unchanged).

    NOTE(review): despite the name, no telescope splitting happens in the
    visible body — the function may be truncated upstream; confirm.
    """
    # BUG FIX: `basestring` is Python 2 only — this file uses f-strings, so
    # the isinstance check raised NameError on Python 3; use str.
    if isinstance(fnames, str):
        fnames = sorted(glob_and_check(fnames))
    # BUG FIX: the original reassigned fnames but never returned it, making
    # the function a no-op for callers.
    return fnames
def _rootdir_glob(self, subdir):
    """Glob `subdir` under self.rootdir and return the sorted matches,
    skipping the first self.entrypoint entries."""
    pattern = join(self.rootdir, subdir)
    matches = cth.glob_and_check(pattern)
    return sorted(matches)[self.entrypoint:]
def setup_currentfiles(self, currfilesglob):
    """Set the private current-files list from a glob, then run cleanup.

    currfilesglob : str or None
        None clears the list; otherwise the glob is resolved via
        cth.glob_and_check and the first self.entrypoint entries are skipped.
    """
    if currfilesglob is None:
        self.__currentfiles = None
    else:
        self.__currentfiles = cth.glob_and_check(currfilesglob)[self.entrypoint:]
    # NOTE(review): the collapsed source makes it ambiguous whether
    # _cleanup() ran only in the else branch; placed at method level here
    # (runs in both cases) — confirm against the original layout.
    self._cleanup()