Пример #1
0
def sendSkims(in_dir, num_jobs, cut, out_parent, file_tag, overwrite, cache):
    """Group the ROOT files in in_dir into jobs and submit one skim job per group.

    Args:
        in_dir: Directory holding the input files; expanded with
            utilities.fullPath.
        num_jobs: Job count handed to splitJobs and embedded in the name of
            every generated run script.
        cut: Skim selection; getSkimName(cut) provides the skim name used in
            output directory and file names.
        out_parent: Directory that receives the "skim_<name>" output folder.
            When None it is derived from in_dir by matching the babymaker
            directory layout.
        file_tag: Substring that input file names must contain.
        overwrite: Forwarded to sendSkimJob.
        cache: Forwarded to sendSkimJob.

    Raises:
        ValueError: If out_parent is None and in_dir does not match the
            expected directory layout.
    """
    in_dir = utilities.fullPath(in_dir)
    skim_name = getSkimName(cut)

    if out_parent is None:
        dir_pat = re.compile(
            "(.*?/cms[0-9]+/cms[0-9]+r0/babymaker/babies/[0-9]{4}_[0-9]{2}_[0-9]{2}/.*?)/"
        )
        match = dir_pat.search(in_dir + "/")
        if match is None:
            #Fail with an explicit message instead of an AttributeError on None
            raise ValueError("Cannot infer output parent directory from " +
                             in_dir + "; pass out_parent explicitly")
        out_parent = match.group(0)

    out_dir = os.path.join(out_parent, "skim_" + skim_name)

    in_files = glob.glob(
        utilities.fullPath(os.path.join(in_dir, "*" + file_tag + "*.root")))
    #Mirror every input path into out_dir and tag the file name with the skim name
    out_files = [
        f.replace(in_dir, out_dir).replace(".root", "_" + skim_name + ".root")
        for f in in_files
    ]

    #Both lists are split the same way so that group i of inputs lines up with group i of outputs
    in_jobs = splitJobs(in_files, num_jobs)
    out_jobs = splitJobs(out_files, num_jobs)

    total_jobs = 0
    for ijob, job_inputs in enumerate(in_jobs):
        exe_name = (skim_name + "_" + file_tag + "_" + str(ijob) + "_of_" +
                    str(num_jobs) + ".py")
        sendSkimJob(job_inputs, out_jobs[ijob], cut, overwrite, cache,
                    exe_name)
        total_jobs += 1

    print("Submitted " + str(total_jobs) + " jobs.")
    print("Output sent to {}".format(out_dir))
Пример #2
0
def cacheCopy(src, dst, min_free, file_map, no_delete):
    """Cache a copy of src at dst if possible, evicting old cache files if necessary.

    Args:
        src: Path of the file to copy.
        dst: Path of the cached copy to create.
        min_free: Number of bytes that must remain available on the cache
            filesystem after reserving room for the copy.
        file_map: Passed to removeOldCache when space has to be reclaimed.
        no_delete: If true, never evict; return without copying when space
            is short.

    Returns:
        None. The function returns without copying when enough space cannot
        be made available.

    Raises:
        Any exception raised while copying or updating dst is re-raised
        after the partial copy has been removed.
    """
    cache_root = utilities.fullPath("/scratch/babymaker")

    #Safety factor of 2 to account for file growth if cached copy is modified
    src_size = os.stat(src).st_size * 2

    while True:
        du = os.statvfs(cache_root)
        avail = du.f_bsize * du.f_bavail
        if avail - src_size >= min_free:
            break
        #Keep deleting until there's room
        if no_delete:
            return
        if not removeOldCache(file_map):
            return

    print("Caching " + src + " to " + dst + "\n")
    try:
        shutil.copy(src, dst)
        os.chmod(dst, 0o775)
        #Touch the copy until cacheUpToDate accepts it as current
        while not cacheUpToDate(dst, src):
            now = time.time()
            os.utime(dst, (now, now))
    except:
        #Bare except is deliberate: clean up on any interruption, then re-raise.
        #Only remove dst if it exists, so a copy that failed before creating it
        #does not replace the real error with an OSError from os.remove
        if os.path.exists(dst):
            os.remove(dst)
        utilities.ePrint("Failed to cache " + src + " to " + dst + "\n")
        raise
Пример #3
0
def removeOldCache(file_map):
    """Delete the oldest cached file that is not listed in file_map.

    "Oldest" means the smallest value returned by lastTime over all files
    under the cache directory. Nothing is deleted if that file was used
    within the last 24 hours.

    Args:
        file_map: Mapping whose values are cache paths that must not be
            deleted.

    Returns:
        True if a file was deleted, False otherwise.
    """
    cache_root = utilities.fullPath("/scratch/babymaker")
    #Build the protected set once instead of rescanning file_map for every file
    in_use = set(file_map.values())

    oldest_path = None
    oldest_mod_time = 0
    for root, dirs, files in os.walk(cache_root):
        for f in files:
            path = os.path.join(root, f)
            if path in in_use:
                continue
            mod_time = lastTime(path)
            if oldest_path is None or mod_time < oldest_mod_time:
                oldest_mod_time = mod_time
                oldest_path = path

    if oldest_path is None:
        return False
    if time.time() - oldest_mod_time <= 86400.:
        #Don't delete files used in last 24 hours
        return False

    oldest_path = utilities.fullPath(oldest_path)
    print("Deleting " + oldest_path + " from cache\n")
    try:
        os.remove(oldest_path)
    except OSError:
        return False

    #Prune directories left empty by the removal. Stop at the cache root so the
    #cache directory itself survives, and stop at the first directory that
    #cannot be removed since every ancestor then still has a child
    parent = os.path.dirname(oldest_path)
    while parent not in ("/", "", cache_root):
        try:
            os.rmdir(parent)
        except OSError:
            break
        parent = os.path.dirname(parent)
    return True
Пример #4
0
def cacheCopy(src, dst, min_free, file_map, no_delete):
    """Cache a copy of src at dst if possible, removing old files from cache if necessary.

    Args:
        src: Path of the file to copy.
        dst: Path of the cached copy to create.
        min_free: Number of bytes that must remain available on the cache
            filesystem after reserving room for the copy.
        file_map: Passed to removeOldCache when space has to be reclaimed.
        no_delete: If true, never evict; return without copying when space
            is short.

    Returns:
        None. The function returns without copying when enough space cannot
        be made available.

    Raises:
        Any exception raised while copying or updating dst is re-raised
        after the partial copy has been removed.
    """
    cache_root = utilities.fullPath("/scratch/babymaker")

    #Safety factor of 2 to account for file growth if cached copy is modified
    src_size = os.stat(src).st_size * 2

    while True:
        du = os.statvfs(cache_root)
        avail = du.f_bsize * du.f_bavail
        if avail - src_size >= min_free:
            break
        #Keep deleting until there's room
        if no_delete:
            return
        if not removeOldCache(file_map):
            return

    print("Caching " + src + " to " + dst + "\n")
    try:
        shutil.copy(src, dst)
        os.chmod(dst, 0o775)
        #Touch the copy until cacheUpToDate accepts it as current
        while not cacheUpToDate(dst, src):
            now = time.time()
            os.utime(dst, (now, now))
    except:
        #Bare except is deliberate: clean up on any interruption, then re-raise.
        #Only remove dst if it exists, so a copy that failed before creating it
        #does not replace the real error with an OSError from os.remove
        if os.path.exists(dst):
            os.remove(dst)
        utilities.ePrint("Failed to cache " + src + " to " + dst + "\n")
        raise
Пример #5
0
def removeOldCache(file_map):
    """Delete the oldest cached file that is not listed in file_map.

    "Oldest" means the smallest value returned by lastTime over all files
    under the cache directory. Nothing is deleted if that file was used
    within the last 24 hours.

    Args:
        file_map: Mapping whose values are cache paths that must not be
            deleted.

    Returns:
        True if a file was deleted, False otherwise.
    """
    cache_root = utilities.fullPath("/scratch/babymaker")
    #Build the protected set once instead of rescanning file_map for every file
    in_use = set(file_map.values())

    oldest_path = None
    oldest_mod_time = 0
    for root, dirs, files in os.walk(cache_root):
        for f in files:
            path = os.path.join(root, f)
            if path in in_use:
                continue
            mod_time = lastTime(path)
            if oldest_path is None or mod_time < oldest_mod_time:
                oldest_mod_time = mod_time
                oldest_path = path

    if oldest_path is None:
        return False
    if time.time() - oldest_mod_time <= 86400.:
        #Don't delete files used in last 24 hours
        return False

    oldest_path = utilities.fullPath(oldest_path)
    print("Deleting " + oldest_path + " from cache\n")
    try:
        os.remove(oldest_path)
    except OSError:
        return False

    #Prune directories left empty by the removal. Stop at the cache root so the
    #cache directory itself survives, and stop at the first directory that
    #cannot be removed since every ancestor then still has a child
    parent = os.path.dirname(oldest_path)
    while parent not in ("/", "", cache_root):
        try:
            os.rmdir(parent)
        except OSError:
            break
        parent = os.path.dirname(parent)
    return True
Пример #6
0
def expand(files):
    """Return the full paths for every entry of files, expanding glob patterns.

    An entry that matches nothing on disk is kept as a single path, passed
    through utilities.fullPath unchanged otherwise.
    """
    full_paths = []
    for entry in files:
        #Fall back to the literal entry when the pattern has no matches
        matches = glob.glob(entry) or [entry]
        full_paths.extend(utilities.fullPath(m) for m in matches)
    return full_paths
Пример #7
0
def expand(files):
    """Expand glob patterns in files and return every result as a full path.

    Entries without any match on disk contribute themselves, so the result
    always has at least one path per entry.
    """
    return [
        utilities.fullPath(path)
        for entry in files
        for path in (glob.glob(entry) or [entry])
    ]
Пример #8
0
def sendSlimJobs(input_dir, skims, slims, overwrite, output_dir):
    """Submit a slim job for every combination of slim rule file and skim directory.

    Args:
        input_dir: Directory containing the "skim_*" folders.
        skims: Glob patterns for skim names (without the "skim_" prefix);
            an empty list selects every skim.
        slims: Glob patterns for the slim files.
        overwrite: Forwarded to sendSlimJob.
        output_dir: Forwarded to sendSlimJob.
    """
    input_dir = utilities.fullPath(input_dir)
    if skims == []:
        skims = ["*"]

    skim_dirs = []
    for skim_pattern in skims:
        found = glob.glob(os.path.join(input_dir, "skim_" + skim_pattern))
        skim_dirs.extend(utilities.fullPath(d) for d in found)

    slim_files = []
    for slim_pattern in slims:
        slim_files.extend(
            utilities.fullPath(s) for s in glob.glob(slim_pattern))

    #sendSlimJob's return value is accumulated as the number of submitted jobs
    total_jobs = sum(
        sendSlimJob(skim_dir, slim_file, overwrite, output_dir)
        for slim_file in slim_files
        for skim_dir in skim_dirs)

    print("Submitted "+str(total_jobs)+" jobs.")
Пример #9
0
def sendSkimJob(in_files, out_files, cut, overwrite, cache, exe_name):
    """Write a Python run script that skims each input file, then submit it.

    The script is created in a "run" directory next to the first output
    file and handed to JobSubmit.csh.

    Args:
        in_files: Input file paths for this job.
        out_files: Output file paths, paired with in_files by position.
        cut: Selection string passed to skim_ntuple.py.
        overwrite: If false, pairs whose output file already exists are
            left out of the script.
        cache: If true the script runs each skim through cache.cacheRun,
            otherwise through subprocess.call.
        exe_name: File name of the generated run script.
    """
    python_dir = utilities.fullPath(os.path.dirname(__file__))
    run_dir = os.path.join(os.path.dirname(out_files[0]), "run")
    utilities.ensureDir(run_dir)
    run_file = os.path.join(run_dir, exe_name)
    skim_exe = os.path.join(python_dir, 'skim_ntuple.py')

    #repr() emits valid Python literals, so quotes or backslashes in paths and
    #cuts cannot corrupt the generated script
    script = [
        '#! /usr/bin/env python\n',
        'import sys\n',
        'sys.path.append(' + repr(python_dir) + ')\n',
        'import subprocess\n',
        'import cache\n',
    ]
    for in_file, out_file in zip(in_files, out_files):
        if os.path.exists(out_file) and not overwrite:
            continue
        command = [skim_exe, cut, out_file, in_file]
        if cache:
            script.append(
                'cache.cacheRun({!r},{!r},False,10000000000,0.5,False)\n'.
                format([out_file, in_file], command))
        else:
            script.append('subprocess.call({!r})\n'.format(command))

    with open(run_file, "w") as f:
        f.writelines(script)
    os.chmod(run_file, 0o755)

    subprocess.call(["JobSubmit.csh", "run/wrapper.sh", run_file])
Пример #10
0
def mapFiles(command, file_map):
    """Replace executable arguments with their cached equivalents.

    Args:
        command: Sequence of command-line arguments; arguments that glob to
            existing files are expanded to full paths first.
        file_map: Mapping from original path to cached path.

    Returns:
        A tuple (mapped_command, inv_file_map). inv_file_map maps cached
        path back to original path and also includes any up-to-date
        pre-existing caches found for arguments accepted by isNetFile.
    """
    resolved_args = []
    for token in command:
        matches = glob.glob(token)
        if matches:
            #Argument represents file(s)
            resolved_args.extend(utilities.fullPath(m) for m in matches)
        else:
            resolved_args.append(token)

    inv_file_map = {cached: net for net, cached in file_map.items()}
    mapped = []
    for token in resolved_args:
        #Check if generated cache for file
        if token in file_map and cacheUpToDate(file_map[token], token):
            mapped.append(file_map[token])
            continue
        #Check if pre-existing cache
        if isNetFile(token):
            candidate = cachePath(token)
            if cacheUpToDate(candidate, token):
                inv_file_map[candidate] = token
                mapped.append(candidate)
                continue
        #No usable cache: keep the argument as given
        mapped.append(token)

    return mapped, inv_file_map
Пример #11
0
def mapFiles(command, file_map):
    """Swap command arguments for cached copies wherever a current cache exists.

    Args:
        command: Sequence of command-line arguments; any argument that globs
            to existing files is replaced by the full paths of those files.
        file_map: Mapping from original path to cached path.

    Returns:
        A tuple (new_command, inv_file_map) where inv_file_map maps each
        cached path to its original path, extended with up-to-date caches
        discovered for arguments accepted by isNetFile.
    """
    #Step 1: expand file arguments to full paths, leave everything else alone
    expanded_args = []
    for item in command:
        hits = glob.glob(item)
        expanded_args += [utilities.fullPath(h) for h in hits] if hits else [item]

    #Step 2: substitute cached paths
    inv_file_map = {}
    for net, cached in file_map.items():
        inv_file_map[cached] = net

    new_command = []
    for item in expanded_args:
        replacement = item
        if item in file_map and cacheUpToDate(file_map[item], item):
            #Cache generated for this file
            replacement = file_map[item]
        elif isNetFile(item):
            #Possibly a pre-existing cache
            cache_path = cachePath(item)
            if cacheUpToDate(cache_path, item):
                replacement = cache_path
                inv_file_map[cache_path] = item
        new_command.append(replacement)

    return new_command, inv_file_map
Пример #12
0
def skimFiles(in_files, out_file, cut, keep_existing):
    """Chain the "tree" objects of in_files and write the entries selected by cut to out_file.

    Args:
        in_files: Paths of the input ROOT files.
        out_file: Path of the ROOT file to (re)create.
        cut: Selection; expanded with expandCut before use.
        keep_existing: If true and out_file already exists, nothing is
            written.
    """
    in_files = [utilities.fullPath(path) for path in in_files]
    out_file = utilities.fullPath(out_file)

    #The output directory is created even if the skim is skipped below
    utilities.ensureDir(os.path.dirname(out_file))

    cut = expandCut(cut)

    print("INPUT FILES:",in_files,"\n")
    print("OUTPUT FILE:",out_file,"\n")
    print("        CUT:",cut,"\n")

    already_done = keep_existing and os.path.exists(out_file)
    if already_done:
        print("Keeping pre-existing "+out_file+"\n")
        return

    chain = ROOT.TChain("tree", "tree")
    for path in in_files:
        chain.Add(path)

    with utilities.ROOTFile(out_file, "recreate") as out_root:
        skimmed = chain.CopyTree(cut)
        skimmed.Write()
Пример #13
0
def skimFiles(in_files, out_file, cut, keep_existing):
    """Write the entries of the chained input trees that pass cut into out_file.

    Args:
        in_files: Paths of the input ROOT files; each is added to a TChain
            named "tree".
        out_file: Path of the ROOT file to (re)create.
        cut: Selection; expanded with expandCut before use.
        keep_existing: If true, an existing out_file is left untouched.
    """
    in_files = [utilities.fullPath(name) for name in in_files]
    out_file = utilities.fullPath(out_file)
    out_dir = os.path.dirname(out_file)

    utilities.ensureDir(out_dir)

    cut = expandCut(cut)

    print("INPUT FILES:", in_files, "\n")
    print("OUTPUT FILE:", out_file, "\n")
    print("        CUT:", cut, "\n")

    if keep_existing:
        if os.path.exists(out_file):
            print("Keeping pre-existing " + out_file + "\n")
            return

    source_chain = ROOT.TChain("tree", "tree")
    for name in in_files:
        source_chain.Add(name)

    with utilities.ROOTFile(out_file, "recreate") as out_handle:
        selected = source_chain.CopyTree(cut)
        selected.Write()
Пример #14
0
def cacheRun(caches, command, fragile, abs_limit, rel_limit, no_delete):
    """Install signal handlers and run command through cacheRecurse.

    Args:
        caches: File names or glob patterns, expanded with expand().
        command: Command passed through to cacheRecurse.
        fragile: Forwarded to cacheRecurse.
        abs_limit: Lower bound, in bytes, for the free-space requirement.
        rel_limit: Fraction of the cache filesystem's total size that must
            stay free; the larger of the two limits is used.
        no_delete: Forwarded to cacheRecurse.
    """
    #Route every catchable signal through signalHandler; SIGKILL and SIGSTOP cannot be intercepted
    uncatchable = ("SIGKILL", "SIGSTOP")
    for name in dir(signal):
        is_signal_name = name.startswith("SIG") and not name.startswith("SIG_")
        if is_signal_name and name not in uncatchable:
            signal.signal(getattr(signal, name), signalHandler)

    if not os.path.isdir("/scratch/babymaker"):
        #No cache directory on this machine: run with no cached files and no space requirement
        cacheRecurse([], {}, command, True, 0, True)
        return

    expanded_caches = expand(caches)
    du = os.statvfs(utilities.fullPath("/scratch/babymaker"))
    total_bytes = du.f_bsize*du.f_blocks
    min_free = max(abs_limit, total_bytes*rel_limit)
    cacheRecurse(expanded_caches, {}, command, fragile, min_free, no_delete)
Пример #15
0
def sendSkims(in_dir, num_jobs, cut, out_parent, file_tag, overwrite):
    """Group the ROOT files in in_dir into jobs and submit one skim job per group.

    Args:
        in_dir: Directory holding the input files; expanded with
            utilities.fullPath.
        num_jobs: Job count handed to splitJobs and embedded in the name of
            every generated run script.
        cut: Skim selection; getSkimName(cut) provides the skim name used in
            output directory and file names.
        out_parent: Directory that receives the "skim_<name>" output folder.
            When None it is derived from in_dir by matching the babymaker
            directory layout.
        file_tag: Substring that input file names must contain.
        overwrite: Forwarded to sendSkimJob.

    Raises:
        ValueError: If out_parent is None and in_dir does not match the
            expected directory layout.
    """
    in_dir = utilities.fullPath(in_dir)
    skim_name = getSkimName(cut)

    if out_parent is None:
        dir_pat = re.compile("(.*?/cms[0-9]+/cms[0-9]+r0/babymaker/babies/[0-9]{4}_[0-9]{2}_[0-9]{2}/.*?)/")
        match = dir_pat.search(in_dir+"/")
        if match is None:
            #Fail with an explicit message instead of an AttributeError on None
            raise ValueError("Cannot infer output parent directory from "
                             +in_dir+"; pass out_parent explicitly")
        out_parent = match.group(0)

    out_dir = os.path.join(out_parent,"skim_"+skim_name)

    in_files = glob.glob(utilities.fullPath(os.path.join(in_dir, "*"+file_tag+"*.root")))
    #Mirror every input path into out_dir and tag the file name with the skim name
    out_files = [ f.replace(in_dir, out_dir).replace(".root","_"+skim_name+".root") for f in in_files ]

    #Both lists are split the same way so that group i of inputs lines up with group i of outputs
    in_jobs = splitJobs(in_files, num_jobs)
    out_jobs = splitJobs(out_files, num_jobs)

    total_jobs = 0
    for ijob, job_inputs in enumerate(in_jobs):
        exe_name = skim_name+"_"+file_tag+"_"+str(ijob)+"_of_"+str(num_jobs)+".py"
        sendSkimJob(job_inputs, out_jobs[ijob], cut, overwrite, exe_name)
        total_jobs += 1

    print("Submitted "+str(total_jobs)+" jobs.")
Пример #16
0
def cacheRun(caches, command, fragile, abs_limit, rel_limit, no_delete):
    """Run command via cacheRecurse after hooking signalHandler onto all catchable signals.

    Args:
        caches: File names or glob patterns, expanded with expand().
        command: Command passed through to cacheRecurse.
        fragile: Forwarded to cacheRecurse.
        abs_limit: Lower bound, in bytes, for the free-space requirement.
        rel_limit: Fraction of the cache filesystem's total size that must
            stay free; the larger of the two limits is used.
        no_delete: Forwarded to cacheRecurse.
    """
    cache_dir = "/scratch/babymaker"

    def _is_catchable(name):
        #Signal constants start with "SIG"; "SIG_" names are handler constants, not signals
        if not name.startswith("SIG") or name.startswith("SIG_"):
            return False
        return name != "SIGKILL" and name != "SIGSTOP"

    for sig_name in filter(_is_catchable, dir(signal)):
        signal.signal(getattr(signal, sig_name), signalHandler)

    if os.path.isdir(cache_dir):
        caches = expand(caches)
        du = os.statvfs(utilities.fullPath(cache_dir))
        min_free = max(abs_limit, du.f_bsize * du.f_blocks * rel_limit)
        cacheRecurse(caches, dict(), command, fragile, min_free, no_delete)
    else:
        #Without a cache directory, run with nothing cached and no space requirement
        cacheRecurse([], dict(), command, True, 0, True)
Пример #17
0
def killZombies(in_dirs):
    """Delete every .root file under in_dirs that ROOT reports as zombie or cannot open.

    Args:
        in_dirs: Glob patterns for the directories to scan recursively.
    """
    search_dirs = []
    for pattern in in_dirs:
        search_dirs.extend(utilities.fullPath(d) for d in glob.glob(pattern))

    #NOTE(review): 6000 presumably corresponds to ROOT's fatal level, silencing lesser messages while probing files - confirm
    ROOT.gErrorIgnoreLevel = 6000
    for top in search_dirs:
        for root, dirs, files in os.walk(top):
            print("In "+root)
            for name in files:
                if os.path.splitext(name)[1] != ".root":
                    continue
                path = os.path.join(root, name)
                tfile = ROOT.TFile(path, "read")
                #Decide before closing; the file is closed before it is deleted
                is_broken = tfile.IsZombie() or not tfile.IsOpen()
                tfile.Close()
                if is_broken:
                    print("Removing "+path)
                    os.remove(path)
Пример #18
0
def killZombies(in_dirs):
    """Walk in_dirs and remove .root files that are zombies or fail to open.

    Args:
        in_dirs: Glob patterns for the directories to scan recursively.
    """
    targets = [
        utilities.fullPath(match) for pattern in in_dirs
        for match in glob.glob(pattern)
    ]
    #NOTE(review): 6000 presumably corresponds to ROOT's fatal level, silencing lesser messages while probing files - confirm
    ROOT.gErrorIgnoreLevel = 6000
    for target in targets:
        for root, dirs, files in os.walk(target):
            print("In " + root)
            root_files = [
                f for f in files if os.path.splitext(f)[1] == ".root"
            ]
            for f in root_files:
                path = os.path.join(root, f)
                handle = ROOT.TFile(path, "read")
                unusable = handle.IsZombie() or not handle.IsOpen()
                handle.Close()
                if not unusable:
                    continue
                print("Removing " + path)
                os.remove(path)
Пример #19
0
def sendSkimJob(in_files, out_files, cut, overwrite, exe_name):
    """Write a Python run script that skims each input file via cache.cacheRun, then submit it.

    The script is created in a "run" directory next to the first output
    file and handed to JobSubmit.csh.

    Args:
        in_files: Input file paths for this job.
        out_files: Output file paths, paired with in_files by position.
        cut: Selection string passed to skim_ntuple.py.
        overwrite: If false, pairs whose output file already exists are
            left out of the script.
        exe_name: File name of the generated run script.
    """
    python_dir = utilities.fullPath(os.path.dirname(__file__))
    run_dir = os.path.join(os.path.dirname(out_files[0]), "run")
    utilities.ensureDir(run_dir)
    run_file = os.path.join(run_dir, exe_name)
    skim_exe = os.path.join(python_dir,'skim_ntuple.py')

    #repr() emits valid Python literals, so quotes or backslashes in paths and
    #cuts cannot corrupt the generated script
    script = [
        '#! /usr/bin/env python\n',
        'import sys\n',
        'sys.path.append('+repr(python_dir)+')\n',
        'import cache\n',
    ]
    for in_file, out_file in zip(in_files, out_files):
        if os.path.exists(out_file) and not overwrite:
            continue
        script.append('cache.cacheRun({!r},{!r},False,10000000000,0.5,False)\n'
                      .format([out_file, in_file],
                              [skim_exe, cut, out_file, in_file]))

    with open(run_file, "w") as f:
        f.writelines(script)
    os.chmod(run_file, 0o755)

    subprocess.call(["JobSubmit.csh","run/wrapper.sh",run_file])
Пример #20
0
def cachePath(path):
    """Return the location under the cache directory that corresponds to path."""
    #Dropping the first five characters leaves a relative tail for os.path.join;
    #presumably this strips a leading "/net/" - TODO confirm against isNetFile
    relative_tail = path[5:]
    return os.path.join(utilities.fullPath("/scratch/babymaker"), relative_tail)
Пример #21
0
def deleteTreeglobal(in_files):
    """Call recursiveDelete on the full path of everything matched by the glob patterns in in_files."""
    targets = []
    for pattern in in_files:
        targets.extend(utilities.fullPath(hit) for hit in glob.glob(pattern))
    #All patterns are expanded before the first deletion starts
    for target in targets:
        recursiveDelete(target)
Пример #22
0
def cachePath(path):
    """Map path to its counterpart inside the cache directory."""
    base = utilities.fullPath("/scratch/babymaker")
    #The first five characters are cut so the remainder joins as a relative path;
    #presumably they are a "/net/" prefix - TODO confirm with callers
    tail = path[5:]
    return os.path.join(base, tail)