def hadd(output_fn, input_fns): """This is a simple wrapper around hadd that suppresses the stdout from hadd, only reporting a summary line of how many files were merged. Output to eos is supported, including for the log file for stdout. Checks that the number of files reported merged by hadd is the same as the number in the input list, or if there were any other problems reported by hadd. If so, prints an error to stdout. Returns true if success. """ l = len(input_fns) start = datetime.now() print 'hadding %i files to %s at %s' % (l, output_fn, start) args = ['hadd', output_fn] + input_fns p = subprocess.Popen(args=args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdout, stderr = p.communicate() assert stderr is None log_fn = output_fn + '.haddlog' is_eos = '/store/' in output_fn # ugh while eos.exists(log_fn) if is_eos else os.path.exists(log_fn): log_fn += '.2' if is_eos: fd, tmp_fn = tempfile.mkstemp() os.fdopen(fd, 'wt').write(stdout) eos.cp( tmp_fn, log_fn ) # if the haddlog already exists the new one will silently go into the ether... os.remove(tmp_fn) else: open(log_fn, 'wt').write(stdout) if p.returncode != 0: print colors.error('PROBLEM hadding %s' % output_fn) #print p.stdout.read() return False max_file_num = max( int(line.split(':')[0].split(' ')[-1]) for line in stdout.split('\n') if 'Source file' in line) print '-> %i files merged in %s' % (max_file_num, datetime.now() - start) if max_file_num != l: print colors.error('PROBLEM hadding %s' % output_fn) return False return True
def hadd(output_fn, input_fns): """This is a simple wrapper around hadd that suppresses the stdout from hadd, only reporting a summary line of how many files were merged. Output to eos is supported, including for the log file for stdout. Checks that the number of files reported merged by hadd is the same as the number in the input list, or if there were any other problems reported by hadd. If so, prints an error to stdout. Returns true if success. """ l = len(input_fns) start = datetime.now() print 'hadding %i files to %s at %s' % (l, output_fn, start) args = ['hadd', output_fn] + input_fns p = subprocess.Popen(args=args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdout, stderr = p.communicate() assert stderr is None log_fn = output_fn + '.haddlog' is_eos = '/store/' in output_fn # ugh while eos.exists(log_fn) if is_eos else os.path.exists(log_fn): log_fn += '.2' if is_eos: fd, tmp_fn = tempfile.mkstemp() os.fdopen(fd, 'wt').write(stdout) eos.cp(tmp_fn, log_fn) # if the haddlog already exists the new one will silently go into the ether... os.remove(tmp_fn) else: open(log_fn, 'wt').write(stdout) if p.returncode != 0: print colors.boldred('PROBLEM hadding %s' % output_fn) #print p.stdout.read() return False max_file_num = max(int(line.split(':')[0].split(' ')[-1]) for line in stdout.split('\n') if 'Source file' in line) print '-> %i files merged in %s' % (max_file_num, datetime.now() - start) if max_file_num != l: print colors.boldred('PROBLEM hadding %s' % output_fn) return False return True
# Survey a random subset of each sample's files, measuring file size vs.
# size accounted for by the Events tree. NOTE(review): this chunk appears
# truncated -- the running totals initialized below are not visibly
# accumulated or appended to `infos` within this view; confirm against
# the full file.
infos = []
for sample in samples:
    if verbosity >= 1:
        print sample
    # Take a random subset of nfiles files registered for this sample/dataset.
    fns = SampleFiles.get_fns(sample, dataset)
    fns = random.sample(fns, nfiles)
    total_size = 0
    total_size_in_events = 0
    for fn in fns:
        assert fn.endswith('.root')
        if verbosity >= 2:
            print fn
        if not eos.exists(fn):
            raise IOError('does not exist on eos: %r' % fn)
        bn = os.path.basename(fn)
        # Skip files whose basename doesn't match the requested glob pattern.
        if not fnmatch(bn, pattern):
            continue
        size = eos.size(fn)
        # EdmFileInfo reads the (EDM) ROOT file; Events gives the event
        # count and the on-disk size attributed to the Events tree.
        info = EdmFileInfo(eos.canon(fn))
        nevents = info.Events.nevents
        size_in_events = info.Events.size()
        if verbosity >= 3:
            print ' size %.0f in %i events %.0f frac %.4f' % (size, nevents, size_in_events, float(size_in_events) / size)
import sys, os from pprint import pprint from JMTucker.Tools import eos, SampleFiles from JMTucker.Tools.ROOTTools import ROOT, detree if len(sys.argv) < 6: sys.exit('usage: %s dataset sample run lumi event\n where dataset and sample are as registered in SampleFiles. sample can be "*" to mean all samples having the dataset.' % sys.argv[0]) dataset = sys.argv[1] sample = sys.argv[2] rle = int(sys.argv[3]), int(sys.argv[4]), int(sys.argv[5]) fns = SampleFiles.get_fns(sample, dataset) nfound = 0 for fn in fns: assert fn.endswith('.root') if not eos.exists(fn): raise IOError('does not exist on eos: %r' % fn) f = ROOT.TFile.Open(eos.canon(fn)) t = f.Get('Events') for x in sorted(detree(t, 'EventAuxiliary.id().run():EventAuxiliary.luminosityBlock():EventAuxiliary.id().event()', xform=int)): if x == rle: print fn nfound += 1 if nfound != 1: sys.exit('%i found' % nfound)
# For each sample, parse the job number out of every filename and count
# how many files live under each dated output directory. NOTE(review):
# this chunk appears truncated -- `vh_fns` and `jobnum` are set up but
# not visibly used within this view; confirm against the full file.
for sample in samples:
    if verbosity >= 1:
        print sample
    fns = SampleFiles.get_fns(sample, dataset)
    vh_fns = []
    # since the files can be spread out over multiple datedirs due to
    # resubmissions, put the file in the date dir that has most of the files
    new_vh_paths = Counter()
    for fn in fns:
        assert fn.endswith('.root')
        if verbosity >= 2:
            print fn
        if not eos.exists(fn):
            raise IOError('does not exist on eos: %r' % fn)
        dn, bn = os.path.split(fn)
        # bn_re extracts (something, jobnum) from the basename;
        # unparseable names are a hard error.
        mo = bn_re.search(bn)
        if not mo:
            raise ValueError('could not parse fn %r' % fn)
        _, jobnum = mo.groups()
        jobnum = int(jobnum)
        # path_re group 1 is presumably the datedir portion of the path
        # -- TODO confirm against the regex definition.
        path_mo = path_re.search(fn)
        if not path_mo:
            raise ValueError('could not parse path for %r' % fn)
        new_vh_paths[path_mo.group(1)] += 1