def pool_insert(files, catalog_name="xmlcatalog_file:PoolFileCatalog.xml"):
    """Insert `files` into a POOL catalog using `pool_insertFileToCatalog`.

    The tool is located with `which`, then run on all file names at once
    with `POOL_CATALOG` set to `catalog_name`. Known ROOT warning lines are
    stripped from the tool output before it is printed.

    Returns:
        1 if the tool could not be located,
        2 if the tool exited with a non-zero status,
        the tool's exit status (i.e. 0) otherwise.
    """
    n_files = len(files)
    print(":: inserting [%i] files into pool catalog... (%s)" % (n_files, catalog_name))
    import os
    import sys
    import commands

    status, tool = commands.getstatusoutput('which pool_insertFileToCatalog')
    if status != 0:
        print(":: could not find 'pool_insertFileToCatalog' !")
        print(tool)
        return 1

    import PyUtils.Helpers as H
    with H.restricted_ldenviron(projects=('AtlasCore', )):
        os.environ['POOL_CATALOG'] = catalog_name
        command_line = "%s %s" % (tool, " ".join(files))
        status, output = commands.getstatusoutput(command_line)

    # drop the warning lines we know to be harmless noise
    noise_prefixes = (
        "Warning in <TClass::TClass>: no dictionary for class ",
        'Warning in <TEnvRec::ChangeValue>: duplicate entry',
    )
    kept_lines = []
    for line in output.splitlines():
        if not line.startswith(noise_prefixes):
            kept_lines.append(line)
    output = os.linesep.join(kept_lines)

    if status != 0:
        print(":: problem running pool_insertFileToCatalog:")
        print(output)
        return 2

    print(output)
    print(":: inserting [%i] files into pool catalog... [done]" % n_files)
    return status
def pool_extract(files):
    """Print the GUIDs of `files` as reported by `pool_extractFileIdentifier`.

    The tool is located with `which`, then run on all file names at once.
    Known ROOT warning lines are stripped from the tool output before it is
    printed.

    Returns:
        1 if the tool could not be located, otherwise the exit status of
        the tool (non-zero on failure, 0 on success).
    """
    n_files = len(files)
    print(":: extracting GUID for [%i] files... " % n_files)
    import os
    import sys
    import commands

    status, tool = commands.getstatusoutput('which pool_extractFileIdentifier')
    if status != 0:
        print(":: could not find 'pool_extractFileIdentifier' !")
        print(tool)
        return 1

    import PyUtils.Helpers as H
    with H.restricted_ldenviron(projects=('AtlasCore', )):
        command_line = "%s %s" % (tool, " ".join(files))
        status, output = commands.getstatusoutput(command_line)

    # drop the warning lines we know to be harmless noise
    noise_prefixes = (
        "Warning in <TClass::TClass>: no dictionary for class ",
        'Warning in <TEnvRec::ChangeValue>: duplicate entry',
    )
    output = os.linesep.join(
        line for line in output.splitlines()
        if not line.startswith(noise_prefixes)
    )

    if status != 0:
        print(":: problem running pool_extractFileIdentifier:")
        print(output)
        return status

    print(output)
    print(":: extracting GUID for [%i] files... [done]" % n_files)
    return status
def _root_open(self, fname, raw=True):
    """Open `fname` through ROOT in 'READ' mode and return the file object.

    `https` files are opened with `TWebFile.Open` (after `_setup_ssl`),
    everything else with `TFile.Open`. When `raw` is true a
    `filetype=raw` option is appended to the file name.

    Raises:
        IOError (errno.ENOENT) if ROOT returns a null/false file object.
    """
    root = self.pyroot
    import re
    quiet = H.ShutUp(filters=[
        re.compile('TClass::TClass:0: RuntimeWarning: no dictionary for class.*')
    ])
    with quiet:
        root.gSystem.Load('libRootCollection')
        opener = root.TFile.Open

        # the protocol is needed here b/c secure-http files have to be
        # opened as TWebFiles instead of plain TFiles...
        protocol, _ = self.fname(fname)
        over_https = protocol == 'https'
        if over_https:
            _setup_ssl(self.msg(), root)
            opener = root.TWebFile.Open

        if not raw:
            target = fname
        elif over_https and '?' in fname:
            # the URL already carries parameters: append ours to them
            target = fname + '&filetype=raw'
        else:
            target = fname + '?filetype=raw'

        handle = opener(target, 'READ')
        if not handle:
            raise IOError(errno.ENOENT, 'No such file or directory', fname)
        return handle
    return
def __call__(self, fname, evtmax):
    """Forward to `self._process_call(fname, evtmax, projects=None)`.

    The call is wrapped in `H.ShutUp` with a catch-all ('.*') filter
    (presumably to silence everything it prints -- see PyUtils.Helpers).
    Returns whatever `_process_call` returns.
    """
    import re
    import PyUtils.Helpers as H

    catch_all = re.compile('.*')
    with H.ShutUp(filters=[catch_all]):
        result = self._process_call(fname, evtmax, projects=None)
    return result
def pool_insert(files, catalog_name="xmlcatalog_file:PoolFileCatalog.xml"):
    """Run `pool_insertFileToCatalog` on `files` for catalog `catalog_name`.

    `POOL_CATALOG` is set to `catalog_name` before the tool is run. The tool
    output is printed once a few known warning lines have been removed.

    Returns:
        1 when `which` can't find the tool, 2 when the tool fails, and the
        tool's exit status (0) on success.
    """
    print(":: inserting [%i] files into pool catalog... (%s)" % (len(files), catalog_name))
    import os
    import sys
    import commands

    rc, exe_path = commands.getstatusoutput('which pool_insertFileToCatalog')
    if rc != 0:
        print(":: could not find 'pool_insertFileToCatalog' !")
        print(exe_path)
        return 1

    import PyUtils.Helpers as H
    with H.restricted_ldenviron(projects=('AtlasCore',)):
        os.environ['POOL_CATALOG'] = catalog_name
        rc, raw_output = commands.getstatusoutput(
            "%s %s" % (exe_path, " ".join(files)))

    def _is_noise(text):
        # warnings known to be harmless; not worth showing to the user
        return text.startswith((
            "Warning in <TClass::TClass>: no dictionary for class ",
            'Warning in <TEnvRec::ChangeValue>: duplicate entry',
        ))

    cleaned = os.linesep.join(
        [text for text in raw_output.splitlines() if not _is_noise(text)])

    if rc != 0:
        print(":: problem running pool_insertFileToCatalog:")
        print(cleaned)
        return 2

    print(cleaned)
    print(":: inserting [%i] files into pool catalog... [done]" % len(files))
    return rc
class PoolFile(object):
    """
    A simple class to retrieve information about the content of a POOL file.
    It should be abstracted from the underlying technology used to create this
    POOL file (Db, ROOT,...).
    Right now, we are using the quick-and-dirty solution: going straight to
    the ROOT 'API'.
    """

    def __init__(self, fileName, verbose=True):
        """Load the content description of `fileName`.

        If `whichdb` recognises `fileName` as a db file, it is opened as a
        shelve and the 'fileInfos', 'dataHeader' and 'data' entries of its
        'report' record are used. Otherwise the file is opened and processed
        as a POOL file.

        Args:
            fileName: name of the file; it is first resolved through
                `PyUtils.AthFile.server.fname` (best effort).
            verbose: progress messages are printed when this equals True.
        """
        object.__init__(self)

        self._fileInfos = None
        self.keys = None
        self.dataHeader = PoolRecord("DataHeader", 0, 0, 0,
                                     nEntries=0,
                                     dirType="T")
        self.data = []
        self.verbose = verbose

        # get the "final" file name (handles all kind of protocols)
        # best effort: on failure we warn and carry on with the name as given
        try:
            import PyUtils.AthFile as af
            protocol, fileName = af.server.fname(fileName)
        except Exception as err:
            print("## warning: problem opening PoolFileCatalog:\n%s" % err)
            import traceback
            # print_exc() takes a `limit` as first argument, not the
            # exception: the active exception is picked up automatically.
            traceback.print_exc()

        self.poolFile = None
        dbFileName = whichdb.whichdb(fileName)
        if dbFileName not in (None, ''):
            if self.verbose == True:
                print("## opening file [%s]..." % str(fileName))
            db = shelve.open(fileName, 'r')
            try:
                if self.verbose == True:
                    print("## opening file [OK]")
                report = db['report']
                self._fileInfos = report['fileInfos']
                self.dataHeader = report['dataHeader']
                self.data = report['data']
            finally:
                # don't leave the shelve open once its content is loaded
                db.close()
        else:
            import PyUtils.Helpers as _H
            projects = 'AtlasCore' if PoolOpts.FAST_MODE else None
            with _H.restricted_ldenviron(projects=projects):
                if self.verbose == True:
                    print("## opening file [%s]..." % str(fileName))
                self.__openPoolFile(fileName)
                if self.verbose == True:
                    print("## opening file [OK]")
                self.__processFile()

        return
def get_runs_from_tagfile(self, fname):
    """Collect the run numbers found in TAG file `fname`.

    Answers are cached in `self.file_cache`, keyed by file name.

    Returns:
        [] if AthFile does not list a 'TAG' stream for the file or if the
        file can't be opened; otherwise the set of `self.run_attr_name`
        values read from every entry of 'POOLCollectionTree'.
    """
    # answer from our own cache when this file was already looked at
    if fname in self.file_cache.keys():
        return self.file_cache[fname]

    # ask AthFile for the file type first - this should avoid reopening
    # files more often than necessary
    msg.debug("Checking file %s" % fname)
    import PyUtils.AthFile as athFile
    infos = athFile.fopen(fname).fileinfos
    if 'TAG' not in infos['stream_names']:
        return []

    # this is a TAG file: open it and read all run numbers
    # (the fileinfos only hold the run number of the first TAG)
    import PyUtils.Helpers as H
    with H.restricted_ldenviron(projects=['AtlasCore']):
        import re
        noise_filters = [
            re.compile('TClass::TClass:0: RuntimeWarning: no dictionary for.*'),
            re.compile('.*duplicate entry.*'),
        ]
        with H.ShutUp(filters=noise_filters):
            msg.debug("Opening TAG file %s" % fname)
            import PyUtils.RootUtils as ru
            tag_file = ru.import_root().TFile.Open(fname, "read")
            if not tag_file:
                msg.warning("Failed to open TAG file %s" % fname)
                return []

            tree = tag_file.Get('POOLCollectionTree')
            runs = set()
            if tree is not None:
                for entry in xrange(0, tree.GetEntries()):
                    tree.GetEntry(entry)
                    runs.add(getattr(tree, self.run_attr_name))
                # release the tree before the file it belongs to is closed
                del tree
            tag_file.Close()
            del tag_file

            self.file_cache[fname] = runs
            msg.info("TAG file: %s, found runs: %s" % (fname, str(runs)))
            return runs
def _pythonize_tfile():
    """Give ROOT's `TFile` python file-like `read`, `seek` and `tell`.

    Loads the "RootUtilsPyROOTDict" dictionary and uses the
    `RootUtils._pythonize_read_root_file` / `_pythonize_tell_root_file`
    helpers it provides to implement the methods installed on `TFile`.
    """
    import PyCintex
    PyCintex.Cintex.Enable()
    root = import_root()
    import PyUtils.Helpers as H

    noise_filters = [
        re.compile('TClass::TClass:0: RuntimeWarning: no dictionary for.*'),
        re.compile('Warning in <TEnvRec::ChangeValue>: duplicate entry.*'),
    ]
    with H.ShutUp(filters=noise_filters):
        PyCintex.loadDict("RootUtilsPyROOTDict")
        rootutils = root.RootUtils
        pybytes = rootutils.PyBytes
        read_root_file = rootutils._pythonize_read_root_file
        tell_root_file = rootutils._pythonize_tell_root_file

    def read(self, size=-1):
        """read([size]) -> read at most size bytes, returned as a string.

        If the size argument is negative or omitted, read until EOF is
        reached, fetching the content in blocks of 4096 bytes.
        Notice that when in non-blocking mode, less data than what was
        requested may be returned, even if no size parameter was given.

        FIXME: probably doesn't follow python file-like conventions...
        """
        def fetch(nbytes):
            # one low-level read: the bytes obtained as a string, or None
            # when nothing (more) could be read
            c_buf = read_root_file(self, nbytes)
            if c_buf and c_buf.sz:
                buf = c_buf.buffer()
                buf.SetSize(c_buf.sz)
                return str(buf[:])
            return None

        if size >= 0:
            data = fetch(size)
            if data is None:
                return ''
            return data

        block_size = 4096
        pieces = []
        data = fetch(block_size)
        while data is not None:
            pieces.append(data)
            data = fetch(block_size)
        return ''.join(pieces)

    root.TFile.read = read
    del read

    root.TFile.seek = root.TFile.Seek
    root.TFile.tell = lambda self: tell_root_file(self)
    # an os.dup(self.GetFd())-based `tell` was tried here at some point
    # and is left disabled.
    return
def main(args):
    """Check that 2 ROOT files have the same content (containers and sizes).

    The tree `args.tree_name` is dumped from `args.old` and `args.new` with
    `RootFileDumper`, and both dumps are walked side by side, entry by
    entry.

    Attributes read from `args`: tree_name, old, new, ignore_leaves,
    enforce_leaves, known_hacks, entries, mode, error_mode.

    Returns:
        0 if no difference was found, 2 otherwise.
    """
    # kept in a module-level global; presumably read by _is_summary() and
    # _is_exit_early(), which are defined elsewhere -- TODO confirm
    global g_args
    g_args = args

    import PyUtils.RootUtils as ru
    root = ru.import_root()

    import PyUtils.Logging as L
    msg = L.logging.getLogger('diff-root')
    msg.setLevel(L.logging.INFO)

    from PyUtils.Helpers import ShutUp, ROOT6Setup
    ROOT6Setup()

    # an empty 'entries' option means "all entries"
    if args.entries == '':
        args.entries = -1

    msg.info('comparing tree [%s] in files:', args.tree_name)
    msg.info(' old: [%s]', args.old)
    msg.info(' new: [%s]', args.new)
    msg.info('ignore leaves: %s', args.ignore_leaves)
    msg.info('enforce leaves: %s', args.enforce_leaves)
    msg.info('hacks: %s', args.known_hacks)
    msg.info('entries: %s', args.entries)
    msg.info('mode: %s', args.mode)
    msg.info('error mode: %s', args.error_mode)

    import PyUtils.Helpers as H
    with H.ShutUp():
        fold = ru.RootFileDumper(args.old, args.tree_name)
        fnew = ru.RootFileDumper(args.new, args.tree_name)
        pass

    def tree_infos(tree, args):
        """Return the number of entries and the set of (non-ignored)
        leaf names of `tree`."""
        nentries = tree.GetEntriesFast()
        # l.GetBranch().GetName() gives the full leaf path name
        leaves = [
            l.GetBranch().GetName() for l in tree.GetListOfLeaves()
            if l.GetBranch().GetName() not in args.ignore_leaves
        ]
        return {
            'entries': nentries,
            'leaves': set(leaves),
        }

    def diff_tree(fold, fnew, args):
        """Compare the dumps of `fold` and `fnew`.

        Returns the number of differing leaves, plus 0.5 if either dumper
        ended up with `allgood` set to False.
        """
        infos = {
            'old': tree_infos(fold.tree, args),
            'new': tree_infos(fnew.tree, args),
        }

        # by default only the entries present in both files are compared
        nentries = min(infos['old']['entries'], infos['new']['entries'])
        itr_entries = nentries
        if args.entries in (-1, '', '-1'):
            #msg.info('comparing over [%s] entries...', nentries)
            itr_entries = nentries
            if infos['old']['entries'] != infos['new']['entries']:
                msg.info('different numbers of entries:')
                msg.info(' old: [%s]', infos['old']['entries'])
                msg.info(' new: [%s]', infos['new']['entries'])
                msg.info('=> comparing [%s] first entries...', nentries)
        else:
            # user-provided entries specification, handed as-is to dump()
            itr_entries = args.entries
            pass
        msg.info('comparing over [%s] entries...', itr_entries)

        # leaves present in only one of the files are reported, then skipped
        old_leaves = infos['old']['leaves'] - infos['new']['leaves']
        if old_leaves:
            msg.warning('the following variables exist only in the old file !')
            for l in old_leaves:
                msg.warning(' - [%s]', l)
        new_leaves = infos['new']['leaves'] - infos['old']['leaves']
        if new_leaves:
            msg.warning('the following variables exist only in the new file !')
            for l in new_leaves:
                msg.warning(' - [%s]', l)
        skip_leaves = old_leaves | new_leaves | set(args.ignore_leaves)

        leaves = infos['old']['leaves'] & infos['new']['leaves']
        msg.info('comparing [%s] leaves over entries...', len(leaves))
        all_good = True
        n_good = 0
        n_bad = 0
        import collections
        from itertools import izip
        # number of differences found, per branch/leaf name
        summary = collections.defaultdict(int)

        old_dump_iter = fold.dump(args.tree_name, itr_entries)
        new_dump_iter = fnew.dump(args.tree_name, itr_entries)

        def leafname_fromdump(entry):
            """Dotted leaf name of a dump entry, without the purely numeric
            components of its name path (entry[2])."""
            return '.'.join([s for s in entry[2] if not s.isdigit()])

        def reach_next(dump_iter, skip_leaves):
            """Return the next entry of `dump_iter` whose leaf is not to be
            skipped, or None once the iterator is exhausted."""
            keep_reading = True
            while keep_reading:
                try:
                    entry = dump_iter.next()
                except StopIteration:
                    return None
                entry[2][0] = entry[2][0].rstrip('.\0')  # clean branch name
                # skip the entry if any dotted prefix of its leaf name is
                # listed in skip_leaves
                name = []
                skip = False
                for n in leafname_fromdump(entry).split('.'):
                    name.append(n)
                    if '.'.join(name) in skip_leaves:
                        skip = True
                        break
                if not skip:
                    return entry
                # print 'SKIP:', leafname_fromdump(entry)
            pass

        # read_old/read_new tell which side(s) to advance in the next
        # iteration; a side is held back while the other one catches up
        read_old = True
        read_new = True
        d_old = None
        d_new = None

        while True:
            if read_old:
                prev_d_old = d_old
                d_old = reach_next(old_dump_iter, skip_leaves)
            if read_new:
                prev_d_new = d_new
                d_new = reach_next(new_dump_iter, skip_leaves)

            # both dumps exhausted: done
            if not d_new and not d_old:
                break

            read_old = True
            read_new = True
            if d_old == d_new:
                n_good += 1
                continue

            # when both entries exist, ientry/name end up being those of
            # the new one
            if d_old:
                tree_name, ientry, name, iold = d_old
            if d_new:
                tree_name, ientry, name, inew = d_new

            # FIXME: that's a plain (temporary?) hack
            if name[-1] in args.known_hacks:
                continue

            n_bad += 1

            # in synch: same tree, entry and name path, only the value
            # differs
            in_synch = d_old and d_new and d_old[:-1] == d_new[:-1]
            if not in_synch:
                if not _is_summary():
                    if d_old:
                        print '::sync-old %s' % '.'.join(["%03i" % ientry] + map(str, d_old[2]))
                    else:
                        print '::sync-old ABSENT'
                    if d_new:
                        print '::sync-new %s' % '.'.join(["%03i" % ientry] + map(str, d_new[2]))
                    else:
                        print '::sync-new ABSENT'
                    pass
                # remember for later
                if not d_old:
                    fold.allgood = False
                    summary[d_new[2][0]] += 1
                elif not d_new:
                    fnew.allgood = False
                    summary[d_old[2][0]] += 1
                else:
                    # both present: the side which is "behind" (smaller
                    # entry.branch string) is advanced alone next time
                    branch_old = '.'.join(["%03i" % ientry, d_old[2][0]])
                    branch_new = '.'.join(["%03i" % ientry, d_new[2][0]])
                    if branch_old < branch_new:
                        if not _is_summary():
                            print '::sync-old skipping entry'
                        summary[d_old[2][0]] += 1
                        fnew.allgood = False
                        read_new = False
                    elif branch_old > branch_new:
                        if not _is_summary():
                            print '::sync-new skipping entry'
                        summary[d_new[2][0]] += 1
                        fold.allgood = False
                        read_old = False
                    else:
                        # MN: difference in the leaves
                        # NOTE(review): prev_d_old/prev_d_new are None until
                        # a second entry has been read on that side;
                        # leafname_fromdump(None) would raise -- confirm
                        # this branch can't be reached that early
                        prev_leaf_old = leafname_fromdump(prev_d_old)
                        prev_leaf_new = leafname_fromdump(prev_d_new)
                        leaf_old = leafname_fromdump(d_old)
                        leaf_new = leafname_fromdump(d_new)
                        if prev_leaf_old == prev_leaf_new:
                            # array size difference?
                            if leaf_old == leaf_new and leaf_old == prev_leaf_old:
                                # could be a size difference in >1 dim arrays
                                # hard to sync, skipping both
                                pass
                            elif leaf_old == prev_leaf_old:
                                # old has bigger array, skip old entry
                                read_new = False
                                if not _is_summary():
                                    print '::sync-old skipping entry'
                                summary[leaf_old] += 1
                            elif leaf_new == prev_leaf_new:
                                # new has bigger array, skip new entry
                                read_old = False
                                if not _is_summary():
                                    print '::sync-new skipping entry'
                                summary[leaf_new] += 1

                        # no side was held back above: give up on both
                        if read_old and read_new:
                            summary[d_new[2][0]] += 1
                            if not _is_summary():
                                print '::sync-old+new skipping both entries'
                        fold.allgood = False
                        fnew.allgood = False

                if _is_exit_early():
                    print "*** exit on first error ***"
                    break
                continue

            # same leaf on both sides, different values: report it
            n = '.'.join(map(str, ["%03i" % ientry] + name))
            diff_value = 'N/A'
            # difference divided by the mean of both values, in percent;
            # stays 'N/A' when the values don't support the arithmetic
            try:
                diff_value = 50. * (iold - inew) / (iold + inew)
                diff_value = '%.8f%%' % (diff_value, )
            except Exception:
                pass
            if not _is_summary():
                print '%s %r -> %r => diff= [%s]' % (n, iold, inew, diff_value)
                pass
            summary[leafname_fromdump(d_old)] += 1

            # a difference in an 'enforced' leaf stops the comparison
            if name[0] in args.enforce_leaves:
                msg.info("don't compare further")
                all_good = False
                break
            pass  # loop over events/branches

        msg.info('Found [%s] identical leaves', n_good)
        msg.info('Found [%s] different leaves', n_bad)

        if not _is_summary():
            keys = sorted(summary.keys())
            for n in keys:
                v = summary[n]
                msg.info(' [%s]: %i leaves differ', n, v)
                pass
            pass

        if (not fold.allgood) or (not fnew.allgood):
            msg.info('NOTE: there were errors during the dump')
            msg.info('fold.allgood: %s' % fold.allgood)
            msg.info('fnew.allgood: %s' % fnew.allgood)
            # makes the result non-zero even if no leaf was counted as bad
            n_bad += 0.5
        return n_bad

    ndiff = diff_tree(fold, fnew, args)
    if ndiff != 0:
        msg.info('files differ!')
        return 2
    msg.info('all good.')
    return 0
def main(args):
    """Diff two ROOT files (containers and sizes).

    The tree `args.tree_name` is dumped from `args.old` and `args.new` with
    `RootFileDumper`, and both dumps are walked side by side. With
    `args.order_trees` the entries of each file are visited in order of
    increasing event number rather than in storage order.

    Attributes read from `args`: verbose, tree_name, old, new,
    ignore_leaves, enforce_leaves, leaves_prefix, known_hacks, entries,
    mode, error_mode, order_trees, nan_equal.

    Returns:
        0 if no difference was found, 2 otherwise.
    """
    # kept in a module-level global; presumably read by the _is_*() helpers
    # defined elsewhere -- TODO confirm
    global g_args
    g_args = args

    import PyUtils.RootUtils as ru
    root = ru.import_root()  # noqa: F841

    import PyUtils.Logging as L
    msg = L.logging.getLogger('diff-root')
    if args.verbose:
        msg.setLevel(L.logging.VERBOSE)
    else:
        msg.setLevel(L.logging.INFO)

    from PyUtils.Helpers import ShutUp  # noqa: F401

    # an empty 'entries' option means "all entries"
    if args.entries == '':
        args.entries = -1

    msg.info('comparing tree [%s] in files:', args.tree_name)
    msg.info(' old: [%s]', args.old)
    msg.info(' new: [%s]', args.new)
    msg.info('ignore leaves: %s', args.ignore_leaves)
    msg.info('enforce leaves: %s', args.enforce_leaves)
    msg.info('leaves prefix: %s', args.leaves_prefix)
    msg.info('hacks: %s', args.known_hacks)
    msg.info('entries: %s', args.entries)
    msg.info('mode: %s', args.mode)
    msg.info('error mode: %s', args.error_mode)
    msg.info('order trees: %s', args.order_trees)

    import PyUtils.Helpers as H
    with H.ShutUp():
        fold = ru.RootFileDumper(args.old, args.tree_name)
        fnew = ru.RootFileDumper(args.new, args.tree_name)
        pass

    def tree_infos(tree, args):
        """Return the number of entries and the set of (non-ignored) leaf
        names of `tree`, with `args.leaves_prefix` removed from the names."""
        nentries = tree.GetEntriesFast()
        # l.GetBranch().GetName() gives the full leaf path name
        leaves = [
            l.GetBranch().GetName() for l in tree.GetListOfLeaves()
            if l.GetBranch().GetName() not in args.ignore_leaves
        ]
        if args.leaves_prefix:
            leaves = [l.replace(args.leaves_prefix, '') for l in leaves]
        return {
            'entries': nentries,
            'leaves': set(leaves),
        }

    def ordered_indices(tree, reverse_order=False):
        """Return the entry indices of `tree` sorted by event number.

        The event number is taken from the first event-info branch found
        among a fixed list of known names. If none is found for an entry,
        reading stops there and only the entries read so far are returned.
        """
        from collections import OrderedDict
        import operator

        dict_in = {}
        nevts = tree.GetEntriesFast()

        for idx in range(0, nevts):
            if idx % 100 == 0:
                msg.debug('Read {} events from the input so far'.format(idx))
            tree.GetEntry(idx)
            # try the known event-info flavours one after the other
            if hasattr(tree, 'xAOD::EventAuxInfo_v2_EventInfoAux.'):
                event_info = getattr(tree, 'xAOD::EventAuxInfo_v2_EventInfoAux.')
                event_number = event_info.eventNumber
            elif hasattr(tree, 'xAOD::EventAuxInfo_v1_EventInfoAux.'):
                event_info = getattr(tree, 'xAOD::EventAuxInfo_v1_EventInfoAux.')
                event_number = event_info.eventNumber
            elif hasattr(tree, 'EventInfoAux.'):
                event_info = getattr(tree, 'EventInfoAux.')
                event_number = event_info.eventNumber
            elif hasattr(tree, 'EventInfo_p4_McEventInfo'):
                event_info = getattr(tree, 'EventInfo_p4_McEventInfo')
                event_number = event_info.m_event_ID.m_event_number
            elif hasattr(tree, 'EventInfo_p4_ByteStreamEventInfo'):
                event_info = getattr(tree, 'EventInfo_p4_ByteStreamEventInfo')
                event_number = event_info.m_event_ID.m_event_number
            elif hasattr(tree, 'ByteStreamEventInfo'):
                event_info = getattr(tree, 'ByteStreamEventInfo')
                event_number = event_info.m_event_ID.m_event_number
            else:
                msg.error('Cannot read event info, will bail out.')
                break
            msg.debug('Idx : EvtNum {:10d} : {}'.format(idx, event_number))
            dict_in[idx] = event_number

        # Sort the dictionary by event numbers
        dict_out = OrderedDict(
            sorted(dict_in.items(),
                   key=operator.itemgetter(1),
                   reverse=reverse_order))

        # Write out the ordered index list
        return [idx for idx in dict_out]

    def diff_tree(fold, fnew, args):
        """Compare the dumps of `fold` and `fnew`.

        Returns the number of differing leaves, plus 0.5 if either dumper
        ended up with `allgood` set to False.
        """
        infos = {
            'old': tree_infos(fold.tree, args),
            'new': tree_infos(fnew.tree, args),
        }

        # by default only the entries present in both files are compared
        nentries = min(infos['old']['entries'], infos['new']['entries'])
        itr_entries = nentries
        if args.entries in (-1, '', '-1'):
            #msg.info('comparing over [%s] entries...', nentries)
            itr_entries = nentries
            if infos['old']['entries'] != infos['new']['entries']:
                msg.info('different numbers of entries:')
                msg.info(' old: [%s]', infos['old']['entries'])
                msg.info(' new: [%s]', infos['new']['entries'])
                msg.info('=> comparing [%s] first entries...', nentries)
        else:
            # user-provided entries specification
            itr_entries = args.entries
            pass
        msg.info('comparing over [%s] entries...', itr_entries)

        # leaves present in only one of the files are reported, then skipped
        old_leaves = infos['old']['leaves'] - infos['new']['leaves']
        if old_leaves:
            msg.warning('the following variables exist only in the old file !')
            for l in old_leaves:
                msg.warning(' - [%s]', l)
        new_leaves = infos['new']['leaves'] - infos['old']['leaves']
        if new_leaves:
            msg.warning('the following variables exist only in the new file !')
            for l in new_leaves:
                msg.warning(' - [%s]', l)

        # need to remove trailing dots as they confuse reach_next()
        skip_leaves = [
            l.rstrip('.')
            for l in old_leaves | new_leaves | set(args.ignore_leaves)
        ]
        for l in skip_leaves:
            msg.debug('skipping [%s]', l)

        leaves = infos['old']['leaves'] & infos['new']['leaves']
        msg.info('comparing [%s] leaves over entries...', len(leaves))
        n_good = 0
        n_bad = 0
        import collections
        # number of differences found, per branch/leaf name
        summary = collections.defaultdict(int)

        if args.order_trees:
            # explicit, event-number-ordered index lists for each file,
            # cut to the requested number of entries (all if not positive)
            slice_max = int(itr_entries) if int(itr_entries) > 0 else None
            itr_entries_old = ordered_indices(fold.tree)[0:slice_max]
            itr_entries_new = ordered_indices(fnew.tree)[0:slice_max]
            msg.debug('List of old indices {}'.format(itr_entries_old))
            msg.debug('List of new indices {}'.format(itr_entries_new))
        else:
            itr_entries_old = itr_entries
            itr_entries_new = itr_entries

        old_dump_iter = fold.dump(args.tree_name, itr_entries_old)
        new_dump_iter = fnew.dump(args.tree_name, itr_entries_new)

        def leafname_fromdump(entry):
            """Dotted leaf name of a dump entry, without the purely numeric
            components of its name path (entry[2])."""
            return '.'.join([s for s in entry[2] if not s.isdigit()])

        def reach_next(dump_iter, skip_leaves, leaves_prefix=None):
            """Return the next entry of `dump_iter` whose leaf is not to be
            skipped, or None once the iterator is exhausted."""
            keep_reading = True
            while keep_reading:
                try:
                    entry = next(dump_iter)
                except StopIteration:
                    return None
                entry[2][0] = entry[2][0].rstrip('.\0')  # clean branch name
                if leaves_prefix:
                    entry[2][0] = entry[2][0].replace(leaves_prefix, '')
                # skip the entry if any dotted prefix of its leaf name, or
                # any single component of it, is listed in skip_leaves
                name = []
                skip = False
                for n in leafname_fromdump(entry).split('.'):
                    name.append(n)
                    if '.'.join(name) in skip_leaves or n in skip_leaves:
                        skip = True
                        break
                if not skip:
                    return entry
                # print('SKIP:', leafname_fromdump(entry))
            pass

        # read_old/read_new tell which side(s) to advance in the next
        # iteration; a side is held back while the other one catches up
        read_old = True
        read_new = True
        d_old = None
        d_new = None

        while True:
            if read_old:
                prev_d_old = d_old
                d_old = reach_next(old_dump_iter, skip_leaves,
                                   args.leaves_prefix)
            if read_new:
                prev_d_new = d_new
                d_new = reach_next(new_dump_iter, skip_leaves,
                                   args.leaves_prefix)

            # both dumps exhausted: done
            if not d_new and not d_old:
                break

            read_old = True
            read_new = True

            # with ordered trees the entry indices legitimately differ, so
            # only the values (last element) are compared there
            if (args.order_trees and d_old and d_new
                    and d_old[-1] == d_new[-1]) or d_old == d_new:
                n_good += 1
                continue

            if d_old:
                tree_name, ientry, name, iold = d_old
            if d_new:
                tree_name, jentry, name, inew = d_new

            # for regression testing we should have NAN == NAN
            # NOTE(review): iold/inew are only (re)bound above when the
            # corresponding entry exists; if one side is exhausted they may
            # be stale or unbound here -- confirm
            if args.nan_equal:
                if all([isinstance(x, Real) and isnan(x)
                        for x in [iold, inew]]):
                    n_good += 1
                    continue

            # FIXME: that's a plain (temporary?) hack
            if name[-1] in args.known_hacks:
                continue

            n_bad += 1

            # in synch: both entries describe the same leaf, only the value
            # differs (the entry index is not compared for ordered trees)
            if not args.order_trees:
                in_synch = d_old and d_new and d_old[:-1] == d_new[:-1]
            else:
                in_synch = d_old and d_new and d_old[0] == d_new[0] and d_old[
                    2] == d_new[2]
            if not in_synch:
                if _is_detailed():
                    if d_old:
                        print('::sync-old %s' % '.'.join(["%03i" % ientry] +
                                                         list(map(str, d_old[2]))))
                    else:
                        print('::sync-old ABSENT')
                    if d_new:
                        print('::sync-new %s' % '.'.join(["%03i" % jentry] +
                                                         list(map(str, d_new[2]))))
                    else:
                        print('::sync-new ABSENT')
                    pass
                # remember for later
                if not d_old:
                    fold.allgood = False
                    summary[d_new[2][0]] += 1
                elif not d_new:
                    fnew.allgood = False
                    summary[d_old[2][0]] += 1
                else:
                    # both present: the side which is "behind" (smaller
                    # entry.branch string) is advanced alone next time
                    branch_old = '.'.join(["%03i" % ientry, d_old[2][0]])
                    branch_new = '.'.join(["%03i" % jentry, d_new[2][0]])
                    if branch_old < branch_new:
                        if _is_detailed():
                            print('::sync-old skipping entry')
                        summary[d_old[2][0]] += 1
                        fnew.allgood = False
                        read_new = False
                    elif branch_old > branch_new:
                        if _is_detailed():
                            print('::sync-new skipping entry')
                        summary[d_new[2][0]] += 1
                        fold.allgood = False
                        read_old = False
                    else:
                        # MN: difference in the leaves
                        # NOTE(review): prev_d_old/prev_d_new are None until
                        # a second entry has been read on that side;
                        # leafname_fromdump(None) would raise -- confirm
                        # this branch can't be reached that early
                        prev_leaf_old = leafname_fromdump(prev_d_old)
                        prev_leaf_new = leafname_fromdump(prev_d_new)
                        leaf_old = leafname_fromdump(d_old)
                        leaf_new = leafname_fromdump(d_new)
                        if prev_leaf_old == prev_leaf_new:
                            # array size difference?
                            if leaf_old == leaf_new and leaf_old == prev_leaf_old:
                                # could be a size difference in >1 dim arrays
                                # hard to sync, skipping both
                                pass
                            elif leaf_old == prev_leaf_old:
                                # old has bigger array, skip old entry
                                read_new = False
                                if _is_detailed():
                                    print('::sync-old skipping entry')
                                summary[leaf_old] += 1
                            elif leaf_new == prev_leaf_new:
                                # new has bigger array, skip new entry
                                read_old = False
                                if _is_detailed():
                                    print('::sync-new skipping entry')
                                summary[leaf_new] += 1

                        # no side was held back above: give up on both
                        if read_old and read_new:
                            summary[d_new[2][0]] += 1
                            if _is_detailed():
                                print('::sync-old+new skipping both entries')
                        fold.allgood = False
                        fnew.allgood = False

                if _is_exit_early():
                    print('*** exit on first error ***')
                    break
                continue

            # same leaf on both sides, different values: report it
            if not args.order_trees:
                n = '.'.join(list(map(str, ["%03i" % ientry] + name)))
            else:
                n = '.'.join(
                    list(map(str, ["%03i.%03i" % (ientry, jentry)] + name)))
            diff_value = 'N/A'
            # difference divided by the mean of both values, in percent;
            # stays 'N/A' when the values don't support the arithmetic
            try:
                diff_value = 50. * (iold - inew) / (iold + inew)
                diff_value = '%.8f%%' % (diff_value, )
            except Exception:
                pass
            if _is_detailed():
                print('%s %r -> %r => diff= [%s]' %
                      (n, iold, inew, diff_value))
                pass
            summary[leafname_fromdump(d_old)] += 1

            # a difference in an 'enforced' leaf stops the comparison
            if name[0] in args.enforce_leaves:
                msg.info("don't compare further")
                break
            pass  # loop over events/branches

        msg.info('Found [%s] identical leaves', n_good)
        msg.info('Found [%s] different leaves', n_bad)

        if not _is_summary():
            keys = sorted(summary.keys())
            for n in keys:
                v = summary[n]
                msg.info(' [%s]: %i leaves differ', n, v)
                pass
            pass

        if (not fold.allgood) or (not fnew.allgood):
            msg.info('NOTE: there were errors during the dump')
            msg.info('fold.allgood: %s', fold.allgood)
            msg.info('fnew.allgood: %s', fnew.allgood)
            # makes the result non-zero even if no leaf was counted as bad
            n_bad += 0.5
        return n_bad

    ndiff = diff_tree(fold, fnew, args)
    if ndiff != 0:
        msg.info('files differ!')
        return 2
    msg.info('all good.')
    return 0
def main(args):
    """Check that 2 ROOT files have the same content (containers and sizes).

    The tree `args.tree_name` is dumped from `args.old` and `args.new` with
    `RootFileDumper`, and the two dumps are compared pairwise, in lockstep.

    Attributes read from `args`: tree_name, old, new, ignore_leaves,
    enforce_leaves, known_hacks, entries, mode, error_mode.

    Returns:
        0 if no difference was found, 2 otherwise.
    """
    # kept in a module-level global; presumably read by _is_summary() and
    # _is_exit_early(), which are defined elsewhere -- TODO confirm
    global g_args
    g_args = args

    import PyUtils.RootUtils as ru
    root = ru.import_root()  # noqa: F841

    import PyUtils.Logging as L
    msg = L.logging.getLogger('diff-root')
    msg.setLevel(L.logging.INFO)

    # an empty 'entries' option means "all entries"
    if args.entries == '':
        args.entries = -1

    msg.info('comparing tree [%s] in files:', args.tree_name)
    msg.info(' old: [%s]', args.old)
    msg.info(' new: [%s]', args.new)
    msg.info('ignore leaves: %s', args.ignore_leaves)
    msg.info('enforce leaves: %s', args.enforce_leaves)
    msg.info('hacks: %s', args.known_hacks)
    msg.info('entries: %s', args.entries)
    msg.info('mode: %s', args.mode)
    msg.info('error mode: %s', args.error_mode)

    import PyUtils.Helpers as H
    with H.ShutUp():
        fold = ru.RootFileDumper(args.old, args.tree_name)
        fnew = ru.RootFileDumper(args.new, args.tree_name)

    def tree_infos(tree, args):
        """Return the number of entries and the set of (non-ignored)
        leaf names of `tree`."""
        # compare the leaf *name* with the ignore list: a leaf object is
        # never equal to one of the listed names
        leaves = set(
            leaf.GetName() for leaf in tree.GetListOfLeaves()
            if leaf.GetName() not in args.ignore_leaves
        )
        return {
            'entries': tree.GetEntriesFast(),
            'leaves': leaves,
        }

    def diff_tree(fold, fnew, args):
        """Compare the dumps of `fold` and `fnew`.

        Returns the number of differing leaves, plus 0.5 if either dumper
        ended up with `allgood` set to False.
        """
        infos = {
            'old': tree_infos(fold.tree, args),
            'new': tree_infos(fnew.tree, args),
        }
        n_old = infos['old']['entries']
        n_new = infos['new']['entries']

        # by default only the entries present in both files are compared
        nentries = min(n_old, n_new)
        if args.entries in (-1, '', '-1'):
            itr_entries = nentries
            if n_old != n_new:
                msg.info('different numbers of entries:')
                msg.info(' old: [%s]', n_old)
                msg.info(' new: [%s]', n_new)
                msg.info('=> comparing [%s] first entries...', nentries)
        else:
            # user-provided entries specification, handed as-is to dump()
            itr_entries = args.entries
        msg.info('comparing over [%s] entries...', itr_entries)

        leaves = infos['old']['leaves'] & infos['new']['leaves']
        # symmetric difference: leaves missing from *either* tree, which
        # is what the message below announces
        diff_leaves = infos['old']['leaves'] ^ infos['new']['leaves']
        if diff_leaves:
            msg.info('the following variables exist in only one tree !')
            for leaf in diff_leaves:
                msg.info(' - [%s]', leaf)
        leaves = leaves - set(args.ignore_leaves)

        msg.info('comparing [%s] leaves over entries...', len(leaves))
        n_good = 0
        n_bad = 0
        import collections
        try:
            from itertools import izip
        except ImportError:
            # python3: the builtin zip is already lazy
            izip = zip
        # number of differences found, per branch name
        summary = collections.defaultdict(int)

        dumps = izip(fold.dump(args.tree_name, itr_entries),
                     fnew.dump(args.tree_name, itr_entries))
        for d_old, d_new in dumps:
            tree_name, ientry, name, iold = d_old
            inew = d_new[3]
            # clean the branch name on *both* sides, so that the equality
            # tests below are not thrown off by trailing NULs
            name[0] = name[0].rstrip('\0')
            d_new[2][0] = d_new[2][0].rstrip('\0')

            if name[0] not in leaves:
                continue
            # FIXME: that's a plain (temporary?) hack
            if name[-1] in args.known_hacks:
                continue

            if d_old == d_new:
                n_good += 1
                continue

            n_bad += 1

            # in synch: same tree, entry and name path, only the value
            # differs
            in_synch = d_old[:-1] == d_new[:-1]
            if not in_synch:
                if not _is_summary():
                    print('::sync-old %s' %
                          '.'.join(["%03i" % ientry] + [str(s) for s in d_old[2]]))
                    print('::sync-new %s' %
                          '.'.join(["%03i" % ientry] + [str(s) for s in d_new[2]]))
                summary[name[0]] += 1
                # remember for later
                fold.allgood = False
                fnew.allgood = False

                if _is_exit_early():
                    print("*** exit on first error ***")
                    break
                continue

            # same leaf on both sides, different values: report it
            n = '.'.join([str(s) for s in ["%03i" % ientry] + name])
            diff_value = 'N/A'
            # difference divided by the mean of both values, in percent;
            # stays 'N/A' when the values don't support the arithmetic
            try:
                diff_value = 50. * (iold - inew) / (iold + inew)
                diff_value = '%.8f%%' % (diff_value,)
            except Exception:
                pass
            if not _is_summary():
                print('%s %r -> %r => diff= [%s]' % (n, iold, inew, diff_value))
            summary[name[0]] += 1

            # a difference in an 'enforced' leaf stops the comparison
            if name[0] in args.enforce_leaves:
                msg.info("don't compare further")
                break

        msg.info('Found [%s] identical leaves', n_good)
        msg.info('Found [%s] different leaves', n_bad)

        if not _is_summary():
            for n in sorted(summary.keys()):
                msg.info(' [%s]: %i leaves differ', n, summary[n])

        if (not fold.allgood) or (not fnew.allgood):
            msg.info('NOTE: there were errors during the dump')
            msg.info('fold.allgood: %s', fold.allgood)
            msg.info('fnew.allgood: %s', fnew.allgood)
            # makes the result non-zero even if no leaf was counted as bad
            n_bad += 0.5
        return n_bad

    ndiff = diff_tree(fold, fnew, args)
    if ndiff != 0:
        msg.info('files differ!')
        return 2
    msg.info('all good.')
    return 0
def _pythonize_tfile():
    """Give ROOT's `TFile` python file-like `read`, `seek` and `tell`.

    Loads the "RootUtilsPyROOTDict" dictionary and uses the
    `RootUtils._pythonize_read_root_file` / `_pythonize_tell_root_file`
    helpers it provides to implement the methods installed on `TFile`.
    """
    import cppyy
    root = import_root()
    import PyUtils.Helpers as H
    with H.ShutUp(filters=[
            re.compile('TClass::TClass:0: RuntimeWarning: no dictionary for.*'),
            re.compile('Warning in <TEnvRec::ChangeValue>: duplicate entry.*'),
    ]):
        cppyy.loadDict("RootUtilsPyROOTDict")
        rootutils = getattr(root, "RootUtils")
        pybytes = getattr(rootutils, "PyBytes")  # noqa: F841
        #MN: lines below fail in ROOT6 if PCM from RootUtils is not found
        read_root_file = getattr(rootutils, "_pythonize_read_root_file")
        tell_root_file = getattr(rootutils, "_pythonize_tell_root_file")

    def read(self, size=-1):
        """read([size]) -> read at most size bytes, returned as bytes.

        If the size argument is negative or omitted, read until EOF is
        reached, fetching the content in blocks of 4096 bytes.
        Notice that when in non-blocking mode, less data than what was
        requested may be returned, even if no size parameter was given.
        An empty bytes object is returned when nothing could be read.
        (With python2, bytes and str are the same type.)

        FIXME: probably doesn't follow python file-like conventions...
        """
        SZ = 4096

        # FIXME: Once we drop py2, we can simplify this by using a bytes
        #        object directly instead of PyBytes.
        def fetch(nbytes):
            # one low-level read: the content obtained, or None when
            # nothing (more) could be read
            c_buf = read_root_file(self, nbytes)
            if not (c_buf and c_buf.sz):
                return None
            v = c_buf.buf
            if six.PY3:
                return bytes([ord(v[i]) for i in range(v.size())])
            return ''.join([v[i] for i in range(v.size())])

        if size >= 0:
            chunk = fetch(size)
            # b'' (not '') so that python3 callers always get bytes back,
            # exactly like in the read-until-EOF case below
            return b'' if chunk is None else chunk

        out = []
        while True:
            chunk = fetch(SZ)
            if chunk is None:
                break
            out.append(chunk)
        return b''.join(out)

    root.TFile.read = read
    del read

    root.TFile.seek = root.TFile.Seek
    root.TFile.tell = lambda self: tell_root_file(self)
    return