def _togetall(self, arrays, cache, bottomup, memo): if id(self) not in memo: memo.add(id(self)) out = OrderedDict() for x in self.types: out.update(x._togetall(arrays, cache, bottomup, memo)) return out else: return OrderedDict()
def combinelists(schema):
    """Collapse a Record whose fields are all Lists into one List of Records.

    The merge only happens when every field's ``starts`` name resolves to a
    branch in the enclosing ``tree`` and all of those branches refer to the
    very same (non-None) ``countbranch`` object.  In that case the returned
    List uses the count branch's name for both ``starts`` and ``stops`` and
    its Record holds each original field's ``content``.  In every other
    situation the ``schema`` argument is returned unchanged.
    """
    if not isinstance(schema, oamap.schema.Record):
        return schema
    if not all(isinstance(member, oamap.schema.List)
               for member in schema.fields.values()):
        return schema

    merged = oamap.schema.List(
        oamap.schema.Record(OrderedDict(), namespace=namespace),
        namespace=namespace)

    shared = None
    for fieldname, field in schema.items():
        try:
            branch = tree[field.starts]
        except KeyError:
            # the field is not backed by a branch of this tree
            return schema

        counter = branch.countbranch
        if counter is None:
            return schema
        if shared is None:
            shared = counter
        elif shared is not counter:
            # fields counted by different branches cannot share one list
            return schema

        merged.content[fieldname] = field.content

    if shared is None:
        # no field was visited, so there is nothing to merge
        return schema

    label = shared.name
    if not isinstance(label, str):
        label = label.decode("ascii")
    merged.starts = label
    merged.stops = label
    return merged
def accumulate(node):
    """Build a Record schema from the branches found under ``node``.

    Each branch contributes a field named after the last dot-separated part
    of its (ASCII-decoded) name:

    * a branch with sub-branches becomes a nested Record (skipped when that
      Record ends up with no fields);
    * ``asdtype`` / ``asdouble32`` interpretations become a Primitive wrapped
      in one List per entry of ``todims``;
    * ``asjagged`` over ``asdtype`` becomes the same structure wrapped in one
      more List whose ``starts``/``stops`` are the branch name;
    * ``asstrings`` becomes a List of ``CHARTYPE`` named ``"ByteString"``.

    Branches with any other interpretation are left out.
    """
    def wrap_dims(inner, branchname, ndims):
        # one List layer per fixed dimension, keyed "<branchname>:/<i>"
        for depth in range(ndims):
            key = "{0}:/{1}".format(branchname, depth)
            inner = oamap.schema.List(inner, starts=key, stops=key, namespace=namespace)
        return inner

    out = oamap.schema.Record(OrderedDict(), namespace=namespace)

    if isinstance(node, uproot.tree.TTreeMethods):
        pairs = node.iteritems(aliases=False)
    else:
        pairs = node.iteritems()

    for branchname, branch in pairs:
        if not isinstance(branchname, str):
            branchname = branchname.decode("ascii")
        fieldname = branchname.split(".")[-1]

        if len(branch.fBranches) > 0:
            nested = accumulate(branch)
            if len(nested.fields) > 0:
                out[fieldname] = nested
            continue

        if isinstance(branch.interpretation, (uproot.interp.asdtype, uproot.interp.numerical.asdouble32)):
            leaf = oamap.schema.Primitive(branch.interpretation.todtype, data=branchname, namespace=namespace)
            out[fieldname] = wrap_dims(leaf, branchname, len(branch.interpretation.todims))

        elif isinstance(branch.interpretation, uproot.interp.asjagged) and isinstance(branch.interpretation.asdtype, uproot.interp.asdtype):
            leaf = oamap.schema.Primitive(branch.interpretation.asdtype.todtype, data=branchname, namespace=namespace)
            inner = wrap_dims(leaf, branchname, len(branch.interpretation.asdtype.todims))
            out[fieldname] = oamap.schema.List(inner, starts=branchname, stops=branchname, namespace=namespace)

        elif isinstance(branch.interpretation, uproot.interp.asstrings):
            chars = oamap.schema.Primitive(oamap.interp.strings.CHARTYPE, data=branchname, namespace=namespace)
            out[fieldname] = oamap.schema.List(chars, starts=branchname, stops=branchname, namespace=namespace, name="ByteString")

    return out
def _toget(self, arrays, cache):
    """Return the tags and offsets roles mapped to ``(cache index, dtype)``.

    A ``TagsRole`` and an ``OffsetsRole`` are created from this node's
    ``tags``/``offsets`` and ``namespace``, then linked to each other through
    their ``offsets`` and ``tags`` attributes.  The result is an
    ``OrderedDict`` with the tags entry first.  ``arrays`` and ``cache`` are
    not used here.
    """
    tagsrole = TagsRole(self.tags, self.namespace, None)
    offsetsrole = OffsetsRole(self.offsets, self.namespace, None)

    # cross-link the two roles before they are used as dictionary keys
    tagsrole.offsets = offsetsrole
    offsetsrole.tags = tagsrole

    wanted = OrderedDict()
    wanted[tagsrole] = (self.tagsidx, self.tagdtype)
    wanted[offsetsrole] = (self.offsetsidx, self.offsetdtype)
    return wanted
def _toget(self, arrays, cache):
    """Return the starts and stops roles mapped to ``(cache index, dtype)``.

    A ``StartsRole`` and a ``StopsRole`` are created from this node's
    ``starts``/``stops`` and ``namespace``, then linked to each other through
    their ``stops`` and ``starts`` attributes.  Both entries use
    ``self.posdtype``; the starts entry comes first in the returned
    ``OrderedDict``.  ``arrays`` and ``cache`` are not used here.
    """
    startsrole = StartsRole(self.starts, self.namespace, None)
    stopsrole = StopsRole(self.stops, self.namespace, None)

    # cross-link the two roles before they are used as dictionary keys
    startsrole.stops = stopsrole
    stopsrole.starts = startsrole

    wanted = OrderedDict()
    wanted[startsrole] = (self.startsidx, self.posdtype)
    wanted[stopsrole] = (self.stopsidx, self.posdtype)
    return wanted
def schema(table):
    """Describe a pyarrow table as an oamap schema.

    The result is a List (with empty-string ``starts``/``stops``) of a Record
    holding one field per name in ``table.schema.names``.  Each column type
    is converted recursively: a ``ListType`` becomes a List and any other
    ``DataType`` becomes a Primitive of ``to_pandas_dtype()``.  Array names
    have the form ``"<column>/<index>"``; a node at ``index`` uses ``index``
    for its mask, ``index + 1`` for its starts/stops or data, and list
    contents continue at ``index + 2``.  Nullability is taken from the
    column's field and applied to every level of that column.

    Raises:
        NotImplementedError: for a type node that is not a pyarrow DataType.
    """
    import pyarrow

    def recurse(node, name, index, nullable):
        if isinstance(node, pyarrow.lib.ListType):
            content = recurse(node.value_type, name, index + 2, nullable)
            bounds = "{0}/{1}".format(name, index + 1)
            return oamap.schema.List(content,
                                     nullable=nullable,
                                     starts=bounds,
                                     stops=bounds,
                                     mask="{0}/{1}".format(name, index))

        if isinstance(node, pyarrow.lib.DataType):
            return oamap.schema.Primitive(node.to_pandas_dtype(),
                                          nullable=nullable,
                                          data="{0}/{1}".format(name, index + 1),
                                          mask="{0}/{1}".format(name, index))

        raise NotImplementedError(type(node))

    columns = OrderedDict()
    for colname in table.schema.names:
        column = table.schema.field_by_name(colname)
        columns[colname] = recurse(column.type, colname, 0, column.nullable)

    return oamap.schema.List(oamap.schema.Record(columns), starts="", stops="")
def _schema(tree, namespace=None):
    """Build the generic ROOT schema for ``tree`` and regroup its fields.

    Starting from ``oamap.backend.root._schema(tree, namespace=...)``, the
    top-level fields of the event record are reorganized by name:

    * List fields named ``<group>_<field>`` whose ``starts`` name resolves to
      a branch of ``tree`` are merged into one List of Records per group,
      using that branch's count-branch name as ``starts``/``stops``;
    * other fields containing ``"MET_"`` or starting with ``"LHE_"``,
      ``"Pileup_"`` or ``"PV_"`` are moved into one Record per group;
    * remaining ``HLT_*`` and ``Flag_*`` fields are moved into the ``HLT``
      and ``Flag`` Records, which are always added (even if empty).

    The event record is finally named ``"Event"``.

    Args:
        tree: the tree to describe; indexed by branch name.
        namespace: namespace for the generated schema nodes.  When None a
            default is derived from ``path`` (see the note below).

    Returns:
        The regrouped schema (the same object returned by the generic
        backend, modified in place).
    """
    if namespace is None:
        # NOTE(review): ``path`` is neither a parameter nor a local of this
        # function; unless an enclosing/module scope defines it, this line
        # raises NameError -- confirm against the rest of the file.
        namespace = "root.cmsnano({0})".format(repr(path))

    schema = oamap.backend.root._schema(tree, namespace=namespace)

    groups = OrderedDict()
    # iterate over a snapshot: fields are added to and removed from
    # schema.content inside the loop
    for name in list(schema.content.keys()):
        if isinstance(schema.content[name], oamap.schema.List) and "_" in name:
            try:
                branch = tree[schema.content[name].starts]
            except KeyError:
                # not backed by a branch of this tree; leave the field alone
                pass
            else:
                underscore = name.index("_")
                groupname, fieldname = name[:underscore], name[underscore + 1:]
                # NOTE(review): assumes branch.countbranch is not None here;
                # a branch without a count branch would raise AttributeError.
                countbranchname = branch.countbranch.name
                if not isinstance(countbranchname, str):
                    countbranchname = countbranchname.decode("ascii")
                if groupname not in groups:
                    groups[groupname] = schema.content[groupname] = \
                        oamap.schema.List(oamap.schema.Record({}, name=groupname),
                                          starts=countbranchname,
                                          stops=countbranchname,
                                          namespace=namespace)
                # every member of a group must be counted by the same branch
                assert countbranchname == schema.content[groupname].starts
                groups[groupname].content[fieldname] = schema.content[name].content
                del schema.content[name]

        elif "MET_" in name or name.startswith("LHE_") or name.startswith("Pileup_") or name.startswith("PV_"):
            underscore = name.index("_")
            groupname, fieldname = name[:underscore], name[underscore + 1:]
            if groupname not in groups:
                groups[groupname] = schema.content[groupname] = \
                    oamap.schema.Record({}, name=groupname)
            groups[groupname][fieldname] = schema.content[name]
            del schema.content[name]

    hlt = oamap.schema.Record({}, name="HLT")
    flag = oamap.schema.Record({}, name="Flag")
    # Snapshot the keys, exactly as the loop above does: deleting entries
    # while iterating a live keys() view raises RuntimeError on Python 3.
    for name in list(schema.content.keys()):
        if name.startswith("HLT_"):
            hlt[name[4:]] = schema.content[name]
            del schema.content[name]
        elif name.startswith("Flag_"):
            flag[name[5:]] = schema.content[name]
            del schema.content[name]

    schema.content["HLT"] = hlt
    schema.content["Flag"] = flag
    schema.content.name = "Event"
    return schema
def _togetall(self, arrays, cache, bottomup, memo): if id(self) not in memo: memo.add(id(self)) out = self.target._togetall(arrays, cache, bottomup, memo) if self._required and cache[self.positionsidx] is None: if bottomup: out.update(self._toget(arrays, cache)) else: out2 = self._toget(arrays, cache) out2.update(out) out = out2 return out else: return OrderedDict()
def _togetall(self, arrays, cache, bottomup, memo): if id(self) not in memo: memo.add(id(self)) out = self.content._togetall(arrays, cache, bottomup, memo) if self._required and (cache[self.startsidx] is None or cache[self.stopsidx] is None): if bottomup: out.update(self._toget(arrays, cache)) else: out2 = self._toget(arrays, cache) out2.update(out) out = out2 return out else: return OrderedDict()
def _togetall(self, arrays, cache, bottomup, memo): key = (id(self),) if key not in memo: memo.add(key) out = self.__class__.__bases__[1]._togetall(self, arrays, cache, bottomup, memo) if self._required and cache[self.maskidx] is None: if bottomup: out.update(self._toget(arrays, cache)) else: out2 = self._toget(arrays, cache) out2.update(out) out = out2 return out else: return OrderedDict()
def _togetall(self, arrays, cache, bottomup, memo): if id(self) not in memo: memo.add(id(self)) out = OrderedDict() for x in self.possibilities: out.update(x._togetall(arrays, cache, bottomup, memo)) if self._required and (cache[self.tagsidx] is None or cache[self.offsetsidx] is None): if bottomup: out.update(self._toget(arrays, cache)) else: out2 = self._toget(arrays, cache) out2.update(out) out = out2 return out else: return OrderedDict()
def schema(table, nullable=True):
    """Describe a pyarrow table as an oamap schema.

    The result is a List (with empty-string ``starts``/``stops``) of a Record
    holding one field per name in ``table.schema.names``.  A ``ListType``
    becomes a List and any other ``DataType`` becomes a Primitive of
    ``to_pandas_dtype()``.  Array names have the form ``"<column>/<index>"``;
    a node at ``index`` uses ``index`` for its mask, ``index + 1`` for its
    starts/stops or data, and list contents continue at ``index + 2``.  The
    single ``nullable`` argument is applied to every generated node.

    Raises:
        NotImplementedError: for a type node that is not a pyarrow DataType.
    """
    def recurse(node, name, index):
        if isinstance(node, pyarrow.lib.ListType):
            content = recurse(node.value_type, name, index + 2)
            bounds = "{0}/{1}".format(name, index + 1)
            return oamap.schema.List(content,
                                     nullable=nullable,
                                     starts=bounds,
                                     stops=bounds,
                                     mask="{0}/{1}".format(name, index))

        if isinstance(node, pyarrow.lib.DataType):
            return oamap.schema.Primitive(node.to_pandas_dtype(),
                                          nullable=nullable,
                                          data="{0}/{1}".format(name, index + 1),
                                          mask="{0}/{1}".format(name, index))

        raise NotImplementedError

    columns = OrderedDict()
    for colname in table.schema.names:
        columns[colname] = recurse(table.schema.field_by_name(colname).type, colname, 0)

    return oamap.schema.List(oamap.schema.Record(columns), starts="", stops="")
def _toget(self, arrays, cache):
    """Return the positions role mapped to ``(positionsidx, posdtype)``.

    The single key is a ``PositionsRole`` built from ``self.positions``.
    ``arrays`` and ``cache`` are not used here.
    """
    role = PositionsRole(self.positions)
    wanted = OrderedDict()
    wanted[role] = (self.positionsidx, self.posdtype)
    return wanted
def recurse(parent):
    """Convert the branches directly under ``parent`` into a Record schema.

    Branches with exactly one leaf that has a leaf count are grouped by that
    leaf count; all others are treated individually.  Individual branches are
    converted with ``frominterp`` and stored under the last dot-separated
    part of their name.  For each leaf-count group, the members' List
    contents are gathered into one Record, the branch of ``self.tree`` whose
    leaves are exactly ``[leafcount]`` is looked up, and -- if the Record has
    any fields -- a List over that Record is stored under the found branch's
    name, which also serves as ``starts`` and ``stops``.

    Record names are taken from a ``_streamer.fName`` attribute when present
    (or from ``parent.name`` for a tree), keeping only the text after the
    last character that is not a letter, digit or underscore.

    When there are no individual branches and exactly one group, the sole
    field of the Record is returned instead of the Record itself.

    Raises:
        ValueError: if no branch of ``self.tree`` matches a leaf count.
    """
    def lastpart(dotted):
        return dotted.split(".")[-1]

    def cleaned(rawname):
        return re.split("[^a-zA-Z_0-9]", rawname)[-1]

    flats = []
    lists = OrderedDict()
    for name, branch in parent.items():
        if len(branch.fLeaves) == 1 and branch.fLeaves[0].fLeafCount is not None:
            lists.setdefault(branch.fLeaves[0].fLeafCount, []).append((name, branch))
        else:
            flats.append((name, branch))

    out = oamap.schema.Record({})

    for name, branch in flats:
        converted = frominterp(name, branch, uproot.interp.auto.interpret(branch))
        if converted is not None:
            out[lastpart(name)] = converted

    for leafcount, members in lists.items():
        rec = oamap.schema.Record({})
        for name, branch in members:
            converted = frominterp(name, branch, uproot.interp.auto.interpret(branch))
            if converted is not None:
                assert isinstance(converted, oamap.schema.List)
                rec[lastpart(name)] = converted.content

        # locate the branch that holds the counter for this group
        for countname, countbranch in self.tree.allitems():
            if countbranch.fLeaves == [leafcount]:
                break
        else:
            raise ValueError("could not find a single-leaf branch corresponding to leaf count {0}".format(leafcount))

        if hasattr(countbranch, "_streamer") and hasattr(countbranch._streamer, "fName"):
            recname = cleaned(countbranch._streamer.fName.decode("ascii"))
            if len(recname) > 0:
                rec.name = recname

        if len(rec.fields) > 0:
            out[lastpart(countname)] = oamap.schema.List(rec, starts=countname, stops=countname)

    if hasattr(parent, "_streamer") and hasattr(parent._streamer, "fName"):
        outname = parent._streamer.fName.decode("ascii")
    elif isinstance(parent, uproot.tree.TTreeMethods):
        outname = parent.name.decode("ascii")
    else:
        outname = None

    if outname is not None:
        outname = cleaned(outname)
        if len(outname) > 0:
            out.name = outname

    if len(flats) == 0 and len(lists) == 1:
        # a lone group replaces the wrapping Record
        out, = out.fields.values()

    return out
def _toget(self, arrays, cache): return OrderedDict()
def _togetall(self, arrays, cache, bottomup, memo): if id(self) not in memo: memo.add(id(self)) if self._required and cache[self.dataidx] is None: return self._toget(arrays, cache) return OrderedDict()
def _toget(self, arrays, cache):
    """Return the data role mapped to ``(dataidx, dtype)``.

    The single key is a ``DataRole`` built from ``self.data``.  ``arrays``
    and ``cache`` are not used here.
    """
    role = DataRole(self.data)
    wanted = OrderedDict()
    wanted[role] = (self.dataidx, self.dtype)
    return wanted
def _toget(self, arrays, cache):
    """Return the mask role followed by the second base class's entries.

    The ``_toget`` of ``self.__class__.__bases__[1]`` is called on this
    object first.  Its result is passed to ``MaskRole`` together with
    ``self.mask``, and the new role is mapped to
    ``(self.maskidx, self.maskdtype)`` as the first entry of the returned
    ``OrderedDict``; the base class's entries follow.
    """
    unmasked = self.__class__.__bases__[1]
    inherited = unmasked._toget(self, arrays, cache)

    maskrole = MaskRole(self.mask, inherited)
    wanted = OrderedDict()
    wanted[maskrole] = (self.maskidx, self.maskdtype)
    wanted.update(inherited)
    return wanted