Пример #1
0
 def _togetall(self, arrays, cache, bottomup, memo):
     """Merge the ``_togetall`` results of every entry in ``self.types``.

     ``memo`` is a set of ``id`` values for nodes already visited; if this
     node is already in it, an empty OrderedDict is returned and nothing is
     traversed. Otherwise the node is recorded in ``memo`` and the results of
     the entries of ``self.types`` are merged in iteration order.
     """
     ident = id(self)
     if ident in memo:
         return OrderedDict()

     memo.add(ident)
     collected = OrderedDict()
     for member in self.types:
         collected.update(member._togetall(arrays, cache, bottomup, memo))
     return collected
Пример #2
0
    def combinelists(schema):
        """Merge a Record whose fields are all Lists into one List of Record.

        The merge only happens when every field's ``starts`` names a branch
        in ``tree`` and all those branches share the same (non-None)
        ``countbranch`` object. In every other case ``schema`` is returned
        unchanged.
        """
        mergeable = isinstance(schema, oamap.schema.Record) and all(
            isinstance(member, oamap.schema.List)
            for member in schema.fields.values())
        if not mergeable:
            return schema

        merged = oamap.schema.List(
            oamap.schema.Record(OrderedDict(), namespace=namespace),
            namespace=namespace)

        shared = None
        for fieldname, field in schema.items():
            try:
                branch = tree[field.starts]
            except KeyError:
                # the field does not correspond to a branch in the tree
                return schema

            counter = branch.countbranch
            if counter is None:
                return schema

            if shared is None:
                shared = counter
            elif shared is not counter:
                # fields are counted by different branches; cannot merge
                return schema

            merged.content[fieldname] = field.content

        if shared is None:
            # reached only when the Record had no fields at all
            return schema

        countname = shared.name
        if not isinstance(countname, str):
            countname = countname.decode("ascii")
        merged.starts = countname
        merged.stops = countname
        return merged
Пример #3
0
    def accumulate(node):
        """Build a Record schema from the branches yielded by ``node``.

        Branches with sub-branches are handled recursively and kept only if
        the resulting sub-record has fields. Other branches are mapped by the
        type of their ``interpretation``; branches matching none of the
        handled interpretation types are skipped. Field names are the last
        dot-separated component of the branch name.
        """
        def wrap(inner, todims, key):
            # One List layer per entry of todims, keyed "<key>:/<depth>".
            for depth in range(len(todims)):
                bounds = "{0}:/{1}".format(key, depth)
                inner = oamap.schema.List(inner, starts=bounds, stops=bounds, namespace=namespace)
            return inner

        record = oamap.schema.Record(OrderedDict(), namespace=namespace)

        if isinstance(node, uproot.tree.TTreeMethods):
            pairs = node.iteritems(aliases=False)
        else:
            pairs = node.iteritems()

        for branchname, branch in pairs:
            if not isinstance(branchname, str):
                branchname = branchname.decode("ascii")
            fieldname = branchname.split(".")[-1]

            if len(branch.fBranches) > 0:
                nested = accumulate(branch)
                if len(nested.fields) > 0:
                    record[fieldname] = nested
                continue

            interp = branch.interpretation

            if isinstance(interp, (uproot.interp.asdtype, uproot.interp.numerical.asdouble32)):
                leaf = oamap.schema.Primitive(interp.todtype, data=branchname, namespace=namespace)
                record[fieldname] = wrap(leaf, interp.todims, branchname)

            elif isinstance(interp, uproot.interp.asjagged) and isinstance(interp.asdtype, uproot.interp.asdtype):
                leaf = oamap.schema.Primitive(interp.asdtype.todtype, data=branchname, namespace=namespace)
                inner = wrap(leaf, interp.asdtype.todims, branchname)
                record[fieldname] = oamap.schema.List(inner, starts=branchname, stops=branchname, namespace=namespace)

            elif isinstance(interp, uproot.interp.asstrings):
                chars = oamap.schema.Primitive(oamap.interp.strings.CHARTYPE, data=branchname, namespace=namespace)
                record[fieldname] = oamap.schema.List(chars, starts=branchname, stops=branchname, namespace=namespace, name="ByteString")

        return record
Пример #4
0
 def _toget(self, arrays, cache):
     """Return an OrderedDict with a TagsRole and an OffsetsRole entry.

     The two roles are linked to each other (``tags.offsets`` /
     ``offsets.tags``) before being used as keys. Values are
     ``(index, dtype)`` tuples taken from ``self``. ``arrays`` and ``cache``
     are not used here.
     """
     tagsrole = TagsRole(self.tags, self.namespace, None)
     offsetsrole = OffsetsRole(self.offsets, self.namespace, None)

     # cross-link the pair before they are inserted as keys
     tagsrole.offsets = offsetsrole
     offsetsrole.tags = tagsrole

     out = OrderedDict()
     out[tagsrole] = (self.tagsidx, self.tagdtype)
     out[offsetsrole] = (self.offsetsidx, self.offsetdtype)
     return out
Пример #5
0
 def _toget(self, arrays, cache):
     """Return an OrderedDict with a StartsRole and a StopsRole entry.

     The two roles are linked to each other (``starts.stops`` /
     ``stops.starts``) before being used as keys. Both values use
     ``self.posdtype`` paired with the respective index. ``arrays`` and
     ``cache`` are not used here.
     """
     startsrole = StartsRole(self.starts, self.namespace, None)
     stopsrole = StopsRole(self.stops, self.namespace, None)

     # cross-link the pair before they are inserted as keys
     startsrole.stops = stopsrole
     stopsrole.starts = startsrole

     out = OrderedDict()
     out[startsrole] = (self.startsidx, self.posdtype)
     out[stopsrole] = (self.stopsidx, self.posdtype)
     return out
Пример #6
0
def schema(table):
    """Build an oamap List-of-Record schema from ``table.schema``.

    Each column name in ``table.schema.names`` becomes a record field whose
    schema is derived from the column's arrow type and nullability. Array
    names have the form ``"<column>/<index>"``.

    Raises NotImplementedError (carrying the node's type) for a type that is
    neither a ``pyarrow.lib.ListType`` nor a ``pyarrow.lib.DataType``.
    """
    import pyarrow

    def recurse(node, name, index, nullable):
        maskname = "{0}/{1}".format(name, index)
        nextname = "{0}/{1}".format(name, index + 1)

        if isinstance(node, pyarrow.lib.ListType):
            # the list's content starts two indexes further on
            content = recurse(node.value_type, name, index + 2, nullable)
            return oamap.schema.List(content,
                                     nullable=nullable,
                                     starts=nextname,
                                     stops=nextname,
                                     mask=maskname)

        if isinstance(node, pyarrow.lib.DataType):
            return oamap.schema.Primitive(node.to_pandas_dtype(),
                                          nullable=nullable,
                                          data=nextname,
                                          mask=maskname)

        raise NotImplementedError(type(node))

    columns = OrderedDict()
    for colname in table.schema.names:
        arrowfield = table.schema.field_by_name(colname)
        columns[colname] = recurse(arrowfield.type, colname, 0, arrowfield.nullable)

    record = oamap.schema.Record(columns)
    return oamap.schema.List(record, starts="", stops="")
Пример #7
0
def _schema(tree, namespace=None):
    """Build the generic ROOT schema for ``tree`` and regroup its fields.

    Starting from ``oamap.backend.root._schema(tree, namespace=namespace)``:

    * List fields named ``<group>_<field>`` whose ``starts`` names a branch
      in ``tree`` are moved into a shared ``List(Record)`` called ``<group>``,
      bounded by that branch's count branch.
    * Other fields containing ``"MET_"`` or starting with ``"LHE_"``,
      ``"Pileup_"`` or ``"PV_"`` are moved into a plain Record named after
      the part before the first underscore.
    * Fields starting with ``"HLT_"`` / ``"Flag_"`` are moved into the
      ``"HLT"`` / ``"Flag"`` records with the prefix removed.

    The top-level record is named ``"Event"`` and the modified schema is
    returned.

    Raises AssertionError if two list fields of the same group are counted by
    different branches.
    """
    if namespace is None:
        # NOTE(review): `path` is not a parameter or local of this function;
        # unless the module defines a global `path`, this raises NameError.
        # Confirm the intended default namespace.
        namespace = "root.cmsnano({0})".format(repr(path))

    schema = oamap.backend.root._schema(tree, namespace=namespace)

    groups = OrderedDict()
    # Iterate over a snapshot: fields are added to and deleted from
    # schema.content inside the loop.
    for name in list(schema.content.keys()):
        if isinstance(schema.content[name], oamap.schema.List) and "_" in name:
            try:
                branch = tree[schema.content[name].starts]
            except KeyError:
                # no matching branch: leave this field where it is
                pass
            else:
                underscore = name.index("_")
                groupname, fieldname = name[:underscore], name[underscore + 1:]
                countbranchname = branch.countbranch.name
                if not isinstance(countbranchname, str):
                    countbranchname = countbranchname.decode("ascii")
                if groupname not in groups:
                    groups[groupname] = schema.content[groupname] = \
                        oamap.schema.List(oamap.schema.Record({}, name=groupname), starts=countbranchname, stops=countbranchname, namespace=namespace)
                assert countbranchname == schema.content[groupname].starts
                groups[groupname].content[fieldname] = schema.content[
                    name].content
                del schema.content[name]

        elif "MET_" in name or name.startswith(("LHE_", "Pileup_", "PV_")):
            underscore = name.index("_")
            groupname, fieldname = name[:underscore], name[underscore + 1:]
            if groupname not in groups:
                groups[groupname] = schema.content[groupname] = \
                    oamap.schema.Record({}, name=groupname)
            groups[groupname][fieldname] = schema.content[name]
            del schema.content[name]

    hlt = oamap.schema.Record({}, name="HLT")
    flag = oamap.schema.Record({}, name="Flag")
    # Snapshot the keys here as well: deleting entries while iterating a live
    # keys() view raises RuntimeError on Python 3.
    for name in list(schema.content.keys()):
        if name.startswith("HLT_"):
            hlt[name[4:]] = schema.content[name]
            del schema.content[name]
        elif name.startswith("Flag_"):
            flag[name[5:]] = schema.content[name]
            del schema.content[name]

    schema.content["HLT"] = hlt
    schema.content["Flag"] = flag
    schema.content.name = "Event"
    return schema
Пример #8
0
 def _togetall(self, arrays, cache, bottomup, memo):
     """Merge ``self.target._togetall(...)`` with this node's own ``_toget``.

     Returns an empty OrderedDict if ``id(self)`` is already in ``memo``;
     otherwise records it there. The node's own entries are included only
     when ``self._required`` is true and ``cache[self.positionsidx]`` is
     None; they are placed after the target's entries when ``bottomup`` is
     true and before them otherwise.
     """
     if id(self) in memo:
         return OrderedDict()
     memo.add(id(self))

     below = self.target._togetall(arrays, cache, bottomup, memo)
     if not self._required or cache[self.positionsidx] is not None:
         return below

     own = self._toget(arrays, cache)
     if bottomup:
         below.update(own)
         return below

     own.update(below)
     return own
Пример #9
0
 def _togetall(self, arrays, cache, bottomup, memo):
     """Merge ``self.content._togetall(...)`` with this node's own ``_toget``.

     Returns an empty OrderedDict if ``id(self)`` is already in ``memo``;
     otherwise records it there. The node's own entries are included only
     when ``self._required`` is true and at least one of
     ``cache[self.startsidx]`` / ``cache[self.stopsidx]`` is None; they are
     placed after the content's entries when ``bottomup`` is true and before
     them otherwise.
     """
     if id(self) in memo:
         return OrderedDict()
     memo.add(id(self))

     below = self.content._togetall(arrays, cache, bottomup, memo)
     if not self._required:
         return below
     if cache[self.startsidx] is not None and cache[self.stopsidx] is not None:
         return below

     own = self._toget(arrays, cache)
     if bottomup:
         below.update(own)
         return below

     own.update(below)
     return own
Пример #10
0
 def _togetall(self, arrays, cache, bottomup, memo):
     """Merge the second base class's ``_togetall`` with this node's ``_toget``.

     The memo key is the one-element tuple ``(id(self),)``, so it does not
     collide with the plain ``id(self)`` key that the delegated
     ``_togetall`` may add. Returns an empty OrderedDict if that key is
     already in ``memo``. The node's own entries are included only when
     ``self._required`` is true and ``cache[self.maskidx]`` is None; they
     come after the delegated entries when ``bottomup`` is true and before
     them otherwise.
     """
     marker = (id(self),)
     if marker in memo:
         return OrderedDict()
     memo.add(marker)

     delegate = self.__class__.__bases__[1]
     below = delegate._togetall(self, arrays, cache, bottomup, memo)
     if not self._required or cache[self.maskidx] is not None:
         return below

     own = self._toget(arrays, cache)
     if bottomup:
         below.update(own)
         return below

     own.update(below)
     return own
Пример #11
0
 def _togetall(self, arrays, cache, bottomup, memo):
     """Merge ``_togetall`` of every possibility with this node's ``_toget``.

     Returns an empty OrderedDict if ``id(self)`` is already in ``memo``;
     otherwise records it there. The node's own entries are included only
     when ``self._required`` is true and at least one of
     ``cache[self.tagsidx]`` / ``cache[self.offsetsidx]`` is None; they are
     placed after the possibilities' entries when ``bottomup`` is true and
     before them otherwise.
     """
     if id(self) in memo:
         return OrderedDict()
     memo.add(id(self))

     below = OrderedDict()
     for option in self.possibilities:
         below.update(option._togetall(arrays, cache, bottomup, memo))

     if not self._required:
         return below
     if cache[self.tagsidx] is not None and cache[self.offsetsidx] is not None:
         return below

     own = self._toget(arrays, cache)
     if bottomup:
         below.update(own)
         return below

     own.update(below)
     return own
Пример #12
0
def schema(table, nullable=True):
    """Build an oamap List-of-Record schema from ``table.schema``.

    Each column name in ``table.schema.names`` becomes a record field whose
    schema is derived from the column's arrow type. The single ``nullable``
    flag is applied to every List and Primitive created. Array names have
    the form ``"<column>/<index>"``.

    Raises NotImplementedError for a type that is neither a
    ``pyarrow.lib.ListType`` nor a ``pyarrow.lib.DataType``.
    """
    def recurse(node, name, index):
        maskname = "{0}/{1}".format(name, index)
        nextname = "{0}/{1}".format(name, index + 1)

        if isinstance(node, pyarrow.lib.ListType):
            # the list's content starts two indexes further on
            content = recurse(node.value_type, name, index + 2)
            return oamap.schema.List(content,
                                     nullable=nullable,
                                     starts=nextname,
                                     stops=nextname,
                                     mask=maskname)

        if isinstance(node, pyarrow.lib.DataType):
            return oamap.schema.Primitive(node.to_pandas_dtype(),
                                          nullable=nullable,
                                          data=nextname,
                                          mask=maskname)

        raise NotImplementedError

    columns = OrderedDict()
    for colname in table.schema.names:
        arrowtype = table.schema.field_by_name(colname).type
        columns[colname] = recurse(arrowtype, colname, 0)

    record = oamap.schema.Record(columns)
    return oamap.schema.List(record, starts="", stops="")
Пример #13
0
 def _toget(self, arrays, cache):
     """Return a one-entry OrderedDict keyed by a PositionsRole.

     The value is the tuple ``(self.positionsidx, self.posdtype)``.
     ``arrays`` and ``cache`` are not used here.
     """
     role = PositionsRole(self.positions)
     out = OrderedDict()
     out[role] = (self.positionsidx, self.posdtype)
     return out
Пример #14
0
            def recurse(parent):
                """Build a schema for the branches yielded by ``parent.items()``.

                Branches with exactly one leaf whose ``fLeafCount`` is not None
                are grouped by that leaf count; each group becomes one List of
                Record. All other branches become direct fields of the returned
                Record. When there are no such direct branches and exactly one
                leaf-count group, the single field is returned instead of the
                enclosing Record.

                Raises ValueError if no branch in ``self.tree`` has leaves equal
                to ``[leafcount]`` for some group.
                """
                flats = []
                lists = OrderedDict()

                # Partition the branches: counted (grouped by leaf count) vs. flat.
                for name, branch in parent.items():
                    if len(branch.fLeaves
                           ) == 1 and branch.fLeaves[0].fLeafCount is not None:
                        leafcount = branch.fLeaves[0].fLeafCount
                        if leafcount not in lists:
                            lists[leafcount] = []
                        lists[leafcount].append((name, branch))
                    else:
                        flats.append((name, branch))

                out = oamap.schema.Record({})

                # Flat branches become fields named by the last dotted component;
                # branches for which frominterp returns None are skipped.
                for name, branch in flats:
                    x = frominterp(name, branch,
                                   uproot.interp.auto.interpret(branch))
                    if x is not None:
                        out[name.split(".")[-1]] = x

                for leafcount, namebranches in lists.items():
                    # Collect the content of every list in this group into one Record.
                    rec = oamap.schema.Record({})
                    for name, branch in namebranches:
                        x = frominterp(name, branch,
                                       uproot.interp.auto.interpret(branch))
                        if x is not None:
                            assert isinstance(x, oamap.schema.List)
                            rec[name.split(".")[-1]] = x.content

                    # Find the branch whose leaves are exactly [leafcount]. After
                    # the break, `branchname` and `branch` stay bound to the match
                    # and are used below.
                    found = False
                    for branchname, branch in self.tree.allitems():
                        if branch.fLeaves == [leafcount]:
                            found = True
                            break
                    if not found:
                        raise ValueError(
                            "could not find a single-leaf branch corresponding to leaf count {0}"
                            .format(leafcount))

                    # `branch` here is the branch found by the search above, not one
                    # of the grouped branches. Name the record after the last
                    # alphanumeric/underscore run of its streamer's fName, if any.
                    if hasattr(branch, "_streamer") and hasattr(
                            branch._streamer, "fName"):
                        name = branch._streamer.fName.decode("ascii")
                        name = re.split("[^a-zA-Z_0-9]", name)[-1]
                        if len(name) > 0:
                            rec.name = name

                    # Groups that produced no fields are dropped.
                    if len(rec.fields) > 0:
                        out[branchname.split(".")[-1]] = oamap.schema.List(
                            rec, starts=branchname, stops=branchname)

                # Choose a name for the returned Record: the parent's streamer fName,
                # else the tree's own name, else none.
                if hasattr(parent, "_streamer") and hasattr(
                        parent._streamer, "fName"):
                    name = parent._streamer.fName.decode("ascii")
                elif isinstance(parent, uproot.tree.TTreeMethods):
                    name = parent.name.decode("ascii")
                else:
                    name = None

                if name is not None:
                    name = re.split("[^a-zA-Z_0-9]", name)[-1]
                    if len(name) > 0:
                        out.name = name

                # Unwrap a Record that only holds one leaf-count group.
                # NOTE(review): if that group produced no fields, `out.fields` would
                # presumably be empty and this unpacking would raise ValueError —
                # confirm whether that can happen.
                if len(flats) == 0 and len(lists) == 1:
                    out, = out.fields.values()

                return out
Пример #15
0
 def _toget(self, arrays, cache):
     """Return a new, empty OrderedDict; ``arrays`` and ``cache`` are unused."""
     nothing = OrderedDict()
     return nothing
Пример #16
0
 def _togetall(self, arrays, cache, bottomup, memo):
     """Return ``self._toget(...)`` on first visit if the data is still needed.

     Returns an empty OrderedDict if ``id(self)`` is already in ``memo``;
     otherwise records it there. ``self._toget(arrays, cache)`` is returned
     only when ``self._required`` is true and ``cache[self.dataidx]`` is
     None; in every other case the result is an empty OrderedDict.
     ``bottomup`` is not used here.
     """
     ident = id(self)
     if ident in memo:
         return OrderedDict()
     memo.add(ident)

     if self._required and cache[self.dataidx] is None:
         return self._toget(arrays, cache)
     return OrderedDict()
Пример #17
0
 def _toget(self, arrays, cache):
     """Return a one-entry OrderedDict keyed by a DataRole.

     The value is the tuple ``(self.dataidx, self.dtype)``. ``arrays`` and
     ``cache`` are not used here.
     """
     role = DataRole(self.data)
     out = OrderedDict()
     out[role] = (self.dataidx, self.dtype)
     return out
Пример #18
0
 def _toget(self, arrays, cache):
     """Return the mask entry followed by the second base class's entries.

     The MaskRole key is constructed with ``self.mask`` and the OrderedDict
     returned by the second base class's ``_toget``; its value is
     ``(self.maskidx, self.maskdtype)``. The base class's entries are then
     merged in after it.
     """
     delegate = self.__class__.__bases__[1]
     inherited = delegate._toget(self, arrays, cache)

     maskrole = MaskRole(self.mask, inherited)
     result = OrderedDict()
     result[maskrole] = (self.maskidx, self.maskdtype)
     result.update(inherited)
     return result