示例#1
0
 def _recursive_zip(self, forms, hierarchy, key, final_zip=False):
     """Zip ``forms`` level by level, following the nesting of ``hierarchy``.

     Entries of ``hierarchy`` whose value is a mapping are processed by a
     recursive call (which also zips the nested result under the entry's
     name); every other entry is zipped directly, using the record name
     registered in ``self.mixins`` when there is one and ``None``
     otherwise.  ``forms`` is updated in place.

     When ``final_zip`` is true the updated ``forms`` mapping is itself
     zipped under ``key`` and that result is returned; otherwise the
     updated mapping is returned as is.
     """
     for name, node in hierarchy.items():
         if not isinstance(node, collections.abc.Mapping):
             # Leaf entry: zip it on its own, with its mixin if registered.
             record_name = self.mixins.get(name)
             forms[name] = zip_forms(forms[name], name, record_name=record_name)
             continue
         # Nested entry: descend, asking the callee to zip its own level.
         nested_forms = forms.get(name, {})
         forms[name] = self._recursive_zip(nested_forms, node, name, True)
     if not final_zip:
         return forms
     return zip_forms(forms, key, record_name=None)
示例#2
0
    def _build_collections(self, branch_forms):
        """Group branch forms per object and zip each group into one record.

        The last ``/``-separated segment of every key is split on ``.``: the
        first piece, stripped of ``Analysis`` and ``AuxDyn``, names the
        object and the remaining pieces (re-joined with ``.``) name the
        field.  For each object, the first form whose class contains
        ``List`` while its content's class does not also contributes an
        ``_eventindex`` field built by ``self._create_eventindex_form``.

        Returns a dict mapping object names to zipped forms whose content
        carries a ``collection_name`` parameter.  Objects for which zipping
        raises ``NotImplementedError`` are reported with a warning and
        omitted.
        """
        grouped = defaultdict(list)
        indexed = defaultdict(bool)
        for key, ak_form in branch_forms.items():
            leaf_parts = key.split("/")[-1].split(".")
            sub_key = ".".join(leaf_parts[1:])
            objname = leaf_parts[0].replace("Analysis", "").replace("AuxDyn", "")

            # Temporary hack to give ElementLinks the correct type: uproot
            # loses the type information on the way and they end up int64.
            if self._hack_for_elementlink_int64:
                try:
                    for link_field in ("m_persIndex", "m_persKey"):
                        link_form = ak_form["content"]["content"]["contents"][
                            link_field]
                        link_form["itemsize"] = 8
                        link_form["primitive"] = "int64"
                except KeyError:
                    # Not an ElementLink-shaped form; nothing to patch.
                    pass

            members = grouped[objname]
            members.append(((key, sub_key), ak_form))

            # Only the first single-jagged list column of an object provides
            # the event index.
            if indexed[objname]:
                continue
            if "List" not in ak_form["class"]:
                continue
            if "List" in ak_form["content"]["class"]:
                continue
            eventindex_form = self._create_eventindex_form(ak_form, key)
            members.append(((key, "_eventindex"), eventindex_form))
            indexed[objname] = True

        # Zip every group into a single record form.
        contents = {}
        for objname, members in grouped.items():
            try:
                contents[objname] = zip_forms(
                    {field: form for (_, field), form in members},
                    objname,
                    self.mixins.get(objname, None),
                )
                record = contents[objname]["content"]
                record["parameters"] = dict(
                    record.get("parameters", {}), collection_name=objname)
            except NotImplementedError:
                warnings.warn(f"Can't zip collection {objname}")
        return contents
示例#3
0
 def _tlorentz_vectorize(objname, form):
     """Replace a nested ``fE``/``fP`` record in ``form`` by a LorentzVector.

     The form is followed through its ``"content"`` entries.  The first
     node whose ``"contents"`` has exactly the keys ``fE`` and ``fP`` is
     replaced by the result of zipping ``fP``'s ``fX``/``fY``/``fZ`` and
     ``fE`` into an ``x``/``y``/``z``/``t`` record named
     ``"LorentzVector"``.  If the walk reaches a node without
     ``"content"`` first, nothing is changed.

     Returns the (possibly modified in place) ``form``, or the zipped
     record itself when ``form`` is the matching node.
     """
     parent = None
     node = form
     while True:
         # The record check comes first: a RecordArray has no "content".
         if {"fE", "fP"} == node.get("contents", {}).keys():
             momentum = node["contents"]["fP"]["contents"]
             vector = zip_forms(
                 {
                     "x": momentum["fX"],
                     "y": momentum["fY"],
                     "z": momentum["fZ"],
                     "t": node["contents"]["fE"],
                 },
                 objname,
                 "LorentzVector",
             )
             if parent is None:
                 return vector
             parent["content"] = vector
             return form
         # No "content" (e.g. a NumpyArray): nothing further to rewrite.
         if "content" not in node:
             return form
         parent, node = node, node["content"]
示例#4
0
    def _build_collections(self, branch_forms):
        """Group branch forms per object and zip each group into one record.

        The last ``/``-separated segment of every key is split on ``.``: the
        first piece, stripped of ``Analysis`` and ``AuxDyn``, names the
        object and the remaining pieces (re-joined with ``.``) name the
        field.  For each object, the first form whose class contains
        ``List`` while its content's class does not also contributes an
        ``_eventindex`` field built by ``self._create_eventindex_form``.

        Each group is zipped with ``bypass=True``.  Returns a dict mapping
        object names to zipped forms whose content carries a
        ``collection_name`` parameter.  Objects for which zipping raises
        ``NotImplementedError`` are reported with a warning and omitted.
        """
        grouped = defaultdict(list)
        indexed = defaultdict(bool)
        for key, ak_form in branch_forms.items():
            object_part, *field_parts = key.split("/")[-1].split(".")
            sub_key = ".".join(field_parts)
            objname = object_part.replace("Analysis", "").replace("AuxDyn", "")

            members = grouped[objname]
            members.append(((key, sub_key), ak_form))

            # Only the first single-jagged list column of an object provides
            # the event index.
            if indexed[objname]:
                continue
            if "List" not in ak_form["class"]:
                continue
            if "List" in ak_form["content"]["class"]:
                continue
            eventindex_form = self._create_eventindex_form(ak_form, key)
            members.append(((key, "_eventindex"), eventindex_form))
            indexed[objname] = True

        # Zip every group into a single record form.
        contents = {}
        for objname, members in grouped.items():
            try:
                contents[objname] = zip_forms(
                    {field: form for (_, field), form in members},
                    objname,
                    self.mixins.get(objname, None),
                    bypass=True,
                )
                record = contents[objname]["content"]
                record["parameters"] = dict(
                    record.get("parameters", {}), collection_name=objname)
            except NotImplementedError:
                warnings.warn(f"Can't zip collection {objname}")
        return contents
示例#5
0
    def _build_collections(self, branch_forms):
        """Turn flat ``<name>/<name>.<field>`` branch forms into collections.

        ``branch_forms`` is modified in place: nested ``fE``/``fP`` records
        are replaced by LorentzVector records and an ``o<name>`` offsets
        form is added for every ``<name>_size`` branch.  The returned dict
        holds, for each collection ``<name>``, the offsets form under
        ``"<name>.offsets"`` and the zipped collection under ``"<name>"``
        (only its content when ``<name>`` is in ``self.singletons``).

        Raises ``KeyError`` when a collection has no ``o<name>`` offsets.
        """

        def _vectorize(label, node):
            # A record holding exactly fE and fP becomes an x/y/z/t vector.
            members = node.get("contents", {})
            if {"fE", "fP"} == members.keys():
                momentum = members["fP"]["contents"]
                return zip_forms(
                    {
                        "x": momentum["fX"],
                        "y": momentum["fY"],
                        "z": momentum["fZ"],
                        "t": members["fE"],
                    },
                    label,
                    "LorentzVector",
                )
            # Otherwise keep descending while there is a "content" to visit;
            # forms without one (e.g. a NumpyArray) are returned untouched.
            if "content" in node:
                node["content"] = _vectorize(label, node["content"])
            return node

        # Preprocess Lorentz vectors (recursively) before anything else.
        for branch in branch_forms:
            branch_forms[branch] = _vectorize(branch, branch_forms[branch])

        # Collection names are the leading path components, minus the
        # "<name>_size" counters.
        names = {k.split("/")[0] for k in branch_forms}
        names -= {k for k in names if k.endswith("_size")}

        # Create offsets virtual arrays from the counters.
        for name in names:
            size_key = f"{name}_size"
            if size_key in branch_forms:
                branch_forms[f"o{name}"] = transforms.counts2offsets_form(
                    branch_forms[size_key])

        output = {}
        for name in names:
            # Every delphes collection is a list, so offsets must exist.
            offsets = branch_forms[f"o{name}"]
            output[f"{name}.offsets"] = offsets
            mixin = self.mixins.get(name, "NanoCollection")

            prefix = name + "/" + name
            skip = 2 * len(name) + 2  # length of "<name>/<name>."
            content = {
                k[skip:]: branch_forms[k]
                for k in branch_forms if k.startswith(prefix)
            }
            collection = zip_forms(content,
                                   name,
                                   record_name=mixin,
                                   offsets=offsets)
            output[name] = collection
            record = collection["content"]

            # Update docstrings; done before the renaming below so lookups
            # use the original field names.
            for field_name, field_form in record["contents"].items():
                if "parameters" not in field_form:
                    continue
                params = field_form["parameters"]
                params["__doc__"] = self.docstrings.get(
                    field_name,
                    params.get("__doc__", "no docstring available"),
                )

            # Handle branches named like [4] and [5].
            record["contents"] = {
                k.replace("[", "_").replace("]", ""): v
                for k, v in record["contents"].items()
            }
            record["parameters"].update({
                "__doc__": offsets["parameters"]["__doc__"],
                "collection_name": name,
            })

            if name in self.singletons:
                # Flatten: promote the inner content, hiding one nested
                # dimension.
                output[name] = record

        return output
示例#6
0
    def _build_collections(self, branch_forms):
        """Assemble flat branch forms into collection forms.

        ``branch_forms`` is modified in place and returned.  Three passes:

        1. Split-class branches named ``<obj>/<obj>.fCoordinates.*`` are
           zipped into a single vector record stored under ``<obj>``.
        2. Branches named ``<collection>_<field>`` are gathered into
           ``<collection>``: zipped into a new record (fields named
           ``<field>``) when no such form exists yet, otherwise attached to
           the content of the existing form.
        3. For every ``<collection>_<sub>Counts`` branch, the
           ``<collection>_<sub>`` form is nested into its parent collection
           via ``nest_jagged_forms``.

        Raises:
            ValueError: a split class has an unrecognised set of components.
            NotImplementedError: fields would have to be attached to an
                existing ``<collection>`` form whose class does not start
                with ``ListOffsetArray``.
        """
        # Turn any special classes into the appropriate awkward form
        composite_objects = list(set(k.split("/")[0] for k in branch_forms if "/" in k))

        composite_behavior = {  # Dictionary for overriding the default behavior
            "Tracks": "LorentzVector"
        }
        for objname in composite_objects:
            # grab the * from "objname/objname.*"
            components = set(
                k[2 * len(objname) + 2 :]
                for k in branch_forms
                if k.startswith(objname + "/")
            )
            coords = f"{objname}/{objname}.fCoordinates."
            if components == {
                "fCoordinates.fPt",
                "fCoordinates.fEta",
                "fCoordinates.fPhi",
                "fCoordinates.fE",
            }:
                form = zip_forms(
                    {
                        "pt": branch_forms.pop(coords + "fPt"),
                        "eta": branch_forms.pop(coords + "fEta"),
                        "phi": branch_forms.pop(coords + "fPhi"),
                        "energy": branch_forms.pop(coords + "fE"),
                    },
                    objname,
                    composite_behavior.get(objname, "PtEtaPhiELorentzVector"),
                )
                branch_forms[objname] = form
            elif components == {
                "fCoordinates.fX",
                "fCoordinates.fY",
                "fCoordinates.fZ",
            }:
                form = zip_forms(
                    {
                        "x": branch_forms.pop(coords + "fX"),
                        "y": branch_forms.pop(coords + "fY"),
                        "z": branch_forms.pop(coords + "fZ"),
                    },
                    objname,
                    composite_behavior.get(objname, "ThreeVector"),
                )
                branch_forms[objname] = form
            else:
                raise ValueError(
                    f"Unrecognized class with split branches: {components}"
                )

        # Generating collection from branch name: everything before the last
        # underscore, excluding per-event variables with AK8 variants like
        # Mjj and MT.
        collections = {
            "_".join(k.split("_")[:-1])
            for k in branch_forms
            if "_" in k and k.split("_")[-1] != "AK8"
        }

        subcollections = []

        for cname in collections:
            items = sorted(k for k in branch_forms if k.startswith(cname + "_"))
            if len(items) == 0:
                continue

            # Special pattern parsing for <collection>_<subcollection>Counts branches
            countitems = [x for x in items if x.endswith("Counts")]
            subcols = set(x[:-6] for x in countitems)  # List of subcollection names
            for subcol in subcols:
                # The subcollection's own branches are nested later; only its
                # Counts branch stays a direct field of the parent.
                items = [
                    k for k in items if not k.startswith(subcol) or k.endswith("Counts")
                ]
                subname = subcol[len(cname) + 1 :]
                subcollections.append(
                    {
                        "colname": cname,
                        "subcol": subcol,
                        "countname": subname + "Counts",
                        "subname": subname,
                    }
                )

            if cname not in branch_forms:
                # Field names are everything after "<cname>_".  This must be
                # a slice: indexing a single character would make fields
                # sharing a first letter overwrite each other and would not
                # match the "<sub>Counts" name used when nesting below.
                collection = zip_forms(
                    {k[len(cname) + 1 :]: branch_forms.pop(k) for k in items}, cname
                )
                branch_forms[cname] = collection
            else:
                collection = branch_forms[cname]
                if not collection["class"].startswith("ListOffsetArray"):
                    raise NotImplementedError(
                        f"{cname} isn't a jagged array, not sure what to do"
                    )
                for item in items:
                    itemname = item[len(cname) + 1 :]
                    collection["content"]["contents"][itemname] = branch_forms.pop(
                        item
                    )["content"]

        for sub in subcollections:
            nest_jagged_forms(
                branch_forms[sub["colname"]],
                branch_forms.pop(sub["subcol"]),
                sub["countname"],
                sub["subname"],
            )

        return branch_forms
示例#7
0
文件: nanoaod.py 项目: uccross/coffea
    def _build_collections(self, branch_forms):
        """Build collection forms from flat ``<collection>_<field>`` branches.

        ``branch_forms`` is extended in place with derived forms: ``o<name>``
        offsets for every ``n<name>`` counter, ``<indexer>G`` global indices
        for the entries of ``self.cross_references``, and whatever
        ``self.nested_items``, ``self.nested_index_items`` and
        ``self.special_items`` describe when all their inputs are present.

        Returns a dict with one form per collection name (the part of a
        branch name before the first underscore, counters excluded).
        Missing cross-reference inputs are skipped, with a
        ``RuntimeWarning`` when ``self.warn_missing_crossrefs`` is set.
        """
        # Parse into high-level records (collections, list collections, and
        # singletons); "n<name>" counters are not collections themselves.
        names = {k.split("_")[0] for k in branch_forms}
        names -= {k for k in names if k.startswith("n") and k[1:] in names}
        is_data = "GenPart" not in names

        # Create offsets virtual arrays
        for name in names:
            counts_key = "n" + name
            if counts_key in branch_forms:
                branch_forms["o" + name] = transforms.counts2offsets_form(
                    branch_forms[counts_key]
                )

        # Create global index virtual arrays for indirection
        for indexer, target in self.cross_references.items():
            if target.startswith("Gen") and is_data:
                continue
            offsets_key = "o" + target
            if indexer in branch_forms and offsets_key in branch_forms:
                branch_forms[indexer + "G"] = transforms.local2global_form(
                    branch_forms[indexer], branch_forms[offsets_key]
                )
                continue
            if not self.warn_missing_crossrefs:
                continue
            if indexer not in branch_forms:
                warnings.warn(
                    f"Missing cross-reference index for {indexer} => {target}",
                    RuntimeWarning,
                )
            else:
                warnings.warn(
                    f"Missing cross-reference target for {indexer} => {target}",
                    RuntimeWarning,
                )

        # Create nested indexer from Idx1, Idx2, ... arrays
        for name, indexers in self.nested_items.items():
            if not all(idx in branch_forms for idx in indexers):
                continue
            branch_forms[name] = transforms.nestedindex_form(
                [branch_forms[idx] for idx in indexers]
            )

        # Create nested indexer from n* counts arrays
        for name, (local_counts, target) in self.nested_index_items.items():
            offsets_key = "o" + target
            if local_counts in branch_forms and offsets_key in branch_forms:
                branch_forms[name] = transforms.counts2nestedindex_form(
                    branch_forms[local_counts], branch_forms[offsets_key]
                )

        # Create any special arrays
        for name, (fcn, args) in self.special_items.items():
            if not all(k in branch_forms for k in args):
                continue
            branch_forms[name] = fcn(*(branch_forms[k] for k in args))

        def _member_forms(collection):
            # Forms of all "<collection>_<field>" branches, keyed by <field>.
            prefix = collection + "_"
            return {
                k[len(prefix) :]: form
                for k, form in branch_forms.items()
                if k.startswith(prefix)
            }

        output = {}
        for name in names:
            mixin = self.mixins.get(name, "NanoCollection")
            has_offsets = "o" + name in branch_forms
            is_branch = name in branch_forms
            if has_offsets and not is_branch:
                # list collection
                offsets = branch_forms["o" + name]
                zipped = zip_forms(
                    _member_forms(name), name, record_name=mixin, offsets=offsets
                )
                output[name] = zipped
                zipped["content"]["parameters"].update(
                    {
                        "__doc__": offsets["parameters"]["__doc__"],
                        "collection_name": name,
                    }
                )
            elif has_offsets:
                # list singleton, can use branch's own offsets
                singleton = branch_forms[name]
                output[name] = singleton
                singleton["parameters"].update(
                    {"__array__": mixin, "collection_name": name}
                )
            elif is_branch:
                # singleton
                output[name] = branch_forms[name]
            else:
                # simple collection
                zipped = zip_forms(_member_forms(name), name, record_name=mixin)
                output[name] = zipped
                zipped["parameters"].update({"collection_name": name})

        return output
示例#8
0
    def _build_collections(self, branch_forms):
        """Assemble flat branch forms into collection forms.

        ``branch_forms`` is modified in place and returned.  Three passes:

        1. Split-class branches named ``<obj>/<obj>.fCoordinates.*`` are
           zipped into a single vector record stored under ``<obj>``.
        2. Branches named ``<collection>_<field>`` are gathered into
           ``<collection>``: zipped into a new record (fields named
           ``<field>``) when no such form exists yet, otherwise attached to
           the content of the existing form.
        3. The ``JetsAK8_subjets`` form is nested into ``JetsAK8`` via
           ``nest_jagged_forms``, using the ``subjetsCounts`` field.

        Raises:
            ValueError: a split class has an unrecognised set of components.
            NotImplementedError: fields would have to be attached to an
                existing ``<collection>`` form whose class does not start
                with ``ListOffsetArray``.
            KeyError: the ``JetsAK8`` / ``JetsAK8_subjets`` forms required by
                the final nesting step are absent.
        """
        # Turn any special classes into the appropriate awkward form
        composite_objects = list(
            set(k.split("/")[0] for k in branch_forms if "/" in k))
        for objname in composite_objects:
            # grab the * from "objname/objname.*"
            components = set(k[2 * len(objname) + 2:] for k in branch_forms
                             if k.startswith(objname + "/"))
            coords = f"{objname}/{objname}.fCoordinates."
            if components == {
                    "fCoordinates.fPt",
                    "fCoordinates.fEta",
                    "fCoordinates.fPhi",
                    "fCoordinates.fE",
            }:
                form = zip_forms(
                    {
                        "pt": branch_forms.pop(coords + "fPt"),
                        "eta": branch_forms.pop(coords + "fEta"),
                        "phi": branch_forms.pop(coords + "fPhi"),
                        "energy": branch_forms.pop(coords + "fE"),
                    },
                    objname,
                    "PtEtaPhiELorentzVector",
                )
                branch_forms[objname] = form
            elif components == {
                    "fCoordinates.fX",
                    "fCoordinates.fY",
                    "fCoordinates.fZ",
            }:
                form = zip_forms(
                    {
                        "x": branch_forms.pop(coords + "fX"),
                        "y": branch_forms.pop(coords + "fY"),
                        "z": branch_forms.pop(coords + "fZ"),
                    },
                    objname,
                    "Point",
                )
                branch_forms[objname] = form
            else:
                raise ValueError(
                    f"Unrecognized class with split branches: {components}")

        # Generating collection from branch name: everything before the last
        # underscore, excluding per-event variables with AK8 variants like
        # Mjj and MT.
        collections = {
            "_".join(k.split("_")[:-1])
            for k in branch_forms
            if "_" in k and k.split("_")[-1] != "AK8"
        }

        for cname in collections:
            items = sorted(k for k in branch_forms
                           if k.startswith(cname + "_"))
            if len(items) == 0:
                continue
            if cname == "JetsAK8":
                # Subjet branches are nested at the end; only their counts
                # stay a direct field of JetsAK8.
                items = [
                    k for k in items if not k.startswith("JetsAK8_subjets")
                ]
                items.append("JetsAK8_subjetsCounts")
            if cname == "JetsAK8_subjets":
                items = [k for k in items if not k.endswith("Counts")]
            if cname not in branch_forms:
                # Field names are everything after "<cname>_".  This must be
                # a slice: indexing a single character would make fields
                # sharing a first letter overwrite each other and would not
                # match the "subjetsCounts" name used when nesting below.
                collection = zip_forms(
                    {k[len(cname) + 1:]: branch_forms.pop(k)
                     for k in items}, cname)
                branch_forms[cname] = collection
            else:
                collection = branch_forms[cname]
                if not collection["class"].startswith("ListOffsetArray"):
                    raise NotImplementedError(
                        f"{cname} isn't a jagged array, not sure what to do")
                for item in items:
                    itemname = item[len(cname) + 1:]
                    collection["content"]["contents"][
                        itemname] = branch_forms.pop(item)["content"]

        nest_jagged_forms(
            branch_forms["JetsAK8"],
            branch_forms.pop("JetsAK8_subjets"),
            "subjetsCounts",
            "subjets",
        )

        return branch_forms