Пример #1
0
    def testCountWithFilter(self):
        """Check that ``Select`` wrapped around ``Count`` tallies only passing entries.

        ``self.simple`` is split at every index from 0 through 10. Each half is
        filled into its own ``Select`` whose cut is ``x > 0.0``; the
        ``cut.entries`` of each half, and of their sum, must equal the number of
        strictly positive values in the corresponding data. The left aggregator
        is then passed through the scaling, JSON, pickle and name checks.
        """
        def countPositive(values):
            # Expected tally: how many values pass the same ``x > 0.0`` cut.
            return len([v for v in values if v > 0.0])

        # ``range`` exists on both Python 2 and 3; ``xrange`` is Python 2 only.
        for split in range(11):
            left = self.simple[:split]
            right = self.simple[split:]

            leftCounting = Select(named("something", lambda x: x > 0.0),
                                  Count())
            rightCounting = Select(named("something", lambda x: x > 0.0),
                                   Count())

            for datum in left:
                leftCounting.fill(datum)
            for datum in right:
                rightCounting.fill(datum)

            self.assertEqual(leftCounting.cut.entries, countPositive(left))
            self.assertEqual(rightCounting.cut.entries, countPositive(right))

            # Adding the two halves must reproduce the tally over the full sample.
            finalResult = leftCounting + rightCounting
            self.assertEqual(finalResult.cut.entries,
                             countPositive(self.simple))

            self.checkScaling(leftCounting)
            self.checkScaling(leftCounting.toImmutable())
            self.checkJson(leftCounting)
            self.checkPickle(leftCounting)
            self.checkName(leftCounting)
Пример #2
0
 def histogram(self):
     """Return a plain histogram by converting all sub-aggregator values into Counts.

     Returns:
         The result of ``specialize()`` on a new ``CentrallyBin`` built from this
         one's bin centers, ``quantity`` and a copy of ``nanflow``, carrying the
         same ``entries``, in which every bin holds ``Count.ed(v.entries)`` for
         the corresponding sub-aggregator ``v``.
     """
     out = CentrallyBin([c for c, v in self.bins], self.quantity, Count(),
                        self.nanflow.copy())
     out.entries = self.entries
     # ``bins`` is a sequence of (center, sub-aggregator) pairs, so it has to be
     # rebuilt pair by pair: using the center (a float) as a list index is not
     # valid, and each slot must keep its center next to the new Count.
     out.bins = [(c, Count.ed(v.entries)) for c, v in self.bins]
     return out.specialize()
Пример #3
0
def TwoDimensionallySparselyHistogram(xbinWidth, xquantity,
                                      ybinWidth, yquantity,
                                      selection=unweighted,
                                      xorigin=0.0, yorigin=0.0):
    """Build a selection-gated, sparsely binned histogram over two quantities.

    The result is ``Select.ing(selection, ...)`` around a ``SparselyBin.ing`` on
    ``xquantity`` whose per-bin aggregator is another ``SparselyBin.ing`` on
    ``yquantity``; the innermost aggregators are ``Count.ing()``.
    """
    # Inner (y) binning: built first so it can be handed to the outer (x) binning.
    yBinning = SparselyBin.ing(ybinWidth, yquantity,
                               Count.ing(), Count.ing(), yorigin)
    xBinning = SparselyBin.ing(xbinWidth, xquantity,
                               yBinning, Count.ing(), xorigin)
    return Select.ing(selection, xBinning)
Пример #4
0
 def histogram(self):
     """Return a plain histogram in which every sub-aggregator is replaced by a Count.

     A new ``SparselyBin`` is created with this one's ``binWidth``, ``quantity``,
     ``origin`` and a copy of ``nanflow``. Its ``entries`` is set to
     ``float(self.entries)``, its ``contentType`` to ``"Count"``, and each bin
     index is mapped to ``Count.ed`` of the original bin's ``entries``.
     """
     plain = SparselyBin(self.binWidth, self.quantity, Count(),
                         self.nanflow.copy(), self.origin)
     plain.entries = float(self.entries)
     plain.contentType = "Count"
     plain.bins.update(
         (index, Count.ed(sub.entries)) for index, sub in self.bins.items())
     return plain.specialize()
Пример #5
0
def SparselyHistogram(binWidth, quantity=identity, origin=0.0):
    """Create a sparsely binned histogram with a ``Count`` in every bin.

    Parameters:
        binWidth (float): the width of a bin.
        quantity (function returning float or string): function that computes the quantity of interest from
            the data. Passes on all values by default. If a string is given, quantity is set to
            identity(string), in which case that column is picked up from a pandas df.
        origin (float): the left edge of the bin whose index is zero.

    Returns:
        The result of ``SparselyBin.ing`` called with two fresh ``Count.ing()`` aggregators.
    """
    firstCount = Count.ing()
    secondCount = Count.ing()
    return SparselyBin.ing(binWidth, quantity, firstCount, secondCount, origin)
Пример #6
0
 def testCategorizeTrans(self):
     """Run ``self.compare`` on Categorize for the "empty", "noholes" and "withholes" data.

     For each data set the values are floored and converted to a ``"<U5"``
     string array. ``self.compare`` receives a Categorize that reads the column
     from ``self.data`` and a Categorize with a pass-through quantity together
     with the converted array. Returns immediately when the ``Numpy()`` context
     yields ``None``.
     """
     with Numpy() as numpy:
         if numpy is None:
             return
         sys.stderr.write("\n")
         cases = (
             ("CategorizeTrans no data",
              lambda x: numpy.array(numpy.floor(x["empty"]), dtype="<U5"),
              numpy.array(numpy.floor(self.empty), dtype="<U5")),
             ("CategorizeTrans noholes",
              lambda x: numpy.array(numpy.floor(x["noholes"]), dtype="<U5"),
              numpy.array(numpy.floor(self.noholes), dtype="<U5")),
             ("CategorizeTrans holes",
              lambda x: numpy.array(numpy.floor(x["withholes"]), dtype="<U5"),
              numpy.array(numpy.floor(self.withholes), dtype="<U5")),
         )
         for label, fromColumn, categories in cases:
             self.compare(
                 label,
                 Categorize(fromColumn, Count(lambda x: 0.5*x)),
                 self.data,
                 Categorize(lambda x: x, Count(lambda x: 0.5*x)),
                 categories)
Пример #7
0
 def testSparselyBinTrans(self):
     """Run ``self.compare`` on SparselyBin (width 0.1) for the three data sets.

     For "empty", "noholes" and "withholes", ``self.compare`` receives a
     SparselyBin that reads the column from ``self.data`` and a SparselyBin with
     a pass-through quantity together with the matching array on ``self``.
     Returns immediately when the ``Numpy()`` context yields ``None``.
     """
     with Numpy() as numpy:
         if numpy is None:
             return
         sys.stderr.write("\n")
         cases = (
             ("SparselyBinTrans no data", lambda x: x["empty"], self.empty),
             ("SparselyBinTrans noholes", lambda x: x["noholes"], self.noholes),
             ("SparselyBinTrans holes", lambda x: x["withholes"], self.withholes),
         )
         for label, fromColumn, values in cases:
             self.compare(
                 label,
                 SparselyBin(0.1, fromColumn, Count(lambda x: 0.5*x)),
                 self.data,
                 SparselyBin(0.1, lambda x: x, Count(lambda x: 0.5*x)),
                 values)
Пример #8
0
def Histogram(num, low, high, quantity=identity):
    """Create a conventional histogram that is capable of being filled and added.

    Parameters:
        num (int): the number of bins; must be at least one.
        low (float): the minimum-value edge of the first bin.
        high (float): the maximum-value edge of the last bin; must be strictly greater than `low`.
        quantity (function returning float or string): function that computes the quantity of interest from
            the data. Passes on all values by default. If a string is given, quantity is set to
            identity(string), in which case that column is picked up from a pandas df.

    Returns:
        The result of ``Bin.ing`` called with four separate ``Count.ing()`` aggregators.
    """
    # Four independent Count aggregators are passed after the quantity.
    counts = [Count.ing() for _ in range(4)]
    return Bin.ing(num, low, high, quantity, *counts)
Пример #9
0
 def testCentrallyBinTrans(self):
     """Run ``self.compare`` on CentrallyBin with nine fixed centers for the three data sets.

     For "empty", "noholes" and "withholes", ``self.compare`` receives a
     CentrallyBin that reads the column from ``self.data`` and a CentrallyBin
     with a pass-through quantity together with the matching array on ``self``.
     Returns immediately when the ``Numpy()`` context yields ``None``.
     """
     with Numpy() as numpy:
         if numpy is None:
             return
         sys.stderr.write("\n")
         centers = [-3.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 3.0]
         cases = (
             ("CentrallyBinTrans no data", lambda x: x["empty"], self.empty),
             ("CentrallyBinTrans noholes", lambda x: x["noholes"], self.noholes),
             ("CentrallyBinTrans holes", lambda x: x["withholes"], self.withholes),
         )
         for label, fromColumn, values in cases:
             self.compare(
                 label,
                 CentrallyBin(centers, fromColumn, Count(lambda x: 0.5*x)),
                 self.data,
                 CentrallyBin(centers, lambda x: x, Count(lambda x: 0.5*x)),
                 values)
Пример #10
0
 def testBinTrans(self):
     """Run ``self.compare`` on Bin over [-3.0, 3.0] with 10 and 100 bins for the three data sets.

     For each bin count and each of "empty", "noholes" and "withholes",
     ``self.compare`` receives a Bin that reads the column from ``self.data``
     and a Bin with a pass-through quantity together with the matching array on
     ``self``. Returns immediately when the ``Numpy()`` context yields ``None``.
     """
     with Numpy() as numpy:
         if numpy is None:
             return
         sys.stderr.write("\n")
         for bins in [10, 100]:
             cases = (
                 ("BinTrans ({0} bins) no data", lambda x: x["empty"], self.empty),
                 ("BinTrans ({0} bins) noholes", lambda x: x["noholes"], self.noholes),
                 ("BinTrans ({0} bins) holes", lambda x: x["withholes"], self.withholes),
             )
             for template, fromColumn, values in cases:
                 self.compare(
                     template.format(bins),
                     Bin(bins, -3.0, 3.0, fromColumn, Count(lambda x: 0.5*x)),
                     self.data,
                     Bin(bins, -3.0, 3.0, lambda x: x, Count(lambda x: 0.5*x)),
                     values)
Пример #11
0
    def __init__(self,
                 centers,
                 quantity=identity,
                 value=Count(),
                 nanflow=Count()):
        """Create a CentrallyBin that is capable of being filled and added.

        Parameters:
            centers (list of float): the centers of all bins; at least two are required.
            quantity (function returning float): computes the quantity of interest from the data. A ``str`` is
                first passed through ``identity``.
            value (:doc:`Container <histogrammar.defs.Container>`): generates sub-aggregators to put in each bin;
                may be None, in which case ``bins`` is left as None.
            nanflow (:doc:`Container <histogrammar.defs.Container>`): a sub-aggregator to use for data whose quantity
                is NaN; a copy of it is stored.

        Other parameters:
            entries (float): the number of entries, initially 0.0.
            bins (list of float, :doc:`Container <histogrammar.defs.Container>` pairs): the bin centers and
                sub-aggregators in each bin.

        Raises:
            TypeError: if ``centers``, ``value`` or ``nanflow`` fails its type check.
            ValueError: if fewer than two centers are given.
        """
        # The element-wise pair check is only evaluated when ``centers`` is not
        # itself a list or tuple; a list/tuple is accepted whatever it contains.
        # NOTE(review): this looks looser than the error message suggests - confirm intent.
        if not (isinstance(centers, (list, tuple)) or all(
                isinstance(pair, (list, tuple)) and len(pair) == 2
                and isinstance(pair[0], numbers.Real)
                and isinstance(pair[1], Container)
                for pair in centers)):
            raise TypeError(
                "centers ({0}) must be a list of number, Container pairs".
                format(centers))
        if not (value is None or isinstance(value, Container)):
            raise TypeError(
                "value ({0}) must be None or a Container".format(value))
        if not isinstance(nanflow, Container):
            raise TypeError(
                "nanflow ({0}) must be a Container".format(nanflow))
        centerCount = len(centers)
        if centerCount < 2:
            raise ValueError(
                "number of centers ({0}) must be at least two".format(
                    centerCount))

        self.entries = 0.0
        # One fresh zero-valued sub-aggregator per center, in ascending order.
        self.bins = (None if value is None else
                     [(float(center), value.zero())
                      for center in sorted(centers)])

        if isinstance(quantity, str):
            quantity = identity(quantity)
        self.quantity = serializable(quantity)
        self.value = value
        self.nanflow = nanflow.copy()

        super(CentrallyBin, self).__init__()
        self.specialize()
Пример #12
0
def HistogramCut(num, low, high, quantity=identity, selection=unweighted):
    """Create a conventional histogram that is capable of being filled and added, with a selection cut.

    Parameters:
        num (int): the number of bins; must be at least one.
        low (float): the minimum-value edge of the first bin.
        high (float): the maximum-value edge of the last bin; must be strictly greater than `low`.
        quantity (function returning float or string): function that computes the quantity of interest from
            the data. Passes on all values by default. If a string is given, quantity is set to
            identity(string), in which case that column is picked up from a pandas df.
        selection (function returning boolean): function that computes if data point is accepted or not.
            The default is ``unweighted``.

    Returns:
        The result of ``Select.ing`` applied to ``selection`` and a ``Bin.ing`` built with four separate
        ``Count.ing()`` aggregators.
    """
    counts = [Count.ing() for _ in range(4)]
    binning = Bin.ing(num, low, high, quantity, *counts)
    return Select.ing(selection, binning)
Пример #13
0
    def __init__(self,
                 binWidth,
                 quantity=identity,
                 value=Count(),
                 nanflow=Count(),
                 origin=0.0):
        """Create a SparselyBin that is capable of being filled and added.

        Parameters:
            binWidth (float): the width of a bin; must be strictly greater than zero.
            quantity (function returning float): computes the quantity of interest from the data. A ``str`` is
                first passed through ``identity``.
            value (:doc:`Container <histogrammar.defs.Container>`): generates sub-aggregators to put in each bin;
                may be None, in which case ``contentType`` is set to ``"Count"``.
            nanflow (:doc:`Container <histogrammar.defs.Container>`): a sub-aggregator to use for data whose quantity
                is NaN; a copy of it is stored.
            origin (float): the left edge of the bin whose index is 0.

        Other parameters:
            entries (float): the number of entries, initially 0.0.
            bins (dict from int to :doc:`Container <histogrammar.defs.Container>`): the map, probably a hashmap, to
                fill with values when their `entries` become non-zero.

        Raises:
            TypeError: if ``binWidth`` or ``origin`` is not a real number, if ``value`` is neither None nor a
                Container, or if ``nanflow`` is not a Container.
            ValueError: if ``binWidth`` is not strictly positive.
        """
        if not isinstance(binWidth, numbers.Real):
            raise TypeError("binWidth ({0}) must be a number".format(binWidth))
        if value is not None and not isinstance(value, Container):
            # None is accepted by the check above, so the message says so.
            raise TypeError(
                "value ({0}) must be None or a Container".format(value))
        if not isinstance(nanflow, Container):
            raise TypeError(
                "nanflow ({0}) must be a Container".format(nanflow))
        if not isinstance(origin, numbers.Real):
            raise TypeError("origin ({0}) must be a number".format(origin))
        if binWidth <= 0.0:
            raise ValueError(
                "binWidth ({0}) must be greater than zero".format(binWidth))

        self.binWidth = float(binWidth)
        self.entries = 0.0
        self.quantity = serializable(
            identity(quantity) if isinstance(quantity, str) else quantity)
        self.value = value
        # Without a value prototype the content type falls back to "Count".
        if value is not None:
            self.contentType = value.name
        else:
            self.contentType = "Count"
        self.bins = {}
        # Keep a private copy so the caller's nanflow object is not shared.
        self.nanflow = nanflow.copy()
        self.origin = float(origin)
        super(SparselyBin, self).__init__()
        self.specialize()
Пример #14
0
    def __init__(self, quantity=identity, value=Count()):
        """Create a Categorize that is capable of being filled and added.

        Parameters:
            quantity (function returning float): computes the quantity of interest from the data. A ``str`` is
                first passed through ``identity``.
            value (:doc:`Container <histogrammar.defs.Container>`): generates sub-aggregators to put in each bin;
                may be None, in which case ``contentType`` is set to ``"Count"``.

        Other Parameters:
            entries (float): the number of entries, initially 0.0.
            bins (dict from str to :doc:`Container <histogrammar.defs.Container>`): the map, probably a hashmap, to
                fill with values when their `entries` become non-zero.

        Raises:
            TypeError: if ``value`` is neither None nor a Container.
        """
        if not (value is None or isinstance(value, Container)):
            raise TypeError(
                "value ({0}) must be None or a Container".format(value))
        self.entries = 0.0
        if isinstance(quantity, str):
            quantity = identity(quantity)
        self.quantity = serializable(quantity)
        self.value = value
        self.bins = {}
        self.contentType = "Count" if value is None else value.name
        super(Categorize, self).__init__()
        self.specialize()
Пример #15
0
    def testBranch(self):
        """Fill a three-way ``Branch`` from ``self.simple`` and check each member.

        The branch holds a five-bin Histogram over [-3.0, 7.0], a Count, and a
        Deviate of ``x + 100.0``; their contents are asserted through ``i0``,
        ``i1`` and ``i2`` before the scaling, JSON, pickle and name checks run.
        """
        branching = Branch(
            Histogram(5, -3.0, 7.0, lambda x: x),
            Count(),
            Deviate(lambda x: x + 100.0))

        for datum in self.simple:
            branching.fill(datum)

        histogram = branching.i0
        self.assertEqual(histogram.numericalValues, [3.0, 2.0, 2.0, 1.0, 0.0])
        self.assertEqual(histogram.numericalUnderflow, 1.0)
        self.assertEqual(histogram.numericalOverflow, 1.0)
        self.assertEqual(histogram.numericalNanflow, 0.0)

        self.assertEqual(branching.i1.entries, 10.0)

        deviation = branching.i2
        self.assertAlmostEqual(deviation.entries, 10.0)
        self.assertAlmostEqual(deviation.mean, 100.33)
        self.assertAlmostEqual(deviation.variance, 10.8381)

        self.checkScaling(branching)
        self.checkScaling(branching.toImmutable())
        for check in (self.checkJson, self.checkPickle, self.checkName):
            check(branching)
Пример #16
0
    def __init__(self,
                 thresholds,
                 quantity=identity,
                 value=Count(),
                 nanflow=Count()):
        """Create a Stack that is capable of being filled and added.

        Parameters:
            thresholds (list of floats): specifies ``N`` cut thresholds, so the Stack will fill ``N + 1`` aggregators,
                each overlapping the last.
            quantity (function returning float): computes the quantity of interest from the data. A ``str`` is
                first passed through ``identity``.
            value (:doc:`Container <histogrammar.defs.Container>`): generates sub-aggregators for each bin; when
                None, ``bins`` is simply ``tuple(thresholds)``.
            nanflow (:doc:`Container <histogrammar.defs.Container>`): a sub-aggregator to use for data whose quantity
                is NaN; a copy of it is stored.

        Other parameters:
            entries (float): the number of entries, initially 0.0.
            bins (list of float, :doc:`Container <histogrammar.defs.Container>` pairs): the ``N + 1`` thresholds and
                sub-aggregators. (The first threshold is minus infinity; the rest are the ones specified
                by ``thresholds``).

        Raises:
            TypeError: if ``thresholds``, ``value`` or ``nanflow`` fails its type check.
        """
        # The element-wise pair check is only evaluated when ``thresholds`` is
        # not itself a list or tuple; a list/tuple is accepted whatever it holds.
        if not (isinstance(thresholds, (list, tuple)) or all(
                isinstance(pair, (list, tuple)) and len(pair) == 2
                and isinstance(pair[0], numbers.Real)
                and isinstance(pair[1], Container)
                for pair in thresholds)):
            raise TypeError(
                "thresholds ({0}) must be a list of number, Container pairs".
                format(thresholds))
        if not (value is None or isinstance(value, Container)):
            raise TypeError(
                "value ({0}) must be None or a Container".format(value))
        if not isinstance(nanflow, Container):
            raise TypeError(
                "nanflow ({0}) must be a Container".format(nanflow))

        self.entries = 0.0
        if isinstance(quantity, str):
            quantity = identity(quantity)
        self.quantity = serializable(quantity)
        if value is not None:
            # Prepend minus infinity so the first aggregator has no lower cut.
            edges = (float("-inf"), ) + tuple(thresholds)
            self.bins = tuple((float(edge), value.zero()) for edge in edges)
        else:
            self.bins = tuple(thresholds)
        self.nanflow = nanflow.copy()
        super(Stack, self).__init__()
        self.specialize()
Пример #17
0
def CategorizeHistogram(quantity=identity):
    """Create a Categorize histogram for categorical features such as strings and booleans.

    Parameters:
        quantity (function returning float or string): function that computes the quantity of interest from
            the data. Passes on all values by default. If a string is given, quantity is set to
            identity(string), in which case that column is picked up from a pandas df.

    Returns:
        The result of ``Categorize.ing`` with a fresh ``Count.ing()`` as its value.
    """
    perCategory = Count.ing()
    return Categorize.ing(quantity, perCategory)
Пример #18
0
def SparselyProfile(binWidth,
                    binnedQuantity,
                    averagedQuantity,
                    selection=unweighted,
                    origin=0.0):
    """Convenience function for creating sparsely binned binwise averages.

    Wraps, in ``Select.ing(selection, ...)``, a ``SparselyBin.ing`` on
    ``binnedQuantity`` whose per-bin aggregator is
    ``Average.ing(averagedQuantity)``.
    """
    perBinAverage = Average.ing(averagedQuantity)
    binning = SparselyBin.ing(binWidth, binnedQuantity, perBinAverage,
                              Count.ing(), origin)
    return Select.ing(selection, binning)
Пример #19
0
def SparselyProfileErr(binWidth,
                       binnedQuantity,
                       averagedQuantity,
                       selection=unweighted,
                       origin=0.0):
    """Convenience function for creating a sparsely binned "profile plot" with variances.

    Wraps, in ``Select.ing(selection, ...)``, a ``SparselyBin.ing`` on
    ``binnedQuantity`` whose per-bin aggregator is
    ``Deviate.ing(averagedQuantity)``.
    """
    perBinDeviation = Deviate.ing(averagedQuantity)
    binning = SparselyBin.ing(binWidth, binnedQuantity, perBinDeviation,
                              Count.ing(), origin)
    return Select.ing(selection, binning)
Пример #20
0
    def testFraction(self):
        """Fill a ``Fraction`` of Counts from ``self.simple`` and check both tallies.

        The selection is ``x > 0.0``; the numerator must hold 4.0 entries and the
        denominator 10.0, after which the scaling, JSON, pickle and name checks run.
        """
        selection = named("something", lambda x: x > 0.0)
        fracking = Fraction(selection, Count())
        for datum in self.simple:
            fracking.fill(datum)

        self.assertEqual(fracking.numerator.entries, 4.0)
        self.assertEqual(fracking.denominator.entries, 10.0)

        self.checkScaling(fracking)
        self.checkScaling(fracking.toImmutable())
        for check in (self.checkJson, self.checkPickle, self.checkName):
            check(fracking)
Пример #21
0
    def testIrregularlyBin(self):
        """Fill an ``IrregularlyBin`` of Counts from ``self.simple`` and check every bin.

        The (threshold, entries) pairs read from ``bins`` must match the expected
        list, which starts at minus infinity; the scaling, JSON, pickle and name
        checks then run.
        """
        edges = [0.0, 2.0, 4.0, 6.0, 8.0]
        partitioning = IrregularlyBin(edges, named("something", lambda x: x),
                                      Count())
        for datum in self.simple:
            partitioning.fill(datum)

        observed = [(edge, sub.entries) for edge, sub in partitioning.bins]
        expected = [(float("-inf"), 4.0), (0.0, 3.0), (2.0, 2.0),
                    (4.0, 0.0), (6.0, 1.0), (8.0, 0.0)]
        self.assertEqual(observed, expected)

        self.checkScaling(partitioning)
        self.checkScaling(partitioning.toImmutable())
        for check in (self.checkJson, self.checkPickle, self.checkName):
            check(partitioning)
Пример #22
0
    def build(*ys):
        """Create a Stack out of pre-existing containers, which might have been aggregated on different streams.

        Parameters:
            ys (:doc:`Container <histogrammar.defs.Container>` arguments): the aggregators to stack; this function
                adds them together, so they must also have the same binning/bounds/etc.

        Returns:
            The result of ``Stack.ed`` given the summed ``entries`` of all inputs, a list whose i-th item pairs
            NaN with ``ys[i] + ys[i + 1] + ...``, and ``Count.ed(0.0)``.

        Raises:
            TypeError: if any argument is not a Container.
        """
        from functools import reduce
        if not all(isinstance(y, Container) for y in ys):
            raise TypeError("ys must all be Containers")
        entries = sum(y.entries for y in ys)
        # Item i is the left-to-right sum of every container from position i on.
        # ``range`` is used instead of the Python-2-only ``xrange`` so the same
        # code runs on both major versions.
        bins = [(float("nan"), reduce(lambda a, b: a + b, ys[i:]))
                for i in range(len(ys))]
        return Stack.ed(entries, bins, Count.ed(0.0))
Пример #23
0
    def testStack(self):
        """Fill a ``Stack`` of Counts from ``self.simple`` and check every level.

        The (threshold, entries) pairs read from ``bins`` must match the expected
        list, which starts at minus infinity; the scaling, JSON, pickle and name
        checks then run.
        """
        thresholds = [0.0, 2.0, 4.0, 6.0, 8.0]
        stacking = Stack(thresholds, named("something", lambda x: x), Count())
        for datum in self.simple:
            stacking.fill(datum)

        observed = [(threshold, sub.entries) for threshold, sub in stacking.bins]
        expected = [(float("-inf"), 10.0), (0.0, 6.0), (2.0, 3.0),
                    (4.0, 1.0), (6.0, 1.0), (8.0, 0.0)]
        self.assertEqual(observed, expected)

        self.checkScaling(stacking)
        self.checkScaling(stacking.toImmutable())
        for check in (self.checkJson, self.checkPickle, self.checkName):
            check(stacking)
Пример #24
0
    def testCount(self):
        """Check that ``Count`` tallies every filled entry and that counts add up.

        ``self.simple`` is split at every index from 0 through 10. Each half is
        filled into its own ``Count``; the ``entries`` of each half must equal
        the length of its data, and the sum of the two must equal the length of
        ``self.simple``. The left aggregator is then passed through the scaling,
        JSON, pickle and name checks.
        """
        # ``range`` exists on both Python 2 and 3; ``xrange`` is Python 2 only.
        for split in range(11):
            left = self.simple[:split]
            right = self.simple[split:]

            leftCounting = Count()
            rightCounting = Count()

            for datum in left:
                leftCounting.fill(datum)
            for datum in right:
                rightCounting.fill(datum)

            self.assertEqual(leftCounting.entries, len(left))
            self.assertEqual(rightCounting.entries, len(right))

            # Adding the two halves must reproduce the tally over the full sample.
            finalResult = leftCounting + rightCounting
            self.assertEqual(finalResult.entries, len(self.simple))

            self.checkScaling(leftCounting)
            self.checkScaling(leftCounting.toImmutable())
            self.checkJson(leftCounting)
            self.checkPickle(leftCounting)
            self.checkName(leftCounting)
Пример #25
0
    def __init__(self, quantity=identity, value=Count()):
        """Create a Fraction that is capable of being filled and added.

        Parameters:
            quantity (function returning bool or float): computes the quantity of interest from the data and interprets
                it as a selection (multiplicative factor on weight). A ``str`` is first passed through ``identity``.
            value (:doc:`Container <histogrammar.defs.Container>`): generates sub-aggregators for the numerator and
                denominator; when None, neither attribute is set here.

        Other parameters:
            entries (float): the number of entries, initially 0.0.
            numerator (:doc:`Container <histogrammar.defs.Container>`): the sub-aggregator of entries that pass
                the selection.
            denominator (:doc:`Container <histogrammar.defs.Container>`): the sub-aggregator of all entries.

        Raises:
            TypeError: if ``value`` is neither None nor a Container.
        """
        if not (value is None or isinstance(value, Container)):
            raise TypeError("value ({0}) must be None or a Container".format(value))
        self.entries = 0.0
        if isinstance(quantity, str):
            quantity = identity(quantity)
        self.quantity = serializable(quantity)
        if value is not None:
            # Two independent zero-valued aggregators made from the same prototype.
            self.numerator, self.denominator = value.zero(), value.zero()
        super(Fraction, self).__init__()
        self.specialize()
Пример #26
0
 def ing(quantity, value=Count()):
     """Synonym for ``__init__``: build a ``Categorize`` from the same arguments."""
     made = Categorize(quantity, value)
     return made
def Histogram(num, low, high, quantity, selection=unweighted):
    """Convenience function for creating a conventional histogram.

    Returns ``Select.ing(selection, ...)`` around a ``Bin.ing`` built with four
    separate ``Count.ing()`` aggregators.
    """
    counts = [Count.ing() for _ in range(4)]
    binning = Bin.ing(num, low, high, quantity, *counts)
    return Select.ing(selection, binning)
Пример #28
0
 def ing(centers, quantity, value=Count(), nanflow=Count()):
     """Synonym for ``__init__``: build a ``CentrallyBin`` from the same arguments."""
     made = CentrallyBin(centers, quantity, value, nanflow)
     return made
def SparselyHistogram(binWidth, quantity, selection=unweighted, origin=0.0):
    """Convenience function for creating a sparsely binned histogram.

    Returns ``Select.ing(selection, ...)`` around a ``SparselyBin.ing`` built
    with two separate ``Count.ing()`` aggregators.
    """
    binning = SparselyBin.ing(binWidth, quantity,
                              Count.ing(), Count.ing(), origin)
    return Select.ing(selection, binning)
Пример #30
0
 def ing(thresholds, quantity, value=Count(), nanflow=Count()):
     """Synonym for ``__init__``: build a ``Stack`` from the same arguments."""
     made = Stack(thresholds, quantity, value, nanflow)
     return made
Пример #31
0
 def ing(quantity, value=Count()):
     """Synonym for ``__init__``: build a ``Fraction`` from the same arguments."""
     made = Fraction(quantity, value)
     return made
def CategorizeHistogram(quantity, selection=unweighted):
    """Convenience function for creating a categorize histogram.

    Returns ``Select.ing(selection, ...)`` around a ``Categorize.ing`` whose
    value is a fresh ``Count.ing()``.
    """
    categorizing = Categorize.ing(quantity, Count.ing())
    return Select.ing(selection, categorizing)
def SparselyProfile(binWidth, binnedQuantity, averagedQuantity, selection=unweighted, origin=0.0):
    """Convenience function for creating sparsely binned binwise averages.

    Returns ``Select.ing(selection, ...)`` around a ``SparselyBin.ing`` on
    ``binnedQuantity`` whose per-bin aggregator is
    ``Average.ing(averagedQuantity)``.
    """
    perBinAverage = Average.ing(averagedQuantity)
    binning = SparselyBin.ing(binWidth, binnedQuantity, perBinAverage,
                              Count.ing(), origin)
    return Select.ing(selection, binning)
def SparselyProfileErr(binWidth, binnedQuantity, averagedQuantity, selection=unweighted, origin=0.0):
    """Convenience function for creating a sparsely binned "profile plot" with variances.

    Returns ``Select.ing(selection, ...)`` around a ``SparselyBin.ing`` on
    ``binnedQuantity`` whose per-bin aggregator is
    ``Deviate.ing(averagedQuantity)``.
    """
    perBinDeviation = Deviate.ing(averagedQuantity)
    binning = SparselyBin.ing(binWidth, binnedQuantity, perBinDeviation,
                              Count.ing(), origin)
    return Select.ing(selection, binning)
Пример #35
0
def SparselyProfileErr(binWidth, binnedQuantity, averagedQuantity, origin=0.0):
    """Convenience function for creating a sparsely binned profile plot with variances.

    Returns a ``SparselyBin.ing`` on ``binnedQuantity`` whose per-bin aggregator
    is ``Deviate.ing(averagedQuantity)``; no selection is applied.
    """
    perBinDeviation = Deviate.ing(averagedQuantity)
    otherCount = Count.ing()
    return SparselyBin.ing(binWidth, binnedQuantity, perBinDeviation, otherCount, origin)