예제 #1
0
    def from_protobuf(message: NumbersMessage):
        """
        Build a NumberTracker from its protobuf representation.

        Parameters
        ----------
        message : NumbersMessage
            Serialized tracker state.  A non-empty ``theta`` field takes
            precedence over ``compact_theta``; when neither holds data,
            ``theta_sketch=None`` is handed to the constructor.

        Returns
        -------
        number_tracker : NumberTracker
        """
        # Deserialize the first theta field that actually holds data.
        sketch = None
        for field_name in ("theta", "compact_theta"):
            raw = getattr(message, field_name)
            if raw is not None and len(raw) > 0:
                sketch = ThetaSketch.deserialize(raw)
                break

        kwargs = {
            "theta_sketch": sketch,
            "variance": VarianceTracker.from_protobuf(message.variance),
            "histogram": dsketch.deserialize_kll_floats_sketch(message.histogram),
            "frequent_numbers": dsketch.FrequentNumbersSketch.from_protobuf(
                message.frequent_numbers
            ),
        }

        # The typed sub-trackers are only passed on when the message sets them.
        if message.HasField("doubles"):
            kwargs["floats"] = FloatTracker.from_protobuf(message.doubles)
        if message.HasField("longs"):
            kwargs["ints"] = IntTracker.from_protobuf(message.longs)

        return NumberTracker(**kwargs)
예제 #2
0
 def __init__(
     self,
     variance: VarianceTracker = None,
     floats: FloatTracker = None,
     ints: IntTracker = None,
     theta_sketch: ThetaSketch = None,
     histogram: datasketches.kll_floats_sketch = None,
     frequent_numbers: dsketch.FrequentNumbersSketch = None,
 ):
     """
     Store the component trackers, creating a default for each one omitted.

     Every argument left as ``None`` is replaced by a freshly constructed
     instance; the default histogram is a KLL floats sketch built with
     ``DEFAULT_HIST_K``.
     """
     # Component trackers: keep what the caller supplied, otherwise start empty.
     self.variance = VarianceTracker() if variance is None else variance
     self.floats = FloatTracker() if floats is None else floats
     self.ints = IntTracker() if ints is None else ints
     self.theta_sketch = ThetaSketch() if theta_sketch is None else theta_sketch
     self.histogram = (
         datasketches.kll_floats_sketch(DEFAULT_HIST_K)
         if histogram is None
         else histogram
     )
     self.frequent_numbers = (
         dsketch.FrequentNumbersSketch()
         if frequent_numbers is None
         else frequent_numbers
     )
예제 #3
0
    def from_protobuf(message: NumbersMessage):
        """
        Build a NumberTracker from its protobuf representation.

        Only the ``compact_theta`` field is deserialized.  If it is empty but
        the ``theta`` field holds data, a warning is logged and no theta
        sketch is restored (``theta_sketch=None`` is passed on).

        Parameters
        ----------
        message : NumbersMessage
            Serialized tracker state.

        Returns
        -------
        number_tracker : NumberTracker
        """
        sketch = None
        compact = message.compact_theta
        if compact is not None and len(compact) > 0:
            sketch = ThetaSketch.deserialize(compact)
        else:
            legacy = message.theta
            if legacy is not None and len(legacy) > 0:
                # Data exists only in the non-compact field, which is not read.
                logger.warning(
                    "Possible missing data. Non-compact theta sketches are no longer supported"
                )

        kwargs = {
            "theta_sketch": sketch,
            "variance": VarianceTracker.from_protobuf(message.variance),
            "histogram": dsketch.deserialize_kll_floats_sketch(message.histogram),
        }

        # The typed sub-trackers are only passed on when the message sets them.
        if message.HasField("doubles"):
            kwargs["floats"] = FloatTracker.from_protobuf(message.doubles)
        if message.HasField("longs"):
            kwargs["ints"] = IntTracker.from_protobuf(message.longs)

        return NumberTracker(**kwargs)
예제 #4
0
    def from_protobuf(message: StringsMessage):
        """
        Build a StringTracker from its protobuf representation.

        Only the ``compact_theta`` field is deserialized.  If it is empty but
        the ``theta`` field holds data, a warning is logged and no theta
        sketch is restored (``theta_sketch=None`` is passed on).

        Parameters
        ----------
        message : StringsMessage
            Serialized tracker state.

        Returns
        -------
        string_tracker : StringTracker
        """
        sketch = None
        compact = message.compact_theta
        if compact is not None and len(compact) > 0:
            sketch = ThetaSketch.deserialize(compact)
        else:
            legacy = message.theta
            if legacy is not None and len(legacy) > 0:
                # Data exists only in the non-compact field, which is not read.
                logger.warning(
                    "Possible missing data. Non-compact theta sketches are no longer supported"
                )

        # Rebuild each component in turn before assembling the tracker.
        total = message.count
        frequent_items = dsketch.deserialize_frequent_strings_sketch(message.items)
        length_tracker = NumberTracker.from_protobuf(message.length)
        token_length_tracker = NumberTracker.from_protobuf(message.token_length)
        char_positions = CharPosTracker.from_protobuf(message.char_pos_tracker)

        return StringTracker(
            count=total,
            items=frequent_items,
            theta_sketch=sketch,
            length=length_tracker,
            token_length=token_length_tracker,
            char_pos_tracker=char_positions,
        )
예제 #5
0
    def __init__(
        self,
        count: int = None,
        items: frequent_strings_sketch = None,
        theta_sketch: ThetaSketch = None,
        length: NumberTracker = None,
        token_length: NumberTracker = None,
        char_pos_tracker: CharPosTracker = None,
        token_method: Callable[[str], List[str]] = None,
    ):
        """
        Store the tracker's components, creating a default for each one omitted.

        Parameters
        ----------
        count : int, optional
            Starting count; 0 when omitted.
        items : frequent_strings_sketch, optional
            Frequent-strings sketch; when omitted a new one is created with
            ``round(math.log(MAX_ITEMS_SIZE))`` as its constructor argument.
        theta_sketch : ThetaSketch, optional
            Theta sketch; a new ``ThetaSketch()`` when omitted.
        length : NumberTracker, optional
            Tracker stored as ``self.length``; a new ``NumberTracker()`` when
            omitted.
        token_length : NumberTracker, optional
            Tracker stored as ``self.token_length``; a new ``NumberTracker()``
            when omitted.
        char_pos_tracker : CharPosTracker, optional
            Tracker stored as ``self.char_pos_tracker``; a new
            ``CharPosTracker()`` when omitted.
        token_method : callable, optional
            Function taking a string and returning a list of tokens.  Defaults
            to splitting on a single space character.
        """
        # Every default is selected with an explicit ``is None`` test so that a
        # caller-supplied object is never discarded merely because it happens
        # to evaluate as falsy.
        if count is None:
            count = 0
        if items is None:
            items = frequent_strings_sketch(round(math.log(MAX_ITEMS_SIZE)))
        if theta_sketch is None:
            theta_sketch = ThetaSketch()
        if char_pos_tracker is None:
            char_pos_tracker = CharPosTracker()
        if length is None:
            length = NumberTracker()
        if token_length is None:
            token_length = NumberTracker()
        if token_method is None:
            token_method = lambda x: x.split(" ")  # noqa: E731

        self.count = count
        self.items = items
        self.theta_sketch = theta_sketch
        self.char_pos_tracker = char_pos_tracker
        self.length = length
        self.token_length = token_length
        self.token_method = token_method
예제 #6
0
    def from_protobuf(message: StringsMessage):
        """
        Build a StringTracker from its protobuf representation.

        Parameters
        ----------
        message : StringsMessage
            Serialized tracker state.  A non-empty ``theta`` field takes
            precedence over ``compact_theta``; when neither holds data,
            ``theta_sketch=None`` is handed to the constructor.

        Returns
        -------
        string_tracker : StringTracker
        """
        # Deserialize the first theta field that actually holds data.
        sketch = None
        for field_name in ("theta", "compact_theta"):
            raw = getattr(message, field_name)
            if raw is not None and len(raw) > 0:
                sketch = ThetaSketch.deserialize(raw)
                break

        total = message.count
        frequent_items = dsketch.deserialize_frequent_strings_sketch(message.items)
        return StringTracker(count=total, items=frequent_items, theta_sketch=sketch)
예제 #7
0
 def __init__(
     self,
     count: int = None,
     items: frequent_strings_sketch = None,
     theta_sketch: ThetaSketch = None,
 ):
     """
     Store the count and sketches, creating a default for each one omitted.

     ``count`` falls back to 0, ``items`` to a new frequent-strings sketch
     constructed with ``round(math.log(MAX_ITEMS_SIZE))``, and
     ``theta_sketch`` to a new ``ThetaSketch()``.
     """
     self.count = 0 if count is None else count
     self.items = (
         frequent_strings_sketch(round(math.log(MAX_ITEMS_SIZE)))
         if items is None
         else items
     )
     self.theta_sketch = ThetaSketch() if theta_sketch is None else theta_sketch