예제 #1
0
def test_merge_total_counts_match():
    """Check that merging two trackers yields the combined per-type counts.

    One tracker is fed 10 observations of each type and the other 20;
    the merged tracker must report 30 for every type.
    """
    tracked_types = (
        Type.INTEGRAL,
        Type.FRACTIONAL,
        Type.BOOLEAN,
        Type.UNKNOWN,
    )

    first = SchemaTracker()
    multiple_track(first, {kind: 10 for kind in tracked_types})

    second = SchemaTracker()
    multiple_track(second, {kind: 20 for kind in tracked_types})

    combined = first.merge(second)
    for kind in tracked_types:
        assert combined.get_count(kind) == 30

    # Smoke check: the merged tracker must survive a protobuf round trip
    # without raising (the result itself is not inspected here).
    SchemaTracker.from_protobuf(combined.to_protobuf())
예제 #2
0
def test_serialization_roundtrip():
    """Check that a tracker rebuilt from its protobuf form matches the source.

    The rebuilt tracker must serialize to an equal message and report the
    same per-type counts that were originally tracked.
    """
    original = SchemaTracker()
    multiple_track(original, {Type.INTEGRAL: 10, Type.STRING: 100})

    serialized = original.to_protobuf()
    restored = SchemaTracker.from_protobuf(serialized)

    # Compare freshly produced messages from both trackers.
    assert original.to_protobuf() == restored.to_protobuf()
    for kind, expected in ((Type.INTEGRAL, 10), (Type.STRING, 100)):
        assert restored.get_count(kind) == expected
예제 #3
0
    def from_protobuf(message):
        """
        Build a ColumnProfile from a protobuf message.

        Each sub-message is handed to the ``from_protobuf`` loader of the
        matching tracker/sketch class, and the results are passed to the
        ``ColumnProfile`` constructor together with the column name.

        Parameters
        ----------
        message
            Protobuf message exposing ``name``, ``counters``, ``schema``,
            ``numbers``, ``strings``, ``frequent_items`` and
            ``cardinality_tracker`` fields.

        Returns
        -------
        column_profile : ColumnProfile
        """
        name = message.name
        counters = CountersTracker.from_protobuf(message.counters)
        schema = SchemaTracker.from_protobuf(message.schema)
        numbers = NumberTracker.from_protobuf(message.numbers)
        strings = StringTracker.from_protobuf(message.strings)
        frequent = FrequentItemsSketch.from_protobuf(message.frequent_items)
        cardinality = HllSketch.from_protobuf(message.cardinality_tracker)

        return ColumnProfile(
            name,
            counters=counters,
            schema_tracker=schema,
            number_tracker=numbers,
            string_tracker=strings,
            frequent_items=frequent,
            cardinality_tracker=cardinality,
        )