def test_mainly_null_inferred_type_not_null():
    """Check that a large number of NULLs does not make NULL the inferred type.

    Two scenarios are exercised, each on its own fresh tracker:
    one INTEGRAL value among 30 NULLs must infer INTEGRAL, and a mix of
    INTEGRAL/STRING values among 30 NULLs must infer anything but NULL.
    """
    # Scenario 1: a single non-null type next to many nulls.
    single_type_tracker = SchemaTracker()
    multiple_track(single_type_tracker, {Type.INTEGRAL: 1, Type.NULL: 30})
    assert single_type_tracker.infer_type().type == Type.INTEGRAL

    # Scenario 2: several non-null types next to many nulls.
    mixed_type_tracker = SchemaTracker()
    multiple_track(
        mixed_type_tracker,
        {Type.INTEGRAL: 1, Type.STRING: 2, Type.NULL: 30},
    )
    assert mixed_type_tracker.infer_type().type != Type.NULL
def test_all_null_inferred_type_is_null():
    """Check that tracking only NULL values yields NULL as the inferred type."""
    null_only_tracker = SchemaTracker()
    multiple_track(null_only_tracker, {Type.NULL: 1})

    result = null_only_tracker.infer_type()
    assert result.type == Type.NULL
def test_majority_int():
    """Check that INTEGRAL is inferred for 50 INTEGRAL, 30 STRING, 20 UNKNOWN."""
    observed = {
        Type.INTEGRAL: 50,
        Type.STRING: 30,
        Type.UNKNOWN: 20,
    }
    schema = SchemaTracker()
    multiple_track(schema, observed)

    result = schema.infer_type()
    assert result.type == Type.INTEGRAL
def test_all_types_equal_coerced_to_string():
    """Check that a mix of numeric and string values is inferred as STRING.

    The tracked counts are 20 INTEGRAL, 29 FRACTIONAL and 50 STRING
    (not literally equal, despite the test name).
    """
    observed = {
        Type.INTEGRAL: 20,
        Type.FRACTIONAL: 29,
        Type.STRING: 50,
    }
    schema = SchemaTracker()
    multiple_track(schema, counts=observed)

    result = schema.infer_type()
    assert result.type == Type.STRING
def test_float_and_int():
    """Check that FRACTIONAL is inferred for 50 INTEGRAL, 50 FRACTIONAL, 10 STRING."""
    observed = {
        Type.INTEGRAL: 50,
        Type.FRACTIONAL: 50,
        Type.STRING: 10,
    }
    schema = SchemaTracker()
    multiple_track(schema, counts=observed)

    result = schema.infer_type()
    assert result.type == Type.FRACTIONAL
def test_summary():
    """Check that the tracker summary mirrors the tracked counts and inferred type."""
    expected = {
        Type.INTEGRAL: 3,
        Type.STRING: 4,
        Type.FRACTIONAL: 5,
        Type.BOOLEAN: 6,
        Type.UNKNOWN: 1,
    }
    schema = SchemaTracker()
    multiple_track(schema, expected)

    summary = schema.to_summary()
    reported = summary.type_counts

    # The summary is indexed by string labels; pair each label with the
    # Type member whose tracked count it should report.
    label_to_type = (
        ("INTEGRAL", Type.INTEGRAL),
        ("STRING", Type.STRING),
        ("FRACTIONAL", Type.FRACTIONAL),
        ("BOOLEAN", Type.BOOLEAN),
        ("UNKNOWN", Type.UNKNOWN),
    )
    for label, type_member in label_to_type:
        assert reported[label] == expected[type_member]

    # The summary's inferred type must agree with a direct inference call.
    assert summary.inferred_type.type == schema.infer_type().type
def test_70percent_string():
    """Check that STRING is inferred for 71 STRING values and 29 INTEGRAL values."""
    observed = {Type.INTEGRAL: 29, Type.STRING: 71}
    schema = SchemaTracker()
    multiple_track(schema, observed)

    result = schema.infer_type()
    assert result.type == Type.STRING
def test_track_nothing_should_return_unknown():
    """Check that a tracker with no tracked values infers UNKNOWN with ratio 0.0."""
    result = SchemaTracker().infer_type()

    assert result.type == Type.UNKNOWN
    assert result.ratio == 0.0