def test_type_system_default_type(default_inference_functions, default_relationships):
    """Check that a custom ``default_type`` is stored, registered and returned by inference.

    The TypeSystem is built with ``SubRegionCode`` as its default type. After the
    Categorical inference function is replaced with ``None``, a short string
    series is expected to be inferred as that default type.
    """
    system = TypeSystem(
        inference_functions=default_inference_functions,
        relationships=default_relationships,
        default_type=SubRegionCode,
    )
    assert system.default_type == SubRegionCode

    # Presumably this keeps the string series from matching Categorical, so
    # inference has to fall through to the default type -- confirm against
    # TypeSystem.update_inference_function.
    system.update_inference_function(Categorical, None)

    string_values = pd.Series(['a', 'b', 'c'])
    inferred = system.infer_logical_type(string_values)
    assert inferred == SubRegionCode
    assert SubRegionCode in system.registered_types
def test_nl_inference_not_called_with_other_matches(nl_mock, pandas_integers):
    """Check that the NaturalLanguage inference function is not invoked for an Integer series.

    The first series of the ``pandas_integers`` fixture is first confirmed to be
    inferred as ``Integer`` by the global type system. A fresh TypeSystem then
    gets ``nl_mock`` installed as its NaturalLanguage inference function, and the
    mock must remain uncalled after inferring the same series.
    """
    integer_series = pandas_integers[0]
    assert isinstance(ww.type_system.infer_logical_type(integer_series), Integer)

    default_config = {
        "inference_functions": DEFAULT_INFERENCE_FUNCTIONS,
        "relationships": DEFAULT_RELATIONSHIPS,
        "default_type": DEFAULT_TYPE,
    }
    isolated_system = TypeSystem(**default_config)
    # Swap in the mock so any NaturalLanguage inference attempt is recorded.
    isolated_system.inference_functions[NaturalLanguage] = nl_mock

    # Only the mock's call record matters here; the inferred type is discarded.
    isolated_system.infer_logical_type(integer_series)
    assert not nl_mock.called
def test_nl_inference_called_with_unknown_type(nl_mock, pandas_strings):
    """Check that the NaturalLanguage inference function is invoked for an Unknown series.

    The first series of the ``pandas_strings`` fixture is first confirmed to be
    inferred as ``Unknown`` by the global type system. A fresh TypeSystem then
    gets ``nl_mock`` installed as its NaturalLanguage inference function, and the
    mock must have been called after inferring the same series.
    """
    string_series = pandas_strings[0]
    assert isinstance(ww.type_system.infer_logical_type(string_series), Unknown)

    default_config = {
        "inference_functions": DEFAULT_INFERENCE_FUNCTIONS,
        "relationships": DEFAULT_RELATIONSHIPS,
        "default_type": DEFAULT_TYPE,
    }
    isolated_system = TypeSystem(**default_config)
    # Swap in the mock so any NaturalLanguage inference attempt is recorded.
    isolated_system.inference_functions[NaturalLanguage] = nl_mock

    # Only the mock's call record matters here; the inferred type is discarded.
    isolated_system.infer_logical_type(string_series)
    assert nl_mock.called
def test_inference_multiple_matches_different_depths(default_relationships):
    """Check that, among several matching types, the deepest one is inferred.

    Every registered inference function accepts any series. ``SubRegionCode`` is
    added as a child of ``CountryCode``, and the test expects it to be the
    inferred type for an integer series.
    """
    def accept_any(series):
        return True

    # Same four types, in the same order, all mapped to the accept-all function.
    matchers = dict.fromkeys((Categorical, Double, Integer, CountryCode), accept_any)
    system = TypeSystem(
        inference_functions=matchers,
        relationships=default_relationships,
        default_type=NaturalLanguage,
    )

    # These re-register the function already supplied through the constructor
    # dict for the same two types.
    for logical_type in (Integer, CountryCode):
        system.update_inference_function(logical_type, accept_any)
    system.add_type(SubRegionCode, inference_function=accept_any, parent=CountryCode)

    result = system.infer_logical_type(pd.Series([1, 2, 3]))
    # SubRegionCode is expected because it is the deepest match.
    assert result == SubRegionCode
def test_inference_multiple_matches_same_depth(default_relationships):
    """Check which type is inferred when several matches sit at the same depth.

    Every registered inference function accepts any series. The test expects
    ``CountryCode`` to be inferred for an integer series rather than ``Integer``.
    """
    def accept_any(series):
        return True

    # Same four types, in the same order, all mapped to the accept-all function.
    matchers = dict.fromkeys((Categorical, Double, Integer, CountryCode), accept_any)
    system = TypeSystem(
        inference_functions=matchers,
        relationships=default_relationships,
        default_type=NaturalLanguage,
    )

    # These re-register the function already supplied through the constructor
    # dict for the same two types.
    for logical_type in (Integer, CountryCode):
        system.update_inference_function(logical_type, accept_any)

    result = system.infer_logical_type(pd.Series([1, 2, 3]))
    # CountryCode is expected: it is at the same depth as Integer, but its
    # parent (Categorical) is tried and found first.
    assert result == CountryCode