def test_set_index_twice(sample_column_names, sample_inferred_logical_types):
    """Setting an index or time index to its current value leaves the schema as-is."""
    # Case 1: nothing is set, and "setting" to None keeps it that way.
    unindexed = TableSchema(sample_column_names, sample_inferred_logical_types)
    unindexed.set_index(None)
    assert unindexed.index is None
    unindexed.set_time_index(None)
    assert unindexed.time_index is None

    # Case 2: both indexes are set at construction and then re-applied.
    indexed = TableSchema(
        sample_column_names,
        sample_inferred_logical_types,
        index="id",
        time_index="signup_date",
    )
    every_column = list(indexed.columns.keys())
    snapshot = indexed.get_subset_schema(every_column)

    indexed.set_index("id")
    assert indexed.index == "id"
    assert indexed.semantic_tags["id"] == {"index"}
    assert indexed == snapshot

    indexed.set_time_index("signup_date")
    assert indexed.time_index == "signup_date"
    assert indexed.semantic_tags["signup_date"] == {"time_index"}
    assert indexed == snapshot
def test_get_subset_schema_all_params(
    sample_column_names, sample_inferred_logical_types
):
    """A subset over every column must equal the source schema, yet be a new object.

    The keyword arguments are compared against the ``TableSchema.__init__``
    signature, so an init parameter that is not exercised here fails the test.
    """
    # Skip the leading ``self`` entry; it is never passed as a keyword.
    init_param_names = inspect.getfullargspec(TableSchema.__init__).args[1:]

    logical_types = dict(sample_inferred_logical_types)
    logical_types["email"] = EmailAddress

    kwargs = dict(
        column_names=sample_column_names,
        logical_types=logical_types,
        name="test_dt",
        index="id",
        time_index="signup_date",
        semantic_tags={"age": "test_tag"},
        table_metadata={"created_by": "user1"},
        column_metadata={"phone_number": {"format": "xxx-xxx-xxxx"}},
        use_standard_tags=False,
        column_descriptions={"age": "this is a description"},
        column_origins="base",
        validate=True,
    )

    # Every init parameter must be supplied (non-default where possible).
    assert set(init_param_names) == set(kwargs)

    source_schema = TableSchema(**kwargs)
    subset_copy = source_schema.get_subset_schema(sample_column_names)

    assert source_schema == subset_copy
    assert source_schema is not subset_copy
def test_schema_rename(sample_column_names, sample_inferred_logical_types):
    """Check that ``rename`` returns a renamed schema and leaves the source intact.

    Covers three things: the original schema is unchanged, the renamed schema
    keeps table metadata plus per-column description/origin/typing, and swapping
    two column names twice round-trips back to the starting schema.
    """
    table_metadata = {"table_info": "this is text"}
    id_description = "the id of the row"
    id_origin = "base"
    schema = TableSchema(
        sample_column_names,
        sample_inferred_logical_types,
        index="id",
        time_index="signup_date",
        table_metadata=table_metadata,
        column_descriptions={"id": id_description},
        column_origins={"id": id_origin},
    )
    original_schema = schema.get_subset_schema(list(schema.columns.keys()))

    renamed_schema = schema.rename({"age": "birthday"})

    # Confirm original schema hasn't changed
    assert schema == original_schema

    assert "age" not in renamed_schema.columns
    assert "birthday" in renamed_schema.columns

    # Confirm that metadata, descriptions and origins carried over to the
    # renamed schema. These checks previously read from ``schema``, which the
    # equality assertion above already covers, so the renamed schema's column
    # details were never actually verified.
    assert renamed_schema.metadata == table_metadata
    assert renamed_schema.columns["id"].description == id_description
    assert renamed_schema.columns["id"].origin == id_origin

    # The renamed column keeps the typing information of the column it replaced.
    old_col = schema.columns["age"]
    new_col = renamed_schema.columns["birthday"]
    assert old_col.logical_type == new_col.logical_type
    assert old_col.semantic_tags == new_col.semantic_tags

    # Swapping two names and then swapping them back must be a round trip.
    swap = {"age": "full_name", "full_name": "age"}
    swapped_schema = schema.rename(swap)
    swapped_back_schema = swapped_schema.rename(swap)
    assert swapped_back_schema == schema
def test_get_subset_schema(sample_column_names, sample_inferred_logical_types):
    """Check that a subset schema holds the requested columns with unchanged typing."""
    schema = TableSchema(sample_column_names, sample_inferred_logical_types)
    requested = sample_column_names[1:4]
    new_schema = schema.get_subset_schema(requested)

    # Without this check the loop below would pass vacuously if the subset
    # came back empty or was missing some of the requested columns.
    assert set(new_schema.columns) == set(requested)

    for col in new_schema.columns:
        assert new_schema.semantic_tags[col] == schema.semantic_tags[col]
        assert new_schema.logical_types[col] == schema.logical_types[col]