예제 #1
0
def test_set_index_twice(sample_column_names, sample_inferred_logical_types):
    """Check that re-applying the current index / time index changes nothing.

    Two situations are exercised: passing ``None`` to a schema built without
    any index, and passing the already-configured column names to a schema
    built with both an index and a time index.
    """
    # Schema built without indexes: setting None must leave them unset.
    unindexed = TableSchema(sample_column_names, sample_inferred_logical_types)
    unindexed.set_index(None)
    assert unindexed.index is None

    unindexed.set_time_index(None)
    assert unindexed.time_index is None

    # Schema built with both indexes already in place.
    schema = TableSchema(
        sample_column_names,
        sample_inferred_logical_types,
        index="id",
        time_index="signup_date",
    )
    # Snapshot covering every column, used as the comparison baseline below.
    every_column = list(schema.columns.keys())
    snapshot = schema.get_subset_schema(every_column)

    schema.set_index("id")
    assert schema.index == "id"
    assert schema.semantic_tags["id"] == {"index"}
    assert schema == snapshot

    schema.set_time_index("signup_date")
    assert schema.time_index == "signup_date"
    assert schema.semantic_tags["signup_date"] == {"time_index"}
    assert schema == snapshot
예제 #2
0
def test_get_subset_schema_all_params(sample_column_names,
                                      sample_inferred_logical_types):
    """Check that a full-column subset equals the source schema but is a new object.

    The schema is built with a value for every positional-or-keyword parameter
    of ``TableSchema.__init__``; the test first asserts that the kwargs cover
    exactly that parameter list so a newly added parameter is not silently
    left out of the comparison.
    """
    init_spec = inspect.getfullargspec(TableSchema.__init__)
    # Skip the leading ``self``: it is never supplied as a keyword argument.
    init_param_names = init_spec.args[1:]

    logical_types = {**sample_inferred_logical_types, "email": EmailAddress}
    kwargs = dict(
        column_names=sample_column_names,
        logical_types=logical_types,
        name="test_dt",
        index="id",
        time_index="signup_date",
        semantic_tags={"age": "test_tag"},
        table_metadata={"created_by": "user1"},
        column_metadata={"phone_number": {"format": "xxx-xxx-xxxx"}},
        use_standard_tags=False,
        column_descriptions={"age": "this is a description"},
        column_origins="base",
        validate=True,
    )

    # Every init parameter must be represented in kwargs (and nothing extra).
    assert set(init_param_names) == set(kwargs)

    schema = TableSchema(**kwargs)
    duplicate = schema.get_subset_schema(sample_column_names)

    assert schema == duplicate
    assert schema is not duplicate
예제 #3
0
def test_schema_rename(sample_column_names, sample_inferred_logical_types):
    """Check that ``rename`` returns a renamed schema and leaves the source intact.

    Verifies that:
    * the original schema still equals a snapshot taken before the rename,
    * the old column name is gone and the new one is present in the result,
    * table metadata plus the ``id`` column's description and origin are
      present on the *renamed* schema (and still on the original),
    * the renamed column keeps its logical type and semantic tags,
    * swapping two column names twice yields a schema equal to the original.
    """
    table_metadata = {"table_info": "this is text"}
    id_description = "the id of the row"
    id_origin = "base"
    schema = TableSchema(
        sample_column_names,
        sample_inferred_logical_types,
        index="id",
        time_index="signup_date",
        table_metadata=table_metadata,
        column_descriptions={"id": id_description},
        column_origins={"id": id_origin},
    )
    original_schema = schema.get_subset_schema(list(schema.columns.keys()))

    renamed_schema = schema.rename({"age": "birthday"})

    # Confirm original schema hasn't changed
    assert schema == original_schema

    assert "age" not in renamed_schema.columns
    assert "birthday" in renamed_schema.columns

    # Confirm that metadata, descriptions and origins are present on the
    # renamed schema. Checking only the original would not exercise rename().
    assert renamed_schema.metadata == table_metadata
    assert renamed_schema.columns["id"].description == id_description
    assert renamed_schema.columns["id"].origin == id_origin

    # The original schema must still carry them as well.
    assert schema.columns["id"].description == id_description
    assert schema.columns["id"].origin == id_origin

    old_col = schema.columns["age"]
    new_col = renamed_schema.columns["birthday"]
    assert old_col.logical_type == new_col.logical_type
    assert old_col.semantic_tags == new_col.semantic_tags

    # Swapping two names and swapping them back must round-trip.
    swap_mapping = {"age": "full_name", "full_name": "age"}
    swapped_schema = schema.rename(swap_mapping)
    swapped_back_schema = swapped_schema.rename(swap_mapping)
    assert swapped_back_schema == schema
예제 #4
0
def test_get_subset_schema(sample_column_names, sample_inferred_logical_types):
    """Check that a subset schema holds exactly the requested columns, unchanged.

    The subset is built from ``sample_column_names[1:4]``; each of its columns
    must have the same semantic tags and logical type as in the source schema.
    """
    schema = TableSchema(sample_column_names, sample_inferred_logical_types)
    requested_columns = sample_column_names[1:4]
    new_schema = schema.get_subset_schema(requested_columns)

    # Without this check the loop below passes vacuously if the subset is
    # empty or is missing some of the requested columns.
    assert set(new_schema.columns) == set(requested_columns)

    for col in new_schema.columns:
        assert new_schema.semantic_tags[col] == schema.semantic_tags[col]
        assert new_schema.logical_types[col] == schema.logical_types[col]