def test_complex_table_schema():
    """Build a schema with column-level and table-level constraints.

    The test only checks that construction succeeds and yields a
    ``TableSchema`` instance.
    """
    # Column with a non-builtin type name plus nullable/unique constraints.
    foo_column = TableColumn(
        name="foo",
        type="customtype",
        constraints=TableColumnConstraints(
            nullable=True,
            unique=True,
        ),
    )
    # Column carrying a description and a free-form "other" constraint.
    bar_column = TableColumn(
        name="bar",
        type="string",
        description="bar",
        constraints=TableColumnConstraints(
            nullable=False,
            other=["foo"],
        ),
    )

    schema = TableSchema(
        columns=[foo_column, bar_column],
        constraints=TableConstraints(other=["foo"]),
    )

    assert isinstance(schema, TableSchema)
def test_table_schema_values(key, value):
    """Check that replacing one valid argument with ``value`` raises CheckError.

    ``key`` names the ``TableSchema`` keyword argument to override and
    ``value`` is the replacement. Both are presumably supplied by a
    parametrization that is not visible in this chunk.
    """
    # Start from arguments that are valid on their own.
    table_constraints = TableConstraints(other=["foo"])
    valid_column = TableColumn(
        name="foo",
        type="string",
        description="bar",
        constraints=TableColumnConstraints(other=["foo"]),
    )
    schema_args = {
        "constraints": table_constraints,
        "columns": [valid_column],
    }

    # Swap in the value under test.
    schema_args[key] = value

    with pytest.raises(CheckError):
        TableSchema(**schema_args)
def _materialization_for_stream(
    name: str,
    stream_schema_props: Dict[str, Any],
    stream_stats: Dict[str, Any],
    asset_key_prefix: List[str],
) -> AssetMaterialization:
    """Build an AssetMaterialization describing one stream.

    Args:
        name: Stream name; appended to ``asset_key_prefix`` to form the asset key.
        stream_schema_props: Mapping of column name to a dict of column info.
            Only the ``"type"`` entry of each info dict is read.
        stream_stats: Extra metadata entries; entries whose value is ``None``
            are left out.
        asset_key_prefix: Leading components of the asset key.

    Returns:
        An AssetMaterialization whose metadata holds a ``"schema"`` table
        schema plus every non-``None`` entry from ``stream_stats``.
    """
    asset_key = asset_key_prefix + [name]

    # One column per schema property; a missing "type" is reported as "unknown".
    columns = []
    for column_name, column_info in stream_schema_props.items():
        column_type = str(column_info.get("type", "unknown"))
        columns.append(TableColumn(name=column_name, type=column_type))

    metadata = {
        "schema": MetadataValue.table_schema(TableSchema(columns=columns)),
    }
    # Stats are merged after the schema, so a stat named "schema" replaces it.
    for stat_name, stat_value in stream_stats.items():
        if stat_value is not None:
            metadata[stat_name] = stat_value

    return AssetMaterialization(asset_key=asset_key, metadata=metadata)
def test_pandera_schema_to_dagster_type(schema):
    """Convert ``schema`` to a DagsterType and check its table-schema metadata.

    ``schema`` is presumably a fixture defined outside this chunk. The
    resulting type must carry exactly one metadata entry, and that entry
    must be a table schema equal to the expected one built below.
    """
    dagster_type = pandera_schema_to_dagster_type(schema)
    assert isinstance(dagster_type, DagsterType)

    entries = dagster_type.metadata_entries
    assert len(entries) == 1

    schema_entry = entries[0]
    assert isinstance(schema_entry.entry_data, TableSchemaMetadataValue)
    actual_schema = schema_entry.entry_data.schema

    expected_table_constraints = TableConstraints(other=["sum(a) > sum(b)"])

    # (name, type, description, "other" constraints) for each expected column;
    # every expected column is non-nullable.
    column_specs = [
        ("a", "int64", "a desc", ["<= 10"]),
        ("b", "float64", "b desc", ["< -1.2"]),
        (
            "c",
            "str",
            "c desc",
            [
                "str_startswith(value_)",
                "Two words separated by underscore",
            ],
        ),
    ]
    expected_columns = []
    for col_name, col_type, col_description, col_checks in column_specs:
        col_constraints = TableColumnConstraints(nullable=False, other=col_checks)
        expected_columns.append(
            TableColumn(
                name=col_name,
                type=col_type,
                description=col_description,
                constraints=col_constraints,
            )
        )

    expected_schema = TableSchema(
        constraints=expected_table_constraints,
        columns=expected_columns,
    )
    assert actual_schema == expected_schema
def _materialization_for_stream(
    name: str,
    stream_info: Dict[str, Any],
    stream_stats: Dict[str, Any],
    asset_key_prefix: List[str],
) -> AssetMaterialization:
    """Build an AssetMaterialization describing one stream.

    Args:
        name: Stream name; appended to ``asset_key_prefix`` to form the asset key.
        stream_info: Stream description. The column definitions are read from
            ``stream_info["stream"]["jsonSchema"]["properties"]``, a mapping of
            column name to a dict of column info.
        stream_stats: Extra metadata entries; entries whose value is ``None``
            are left out.
        asset_key_prefix: Leading components of the asset key.

    Returns:
        An AssetMaterialization whose metadata holds a ``"schema"`` table
        schema, a ``"columns"`` comma-separated list of column names, and
        every non-``None`` entry from ``stream_stats``.

    Raises:
        KeyError: If ``stream_info`` lacks the ``stream`` / ``jsonSchema`` /
            ``properties`` nesting.
    """
    # Look the properties mapping up once instead of once per use.
    properties = stream_info["stream"]["jsonSchema"]["properties"]

    return AssetMaterialization(
        asset_key=asset_key_prefix + [name],
        metadata={
            "schema": MetadataValue.table_schema(
                TableSchema(
                    columns=[
                        # A property may have no "type" keyword; report it as
                        # "unknown" rather than failing the materialization.
                        TableColumn(
                            name=column_name,
                            type=str(column_info.get("type", "unknown")),
                        )
                        for column_name, column_info in properties.items()
                    ]
                )
            ),
            # Iterating a dict yields its keys in insertion order.
            "columns": ",".join(properties),
            **{k: v for k, v in stream_stats.items() if v is not None},
        },
    )
def test_table_schema_keys():
    """Check that an unrecognized keyword argument makes TableSchema raise TypeError."""
    unexpected_kwargs = {"bad_key": "foo"}
    with pytest.raises(TypeError):
        TableSchema(**unexpected_kwargs)  # type: ignore