def test_get_compute_domain_with_multicolumn_domain():
    """Check the multicolumn and identity compute domains for columns a, b, c.

    With the ``"multicolumn"`` domain type the test expects the loaded batch
    back unchanged, empty compute kwargs, and the column list reported as
    accessor kwargs. With the ``"identity"`` domain type it expects only the
    requested columns in the returned data, the column list reported as
    compute kwargs, and empty accessor kwargs.
    """
    batch = pd.DataFrame(
        {
            "a": [1, 2, 3, 4],
            "b": [2, 3, 4, None],
            "c": [1, 2, 2, 3],
            "d": [2, 7, 9, 2],
        }
    )
    # Column "d" is the only one outside the requested domain.
    requested_columns_only = batch.drop(columns=["d"])

    execution_engine = PandasExecutionEngine()
    execution_engine.load_batch_data(batch_data=batch, batch_id="1234")

    # Phase 1: multicolumn domain, given as a plain string.
    domain_data, compute_part, accessor_part = (
        execution_engine.get_compute_domain(
            domain_kwargs={"columns": ["a", "b", "c"]},
            domain_type="multicolumn",
        )
    )
    assert domain_data.equals(
        batch
    ), "Data does not match after getting compute domain"
    assert compute_part == {}, "Compute domain kwargs should be existent"
    assert accessor_part == {
        "columns": ["a", "b", "c"]
    }, "Accessor kwargs have been modified"

    # Phase 2: identity domain with the same kwargs; the expectations differ
    # from phase 1 (data is narrowed, kwargs move to the compute side).
    domain_data, compute_part, accessor_part = (
        execution_engine.get_compute_domain(
            domain_kwargs={"columns": ["a", "b", "c"]},
            domain_type="identity",
        )
    )
    assert domain_data.equals(
        requested_columns_only
    ), "Data does not match after getting compute domain"
    assert compute_part == {
        "columns": ["a", "b", "c"]
    }, "Compute domain kwargs should be existent"
    assert accessor_part == {}, "Accessor kwargs have been modified"
def test_get_compute_domain_with_column_domain_and_identity():
    """Check the column and identity compute domains for a single column.

    The name is deliberately distinct from
    ``test_get_compute_domain_with_column_domain``, which is defined further
    down in this module: a second module-level ``def`` with the same name
    rebinds it, and pytest would then only collect the later function.

    With ``MetricDomainTypes.COLUMN`` the test expects the loaded batch back
    unchanged, empty compute kwargs, and ``{"column": "a"}`` as accessor
    kwargs. With ``MetricDomainTypes.IDENTITY`` it expects only column "a" in
    the returned data, ``{"column": "a"}`` as compute kwargs, and empty
    accessor kwargs.
    """
    engine = PandasExecutionEngine()
    df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 3, 4, None]})
    # Column "b" is outside the requested domain.
    expected_identity = df.drop(columns=["b"])

    # Loading batch data
    engine.load_batch_data(batch_data=df, batch_id="1234")

    data, compute_kwargs, accessor_kwargs = engine.get_compute_domain(
        domain_kwargs={"column": "a"},
        domain_type=MetricDomainTypes.COLUMN,
    )
    assert data.equals(df), "Data does not match after getting compute domain"
    assert compute_kwargs == {}, "Compute domain kwargs should be existent"
    assert accessor_kwargs == {
        "column": "a"
    }, "Accessor kwargs have been modified"

    # Doing this using identity domain should yield different results
    data, compute_kwargs, accessor_kwargs = engine.get_compute_domain(
        domain_kwargs={"column": "a"},
        domain_type=MetricDomainTypes.IDENTITY,
    )
    assert data.equals(
        expected_identity
    ), "Data does not match after getting compute domain"
    assert compute_kwargs == {
        "column": "a"
    }, "Compute domain kwargs should be existent"
    assert accessor_kwargs == {}, "Accessor kwargs have been modified"
def test_get_compute_domain_with_unmeetable_row_condition():
    """Check a column domain whose row condition matches no rows.

    Column "b" holds 2, 3, 4 and a missing value, so ``b > 24`` selects
    nothing. The test expects the returned data's "b" column to equal the
    same (empty) selection made directly with pandas, the row condition to be
    present in the compute kwargs, and ``{"column": "a"}`` as accessor kwargs.
    """
    engine = PandasExecutionEngine()
    df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 3, 4, None]})
    # Reference result: the same filter applied directly with pandas.
    expected_df = df[df["b"] > 24]

    # Loading batch data
    engine.load_batch_data(batch_data=df, batch_id="1234")

    data, compute_kwargs, accessor_kwargs = engine.get_compute_domain(
        domain_kwargs={
            "column": "a",
            "row_condition": "b > 24",
            "condition_parser": "pandas",
        },
        domain_type="column",
    )

    # Ensuring data has been properly queried
    assert data["b"].equals(
        expected_df["b"]
    ), "Data does not match after getting compute domain"

    # The row condition is expected to be reported on the compute side.
    assert (
        "row_condition" in compute_kwargs
    ), "Row condition should be located within compute kwargs"
    assert accessor_kwargs == {
        "column": "a"
    }, "Accessor kwargs have been modified"
def test_get_compute_domain_with_no_domain_kwargs():
    """Check that empty domain kwargs leave the batch and kwargs untouched.

    The engine is queried twice with ``domain_kwargs={}``: once with the
    string ``"identity"`` and once with the enum ``MetricDomainTypes.TABLE``.
    Both calls are expected to return the loaded batch unchanged together
    with empty compute kwargs and empty accessor kwargs.
    """
    batch = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 3, 4, None]})

    execution_engine = PandasExecutionEngine()
    execution_engine.load_batch_data(batch_data=batch, batch_id="1234")

    # First query: identity domain, given as a plain string.
    domain_data, compute_part, accessor_part = (
        execution_engine.get_compute_domain(
            domain_kwargs={},
            domain_type="identity",
        )
    )
    assert domain_data.equals(
        batch
    ), "Data does not match after getting compute domain"
    assert compute_part == {}, "Compute domain kwargs should be existent"
    assert accessor_part == {}, "Accessor kwargs have been modified"

    # Second query: table domain, given as an enum member; same expectations.
    domain_data, compute_part, accessor_part = (
        execution_engine.get_compute_domain(
            domain_kwargs={},
            domain_type=MetricDomainTypes.TABLE,
        )
    )
    assert domain_data.equals(
        batch
    ), "Data does not match after getting compute domain"
    assert compute_part == {}, "Compute domain kwargs should be existent"
    assert accessor_part == {}, "Accessor kwargs have been modified"
def test_get_compute_domain_with_column_domain():
    """Check the column compute domain for a single column.

    With ``MetricDomainTypes.COLUMN`` and ``{"column": "a"}`` the test
    expects the loaded batch back unchanged, empty compute kwargs, and the
    column name reported as accessor kwargs.
    """
    batch = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 3, 4, None]})

    execution_engine = PandasExecutionEngine()
    execution_engine.load_batch_data(batch_data=batch, batch_id="1234")

    domain_data, compute_part, accessor_part = (
        execution_engine.get_compute_domain(
            domain_kwargs={"column": "a"},
            domain_type=MetricDomainTypes.COLUMN,
        )
    )

    assert domain_data.equals(
        batch
    ), "Data does not match after getting compute domain"
    assert compute_part == {}, "Compute domain kwargs should be existent"
    assert accessor_part == {
        "column": "a"
    }, "Accessor kwargs have been modified"
def test_get_compute_domain_with_column_pair_domain():
    """Check the column-pair compute domain for columns a and b.

    With the ``"column_pair"`` domain type the test expects the loaded batch
    back unchanged (including the unrelated column "c"), empty compute
    kwargs, and both column names reported as accessor kwargs.
    """
    engine = PandasExecutionEngine()
    df = pd.DataFrame(
        {
            "a": [1, 2, 3, 4],
            "b": [2, 3, 4, 5],
            "c": [1, 2, 3, 4],
        }
    )
    # NOTE(review): the multicolumn test in this module also exercises the
    # identity domain against a column-restricted frame; no such phase exists
    # here. Confirm whether one is wanted for column pairs.

    # Loading batch data
    engine.load_batch_data(batch_data=df, batch_id="1234")

    data, compute_kwargs, accessor_kwargs = engine.get_compute_domain(
        domain_kwargs={"column_A": "a", "column_B": "b"},
        domain_type="column_pair",
    )
    assert data.equals(df), "Data does not match after getting compute domain"
    assert compute_kwargs == {}, "Compute domain kwargs should be existent"
    assert accessor_kwargs == {
        "column_A": "a",
        "column_B": "b",
    }, "Accessor kwargs have been modified"