def test_calculate_value():
    """Check that a calculate column converts each value of a source column.

    A table with a single attribute ``A`` holding ``[1, 2, 3]`` is populated,
    and a calculate column applying ``float(x)`` to ``A`` is defined on it.
    The table and then the column are evaluated explicitly, after which the
    first three values of the derived column are verified to be floats close
    to 1.0, 2.0 and 3.0.

    NOTE(review): another function named ``test_calculate_value`` appears
    later in this source; if both live in the same module, the later
    definition shadows this one and this test is never collected.
    """
    ctx = Prosto("My Prosto")

    tbl = ctx.populate(
        table_name="My table",
        attributes=["A"],
        func="lambda **m: pd.DataFrame({'A': [1, 2, 3]})",
        tables=[],
    )

    clm = ctx.calculate(
        name="My column",
        table=tbl.id,
        func="lambda x: float(x)",
        columns=["A"],
        model=None,
    )

    # The table has to be evaluated before the column defined on it
    tbl.evaluate()
    clm.evaluate()

    series = tbl.get_series('My column')
    values = [series[0], series[1], series[2]]

    # First verify the computed values ...
    for actual, expected in zip(values, (1.0, 2.0, 3.0)):
        assert np.isclose(actual, expected)

    # ... and then that each of them is a float
    for actual in values:
        assert isinstance(actual, float)
def test_filter_inheritance():
    """Test topology augmentation for a filtered table.

    A calculate column defined on the filtered table reads column ``A``,
    which is defined only in the base table. The test expects the column to
    be resolvable anyway, i.e. the merge operation is added to the topology
    automatically.

    With the data used here only the base row ``A=2.0, B='yy'`` satisfies the
    predicate ``A > 1.5 and len(B) < 3``, so the filtered table is expected
    to hold exactly one row.
    """
    ctx = Prosto("My Prosto")

    base_tbl = ctx.populate(
        table_name="Base table",
        attributes=["A", "B"],
        func="lambda **m: pd.DataFrame({'A': [1.0, 2.0, 3.0], 'B': ['x', 'yy', 'zzz']})",
        tables=[],
    )

    # Boolean column of the base table which is used as the filter predicate
    predicate_clm = ctx.compute(
        name="filter_column",
        table=base_tbl.id,
        # The function returns a boolean Series
        func="lambda x, param: (x['A'] > param) & (x['B'].str.len() < 3)",
        columns=["A", "B"],
        model={"param": 1.5},
    )

    f_tbl = ctx.filter(
        table_name="Filtered table",
        attributes=["super"],
        func=None,
        tables=["Base table"],
        columns=["filter_column"],
    )

    # This calculate column belongs to the filtered table but reads "A",
    # which actually exists only in the base table
    derived_clm = ctx.calculate(
        name="My column",
        table=f_tbl.id,
        func="lambda x: x + 1.0",
        columns=["A"],
        model=None,
    )

    ctx.run()

    derived = f_tbl.get_series('My column')
    assert np.isclose(len(derived), 1)
    assert np.isclose(derived[0], 3.0)

    # Column "A" had to be added to the filtered table automatically by the
    # augmentation procedure. It is inherited from the base table,
    # materialized via a merge operation, and stores the original values of
    # the base column.
    inherited = f_tbl.get_series('A')
    assert np.isclose(inherited[0], 2)
def test_product_inheritance():
    """Test topology augmentation for a product table.

    An additional calculate column is defined on the product table, but it
    reads column ``A`` which exists only in one of the base tables. The
    system has to resolve this missing column by automatically inserting a
    new operation.
    """
    ctx = Prosto("My Prosto")

    first_tbl = ctx.populate(
        table_name="Table 1",
        attributes=["A"],
        func="lambda **m: pd.DataFrame({'A': [1.0, 2.0, 3.0]})",
        tables=[],
    )

    second_tbl = ctx.populate(
        table_name="Table 2",
        attributes=["B"],
        func="lambda **m: pd.DataFrame({'B': ['x', 'y', 'z']})",
        tables=[],
    )

    product = ctx.product(
        table_name="Product",
        attributes=["t1", "t2"],
        tables=["Table 1", "Table 2"],
    )

    # The calculate column is defined on the product table, although its
    # input column "A" actually exists only in a base table
    derived_clm = ctx.calculate(
        name="My column",
        table=product.id,
        func="lambda x: x + 1.0",
        columns=["A"],
        model=None,
    )

    ctx.run()

    # Two columns are expected in addition to the two attributes:
    # one merge (augmented) column and one calculate column
    column_count = len(product.get_df().columns)
    assert column_count == 4

    # Each of the three values of "A" is combined with three rows of
    # "Table 2", so every incremented value appears three times in a row
    expected = [a + 1.0 for a in (1.0, 2.0, 3.0) for _ in range(3)]
    derived = product.get_series('My column')
    assert derived.to_list() == expected
def test_calculate_with_path():
    """Test topology augmentation for column paths.

    A calculate column on the "Facts" table reads the column path
    ``Link::B``, i.e. column ``B`` of the "Groups" table reached via the
    link column ``Link``. The merge operation needed to materialize this
    path has to be produced automatically.

    Facts rows are ``(a, 1.0), (a, 2.0), (b, 3.0), (b, 4.0)`` and Groups maps
    ``a -> 3.0`` and ``b -> 2.0``, hence the expected sums ``M + B`` are
    ``4.0, 5.0, 5.0, 6.0``.
    """
    ctx = Prosto("My Prosto")

    # Facts
    f_tbl = ctx.populate(
        table_name="Facts",
        attributes=["A", "M"],
        func="lambda **m: pd.DataFrame({'A': ['a', 'a', 'b', 'b'], 'M': [1.0, 2.0, 3.0, 4.0]})",
        tables=[],
    )

    # Groups (the data is produced by the populate function itself)
    g_tbl = ctx.populate(
        table_name="Groups",
        attributes=["A", "B"],
        func="lambda **m: pd.DataFrame({'A': ['a', 'b', 'c'], 'B': [3.0, 2.0, 1.0]})",
        tables=[],
    )

    # Link from facts to groups by matching column "A" in both tables
    ctx.link(
        name="Link",
        table=f_tbl.id,
        type=g_tbl.id,
        columns=["A"],
        linked_columns=["A"],
    )

    # Calculate using a column path which goes through the link column
    ctx.calculate(
        name="My column",
        table=f_tbl.id,
        func="lambda x: x['M'] + x['Link::B']",
        columns=["M", "Link::B"],
        model=None,
    )

    ctx.run()

    clm_data = f_tbl.get_series('My column')

    assert clm_data[0] == 4.0
    assert clm_data[1] == 5.0
    assert clm_data[2] == 5.0
    assert clm_data[3] == 6.0
def test_calculate_value():
    """Check incremental evaluation of a calculate column.

    The context is switched to incremental mode, an empty table with
    attribute ``A`` is created, and a calculate column applying ``float(x)``
    to ``A`` is defined. Records are then added and removed, and after each
    ``ctx.run()`` the test asserts that the change status of the table
    (number of added / removed records) is clean again.

    An already evaluated row is also modified directly in the data frame to
    confirm that such a change is not picked up by the next run.

    NOTE(review): another function named ``test_calculate_value`` appears
    earlier in this source; if both live in the same module, this definition
    shadows the earlier one.
    """
    ctx = Prosto("My Prosto")
    ctx.incremental = True

    tbl = ctx.create_table(
        table_name="My table",
        attributes=["A"],
    )

    ctx.calculate(
        name="My column",
        table=tbl.id,
        func="lambda x: float(x)",
        columns=["A"],
        model=None,
    )

    ctx.run()  # Inference on empty data

    tbl.data.add({"A": 1})  # New record is added and marked as added

    # Assert new change status
    assert tbl.data.added_length() == 1

    ctx.run()

    # Assert clean change status and results of inference
    assert tbl.data.added_length() == 0

    tbl.data.add({"A": 2})
    tbl.data.add({"A": 3})

    # Assert new change status
    assert tbl.data.added_length() == 2

    # For debug purposes, modify an old row (which has not been recently
    # added but was evaluated before). The old value is 1.
    # A single .loc assignment is used instead of chained indexing
    # (tbl_df['A'][0] = 10): chained assignment may write to a temporary copy
    # and is a silent no-op under pandas Copy-on-Write, which would make the
    # check below vacuous.
    tbl_df = tbl.data.get_df()
    tbl_df.loc[0, 'A'] = 10  # Prosto does not see this change

    ctx.run()

    # The manual modification is invisible to Prosto, hence the row should
    # not be re-computed and the derived column keeps the old value
    assert tbl_df.loc[0, 'My column'] == 1

    # Assert clean change status and results of inference
    assert tbl.data.added_length() == 0

    tbl.data.remove(1)  # Remove one oldest record by marking it as removed

    # Assert new change status
    assert tbl.data.removed_length() == 1

    ctx.run()

    # Assert clean change status and results of inference
    assert tbl.data.removed_length() == 0

    tbl.data.remove_all()  # Remove all records by marking them as removed

    ctx.run()

    # Assert clean change status: after three records were added and all of
    # them removed, both ranges are expected to be empty and positioned at 3
    assert tbl.data.added_range.start == 3
    assert tbl.data.added_range.end == 3
    assert tbl.data.removed_range.start == 3
    assert tbl.data.removed_range.end == 3