def test_reduce_agg():
    """Exercise each aggregate listed in ``AST.aggregates`` through ``Frame.reduce``.

    Two reductions are checked on the same four-row frame:

    * one that keeps the ``category`` column next to the aggregated
      ``value`` column (two result rows are expected, one per category);
    * one that floors ``timestamp`` with the ``"D"`` unit next to the
      aggregated ``value`` column (a single result row is expected).

    ``quantile`` is skipped in both passes. Any other aggregate that has no
    expected result registered below makes the test raise ``ValueError`` so
    that newly added aggregates cannot go untested silently.
    """
    schema = Schema(timestamp="timestamp*", category="str*", value="int")
    frm = Frame(
        schema,
        {
            "timestamp": [1589455901, 1589455901, 1589455902, 1589455902],
            "category": list("abab"),
            "value": [1, 2, 3, 4],
        },
    )

    # Expected "value" column when "category" is kept as-is:
    # rows a -> values (1, 3), rows b -> values (2, 4).
    expected_by_category = {
        "min": [1, 2],
        "max": [3, 4],
        "sum": [4, 6],
        "mean": [2, 3],
        "average": [2, 3],
        "first": [1, 2],
        "last": [3, 4],
        "count": [2, 2],
        "len": [2, 2],
    }

    # Expected "value" column when timestamps are floored with "D":
    # all four values (1, 2, 3, 4) end up in one result row.
    expected_by_day = {
        "min": [1],
        "max": [4],
        "sum": [10],
        "mean": [2.5],
        "average": [2.5],
        "first": [1],
        "last": [4],
        "count": [4],
        "len": [4],
    }

    for agg in AST.aggregates:
        if agg == "quantile":
            # quantile is not available with binning
            continue
        reduced = frm.reduce(category="category", value=f"({agg} self.value)")
        if agg not in expected_by_category:
            raise ValueError(f'op "{agg}" not tested')
        assert list(reduced["value"]) == expected_by_category[agg]

    for agg in AST.aggregates:
        if agg == "quantile":
            # quantile is not available with binning
            continue
        reduced = frm.reduce(
            timestamp='(floor self.timestamp "D")',
            value=f"({agg} self.value)",
        )
        if agg not in expected_by_day:
            raise ValueError(f'op "{agg}" not tested')
        assert list(reduced["value"]) == expected_by_day[agg]
def test_alias():
    """Check the ``as`` form, both standalone and inside ``Frame.reduce``.

    First, an ``(as <expr> 'new_name')`` expression is parsed and evaluated;
    the result must expose the wrapped array as ``.value`` and the alias as
    ``.name``. Then a frame is reduced with an aliased column expression and
    the resulting column must be reachable under the alias.

    NOTE(review): ``schema`` and ``values`` are not bound inside this
    function; they are presumably defined at module level -- confirm.
    """
    aliased = AST.parse("(as (asarray (list 1 2 3)) 'new_name')").eval()
    payload, label = aliased.value, aliased.name
    assert all(payload == asarray([1, 2, 3]))
    assert label == "new_name"

    # Positional expression: the output column name comes from the alias.
    renamed = Frame(schema, values).reduce("(as self.timestamp 'ts')")
    assert all(renamed["ts"] == asarray(values["timestamp"], "M"))
def test_reduce_without_agg():
    """Check ``Frame.reduce`` with expressions that contain no aggregate.

    Three cases are covered on a four-row frame: selecting every column
    unchanged, applying an element-wise expression to one column, and
    combining two columns into a new one.
    """
    schema = Schema(timestamp="timestamp*", category="str*", value="int")
    frm = Frame(
        schema,
        {
            "timestamp": [1589455901, 1589455901, 1589455902, 1589455902],
            "category": list("abab"),
            "value": [1, 2, 3, 4],
        },
    )

    # Selecting all columns by name must give back an equal frame.
    untouched = frm.reduce(
        timestamp="timestamp",
        category="category",
        value="value",
    )
    assert frm == untouched

    # Element-wise expression on a single column.
    parity = frm.reduce(value="(% self.value 2)")
    assert list(parity["value"]) == [1, 0, 1, 0]

    # Expression combining two columns, compared with the same sum
    # computed directly on the frame's columns.
    column_sum = frm["timestamp"] + frm["value"]
    combined = frm.reduce(new_col="(+ self.value self.timestamp)")
    assert all(combined["new_col"] == column_sum)