def test_filter_squash_query_nan_and_inf_metric(small_mock1, small_mock2): """Use call path query language on a metric column containing both int/float, NaN and inf.""" gf1 = GraphFrame.from_literal(small_mock1) gf2 = GraphFrame.from_literal(small_mock2) gf3 = gf1 / gf2 query_nan = [{"time": "== np.nan"}] filt_nan_gf3 = gf3.filter(query_nan, squash=True) assert len(filt_nan_gf3.graph.roots) == 2 assert all(pd.isnull(time) for time in filt_nan_gf3.dataframe["time (inc)"]) assert all(pd.isnull(time) for time in filt_nan_gf3.dataframe["time"]) assert filt_nan_gf3.dataframe.shape[0] == 2 assert sorted(filt_nan_gf3.dataframe["name"].values) == ["D", "G"] query_inf = [{"time": "== np.inf"}] filt_inf_gf3 = gf3.filter(query_inf, squash=True) assert len(filt_inf_gf3.graph.roots) == 1 assert all(np.isinf(inc_time) for inc_time in filt_inf_gf3.dataframe["time (inc)"]) assert all(np.isinf(exc_time) for exc_time in filt_inf_gf3.dataframe["time"]) assert filt_inf_gf3.dataframe.shape[0] == 1 assert filt_inf_gf3.dataframe["name"].values[0] == "B"
def test_inclusive_time_calculation_mock_dag_modules( mock_dag_literal_module, mock_dag_literal_module_complex, mock_dag_literal_module_more_complex, ): gf6 = GraphFrame.from_literal(mock_dag_literal_module) gf6.dataframe["orig_inc_time"] = gf6.dataframe["time (inc)"] del gf6.dataframe["time (inc)"] gf6.update_inclusive_columns() assert all(gf6.dataframe["time (inc)"].values == gf6.dataframe["orig_inc_time"].values) gf7 = GraphFrame.from_literal(mock_dag_literal_module_complex) gf7.dataframe["orig_inc_time"] = gf7.dataframe["time (inc)"] del gf7.dataframe["time (inc)"] gf7.update_inclusive_columns() assert all(gf7.dataframe["time (inc)"].values == gf7.dataframe["orig_inc_time"].values) gf8 = GraphFrame.from_literal(mock_dag_literal_module_more_complex) gf8.dataframe["orig_inc_time"] = gf8.dataframe["time (inc)"] del gf8.dataframe["time (inc)"] gf8.update_inclusive_columns() assert all(gf8.dataframe["time (inc)"].values == gf8.dataframe["orig_inc_time"].values)
def test_sub_operator(mock_graph_literal): gf1 = GraphFrame.from_literal(mock_graph_literal) gf2 = GraphFrame.from_literal(mock_graph_literal) assert gf1.graph is not gf2.graph gf3 = gf1 - gf2 assert gf3.graph == gf1.graph.union(gf2.graph) assert len(gf3.graph) == gf3.dataframe.shape[0] for metric in gf3.exc_metrics + gf3.inc_metrics: assert gf3.dataframe[metric].sum() == 0 gf4 = gf3.copy() assert gf4.graph is gf3.graph gf5 = gf3.sub(gf4) assert gf5.graph == gf3.graph == gf4.graph gf6 = gf1 - gf2 - gf1 assert gf6.dataframe["time"].sum() == -165 gf7 = gf1 - gf2 gf8 = gf7 - gf1 assert gf8.graph == gf6.graph assert gf8.dataframe["time"].sum() == gf6.dataframe["time"].sum()
def test_add_operator(mock_graph_literal): gf1 = GraphFrame.from_literal(mock_graph_literal) gf2 = GraphFrame.from_literal(mock_graph_literal) assert gf1.graph is not gf2.graph gf3 = gf1 + gf2 assert gf3.graph == gf1.graph.union(gf2.graph) assert len(gf3.graph) == gf3.dataframe.shape[0] assert gf3.dataframe["time"].sum() == 330 assert gf3.dataframe["time (inc)"].sum() == 1280 gf4 = gf3.copy() assert gf4.graph is gf3.graph gf5 = gf3 + gf4 assert gf5.graph == gf3.graph == gf4.graph gf6 = gf1 + gf2 + gf1 assert gf6.dataframe["time"].sum() == 495 gf7 = gf1 + gf2 gf8 = gf7 + gf1 assert gf8.graph == gf6.graph assert gf8.dataframe["time"].sum() == gf6.dataframe["time"].sum()
def test_div_operator(mock_graph_literal): gf1 = GraphFrame.from_literal(mock_graph_literal) gf2 = GraphFrame.from_literal(mock_graph_literal) assert gf1.graph is not gf2.graph gf3 = gf1 / gf2 assert gf3.graph == gf1.graph.union(gf2.graph) assert len(gf3.graph) == gf3.dataframe.shape[0] assert gf3.dataframe["time"].sum() == 21 assert gf3.dataframe["time (inc)"].sum() == 24 gf4 = gf3.copy() assert gf4.graph is gf3.graph gf5 = gf3 / gf4 / gf3 assert gf5.graph == gf3.graph == gf4.graph assert gf5.dataframe["time (inc)"].sum() == 24 gf6 = gf3 / gf4 gf7 = gf6 / gf3 assert gf7.graph == gf5.graph assert gf7.dataframe["time"].sum() == gf5.dataframe["time"].sum()
def test_graphframe(mock_graph_literal): """Sanity test a GraphFrame object with known data.""" gf = GraphFrame() gf.from_literal(mock_graph_literal) assert len(gf.dataframe) == 20
def test_div_decorator(small_mock1, small_mock2): gf1 = GraphFrame.from_literal(small_mock1) gf2 = GraphFrame.from_literal(small_mock2) assert len(gf1.graph) == 6 assert len(gf2.graph) == 7 gf3 = gf1 / gf2 assert len(gf3.graph) == 8 assert gf3.dataframe.loc[gf3.dataframe["_missing_node"] == 2].shape[0] == 2 # "R" assert gf3.dataframe.loc[gf3.dataframe["_missing_node"] == 1].shape[0] == 1 # "L" assert gf3.dataframe.loc[gf3.dataframe["_missing_node"] == 0].shape[0] == 5 # "" output = ConsoleRenderer(unicode=True, color=False).render( gf3.graph.roots, gf3.dataframe, metric_column="time", precision=3, name_column="name", expand_name=False, context_column="file", rank=0, thread=0, depth=10000, highlight_name=False, invert_colormap=False, ) assert "1.000 C" in output assert "inf B" in output assert u"nan D ▶" in output assert u"10.000 H ◀" in output
def test_sub_decorator(small_mock1, small_mock2, small_mock3): gf1 = GraphFrame.from_literal(small_mock1) gf2 = GraphFrame.from_literal(small_mock2) gf3 = GraphFrame.from_literal(small_mock3) assert len(gf1.graph) == 6 assert len(gf2.graph) == 7 gf4 = gf1 - gf2 assert len(gf4.graph) == 8 assert gf4.dataframe.loc[gf4.dataframe["_missing_node"] == "R"].shape[0] == 2 assert gf4.dataframe.loc[gf4.dataframe["_missing_node"] == "L"].shape[0] == 1 assert gf4.dataframe.loc[gf4.dataframe["_missing_node"] == ""].shape[0] == 5 output = gf4.tree(metric="time", color=False) assert "0.000 C" in output assert "-5.000 \x1b[1m[[D]] (R)" in output assert "10.000 \x1b[1m[[H]] (L)" in output gf5 = gf1 - gf3 assert len(gf1.graph) == 6 assert len(gf3.graph) == 4 assert len(gf5.graph) == 6 assert gf5.dataframe.loc[gf5.dataframe["_missing_node"] == "R"].shape[0] == 0 assert gf5.dataframe.loc[gf5.dataframe["_missing_node"] == "L"].shape[0] == 2 assert gf5.dataframe.loc[gf5.dataframe["_missing_node"] == ""].shape[0] == 4 output = gf5.tree(metric="time", color=False) assert output.startswith("0.000 A") assert "5.000 \x1b[1m[[C]] (L)" in output assert "10.000 \x1b[1m[[H]] (L)" in output
def test_filter_nan_and_inf(small_mock1, small_mock2): """Use lambda to filter for nodes with NaN and inf values.""" gf1 = GraphFrame.from_literal(small_mock1) gf2 = GraphFrame.from_literal(small_mock2) gf3 = gf1 / gf2 filt_nan_gf3 = gf3.filter(lambda x: pd.isnull(x["time"]), squash=True) assert len(filt_nan_gf3.graph.roots) == 2 assert all( pd.isnull(inc_time) for inc_time in filt_nan_gf3.dataframe["time (inc)"]) assert all( pd.isnull(exc_time) for exc_time in filt_nan_gf3.dataframe["time"]) assert filt_nan_gf3.dataframe.shape[0] == 2 assert sorted(filt_nan_gf3.dataframe["name"].values) == ["D", "G"] filt_inf_gf3 = gf3.filter(lambda x: np.isinf(x["time"]), squash=True) assert len(filt_inf_gf3.graph.roots) == 1 assert all( np.isinf(inc_time) for inc_time in filt_inf_gf3.dataframe["time (inc)"]) assert all( np.isinf(exc_time) for exc_time in filt_inf_gf3.dataframe["time"]) assert filt_inf_gf3.dataframe.shape[0] == 1 assert filt_inf_gf3.dataframe["name"].values == "B"
def test_filter(mock_graph_literal): """Test the filter operation with a foo-bar tree.""" gf = GraphFrame() gf.from_literal(mock_graph_literal) filtered_gf = gf.filter(lambda x: x['time'] > 5.0) assert len(filtered_gf.dataframe) == 7
def test_mul(mock_graph_literal): gf1 = GraphFrame.from_literal(mock_graph_literal) gf2 = GraphFrame.from_literal(mock_graph_literal) assert gf1.graph is not gf2.graph gf3 = gf1.mul(gf2) assert len(gf3.graph) == gf3.dataframe.shape[0] assert gf3.graph == gf1.graph.union(gf2.graph) assert gf3.dataframe["time"].sum() == 1575 assert gf3.dataframe["time (inc)"].sum() == 35400
def test_imul_operator(mock_graph_literal): gf1 = GraphFrame.from_literal(mock_graph_literal) gf2 = GraphFrame.from_literal(mock_graph_literal) assert gf1.graph is not gf2.graph gf1 *= gf2 assert gf1.graph == gf1.graph.union(gf2.graph) assert len(gf1.graph) == gf1.dataframe.shape[0] assert gf1.dataframe["time"].sum() == 1575 assert gf1.dataframe["time (inc)"].sum() == 37900
def test_mul_operator(mock_graph_literal): gf1 = GraphFrame.from_literal(mock_graph_literal) gf2 = GraphFrame.from_literal(mock_graph_literal) gf3 = GraphFrame.from_literal(mock_graph_literal) assert gf1.graph is not gf2.graph is not gf3.graph gf4 = gf1 * gf2 * gf3 assert gf4.graph == gf1.graph.union(gf2.graph.union(gf3.graph)) assert len(gf4.graph) == gf4.dataframe.shape[0] assert gf4.dataframe["time"].sum() == 17625 assert gf4.dataframe["time (inc)"].sum() == 3060250
def test_filter_squash_query_metric_with_nan_and_inf(small_mock1, small_mock2): """Use call path query language to match nodes with NaN and inf metric values.""" gf1 = GraphFrame.from_literal(small_mock1) gf2 = GraphFrame.from_literal(small_mock2) gf3 = gf1 / gf2 query = [{"time": ">= 1"}] filter_gf3 = gf3.filter(query, squash=True) assert len(filter_gf3.graph.roots) == 3 assert filter_gf3.dataframe["time"].sum() == np.inf assert filter_gf3.dataframe["time (inc)"].sum() == np.inf assert filter_gf3.dataframe.shape[0] == 5
def test_filter_with_nan_and_inf(small_mock1, small_mock2): """Use lambda to filter for metric containing int/float, NaN, and inf values.""" gf1 = GraphFrame.from_literal(small_mock1) gf2 = GraphFrame.from_literal(small_mock2) gf3 = gf1 / gf2 filter_gf3 = gf3.filter(lambda x: x["time"] > 5, squash=True) assert len(filter_gf3.graph.roots) == 2 assert filter_gf3.dataframe["time"].sum() == np.inf assert filter_gf3.dataframe["time (inc)"].sum() == np.inf assert filter_gf3.dataframe.shape[0] == 2 assert sorted(filter_gf3.dataframe["name"].values) == ["B", "H"]
def test_union_dag_same_structure(mock_dag_literal1): # make graphs g1 and g2 that you know are equal gf = GraphFrame.from_literal(mock_dag_literal1) other = GraphFrame.from_literal(mock_dag_literal1) g1 = gf.graph g2 = other.graph assert g1 == g2 g3 = g1.union(g2) assert g3 is not g1 assert g3 is not g2 assert g3 == g1 assert g3 == g2
def test_with_duplicate_in_first_node(mock_graph_literal_duplicate_first): gf = GraphFrame.from_literal(mock_graph_literal_duplicate_first) assert len(gf.graph) == 6 graph_literal = gf.to_literal() assert mock_graph_literal_duplicate_first.sort() == graph_literal.sort()
def test_match(mock_graph_literal): gf = GraphFrame.from_literal(mock_graph_literal) root = gf.graph.roots[0].children[2] path0 = [ {"name": "waldo"}, "+", {"time (inc)": ">= 20.0"}, "+", {"time (inc)": 5.0, "time": 5.0}, ] match0 = [ [ root, root.children[0], root.children[0].children[1], root.children[0].children[1].children[0], root.children[0].children[1].children[0].children[0], ] ] query0 = QueryMatcher(path0) assert query0._match_pattern(gf, root, 0) == match0 path1 = [ {"name": "waldo"}, ("+", {}), {"time (inc)": ">= 20.0"}, "+", {"time (inc)": 7.5, "time": 7.5}, ] query1 = QueryMatcher(path1) assert query1._match_pattern(gf, root, 0) is None
def test_match_0_or_more_wildcard(mock_graph_literal): path = [ {"name": "qux"}, ("*", {"time (inc)": "> 10"}), {"name": "gr[a-z]+", "time (inc)": "<= 10"}, ] gf = GraphFrame.from_literal(mock_graph_literal) node = gf.graph.roots[0].children[1] none_node = gf.graph.roots[0].children[2].children[0].children[1].children[0] correct_paths = [ [ node.children[0], node.children[0].children[0], node.children[0].children[0].children[0], ], [node.children[0], node.children[0].children[0]], ] query = QueryMatcher(path) matched_paths = [] for child in sorted(node.children, key=traversal_order): match = query._match_0_or_more(gf, child, 1) if match is not None: matched_paths.extend(match) assert sorted(matched_paths, key=len) == sorted(correct_paths, key=len) assert query._match_0_or_more(gf, none_node, 1) is None
def test_groupby_aggregate_more_complex(mock_dag_literal_module_more_complex): r"""Test reindex on a more complex graph: a main / \ / \ b e groupby module foo--bar | | --------------> | c f graz | d Node Module a main b foo c graz d graz e bar f foo """ modules = ["main", "foo", "graz", "bar"] gf = GraphFrame.from_literal(mock_dag_literal_module_more_complex) groupby_func = ["module"] agg_func = {"time (inc)": np.sum, "time": np.sum} out_gf = gf.groupby_aggregate(groupby_func, agg_func) assert all(m in out_gf.dataframe.name.values for m in modules) assert len(out_gf.graph) == len(modules)
def test_graphframe_to_literal_with_threads(data_dir, osu_allgather_hpct_db): gf = GraphFrame.from_hpctoolkit(str(osu_allgather_hpct_db)) graph_literal = gf.to_literal() gf2 = GraphFrame.from_literal(graph_literal) assert len(gf.graph) == len(gf2.graph)
def test_groupby_aggregate_simple(mock_dag_literal_module): r"""Test reindex on a simple graph: a main / \ / \ b e groupby module foo bar | | --------------> | | c f graz baz Node Module a main b foo c graz e bar f baz """ modules = ["main", "foo", "graz", "bar", "baz"] gf = GraphFrame.from_literal(mock_dag_literal_module) groupby_func = ["module"] agg_func = {"time (inc)": np.max, "time": np.max} out_gf = gf.groupby_aggregate(groupby_func, agg_func) assert all(m in out_gf.dataframe.name.values for m in modules) assert len(out_gf.graph) == len(modules)
def test_filter_query_squash_high_level(mock_graph_literal): gf = GraphFrame.from_literal(mock_graph_literal) path = [ { "time (inc)": ">= 30.0" }, (2, { "name": "[^b][a-z]+" }), ("*", { "name": "[^b][a-z]+" }), { "name": "gr[a-z]+" }, ] root = gf.graph.roots[0] match = list( set([ root, root.children[1], root.children[1].children[0], root.children[1].children[0].children[0], root.children[1].children[0].children[0].children[1], ])) filtered_squashed_gf = gf.filter(path, squash=True) filtered_squashed_nodes = list(filtered_squashed_gf.graph.traverse()) assert len(filtered_squashed_nodes) == len(match) assert ((filtered_squashed_gf.dataframe.loc[filtered_squashed_nodes, "time (inc)"] >= 30.0) | (~filtered_squashed_gf.dataframe.loc[filtered_squashed_nodes, "name"].str.startswith("b")) | (filtered_squashed_gf.dataframe.loc[filtered_squashed_nodes, "name"].str.startswith("gr")) ).all()
def test_filter_query_squash_low_level(mock_graph_literal): gf = GraphFrame.from_literal(mock_graph_literal) def time_filt(row): return row["time (inc)"] >= 30.0 def no_b_filt(row): return not row["name"].startswith("b") def gr_name_filt(row): return row["name"].startswith("gr") query = (QueryMatcher().match(".", time_filt).rel(2, no_b_filt).rel( "*", no_b_filt).rel(".", gr_name_filt)) root = gf.graph.roots[0] match = list( set([ root, root.children[1], root.children[1].children[0], root.children[1].children[0].children[0], root.children[1].children[0].children[0].children[1], ])) filtered_squashed_gf = gf.filter(query, squash=True) filtered_squashed_nodes = list(filtered_squashed_gf.graph.traverse()) assert len(filtered_squashed_nodes) == len(match) assert ((filtered_squashed_gf.dataframe.loc[filtered_squashed_nodes, "time (inc)"] >= 30.0) | (~filtered_squashed_gf.dataframe.loc[filtered_squashed_nodes, "name"].str.startswith("b")) | (filtered_squashed_gf.dataframe.loc[filtered_squashed_nodes, "name"].str.startswith("gr")) ).all()
def test_with_duplicates(mock_graph_literal_duplicates): gf = GraphFrame.from_literal(mock_graph_literal_duplicates) assert len(gf.graph) == 6 graph_literal = gf.to_literal() assert mock_graph_literal_duplicates.sort() == graph_literal.sort()
def test_graphframe_to_literal(tau_profile_dir): """Sanity test a GraphFrame object with known data.""" gf = GraphFrame.from_tau(str(tau_profile_dir)) graph_literal = gf.to_literal() gf_literal = GraphFrame.from_literal(graph_literal) assert len(gf.graph) == len(gf_literal.graph)
def test_to_dot(mock_graph_literal): gf = GraphFrame.from_literal(mock_graph_literal) output = gf.to_dot(metric="time") # do a simple edge check -- this isn't exhaustive for node in gf.graph.traverse(): for child in node.children: assert '"%s" -> "%s"' % (node._hatchet_nid, child._hatchet_nid) in output
def test_filter_no_squash_mock_literal_multi_subtree_merge(mock_graph_literal): gf = GraphFrame.from_literal(mock_graph_literal) gf.drop_index_levels() filtlist = [1, 3, 7, 9, 21, 23] filtered_gf = gf.filter(lambda x: x["node"]._hatchet_nid in filtlist, squash=False) assert filtered_gf.graph is gf.graph filtered_gf.dataframe.reset_index(drop=False, inplace=True) assert all(n in filtered_gf.graph.traverse() for n in filtered_gf.dataframe["node"])
def test_deepcopy(mock_graph_literal): gf = GraphFrame.from_literal(mock_graph_literal) other = gf.deepcopy() assert gf.graph == other.graph assert gf.dataframe is not other.dataframe assert gf.inc_metrics == other.inc_metrics assert gf.exc_metrics == other.exc_metrics
def test_graphframe_to_literal(hatchet_pyinstrument_json): """Sanity test a GraphFrame object with known data.""" gf = GraphFrame.from_pyinstrument(str(hatchet_pyinstrument_json)) graph_literal = gf.to_literal() gf2 = GraphFrame.from_literal(graph_literal) assert len(gf.graph) == len(gf2.graph)