def test_print_expressions():
    """
    Tests whether the WIR Extraction handles a method call nested inside a print call
    """
    source = cleandoc("""
        print("test".isupper())
        """)
    wir = WirExtractor(ast.parse(source)).extract_wir()

    string_node = WirNode(0, "test", "Constant", CodeReference(1, 6, 1, 12))
    isupper_node = WirNode(1, "isupper", "Call", CodeReference(1, 6, 1, 22))
    print_node = WirNode(2, "print", "Call", CodeReference(1, 0, 1, 23))

    expected = networkx.DiGraph()
    # The string literal is the object "isupper" is called on ...
    expected.add_edge(string_node, isupper_node, type="caller", arg_index=-1)
    # ... and the result of that call is the first argument of "print".
    expected.add_edge(isupper_node, print_node, type="input", arg_index=0)

    compare(networkx.to_dict_of_dicts(wir), networkx.to_dict_of_dicts(expected))
def test_inspector_adult_easy_str_pipeline():
    """
    Tests whether the inspector works when the pipeline is handed over as a source-code string
    """
    with open(ADULT_SIMPLE_PY) as pipeline_file:
        pipeline_source = pipeline_file.read()

    inspector_result = (
        PipelineInspector
        .on_pipeline_from_string(pipeline_source)
        .add_required_inspection(MaterializeFirstOutputRows(5))
        .add_check(NoBiasIntroducedFor(['race']))
        .add_check(NoIllegalFeatures())
        .execute()
    )

    # The extracted DAG has to match the reference DAG built for the "<string-source>" file name.
    expected_dag = get_expected_dag_adult_easy("<string-source>")
    compare(networkx.to_dict_of_dicts(inspector_result.dag), networkx.to_dict_of_dicts(expected_dag))

    # The results of the first DAG node must contain the race histogram inspection.
    first_node_results = list(inspector_result.dag_node_to_inspection_results.values())[0]
    assert HistogramForColumns(['race']) in first_node_results

    check_results = inspector_result.check_to_check_results
    assert check_results[NoBiasIntroducedFor(['race'])].status == CheckStatus.SUCCESS
    assert check_results[NoIllegalFeatures()].status == CheckStatus.FAILURE
def test_frame_merge_sorted():
    """
    Tests whether the monkey patching of ('pandas.core.frame', 'merge') works when sort=True is passed
    """
    pipeline_code = cleandoc("""
        import pandas as pd

        df_a = pd.DataFrame({'A': [0, 2, 4, 8, 5], 'B': [7, 5, 4, 2, 1]})
        df_b = pd.DataFrame({'B': [1, 4, 3, 2, 5], 'C': [1, 5, 4, 11, None]})
        df_merged = df_a.merge(df_b, on='B', sort=True)
        df_expected = pd.DataFrame({'A': [5, 8, 4, 2], 'B': [1, 2, 4, 5], 'C': [1, 11, 5, None]})
        pd.testing.assert_frame_equal(df_merged, df_expected)
        """)
    result = _pipeline_executor.singleton.run(python_code=pipeline_code, track_code_references=True,
                                              inspections=[RowLineage(5)])
    # The node at index 3 is dropped before comparing; the expected DAG below only holds the two
    # data sources and the join (presumably the dropped node is the df_expected data source).
    result.dag.remove_node(list(result.dag.nodes)[3])

    left_source = DagNode(0,
                          BasicCodeLocation("<string-source>", 3),
                          OperatorContext(OperatorType.DATA_SOURCE, FunctionInfo('pandas.core.frame', 'DataFrame')),
                          DagNodeDetails(None, ['A', 'B']),
                          OptionalCodeInfo(CodeReference(3, 7, 3, 65),
                                           "pd.DataFrame({'A': [0, 2, 4, 8, 5], 'B': [7, 5, 4, 2, 1]})"))
    right_source = DagNode(1,
                           BasicCodeLocation("<string-source>", 4),
                           OperatorContext(OperatorType.DATA_SOURCE, FunctionInfo('pandas.core.frame', 'DataFrame')),
                           DagNodeDetails(None, ['B', 'C']),
                           OptionalCodeInfo(CodeReference(4, 7, 4, 69),
                                            "pd.DataFrame({'B': [1, 4, 3, 2, 5], 'C': [1, 5, 4, 11, None]})"))
    join_node = DagNode(2,
                        BasicCodeLocation("<string-source>", 5),
                        OperatorContext(OperatorType.JOIN, FunctionInfo('pandas.core.frame', 'merge')),
                        DagNodeDetails("on 'B'", ['A', 'B', 'C']),
                        OptionalCodeInfo(CodeReference(5, 12, 5, 47), "df_a.merge(df_b, on='B', sort=True)"))

    expected_dag = networkx.DiGraph()
    expected_dag.add_edge(left_source, join_node)
    expected_dag.add_edge(right_source, join_node)
    compare(networkx.to_dict_of_dicts(result.dag), networkx.to_dict_of_dicts(expected_dag))

    # Each joined row must carry the lineage ids of the two input rows it was built from.
    lineage_output = result.dag_node_to_inspection_results[join_node][RowLineage(5)]
    expected_lineage = DataFrame([[5, 1, 1., {LineageId(0, 4), LineageId(1, 0)}],
                                  [8, 2, 11., {LineageId(0, 3), LineageId(1, 3)}],
                                  [4, 4, 5., {LineageId(0, 2), LineageId(1, 1)}],
                                  [2, 5, math.nan, {LineageId(0, 1), LineageId(1, 4)}]],
                                 columns=['A', 'B', 'C', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(lineage_output.reset_index(drop=True),
                                      expected_lineage.reset_index(drop=True))
def test_nested_import_from():
    """
    Tests whether the WIR Extraction works when a name imported with "from ... import" out of a
    nested module is called
    """
    source = cleandoc("""
        from mlinspect.utils import get_project_root

        print(get_project_root())
        """)
    wir = WirExtractor(ast.parse(source)).extract_wir()

    import_node = WirNode(0, "mlinspect.utils", "Import", CodeReference(1, 0, 1, 44))
    inner_call_node = WirNode(1, "get_project_root", "Call", CodeReference(3, 6, 3, 24))
    print_node = WirNode(2, "print", "Call", CodeReference(3, 0, 3, 25))

    expected = networkx.DiGraph()
    # The import acts as the caller of the imported function.
    expected.add_edge(import_node, inner_call_node, type="caller", arg_index=-1)
    expected.add_edge(inner_call_node, print_node, type="input", arg_index=0)

    compare(networkx.to_dict_of_dicts(wir), networkx.to_dict_of_dicts(expected))
def test_call_after_call():
    """
    Tests whether the WIR Extraction works for a method call chained onto the result of another method call
    """
    source = cleandoc("""
        "hello ".capitalize().count()
        """)
    wir = WirExtractor(ast.parse(source)).extract_wir()

    string_node = WirNode(0, "hello ", "Constant", CodeReference(1, 0, 1, 8))
    capitalize_node = WirNode(1, "capitalize", "Call", CodeReference(1, 0, 1, 21))
    count_node = WirNode(2, "count", "Call", CodeReference(1, 0, 1, 29))

    expected = networkx.DiGraph()
    # Both links are "caller" edges: each call is made on the value produced by the previous node.
    expected.add_edge(string_node, capitalize_node, type="caller", arg_index=-1)
    expected.add_edge(capitalize_node, count_node, type="caller", arg_index=-1)

    compare(networkx.to_dict_of_dicts(wir), networkx.to_dict_of_dicts(expected))
def test_print_var_usage():
    """
    Tests whether the WIR Extraction works when an assigned variable is used as a call argument
    """
    source = cleandoc("""
        test_var = "test"
        print(test_var)""")
    wir = WirExtractor(ast.parse(source)).extract_wir()

    string_node = WirNode(0, "test", "Constant", CodeReference(1, 11, 1, 17))
    assign_node = WirNode(1, "test_var", "Assign", CodeReference(1, 0, 1, 17))
    print_node = WirNode(2, "print", "Call", CodeReference(2, 0, 2, 15))

    expected = networkx.DiGraph()
    expected.add_edge(string_node, assign_node, type="input", arg_index=0)
    expected.add_node(print_node)
    # The assignment, not the constant, is what feeds the print call.
    expected.add_edge(assign_node, print_node, type="input", arg_index=0)

    compare(networkx.to_dict_of_dicts(wir), networkx.to_dict_of_dicts(expected))
def test_black_box_operation():
    """
    Tests whether a DataFrame produced by a function without mlinspect support shows up as a
    MISSING_OP node in front of the patched pandas operation
    """
    pipeline_code = cleandoc("""
        import pandas
        from mlinspect.testing._testing_helper_utils import black_box_df_op

        df = black_box_df_op()
        df = df.dropna()
        print("df")
        """)
    actual_dag = _pipeline_executor.singleton.run(python_code=pipeline_code, track_code_references=True).dag

    # Both expected nodes point at the same "df.dropna()" call on line 5 of the snippet.
    missing_op_node = DagNode(-1,
                              BasicCodeLocation("<string-source>", 5),
                              OperatorContext(OperatorType.MISSING_OP, None),
                              DagNodeDetails('Warning! Operator <string-source>:5 (df.dropna()) encountered a '
                                             'DataFrame resulting from an operation without mlinspect support!',
                                             ['A']),
                              OptionalCodeInfo(CodeReference(5, 5, 5, 16), 'df.dropna()'))
    dropna_node = DagNode(0,
                          BasicCodeLocation("<string-source>", 5),
                          OperatorContext(OperatorType.SELECTION, FunctionInfo('pandas.core.frame', 'dropna')),
                          DagNodeDetails('dropna', ['A']),
                          OptionalCodeInfo(CodeReference(5, 5, 5, 16), 'df.dropna()'))

    expected_dag = networkx.DiGraph()
    expected_dag.add_edge(missing_op_node, dropna_node)

    compare(networkx.to_dict_of_dicts(actual_dag), networkx.to_dict_of_dicts(expected_dag))
def test_string_call_attribute():
    """
    Tests whether the WIR Extraction works for a method called on a string literal with one argument
    """
    source = cleandoc("""
        "hello ".join("world")
        """)
    wir = WirExtractor(ast.parse(source)).extract_wir()

    receiver_node = WirNode(0, "hello ", "Constant", CodeReference(1, 0, 1, 8))
    argument_node = WirNode(1, "world", "Constant", CodeReference(1, 14, 1, 21))
    join_node = WirNode(2, "join", "Call", CodeReference(1, 0, 1, 22))

    expected = networkx.DiGraph()
    # The literal the method is called on is the caller; the other literal is the first argument.
    expected.add_edge(receiver_node, join_node, type="caller", arg_index=-1)
    expected.add_edge(argument_node, join_node, type="input", arg_index=0)

    compare(networkx.to_dict_of_dicts(wir), networkx.to_dict_of_dicts(expected))
def test_func_defs_and_loops():
    """
    Tests whether the DAG extraction works for the pipeline returned by
    get_test_code_with_function_def_and_for_loop()
    """
    pipeline_code = get_test_code_with_function_def_and_for_loop()
    actual_dag = _pipeline_executor.singleton.run(python_code=pipeline_code, track_code_references=True).dag

    source_node = DagNode(0,
                          BasicCodeLocation("<string-source>", 4),
                          OperatorContext(OperatorType.DATA_SOURCE, FunctionInfo('pandas.core.frame', 'DataFrame')),
                          DagNodeDetails(None, ['A']),
                          OptionalCodeInfo(CodeReference(4, 9, 4, 44), "pd.DataFrame([0, 1], columns=['A'])"))
    # The two selections differ only in their node id: both refer to the same "df.dropna()" on line 8.
    first_dropna = DagNode(1,
                           BasicCodeLocation("<string-source>", 8),
                           OperatorContext(OperatorType.SELECTION, FunctionInfo('pandas.core.frame', 'dropna')),
                           DagNodeDetails('dropna', ['A']),
                           OptionalCodeInfo(CodeReference(8, 9, 8, 20), 'df.dropna()'))
    second_dropna = DagNode(2,
                            BasicCodeLocation("<string-source>", 8),
                            OperatorContext(OperatorType.SELECTION, FunctionInfo('pandas.core.frame', 'dropna')),
                            DagNodeDetails('dropna', ['A']),
                            OptionalCodeInfo(CodeReference(8, 9, 8, 20), 'df.dropna()'))

    expected_dag = networkx.DiGraph()
    expected_dag.add_edge(source_node, first_dropna)
    expected_dag.add_edge(first_dropna, second_dropna)

    compare(networkx.to_dict_of_dicts(actual_dag), networkx.to_dict_of_dicts(expected_dag))
def as_dict(self):
    """
    Build a dict representation of this object.

    Returns:
        dict: The module and class name of the object, the dict form of its light structure
            environments, and the sanitized dict-of-dicts form of the connectivity graph and
            of every environment subgraph (keyed like ``self.environment_subgraphs``).
    """
    cls = self.__class__
    serialized = {"@module": cls.__module__, "@class": cls.__name__}
    serialized["light_structure_environments"] = self.light_structure_environments.as_dict()
    serialized["connectivity_graph"] = jsanitize(nx.to_dict_of_dicts(self._graph))

    subgraph_dicts = {}
    for env_key, subgraph in self.environment_subgraphs.items():
        subgraph_dicts[env_key] = jsanitize(nx.to_dict_of_dicts(subgraph))
    serialized["environment_subgraphs"] = subgraph_dicts
    return serialized
def main():
    """
    Load the toy sample and complete graphs, build a Network on them and print the feature
    matrix and the row-to-node mapping before and after probing a few nodes.
    """
    np.set_printoptions(precision=2)

    # Alternative sample file: "../data/toys/toy_comps_with_unobserved.txt"
    sample_path = "../data/toys/toy_comps.txt"
    complete_path = "../data/toys/toy1.txt"
    gsamp = nx.to_dict_of_dicts(nx.read_adjlist(sample_path))
    gcomp = nx.to_dict_of_dicts(nx.read_adjlist(complete_path))

    # Other feature types tried here before: 'default', 'node2vec', 'n2v-refex'
    net = Network(gcomp, gsamp, feature_type='knn')
    order = 'linear'

    print(net.calculate_features(net, order=order))
    print(net.row_to_node)

    # Probe the nodes one after the other and show how the features change after each probe.
    probes = (('9', "Probed node 9..."),
              ('12', "Probed node 12..."),
              ('11', "Probed node 11..."))
    for node, message in probes:
        net.probe(node)
        print(message)
        print(net.update_features(net, node, order=order))
        print(net.row_to_node)

    # Recompute the features from scratch once all probes are done.
    print(net.calculate_features(net, order=order))
def test_list_creation():
    """
    Tests whether the WIR Extraction works for a list literal passed as a call argument
    """
    source = cleandoc("""
        print(["test1", "test2"])
        """)
    wir = WirExtractor(ast.parse(source)).extract_wir()

    first_item = WirNode(0, "test1", "Constant", CodeReference(1, 7, 1, 14))
    second_item = WirNode(1, "test2", "Constant", CodeReference(1, 16, 1, 23))
    list_node = WirNode(2, "as_list", "List", CodeReference(1, 6, 1, 24))
    print_node = WirNode(3, "print", "Call", CodeReference(1, 0, 1, 25))

    expected = networkx.DiGraph()
    # arg_index records the position of each element inside the list.
    expected.add_edge(first_item, list_node, type="input", arg_index=0)
    expected.add_edge(second_item, list_node, type="input", arg_index=1)
    expected.add_edge(list_node, print_node, type="input", arg_index=0)

    compare(networkx.to_dict_of_dicts(wir), networkx.to_dict_of_dicts(expected))
def test_import_from():
    """
    Tests whether the WIR Extraction works when a function imported with "from ... import" is called
    """
    source = cleandoc("""
        from math import sqrt

        sqrt(4)
        """)
    wir = WirExtractor(ast.parse(source)).extract_wir()

    import_node = WirNode(0, "math", "Import", CodeReference(1, 0, 1, 21))
    number_node = WirNode(1, "4", "Constant", CodeReference(3, 5, 3, 6))
    sqrt_node = WirNode(2, "sqrt", "Call", CodeReference(3, 0, 3, 7))

    expected = networkx.DiGraph()
    expected.add_edge(import_node, sqrt_node, type="caller", arg_index=-1)
    expected.add_edge(number_node, sqrt_node, type="input", arg_index=0)

    compare(networkx.to_dict_of_dicts(wir), networkx.to_dict_of_dicts(expected))
def test_statsmodels_add_constant():
    """
    Tests whether the monkey patching of ('statsmodel.api', 'add_constant') works
    """
    pipeline_code = cleandoc("""
        import numpy as np
        import statsmodels.api as sm
        np.random.seed(42)
        test = np.random.random(100)
        test = sm.add_constant(test)
        assert len(test) == 100
        """)
    result = _pipeline_executor.singleton.run(python_code=pipeline_code, track_code_references=True,
                                              inspections=[RowLineage(2)])

    random_node = DagNode(0,
                          BasicCodeLocation("<string-source>", 4),
                          OperatorContext(OperatorType.DATA_SOURCE, FunctionInfo('numpy.random', 'random')),
                          DagNodeDetails('random', ['array']),
                          OptionalCodeInfo(CodeReference(4, 7, 4, 28), "np.random.random(100)"))
    add_constant_node = DagNode(1,
                                BasicCodeLocation("<string-source>", 5),
                                OperatorContext(OperatorType.PROJECTION_MODIFY,
                                                FunctionInfo('statsmodel.api', 'add_constant')),
                                DagNodeDetails('Adds const column', ['array']),
                                OptionalCodeInfo(CodeReference(5, 7, 5, 28), "sm.add_constant(test)"))

    expected_dag = networkx.DiGraph()
    expected_dag.add_edge(random_node, add_constant_node)
    compare(networkx.to_dict_of_dicts(result.dag), networkx.to_dict_of_dicts(expected_dag))

    # Same lineage check for both operators. The values are compared with atol=1, so the
    # 0.5 placeholders only pin the rough value range while the lineage ids are checked as given.
    expected_rows_per_node = [
        (random_node, [[0.5, {LineageId(0, 0)}],
                       [0.5, {LineageId(0, 1)}]]),
        (add_constant_node, [[numpy.array([0.5, 1.]), {LineageId(0, 0)}],
                             [numpy.array([0.5, 1.]), {LineageId(0, 1)}]]),
    ]
    for dag_node, expected_rows in expected_rows_per_node:
        lineage_output = result.dag_node_to_inspection_results[dag_node][RowLineage(2)]
        expected_lineage = DataFrame(expected_rows, columns=['array', 'mlinspect_lineage'])
        pandas.testing.assert_frame_equal(lineage_output.reset_index(drop=True),
                                          expected_lineage.reset_index(drop=True),
                                          atol=1)
def test_format_to_dotfile(dependencies):
    """
    Check that a graph exported with to_dotfile can be read back as DOT and still has the
    same nodes as the graph it was created from.
    """
    original = create_graph_from(dependencies)
    dot_text = to_dotfile(graph=original, path=os.getcwd())

    # Parse the generated DOT text again instead of inspecting it as a string.
    reloaded = nx.drawing.nx_pydot.read_dot(io.StringIO(dot_text))

    assert reloaded.nodes() == original.nodes()

    original_keys = nx.to_dict_of_dicts(original).keys()
    reloaded_keys = nx.to_dict_of_dicts(reloaded).keys()
    assert original_keys == reloaded_keys
def test_frame__setitem__():
    """
    Tests whether the monkey patching of ('pandas.core.frame', '__setitem__') works
    """
    # NOTE(review): the 14-space indentation of the continuation lines inside the snippet is
    # derived from the end column 53 of the expected data source CodeReference - confirm.
    pipeline_code = cleandoc("""
        import pandas as pd

        pandas_df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
                      'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
                      'baz': [1, 2, 3, 4, 5, 6],
                      'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
        pandas_df['baz'] = pandas_df['baz'] + 1
        df_expected = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
                      'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
                      'baz': [2, 3, 4, 5, 6, 7],
                      'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
        pd.testing.assert_frame_equal(pandas_df, df_expected)
        """)
    result = _pipeline_executor.singleton.run(python_code=pipeline_code, track_code_references=True,
                                              inspections=[RowLineage(2)])
    # The node at index 3 is dropped before comparing; the expected DAG below only holds the
    # data source, the projection and the modification.
    result.dag.remove_node(list(result.dag.nodes)[3])

    source_node = DagNode(0,
                          BasicCodeLocation("<string-source>", 3),
                          OperatorContext(OperatorType.DATA_SOURCE, FunctionInfo('pandas.core.frame', 'DataFrame')),
                          DagNodeDetails(None, ['foo', 'bar', 'baz', 'zoo']),
                          OptionalCodeInfo(CodeReference(3, 12, 6, 53),
                                           "pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],\n"
                                           "              'bar': ['A', 'B', 'C', 'A', 'B', 'C'],\n"
                                           "              'baz': [1, 2, 3, 4, 5, 6],\n"
                                           "              'zoo': ['x', 'y', 'z', 'q', 'w', 't']})"))
    projection_node = DagNode(1,
                              BasicCodeLocation("<string-source>", 7),
                              OperatorContext(OperatorType.PROJECTION,
                                              FunctionInfo('pandas.core.frame', '__getitem__')),
                              DagNodeDetails("to ['baz']", ['baz']),
                              OptionalCodeInfo(CodeReference(7, 19, 7, 35), "pandas_df['baz']"))
    setitem_node = DagNode(2,
                           BasicCodeLocation("<string-source>", 7),
                           OperatorContext(OperatorType.PROJECTION_MODIFY,
                                           FunctionInfo('pandas.core.frame', '__setitem__')),
                           DagNodeDetails("modifies ['baz']", ['foo', 'bar', 'baz', 'zoo']),
                           OptionalCodeInfo(CodeReference(7, 0, 7, 39),
                                            "pandas_df['baz'] = pandas_df['baz'] + 1"))

    expected_dag = networkx.DiGraph()
    expected_dag.add_edge(source_node, projection_node)
    # The modification hangs off the data source, not off the projection.
    expected_dag.add_edge(source_node, setitem_node)
    compare(networkx.to_dict_of_dicts(result.dag), networkx.to_dict_of_dicts(expected_dag))

    lineage_output = result.dag_node_to_inspection_results[setitem_node][RowLineage(2)]
    expected_lineage = DataFrame([['one', 'A', 2, 'x', {LineageId(0, 0)}],
                                  ['one', 'B', 3, 'y', {LineageId(0, 1)}]],
                                 columns=['foo', 'bar', 'baz', 'zoo', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(lineage_output.reset_index(drop=True),
                                      expected_lineage.reset_index(drop=True))
def test_index_subscript_with_module_information():
    """
    Tests whether the WIR Extraction works for an index subscript when runtime module
    information is added to the extracted WIR
    """
    source = cleandoc("""
        import pandas as pd

        data = pd.read_csv('test_path')
        data['income-per-year']
        """)
    extractor = WirExtractor(ast.parse(source))

    read_csv_info = ('pandas.io.parsers', 'read_csv')
    getitem_info = ('pandas.core.frame', '__getitem__')
    # Module information is keyed by the code reference of the node it belongs to.
    runtime_module_info = {
        CodeReference(3, 7, 3, 31): read_csv_info,
        CodeReference(4, 0, 4, 23): getitem_info,
    }
    wir = extractor.extract_wir()
    extractor.add_runtime_info(runtime_module_info, {})

    import_node = WirNode(0, "pandas", "Import", CodeReference(1, 0, 1, 19))
    path_node = WirNode(1, "test_path", "Constant", CodeReference(3, 19, 3, 30))
    read_csv_node = WirNode(2, "read_csv", "Call", CodeReference(3, 7, 3, 31), read_csv_info)
    assign_node = WirNode(3, "data", "Assign", CodeReference(3, 0, 3, 31))
    column_node = WirNode(4, "income-per-year", "Constant", CodeReference(4, 5, 4, 22))
    subscript_node = WirNode(5, "Index-Subscript", "Subscript", CodeReference(4, 0, 4, 23), getitem_info)

    expected = networkx.DiGraph()
    expected.add_edge(import_node, read_csv_node, type="caller", arg_index=-1)
    expected.add_edge(path_node, read_csv_node, type="input", arg_index=0)
    expected.add_edge(read_csv_node, assign_node, type="input", arg_index=0)
    expected.add_edge(assign_node, subscript_node, type="caller", arg_index=-1)
    expected.add_edge(column_node, subscript_node, type="input", arg_index=0)

    compare(networkx.to_dict_of_dicts(wir), networkx.to_dict_of_dicts(expected))
def test_frame__getitem__selection():
    """
    Tests whether the monkey patching of ('pandas.core.frame', '__getitem__') works when a
    boolean Series is used to filter rows
    """
    pipeline_code = cleandoc("""
        import pandas as pd

        df = pd.DataFrame({'A': [0, 2, 4, 8, 5], 'B': [1, 5, 4, 11, None]})
        df_selection = df[df['A'] > 3]
        df_expected = pd.DataFrame({'A': [4, 8, 5], 'B': [4, 11, None]})
        pd.testing.assert_frame_equal(df_selection.reset_index(drop=True), df_expected.reset_index(drop=True))
        """)
    result = _pipeline_executor.singleton.run(python_code=pipeline_code, track_code_references=True,
                                              inspections=[RowLineage(2)])
    # The node at index 3 is dropped before comparing; the expected DAG below only holds the
    # data source, the projection and the selection.
    result.dag.remove_node(list(result.dag.nodes)[3])

    source_node = DagNode(0,
                          BasicCodeLocation("<string-source>", 3),
                          OperatorContext(OperatorType.DATA_SOURCE, FunctionInfo('pandas.core.frame', 'DataFrame')),
                          DagNodeDetails(None, ['A', 'B']),
                          OptionalCodeInfo(CodeReference(3, 5, 3, 67),
                                           "pd.DataFrame({'A': [0, 2, 4, 8, 5], 'B': [1, 5, 4, 11, None]})"))
    projection_node = DagNode(1,
                              BasicCodeLocation("<string-source>", 4),
                              OperatorContext(OperatorType.PROJECTION,
                                              FunctionInfo('pandas.core.frame', '__getitem__')),
                              DagNodeDetails("to ['A']", ['A']),
                              OptionalCodeInfo(CodeReference(4, 18, 4, 25), "df['A']"))
    selection_node = DagNode(2,
                             BasicCodeLocation("<string-source>", 4),
                             OperatorContext(OperatorType.SELECTION,
                                             FunctionInfo('pandas.core.frame', '__getitem__')),
                             DagNodeDetails("Select by Series: df[df['A'] > 3]", ['A', 'B']),
                             OptionalCodeInfo(CodeReference(4, 15, 4, 30), "df[df['A'] > 3]"))

    expected_dag = networkx.DiGraph()
    expected_dag.add_edge(source_node, projection_node)
    expected_dag.add_edge(source_node, selection_node)
    compare(networkx.to_dict_of_dicts(result.dag), networkx.to_dict_of_dicts(expected_dag))

    # The first two surviving rows are input rows 2 and 3 of the data source.
    lineage_output = result.dag_node_to_inspection_results[selection_node][RowLineage(2)]
    expected_lineage = DataFrame([[4, 4., {LineageId(0, 2)}],
                                  [8, 11., {LineageId(0, 3)}]],
                                 columns=['A', 'B', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(lineage_output.reset_index(drop=True),
                                      expected_lineage.reset_index(drop=True))
def test_inspector_adult_easy_py_pipeline_without_inspections():
    """
    Tests whether the .py version of the inspector works when no inspections are added
    """
    inspector_result = PipelineInspector.on_pipeline_from_py_file(ADULT_SIMPLE_PY).execute()

    actual = networkx.to_dict_of_dicts(inspector_result.dag)
    expected = networkx.to_dict_of_dicts(get_expected_dag_adult_easy(ADULT_SIMPLE_PY))
    compare(actual, expected)
def test_my_word_to_vec_transformer():
    """
    Tests whether the monkey patching of ('example_pipelines.healthcare.healthcare_utils', 'MyW2VTransformer') works
    """
    pipeline_code = cleandoc("""
        import pandas as pd
        from example_pipelines.healthcare.healthcare_utils import MyW2VTransformer
        import numpy as np

        df = pd.DataFrame({'A': ['cat_a', 'cat_b', 'cat_a', 'cat_c']})
        word_to_vec = MyW2VTransformer(min_count=2, size=2, workers=1)
        encoded_data = word_to_vec.fit_transform(df)
        assert encoded_data.shape == (4, 2)
        """)
    result = _pipeline_executor.singleton.run(python_code=pipeline_code, track_code_references=True,
                                              inspections=[RowLineage(3)],
                                              custom_monkey_patching=[custom_monkeypatching])

    source_node = DagNode(0,
                          BasicCodeLocation("<string-source>", 5),
                          OperatorContext(OperatorType.DATA_SOURCE, FunctionInfo('pandas.core.frame', 'DataFrame')),
                          DagNodeDetails(None, ['A']),
                          OptionalCodeInfo(CodeReference(5, 5, 5, 62),
                                           "pd.DataFrame({'A': ['cat_a', 'cat_b', 'cat_a', 'cat_c']})"))
    transformer_node = DagNode(1,
                               BasicCodeLocation("<string-source>", 6),
                               OperatorContext(OperatorType.TRANSFORMER,
                                               FunctionInfo('example_pipelines.healthcare.healthcare_utils',
                                                            'MyW2VTransformer')),
                               DagNodeDetails('Word2Vec', ['array']),
                               OptionalCodeInfo(CodeReference(6, 14, 6, 62),
                                                'MyW2VTransformer(min_count=2, size=2, workers=1)'))

    expected_dag = networkx.DiGraph()
    expected_dag.add_edge(source_node, transformer_node)
    compare(networkx.to_dict_of_dicts(result.dag), networkx.to_dict_of_dicts(expected_dag))

    lineage_output = result.dag_node_to_inspection_results[transformer_node][RowLineage(3)]
    expected_lineage = DataFrame([[numpy.array([0.0, 0.0, 0.0]), {LineageId(0, 0)}],
                                  [numpy.array([0.0, 0.0, 0.0]), {LineageId(0, 1)}],
                                  [numpy.array([0.0, 0.0, 0.0]), {LineageId(0, 2)}]],
                                 columns=['array', 'mlinspect_lineage'])
    # Only the lineage column is compared; the array values in the expected frame are placeholders.
    pandas.testing.assert_series_equal(lineage_output["mlinspect_lineage"],
                                       expected_lineage["mlinspect_lineage"])
    # NOTE(review): this checks the shape of the expected placeholder, not of lineage_output, so
    # it cannot fail because of the transformer - confirm whether the output was meant here.
    assert expected_lineage.iloc[0, 0].shape == (3, )
def print_and_plot_graph(self):
    """
    Print the module-level graph ``G`` as a dict of dicts and show it in a matplotlib window.

    All nodes are drawn in red first; the nodes listed in the module-level
    ``mentioned_concepts`` are then drawn again in green on top of them.
    """
    # Both names are only read here, so no "global" declaration is required.
    # Function-call form of print: valid on Python 3 and prints the same text on Python 2.
    print(nx.to_dict_of_dicts(G))

    # Stuff to plot the graph
    pos = nx.spring_layout(G)
    # draw_networkx_nodes takes no font_size argument (labels are drawn separately below).
    nx.draw_networkx_nodes(G, pos, node_size=3000, node_color='red')
    nx.draw_networkx_nodes(G, pos, node_size=3000, node_color='green', nodelist=mentioned_concepts)
    nx.draw_networkx_edges(G, pos, width=5, alpha=0.5, edge_color='black')
    nx.draw_networkx_labels(G, pos, font_size=10)
    nx.draw_networkx_edge_labels(G, pos, font_size=8)
    plt.axis('off')
    plt.show()
def test_inspector_adult_easy_ipynb_pipeline():
    """
    Tests whether the inspector works when the pipeline is read from an .ipynb file
    """
    inspection_result = (
        PipelineInspector
        .on_pipeline_from_ipynb_file(FILE_NB)
        .add_inspection(MaterializeFirstRowsInspection(5))
        .execute()
    )

    actual = networkx.to_dict_of_dicts(inspection_result.dag)
    expected = networkx.to_dict_of_dicts(get_expected_dag_adult_easy_ipynb())
    compare(actual, expected)
def test_groupby_agg():
    """
    Tests whether the monkey patching of ('pandas.core.frame', 'groupby') and
    ('pandas.core.groupby.generic', 'agg') works.
    """
    pipeline_code = cleandoc("""
        import pandas as pd

        df = pd.DataFrame({'group': ['A', 'B', 'A', 'C', 'B'], 'value': [1, 2, 1, 3, 4]})
        df_groupby_agg = df.groupby('group').agg(mean_value=('value', 'mean'))
        df_expected = pd.DataFrame({'group': ['A', 'B', 'C'], 'mean_value': [1, 3, 3]})
        pd.testing.assert_frame_equal(df_groupby_agg.reset_index(drop=False), df_expected.reset_index(drop=True))
        """)
    result = _pipeline_executor.singleton.run(python_code=pipeline_code, track_code_references=True,
                                              inspections=[RowLineage(2)])
    # The node at index 2 is dropped before comparing; the expected DAG below only holds the
    # data source and the combined groupby/agg node.
    result.dag.remove_node(list(result.dag.nodes)[2])

    source_node = DagNode(0,
                          BasicCodeLocation("<string-source>", 3),
                          OperatorContext(OperatorType.DATA_SOURCE, FunctionInfo('pandas.core.frame', 'DataFrame')),
                          DagNodeDetails(None, ['group', 'value']),
                          OptionalCodeInfo(CodeReference(3, 5, 3, 81),
                                           "pd.DataFrame({'group': ['A', 'B', 'A', 'C', 'B'], "
                                           "'value': [1, 2, 1, 3, 4]})"))
    groupby_agg_node = DagNode(1,
                               BasicCodeLocation("<string-source>", 4),
                               OperatorContext(OperatorType.GROUP_BY_AGG,
                                               FunctionInfo('pandas.core.groupby.generic', 'agg')),
                               DagNodeDetails("Groupby 'group', Aggregate: '{'mean_value': ('value', 'mean')}'",
                                              ['group', 'mean_value']),
                               OptionalCodeInfo(CodeReference(4, 17, 4, 70),
                                                "df.groupby('group').agg(mean_value=('value', 'mean'))"))

    expected_dag = networkx.DiGraph()
    expected_dag.add_edge(source_node, groupby_agg_node)
    compare(networkx.to_dict_of_dicts(result.dag), networkx.to_dict_of_dicts(expected_dag))

    # The expected lineage ids use operator id 1, i.e. the id of the groupby/agg node itself.
    lineage_output = result.dag_node_to_inspection_results[groupby_agg_node][RowLineage(2)]
    expected_lineage = DataFrame([["A", 1, {LineageId(1, 0)}],
                                  ['B', 3, {LineageId(1, 1)}]],
                                 columns=['group', 'mean_value', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(lineage_output.reset_index(drop=True),
                                      expected_lineage.reset_index(drop=True))
def test_index_assign():
    """
    Tests whether the WIR Extraction works for an assignment to an index subscript
    """
    source = cleandoc("""
        import pandas as pd

        data = pd.read_csv('test_path')
        data['label'] = "test"
        """)
    wir = WirExtractor(ast.parse(source)).extract_wir()

    import_node = WirNode(0, "pandas", "Import", CodeReference(1, 0, 1, 19))
    path_node = WirNode(1, "test_path", "Constant", CodeReference(3, 19, 3, 30))
    read_csv_node = WirNode(2, "read_csv", "Call", CodeReference(3, 7, 3, 31))
    assign_node = WirNode(3, "data", "Assign", CodeReference(3, 0, 3, 31))
    key_node = WirNode(4, "label", "Constant", CodeReference(4, 5, 4, 12))
    value_node = WirNode(5, "test", "Constant", CodeReference(4, 16, 4, 22))
    subscript_assign_node = WirNode(6, 'data.label', 'Subscript-Assign', CodeReference(4, 0, 4, 13))

    expected = networkx.DiGraph()
    expected.add_edge(import_node, read_csv_node, type="caller", arg_index=-1)
    expected.add_edge(path_node, read_csv_node, type="input", arg_index=0)
    expected.add_edge(read_csv_node, assign_node, type="input", arg_index=0)
    # The subscript key and the assigned value are expected as nodes without any edges.
    expected.add_node(key_node)
    expected.add_node(value_node)
    expected.add_edge(assign_node, subscript_assign_node, type="caller", arg_index=-1)

    compare(networkx.to_dict_of_dicts(wir), networkx.to_dict_of_dicts(expected))
def test_frame__getitem__frame():
    """
    Tests whether the monkey patching of ('pandas.core.frame', '__getitem__') works when a list
    of column names is passed
    """
    # The snippet is spelled out line by line because its exact whitespace matters: line 3 ends
    # with a space and line 4 is indented by four spaces, and both show up in the expected
    # source text and code reference of the data source node below.
    # NOTE(review): layout rebuilt from that expected text and the end column 28 - confirm.
    pipeline_code = "\n".join([
        "import pandas as pd",
        "",
        "df = pd.DataFrame([[0, None, 2], [1, 2, 3], [4, None, 2], [9, 2, 3], [6, 1, 2], [1, 2, 3]], ",
        "    columns=['A', 'B', 'C'])",
        "df_projection = df[['A', 'C']]",
        "df_expected = pd.DataFrame([[0, 2], [1, 3], [4, 2], [9, 3], [6, 2], [1, 3]], columns=['A', 'C'])",
        "pd.testing.assert_frame_equal(df_projection, df_expected)",
    ])
    result = _pipeline_executor.singleton.run(python_code=pipeline_code, track_code_references=True,
                                              inspections=[RowLineage(2)])
    # The node at index 2 is dropped before comparing; the expected DAG below only holds the
    # data source and the projection.
    result.dag.remove_node(list(result.dag.nodes)[2])

    source_node = DagNode(0,
                          BasicCodeLocation("<string-source>", 3),
                          OperatorContext(OperatorType.DATA_SOURCE, FunctionInfo('pandas.core.frame', 'DataFrame')),
                          DagNodeDetails(None, ['A', 'B', 'C']),
                          OptionalCodeInfo(CodeReference(3, 5, 4, 28),
                                           "pd.DataFrame([[0, None, 2], [1, 2, 3], [4, None, 2], "
                                           "[9, 2, 3], [6, 1, 2], [1, 2, 3]], \n"
                                           "    columns=['A', 'B', 'C'])"))
    projection_node = DagNode(1,
                              BasicCodeLocation("<string-source>", 5),
                              OperatorContext(OperatorType.PROJECTION,
                                              FunctionInfo('pandas.core.frame', '__getitem__')),
                              DagNodeDetails("to ['A', 'C']", ['A', 'C']),
                              OptionalCodeInfo(CodeReference(5, 16, 5, 30), "df[['A', 'C']]"))

    expected_dag = networkx.DiGraph()
    expected_dag.add_edge(source_node, projection_node)
    compare(networkx.to_dict_of_dicts(result.dag), networkx.to_dict_of_dicts(expected_dag))

    lineage_output = result.dag_node_to_inspection_results[projection_node][RowLineage(2)]
    expected_lineage = DataFrame([[0, 2, {LineageId(0, 0)}],
                                  [1, 3, {LineageId(0, 1)}]],
                                 columns=['A', 'C', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(lineage_output.reset_index(drop=True),
                                      expected_lineage.reset_index(drop=True))
def test_inspector_adult_easy_str_pipeline():
    """
    Tests whether the inspector works when the pipeline is handed over as a source-code string
    """
    with open(ADULT_EASY_FILE_PY) as pipeline_file:
        pipeline_source = pipeline_file.read()

    inspection_result = (
        PipelineInspector
        .on_pipeline_from_string(pipeline_source)
        .add_inspection(MaterializeFirstRowsInspection(5))
        .execute()
    )

    actual = networkx.to_dict_of_dicts(inspection_result.dag)
    expected = networkx.to_dict_of_dicts(get_expected_dag_adult_easy_py())
    assert actual == expected
def test_frame_replace():
    """
    Tests whether the monkey patching of ('pandas.core.frame', 'replace') works
    """
    pipeline_code = cleandoc("""
        import pandas as pd

        df = pd.DataFrame(['Low', 'Medium', 'Low', 'High', None], columns=['A'])
        df_replace = df.replace('Medium', 'Low')
        df_expected = pd.DataFrame(['Low', 'Low', 'Low', 'High', None], columns=['A'])
        pd.testing.assert_frame_equal(df_replace.reset_index(drop=True), df_expected.reset_index(drop=True))
        """)
    result = _pipeline_executor.singleton.run(python_code=pipeline_code, track_code_references=True,
                                              inspections=[RowLineage(2)])
    # The node at index 2 is dropped before comparing; the expected DAG below only holds the
    # data source and the replace operation.
    result.dag.remove_node(list(result.dag.nodes)[2])

    source_node = DagNode(0,
                          BasicCodeLocation("<string-source>", 3),
                          OperatorContext(OperatorType.DATA_SOURCE, FunctionInfo('pandas.core.frame', 'DataFrame')),
                          DagNodeDetails(None, ['A']),
                          OptionalCodeInfo(CodeReference(3, 5, 3, 72),
                                           "pd.DataFrame(['Low', 'Medium', 'Low', 'High', None], "
                                           "columns=['A'])"))
    replace_node = DagNode(1,
                           BasicCodeLocation("<string-source>", 4),
                           OperatorContext(OperatorType.PROJECTION_MODIFY,
                                           FunctionInfo('pandas.core.frame', 'replace')),
                           DagNodeDetails("Replace 'Medium' with 'Low'", ['A']),
                           OptionalCodeInfo(CodeReference(4, 13, 4, 40), "df.replace('Medium', 'Low')"))

    expected_dag = networkx.DiGraph()
    expected_dag.add_edge(source_node, replace_node)
    compare(networkx.to_dict_of_dicts(result.dag), networkx.to_dict_of_dicts(expected_dag))

    # Row 1 was 'Medium' in the input, so both of the first two output rows read 'Low'.
    lineage_output = result.dag_node_to_inspection_results[replace_node][RowLineage(2)]
    expected_lineage = DataFrame([['Low', {LineageId(0, 0)}],
                                  ['Low', {LineageId(0, 1)}]],
                                 columns=['A', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(lineage_output.reset_index(drop=True),
                                      expected_lineage.reset_index(drop=True))
def fill_colors(graph):
    """
    Use a graph coloring strategy to populate the graph with the appropriate
    colors to solve the puzzle.

    According to the original documentation, ``populate_color`` calls back
    into this function when there are multiple possible paths to explore.

    graph : The networkx sudoku graph containing the puzzle to solve.

    returns : The graph itself when ``optimal_spot`` reports position 0 (no
        position left to fill); otherwise the first candidate produced by
        ``populate_color`` for which ``is_populated`` is true; ``None`` when
        no candidate color leads to a populated graph.
    """
    adjacent = nx.to_dict_of_dicts(graph)
    pos = optimal_spot(graph, adjacent)

    # If there are no more positions left to fill, the graph is full.
    if pos == 0:
        return graph

    # Try each candidate color on its own copy of the graph, so a failed
    # attempt never pollutes the caller's graph.
    for color in choose_color(graph, adjacent, pos):
        filled_graph = populate_color(graph.copy(), pos, color)
        if is_populated(filled_graph):
            return filled_graph

    # No color worked (or none was available): signal a dead end explicitly.
    return None
def test_sklearn_wir_preprocessing():
    """
    Checks the sklearn WIR preprocessing on the adult_easy pipeline
    """
    preprocessed_wir = SklearnWirPreprocessor().preprocess_wir(get_test_wir())

    # Reduce the WIR to operator nodes only and check the resulting size.
    calls_and_subscripts = WirToDagTransformer.remove_all_nodes_but_calls_and_subscripts(
        preprocessed_wir)
    dag = WirToDagTransformer.remove_all_non_operators_and_update_names(
        calls_and_subscripts)
    assert len(dag) == 17

    expected_dag = get_expected_dag_adult_easy_py()
    # NOTE(review): the comparison uses preprocessed_wir, not dag - confirm
    # that this is intended.
    actual = networkx.to_dict_of_dicts(preprocessed_wir)
    expected = networkx.to_dict_of_dicts(expected_dag)
    compare(actual, expected)
def as_dict(self):
    """
    Build a dict representation of the ConnectedComponent object.

    Nodes are keyed by the decimal string of their ``isite`` attribute, and
    the graph is re-keyed with those strings; edge keys are converted with
    ``self._edgekey_to_edgedictkey`` and edge data passed through ``jsanitize``.

    Returns:
        dict: Bson-serializable dict representation of the ConnectedComponent object.
    """
    subgraph = self._connected_subgraph

    # String index -> (node, node data)
    nodes = {}
    for node, data in subgraph.nodes(data=True):
        nodes["{:d}".format(node.isite)] = (node, data)

    # Reverse lookup: node -> string index
    node2stringindex = {}
    for strindex, (node, _data) in nodes.items():
        node2stringindex[node] = strindex

    graph_dict = {}
    for source, neighbours in nx.to_dict_of_dicts(subgraph).items():
        source_key = node2stringindex[source]
        graph_dict[source_key] = {}
        for target, edges in neighbours.items():
            target_key = node2stringindex[target]
            graph_dict[source_key][target_key] = {}
            for edge_key, edge_data in edges.items():
                dict_key = self._edgekey_to_edgedictkey(edge_key)
                graph_dict[source_key][target_key][dict_key] = jsanitize(edge_data)

    serialized_nodes = {}
    for strindex, (node, data) in nodes.items():
        serialized_nodes[strindex] = (node.as_dict(), data)

    return {
        "@module": self.__class__.__module__,
        "@class": self.__class__.__name__,
        "nodes": serialized_nodes,
        "graph": graph_dict,
    }
def test_to_dict_of_dicts_with_edgedata_param(edgelist):
    """With ``edge_data`` given, every innermost value is that constant."""
    G = nx.Graph()
    G.add_edges_from(edgelist)

    # Innermost dict value == edge_data when edge_data != None.
    # Any edge data already stored on G is overwritten in the output.
    fill_value = 10
    expected = {
        0: {1: fill_value},
        1: {0: fill_value, 2: fill_value},
        2: {1: fill_value},
    }
    result = nx.to_dict_of_dicts(G, edge_data=fill_value)
    assert result == expected
def checking(G):
    """
    Collect one ``DFS`` result per group of nodes reachable from each other.

    Every node of ``G`` that has not been reached by an earlier traversal
    starts a new ``DFS``; the graphs returned by those calls are gathered
    in a set.
    """
    visited = set()
    components = set()
    for node in nx.to_dict_of_dicts(G):
        if node in visited:
            continue
        component, reached = DFS(G, node, return_fired=True)
        visited |= reached
        components.add(component)
    return components
def connectivity(G):
    """
    Return the set of graphs produced by ``bfs`` from every not-yet-seen node.

    Nodes are visited in the key order of ``nx.to_dict_of_dicts(G)``; a node
    already reached by a previous traversal does not start a new one.
    """
    seen = set()
    components = set()
    for node in nx.to_dict_of_dicts(G):
        if node not in seen:
            component, reached = bfs(G, node, return_used=True)
            seen |= reached
            components.add(component)
    return components
def bfs(G, start, return_used=False):
    """
    Breadth-first traversal of ``G`` from ``start``.

    Parameters:
        G: networkx graph; every traversed edge must carry a ``'weight'``
            entry (it is read unconditionally).
        start: node to start from.
        return_used: when true, also return the set of reached nodes.

    Returns:
        An ``nx.Graph`` holding the edges through which each node was first
        discovered (with their weights), or ``(graph, reached_nodes)`` when
        ``return_used`` is true.  If ``start`` has no neighbours the graph
        is empty, because only edges are added to it.
    """
    # Local import keeps this block self-contained.
    from collections import deque

    adjacency = nx.to_dict_of_dicts(G)
    res = nx.Graph()
    used = {start}
    # deque gives O(1) removal from the front; list.pop(0) is O(n).
    queue = deque([start])
    while queue:
        curr = queue.popleft()
        for n, edge_data in adjacency[curr].items():
            if n not in used:
                used.add(n)
                res.add_edge(curr, n, weight=edge_data['weight'])
                queue.append(n)
    return (res, used) if return_used else res
def Dijkstra(G, root):
    """
    Compute shortest distances from ``root`` using edge ``'weight'`` values.

    Returns a dict mapping every node to ``(distance, predecessor)``; the
    root maps to ``(0, None)`` and nodes never relaxed keep
    ``(float('inf'), None)``.
    """
    adjacency = nx.to_dict_of_dicts(G)

    D = {}
    for node in adjacency:
        D[node] = (float('inf'), None)
    D[root] = (0, None)

    done = set()
    while len(done) < len(adjacency):
        # Pick the unfinished node with the smallest tentative distance.
        candidates = (item for item in adjacency.items() if item[0] not in done)
        closest, neighbours = min(candidates, key=lambda item: D[item[0]][0])
        base = D[closest][0]
        for neighbour in neighbours:
            candidate = base + neighbours[neighbour]['weight']
            if candidate < D[neighbour][0]:
                D[neighbour] = (candidate, closest)
        done.add(closest)
    return D
def dfs(G, start):
    """
    Stack-based traversal of ``G`` from ``start``.

    Returns an ``nx.Graph`` containing, for every reached node other than
    ``start``, the edge through which it was first discovered, with the
    edge's ``'weight'`` copied over.
    """
    adjacency = nx.to_dict_of_dicts(G)
    tree = nx.Graph()
    visited = {start}
    pending = [start]
    while pending:
        node = pending.pop()
        for neighbour, attrs in adjacency[node].items():
            if neighbour in visited:
                continue
            visited.add(neighbour)
            tree.add_edge(node, neighbour, weight=attrs['weight'])
            pending.append(neighbour)
    return tree
def DFS(G, start, return_fired=False):
    """
    Stack-based traversal of ``G`` from ``start``.

    Returns the ``nx.Graph`` of discovery edges (weights copied from the
    ``'weight'`` edge attribute).  With ``return_fired`` true, returns
    ``(graph, reached_nodes)`` instead.
    """
    neighbours_of = nx.to_dict_of_dicts(G)
    discovered = nx.Graph()
    reached = {start}
    stack = [start]
    while stack:
        current = stack.pop()
        edges = neighbours_of[current]
        for other in edges:
            if other not in reached:
                reached.add(other)
                discovered.add_edge(current, other, weight=edges[other]['weight'])
                stack.append(other)
    if return_fired:
        return (discovered, reached)
    return discovered
def BFS(G, start):
    """
    Breadth-first traversal of ``G`` from ``start``.

    Returns an ``nx.Graph`` holding the edges through which each node was
    first discovered, with the ``'weight'`` of the original edge copied
    over (the attribute is read unconditionally, so every traversed edge
    must have it).
    """
    # Local import keeps this block self-contained.
    from collections import deque

    adjacency = nx.to_dict_of_dicts(G)
    resgraph = nx.Graph()
    fired = {start}
    # deque.popleft() is O(1); the previous list.pop(0) shifted the whole list.
    queue = deque([start])
    while queue:
        curr = queue.popleft()
        for neighbour, edge_data in adjacency[curr].items():
            if neighbour not in fired:
                fired.add(neighbour)
                resgraph.add_edge(curr, neighbour, weight=edge_data['weight'])
                queue.append(neighbour)
    return resgraph
def bfs(G, start):
    """
    Build the breadth-first discovery tree of ``G`` rooted at ``start``.

    Returns an ``nx.Graph`` whose edges are the ones through which nodes
    were first reached; each carries the ``'weight'`` of the original edge,
    which therefore has to be present on every traversed edge.
    """
    # Local import keeps this block self-contained.
    from collections import deque

    adjacency = nx.to_dict_of_dicts(G)
    tree = nx.Graph()
    used = {start}
    # Use a real FIFO: list.pop(0) costs O(n) per dequeue.
    queue = deque([start])
    while queue:
        curr = queue.popleft()
        for n, edge_data in adjacency[curr].items():
            if n not in used:
                used.add(n)
                tree.add_edge(curr, n, weight=edge_data['weight'])
                queue.append(n)
    return tree
def shortest_path(G,first,last):
    """
    Walk backwards from ``last`` towards ``first`` and collect the edges used.

    At each step an edge (last, neighbour) is accepted when its weight equals
    ``dejks[last] - dejks[neighbour]``.
    NOTE(review): this presumably relies on ``dejkstra`` returning a mapping
    node -> distance from ``first`` - confirm against its definition.

    Returns an ``nx.Graph`` with the accepted edges as soon as ``first`` is
    reached.  If the loop ends without reaching ``first`` the function falls
    through and returns ``None``.
    """
    way=nx.Graph()
    # d is a flag: 1 once an edge has been accepted for the current node, so
    # at most one neighbour is followed per pass of the while loop.
    d = 0
    friend = [last]
    dejks = dejkstra(nx.to_dict_of_dicts(G), first)
    while len(friend) !=0:
        for neighbour in G[last]:
            if d == 0:
                if G[last][neighbour]['weight'] == (dejks[last] - dejks[neighbour]):
                    way.add_edge(last, neighbour, weight = G[last][neighbour]['weight'])
                    friend.append(neighbour)
                    d = 1
                    if neighbour ==first:
                        return way
        # Reset the flag and continue from the most recently added node.
        d = 0
        last = friend.pop(-1)
def graph2dict(g, return_dict_of_dict=True):
    """Convert a graph into an adjacency dictionary.

    Parameters
    ----------
    g : :any:`networkx.DiGraph`, :any:`networkx.Graph`, etc.
        Anything that is not already a :any:`DiGraph<networkx.DiGraph>` is
        first wrapped in a ``QueueNetworkDiGraph``.
    return_dict_of_dict : bool (optional, default: ``True``)
        ``True`` returns a dict of dicts, ``False`` a dict of lists.

    Returns
    -------
    adj : dict
        With ``return_dict_of_dict=True``: a dictionary keyed by vertex
        whose values are :class:`dicts<.dict>` mapping each adjacent
        vertex to the properties of the connecting edge. Otherwise a
        dictionary keyed by vertex whose values are lists of the adjacent
        vertices.

    Examples
    --------
    >>> import queueing_tool as qt
    >>> import networkx as nx
    >>> adj = {0: [1, 2], 1: [0], 2: [0, 3], 3: [2]}
    >>> g = nx.DiGraph(adj)
    >>> qt.graph2dict(g, return_dict_of_dict=True)
    ... # doctest: +NORMALIZE_WHITESPACE
    {0: {1: {}, 2: {}},
    1: {0: {}},
    2: {0: {}, 3: {}},
    3: {2: {}}}
    >>> qt.graph2dict(g, return_dict_of_dict=False)
    {0: [1, 2], 1: [0], 2: [0, 3], 3: [2]}
    """
    digraph = g if isinstance(g, nx.DiGraph) else QueueNetworkDiGraph(g)
    adjacency = nx.to_dict_of_dicts(digraph)

    if return_dict_of_dict:
        return adjacency

    # Keep only the neighbour ids, dropping the edge property dicts.
    neighbour_lists = {}
    for vertex, neighbours in adjacency.items():
        neighbour_lists[vertex] = list(neighbours.keys())
    return neighbour_lists
def get_dictionary_graph():
    """
    Convert the module-level ``graph`` to a nested dictionary with the
    NetworkX to_dict_of_dicts() function and return it.

    If producer1 has rated producer2, but producer2 hasn't rated anyone,
    the dictionary would be structured as follows:

    {
        producer1.name:
            {producer1.source_rating1.source.name:
                {producer1.source_rating1.tag.name:
                    producer1.source_rating1.rating}
            }
        producer2.name: { }
    }
    """
    graph_as_dict = to_dict_of_dicts(graph)
    return graph_as_dict
def lin_syst(G, order_fcn=None):
    r"""
    Given a graph *G* with edges labeled with integers :math:`1, ..., M`,
    compute matrices :math:`A,B` such that

    .. math:: \mathbf w(t+1) = A \mathbf w + B \mathbf r,

    where :math:`w_i` represents the number of individual systems at node
    :math:`n` in mode :math:`m` if

    .. math:: i = (m-1) K + order\_fcn(n).

    Parameters:
        G: graph whose edges all carry an integer ``"mode"`` attribute.
        order_fcn: callable mapping a node to its integer index.  If not
            given, nodes are indexed by their position in ``G.nodes()``.

    Returns:
        ``(A, B)``: ``A`` is the block-diagonal sparse matrix of the
        per-mode transition matrices, ``B`` is the block matrix whose
        block-row *i* repeats the mode-*i* matrix, minus ``2 * A``.
    """
    if order_fcn is None:
        # Precompute positions once instead of an O(n) list.index per lookup.
        position = {node: idx for idx, node in enumerate(G.nodes())}
        order_fcn = position.__getitem__

    adj_data = nx.to_dict_of_dicts(G)
    num_nodes = len(G)

    T_list = []
    for mode in range(1, _maxmode(G) + 1):
        # Rows of (value, row index, column index) for every edge in this
        # mode.  reshape(-1, 3) keeps the array two-dimensional even when
        # the mode has no edges, so the column slicing below cannot fail.
        data = np.array(
            [
                (1, order_fcn(node2), order_fcn(node1))
                for node1, node1_out in adj_data.items()
                for node2, edge in node1_out.items()
                if edge["mode"] == mode
            ],
            dtype=int,
        ).reshape(-1, 3)
        T_mode = scipy.sparse.coo_matrix(
            (data[:, 0], (data[:, 1], data[:, 2])),
            shape=(num_nodes, num_nodes))
        T_list.append(T_mode)

    A = scipy.sparse.block_diag(tuple(T_list), dtype=np.int8)
    B = scipy.sparse.bmat([[Ti for _ in T_list] for Ti in T_list]) - 2 * A
    return A, B
def dijkstra_all(graph_dict):
    """Return the ``dijkstra`` result for every ordered (start, end) pair of keys."""
    return [
        dijkstra(graph_dict, start, end)
        for start in graph_dict.keys()
        for end in graph_dict.keys()
    ]


#%% read in data - a pandas dataframe is used purely for convenience
import pandas as pd

data = pd.read_table(
    "../data/HW1_4.txt",
    sep=" ",
    header=None,
    names=['vx', 'vy', 'weight'],
)

# %% let networkx build the dictionary structure that the dijkstra
# function takes as input
import networkx as nx

graph = nx.from_pandas_dataframe(data, 'vx', 'vy', 'weight')
# graph_nodes = graph.nodes()
graph_dict = nx.to_dict_of_dicts(graph)

# %% run the functions
path = dijkstra(graph_dict, 1, 6)
all_paths = dijkstra_all(graph_dict)
def writeGraphToFile(self,filePath):
    """
    Convert ``self.dGraph`` to a dict of dicts, print it, and hand it to
    ``gm.saveDict`` together with ``filePath``.
    """
    graph_as_dict = nx.to_dict_of_dicts(self.dGraph)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print(graph_as_dict)
    gm.saveDict(filePath, graph_as_dict)
def _store_json(graph, fname, disp_params, **kws):
    """
    Write ``graph`` as JSON in NetworkX dict-of-dicts form.

    Parameters:
        graph: networkx graph to serialise.
        fname: either an object with a ``write`` method (used as is and left
            open) or a path, which is opened for writing and closed again.
        disp_params: currently ignored (see TODO).
        **kws: forwarded to ``json.dump``.
    """
    import json

    # TODO: Obey disp_params on json
    m = nx.to_dict_of_dicts(graph)
    if hasattr(fname, "write"):
        # Already a file-like object: previous behaviour, unchanged.
        json.dump(m, fname, **kws)
    else:
        # A plain path used to fail inside json.dump; open it here instead.
        with open(fname, "w") as handle:
            json.dump(m, handle, **kws)
def _reshuffle_level(oldnetwork, newnetwork, sources):
    """Shuffle the out-edge weights of ``sources`` among those same edges.

    The shuffled weights are written into ``newnetwork``.  Returns the
    shuffled weight list and the set of nodes the edges point to.
    ``sources`` must be iterable more than once.
    """
    weights = []
    targets = set()
    for src in sources:
        weights.extend(oldnetwork[src][dst]['weight'] for dst in oldnetwork[src])
        targets.update(oldnetwork[src])
    np.random.shuffle(weights)

    # Hand the shuffled weights back out in the same edge order they were
    # collected in.
    count = 0
    for src in sources:
        for dst in oldnetwork[src]:
            newnetwork[src][dst] = {'weight': weights[count]}
            count += 1
    return weights, targets


def perturbNetwork(network,mirlist=None,ishier=True):
    '''
    Build a copy of a hierarchical network with edge weights shuffled
    within each level.

    Assume hierarchical network for now.
    Provide list of input weights to initiate hierarchy

    network: networkx graph whose edges carry a 'weight' attribute.
    mirlist: top-level nodes; entries missing from the network are skipped.
    ishier: currently unused.

    Three levels are processed in turn: the nodes in ``mirlist``, the nodes
    they point to, and the nodes those point to.  Returns an ``nx.DiGraph``
    built from the reshuffled edges.
    '''
    if mirlist is None:
        mirlist = []
    newnetwork = defaultdict(dict)
    oldnetwork = nx.to_dict_of_dicts(network)

    # Level 1: the given nodes that actually occur in the network.
    present = [m for m in mirlist if m in oldnetwork]
    mirweights, tfs = _reshuffle_level(oldnetwork, newnetwork, present)
    print(mirweights)

    # Level 2: targets of level 1; level 3: targets of level 2.
    _tfweights, motifs = _reshuffle_level(oldnetwork, newnetwork, tfs)
    _reshuffle_level(oldnetwork, newnetwork, motifs)

    print('Have %d nodes in new network' % len(newnetwork))
    return nx.DiGraph(newnetwork)
def get_graph_str(self):
    """Return ``str()`` of the dict-of-dicts form of ``self.graph``."""
    adjacency = nx.to_dict_of_dicts(self.graph)
    return str(adjacency)
# Vary number of nodes, draw graphs and call different algorithms for
# shortest path computation
nnodes = 1000
bg = nx.complete_graph(nnodes)
# Snapshot the edges so attributes can be updated while looping over them.
nedges = list(bg.edges())
print("number of nodes are %s" % nnodes)
print("number of edges are %s" % len(nedges))

# add_node() would insert the whole range object as ONE extra node;
# add_nodes_from() adds the individual node ids.
bg.add_nodes_from(range(nnodes))
lnode = nnodes - 1

# Give every edge a random integer weight below nnodes (keyword form of
# the attribute instead of a positional attribute dict).
for u, v in nedges:
    bg.add_edge(u, v, weight=random.randrange(nnodes))

pos = nx.spring_layout(bg)  # positions for all nodes
plt.figure(figsize=(32, 32))
nx.draw_networkx(bg, pos, font_size=20, font_family='sans-serif',
                 alpha=.6, width=2.0, node_size=900)
plt.axis('off')
plt.show()

# Run each algorithm on the same dict-of-dicts graph, from node 0 to the
# last node.
graph = nx.to_dict_of_dicts(bg, nodelist=None, edge_data=None)
path1 = shortestPath(graph, 0, lnode, "dijkstra_algorithm")
print("Shortest path by dijkstra_algorithm is: " + str(path1))
path2 = shortestPath(graph, 0, lnode, "bellman_ford_algorithm")
print("Shortest path by bellman_ford_algorithm is: " + str(path2))
path3 = shortestPath(graph, 0, lnode, "floydwarshall_algorithm")
print("Shortest path by floydwarshall_algorithm is: " + str(path3))
friend = [last] dejks = dejkstra(nx.to_dict_of_dicts(G), first) while len(friend) !=0: for neighbour in G[last]: if d == 0: if G[last][neighbour]['weight'] == (dejks[last] - dejks[neighbour]): way.add_edge(last, neighbour, weight = G[last][neighbour]['weight']) friend.append(neighbour) d = 1 if neighbour ==first: return way d = 0 last = friend.pop(-1) first,last = input().split() way = shortest_path(G,first,last) pos = nx.spring_layout(way, iterations=1) nx.draw(way, pos) nx.draw_networkx_edge_labels(way, pos) nx.draw_networkx_labels(way,pos,font_size=7,font_family='sans-serif') way1 = nx.to_dict_of_dicts(way) summary = 0 for neighbour in way1: for neighbour1 in way1[neighbour]: summary+=way1[neighbour][neighbour1]['weight'] print(summary/2) plot.show()
# <codecell> g.nodes() # <codecell> # agency list g.edges() # <codecell> nx.draw(g) # <codecell> d = nx.to_dict_of_dicts(g) # same as # nx.edges # <codecell> # Create a new graph from our dict_of_dicts g2 = nx.from_dict_of_dicts(d) # <codecell> # dumps as a json object import simplejson simplejson.dumps(nx.to_dict_of_dicts(g)) # <codecell>
def get_graph(self):
    """Return the graph from ``self.state.get_graph()`` in dict-of-dicts form."""
    state_graph = self.state.get_graph()
    return nx.to_dict_of_dicts(state_graph)
def to_dict(self):
    """
    Return ``self.tree`` as a dict with two entries: ``edges`` (the tree in
    dict-of-dicts form) and ``nodes`` (the tree's ``node`` attribute).
    """
    return dict(
        edges=nx.to_dict_of_dicts(self.tree),
        nodes=self.tree.node,
    )
def update_database(self, dataset, architecture, graph):
    """
    Evaluate ``graph`` and append one tab-separated record to ``dataset``.

    Parameters:
        dataset: path of the file to append to.
        architecture: value stored in the record's metadata.
        graph: graph passed to ``performer.evaluate_metrics`` and
            ``self.extract_features``.

    Nothing is written when ``performer.evaluate_metrics`` returns ``None``.
    Otherwise the record is the metrics, followed by the extracted features,
    followed by the metadata (timestamp, architecture, benchmark,
    optimization target, dict-of-dicts form of the graph).
    """
    metrics = performer.evaluate_metrics(graph)
    # Identity check: "== None" can misbehave for array-like results.
    if metrics is None:
        return

    # Single formatted string: same output as the old space-separated print
    # statement, and valid on both Python 2 and 3.
    print('%s %s %s %s' % ('performer.update_database:', self.BENCHMARK,
                           architecture, metrics))

    metadata = [datetime.now(), architecture, self.BENCHMARK,
                self.OPTIMIZATION_TARGET, to_dict_of_dicts(graph)]
    design_instance = metrics + self.extract_features(graph) + metadata
    with open(dataset, 'a') as f:
        f.write('\t'.join(map(str, design_instance)) + '\n')
def fitness(self, individual):
    """
    Return the cost of ``individual``, a list of link weights.

    The weights are written onto the edges of ``self.G`` (edges without a
    "speed" entry first get a default speed of 100).  The result is
    ``self.link_changes_cost(individual)`` plus the sum of the per-link
    traffic costs; when the link change cost is already >= ``huge`` it is
    returned on its own and the traffic computation is skipped.

    Side effect: mutates the "speed" and "weight" edge attributes of
    ``self.G``.
    """
    # init capacities
    default_speed = 100
    missing_speed = [(src, t) for (src, t, data) in self.G.edges(data=True)
                     if "speed" not in data]
    for src, dst in missing_speed:
        self.G[src][dst]["speed"] = default_speed

    # Copy list to pop
    weights = individual[:]
    # set weights
    # TODO: look at using set_weights function
    for (src, dst) in self.G.edges():
        # TODO: check order - whether this is forwards or backwards
        # if weights already set
        # FIX check this!
        self.G[src][dst]["weight"] = weights.pop()

    # Cost is sum of network cost (from traffic on links) and link changes
    # cost (how many links changed from original weights)
    # TODO look at link change costs
    link_change_cost = self.link_changes_cost(individual)
    if link_change_cost >= huge:
        # Cost is already a huge number, return
        # (saves expensive computation of network traffic)
        return link_change_cost

    # Per-edge load accumulator, initialised to 0 for every edge.
    loads = nx.to_dict_of_dicts(self.G, edge_data=0)
    # dict() accepts both the dict returned by older networkx releases and
    # the (node, paths) iterator returned by newer ones.
    apsf = dict(nx.all_pairs_dijkstra_path(self.G))
    for src, data in apsf.items():
        for dst, path in data.items():
            # load from this source, dest pair
            load = self.traffmat[src][dst]
            for (nodea, nodeb) in zip(path, path[1:]):
                # add load on this edge due to source, dest pair
                loads[nodea][nodeb] += load

    def cost(load, cap):
        # Cost is set according to equation (1) of [1].
        # Float arithmetic throughout: with integer operands, Python 2
        # would truncate load / cap and thresholds such as 1 / 3 to 0.
        utilization = float(load) / cap
        if utilization < 1.0 / 3:
            return utilization
        elif utilization < 2.0 / 3:
            return 3 * utilization - 2.0 / 3
        elif utilization < 9.0 / 10:
            return 10 * utilization - 16.0 / 3
        elif utilization < 1:
            return 70 * utilization - 178.0 / 3
        elif utilization < 11.0 / 10:
            return 500 * utilization - 1468.0 / 3
        else:
            return 5000 * utilization - 16318.0 / 3

    # link change cost was a low number, calculate network traffic cost
    # calculate cost for each edge that has a load set
    link_costs = sum(cost(loads[s][t], data["speed"])
                     for (s, t, data) in self.G.edges(data=True))
    return link_change_cost + link_costs
def adjacency2graph(adjacency, edge_type=None, adjust=1, **kwargs):
    """Build a directed graph from an adjacency list, dict, or matrix.

    The adjacency data is normalised, the ``edge_type`` edge property is
    applied, and the result is adjusted for terminal vertices before the
    final graph is created. The returned graph is meant to be usable with
    a :class:`.QueueNetwork` instance. Note that the graph may be altered.

    Parameters
    ----------
    adjacency : dict or :class:`~numpy.ndarray`
        An adjacency list as either a dict, or an adjacency matrix.
    edge_type : dict or :class:`~numpy.ndarray` (optional)
        Edge types in the same layout as ``adjacency``; each value is
        stored as the ``edge_type`` property of the corresponding edge.
    adjust : int ``{1, 2}`` (optional, default: 1)
        Specifies what to do when the graph has terminal vertices
        (nodes with no out-edges). Note that if ``adjust`` is not 2
        then it is assumed to be 1. There are two choices:

        * ``adjust = 1``: A loop is added to each terminal node in the
          graph, and their ``edge_type`` of that loop is set to 0.
        * ``adjust = 2``: All edges leading to terminal nodes have
          their ``edge_type`` set to 0.

    **kwargs :
        Unused.

    Returns
    -------
    out : :any:`networkx.DiGraph`
        A directed graph with the ``edge_type`` edge property.

    Raises
    ------
    TypeError
        Is raised if ``adjacency`` is not a dict or
        :class:`~numpy.ndarray`.

    Examples
    --------
    If terminal nodes are such that all in-edges have edge type ``0``
    then nothing is changed. However, if a node is a terminal node then
    a loop is added with edge type 0.

    >>> import queueing_tool as qt
    >>> adj = {
    ...     0: {1: {}},
    ...     1: {2: {},
    ...         3: {}},
    ...     3: {0: {}}}
    >>> eTy = {0: {1: 1}, 1: {2: 2, 3: 4}, 3: {0: 1}}
    >>> # A loop will be added to vertex 2
    >>> g = qt.adjacency2graph(adj, edge_type=eTy)
    >>> ans = qt.graph2dict(g)
    >>> sorted(ans.items())     # doctest: +NORMALIZE_WHITESPACE
    [(0, {1: {'edge_type': 1}}),
     (1, {2: {'edge_type': 2}, 3: {'edge_type': 4}}),
     (2, {2: {'edge_type': 0}}),
     (3, {0: {'edge_type': 1}})]

    You can use a dict of lists to represent the adjacency list.

    >>> adj = {0 : [1], 1: [2, 3], 3: [0]}
    >>> g = qt.adjacency2graph(adj, edge_type=eTy)
    >>> ans = qt.graph2dict(g)
    >>> sorted(ans.items())     # doctest: +NORMALIZE_WHITESPACE
    [(0, {1: {'edge_type': 1}}),
     (1, {2: {'edge_type': 2}, 3: {'edge_type': 4}}),
     (2, {2: {'edge_type': 0}}),
     (3, {0: {'edge_type': 1}})]

    Alternatively, you could have this function adjust the edges that
    lead to terminal vertices by changing their edge type to 0:

    >>> # The graph is unaltered
    >>> g = qt.adjacency2graph(adj, edge_type=eTy, adjust=2)
    >>> ans = qt.graph2dict(g)
    >>> sorted(ans.items())     # doctest: +NORMALIZE_WHITESPACE
    [(0, {1: {'edge_type': 1}}),
     (1, {2: {'edge_type': 0}, 3: {'edge_type': 4}}),
     (2, {}),
     (3, {0: {'edge_type': 1}})]
    """
    # Normalise the adjacency input to a dict of dicts.
    if isinstance(adjacency, np.ndarray):
        adjacency = _matrix2dict(adjacency)
    elif isinstance(adjacency, dict):
        adjacency = _dict2dict(adjacency)
    else:
        raise TypeError("If the adjacency parameter is supplied it must be a "
                        "dict, or a numpy.ndarray.")

    # Normalise the edge types the same way; no edge types means nothing
    # to apply.
    if edge_type is None:
        edge_type = {}
    elif isinstance(edge_type, np.ndarray):
        edge_type = _matrix2dict(edge_type, etype=True)
    elif isinstance(edge_type, dict):
        edge_type = _dict2dict(edge_type)

    for source, typed_targets in edge_type.items():
        for target, etype in typed_targets.items():
            adjacency[source][target]['edge_type'] = etype

    # Round-trip through a DiGraph before adjusting terminal vertices.
    digraph = nx.from_dict_of_dicts(adjacency, create_using=nx.DiGraph())
    adjusted = _adjacency_adjust(nx.to_dict_of_dicts(digraph), adjust, True)
    return nx.from_dict_of_dicts(adjusted, create_using=nx.DiGraph())
def _prepare_graph(g, g_colors, q_cls, q_arg, adjust_graph):
    """Prepares a graph for use in :class:`.QueueNetwork`.

    This function is called by ``__init__`` in the
    :class:`.QueueNetwork` class. It creates the :class:`.QueueServer`
    instances that sit on the edges, and sets various edge and node
    properties that are used when drawing the graph.

    Parameters
    ----------
    g : :any:`networkx.DiGraph`, :class:`numpy.ndarray`, dict, \
        ``None``, etc.
        Any object that networkx can turn into a
        :any:`DiGraph<networkx.DiGraph>`
    g_colors : dict
        A dictionary of colors. The specific keys used are
        ``vertex_color`` and ``vertex_fill_color`` for vertices that
        do not have any loops. See :class:`.QueueNetwork` for the
        default values passed.
    q_cls : dict
        A dictionary where the keys are integers that represent an edge
        type, and the values are :class:`.QueueServer` classes.
    q_arg : dict
        A dictionary where the keys are integers that represent an edge
        type, and the values are the arguments that are used when
        creating an instance of that :class:`.QueueServer` class.
    adjust_graph : bool
        Specifies whether the graph will be adjusted using
        :func:`.adjacency2graph`.

    Returns
    -------
    g : :class:`.QueueNetworkDiGraph`
    queues : list
        A list of :class:`QueueServers<.QueueServer>` where
        ``queues[k]`` is the ``QueueServer`` that sets on the edge with
        edge index ``k``.

    Notes
    -----
    The graph ``g`` should have the ``edge_type`` edge property map.
    If it does not then an ``edge_type`` edge property is
    created and set to 1.

    The following properties are set by each queue: ``vertex_color``,
    ``vertex_fill_color``, ``edge_color``.
    See :class:`.QueueServer` for more on setting these values.

    The following properties are assigned as a properties to the graph;
    their default values for each edge or vertex is shown:

        * ``vertex_pen_width``: ``1``,
        * ``vertex_size``: ``8``,
        * ``edge_control_points``: ``[]``
        * ``edge_marker_size``: ``8``
        * ``edge_pen_width``: ``1.25``

    Raises
    ------
    TypeError
        Raised when the parameter ``g`` is not of a type that can be
        made into a :any:`networkx.DiGraph`.
    """
    g = _test_graph(g)

    if adjust_graph:
        # Remember node positions: the graph is rebuilt from its adjacency
        # dict below, and the saved positions are re-applied afterwards.
        pos = nx.get_node_attributes(g, 'pos')
        ans = nx.to_dict_of_dicts(g)
        g = adjacency2graph(ans, adjust=2, is_directed=g.is_directed())
        g = QueueNetworkDiGraph(g)
        if len(pos) > 0:
            g.set_pos(pos)

    # Register the drawing properties before any values are assigned.
    g.new_vertex_property('vertex_color')
    g.new_vertex_property('vertex_fill_color')
    g.new_vertex_property('vertex_pen_width')
    g.new_vertex_property('vertex_size')

    g.new_edge_property('edge_control_points')
    g.new_edge_property('edge_color')
    g.new_edge_property('edge_marker_size')
    g.new_edge_property('edge_pen_width')

    # The last argument tells _set_queues whether a 'cap' vertex property
    # exists on the graph.
    queues = _set_queues(g, q_cls, q_arg, 'cap' in g.vertex_properties())

    if 'pos' not in g.vertex_properties():
        g.set_pos()

    # Edge defaults; loops (source == target) get the queue's loop color.
    for k, e in enumerate(g.edges()):
        g.set_ep(e, 'edge_pen_width', 1.25)
        g.set_ep(e, 'edge_marker_size', 8)
        if e[0] == e[1]:
            g.set_ep(e, 'edge_color', queues[k].colors['edge_loop_color'])
        else:
            g.set_ep(e, 'edge_color', queues[k].colors['edge_color'])

    # Vertex defaults; a vertex with a loop takes its colors from the
    # queue on that loop, any other vertex from g_colors.
    for v in g.nodes():
        g.set_vp(v, 'vertex_pen_width', 1)
        g.set_vp(v, 'vertex_size', 8)
        e = (v, v)
        if g.is_edge(e):
            g.set_vp(v, 'vertex_color', queues[g.edge_index[e]]._current_color(2))
            g.set_vp(v, 'vertex_fill_color', queues[g.edge_index[e]]._current_color())
        else:
            g.set_vp(v, 'vertex_color', g_colors['vertex_color'])
            g.set_vp(v, 'vertex_fill_color', g_colors['vertex_fill_color'])

    return g, queues