def test_print_expressions():
    """
    Tests whether the WIR Extraction works for an expression with very simple nested calls
    """
    test_code = cleandoc("""
        print("test".isupper())
        """)
    test_ast = ast.parse(test_code)
    extractor = WirExtractor(test_ast)
    extracted_wir = extractor.extract_wir()
    expected_graph = networkx.DiGraph()

    expected_constant = WirNode(0, "test", "Constant",
                                CodeReference(1, 6, 1, 12))
    expected_call_one = WirNode(1, "isupper", "Call",
                                CodeReference(1, 6, 1, 22))
    expected_graph.add_edge(expected_constant,
                            expected_call_one,
                            type="caller",
                            arg_index=-1)

    expected_call_two = WirNode(2, "print", "Call", CodeReference(1, 0, 1, 23))
    expected_graph.add_edge(expected_call_one,
                            expected_call_two,
                            type="input",
                            arg_index=0)

    compare(networkx.to_dict_of_dicts(extracted_wir),
            networkx.to_dict_of_dicts(expected_graph))
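A minimal standalone sketch (not part of the test above) of why these tests convert both graphs with networkx.to_dict_of_dicts before comparing: the resulting nested plain dicts can be checked with ordinary dict equality.

import networkx as nx

g1 = nx.DiGraph()
g1.add_edge("test", "isupper", type="caller", arg_index=-1)

g2 = nx.DiGraph()
g2.add_edge("test", "isupper", type="caller", arg_index=-1)

# Same nodes, edges and edge attributes -> identical dict-of-dicts.
assert nx.to_dict_of_dicts(g1) == nx.to_dict_of_dicts(g2)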
Example #2
def test_inspector_adult_easy_str_pipeline():
    """
    Tests whether the str version of the inspector works
    """
    with open(ADULT_SIMPLE_PY) as file:
        code = file.read()

        inspector_result = PipelineInspector\
            .on_pipeline_from_string(code)\
            .add_required_inspection(MaterializeFirstOutputRows(5)) \
            .add_check(NoBiasIntroducedFor(['race'])) \
            .add_check(NoIllegalFeatures()) \
            .execute()
        extracted_dag = inspector_result.dag
        expected_dag = get_expected_dag_adult_easy("<string-source>")
        compare(networkx.to_dict_of_dicts(extracted_dag),
                networkx.to_dict_of_dicts(expected_dag))

        assert HistogramForColumns(['race']) in list(
            inspector_result.dag_node_to_inspection_results.values())[0]
        check_to_check_results = inspector_result.check_to_check_results
        assert check_to_check_results[NoBiasIntroducedFor(
            ['race'])].status == CheckStatus.SUCCESS
        assert check_to_check_results[
            NoIllegalFeatures()].status == CheckStatus.FAILURE
def test_frame_merge_sorted():
    """
    Tests whether the monkey patching of ('pandas.core.frame', 'merge') works if the sort option is set to True
    """
    test_code = cleandoc("""
        import pandas as pd

        df_a = pd.DataFrame({'A': [0, 2, 4, 8, 5], 'B': [7, 5, 4, 2, 1]})
        df_b = pd.DataFrame({'B': [1, 4, 3, 2, 5], 'C': [1, 5, 4, 11, None]})
        df_merged = df_a.merge(df_b, on='B', sort=True)
        df_expected = pd.DataFrame({'A': [5, 8, 4, 2], 'B': [1, 2, 4, 5], 'C': [1, 11, 5, None]})
        pd.testing.assert_frame_equal(df_merged, df_expected)
        """)
    inspector_result = _pipeline_executor.singleton.run(
        python_code=test_code,
        track_code_references=True,
        inspections=[RowLineage(5)])
    inspector_result.dag.remove_node(list(inspector_result.dag.nodes)[3])

    expected_dag = networkx.DiGraph()
    expected_a = DagNode(
        0, BasicCodeLocation("<string-source>", 3),
        OperatorContext(OperatorType.DATA_SOURCE,
                        FunctionInfo('pandas.core.frame', 'DataFrame')),
        DagNodeDetails(None, ['A', 'B']),
        OptionalCodeInfo(
            CodeReference(3, 7, 3, 65),
            "pd.DataFrame({'A': [0, 2, 4, 8, 5], 'B': [7, 5, 4, 2, 1]})"))
    expected_b = DagNode(
        1, BasicCodeLocation("<string-source>", 4),
        OperatorContext(OperatorType.DATA_SOURCE,
                        FunctionInfo('pandas.core.frame', 'DataFrame')),
        DagNodeDetails(None, ['B', 'C']),
        OptionalCodeInfo(
            CodeReference(4, 7, 4, 69),
            "pd.DataFrame({'B': [1, 4, 3, 2, 5], 'C': [1, 5, 4, 11, None]})"))
    expected_join = DagNode(
        2, BasicCodeLocation("<string-source>", 5),
        OperatorContext(OperatorType.JOIN,
                        FunctionInfo('pandas.core.frame', 'merge')),
        DagNodeDetails("on 'B'", ['A', 'B', 'C']),
        OptionalCodeInfo(CodeReference(5, 12, 5, 47),
                         "df_a.merge(df_b, on='B', sort=True)"))
    expected_dag.add_edge(expected_a, expected_join)
    expected_dag.add_edge(expected_b, expected_join)
    compare(networkx.to_dict_of_dicts(inspector_result.dag),
            networkx.to_dict_of_dicts(expected_dag))

    inspection_results_data_source = inspector_result.dag_node_to_inspection_results[
        expected_join]
    lineage_output = inspection_results_data_source[RowLineage(5)]
    expected_lineage_df = DataFrame(
        [[5, 1, 1., {LineageId(0, 4), LineageId(1, 0)}],
         [8, 2, 11., {LineageId(0, 3), LineageId(1, 3)}],
         [4, 4, 5., {LineageId(0, 2), LineageId(1, 1)}],
         [2, 5, math.nan, {LineageId(0, 1), LineageId(1, 4)}]],
        columns=['A', 'B', 'C', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(
        lineage_output.reset_index(drop=True),
        expected_lineage_df.reset_index(drop=True))
def test_nested_import_from():
    """
    Tests whether the WIR Extraction works for nested from imports
    """
    test_code = cleandoc("""
            from mlinspect.utils import get_project_root

            print(get_project_root())
            """)
    test_ast = ast.parse(test_code)
    extractor = WirExtractor(test_ast)
    extracted_wir = extractor.extract_wir()
    expected_graph = networkx.DiGraph()

    expected_import = WirNode(0, "mlinspect.utils", "Import",
                              CodeReference(1, 0, 1, 44))
    expected_call_one = WirNode(1, "get_project_root", "Call",
                                CodeReference(3, 6, 3, 24))
    expected_graph.add_edge(expected_import,
                            expected_call_one,
                            type="caller",
                            arg_index=-1)

    expected_call_two = WirNode(2, "print", "Call", CodeReference(3, 0, 3, 25))
    expected_graph.add_edge(expected_call_one,
                            expected_call_two,
                            type="input",
                            arg_index=0)

    compare(networkx.to_dict_of_dicts(extracted_wir),
            networkx.to_dict_of_dicts(expected_graph))
def test_call_after_call():
    """
    Tests whether the WIR Extraction works for chained method calls
    """
    test_code = cleandoc("""
        "hello ".capitalize().count()
        """)
    test_ast = ast.parse(test_code)
    extractor = WirExtractor(test_ast)
    extracted_wir = extractor.extract_wir()
    expected_graph = networkx.DiGraph()

    expected_constant_one = WirNode(0, "hello ", "Constant",
                                    CodeReference(1, 0, 1, 8))
    expected_call_one = WirNode(1, "capitalize", "Call",
                                CodeReference(1, 0, 1, 21))
    expected_call_two = WirNode(2, "count", "Call", CodeReference(1, 0, 1, 29))
    expected_graph.add_edge(expected_constant_one,
                            expected_call_one,
                            type="caller",
                            arg_index=-1)
    expected_graph.add_edge(expected_call_one,
                            expected_call_two,
                            type="caller",
                            arg_index=-1)

    compare(networkx.to_dict_of_dicts(extracted_wir),
            networkx.to_dict_of_dicts(expected_graph))
def test_print_var_usage():
    """
    Tests whether the WIR Extraction works for a very simple var usage
    """
    test_code = cleandoc("""
        test_var = "test"
        print(test_var)""")
    test_ast = ast.parse(test_code)
    extractor = WirExtractor(test_ast)
    extracted_wir = extractor.extract_wir()
    expected_graph = networkx.DiGraph()

    expected_constant = WirNode(0, "test", "Constant",
                                CodeReference(1, 11, 1, 17))
    expected_assign = WirNode(1, "test_var", "Assign",
                              CodeReference(1, 0, 1, 17))
    expected_graph.add_edge(expected_constant,
                            expected_assign,
                            type="input",
                            arg_index=0)

    expected_call = WirNode(2, "print", "Call", CodeReference(2, 0, 2, 15))
    expected_graph.add_node(expected_call)
    expected_graph.add_edge(expected_assign,
                            expected_call,
                            type="input",
                            arg_index=0)

    compare(networkx.to_dict_of_dicts(extracted_wir),
            networkx.to_dict_of_dicts(expected_graph))
def test_black_box_operation():
    """
    Tests whether DataFrames coming from operations without mlinspect support are handled
    """
    test_code = cleandoc("""
        import pandas
        from mlinspect.testing._testing_helper_utils import black_box_df_op
        
        df = black_box_df_op()
        df = df.dropna()
        print("df")
        """)

    extracted_dag = _pipeline_executor.singleton.run(
        python_code=test_code, track_code_references=True).dag

    expected_dag = networkx.DiGraph()
    expected_missing_op = DagNode(
        -1, BasicCodeLocation("<string-source>", 5),
        OperatorContext(OperatorType.MISSING_OP, None),
        DagNodeDetails(
            'Warning! Operator <string-source>:5 (df.dropna()) encountered a '
            'DataFrame resulting from an operation without mlinspect support!',
            ['A']), OptionalCodeInfo(CodeReference(5, 5, 5, 16),
                                     'df.dropna()'))
    expected_select = DagNode(
        0, BasicCodeLocation("<string-source>", 5),
        OperatorContext(OperatorType.SELECTION,
                        FunctionInfo('pandas.core.frame', 'dropna')),
        DagNodeDetails('dropna', ['A']),
        OptionalCodeInfo(CodeReference(5, 5, 5, 16), 'df.dropna()'))
    expected_dag.add_edge(expected_missing_op, expected_select)
    compare(networkx.to_dict_of_dicts(extracted_dag),
            networkx.to_dict_of_dicts(expected_dag))
def test_string_call_attribute():
    """
    Tests whether the WIR Extraction works for a very simple attribute call
    """
    test_code = cleandoc("""
        "hello ".join("world")
        """)
    test_ast = ast.parse(test_code)
    extractor = WirExtractor(test_ast)
    extracted_wir = extractor.extract_wir()
    expected_graph = networkx.DiGraph()

    expected_constant_one = WirNode(0, "hello ", "Constant",
                                    CodeReference(1, 0, 1, 8))
    expected_constant_two = WirNode(1, "world", "Constant",
                                    CodeReference(1, 14, 1, 21))
    expected_attribute_call = WirNode(2, "join", "Call",
                                      CodeReference(1, 0, 1, 22))
    expected_graph.add_edge(expected_constant_one,
                            expected_attribute_call,
                            type="caller",
                            arg_index=-1)
    expected_graph.add_edge(expected_constant_two,
                            expected_attribute_call,
                            type="input",
                            arg_index=0)

    compare(networkx.to_dict_of_dicts(extracted_wir),
            networkx.to_dict_of_dicts(expected_graph))
def test_func_defs_and_loops():
    """
    Tests whether the monkey patching works for pipelines with function definitions and for loops
    """
    test_code = get_test_code_with_function_def_and_for_loop()

    extracted_dag = _pipeline_executor.singleton.run(
        python_code=test_code, track_code_references=True).dag

    expected_dag = networkx.DiGraph()
    expected_data_source = DagNode(
        0, BasicCodeLocation("<string-source>", 4),
        OperatorContext(OperatorType.DATA_SOURCE,
                        FunctionInfo('pandas.core.frame', 'DataFrame')),
        DagNodeDetails(None, ['A']),
        OptionalCodeInfo(CodeReference(4, 9, 4, 44),
                         "pd.DataFrame([0, 1], columns=['A'])"))
    expected_select_1 = DagNode(
        1, BasicCodeLocation("<string-source>", 8),
        OperatorContext(OperatorType.SELECTION,
                        FunctionInfo('pandas.core.frame', 'dropna')),
        DagNodeDetails('dropna', ['A']),
        OptionalCodeInfo(CodeReference(8, 9, 8, 20), 'df.dropna()'))
    expected_dag.add_edge(expected_data_source, expected_select_1)
    expected_select_2 = DagNode(
        2, BasicCodeLocation("<string-source>", 8),
        OperatorContext(OperatorType.SELECTION,
                        FunctionInfo('pandas.core.frame', 'dropna')),
        DagNodeDetails('dropna', ['A']),
        OptionalCodeInfo(CodeReference(8, 9, 8, 20), 'df.dropna()'))
    expected_dag.add_edge(expected_select_1, expected_select_2)
    compare(networkx.to_dict_of_dicts(extracted_dag),
            networkx.to_dict_of_dicts(expected_dag))
 def as_dict(self):
     return {"@module": self.__class__.__module__,
             "@class": self.__class__.__name__,
             "light_structure_environments": self.light_structure_environments.as_dict(),
             "connectivity_graph": jsanitize(nx.to_dict_of_dicts(self._graph)),
             "environment_subgraphs": {env_key: jsanitize(nx.to_dict_of_dicts(subgraph))
                                       for env_key, subgraph in self.environment_subgraphs.items()}}
Example #11
def main():
    np.set_printoptions(precision=2)
    test_complete_file = "../data/toys/toy1.txt"
    test_sample_file = "../data/toys/toy_comps.txt"
    #test_sample_file = "../data/toys/toy_comps_with_unobserved.txt"
    gsamp = nx.to_dict_of_dicts(nx.read_adjlist(test_sample_file))
    gcomp = nx.to_dict_of_dicts(nx.read_adjlist(test_complete_file))
    #net = Network(gcomp, gsamp, feature_type='default')
    net = Network(gcomp, gsamp, feature_type='knn')
    #net = Network(gcomp, gsamp, feature_type='node2vec')
    #net = Network(gcomp, gsamp, feature_type='n2v-refex')

    order = 'linear'
    feat = net.calculate_features(net, order=order)
    print(feat)
    print(net.row_to_node)
    net.probe('9')
    print("Probed node 9...")
    feat = net.update_features(net, '9', order=order)
    print(feat)
    print(net.row_to_node)
    net.probe('12')
    print("Probed node 12...")
    feat = net.update_features(net, '12', order=order)
    print(feat)
    print(net.row_to_node)
    net.probe('11')
    print("Probed node 11...")
    feat = net.update_features(net, '11', order=order)
    print(feat)
    print(net.row_to_node)
    print(net.calculate_features(net, order=order))
def test_list_creation():
    """
    Tests whether the WIR Extraction works for lists
    """
    test_code = cleandoc("""
            print(["test1", "test2"])
            """)
    test_ast = ast.parse(test_code)
    extractor = WirExtractor(test_ast)
    extracted_wir = extractor.extract_wir()
    expected_graph = networkx.DiGraph()

    expected_constant_one = WirNode(0, "test1", "Constant",
                                    CodeReference(1, 7, 1, 14))
    expected_constant_two = WirNode(1, "test2", "Constant",
                                    CodeReference(1, 16, 1, 23))
    expected_list = WirNode(2, "as_list", "List", CodeReference(1, 6, 1, 24))
    expected_graph.add_edge(expected_constant_one,
                            expected_list,
                            type="input",
                            arg_index=0)
    expected_graph.add_edge(expected_constant_two,
                            expected_list,
                            type="input",
                            arg_index=1)

    expected_call = WirNode(3, "print", "Call", CodeReference(1, 0, 1, 25))
    expected_graph.add_edge(expected_list,
                            expected_call,
                            type="input",
                            arg_index=0)

    compare(networkx.to_dict_of_dicts(extracted_wir),
            networkx.to_dict_of_dicts(expected_graph))
def test_import_from():
    """
    Tests whether the WIR Extraction works for from imports
    """
    test_code = cleandoc("""
            from math import sqrt 

            sqrt(4)
            """)
    test_ast = ast.parse(test_code)
    extractor = WirExtractor(test_ast)
    extracted_wir = extractor.extract_wir()
    expected_graph = networkx.DiGraph()

    expected_import = WirNode(0, "math", "Import", CodeReference(1, 0, 1, 21))
    expected_constant = WirNode(1, "4", "Constant", CodeReference(3, 5, 3, 6))
    expected_constant_call = WirNode(2, "sqrt", "Call",
                                     CodeReference(3, 0, 3, 7))
    expected_graph.add_edge(expected_import,
                            expected_constant_call,
                            type="caller",
                            arg_index=-1)
    expected_graph.add_edge(expected_constant,
                            expected_constant_call,
                            type="input",
                            arg_index=0)

    compare(networkx.to_dict_of_dicts(extracted_wir),
            networkx.to_dict_of_dicts(expected_graph))
def test_statsmodels_add_constant():
    """
    Tests whether the monkey patching of ('statsmodel.api', 'add_constant') works
    """
    test_code = cleandoc("""
        import numpy as np
        import statsmodels.api as sm
        np.random.seed(42)
        test = np.random.random(100)
        test = sm.add_constant(test)
        assert len(test) == 100
        """)

    inspector_result = _pipeline_executor.singleton.run(
        python_code=test_code,
        track_code_references=True,
        inspections=[RowLineage(2)])

    expected_dag = networkx.DiGraph()
    expected_random = DagNode(
        0, BasicCodeLocation("<string-source>", 4),
        OperatorContext(OperatorType.DATA_SOURCE,
                        FunctionInfo('numpy.random', 'random')),
        DagNodeDetails('random', ['array']),
        OptionalCodeInfo(CodeReference(4, 7, 4, 28), "np.random.random(100)"))

    expected_constant = DagNode(
        1, BasicCodeLocation("<string-source>", 5),
        OperatorContext(OperatorType.PROJECTION_MODIFY,
                        FunctionInfo('statsmodel.api', 'add_constant')),
        DagNodeDetails('Adds const column', ['array']),
        OptionalCodeInfo(CodeReference(5, 7, 5, 28), "sm.add_constant(test)"))
    expected_dag.add_edge(expected_random, expected_constant)

    compare(networkx.to_dict_of_dicts(inspector_result.dag),
            networkx.to_dict_of_dicts(expected_dag))

    inspection_results_data_source = inspector_result.dag_node_to_inspection_results[
        expected_random]
    lineage_output = inspection_results_data_source[RowLineage(2)]
    expected_lineage_df = DataFrame(
        [[0.5, {LineageId(0, 0)}], [0.5, {LineageId(0, 1)}]],
        columns=['array', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(
        lineage_output.reset_index(drop=True),
        expected_lineage_df.reset_index(drop=True),
        atol=1)

    inspection_results_data_source = inspector_result.dag_node_to_inspection_results[
        expected_constant]
    lineage_output = inspection_results_data_source[RowLineage(2)]
    expected_lineage_df = DataFrame(
        [[numpy.array([0.5, 1.]), {LineageId(0, 0)}],
         [numpy.array([0.5, 1.]), {LineageId(0, 1)}]],
        columns=['array', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(
        lineage_output.reset_index(drop=True),
        expected_lineage_df.reset_index(drop=True),
        atol=1)
Example #15
def test_format_to_dotfile(dependencies):
    graph = create_graph_from(dependencies)
    content = to_dotfile(graph=graph, path=os.getcwd())

    exported_graph = nx.drawing.nx_pydot.read_dot(io.StringIO(content))

    assert exported_graph.nodes() == graph.nodes()
    assert nx.to_dict_of_dicts(graph).keys() == nx.to_dict_of_dicts(exported_graph).keys()
Example #16
def test_frame__setitem__():
    """
    Tests whether the monkey patching of ('pandas.core.frame', '__setitem__') works
    """
    test_code = cleandoc("""
                import pandas as pd

                pandas_df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
                              'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
                              'baz': [1, 2, 3, 4, 5, 6],
                              'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
                pandas_df['baz'] = pandas_df['baz'] + 1
                df_expected = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
                              'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
                              'baz': [2, 3, 4, 5, 6, 7],
                              'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
                pd.testing.assert_frame_equal(pandas_df, df_expected)
                """)
    inspector_result = _pipeline_executor.singleton.run(python_code=test_code, track_code_references=True,
                                                        inspections=[RowLineage(2)])
    inspector_result.dag.remove_node(list(inspector_result.dag.nodes)[3])

    expected_dag = networkx.DiGraph()
    expected_data_source = DagNode(0,
                                   BasicCodeLocation("<string-source>", 3),
                                   OperatorContext(OperatorType.DATA_SOURCE,
                                                   FunctionInfo('pandas.core.frame', 'DataFrame')),
                                   DagNodeDetails(None, ['foo', 'bar', 'baz', 'zoo']),
                                   OptionalCodeInfo(CodeReference(3, 12, 6, 53),
                                                    "pd.DataFrame({'foo': ['one', 'one', 'one', 'two', "
                                                    "'two', 'two'],\n"
                                                    "              'bar': ['A', 'B', 'C', 'A', 'B', 'C'],\n"
                                                    "              'baz': [1, 2, 3, 4, 5, 6],\n"
                                                    "              'zoo': ['x', 'y', 'z', 'q', 'w', 't']})"))
    expected_project = DagNode(1,
                               BasicCodeLocation("<string-source>", 7),
                               OperatorContext(OperatorType.PROJECTION,
                                               FunctionInfo('pandas.core.frame', '__getitem__')),
                               DagNodeDetails("to ['baz']", ['baz']),
                               OptionalCodeInfo(CodeReference(7, 19, 7, 35), "pandas_df['baz']"))
    expected_dag.add_edge(expected_data_source, expected_project)
    expected_project_modify = DagNode(2,
                                      BasicCodeLocation("<string-source>", 7),
                                      OperatorContext(OperatorType.PROJECTION_MODIFY,
                                                      FunctionInfo('pandas.core.frame', '__setitem__')),
                                      DagNodeDetails("modifies ['baz']", ['foo', 'bar', 'baz', 'zoo']),
                                      OptionalCodeInfo(CodeReference(7, 0, 7, 39),
                                                       "pandas_df['baz'] = pandas_df['baz'] + 1"))
    expected_dag.add_edge(expected_data_source, expected_project_modify)

    compare(networkx.to_dict_of_dicts(inspector_result.dag), networkx.to_dict_of_dicts(expected_dag))

    inspection_results_data_source = inspector_result.dag_node_to_inspection_results[expected_project_modify]
    lineage_output = inspection_results_data_source[RowLineage(2)]
    expected_lineage_df = DataFrame([['one', 'A', 2, 'x', {LineageId(0, 0)}],
                                     ['one', 'B', 3, 'y', {LineageId(0, 1)}]],
                                    columns=['foo', 'bar', 'baz', 'zoo', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(lineage_output.reset_index(drop=True), expected_lineage_df.reset_index(drop=True))
def test_index_subscript_with_module_information():
    """
    Tests whether the WIR Extraction works for index subscripts with module information
    """
    test_code = cleandoc("""
            import pandas as pd

            data = pd.read_csv('test_path')
            data['income-per-year']
            """)
    test_ast = ast.parse(test_code)
    extractor = WirExtractor(test_ast)
    module_info = {
        CodeReference(3, 7, 3, 31): ('pandas.io.parsers', 'read_csv'),
        CodeReference(4, 0, 4, 23): ('pandas.core.frame', '__getitem__')
    }
    extracted_wir = extractor.extract_wir()
    extractor.add_runtime_info(module_info, {})
    expected_graph = networkx.DiGraph()

    expected_import = WirNode(0, "pandas", "Import",
                              CodeReference(1, 0, 1, 19))
    expected_constant_one = WirNode(1, "test_path", "Constant",
                                    CodeReference(3, 19, 3, 30))
    expected_call = WirNode(2, "read_csv", "Call", CodeReference(3, 7, 3, 31),
                            ('pandas.io.parsers', 'read_csv'))
    expected_graph.add_edge(expected_import,
                            expected_call,
                            type="caller",
                            arg_index=-1)
    expected_graph.add_edge(expected_constant_one,
                            expected_call,
                            type="input",
                            arg_index=0)

    expected_assign = WirNode(3, "data", "Assign", CodeReference(3, 0, 3, 31))
    expected_graph.add_edge(expected_call,
                            expected_assign,
                            type="input",
                            arg_index=0)

    expected_constant_two = WirNode(4, "income-per-year", "Constant",
                                    CodeReference(4, 5, 4, 22))
    expected_index_subscript = WirNode(5, "Index-Subscript", "Subscript",
                                       CodeReference(4, 0, 4, 23),
                                       ('pandas.core.frame', '__getitem__'))
    expected_graph.add_edge(expected_assign,
                            expected_index_subscript,
                            type="caller",
                            arg_index=-1)
    expected_graph.add_edge(expected_constant_two,
                            expected_index_subscript,
                            type="input",
                            arg_index=0)

    compare(networkx.to_dict_of_dicts(extracted_wir),
            networkx.to_dict_of_dicts(expected_graph))
def test_frame__getitem__selection():
    """
    Tests whether the monkey patching of ('pandas.core.frame', '__getitem__') works for filtering
    """
    test_code = cleandoc("""
                import pandas as pd

                df = pd.DataFrame({'A': [0, 2, 4, 8, 5], 'B': [1, 5, 4, 11, None]})
                df_selection = df[df['A'] > 3]
                df_expected = pd.DataFrame({'A': [4, 8, 5], 'B': [4, 11, None]})
                pd.testing.assert_frame_equal(df_selection.reset_index(drop=True), df_expected.reset_index(drop=True))
                """)
    inspector_result = _pipeline_executor.singleton.run(
        python_code=test_code,
        track_code_references=True,
        inspections=[RowLineage(2)])
    inspector_result.dag.remove_node(list(inspector_result.dag.nodes)[3])

    expected_dag = networkx.DiGraph()
    expected_data_source = DagNode(
        0, BasicCodeLocation("<string-source>", 3),
        OperatorContext(OperatorType.DATA_SOURCE,
                        FunctionInfo('pandas.core.frame', 'DataFrame')),
        DagNodeDetails(None, ['A', 'B']),
        OptionalCodeInfo(
            CodeReference(3, 5, 3, 67),
            "pd.DataFrame({'A': [0, 2, 4, 8, 5], 'B': [1, 5, 4, 11, None]})"))
    expected_projection = DagNode(
        1, BasicCodeLocation("<string-source>", 4),
        OperatorContext(OperatorType.PROJECTION,
                        FunctionInfo('pandas.core.frame', '__getitem__')),
        DagNodeDetails("to ['A']", ['A']),
        OptionalCodeInfo(CodeReference(4, 18, 4, 25), "df['A']"))
    expected_dag.add_edge(expected_data_source, expected_projection)
    expected_selection = DagNode(
        2, BasicCodeLocation("<string-source>", 4),
        OperatorContext(OperatorType.SELECTION,
                        FunctionInfo('pandas.core.frame', '__getitem__')),
        DagNodeDetails("Select by Series: df[df['A'] > 3]", ['A', 'B']),
        OptionalCodeInfo(CodeReference(4, 15, 4, 30), "df[df['A'] > 3]"))
    expected_dag.add_edge(expected_data_source, expected_selection)

    compare(networkx.to_dict_of_dicts(inspector_result.dag),
            networkx.to_dict_of_dicts(expected_dag))

    inspection_results_data_source = inspector_result.dag_node_to_inspection_results[
        expected_selection]
    lineage_output = inspection_results_data_source[RowLineage(2)]
    expected_lineage_df = DataFrame(
        [[4, 4., {LineageId(0, 2)}], [8, 11., {LineageId(0, 3)}]],
        columns=['A', 'B', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(
        lineage_output.reset_index(drop=True),
        expected_lineage_df.reset_index(drop=True))
Example #19
def test_inspector_adult_easy_py_pipeline_without_inspections():
    """
    Tests whether the .py version of the inspector works
    """
    inspector_result = PipelineInspector\
        .on_pipeline_from_py_file(ADULT_SIMPLE_PY)\
        .execute()
    extracted_dag = inspector_result.dag
    expected_dag = get_expected_dag_adult_easy(ADULT_SIMPLE_PY)
    compare(networkx.to_dict_of_dicts(extracted_dag),
            networkx.to_dict_of_dicts(expected_dag))
def test_my_word_to_vec_transformer():
    """
    Tests whether the monkey patching of ('example_pipelines.healthcare.healthcare_utils', 'MyW2VTransformer') works
    """
    test_code = cleandoc("""
                import pandas as pd
                from example_pipelines.healthcare.healthcare_utils import MyW2VTransformer
                import numpy as np

                df = pd.DataFrame({'A': ['cat_a', 'cat_b', 'cat_a', 'cat_c']})
                word_to_vec = MyW2VTransformer(min_count=2, size=2, workers=1)
                encoded_data = word_to_vec.fit_transform(df)
                assert encoded_data.shape == (4, 2)
                """)
    inspector_result = _pipeline_executor.singleton.run(
        python_code=test_code,
        track_code_references=True,
        inspections=[RowLineage(3)],
        custom_monkey_patching=[custom_monkeypatching])

    expected_dag = networkx.DiGraph()
    expected_data_source = DagNode(
        0, BasicCodeLocation("<string-source>", 5),
        OperatorContext(OperatorType.DATA_SOURCE,
                        FunctionInfo('pandas.core.frame', 'DataFrame')),
        DagNodeDetails(None, ['A']),
        OptionalCodeInfo(
            CodeReference(5, 5, 5, 62),
            "pd.DataFrame({'A': ['cat_a', 'cat_b', 'cat_a', 'cat_c']})"))
    expected_estimator = DagNode(
        1, BasicCodeLocation("<string-source>", 6),
        OperatorContext(
            OperatorType.TRANSFORMER,
            FunctionInfo('example_pipelines.healthcare.healthcare_utils',
                         'MyW2VTransformer')),
        DagNodeDetails('Word2Vec', ['array']),
        OptionalCodeInfo(CodeReference(6, 14, 6, 62),
                         'MyW2VTransformer(min_count=2, size=2, workers=1)'))
    expected_dag.add_edge(expected_data_source, expected_estimator)
    compare(networkx.to_dict_of_dicts(inspector_result.dag),
            networkx.to_dict_of_dicts(expected_dag))

    inspection_results_data_source = inspector_result.dag_node_to_inspection_results[
        expected_estimator]
    lineage_output = inspection_results_data_source[RowLineage(3)]
    expected_lineage_df = DataFrame(
        [[numpy.array([0.0, 0.0, 0.0]), {LineageId(0, 0)}],
         [numpy.array([0.0, 0.0, 0.0]), {LineageId(0, 1)}],
         [numpy.array([0.0, 0.0, 0.0]), {LineageId(0, 2)}]],
        columns=['array', 'mlinspect_lineage'])
    pandas.testing.assert_series_equal(
        lineage_output["mlinspect_lineage"],
        expected_lineage_df["mlinspect_lineage"])
    assert expected_lineage_df.iloc[0, 0].shape == (3, )
Example #21
File: context.py Project: MGM-KTH/kexbot
 def print_and_plot_graph(self):
     global G, mentioned_concepts
     print nx.to_dict_of_dicts(G)
     # Stuff to plot the graph
     pos = nx.spring_layout(G)
     nx.draw_networkx_nodes(G,pos,node_size=3000, node_color='red', font_size=10)
     nx.draw_networkx_nodes(G,pos,node_size=3000, node_color='green', nodelist=mentioned_concepts, font_size=10)
     nx.draw_networkx_edges(G,pos,width=5,alpha=0.5,edge_color='black')
     nx.draw_networkx_labels(G,pos,font_size=10)
     nx.draw_networkx_edge_labels(G,pos, font_size=8)
     plt.axis('off')
     plt.show()
def test_inspector_adult_easy_ipynb_pipeline():
    """
    Tests whether the .ipynb version of the inspector works
    """
    inspection_result = PipelineInspector\
        .on_pipeline_from_ipynb_file(FILE_NB)\
        .add_inspection(MaterializeFirstRowsInspection(5))\
        .execute()
    extracted_dag = inspection_result.dag
    expected_dag = get_expected_dag_adult_easy_ipynb()
    compare(networkx.to_dict_of_dicts(extracted_dag),
            networkx.to_dict_of_dicts(expected_dag))
def test_groupby_agg():
    """
    Tests whether the monkey patching of ('pandas.core.frame', 'groupby') and ('pandas.core.groupby.generic', 'agg')
    works.
    """
    test_code = cleandoc("""
        import pandas as pd

        df = pd.DataFrame({'group': ['A', 'B', 'A', 'C', 'B'], 'value': [1, 2, 1, 3, 4]})
        df_groupby_agg = df.groupby('group').agg(mean_value=('value', 'mean'))
        
        df_expected = pd.DataFrame({'group': ['A', 'B', 'C'], 'mean_value': [1, 3, 3]})
        pd.testing.assert_frame_equal(df_groupby_agg.reset_index(drop=False), df_expected.reset_index(drop=True))
        """)
    inspector_result = _pipeline_executor.singleton.run(
        python_code=test_code,
        track_code_references=True,
        inspections=[RowLineage(2)])
    inspector_result.dag.remove_node(list(inspector_result.dag.nodes)[2])

    expected_dag = networkx.DiGraph()
    expected_data = DagNode(
        0, BasicCodeLocation("<string-source>", 3),
        OperatorContext(OperatorType.DATA_SOURCE,
                        FunctionInfo('pandas.core.frame', 'DataFrame')),
        DagNodeDetails(None, ['group', 'value']),
        OptionalCodeInfo(
            CodeReference(3, 5, 3, 81),
            "pd.DataFrame({'group': ['A', 'B', 'A', 'C', 'B'], "
            "'value': [1, 2, 1, 3, 4]})"))
    expected_groupby_agg = DagNode(
        1, BasicCodeLocation("<string-source>", 4),
        OperatorContext(OperatorType.GROUP_BY_AGG,
                        FunctionInfo('pandas.core.groupby.generic', 'agg')),
        DagNodeDetails(
            "Groupby 'group', Aggregate: '{'mean_value': ('value', 'mean')}'",
            ['group', 'mean_value']),
        OptionalCodeInfo(
            CodeReference(4, 17, 4, 70),
            "df.groupby('group').agg(mean_value=('value', 'mean'))"))
    expected_dag.add_edge(expected_data, expected_groupby_agg)
    compare(networkx.to_dict_of_dicts(inspector_result.dag),
            networkx.to_dict_of_dicts(expected_dag))

    inspection_results_data_source = inspector_result.dag_node_to_inspection_results[
        expected_groupby_agg]
    lineage_output = inspection_results_data_source[RowLineage(2)]
    expected_lineage_df = DataFrame(
        [["A", 1, {LineageId(1, 0)}], ['B', 3, {LineageId(1, 1)}]],
        columns=['group', 'mean_value', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(
        lineage_output.reset_index(drop=True),
        expected_lineage_df.reset_index(drop=True))
def test_index_assign():
    """
    Tests whether the WIR Extraction works for subscript assignments
    """
    test_code = cleandoc("""
            import pandas as pd

            data = pd.read_csv('test_path')
            data['label'] = "test"
            """)
    test_ast = ast.parse(test_code)
    extractor = WirExtractor(test_ast)
    extracted_wir = extractor.extract_wir()
    expected_graph = networkx.DiGraph()

    expected_import = WirNode(0, "pandas", "Import",
                              CodeReference(1, 0, 1, 19))
    expected_constant_one = WirNode(1, "test_path", "Constant",
                                    CodeReference(3, 19, 3, 30))
    expected_call = WirNode(2, "read_csv", "Call", CodeReference(3, 7, 3, 31))
    expected_graph.add_edge(expected_import,
                            expected_call,
                            type="caller",
                            arg_index=-1)
    expected_graph.add_edge(expected_constant_one,
                            expected_call,
                            type="input",
                            arg_index=0)

    expected_assign = WirNode(3, "data", "Assign", CodeReference(3, 0, 3, 31))
    expected_graph.add_edge(expected_call,
                            expected_assign,
                            type="input",
                            arg_index=0)

    expected_constant_two = WirNode(4, "label", "Constant",
                                    CodeReference(4, 5, 4, 12))
    expected_graph.add_node(expected_constant_two)

    expected_constant_three = WirNode(5, "test", "Constant",
                                      CodeReference(4, 16, 4, 22))
    expected_graph.add_node(expected_constant_three)

    expected_subscript_assign = WirNode(6, 'data.label', 'Subscript-Assign',
                                        CodeReference(4, 0, 4, 13))
    expected_graph.add_edge(expected_assign,
                            expected_subscript_assign,
                            type="caller",
                            arg_index=-1)

    compare(networkx.to_dict_of_dicts(extracted_wir),
            networkx.to_dict_of_dicts(expected_graph))
def test_frame__getitem__frame():
    """
    Tests whether the monkey patching of ('pandas.core.frame', '__getitem__') works for multiple string arguments
    """
    test_code = cleandoc("""
                import pandas as pd

                df = pd.DataFrame([[0, None, 2], [1, 2, 3], [4, None, 2], [9, 2, 3], [6, 1, 2], [1, 2, 3]], 
                    columns=['A', 'B', 'C'])
                df_projection = df[['A', 'C']]
                df_expected = pd.DataFrame([[0, 2], [1, 3], [4, 2], [9, 3], [6, 2], [1, 3]], columns=['A', 'C'])
                pd.testing.assert_frame_equal(df_projection, df_expected)
                """)
    inspector_result = _pipeline_executor.singleton.run(
        python_code=test_code,
        track_code_references=True,
        inspections=[RowLineage(2)])
    inspector_result.dag.remove_node(list(inspector_result.dag.nodes)[2])

    expected_dag = networkx.DiGraph()
    expected_data_source = DagNode(
        0, BasicCodeLocation("<string-source>", 3),
        OperatorContext(OperatorType.DATA_SOURCE,
                        FunctionInfo('pandas.core.frame', 'DataFrame')),
        DagNodeDetails(None, ['A', 'B', 'C']),
        OptionalCodeInfo(
            CodeReference(3, 5, 4, 28),
            "pd.DataFrame([[0, None, 2], [1, 2, 3], [4, None, 2], "
            "[9, 2, 3], [6, 1, 2], [1, 2, 3]], \n"
            "    columns=['A', 'B', 'C'])"))
    expected_project = DagNode(
        1, BasicCodeLocation("<string-source>", 5),
        OperatorContext(OperatorType.PROJECTION,
                        FunctionInfo('pandas.core.frame', '__getitem__')),
        DagNodeDetails("to ['A', 'C']", ['A', 'C']),
        OptionalCodeInfo(CodeReference(5, 16, 5, 30), "df[['A', 'C']]"))
    expected_dag.add_edge(expected_data_source, expected_project)
    compare(networkx.to_dict_of_dicts(inspector_result.dag),
            networkx.to_dict_of_dicts(expected_dag))

    inspection_results_data_source = inspector_result.dag_node_to_inspection_results[
        expected_project]
    lineage_output = inspection_results_data_source[RowLineage(2)]
    expected_lineage_df = DataFrame(
        [[0, 2, {LineageId(0, 0)}], [1, 3, {LineageId(0, 1)}]],
        columns=['A', 'C', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(
        lineage_output.reset_index(drop=True),
        expected_lineage_df.reset_index(drop=True))
def test_inspector_adult_easy_str_pipeline():
    """
    Tests whether the str version of the inspector works
    """
    with open(ADULT_EASY_FILE_PY) as file:
        code = file.read()

        inspection_result = PipelineInspector\
            .on_pipeline_from_string(code)\
            .add_inspection(MaterializeFirstRowsInspection(5))\
            .execute()
        extracted_dag = inspection_result.dag
        expected_dag = get_expected_dag_adult_easy_py()
        assert networkx.to_dict_of_dicts(
            extracted_dag) == networkx.to_dict_of_dicts(expected_dag)
def test_frame_replace():
    """
    Tests whether the monkey patching of ('pandas.core.frame', 'replace') works
    """
    test_code = cleandoc("""
        import pandas as pd

        df = pd.DataFrame(['Low', 'Medium', 'Low', 'High', None], columns=['A'])
        df_replace = df.replace('Medium', 'Low')
        df_expected = pd.DataFrame(['Low', 'Low', 'Low', 'High', None], columns=['A'])
        pd.testing.assert_frame_equal(df_replace.reset_index(drop=True), df_expected.reset_index(drop=True))
        """)
    inspector_result = _pipeline_executor.singleton.run(
        python_code=test_code,
        track_code_references=True,
        inspections=[RowLineage(2)])
    inspector_result.dag.remove_node(list(inspector_result.dag.nodes)[2])

    expected_dag = networkx.DiGraph()
    expected_data_source = DagNode(
        0, BasicCodeLocation("<string-source>", 3),
        OperatorContext(OperatorType.DATA_SOURCE,
                        FunctionInfo('pandas.core.frame', 'DataFrame')),
        DagNodeDetails(None, ['A']),
        OptionalCodeInfo(
            CodeReference(3, 5, 3, 72),
            "pd.DataFrame(['Low', 'Medium', 'Low', 'High', None], "
            "columns=['A'])"))
    expected_modify = DagNode(
        1, BasicCodeLocation("<string-source>", 4),
        OperatorContext(OperatorType.PROJECTION_MODIFY,
                        FunctionInfo('pandas.core.frame', 'replace')),
        DagNodeDetails("Replace 'Medium' with 'Low'", ['A']),
        OptionalCodeInfo(CodeReference(4, 13, 4, 40),
                         "df.replace('Medium', 'Low')"))
    expected_dag.add_edge(expected_data_source, expected_modify)
    compare(networkx.to_dict_of_dicts(inspector_result.dag),
            networkx.to_dict_of_dicts(expected_dag))

    inspection_results_data_source = inspector_result.dag_node_to_inspection_results[
        expected_modify]
    lineage_output = inspection_results_data_source[RowLineage(2)]
    expected_lineage_df = DataFrame(
        [['Low', {LineageId(0, 0)}], ['Low', {LineageId(0, 1)}]],
        columns=['A', 'mlinspect_lineage'])
    pandas.testing.assert_frame_equal(
        lineage_output.reset_index(drop=True),
        expected_lineage_df.reset_index(drop=True))
def fill_colors(graph):
    """
    Use a graph coloring strategy to populate the graph with the appropriate
    colors to solve the puzzle. This method is recursively called by
    populate_color when there are multiple possible paths to explore.

    graph : The networkx sudoku graph containing the puzzle to solve.
    returns : Networkx sudoku graph populated with values that solve the puzzle.
    """
    size = len(graph.nodes)
    adjacent = nx.to_dict_of_dicts(graph)
    pos = optimal_spot(graph, adjacent)
    # If there are no more positions left to fill, the graph is full.
    if pos == 0:
        return graph
    colors = choose_color(graph, adjacent, pos)
    # If there are possible colors left, try each path to look for a solution.
    if len(colors) != 0:
        for color in colors:
            # Make a copy of the graph, add the color, run fill_colors again.
            filled_graph = populate_color(graph.copy(), pos, color)
            if is_populated(filled_graph):
                return filled_graph
            else:
                continue
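A hedged sketch of the neighbour lookup this coloring strategy relies on; the real optimal_spot/choose_color helpers are not shown above, so available_colors below is a hypothetical stand-in that assumes each filled node stores its value in a 'color' node attribute.

import networkx as nx

def available_colors(graph, adjacent, pos, all_colors):
    # Hypothetical helper: colors from all_colors not yet used by any neighbour of pos.
    taken = {graph.nodes[n].get("color") for n in adjacent[pos]}
    return [c for c in all_colors if c not in taken]

g = nx.Graph([(1, 2), (2, 3), (1, 3)])
g.nodes[2]["color"] = "red"
adj = nx.to_dict_of_dicts(g)
print(available_colors(g, adj, 1, ["red", "green", "blue"]))  # ['green', 'blue']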
Example #29
def test_sklearn_wir_preprocessing():
    """
    Tests whether the WIR Extraction works for the adult_easy pipeline
    """
    preprocessed_wir = SklearnWirPreprocessor().preprocess_wir(get_test_wir())
    cleaned_wir = WirToDagTransformer.remove_all_nodes_but_calls_and_subscripts(
        preprocessed_wir)
    dag = WirToDagTransformer.remove_all_non_operators_and_update_names(
        cleaned_wir)

    assert len(dag) == 17

    expected_dag = get_expected_dag_adult_easy_py()

    compare(networkx.to_dict_of_dicts(preprocessed_wir),
            networkx.to_dict_of_dicts(expected_dag))
Example #30
    def as_dict(self):
        """
        Bson-serializable dict representation of the ConnectedComponent object.

        Returns:
            dict: Bson-serializable dict representation of the ConnectedComponent object.
        """
        nodes = {"{:d}".format(node.isite): (node, data) for node, data in self._connected_subgraph.nodes(data=True)}
        node2stringindex = {node: strindex for strindex, (node, data) in nodes.items()}
        dict_of_dicts = nx.to_dict_of_dicts(self._connected_subgraph)
        new_dict_of_dicts = {}
        for n1, n2dict in dict_of_dicts.items():
            in1 = node2stringindex[n1]
            new_dict_of_dicts[in1] = {}
            for n2, edges_dict in n2dict.items():
                in2 = node2stringindex[n2]
                new_dict_of_dicts[in1][in2] = {}
                for ie, edge_data in edges_dict.items():
                    ied = self._edgekey_to_edgedictkey(ie)
                    new_dict_of_dicts[in1][in2][ied] = jsanitize(edge_data)
        return {
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
            "nodes": {strindex: (node.as_dict(), data) for strindex, (node, data) in nodes.items()},
            "graph": new_dict_of_dicts,
        }
Example #31
def test_to_dict_of_dicts_with_edgedata_param(edgelist):
    G = nx.Graph()
    G.add_edges_from(edgelist)
    # Innermost dict value == edge_data when edge_data != None.
    # Any edge data already present in G is overwritten.
    expected = {0: {1: 10}, 1: {0: 10, 2: 10}, 2: {1: 10}}
    assert nx.to_dict_of_dicts(G, edge_data=10) == expected
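For contrast, a small sketch of the default behaviour: with edge_data left as None, the innermost values are the edge attribute dicts themselves.

import networkx as nx

G = nx.Graph()
G.add_edge(0, 1, weight=7)
G.add_edge(1, 2)
assert nx.to_dict_of_dicts(G) == {
    0: {1: {"weight": 7}},
    1: {0: {"weight": 7}, 2: {}},
    2: {1: {}},
}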
Example #32
def checking(G):
    ch = nx.to_dict_of_dicts(G)
    fired  = set()
    result = set()
    for current in ch:
        if current not in fired:
            check, f  = DFS(G, current, return_fired = True)
            fired |= f
            result.add(check)
    return(result)
Example #33
def connectivity(G):
    g = nx.to_dict_of_dicts(G)
    used = set()
    res = set()
    for curr in g:
        if curr not in used:
            comp, u = bfs(G, curr, return_used=True)
            used |= u
            res.add(comp)
    return res
Example #34
def bfs(G, start, return_used = False):
    queue = [start]
    res = nx.Graph()
    G = nx.to_dict_of_dicts(G)
    used = {start}
    while queue:
        curr = queue.pop(0)
        for n in G[curr]:
            if n not in used:
                used.add(n)
                res.add_edge(curr, n, weight=G[curr][n]['weight'])
                queue.append(n)
    return res if not return_used else (res, used)
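A possible way to call the bfs helper above on a toy graph (every edge needs a 'weight' attribute, since the function reads it); it returns the BFS spanning tree rooted at the start node.

import networkx as nx

G = nx.Graph()
G.add_edge('a', 'b', weight=1)
G.add_edge('b', 'c', weight=2)
G.add_edge('a', 'c', weight=5)
tree, visited = bfs(G, 'a', return_used=True)
print(sorted(tree.edges()))  # [('a', 'b'), ('a', 'c')]
print(visited)               # {'a', 'b', 'c'} (set, order may vary)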
Example #35
def Dijkstra(G, root):
    g = nx.to_dict_of_dicts(G)
    D = {n: (float('inf'),None) for n in g}
    D[root] = (0, None)
    used =set()
    while len(used) < len(g):
        mn = min((i for i in g.items() if i[0] not in used), key=lambda x: D[x[0]][0])
        for n in mn[1]:
            new = D[mn[0]][0] + mn[1][n]['weight']
            if new < D[n][0]:
                D[n] = (new, mn[0])
        used.add(mn[0])
    return D
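A possible invocation of the Dijkstra function above on the same kind of toy weighted graph; the returned dict maps each node to a (shortest distance, predecessor) pair.

import networkx as nx

G = nx.Graph()
G.add_edge('a', 'b', weight=1)
G.add_edge('b', 'c', weight=2)
G.add_edge('a', 'c', weight=5)
print(Dijkstra(G, 'a'))
# {'a': (0, None), 'b': (1, 'a'), 'c': (3, 'b')}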
Example #36
def dfs(G, start):
    stack = [start]
    res = nx.Graph()
    G = nx.to_dict_of_dicts(G)
    used = {start}
    while stack:
        curr = stack.pop()
        for n in G[curr]:
            if n not in used:
                used.add(n)
                res.add_edge(curr, n, weight=G[curr][n]['weight'])
                stack.append(n)
    return res
Example #37
def DFS(G, start, return_fired = False):
    styck = [start]
    resgraph = nx.Graph()
    G = nx.to_dict_of_dicts(G)
    fired = {start}
    while styck:
        curr = styck.pop()
        for neighbour in G[curr]:
            if neighbour not in fired:
                fired.add(neighbour)
                resgraph.add_edge(curr, neighbour, weight = G[curr][neighbour]['weight'])
                styck.append(neighbour)
    return resgraph if not return_fired else (resgraph, fired)
Example #38
def BFS(G, start):
    queue = [start]
    resgraph = nx.Graph()
    G = nx.to_dict_of_dicts(G)
    fired = {start}
    while queue:
        curr = queue.pop(0)
        for neighbour in G[curr]:
            if neighbour not in fired:
                fired.add(neighbour)
                resgraph.add_edge(curr, neighbour, weight = G[curr][neighbour]['weight'])
                queue.append(neighbour)
    return resgraph
Example #39
def bfs(G, start):
    queue = [start]
    tree = nx.Graph()
    G = nx.to_dict_of_dicts(G)
    used = {start}
    while queue:
        curr = queue.pop(0)
        for n in G[curr]:
            if n not in used:
                used.add(n)
                tree.add_edge(curr, n, weight=G[curr][n]['weight'])
                queue.append(n)
    return tree
Example #40
def shortest_path(G,first,last):
    way=nx.Graph()
    d = 0
    friend = [last]
    dejks = dejkstra(nx.to_dict_of_dicts(G), first)
    while len(friend) !=0:
        for neighbour in G[last]:
            if d == 0:
                if G[last][neighbour]['weight'] == (dejks[last] - dejks[neighbour]):
                    way.add_edge(last, neighbour, weight = G[last][neighbour]['weight'])
                    friend.append(neighbour)
                    d = 1
                    if neighbour ==first:
                        return way
        d = 0
        last = friend.pop(-1)
Example #41
def graph2dict(g, return_dict_of_dict=True):
    """Takes a graph and returns an adjacency list.

    Parameters
    ----------
    g : :any:`networkx.DiGraph`, :any:`networkx.Graph`, etc.
        Any object that networkx can turn into a
        :any:`DiGraph<networkx.DiGraph>`.
    return_dict_of_dict : bool (optional, default: ``True``)
        Specifies whether this function will return a dict of dicts
        or a dict of lists.

    Returns
    -------
    adj : dict
        An adjacency representation of graph as a dictionary of
        dictionaries, where a key is the vertex index for a vertex
        ``v`` and the values are :class:`dicts<.dict>` with keys for
        the vertex index and values as edge properties.

    Examples
    --------
    >>> import queueing_tool as qt
    >>> import networkx as nx
    >>> adj = {0: [1, 2], 1: [0], 2: [0, 3], 3: [2]}
    >>> g = nx.DiGraph(adj)
    >>> qt.graph2dict(g, return_dict_of_dict=True)
    ...  # doctest: +NORMALIZE_WHITESPACE
    {0: {1: {}, 2: {}},
    1: {0: {}},
    2: {0: {}, 3: {}},
    3: {2: {}}}
    >>> qt.graph2dict(g, return_dict_of_dict=False)
    {0: [1, 2], 1: [0], 2: [0, 3], 3: [2]}
    """
    if not isinstance(g, nx.DiGraph):
        g = QueueNetworkDiGraph(g)

    dict_of_dicts = nx.to_dict_of_dicts(g)
    if return_dict_of_dict:
        return dict_of_dicts
    else:
        return {k: list(val.keys()) for k, val in dict_of_dicts.items()}
Example #42
def get_dictionary_graph():
    """
    Returns a dictionary representation of the graph using
    the NetworkX to_dict_of_dicts() function.

    The dictionary would be structured as follows if
    producer1 has rated producer2, but producer2 hasn't
    rated anyone.

    {
        producer1.name: 
            {producer1.source_rating1.source.name: 
                {producer1.source_rating1.tag.name: 
                 producer1.source_rating1.rating}
            }
        producer2.name: 
            { }
    }

    """
    return to_dict_of_dicts(graph)
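An illustrative stand-in only: the module-level graph used above is not shown here, so this toy DiGraph just makes the dict-of-dicts shape concrete (each source node maps to its successors and their edge attributes); the real graph nests the rating under the tag name as described in the docstring.

import networkx as nx

g = nx.DiGraph()
g.add_edge("producer1", "source1", tag1=4)  # producer1 rated source1
g.add_node("producer2")                     # producer2 has rated nobody

print(nx.to_dict_of_dicts(g))
# {'producer1': {'source1': {'tag1': 4}}, 'source1': {}, 'producer2': {}}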
Example #43
def lin_syst(G, order_fcn=None):
    """ Given a graph *G* with edges labeled with integers :math:`1, ..., M`, compute
	 	matrices :math:`A,B` such that 

	 	.. math:: \mathbf w(t+1) = A \mathbf w + B \mathbf r,

	 	where :math:`w_i` represents the number of individual systems at node :math:`n` in mode :math:`m` if

	 	.. math:: i = (m-1) K + order\_fcn(n).

	 	If no order function is specified, ordering by **G.nodes().index** is used.
	"""
    if order_fcn == None:
        order_fcn = lambda v: G.nodes().index(v)

    ordering = sorted(G.nodes_iter(), key=order_fcn)

    adj_data = nx.to_dict_of_dicts(G)

    T_list = []
    for mode in range(1, _maxmode(G) + 1):
        data = np.array(
            [
                (1, order_fcn(node2), order_fcn(node1))
                for (node1, node1_out) in adj_data.iteritems()
                for node2 in node1_out
                if node1_out[node2]["mode"] == mode
            ]
        )
        T_mode = scipy.sparse.coo_matrix((data[:, 0], (data[:, 1], data[:, 2])), shape=(len(G), len(G)))
        T_list.append(T_mode)

    A = scipy.sparse.block_diag(tuple(T_list), dtype=np.int8)
    B = scipy.sparse.bmat([[Ti for i in range(len(T_list))] for Ti in T_list]) - 2 * A

    return A, B
Example #44
File: dijkstra.py Project: Jim89/icl
def dijkstra_all(graph_dict):
    ans = []
    for start in graph_dict.keys():
        for end in graph_dict.keys():
            ans.append(dijkstra(graph_dict, start, end))
    return ans            



       
#%% read in data - use a pandas dataframe just for convenience
import pandas as pd
data = pd.read_table("../data/HW1_4.txt",
                     sep = " ",
                     header = None, 
                     names = ['vx', 'vy', 'weight'])

# %% use network x to prepare dictionary structure which can be fed in to the 
# dijkstra function
import networkx as nx
graph = nx.from_pandas_dataframe(data, 'vx', 'vy', 'weight')
# graph_nodes = graph.nodes()
graph_dict = nx.to_dict_of_dicts(graph)

# %% run the functions

path = dijkstra(graph_dict, 1, 6)
all_paths = dijkstra_all(graph_dict)
            
        
 def writeGraphToFile(self,filePath):
     graph_as_dict=nx.to_dict_of_dicts(self.dGraph)
     print graph_as_dict
     gm.saveDict(filePath, graph_as_dict)
     return
Example #46
def _store_json(graph, fname, disp_params, **kws):
    import json
    # TODO: Obey disp_params on json
    m = nx.to_dict_of_dicts(graph)
    json.dump(m, fname, **kws)
Example #47
File: topaz.py Project: sgosline/topaz
def perturbNetwork(network,mirlist=[],ishier=True):
    '''
    Assume hierarchical network for now. Provide list of input weights to initiate hierarchy
    '''

    newnetwork=defaultdict(dict)
    oldnetwork=nx.to_dict_of_dicts(network)
    ##collect, for each level, all members of subsequent level
    tfs,motifs=set(),set()

    #first, collect all weights
    mirweights=[]
    for m in mirlist:
        if m not in oldnetwork.keys():
            #print 'No %s in network'%(m)
            continue
        allweights=[oldnetwork[m][d]['weight'] for d in oldnetwork[m].keys()]
        tfs.update(oldnetwork[m].keys())
        mirweights.extend(allweights)
     #   print 'Have %d edge weights for miRNA %s'%(len(allweights),m)
    np.random.shuffle(mirweights)

    print mirweights
    
    count=0
    for m in mirlist:
        if m not in oldnetwork.keys():
            #print 'No %s in network'%(m)
            continue
        for tf in oldnetwork[m].keys():
            newnetwork[m][tf]={'weight':mirweights[count]}
            count+=1
   # print 'Have %d nodes in new network'%(len(newnetwork.keys()))
            
    #now onto tfs
    tfweights=[]
    for t in tfs:
        allweights=[oldnetwork[t][d]['weight'] for d in oldnetwork[t].keys()]
        motifs.update(oldnetwork[t].keys())
        tfweights.extend(allweights)
    #    print 'Have %d edge weights for TF %s'%(len(allweights),t)
        
    np.random.shuffle(tfweights)
    count=0
    for t in tfs:
        for mot in oldnetwork[t].keys():
            newnetwork[t][mot]={'weight':tfweights[count]}
            count+=1

    #print 'Have %d nodes in new network'%(len(newnetwork.keys()))

    #now motifs, then we're done
    motweights=[]
    for mo in motifs:
        allweights=[oldnetwork[mo][d]['weight'] for d in oldnetwork[mo].keys()]
        motweights.extend(allweights)
      #  print 'Have %d edge weights for Motif %s'%(len(allweights),mo)
        
    np.random.shuffle(motweights)
    count=0
    for mo in motifs:
        for mr in oldnetwork[mo].keys():
            newnetwork[mo][mr]={'weight':motweights[count]}
            count+=1

    print 'Have %d nodes in new network'%(len(newnetwork.keys()))
    
    return nx.DiGraph(newnetwork)
Example #48
 def get_graph_str(self):
     return str(nx.to_dict_of_dicts(self.graph))
# Vary number of nodes, draw graphs and call different algorithms for shortest path computation
import random

import matplotlib.pyplot as plt
import networkx as nx

nnodes = 1000
bg = nx.complete_graph(nnodes)
nedges = bg.edges()
print("number of nodes are", nnodes)
print("number of edges are", len(nedges))

bg.add_nodes_from(range(nnodes))  # redundant (complete_graph already created these nodes), kept from the original
lnode = nnodes - 1

for edge in nedges:
    bg.add_edge(edge[0], edge[1], weight=random.randrange(nnodes))

pos=nx.spring_layout(bg) # positions for all nodes

plt.figure(figsize=(32,32))
nx.draw_networkx(bg,pos,font_size=20,font_family='sans-serif',alpha=.6, width=2.0,
                     node_size=900)

plt.axis('off')
plt.show() 
graph = nx.to_dict_of_dicts(bg, nodelist=None, edge_data=None)

path1 = shortestPath(graph,0,lnode, "dijkstra_algorithm")
print ("Shortest path by dijkstra_algorithm is: " + str(path1))
path2 = shortestPath(graph,0,lnode,"bellman_ford_algorithm")
print ("Shortest path by bellman_ford_algorithm is: " + str(path2))
path3 = shortestPath(graph,0,lnode,"floydwarshall_algorithm")
print ("Shortest path by floydwarshall_algorithm is: " + str(path3))
Example #50
    friend = [last]
    dejks = dejkstra(nx.to_dict_of_dicts(G), first)
    while len(friend) != 0:
        for neighbour in G[last]:
            if d == 0:
                if G[last][neighbour]['weight'] == (dejks[last] - dejks[neighbour]):
                    way.add_edge(last, neighbour, weight=G[last][neighbour]['weight'])
                    friend.append(neighbour)
                    d = 1
                    if neighbour == first:
                        return way
        d = 0
        last = friend.pop(-1)
first, last = input().split()
way = shortest_path(G,first,last)
pos = nx.spring_layout(way, iterations=1)
nx.draw(way, pos)
nx.draw_networkx_edge_labels(way, pos)
nx.draw_networkx_labels(way,pos,font_size=7,font_family='sans-serif')
way1 = nx.to_dict_of_dicts(way)
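# nx.to_dict_of_dicts lists every undirected edge under both of its endpoints, so the
# loop below counts each weight twice; hence the division by 2 when printing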
summary = 0
for neighbour in way1:
    for neighbour1 in way1[neighbour]:
        summary+=way1[neighbour][neighbour1]['weight']
print(summary/2)
plot.show()




Example #51
# <codecell>

g.nodes()

# <codecell>

# agency list
g.edges()

# <codecell>

nx.draw(g)

# <codecell>

d = nx.to_dict_of_dicts(g)
# essentially a plain-dict copy of g.adj
# (node -> neighbour -> edge-data dict); not the same as g.edges()

# <codecell>

# Create a new graph from our dict_of_dicts
g2 = nx.from_dict_of_dicts(d)

# <codecell>

# dump the dict of dicts as a JSON string
import simplejson
simplejson.dumps(nx.to_dict_of_dicts(g))
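
# <codecell>

# Hypothetical round trip: parse the JSON string back into a dict and rebuild the graph
# (note that JSON coerces any non-string node keys to strings)
d2 = simplejson.loads(simplejson.dumps(nx.to_dict_of_dicts(g)))
g_roundtrip = nx.from_dict_of_dicts(d2)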

# <codecell>
Example #52
def get_graph(self):
    return nx.to_dict_of_dicts(self.state.get_graph())
Example #53
def to_dict(self):
    edges = nx.to_dict_of_dicts(self.tree)
    # Graph.node was removed in newer networkx releases; nodes(data=True) gives the same mapping
    nodes = dict(self.tree.nodes(data=True))
    j = dict(edges=edges, nodes=nodes)
    return j
Example #54
def update_database(self, dataset, architecture, graph):
    metrics = performer.evaluate_metrics(graph)
    if metrics is None:
        return
    print('performer.update_database:', self.BENCHMARK, architecture, metrics)
    metadata = [datetime.now(), architecture, self.BENCHMARK, self.OPTIMIZATION_TARGET, to_dict_of_dicts(graph)]
    design_instance = metrics + self.extract_features(graph) + metadata
    with open(dataset, 'a') as f:
        f.write('\t'.join(map(str, design_instance)) + '\n')
    return
Example #55
    def fitness(self, individual):
        # init capacities
        default_speed = 100
        for src, dst in [(src, t) for (src, t, data) in self.G.edges(data=True) if "speed" not in data]:
            self.G[src][dst]["speed"] = default_speed

        # Copy list to pop
        weights = individual[:]
        # set weights
        # TODO: look at using set_weights function
        for (src, dst) in self.G.edges():
            # TODO: check order - whether this is forwards or backwards
            # if weights already set
            # FIX check this!
            self.G[src][dst]["weight"] = weights.pop()

        # Cost is sum of network cost (from traffic on links) and link changes
        # cost (how many links changed from original weights)
        # TODO look at link change costs
        link_change_cost = self.link_changes_cost(individual)
        if link_change_cost >= huge:
            # Cost is already a huge number, return
            # (saves expensive computation of network traffic)
            return link_change_cost

        # get edge list to store weights in
        loads = nx.to_dict_of_dicts(self.G, edge_data=0)

        # all_pairs_dijkstra_path returns an iterator in networkx 2.x, so materialise it
        apsf = dict(nx.all_pairs_dijkstra_path(self.G))
        for src, data in apsf.items():
            for dst, path in data.items():
                # load from this source, dest pair
                load = self.traffmat[src][dst]
                for (nodea, nodeb) in zip(path, path[1:]):
                    # add load on this edge due to source, dest pair
                    loads[nodea][nodeb] += load

        def cost(load, cap):
            # Cost is set according to equation (1) of [1]
            utilization = load / cap

            if utilization < 1 / 3:
                return utilization
            elif utilization < 2 / 3:
                return 3 * utilization - 2 / 3
            elif utilization < 9 / 10:
                return 10 * utilization - 16 / 3
            elif utilization < 1:
                return 70 * utilization - 178 / 3
            elif utilization < 11 / 10:
                return 500 * utilization - 1468 / 3
            else:
                return 5000 * utilization - 16318 / 3

        # link change cost was a low number, calculate network traffic cost
        # calculate cost for each edge that has a load set
        link_costs = [cost(loads[s][t], data["speed"]) for (s, t, data) in self.G.edges(data=True)]

        link_costs = sum(link_costs)
        # link_costs = sum(link_costs)
        # print  "cost total {0}".format(link_costs)
        return link_change_cost + link_costs
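
# A quick standalone sanity sketch (not part of the original class, mirroring the nested
# cost() above): the piecewise-linear pieces meet at the breakpoints, e.g. the 2nd and
# 3rd pieces both give 4/3 at utilization 2/3, and the 4th and 5th both give 32/3 at 1.
assert abs((3 * (2 / 3) - 2 / 3) - (10 * (2 / 3) - 16 / 3)) < 1e-9
assert abs((70 * 1.0 - 178 / 3) - (500 * 1.0 - 1468 / 3)) < 1e-9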
Example #56
def adjacency2graph(adjacency, edge_type=None, adjust=1, **kwargs):
    """Takes an adjacency list, dict, or matrix and returns a graph.

    The purpose of this function is take an adjacency list (or matrix)
    and return a :class:`.QueueNetworkDiGraph` that can be used with a
    :class:`.QueueNetwork` instance. The Graph returned has the
    ``edge_type`` edge property set for each edge. Note that the graph may
    be altered.

    Parameters
    ----------
    adjacency : dict or :class:`~numpy.ndarray`
        An adjacency list as either a dict, or an adjacency matrix.
    edge_type : dict or :class:`~numpy.ndarray` (optional)
        A mapping from edges to edge types, supplied in the same format
        as ``adjacency``.
    adjust : int ``{1, 2}`` (optional, default: 1)
        Specifies what to do when the graph has terminal vertices
        (nodes with no out-edges). Note that if ``adjust`` is not 2
        then it is assumed to be 1. There are two choices:

        * ``adjust = 1``: A loop is added to each terminal node in the
          graph, and the ``edge_type`` of that loop is set to 0.
        * ``adjust = 2``: All edges leading to terminal nodes have
          their ``edge_type`` set to 0.

    **kwargs :
        Unused.

    Returns
    -------
    out : :any:`networkx.DiGraph`
        A directed graph with the ``edge_type`` edge property.

    Raises
    ------
    TypeError
        Is raised if ``adjacency`` is not a dict or
        :class:`~numpy.ndarray`.

    Examples
    --------
    If terminal nodes are such that all in-edges have edge type ``0``
    then nothing is changed. However, if a node is a terminal node then
    a loop is added with edge type 0.

    >>> import queueing_tool as qt
    >>> adj = {
    ...     0: {1: {}},
    ...     1: {2: {},
    ...         3: {}},
    ...     3: {0: {}}}
    >>> eTy = {0: {1: 1}, 1: {2: 2, 3: 4}, 3: {0: 1}}
    >>> # A loop will be added to vertex 2
    >>> g = qt.adjacency2graph(adj, edge_type=eTy)
    >>> ans = qt.graph2dict(g)
    >>> sorted(ans.items())     # doctest: +NORMALIZE_WHITESPACE
    [(0, {1: {'edge_type': 1}}),
     (1, {2: {'edge_type': 2}, 3: {'edge_type': 4}}), 
     (2, {2: {'edge_type': 0}}),
     (3, {0: {'edge_type': 1}})]

    You can use a dict of lists to represent the adjacency list.

    >>> adj = {0 : [1], 1: [2, 3], 3: [0]}
    >>> g = qt.adjacency2graph(adj, edge_type=eTy)
    >>> ans = qt.graph2dict(g)
    >>> sorted(ans.items())     # doctest: +NORMALIZE_WHITESPACE
    [(0, {1: {'edge_type': 1}}),
     (1, {2: {'edge_type': 2}, 3: {'edge_type': 4}}),
     (2, {2: {'edge_type': 0}}),
     (3, {0: {'edge_type': 1}})]

    Alternatively, you could have this function adjust the edges that
    lead to terminal vertices by changing their edge type to 0:

    >>> # The graph is unaltered
    >>> g = qt.adjacency2graph(adj, edge_type=eTy, adjust=2)
    >>> ans = qt.graph2dict(g)
    >>> sorted(ans.items())     # doctest: +NORMALIZE_WHITESPACE
    [(0, {1: {'edge_type': 1}}),
     (1, {2: {'edge_type': 0}, 3: {'edge_type': 4}}),
     (2, {}),
     (3, {0: {'edge_type': 1}})]
    """

    if isinstance(adjacency, np.ndarray):
        adjacency = _matrix2dict(adjacency)
    elif isinstance(adjacency, dict):
        adjacency = _dict2dict(adjacency)
    else:
        msg = ("If the adjacency parameter is supplied it must be a "
               "dict, or a numpy.ndarray.")
        raise TypeError(msg)

    if edge_type is None:
        edge_type = {}
    else:
        if isinstance(edge_type, np.ndarray):
            edge_type = _matrix2dict(edge_type, etype=True)
        elif isinstance(edge_type, dict):
            edge_type = _dict2dict(edge_type)

    for u, ty in edge_type.items():
        for v, et in ty.items():
            adjacency[u][v]['edge_type'] = et

    g = nx.from_dict_of_dicts(adjacency, create_using=nx.DiGraph())
    adjacency = nx.to_dict_of_dicts(g)
    adjacency = _adjacency_adjust(adjacency, adjust, True)

    return nx.from_dict_of_dicts(adjacency, create_using=nx.DiGraph())
Example #57
def _prepare_graph(g, g_colors, q_cls, q_arg, adjust_graph):
    """Prepares a graph for use in :class:`.QueueNetwork`.

    This function is called by ``__init__`` in the
    :class:`.QueueNetwork` class. It creates the :class:`.QueueServer`
    instances that sit on the edges, and sets various edge and node
    properties that are used when drawing the graph.

    Parameters
    ----------
    g : :any:`networkx.DiGraph`, :class:`numpy.ndarray`, dict, \
        ``None``,  etc.
        Any object that networkx can turn into a
        :any:`DiGraph<networkx.DiGraph>`
    g_colors : dict
        A dictionary of colors. The specific keys used are
        ``vertex_color`` and ``vertex_fill_color`` for vertices that
        do not have any loops. See :class:`.QueueNetwork` for the
        default values passed.
    q_cls : dict
        A dictionary where the keys are integers that represent an edge
        type, and the values are :class:`.QueueServer` classes.
    q_arg : dict
        A dictionary where the keys are integers that represent an edge
        type, and the values are the arguments that are used when
        creating an instance of that :class:`.QueueServer` class.
    adjust_graph : bool
        Specifies whether the graph will be adjusted using
        :func:`.adjacency2graph`.

    Returns
    -------
    g : :class:`.QueueNetworkDiGraph`
    queues : list
        A list of :class:`QueueServers<.QueueServer>` where
        ``queues[k]`` is the ``QueueServer`` that sits on the edge with
        edge index ``k``.

    Notes
    -----
    The graph ``g`` should have the ``edge_type`` edge property map.
    If it does not then an ``edge_type`` edge property is
    created and set to 1.

    The following properties are set by each queue: ``vertex_color``,
    ``vertex_fill_color``, ``edge_color``.
    See :class:`.QueueServer` for more on setting these values.

    The following properties are assigned as properties to the graph;
    their default values for each edge or vertex are shown:

        * ``vertex_pen_width``: ``1``,
        * ``vertex_size``: ``8``,
        * ``edge_control_points``: ``[]``
        * ``edge_marker_size``: ``8``
        * ``edge_pen_width``: ``1.25``

    Raises
    ------
    TypeError
        Raised when the parameter ``g`` is not of a type that can be
        made into a :any:`networkx.DiGraph`.
    """
    g = _test_graph(g)

    if adjust_graph:
        pos = nx.get_node_attributes(g, 'pos')
        ans = nx.to_dict_of_dicts(g)
        g = adjacency2graph(ans, adjust=2, is_directed=g.is_directed())
        g = QueueNetworkDiGraph(g)
        if len(pos) > 0:
            g.set_pos(pos)

    g.new_vertex_property('vertex_color')
    g.new_vertex_property('vertex_fill_color')
    g.new_vertex_property('vertex_pen_width')
    g.new_vertex_property('vertex_size')

    g.new_edge_property('edge_control_points')
    g.new_edge_property('edge_color')
    g.new_edge_property('edge_marker_size')
    g.new_edge_property('edge_pen_width')

    queues = _set_queues(g, q_cls, q_arg, 'cap' in g.vertex_properties())

    if 'pos' not in g.vertex_properties():
        g.set_pos()

    for k, e in enumerate(g.edges()):
        g.set_ep(e, 'edge_pen_width', 1.25)
        g.set_ep(e, 'edge_marker_size', 8)
        if e[0] == e[1]:
            g.set_ep(e, 'edge_color', queues[k].colors['edge_loop_color'])
        else:
            g.set_ep(e, 'edge_color', queues[k].colors['edge_color'])

    for v in g.nodes():
        g.set_vp(v, 'vertex_pen_width', 1)
        g.set_vp(v, 'vertex_size', 8)
        e = (v, v)
        if g.is_edge(e):
            g.set_vp(v, 'vertex_color', queues[g.edge_index[e]]._current_color(2))
            g.set_vp(v, 'vertex_fill_color', queues[g.edge_index[e]]._current_color())
        else:
            g.set_vp(v, 'vertex_color', g_colors['vertex_color'])
            g.set_vp(v, 'vertex_fill_color', g_colors['vertex_fill_color'])

    return g, queues