示例#1
0
def test_split_dag_to_parallel():
    """Check that ``_split_dag_for_parallel`` separates disconnected node groups.

    The DAG built here has two pieces that share no edge, {1, 2, 3, 4} and
    {5, 6}; the split is expected to yield exactly those two groups.
    """
    shgraph = SnowShuGraph()
    edges = [(1, 2), (1, 4), (2, 3), (5, 6)]
    dag = nx.DiGraph()
    dag.add_edges_from(edges)

    parts = shgraph._split_dag_for_parallel(dag)

    # Compare as a set of frozensets so neither the order of the groups nor
    # the order of the nodes inside a group matters.
    expected = {frozenset([1, 2, 4, 3]), frozenset([5, 6])}
    actual = {frozenset(part) for part in parts}
    assert actual == expected
示例#2
0
def test_build_graph_fails_many_to_many(stub_graph_set):
    """Tests build_graph raises on many-to-many relationships.

    Both the specified relation and its directional upstream are given as
    regex patterns; building the graph is expected to raise
    ``InvalidRelationshipException`` with the many-to-many wording.
    """
    shgraph = SnowShuGraph()
    _, vals = stub_graph_set
    full_catalog = [
        vals.iso_relation,
        vals.view_relation,
        vals.downstream_relation,
        vals.upstream_relation,
        vals.birelation_left,
        vals.birelation_right,
    ]

    directional_spec = {
        "local_attribute": vals.directional_key,
        "database": ".*",
        "schema": ".*",
        "relation": ".*relation$",  # non birelations
        "remote_attribute": vals.directional_key,
    }
    relation_spec = {
        "database": ".*",
        "schema": ".*",
        "relation": ".*relation_.*$",  # birelations
        "relationships": {"directional": [directional_spec]},
    }
    config_dict = copy.deepcopy(BASIC_CONFIGURATION)
    config_dict["source"]["specified_relations"] = [relation_spec]
    yaml_stream = StringIO(yaml.dump(config_dict))
    config = ConfigurationParser().from_file_or_path(yaml_stream)

    with mock.MagicMock() as adapter_mock:
        # The mocked source adapter hands back the stub catalog.
        adapter_mock.build_catalog.return_value = full_catalog
        config.source_profile.adapter = adapter_mock

        with pytest.raises(InvalidRelationshipException) as exc:
            shgraph.build_graph(config)
        failure_text = str(exc.value)
        assert "defines a many-to-many relationship" in failure_text
        assert "Many-to-many relationship are not allowed by SnowShu" in failure_text
示例#3
0
def test_graph_allows_upstream_wildcards(stub_graph_set):
    """An upstream entry with blank database/schema still yields an edge.

    The bidirectional relationship leaves ``database`` and ``schema`` as empty
    strings; after ``_apply_specifications`` the graph is expected to contain
    the (upstream, downstream) edge.
    """
    shgraph = SnowShuGraph()
    _, vals = stub_graph_set
    downstream = vals.downstream_relation
    upstream = vals.upstream_relation

    # Place the upstream relation in the downstream relation's database/schema.
    upstream.database = downstream.database
    upstream.schema = downstream.schema

    full_catalog = [
        vals.iso_relation,
        vals.view_relation,
        downstream,
        upstream,
        vals.birelation_left,
        vals.birelation_right,
    ]
    config_dict = copy.deepcopy(CONFIGURATION)

    bidirectional_spec = {
        'relation': upstream.name,
        'database': '',
        'schema': '',
        'local_attribute': downstream.attributes[0].name,
        'remote_attribute': upstream.attributes[0].name,
    }
    relation_spec = {
        'relation': downstream.name,
        'database': downstream.database,
        'schema': downstream.schema,
        'unsampled': False,
        'relationships': {
            'directional': [],
            'bidirectional': [bidirectional_spec],
        },
    }
    config_dict['source']['specified_relations'] = [relation_spec]

    yaml_stream = StringIO(yaml.dump(config_dict))
    config = ConfigurationParser().from_file_or_path(yaml_stream)

    modified_graph = shgraph._apply_specifications(config,
                                                   nx.DiGraph(),
                                                   full_catalog)

    expected_edge = (upstream, downstream)
    assert expected_edge in modified_graph.edges
示例#4
0
def test_sets_only_existing_adapters():
    """Sampling overrides: an unknown name is rejected, a known one is applied.

    Parsing a config whose sampling is ``'lucky_guess'`` is expected to raise
    ``AttributeError`` and leave the relation's sampling untouched; with
    ``'brute_force'`` the node override is expected to produce a
    ``BruteForceSampling``.
    """
    shgraph = SnowShuGraph()

    test_relation = Relation(database='SNOWSHU_DEVELOPMENT',
                             schema='SOURCE_SYSTEM',
                             name='ORDER_ITEMS',
                             materialization=mz.TABLE,
                             attributes=[])
    test_relation.include_outliers = False
    test_relation.unsampled = False
    test_relation.sampling = DefaultSampling()

    config_dict = copy.deepcopy(CONFIGURATION)
    config_dict['preserve_case'] = True
    overridden_spec = config_dict['source']['specified_relations'][1]

    overridden_spec['sampling'] = 'lucky_guess'
    with pytest.raises(AttributeError):
        ConfigurationParser().from_file_or_path(
            StringIO(yaml.dump(config_dict)))

    # The failed parse must not have touched the relation.
    assert isinstance(test_relation.sampling, DefaultSampling)

    overridden_spec['sampling'] = 'brute_force'
    config = ConfigurationParser().from_file_or_path(
        StringIO(yaml.dump(config_dict)))

    overridden_relation = shgraph._set_overriding_params_for_node(
        test_relation, config)
    assert isinstance(overridden_relation.sampling, BruteForceSampling)
示例#5
0
def test_unsampled(stub_graph_set):
    """A relation specified with ``unsampled=True`` is flagged once specs are applied.

    The isolated relation starts out with ``unsampled`` false; after
    ``_apply_specifications`` runs with a config that marks it as unsampled,
    the flag on that same relation object is expected to be true.
    """
    shgraph = SnowShuGraph()

    _, vals = stub_graph_set

    full_catalog = [
        vals.iso_relation,
        vals.view_relation,
        vals.downstream_relation,
        vals.upstream_relation,
        vals.birelation_left,
        vals.birelation_right,
    ]

    config_dict = copy.deepcopy(CONFIGURATION)
    config_dict['source']['specified_relations'] = [{
        'relation': vals.iso_relation.name,
        'database': vals.iso_relation.database,
        'schema': vals.iso_relation.schema,
        'unsampled': True,
    }]

    config = ConfigurationParser().from_file_or_path(
        StringIO(yaml.dump(config_dict)))
    # Parsing the configuration alone must not modify the relation.
    assert vals.iso_relation.unsampled is False

    # Only the side effect on the catalog's relations matters here, so the
    # returned graph is not kept. (The call used to be made twice in a row
    # with identical arguments; once is enough.)
    shgraph._apply_specifications(config, nx.DiGraph(), full_catalog)

    assert vals.iso_relation.unsampled is True
示例#6
0
def test_graph_difference_more_both_isolated_non_isolated_relations_source(
        stub_graph_set, stub_relation_set):
    """Tests graph_difference when the source has extra isolated and linked nodes.

    The source catalog holds one relation shared with the target plus several
    relations (random isolated ones and the stub relations) that the target
    catalog does not contain; the difference is expected to list every source
    relation after the shared one, in catalog order.
    """

    def random_relation():
        # Fresh relation with a random name and randomized helper attributes.
        return Relation(name=rand_string(10),
                        **stub_relation_set.rand_relation_helper())

    _, vals = stub_graph_set
    common_relation = random_relation()
    source_catalog = [
        common_relation,
        random_relation(),
        random_relation(),
        vals.downstream_relation,
        vals.upstream_relation,
        vals.birelation_right,
    ]

    target_catalog = {
        common_relation,
        random_relation(),
        random_relation(),
        vals.birelation_left,
        vals.birelation_right,
    }

    directional_spec = {
        "local_attribute": vals.directional_key,
        "database": ".*",
        "schema": ".*",
        "relation": ".*relation.*$",
        "remote_attribute": vals.directional_key,
    }
    relation_spec = {
        "database": vals.downstream_relation.database,
        "schema": vals.downstream_relation.schema,
        "relation": vals.downstream_relation.name,
        "relationships": {"directional": [directional_spec]},
    }
    config_dict = copy.deepcopy(BASIC_CONFIGURATION)
    config_dict["source"]["specified_relations"] = [relation_spec]
    yaml_stream = StringIO(yaml.dump(config_dict))
    config = ConfigurationParser().from_file_or_path(yaml_stream)
    shgraph = SnowShuGraph()

    with mock.MagicMock() as adapter_mock:
        adapter_mock.build_catalog.return_value = source_catalog
        config.source_profile.adapter = adapter_mock
        shgraph.build_graph(config)

        # Everything except the shared first relation should remain.
        expected_nodes = source_catalog[1:]
        difference = SnowShuGraph.catalog_difference(shgraph, target_catalog)
        assert list(difference.nodes) == expected_nodes
示例#7
0
def test_included_and_excluded(adapter, target):
    """Only catalog entries 0, 1 and 6 are expected to become graph nodes."""
    shgraph = SnowShuGraph()
    yaml_stream = StringIO(yaml.dump(MOCKED_CONFIG))
    conf_obj = ConfigurationParser().from_file_or_path(yaml_stream)
    shgraph.build_graph(conf_obj, MOCKED_CATALOG)

    matched_nodes = shgraph.graph
    # (catalog index, expected to be a node), checked in catalog order.
    expectations = (
        (0, True),
        (1, True),
        (2, False),
        (3, False),
        (4, False),
        (5, False),
        (6, True),
    )
    for index, should_be_present in expectations:
        is_present = MOCKED_CATALOG[index] in matched_nodes.nodes
        assert is_present is should_be_present
示例#8
0
def test_graph_builds_dags_correctly(stub_graph_set):
    """Every graph returned by ``get_graphs`` must be an ``nx.DiGraph``.

    The instance's graph is replaced by an undirected ``nx.Graph`` holding only
    nodes, so the check covers the conversion done by ``get_graphs``.
    """
    shgraph = SnowShuGraph()
    _, vals = stub_graph_set

    undirected = nx.Graph()
    undirected.add_nodes_from([
        vals.iso_relation,
        vals.view_relation,
        vals.downstream_relation,
        vals.upstream_relation,
        vals.birelation_left,
        vals.birelation_right,
    ])
    shgraph.graph = undirected

    assert all(isinstance(sub, nx.DiGraph) for sub in shgraph.get_graphs())
示例#9
0
def test_included_and_excluded(target, adapter):
    """Only catalog entries 0, 1 and 6 are expected to become graph nodes.

    The catalog is supplied through a mocked source adapter's
    ``build_catalog`` return value.
    """
    shgraph = SnowShuGraph()
    yaml_stream = StringIO(yaml.dump(MOCKED_CONFIG))
    conf_obj = ConfigurationParser().from_file_or_path(yaml_stream)

    # (catalog index, expected to be a node), checked in catalog order.
    expectations = (
        (0, True),
        (1, True),
        (2, False),
        (3, False),
        (4, False),
        (5, False),
        (6, True),
    )
    with mock.MagicMock() as adapter_mock:
        adapter_mock.build_catalog.return_value = MOCKED_CATALOG
        conf_obj.source_profile.adapter = adapter_mock
        shgraph.build_graph(conf_obj)

        matched_nodes = shgraph.graph
        for index, should_be_present in expectations:
            is_present = MOCKED_CATALOG[index] in matched_nodes.nodes
            assert is_present is should_be_present
示例#10
0
def test_build_graph_fails_no_downstream():
    """Tests build_graph raises when no downstream relations are found.

    The mocked adapter returns an empty catalog, so the relations specified in
    the configuration cannot match anything.
    """
    shgraph = SnowShuGraph()
    empty_catalog = []  # no relations in filtered catalog

    # use the "live" config on random test data
    config_dict = copy.deepcopy(CONFIGURATION)
    yaml_stream = StringIO(yaml.dump(config_dict))
    config = ConfigurationParser().from_file_or_path(yaml_stream)

    with mock.MagicMock() as adapter_mock:
        adapter_mock.build_catalog.return_value = empty_catalog
        config.source_profile.adapter = adapter_mock

        # building the graph should raise when no downstream relations are found
        with pytest.raises(InvalidRelationshipException) as exc:
            shgraph.build_graph(config)
        failure_text = str(exc.value)
        assert "does not match any relations" in failure_text
示例#11
0
def test_build_graph_partitions_wildcards(stub_graph_set):
    """Tests build_graph partitions wildcard relationships.

    Two downstream and two upstream wildcard relations are in the catalog; the
    graph is expected to hold exactly the 1->1 and 2->2 edges and neither of
    the crossed ones.
    """
    shgraph = SnowShuGraph()
    _, vals = stub_graph_set
    upstream_1 = vals.upstream_wildcard_relation_1
    upstream_2 = vals.upstream_wildcard_relation_2
    downstream_1 = vals.downstream_wildcard_relation_1
    downstream_2 = vals.downstream_wildcard_relation_2
    full_catalog = [downstream_1, downstream_2, upstream_1, upstream_2]

    directional_spec = {
        "local_attribute": vals.directional_key,
        "database": "",
        "schema": "",
        "relation": ".*upstream.*$",
        "remote_attribute": vals.directional_key,
    }
    relation_spec = {
        "database":
        f"({vals.downstream_wildcard_relation_1.database}|{vals.downstream_wildcard_relation_2.database})",
        "schema":
        f"({vals.downstream_wildcard_relation_1.schema}|{vals.downstream_wildcard_relation_2.schema})",
        "relation": ".*downstream.*$",
        "relationships": {"directional": [directional_spec]},
    }
    config_dict = copy.deepcopy(BASIC_CONFIGURATION)
    config_dict["source"]["specified_relations"] = [relation_spec]
    yaml_stream = StringIO(yaml.dump(config_dict))
    config = ConfigurationParser().from_file_or_path(yaml_stream)

    with mock.MagicMock() as adapter_mock:
        adapter_mock.build_catalog.return_value = full_catalog
        config.source_profile.adapter = adapter_mock
        shgraph.build_graph(config)

        assert len(shgraph.graph.edges()) == 2
        # Matching pairs are linked ...
        assert (upstream_1, downstream_1) in shgraph.graph.edges()
        # ... crossed pairs are not ...
        assert (upstream_1, downstream_2) not in shgraph.graph.edges()
        assert (upstream_2, downstream_1) not in shgraph.graph.edges()
        # ... and the second matching pair is linked as well.
        assert (upstream_2, downstream_2) in shgraph.graph.edges()
示例#12
0
    def _execute(self,
                 barf: bool = False,
                 name: Union[str, None] = None) -> Union[str, None]:
        """Build the relation graph from the configuration and run it.

        Args:
            barf: Forwarded unchanged to ``GraphSetRunner.execute_graph_set``.
            name: When not ``None``, replaces ``self.config.name`` before the
                graph is built.

        Returns:
            A fixed message when the configuration yields no graphs; otherwise
            the value of ``printable_result`` for the executed graphs.
        """
        graph = SnowShuGraph()
        if name is not None:
            self.config.name = name

        graph.build_graph(self.config)
        graphs = graph.get_graphs()
        if len(graphs) < 1:
            return "No relations found per provided replica configuration, exiting."

        # TODO replica container should not be started for analyze commands
        target_adapter = self.config.target_profile.adapter
        target_adapter.initialize_replica(self.config.source_profile.name)

        source_adapter = self.config.source_profile.adapter
        runner = GraphSetRunner()
        runner.execute_graph_set(graphs,
                                 source_adapter,
                                 target_adapter,
                                 threads=self.config.threads,
                                 analyze=self.run_analyze,
                                 barf=barf)

        # Target post-processing is skipped when run_analyze is set.
        if not self.run_analyze:
            relations = [
                relation for subgraph in graphs for relation in subgraph.nodes
            ]
            if source_adapter.SUPPORTS_CROSS_DATABASE:
                logger.info('Creating x-database links in target...')
                target_adapter.enable_cross_database(relations)
                logger.info('X-database enabled.')

            logger.info('Applying %s emulation functions to target...',
                        source_adapter.name)
            for function in source_adapter.SUPPORTED_FUNCTIONS:
                target_adapter.create_function_if_available(function,
                                                            relations)
            logger.info('Emulation functions applied.')
            target_adapter.finalize_replica()

        return printable_result(graph_to_result_list(graphs),
                                self.run_analyze)
示例#13
0
def test_no_duplicates(stub_graph_set):
    """No node may appear in more than one of the graphs from ``get_graphs``."""
    shgraph = SnowShuGraph()

    _, vals = stub_graph_set

    full_catalog = [
        vals.iso_relation,
        vals.view_relation,
        vals.downstream_relation,
        vals.upstream_relation,
        vals.birelation_left,
        vals.birelation_right,
    ]

    config_dict = copy.deepcopy(CONFIGURATION)
    yaml_stream = StringIO(yaml.dump(config_dict))
    config = ConfigurationParser().from_file_or_path(yaml_stream)

    shgraph.build_graph(config, full_catalog)

    # Flatten the nodes of every graph; a duplicate would make the list
    # longer than its de-duplicated set.
    all_nodes = []
    for subgraph in shgraph.get_graphs():
        all_nodes.extend(subgraph.nodes)
    assert len(set(all_nodes)) == len(all_nodes)
示例#14
0
def test_graph_difference_less_relations_source(stub_graph_set):
    """Tests graph_difference is empty when the target covers the whole source.

    The target catalog holds every source relation plus one more, so the
    difference is expected to contain no nodes.
    """
    shgraph = SnowShuGraph()
    _, vals = stub_graph_set
    source_catalog = [
        vals.downstream_relation,
        vals.upstream_relation,
        vals.birelation_left,
        vals.birelation_right,
    ]
    target_catalog = [
        vals.downstream_relation,
        vals.upstream_relation,
        vals.birelation_left,
        vals.birelation_right,
        vals.iso_relation,
    ]

    directional_spec = {
        "local_attribute": vals.directional_key,
        "database": ".*",
        "schema": ".*",
        "relation": ".*relation.*$",  # incl birelations
        "remote_attribute": vals.directional_key,
    }
    relation_spec = {
        "database": vals.downstream_relation.database,
        "schema": vals.downstream_relation.schema,
        "relation": vals.downstream_relation.name,
        "relationships": {"directional": [directional_spec]},
    }
    config_dict = copy.deepcopy(BASIC_CONFIGURATION)
    config_dict["source"]["specified_relations"] = [relation_spec]
    yaml_stream = StringIO(yaml.dump(config_dict))
    config = ConfigurationParser().from_file_or_path(yaml_stream)

    with mock.MagicMock() as adapter_mock:
        adapter_mock.build_catalog.return_value = source_catalog
        config.source_profile.adapter = adapter_mock
        shgraph.build_graph(config)

        difference = SnowShuGraph.catalog_difference(shgraph, target_catalog)
        assert len(difference) == 0
示例#15
0
def test_build_graph_allows_upstream_regex(stub_graph_set):
    """Tests build_graph builds multiple upstream relationships.

    A single downstream relation points at a regex upstream pattern; the graph
    is expected to hold three edges, one from each other catalog relation into
    the downstream relation.
    """
    shgraph = SnowShuGraph()
    _, vals = stub_graph_set
    downstream = vals.downstream_relation
    full_catalog = [
        downstream,
        vals.upstream_relation,
        vals.birelation_left,
        vals.birelation_right,
    ]

    directional_spec = {
        "local_attribute": vals.directional_key,
        "database": ".*",
        "schema": ".*",
        "relation": ".*relation.*$",  # incl birelations
        "remote_attribute": vals.directional_key,
    }
    relation_spec = {
        "database": downstream.database,
        "schema": downstream.schema,
        "relation": downstream.name,
        "relationships": {"directional": [directional_spec]},
    }
    config_dict = copy.deepcopy(BASIC_CONFIGURATION)
    config_dict["source"]["specified_relations"] = [relation_spec]
    yaml_stream = StringIO(yaml.dump(config_dict))
    config = ConfigurationParser().from_file_or_path(yaml_stream)

    with mock.MagicMock() as adapter_mock:
        adapter_mock.build_catalog.return_value = full_catalog
        config.source_profile.adapter = adapter_mock
        shgraph.build_graph(config)

        assert len(shgraph.graph.edges()) == 3
        expected_upstreams = (
            vals.upstream_relation,
            vals.birelation_left,
            vals.birelation_right,
        )
        for upstream in expected_upstreams:
            assert (upstream, downstream) in shgraph.graph.edges()
示例#16
0
def test_build_graph_fails_no_distinct_upstream(stub_graph_set):
    """Tests build_graph raises when there is no distinct upstream relation.

    The directional relationship names the downstream relation itself as its
    upstream, and the build is expected to raise
    ``InvalidRelationshipException``.
    """
    shgraph = SnowShuGraph()
    _, vals = stub_graph_set
    downstream = vals.downstream_relation
    full_catalog = [
        vals.iso_relation,
        vals.view_relation,
        downstream,
        vals.upstream_relation,
    ]

    # add relationship where downstream == upstream
    self_reference = {
        "local_attribute": vals.directional_key,
        "database": downstream.database,
        "schema": downstream.schema,
        "relation": downstream.name,
        "remote_attribute": vals.directional_key,
    }
    relation_spec = {
        "database": downstream.database,
        "schema": downstream.schema,
        "relation": downstream.name,
        "relationships": {"directional": [self_reference]},
    }
    config_dict = copy.deepcopy(BASIC_CONFIGURATION)
    config_dict["source"]["specified_relations"] = [relation_spec]
    yaml_stream = StringIO(yaml.dump(config_dict))
    config = ConfigurationParser().from_file_or_path(yaml_stream)

    with mock.MagicMock() as adapter_mock:
        adapter_mock.build_catalog.return_value = full_catalog
        config.source_profile.adapter = adapter_mock

        with pytest.raises(InvalidRelationshipException) as exc:
            shgraph.build_graph(config)
        failure_text = str(exc.value)
        assert "was specified as a dependency, but it does not exist." in failure_text
示例#17
0
def test_sets_outliers(stub_graph_set):
    """Source-level outlier settings are expected to reach the relations.

    With ``include_outliers`` and ``max_number_of_outliers`` set on the source
    section, building the graph should leave both values on the isolated
    relation.
    """
    shgraph = SnowShuGraph()

    _, vals = stub_graph_set

    full_catalog = [
        vals.iso_relation,
        vals.view_relation,
        vals.downstream_relation,
        vals.upstream_relation,
        vals.birelation_left,
        vals.birelation_right,
    ]

    config_dict = copy.deepcopy(CONFIGURATION)
    source_section = config_dict['source']
    source_section['include_outliers'] = True
    source_section['max_number_of_outliers'] = 1000

    yaml_stream = StringIO(yaml.dump(config_dict))
    config = ConfigurationParser().from_file_or_path(yaml_stream)

    # Only the side effect on the catalog's relations is checked below.
    shgraph.build_graph(config, full_catalog)

    checked_relation = vals.iso_relation
    assert checked_relation.include_outliers == True
    assert checked_relation.max_number_of_outliers == 1000
示例#18
0
def test_no_duplicates(stub_graph_set):
    """No node may appear in more than one connected subgraph."""
    shgraph = SnowShuGraph()

    _, vals = stub_graph_set

    full_catalog = [
        vals.iso_relation,
        vals.view_relation,
        vals.downstream_relation,
        vals.upstream_relation,
        vals.birelation_left,
        vals.birelation_right,
    ]

    config_dict = copy.deepcopy(BASIC_CONFIGURATION)
    yaml_stream = StringIO(yaml.dump(config_dict))
    config = ConfigurationParser().from_file_or_path(yaml_stream)

    with mock.MagicMock() as adapter_mock:
        # The catalog comes from the mocked source adapter.
        adapter_mock.build_catalog.return_value = full_catalog
        config.source_profile.adapter = adapter_mock
        shgraph.build_graph(config)
        graphs = shgraph.get_connected_subgraphs()

    # Flatten the nodes of every subgraph; a duplicate would make the list
    # longer than its de-duplicated set.
    all_nodes = []
    for subgraph in graphs:
        all_nodes.extend(subgraph.nodes)
    assert len(set(all_nodes)) == len(all_nodes)
示例#19
0
def test_graph_difference_empty_target(stub_graph_set):
    """Tests graph_difference returns the source graph when the target is empty.

    With an empty target catalog, the difference is expected to compare equal
    to the graph that was built from the source catalog.
    """
    shgraph = SnowShuGraph()
    _, vals = stub_graph_set
    downstream = vals.downstream_relation
    full_catalog = [
        downstream,
        vals.upstream_relation,
        vals.birelation_left,
        vals.birelation_right,
    ]

    directional_spec = {
        "local_attribute": vals.directional_key,
        "database": ".*",
        "schema": ".*",
        "relation": ".*relation.*$",
        "remote_attribute": vals.directional_key,
    }
    relation_spec = {
        "database": downstream.database,
        "schema": downstream.schema,
        "relation": downstream.name,
        "relationships": {"directional": [directional_spec]},
    }
    config_dict = copy.deepcopy(BASIC_CONFIGURATION)
    config_dict["source"]["specified_relations"] = [relation_spec]
    yaml_stream = StringIO(yaml.dump(config_dict))
    config = ConfigurationParser().from_file_or_path(yaml_stream)

    with mock.MagicMock() as adapter_mock:
        adapter_mock.build_catalog.return_value = full_catalog
        config.source_profile.adapter = adapter_mock
        shgraph.build_graph(config)

        source_graph = shgraph.graph
        empty_target = set()
        difference = SnowShuGraph.catalog_difference(shgraph, empty_target)
        assert difference == source_graph
示例#20
0
def test_sets_outliers(stub_graph_set):
    """Source-level outlier settings are expected to reach the relations.

    With ``include_outliers`` and ``max_number_of_outliers`` set on the source
    section, building the graph from the mocked adapter's catalog should leave
    both values on the isolated relation.
    """
    shgraph = SnowShuGraph()

    _, vals = stub_graph_set

    full_catalog = [
        vals.iso_relation,
        vals.view_relation,
        vals.downstream_relation,
        vals.upstream_relation,
        vals.birelation_left,
        vals.birelation_right,
    ]

    config_dict = copy.deepcopy(BASIC_CONFIGURATION)
    source_section = config_dict['source']
    source_section['include_outliers'] = True
    source_section['max_number_of_outliers'] = 1000

    yaml_stream = StringIO(yaml.dump(config_dict))
    config = ConfigurationParser().from_file_or_path(yaml_stream)

    with mock.MagicMock() as adapter_mock:
        adapter_mock.build_catalog.return_value = full_catalog
        config.source_profile.adapter = adapter_mock
        # Only the side effect on the catalog's relations is checked below.
        shgraph.build_graph(config)

    checked_relation = vals.iso_relation
    assert checked_relation.include_outliers is True
    assert checked_relation.max_number_of_outliers == 1000
示例#21
0
    def _execute(self,
                 barf: bool = False,
                 name: Optional[str] = None) -> Optional[str]:
        """Build the relation graph and run it, optionally incrementally.

        Args:
            barf: Forwarded unchanged to ``GraphSetRunner.execute_graph_set``.
            name: When not ``None``, replaces ``self.config.name`` before the
                graph is built.

        Returns:
            A message when there are no connected subgraphs to process;
            otherwise the value of ``printable_result`` for the executed
            graphs.
        """
        graph = SnowShuGraph()
        if name is not None:
            self.config.name = name

        graph.build_graph(self.config)

        if self.incremental:
            # TODO replica container should not be started for analyze commands
            incremental_adapter = self.config.target_profile.adapter
            incremental_adapter.initialize_replica(
                self.config.source_profile.name, self.incremental)

            sum_patterns = SnowShuGraph.build_sum_patterns_from_configs(
                self.config)
            incremental_target_catalog = incremental_adapter.build_catalog(
                patterns=sum_patterns, thread_workers=self.config.threads)

            # NOTE(review): the raw graph (graph.graph) is passed here, not the
            # SnowShuGraph instance -- confirm against catalog_difference's
            # signature.
            graph.graph = SnowShuGraph.catalog_difference(
                graph.graph, incremental_target_catalog)

        graphs = graph.get_connected_subgraphs()
        if len(graphs) < 1:
            # The three fragments fill the template's slots in order.
            if self.incremental:
                fragments = (' new ', ' incremental ', '; image up-to-date')
            else:
                fragments = (' ', ' ', '')
            return "No{}relations found per provided{}replica configuration{}, exiting.".format(
                *fragments)

        target_adapter = self.config.target_profile.adapter
        if not target_adapter.container:
            # TODO replica container should not be started for analyze commands
            target_adapter.initialize_replica(self.config.source_profile.name)

        source_adapter = self.config.source_profile.adapter
        runner = GraphSetRunner()
        runner.execute_graph_set(graphs,
                                 source_adapter,
                                 target_adapter,
                                 threads=self.config.threads,
                                 analyze=self.run_analyze,
                                 barf=barf)

        # Target post-processing is skipped when run_analyze is set.
        if not self.run_analyze:
            relations = []
            for subgraph in graphs:
                relations.extend(subgraph.nodes)

            if source_adapter.SUPPORTS_CROSS_DATABASE:
                logger.info('Creating x-database links in target...')
                target_adapter.enable_cross_database()
                logger.info('X-database enabled.')
            target_adapter.create_all_database_extensions()

            logger.info('Applying %s emulation functions to target...',
                        source_adapter.name)
            for function in source_adapter.SUPPORTED_FUNCTIONS:
                target_adapter.create_function_if_available(function,
                                                            relations)
            logger.info('Emulation functions applied.')
            target_adapter.finalize_replica()

        return printable_result(graph_to_result_list(graphs), self.run_analyze)