Exemplo n.º 1
0
def convert(oozie_graph, pipeline_converter):
    """Convert OozieGraph to Pipeline using given Converter

    The input and output nodes returned by
    ``pipeline_converter.convert_parameters`` are stored under the names
    given by ``Pipeline.get_input_node_name()`` and
    ``Pipeline.get_output_node_name()``. Every Oozie node is then converted
    with ``pipeline_converter.convert_node`` and stored under its own name.
    Any node for which the converter yields ``None`` is left out.

    Args:
        oozie_graph (OozieGraph): graph whose ``parameters`` and ``nodes``
            are converted
        pipeline_converter (PipelineConverter): object providing
            ``convert_parameters`` and ``convert_node``

    Returns:
        Pipeline

    Raises:
        Exception: if an Oozie node is named like the input or output node
    """
    pipeline_nodes = {}
    (input_node, output_node) = pipeline_converter.convert_parameters(
        oozie_graph.parameters)

    # The reserved names do not depend on the node being processed, so they
    # are looked up once and reused instead of being rebuilt per iteration.
    input_name = Pipeline.get_input_node_name()
    output_name = Pipeline.get_output_node_name()
    reserved_names = [input_name, output_name]

    for (name, node) in [(input_name, input_node),
                         (output_name, output_node)]:
        if node is not None:
            pipeline_nodes[name] = node

    for (name, oozie_node) in oozie_graph.nodes.items():
        # Checked inside the loop so that nodes preceding an offending one
        # are still passed to the converter, as before.
        if name in reserved_names:
            raise Exception('Name of one of Oozie nodes ({}) is one of the '
                            'reserved names: {}.'
                            .format(name, ', '.join(reserved_names)))
        pipeline_node = pipeline_converter.convert_node(name, oozie_node)
        if pipeline_node is not None:
            pipeline_nodes[name] = pipeline_node
    return Pipeline(pipeline_nodes)
Exemplo n.º 2
0
def convert(oozie_graph, pipeline_converter):
    """Convert OozieGraph to Pipeline using given Converter

    First the graph parameters are turned into an input and an output node
    via ``pipeline_converter.convert_parameters``; each of them that is not
    ``None`` is stored under its reserved name. Then each entry of
    ``oozie_graph.nodes`` is passed to ``pipeline_converter.convert_node``
    and the result, unless it is ``None``, is stored under the node's name.

    Args:
        oozie_graph (OozieGraph): source of ``parameters`` and ``nodes``
        pipeline_converter (PipelineConverter): performs the conversion of
            parameters and of single nodes

    Returns:
        Pipeline

    Raises:
        Exception: if the name of an Oozie node equals one of the reserved
            input/output node names
    """
    pipeline_nodes = {}
    (input_node, output_node) = pipeline_converter.convert_parameters(
        oozie_graph.parameters)

    # Loop-invariant: fetch the reserved names a single time and share them
    # between the input/output registration and the per-node check.
    reserved_names = [
        Pipeline.get_input_node_name(),
        Pipeline.get_output_node_name()
    ]

    for (name, node) in zip(reserved_names, (input_node, output_node)):
        if node is not None:
            pipeline_nodes[name] = node

    for (name, oozie_node) in oozie_graph.nodes.items():
        # The check stays per node so earlier nodes are converted first.
        if name in reserved_names:
            raise Exception('Name of one of Oozie nodes ({}) is one of the '
                            'reserved names: {}.'.format(
                                name, ', '.join(reserved_names)))
        pipeline_node = pipeline_converter.convert_node(name, oozie_node)
        if pipeline_node is not None:
            pipeline_nodes[name] = pipeline_node
    return Pipeline(pipeline_nodes)
Exemplo n.º 3
0
    def add_node(self, name, node):
        """Register a node under ``name`` and add it to the output.

        Input/output nodes (named as reported by ``Pipeline``) are always
        added as advanced nodes with both port kinds shown. Other nodes are
        added as advanced nodes when their importance score is above -1;
        otherwise they are added as small unlabeled boxes.

        Args:
            name (string): name of the node
            node (vipe.pipeline.pipeline.Node): data about the node

        Raises:
            Exception: if the registry already contains ``name``
        """
        # NOTE(review): the message says "more than two", yet the check
        # rejects any name already present in the registry -- confirm.
        if self.__n_reg.contains(name):
            raise Exception('More than two nodes with the same name '
                            '(here: "{}") are not allowed'.format(name))
        color = self.__get_color(node.importance)

        boundary_names = (Pipeline.get_input_node_name(),
                          Pipeline.get_output_node_name())
        if name in boundary_names:
            self.__n_reg.add(name, _NodeInfo(True, True))
            self.__add_advanced_node(name, node, True, True, color, 'folder')
            return

        score = self.__importance_score_map.get_score(node.importance)
        if score > -1:
            self.__n_reg.add(name, _NodeInfo(self.__show_input_ports,
                                             self.__show_output_ports))
            self.__add_advanced_node(name, node,
                                     self.__show_input_ports,
                                     self.__show_output_ports, color)
            return

        # Low-score nodes are registered without ports.
        self.__n_reg.add(name, _NodeInfo(False, False))
        if score == -1:
            side = 0.2
        elif score < -1:
            side = 0.1
        else:
            # Neither comparison held, so no box is added.
            return
        self.__b.add_node(self.__map(name), labels=[''], shape='box',
                          width=side, height=side, color=color)
def check_changed(dir_name):
    """Assert that the remover turns ``pipeline.yaml`` into ``expected.yaml``.

    Both files are read from ``data/low_score_nodes_remover/<dir_name>``
    through ``read_as_string``.

    Args:
        dir_name (string): name of the directory holding the two files
    """
    base_dir = os.path.join('data', 'low_score_nodes_remover', dir_name)

    source_yaml = read_as_string(
        __name__, os.path.join(base_dir, 'pipeline.yaml'))
    source_pipeline = Pipeline.from_yaml_dump(source_yaml)
    score_map = ImportanceScoreMap(DetailLevel.medium)
    actual = LowScoreNodesRemover(score_map).run(source_pipeline)

    expected_yaml = read_as_string(
        __name__, os.path.join(base_dir, 'expected.yaml'))
    expected = Pipeline.from_yaml_dump(expected_yaml)
    assert expected == actual
Exemplo n.º 5
0
def check_changed(dir_name):
    """Run the low-score remover on a fixture and compare with the expectation.

    The fixture directory is ``data/low_score_nodes_remover/<dir_name>``; it
    provides ``pipeline.yaml`` (the input) and ``expected.yaml`` (the
    pipeline the remover is expected to return).

    Args:
        dir_name (string): fixture directory name
    """
    def load(file_name):
        # Read one YAML file of the fixture and parse it into a Pipeline.
        relative_path = os.path.join(
            'data', 'low_score_nodes_remover', dir_name, file_name)
        return Pipeline.from_yaml_dump(read_as_string(__name__, relative_path))

    original = load('pipeline.yaml')
    remover = LowScoreNodesRemover(ImportanceScoreMap(DetailLevel.medium))
    actual = remover.run(original)
    expected = load('expected.yaml')
    assert expected == actual
Exemplo n.º 6
0
def check(oozie_workflow_file_path, expected_pipeline_file_path):
    """Assert that a converted Oozie workflow equals the expected pipeline.

    Args:
        oozie_workflow_file_path: passed to
            ``convert_oozie_yaml_to_pipeline``
        expected_pipeline_file_path: path of the YAML dump of the expected
            pipeline, read through ``read_as_string``
    """
    converted = convert_oozie_yaml_to_pipeline(oozie_workflow_file_path)
    expected = Pipeline.from_yaml_dump(
        read_as_string(__name__, expected_pipeline_file_path))
    # The message is only formatted when the comparison fails.
    assert expected == converted, (
        'expected={},\nactual={}'.format(expected, converted))
Exemplo n.º 7
0
def check(oozie_workflow_file_path, expected_pipeline_file_path):
    """Convert an Oozie workflow and compare it with an expected pipeline.

    Args:
        oozie_workflow_file_path: workflow handed over to
            ``convert_oozie_yaml_to_pipeline``
        expected_pipeline_file_path: location of the expected pipeline's
            YAML dump, loaded with ``read_as_string``
    """
    actual = convert_oozie_yaml_to_pipeline(oozie_workflow_file_path)
    expected_yaml = read_as_string(__name__, expected_pipeline_file_path)
    expected = Pipeline.from_yaml_dump(expected_yaml)
    # Both pipelines are included in the failure message.
    failure_template = 'expected={},\nactual={}'
    assert expected == actual, failure_template.format(expected, actual)
Exemplo n.º 8
0
def convert_to_dot(pipeline_file_relative_path,
                   detail_level=DetailLevel.medium,
                   show_input_ports=False, show_output_ports=False):
    """Load a pipeline from a YAML dump and run ``Converter`` on it.

    Args:
        pipeline_file_relative_path: path of the YAML file, read through
            ``read_as_string``
        detail_level: first argument given to ``Converter``
        show_input_ports: second argument given to ``Converter``
        show_output_ports: third argument given to ``Converter``

    Returns:
        Whatever ``Converter.run`` returns for the loaded pipeline.
    """
    loaded = Pipeline.from_yaml_dump(
        read_as_string(__name__, pipeline_file_relative_path))
    return Converter(
        detail_level, show_input_ports, show_output_ports).run(loaded)
def check_no_changes(dir_name):
    """Assert that the remover returns a pipeline equal to its input.

    The input is ``data/low_score_nodes_remover/<dir_name>/pipeline.yaml``.

    Args:
        dir_name (string): name of the directory holding ``pipeline.yaml``
    """
    path_parts = ['data', 'low_score_nodes_remover', dir_name,
                  'pipeline.yaml']
    source_yaml = read_as_string(__name__, os.path.join(*path_parts))
    pipeline = Pipeline.from_yaml_dump(source_yaml)
    score_map = ImportanceScoreMap(DetailLevel.medium)
    actual = LowScoreNodesRemover(score_map).run(pipeline)
    assert pipeline == actual
Exemplo n.º 10
0
def check_no_changes(dir_name):
    """Check that running the low-score remover leaves a fixture unchanged.

    Args:
        dir_name (string): fixture directory below
            ``data/low_score_nodes_remover`` that contains ``pipeline.yaml``
    """
    fixture_dir = os.path.join('data', 'low_score_nodes_remover', dir_name)
    yaml_text = read_as_string(
        __name__, os.path.join(fixture_dir, 'pipeline.yaml'))
    pipeline = Pipeline.from_yaml_dump(yaml_text)
    remover = LowScoreNodesRemover(ImportanceScoreMap(DetailLevel.medium))
    # The result must compare equal to the pipeline that was passed in.
    assert pipeline == remover.run(pipeline)
Exemplo n.º 11
0
def convert_to_dot(pipeline_file_relative_path,
                   detail_level=DetailLevel.medium,
                   show_input_ports=False,
                   show_output_ports=False):
    """Read a pipeline YAML dump and return the result of ``Converter.run``.

    Args:
        pipeline_file_relative_path: YAML file loaded via ``read_as_string``
        detail_level: forwarded to ``Converter``
        show_input_ports: forwarded to ``Converter``
        show_output_ports: forwarded to ``Converter``

    Returns:
        The value produced by ``Converter.run`` for the parsed pipeline.
    """
    yaml_text = read_as_string(__name__, pipeline_file_relative_path)
    parsed = Pipeline.from_yaml_dump(yaml_text)
    converter_args = (detail_level, show_input_ports, show_output_ports)
    return Converter(*converter_args).run(parsed)
Exemplo n.º 12
0
    def add_node(self, name, node):
        """Register ``name`` and add the node in a form based on its importance.

        The pipeline's input and output nodes are added as advanced nodes
        with all ports visible. A node whose importance score exceeds -1 is
        added as an advanced node using the configured port visibility. A
        node scoring -1 or lower is added as a small box with an empty
        label.

        Args:
            name (string): name of the node
            node (vipe.pipeline.pipeline.Node): data about the node

        Raises:
            Exception: if ``name`` is already present in the registry
        """
        # NOTE(review): wording says "more than two" although a name already
        # held by the registry is rejected -- confirm intended message.
        if self.__n_reg.contains(name):
            raise Exception('More than two nodes with the same name '
                            '(here: "{}") are not allowed'.format(name))
        color = self.__get_color(node.importance)

        io_names = [Pipeline.get_input_node_name(),
                    Pipeline.get_output_node_name()]
        if name in io_names:
            self.__n_reg.add(name, _NodeInfo(True, True))
            self.__add_advanced_node(name, node, True, True, color, 'folder')
            return

        importance_score = \
            self.__importance_score_map.get_score(node.importance)
        if not importance_score > -1:
            self.__n_reg.add(name, _NodeInfo(False, False))
            # The box is smaller for scores below -1.
            if importance_score == -1:
                box_size = 0.2
            elif importance_score < -1:
                box_size = 0.1
            else:
                box_size = None
            if box_size is not None:
                self.__b.add_node(self.__map(name),
                                  labels=[''],
                                  shape='box',
                                  width=box_size,
                                  height=box_size,
                                  color=color)
            return

        self.__n_reg.add(
            name,
            _NodeInfo(self.__show_input_ports, self.__show_output_ports))
        self.__add_advanced_node(name, node, self.__show_input_ports,
                                 self.__show_output_ports, color)
Exemplo n.º 13
0
def check(pipeline_file_path, data_dict):
    """Assert that a pipeline's ``PipelineData`` matches the expected data.

    Args:
        pipeline_file_path: YAML dump of the pipeline, read through
            ``read_as_string``
        data_dict: basic-data-type description of the expected data, given
            to ``PipelineData.from_basic_data_types``
    """
    loaded = Pipeline.from_yaml_dump(
        read_as_string(__name__, pipeline_file_path))
    actual = PipelineData.from_pipeline(loaded)
    expected = PipelineData.from_basic_data_types(data_dict)
    assert expected == actual
Exemplo n.º 14
0
def check(pipeline_file_path, data_dict):
    """Compare data derived from a pipeline file with an expected description.

    Args:
        pipeline_file_path: path of the pipeline's YAML dump, loaded with
            ``read_as_string``
        data_dict: expected data, converted with
            ``PipelineData.from_basic_data_types``
    """
    yaml_text = read_as_string(__name__, pipeline_file_path)
    # The actual side is built from the parsed pipeline first, the expected
    # side from the given dictionary afterwards.
    actual_data = PipelineData.from_pipeline(
        Pipeline.from_yaml_dump(yaml_text))
    expected_data = PipelineData.from_basic_data_types(data_dict)
    assert expected_data == actual_data