Exemplo n.º 1
0
def test_pipeline_tree_creation(parsed_ordered_dict, sample_metadata, sample_image_metadata):
    """Verify the parsed ordered dict mirrors the source pipeline JSON.

    Checks that the dict has one entry per pipeline node, that each node's
    ``parent_operations`` matches the hard-coded dependency tree of the test
    resource, and that per-node fields (runtime image, pull policy, filename,
    env vars, COS credentials, inputs/outputs) were carried over correctly.
    """
    pipeline_json = _read_pipeline_resource(PIPELINE_FILE)

    ordered_dict = parsed_ordered_dict

    # One entry per node in the source pipeline definition
    assert len(ordered_dict.keys()) == len(pipeline_json['pipelines'][0]['nodes'])

    # Verify tree structure is correct (node ids are fixed by the test resource)
    assert not ordered_dict['cded6818-e601-4fd8-b6b9-c9fdf1fd1fca'].get('parent_operations')
    assert ordered_dict['bb9606ca-29ec-4133-a36a-67bd2a1f6dc3'].get(
        'parent_operations').pop() == 'cded6818-e601-4fd8-b6b9-c9fdf1fd1fca'
    assert ordered_dict['6f5c2ece-1977-48a1-847f-099b327c6ed1'].get(
        'parent_operations').pop() == 'cded6818-e601-4fd8-b6b9-c9fdf1fd1fca'
    assert ordered_dict['4ef63a48-a27c-4d1e-a0ee-2fbbdbe3be74'].get(
        'parent_operations').pop() == 'cded6818-e601-4fd8-b6b9-c9fdf1fd1fca'
    assert ordered_dict['4f7ae91b-682e-476c-8664-58412336b31f'].get(
        'parent_operations').pop() == 'bb9606ca-29ec-4133-a36a-67bd2a1f6dc3'
    assert ordered_dict['f82c4699-b392-4a3e-92b0-45d9e11126fe'].get(
        'parent_operations').pop() == 'bb9606ca-29ec-4133-a36a-67bd2a1f6dc3'
    assert ordered_dict['137d3d2f-4224-42d9-b8c6-cbee9ff2872d'].get(
        'parent_operations') == ['4ef63a48-a27c-4d1e-a0ee-2fbbdbe3be74', '0a7eff92-fe2a-411c-92a6-73d6f3810516']
    assert not ordered_dict['779c2630-64bf-47ca-8a98-9ac8a60e85f7'].get('parent_operations')
    assert ordered_dict['0a7eff92-fe2a-411c-92a6-73d6f3810516'].get(
        'parent_operations').pop() == '779c2630-64bf-47ca-8a98-9ac8a60e85f7'
    assert ordered_dict['92a7a247-1131-489c-8c3e-1e2389d4c673'].get(
        'parent_operations') == ['f82c4699-b392-4a3e-92b0-45d9e11126fe', "137d3d2f-4224-42d9-b8c6-cbee9ff2872d",
                                 '6f5c2ece-1977-48a1-847f-099b327c6ed1']

    # Cross-check every parsed entry against its source node
    for key in ordered_dict.keys():
        for node in pipeline_json['pipelines'][0]['nodes']:
            if node['id'] == key:
                assert ordered_dict[key]['runtime_image'] == node['app_data']['runtime_image']
                for image in sample_image_metadata:
                    if ordered_dict[key]['runtime_image'] == image.metadata['image_name']:
                        assert ordered_dict[key]['image_pull_policy'] == image.metadata['pull_policy']
                assert ordered_dict[key]['filename'] == node['app_data']['filename']
                for env in node['app_data']['env_vars']:
                    # Split on the first '=' only, so values containing '='
                    # (e.g. base64 strings) do not raise ValueError
                    var, value = env.split("=", 1)
                    assert ordered_dict[key]['pipeline_envs'][var] == value
                assert ordered_dict[key]['cos_endpoint'] == sample_metadata['cos_endpoint']
                assert ordered_dict[key]['cos_bucket'] == sample_metadata['cos_bucket']
                assert ordered_dict[key]['pipeline_envs']['AWS_ACCESS_KEY_ID'] == sample_metadata['cos_username']
                assert ordered_dict[key]['pipeline_envs']['AWS_SECRET_ACCESS_KEY'] == sample_metadata['cos_password']
                for arg in ["inputs", "outputs"]:
                    if node['app_data'].get(arg):
                        for file in node['app_data'][arg]:
                            assert file in ordered_dict[key]["pipeline_" + arg]
Exemplo n.º 2
0
def parsed_pipeline():
    """Return the shared test pipeline resource parsed into a pipeline object."""
    return PipelineParser().parse(
        pipeline_definitions=_read_pipeline_resource(PIPELINE_FILE))
Exemplo n.º 3
0
def test_create_file(monkeypatch, processor, parsed_pipeline,
                     parsed_ordered_dict, sample_metadata):
    """Verify the generated Airflow DAG file matches the pipeline definition.

    Mocks out metadata lookup, dependency upload, and pipeline compilation on
    the processor, exports the pipeline to a .py file in a temp dir, then
    scans the emitted source line-by-line to confirm the project id and each
    NotebookOp's namespace, COS settings, name, notebook, image, env vars,
    and pipeline inputs/outputs.
    """
    pipeline_json = _read_pipeline_resource(PIPELINE_FILE)

    export_pipeline_name = "some-name"
    export_file_type = "py"

    mocked_runtime = Metadata(name="test-metadata",
                              display_name="test",
                              schema_name="airflow",
                              metadata=sample_metadata)

    monkeypatch.setattr(processor,
                        "_get_metadata_configuration",
                        lambda name=None, namespace=None: mocked_runtime)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store",
                        lambda x, y, z: True)
    monkeypatch.setattr(processor, "_cc_pipeline",
                        lambda x, y: parsed_ordered_dict)

    with tempfile.TemporaryDirectory() as temp_dir:
        export_pipeline_output_path = os.path.join(
            temp_dir, f'{export_pipeline_name}.py')

        response = processor.create_pipeline_file(
            parsed_pipeline,
            pipeline_export_format=export_file_type,
            pipeline_export_path=export_pipeline_output_path,
            pipeline_name=export_pipeline_name)

        assert export_pipeline_output_path == response
        assert os.path.isfile(export_pipeline_output_path)

        # Use a context manager so the exported file handle is closed
        # (the original `open(response).read()` leaked the handle)
        with open(response) as exported_file:
            file_as_lines = exported_file.read().splitlines()

        # Check DAG project name
        for i in range(len(file_as_lines)):
            if "args = {" == file_as_lines[i]:
                assert "project_id" == read_key_pair(file_as_lines[i + 1],
                                                     sep=':')['key']
                assert export_pipeline_name == read_key_pair(file_as_lines[i +
                                                                           1],
                                                             sep=':')['value']

        # For every node in the original pipeline json
        for node in pipeline_json['pipelines'][0]['nodes']:
            for i in range(len(file_as_lines)):
                # Matches an op with a node ID
                if "notebook_op_" + node['id'].replace(
                        "-", "_") + " = NotebookOp(" in file_as_lines[i]:
                    sub_list_line_counter = 0
                    # Gets sub-list slice starting where the Notebook Op starts
                    for line in file_as_lines[i + 1:]:
                        if 'namespace=' in line:
                            assert sample_metadata[
                                'user_namespace'] == read_key_pair(
                                    line)['value']
                        elif 'cos_endpoint=' in line:
                            assert sample_metadata[
                                'cos_endpoint'] == read_key_pair(line)['value']
                        elif 'cos_bucket=' in line:
                            assert sample_metadata[
                                'cos_bucket'] == read_key_pair(line)['value']
                        elif 'name=' in line:
                            assert node['app_data']['ui_data'][
                                'label'] == read_key_pair(line)['value']
                        elif 'notebook=' in line:
                            assert node['app_data'][
                                'filename'] == read_key_pair(line)['value']
                        elif 'image=' in line:
                            assert node['app_data'][
                                'runtime_image'] == read_key_pair(
                                    line)['value']
                        elif 'env_vars=' in line:
                            for env in node['app_data']['env_vars']:
                                # Split on the first '=' only, so values that
                                # themselves contain '=' do not raise ValueError
                                var, value = env.split("=", 1)
                                # Gets sub-list slice starting where the env vars starts
                                for env_line in file_as_lines[
                                        i + sub_list_line_counter + 2:]:
                                    if "AWS_ACCESS_KEY_ID" in env_line:
                                        assert sample_metadata[
                                            'cos_username'] == read_key_pair(
                                                env_line, sep=':')['value']
                                    elif "AWS_SECRET_ACCESS_KEY" in env_line:
                                        assert sample_metadata[
                                            'cos_password'] == read_key_pair(
                                                env_line, sep=':')['value']
                                    elif var in env_line:
                                        assert var == read_key_pair(
                                            env_line, sep=':')['key']
                                        assert value == read_key_pair(
                                            env_line, sep=':')['value']
                                    elif env_line.strip(
                                    ) == '},':  # end of env vars
                                        break
                        elif 'pipeline_inputs=' in line and node[
                                'app_data'].get('inputs'):
                            # `pipeline_input` avoids shadowing builtin `input`
                            for pipeline_input in node['app_data']['inputs']:
                                assert pipeline_input in string_to_list(
                                    read_key_pair(line)['value'])
                        elif 'pipeline_outputs=' in line and node[
                                'app_data'].get('outputs'):
                            for output in node['app_data']['outputs']:
                                assert output in string_to_list(
                                    read_key_pair(line)['value'])
                        elif line == ')':  # End of this Notebook Op
                            break
                        sub_list_line_counter += 1
Exemplo n.º 4
0
def pipeline():
    """Return the parsed 3-node sample pipeline resource."""
    sample_path = 'resources/sample_pipelines/pipeline_3_node_sample.json'
    pipeline_resource = _read_pipeline_resource(sample_path)
    return PipelineParser.parse(pipeline_resource)