Example No. 1
    geo_checks.append(
        GEO_CHECK.make_check(
            check_id="geo_check",
            params=dict(
                table_name=f"{dag_id}_{dag_id}_new",
                geotype=["POINT"],
            ),
            pass_value=1,
        )
    )

    total_checks = count_checks + geo_checks

    # 8. RUN bundled CHECKS
    multi_checks = PostgresMultiCheckOperator(task_id="multi_check", checks=total_checks)

    # 9. Rename TABLE
    rename_table = PostgresTableRenameOperator(
        task_id=f"rename_table_{dag_id}",
        old_table_name=f"{dag_id}_{dag_id}_new",
        new_table_name=f"{dag_id}_{dag_id}",
    )

    # 10. Grant database permissions
    grant_db_permissions = PostgresPermissionsOperator(task_id="grants", dag_name=dag_id)

(
    slack_at_start
    >> mkdir
    >> download_data
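The fragment above breaks off inside the task-dependency chain. For context, the count_checks list bundled into total_checks is typically built with the same make_check pattern as the geo check; a minimal sketch, assuming COUNT_CHECK is the companion check factory used by these DAGs (the pass_value threshold here is a placeholder):

    import operator

    count_checks.append(
        COUNT_CHECK.make_check(
            check_id="count_check",
            pass_value=25,  # placeholder: expect at least 25 rows
            params=dict(table_name=f"{dag_id}_{dag_id}_new"),
            result_checker=operator.ge,  # pass when actual >= pass_value
        )
    )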
Example No. 2
                    GEO_CHECK.make_check(
                        check_id=f"geo_check_{splitted_tablename}",
                        params=dict(
                            table_name=f"{dag_id}_{splitted_tablename}_new",
                            geotype=["MULTIPOLYGON"],
                        ),
                        pass_value=1,
                    )
                )

            total_checks = count_checks + geo_checks
            check_name[splitted_tablename] = total_checks

    # 11. Execute bundled checks on database (see checks definition here above)
    multi_checks = [
        PostgresMultiCheckOperator(
            task_id=f"multi_check_{splitted_tablename}",
            checks=check_name[splitted_tablename],
        ) for key in files_to_download.keys()
        for splitted_tablename in key.split("-") if "themas" not in key
    ]

    # 12. Dummy operator acts as an interface between one set of parallel tasks
    #  and the next set with a different number of lanes
    #  (without this intermediary, Airflow will raise an error)
    Interface2 = DummyOperator(task_id="interface2")
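    #  Typical wiring, using the task lists defined in this fragment:
    #  multi_checks >> Interface2 >> rename_tables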

    # 13. Rename TABLES
    rename_tables = [
        PostgresTableRenameOperator(
            task_id=f"rename_table_{splitted_tablename}",
            old_table_name=f"{dag_id}_{splitted_tablename}_new",
            new_table_name=f"{dag_id}_{splitted_tablename}",
        ) for key in files_to_download.keys()
        for splitted_tablename in key.split("-") if "themas" not in key
    ]
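To make the double comprehension concrete: each key in files_to_download may bundle several table names joined by "-", and keys containing "themas" are skipped. A standalone illustration (the manifest contents are hypothetical):

    files_to_download = {
        "parken-bossen": "parken_bossen.zip",
        "themas": "themas.zip",  # filtered out by the comprehension
    }

    table_names = [
        splitted_tablename
        for key in files_to_download.keys()
        for splitted_tablename in key.split("-")
        if "themas" not in key
    ]
    print(table_names)  # ['parken', 'bossen']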
Example No. 3
                            "POLYGON",
                            "MULTIPOLYGON",
                            "MULTILINESTRING",
                            "LINESTRING",
                        ],
                    ),
                    pass_value=1,
                )
            )

            total_checks = count_checks + geo_checks
            check_name[resource] = total_checks

    # 15. Execute bundled checks on database
    multi_checks = [
        PostgresMultiCheckOperator(task_id=f"multi_check_{resource}", checks=check_name[resource])
        for resources in files_to_import.values()
        for resource in resources
    ]

    # 16. Create the DB target table (as specified in the JSON data schema)
    # if table not exists yet
    create_tables = [
        SqlAlchemyCreateObjectOperator(
            task_id=f"create_{resource}_based_upon_schema",
            data_schema_name=dag_id,
            data_table_name=f"{dag_id}_{resource}",
            ind_table=True,
            # when set to false, it doesn't create indexes; only tables
            ind_extra_index=False,
        )
        for resources in files_to_import.values()
        for resource in resources
    ]
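The lists built in steps 15 and 16 run as parallel lanes; their ordering is not shown in this fragment. A hedged sketch of one plausible wiring, pairing each check lane with its matching create-table lane (the ordering is an assumption, not taken from the source):

    # Pair lane i of the checks with lane i of the table creation.
    for check, create in zip(multi_checks, create_tables):
        check >> create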
Example No. 4
        geo_checks.append(
            GEO_CHECK.make_check(
                check_id=f"geo_check_{key}",
                params=dict(
                    table_name=f"{dag_id}_{key}_new",
                    geotype=["MULTIPOLYGON"],
                ),
                pass_value=1,
            )
        )

        total_checks = count_checks + geo_checks
        check_name[key] = total_checks

    # 10. Execute bundled checks on database
    multi_checks = [
        PostgresMultiCheckOperator(task_id=f"multi_check_{key}",
                                   checks=check_name[key])
        for key in tables_to_create.keys()
    ]

    # 11. Rename TABLES
    rename_tables = [
        PostgresTableRenameOperator(
            task_id=f"rename_table_{key}",
            old_table_name=f"{dag_id}_{key}_new",
            new_table_name=f"{dag_id}_{key}",
        ) for key in tables_to_create.keys()
    ]

    # 12. Grant database permissions
    grant_db_permissions = PostgresPermissionsOperator(task_id="grants",
                                                       dag_name=dag_id)
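The grant step normally runs once, after all parallel lanes finish. A minimal dependency sketch using the tasks defined above (the exact chain is an assumption; the fragment does not show it):

    for check, rename in zip(multi_checks, rename_tables):
        check >> rename >> grant_db_permissions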
Example No. 5

                params=dict(
                    table_name=f"{dag_id}_{table_name}_new",
                    geotype=["POLYGON", "MULTIPOLYGON", "MULTILINESTRING", "POINT"],
                    geo_column="geometrie",
                ),
                pass_value=1,
            )
        )

        total_checks = count_checks + geo_checks
        check_name[table_name] = total_checks

    # 18. Execute bundled checks on database
    multi_checks = [
        PostgresMultiCheckOperator(
            task_id=f"multi_check_{table_name}", checks=check_name[f"{table_name}"]
        )
        for table_name in files_to_download.keys()
    ]

    # 19. Check for changes to merge in target table by using CDC
    change_data_capture = [
        PgComparatorCDCOperator(
            task_id=f"change_data_capture_{table_name}",
            source_table=f"{dag_id}_{table_name}_new",
            target_table=f"{dag_id}_{table_name}",
        )
        for table_name in files_to_download.keys()
    ]

    # 20. Clean up; delete temp table
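The fragment ends before the cleanup task itself. A hedged sketch of what step 20 could look like using Airflow's stock PostgresOperator (the real DAG may use a different operator; the temp-table naming follows the pattern above):

    from airflow.providers.postgres.operators.postgres import PostgresOperator

    clean_up = [
        PostgresOperator(
            task_id=f"clean_up_{table_name}",
            sql=f"DROP TABLE IF EXISTS {dag_id}_{table_name}_new CASCADE",
        )
        for table_name in files_to_download.keys()
    ]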
Example No. 6
    colname_check = COLNAMES_CHECK.make_check(
        check_id="colname_check",
        parameters=["fietspaaltjes"],
        pass_value={"id"},
        result_checker=operator.ge,
    )

    geo_check = GEO_CHECK.make_check(
        check_id="geo_check",
        params=dict(table_name="fietspaaltjes", geotype="POINT"),
        pass_value=1,
    )

    checks = [count_check, colname_check, geo_check]
    multi = PostgresMultiCheckOperator(task_id="multi",
                                       checks=checks,
                                       params=make_params(checks))

    # swift_task
    # sqls = [
    #     "delete from biz_data where biz_id = {{ params.tba }}",
    #     "insert into biz_data (biz_id, naam) values (123456789, 'testje')",
    # ]
    # pgtest = PostgresOperator(task_id="pgtest", sql=sqls)

    # bashtest = BashOperator(
    #     task_id="bashtest", bash_command=f"psql {pg_params} < /tmp/doit.sql",
    # )

    # failing_task = PythonOperator(
    #     task_id="failing_task", python_callable=create_error, provide_context=True,