# Convert the fetched GeoJSON into a PostgreSQL dump (reprojecting WGS84 ->
# RD New / EPSG:28992, forcing MULTILINESTRING), load it with psql, then
# sanity-check row count, geometry type, and column names.
extract_geojson = BashOperator(
    task_id="extract_geojson",
    bash_command=" ".join(
        [
            "ogr2ogr -f 'PGDump' -nlt MULTILINESTRING",
            "-t_srs EPSG:28992 -s_srs EPSG:4326",
            f"-nln {dag_id}_new",
            f"{tmp_file_prefix}.sql {tmp_file_prefix}.json",
        ]
    ),
)

load_table = BashOperator(
    task_id="load_table",
    bash_command=f"psql {pg_params} < {tmp_file_prefix}.sql",
)

# At least 3 rows must have been loaded.
check_count = PostgresCheckOperator(
    task_id="check_count",
    sql=SQL_CHECK_COUNT,
    params={"tablename": f"{dag_id}_new", "mincount": 3},
)

# Geometry column must contain multilinestrings only.
check_geo = PostgresCheckOperator(
    task_id="check_geo",
    sql=SQL_CHECK_GEO,
    params={"tablename": f"{dag_id}_new", "geotype": "ST_MultiLineString"},
)

# Column names must match the expected set exactly.
check_colnames = PostgresValueCheckOperator(
    task_id="check_colnames",
    sql=SQL_CHECK_COLNAMES,
    pass_value=colnames,
    params={"tablename": f"{dag_id}_new"},
)
# Fetch the GrEx CSV from the "grex" object-store container, load it into a
# staging table, validate row count and geometry, then swap the table live.
fetch_csv = SwiftOperator(
    task_id="fetch_csv",
    container="grex",
    object_id=csv_file,
    output_path=f"{tmp_dir}/{csv_file}",
)

load_data = PythonOperator(
    task_id="load_data",
    python_callable=load_grex,
    op_args=[f"{tmp_dir}/{csv_file}", table_name_new],
)

# Require at least 400 rows before promoting the staging table.
check_count = PostgresCheckOperator(
    task_id="check_count",
    sql=SQL_CHECK_COUNT,
    params={"tablename": table_name_new, "mincount": 400},
)

# The "geometry" column must contain multipolygons.
check_geo = PostgresCheckOperator(
    task_id="check_geo",
    sql=SQL_CHECK_GEO,
    params={
        "tablename": table_name_new,
        "geotype": "ST_MultiPolygon",
        "geo_column": "geometry",
    },
)

rename_table = PostgresOperator(task_id="rename_table", sql=SQL_TABLE_RENAME)

mk_tmp_dir >> fetch_csv >> load_data >> check_count >> check_geo >> rename_table
task_id="convert_shp",
# Re-encode the generated dump from Latin-1 to UTF-8 so psql accepts it.
bash_command=f"iconv -f iso-8859-1 -t utf-8 {tmp_dir}/{dag_id}.sql > "
f"{tmp_dir}/{dag_id}.utf8.sql",
)
# Run the converted dump, then the post-processing statements.
create_tables = PostgresOperator(
    task_id="create_tables",
    sql=[
        f"{tmp_dir}/{dag_id}.utf8.sql",
        PROCESS_TABLE,
    ],
)
# At least 110 rows expected after import.
check_count = PostgresCheckOperator(
    task_id="check_count",
    sql=SQL_CHECK_COUNT,
    params=dict(tablename=f"{dag_id}_new", mincount=110),
)
# Column names must match `colnames`; `checker` is a custom comparison
# callable defined elsewhere in this file.
check_colnames = PostgresValueCheckOperator(
    task_id="check_colnames",
    sql=SQL_CHECK_COLNAMES,
    pass_value=colnames,
    result_checker=checker,
    params=dict(tablename=f"{dag_id}_new"),
)
check_geo = PostgresCheckOperator(
    task_id="check_geo",
    sql=SQL_CHECK_GEO,
    params=dict(
],
)
# Create the target structures, then bulk-insert the generated data.
create_tables = BashOperator(
    task_id="create_tables",
    bash_command=f"psql {pg_params} < {sql_path}/biz_data_create.sql",
)
import_data = BashOperator(
    task_id="import_data",
    bash_command=f"psql {pg_params} < {tmp_dir}/biz_data_insert.sql",
)
# At least 48 rows expected in the new view.
check_count = PostgresCheckOperator(
    task_id="check_count",
    sql=SQL_CHECK_COUNT,
    params=dict(tablename="biz_view_new", mincount=48),
)
# The "geometrie" column must contain plain (non-multi) polygons.
check_geo = PostgresCheckOperator(
    task_id="check_geo",
    sql=SQL_CHECK_GEO,
    params=dict(tablename="biz_view_new", geotype="ST_Polygon", geo_column="geometrie"),
)
check_colnames = PostgresValueCheckOperator(
    task_id="check_colnames",
    sql=SQL_CHECK_COLNAMES,
    pass_value=colnames,
dataset_name="huishoudelijkafval", subset_tables=tables['dump_file']) # 6. DWH STADSDELEN SOURCE # Load voertuigenplanning data into DB load_dwh = PythonOperator( task_id="load_from_dwh_stadsdelen", python_callable=load_from_dwh, op_args=[f"{dag_id}_{to_snake_case(tables['dwh_stadsdelen'])}_new"], ) # 7. Check minimum number of records check_count = PostgresCheckOperator( task_id="check_count", sql=SQL_CHECK_COUNT, params=dict( tablename=f"{dag_id}_{to_snake_case(tables['dwh_stadsdelen'])}_new", mincount=1000), ) # 8. DWH STADSDELEN SOURCE # Rename COLUMNS based on provenance (if specified) provenance_dwh_data = ProvenanceRenameOperator( task_id="provenance_dwh", dataset_name=dag_id, prefix_table_name=f"{dag_id}_", postfix_table_name="_new", subset_tables=["".join(f"{tables['dwh_stadsdelen']}")], rename_indexes=False, pg_schema="public", )
bash_command=f"iconv -f iso-8859-1 -t utf-8 " f"{tmp_dir}/{dag_id}_{name}.sql > " f"{tmp_dir}/{dag_id}_{name}.utf8.sql", )) remove_drops.append( BashOperator( task_id=f"remove_drops_{name}", bash_command=f'perl -i -ne "print unless /DROP TABLE/" ' f"{tmp_dir}/{name}.utf8.sql", )) check_counts.append( PostgresCheckOperator( task_id=f"check_count_{name}", sql=SQL_CHECK_COUNT, params=dict(tablename=f"{dag_id}_{name}_new", mincount=mincount), )) check_colnames.append( PostgresValueCheckOperator( task_id=f"check_colnames_{name}", sql=SQL_CHECK_COLNAMES, pass_value=colnames, result_checker=checker, params=dict(tablename=f"{dag_id}_{name}_new"), )) load_dumps = PostgresOperator( task_id="load_dumps", sql=[
)
# Drop any leftover tables for this dataset before re-importing.
drop_tables = PostgresOperator(
    task_id="drop_tables",
    sql=SQL_DROPS,
    params=dict(tablename=dag_id),
)
import_table = PostgresFilesOperator(
    task_id="import_table",
    sql_files=[sql_file_new_path],
)
# At least 90 rows expected in the freshly imported table.
check_count = PostgresCheckOperator(
    task_id="check_count",
    sql=SQL_CHECK_COUNT,
    params=dict(tablename="bb_quotum_new", mincount=90),
)
# The "geo" column must contain multipolygons.
check_geo = PostgresCheckOperator(
    task_id="check_geo",
    sql=SQL_CHECK_GEO,
    params=dict(tablename="bb_quotum_new", geotype="ST_MultiPolygon", geo_column="geo"),
)
# pass_value [[3]] — presumably SQL_CHECK_COLS returns a count of 3; verify
# against the SQL_CHECK_COLS definition.
check_cols = PostgresValueCheckOperator(task_id="check_cols",
                                        sql=SQL_CHECK_COLS,
                                        pass_value=[[3]])
# DAG: load the Horeca Exploitatievergunning table from the DWH and
# validate it; runs daily at 09:00.
with DAG(
    "horeca_exploitatievergunning",
    default_args=default_args,
    description="Horeca Exploitatievergunning",
    schedule_interval="0 9 * * *",
) as dag:
    load_data = PythonOperator(
        task_id="load_data",
        python_callable=load_from_dwh,
        op_args=[table_name_new],
    )
    # At least 4000 rows expected.
    check_count = PostgresCheckOperator(
        task_id="check_count",
        sql=SQL_CHECK_COUNT,
        params=dict(tablename=table_name_new, mincount=4000),
    )
    # "terrasgeometrie" must be multipolygon; NULLs allowed (notnull=False).
    check_geo1 = PostgresCheckOperator(
        task_id="check_geo1",
        sql=SQL_CHECK_GEO,
        params=dict(
            tablename=table_name_new,
            geotype="ST_MultiPolygon",
            geo_column="terrasgeometrie",
            notnull=False,
        ),
    )
    check_geo2 = PostgresCheckOperator(
f"{tmp_dir}/{dag_id}.utf8.sql",
)
# Load the UTF-8-converted dump into Postgres.
create_table = BashOperator(
    task_id="create_table",
    bash_command=f"psql {pg_params} < {tmp_dir}/{dag_id}.utf8.sql",
)
# Apply the static category script after the base table exists.
add_category = BashOperator(
    task_id="add_category",
    bash_command=f"psql {pg_params} < {sql_path}/add_categorie.sql",
)
# At least 75 rows expected.
check_count = PostgresCheckOperator(
    task_id="check_count",
    sql=SQL_CHECK_COUNT,
    params=dict(tablename=f"{dag_id}_new", mincount=75),
)
# Geometry may be Polygon or MultiPolygon; geometry-validity check disabled.
check_geo = PostgresCheckOperator(
    task_id="check_geo",
    sql=SQL_CHECK_GEO,
    params=dict(
        tablename=f"{dag_id}_new",
        geotype=["ST_Polygon", "ST_MultiPolygon"],
        check_valid=False,
    ),
)
rename_table = PostgresOperator(
    task_id="rename_table",
# Convert the fetched GeoJSON to SQL with ogr2ogr (COPY mode for faster
# loading), run it, check the row count, then rename the table into place.
extract_geojson = BashOperator(
    task_id="extract_geojson",
    bash_command=f"ogr2ogr --config PG_USE_COPY YES -f 'PGDump' "
    f"-nln {dag_id}_new "
    f"{tmp_dir}/objects.sql {tmp_dir}/objects.geo.json",
)
create_table = BashOperator(
    task_id="create_table",
    bash_command=f"psql {pg_params} < {tmp_dir}/objects.sql",
)
# At least 129410 rows expected.
check_count = PostgresCheckOperator(
    task_id="check_count",
    sql=SQL_CHECK_COUNT,
    params=dict(tablename=f"{dag_id}_new", mincount=129410),
)
rename_table = PostgresOperator(
    task_id="rename_table",
    sql=SQL_TABLE_RENAME,
    params=dict(tablename=f"{dag_id}"),
)
# Task dependency chain (continues past this view).
(
    slack_at_start
    >> objects_fetch
    >> types_fetch
    >> import_data
    >> extract_geojson