Python SwiftOperator示例，swift_operator.SwiftOperator Python示例

示例#1

0

显示文件

文件： overlastgebieden.py 项目： jimleitch01/dataservices-airflow

        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = [
        SwiftOperator(
            task_id=f"download_{file}",
            # Default swift = Various Small Datasets objectstore
            # swift_conn_id="SWIFT_DEFAULT",
            container="overlastgebieden",
            object_id=f"{file}",
            output_path=f"{tmp_dir}/{file}",
        ) for file in files_to_download
    ]

    # 4. Dummy operator acts as an interface between parallel tasks to another parallel tasks with different number of lanes
    #  (without this intermediar, Airflow will give an error)
    Interface = DummyOperator(task_id="interface")

    # 5. Create SQL
    SHP_to_SQL = [
        BashOperator(
            task_id=f"create_SQL_{key}",
            bash_command=f"ogr2ogr -f 'PGDump' "
            f"-s_srs EPSG:28992 -t_srs EPSG:28992 "

示例#2

0

显示文件

文件： precariobelasting.py 项目： Amsterdam/dataservices-airflow

        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. create download temp directory to store the data
    mk_tmp_dir = BashOperator(task_id="mk_tmp_dir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = [
        SwiftOperator(
            task_id=f"download_{file_name}",
            swift_conn_id="objectstore_dataservices",
            container="Dataservices",
            object_id=url,
            output_path=f"{tmp_dir}/{url}",
        )
        for file_name, url in data_endpoints.items()
    ]

    # 4. Cleanse the downloaded data (remove the space hyphen characters)
    clean_up_data = [
        PythonOperator(
            task_id=f"clean_data_{file_name}",
            python_callable=clean_data,
            op_args=[f"{tmp_dir}/{url}"],
        )
        for file_name, url in data_endpoints.items()
    ]

示例#3

0

显示文件

dag_id = "reclamebelasting"
dag_config = Variable.get(dag_id, deserialize_json=True)

with DAG(
    "reclamebelasting", default_args=default_args, description="reclamebelasting",
) as dag:

    zip_file = dag_config["zip_file"]
    shp_file = dag_config["shp_file"]
    tmp_dir = f"/tmp/{dag_id}"

    mk_tmp_dir = BashOperator(task_id="mk_tmp_dir", bash_command=f"mkdir -p {tmp_dir}")

    fetch_zip = SwiftOperator(
        task_id="fetch_zip",
        container="reclame",
        object_id=zip_file,
        output_path=f"{tmp_dir}/{zip_file}",
    )

    extract_zip = BashOperator(
        task_id="extract_zip",
        bash_command=f"unzip -o {tmp_dir}/{zip_file} -d {tmp_dir}",
    )

    extract_shp = BashOperator(
        task_id="extract_shp",
        bash_command=f"ogr2ogr -f 'PGDump' -t_srs EPSG:28992 -nln {dag_id}_new "
        f"{tmp_dir}/{dag_id}.sql {tmp_dir}/Reclame_tariefgebieden.shp",
    )

    convert_shp = BashOperator(

示例#4

0

显示文件

    slack_at_start = MessageOperator(
        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = SwiftOperator(
            task_id=f"download_{files_to_download[0]}",
            # when swift_conn_id is ommitted then the default connection will be the VSD objectstore
            # swift_conn_id="SWIFT_DEFAULT",
            container="vastgoed",
            object_id=f"{files_to_download[0]}",
            output_path=f"{tmp_dir}/{files_to_download[0]}",
        )
    
    # 4. Convert data to UTF8 character set
    convert_to_UTF8 = BashOperator(
                      task_id="convert_to_UTF8",
                      bash_command=f"iconv -f iso-8859-1 -t utf-8  {tmp_dir}/{files_to_download[0]} > "
                                   f"{tmp_dir}/{dag_id}_utf8.csv",
    )

    # 5. Create TABLE from CSV
    # The source has no spatial data, but OGR2OGR is used to create the SQL insert statements.
    CSV_to_SQL = BashOperator(
            task_id=f"CSV_to_SQL",

示例#5

0

显示文件

文件： parkeerzones.py 项目： jimleitch01/dataservices-airflow

        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. create download temp directory to store the data
    mk_tmp_dir = BashOperator(task_id="mk_tmp_dir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = [
        SwiftOperator(
            task_id="download_file",
            # Default swift = Various Small Datasets objectstore
            # swift_conn_id="SWIFT_DEFAULT",
            container=f"{dag_id}",
            object_id=f"{files_to_download}",
            output_path=f"{tmp_dir}/{file}",
        )
        for file in files_to_download
    ]

    # 3. Unzip
    extract_zip = [
        BashOperator(
            task_id="extract_zip_file",
            bash_command=f'unzip -o "{tmp_dir}/{file}" -d {tmp_dir}',
        )
        for file in files_to_download
    ]

示例#6

0

显示文件

文件： grex.py 项目： sercanyaldiz/dataservices-airflow

with DAG(
        "grex",
        default_args=default_args,
        description="GrondExploitatie",
) as dag:

    csv_file = dag_config["csv_file"]
    tmp_dir = f"/tmp/{dag_id}"

    mk_tmp_dir = BashOperator(task_id="mk_tmp_dir",
                              bash_command=f"mkdir -p {tmp_dir}")

    fetch_csv = SwiftOperator(
        task_id="fetch_csv",
        container="grex",
        object_id=csv_file,
        output_path=f"{tmp_dir}/{csv_file}",
    )

    load_data = PythonOperator(
        task_id="load_data",
        python_callable=load_grex,
        op_args=[f"{tmp_dir}/{csv_file}", table_name_new],
    )

    check_count = PostgresCheckOperator(
        task_id="check_count",
        sql=SQL_CHECK_COUNT,
        params=dict(tablename=table_name_new, mincount=400),
    )

示例#7

0

显示文件

文件： aardgasvrijezones.py 项目： Amsterdam/dataservices-airflow

        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = [
        SwiftOperator(
            task_id=f"download_{file}",
            #  when swift_conn_id is ommitted then the default connection will be the VSD objectstore
            #  swift_conn_id="SWIFT_DEFAULT",
            container="aardgasvrij",
            object_id=file,
            output_path=f"{tmp_dir}/{file}",
        ) for files in files_to_download.values() for file in files
    ]

    # 4. Dummy operator acts as an interface between parallel tasks to another parallel tasks with different number of lanes
    #  (without this intermediar, Airflow will give an error)
    Interface = DummyOperator(task_id="interface")

    # 5. Create SQL
    SHP_to_SQL = [
        BashOperator(
            task_id=f"create_SQL_{key}",
            bash_command="ogr2ogr -f 'PGDump' "
            "-t_srs EPSG:28992 -s_srs EPSG:28992 "

示例#8

0

显示文件

        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = [
        SwiftOperator(
            task_id=f"download_{file}",
            swift_conn_id="OBJECTSTORE_MILIEUTHEMAS",
            container="Bommenkaart",
            object_id=file,
            output_path=f"{tmp_dir}/{file}",
        ) for files in files_to_download.values() for file in files
    ]

    # 4. Dummy operator acts as an interface between parallel tasks to another parallel tasks with different number of lanes
    #  (without this intermediar, Airflow will give an error)
    Interface = DummyOperator(task_id="interface")

    # 5. Create SQL
    SHP_to_SQL = [
        BashOperator(
            task_id=f"create_SQL_{key}",
            bash_command="ogr2ogr -f 'PGDump' "
            "-s_srs EPSG:28992 -t_srs EPSG:28992 "

示例#9

0

显示文件

文件： bodem.py 项目： Amsterdam/dataservices-airflow

        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = [
        SwiftOperator(
            task_id=f"download_file_{key}",
            swift_conn_id="OBJECTSTORE_MILIEUTHEMAS",
            container="Milieuthemas",
            object_id=file,
            output_path=f"{tmp_dir}/{file}",
        ) for key, file in files_to_download.items()
    ]

    # 4. Transform seperator from pipeline to semicolon and set code schema to UTF-8
    change_seperator = [
        BashOperator(
            task_id=f"change_seperator_{key}",
            bash_command=
            f"cat {tmp_dir}/{file} | sed 's/|/;/g' > {tmp_dir}/seperator_{file} ;"
            f"iconv -f iso-8859-1 -t utf-8  {tmp_dir}/seperator_{file} > "
            f"{tmp_dir}/utf-8_{file}",
        ) for key, file in files_to_download.items()
    ]

示例#10

0

显示文件

文件： vergunningen.py 项目： Amsterdam/dataservices-airflow

    # 1. Post message on slack
    slack_at_start = MessageOperator(
        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Download data from objectstore and store in tmp dir
    download_data = [
        SwiftOperator(
            task_id=f"download_{file}",
            # swift_conn_id default when ommited is the Various Small Datasets objectstore
            container="bed_and_breakfast",
            object_id=f"{DATAPUNT_ENVIRONMENT}/{file}",
            output_path=f"{tmp_dir}/{file}",
        ) for file in files_to_download
    ]

    # 3. Modify data: remove all but inserts
    remove_owner_alters = [
        BashOperator(
            task_id=f"get_SQL_inserts_{file}",
            bash_command=f"sed -i -r '/INSERT INTO/!d' {tmp_dir}/{file} && "
            f"echo 'COMMIT;' >> {tmp_dir}/{file}",
        ) for file in files_to_download
    ]

    # 4. Modify data: change table name to tmp name

示例#11

0

显示文件

文件： milieuzones.py 项目： jimleitch01/dataservices-airflow

        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = [
        SwiftOperator(
            task_id=f"download_{file}",
            # Default swift = Various Small Datasets objectstore
            # swift_conn_id="SWIFT_DEFAULT",
            container="milieuzones",
            object_id=f"{file}",
            output_path=f"{tmp_dir}/{file}",
        )
        for file in files_to_download
    ]

    # 4. Convert data to geojson
    convert_to_geojson = [
        PythonOperator(
            task_id=f"convert_{file}_to_geojson",
            python_callable=import_milieuzones,
            op_args=[f"{tmp_dir}/{file}", f"{tmp_dir}/geojson_{file}",],
        )
        for file in files_to_download
    ]

示例#12

0

显示文件

文件： reclamebelasting.py 项目： Amsterdam/dataservices-airflow

    slack_at_start = MessageOperator(
        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = SwiftOperator(
        task_id=f"download_{zip_file}",
        # Default swift == Various Small Datasets objectstore
        # swift_conn_id="SWIFT_DEFAULT",
        container="reclame",
        object_id=zip_file,
        output_path=f"{tmp_dir}/{zip_file}",
    )

    # 4. Extract zip file
    extract_zip = BashOperator(
        task_id="extract_zip",
        bash_command=f"unzip -o {tmp_dir}/{zip_file} -d {tmp_dir}",
    )

    # 5. Load data
    load_data = Ogr2OgrOperator(
        task_id=f"import_{shp_file}",
        target_table_name=f"{schema_name}_{table_name}_new",
        input_file=f"{tmp_dir}/{shp_file}",

示例#13

0

显示文件

        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = PythonOperator(task_id="download_data", python_callable=get_data)

    # 4. Upload data to objectstore
    upload_to_obs = SwiftOperator(
        task_id="upload_to_obs",
        swift_conn_id="OBJECTSTORE_VICTOR",
        action_type="upload",
        container="WIOR",
        output_path=f"{tmp_dir}/{dag_id}.geojson",
        object_id=f"{datetime.now(timezone.utc).astimezone(to_zone).strftime('%Y-%m-%d')}_{dag_id}.geojson",  # noqa E501
    )

    # 5. Delete files from objectstore (that do not fit given time window)
    delete_from_obs = SwiftOperator(
        task_id="delete_from_obs",
        swift_conn_id="OBJECTSTORE_VICTOR",
        action_type="delete",
        container="WIOR",
        time_window_in_days=100,
    )

    # 6. Import data
    import_data = Ogr2OgrOperator(

示例#14

0

显示文件

    sql_file_new = f"{sql_file_new_base}.sql"
    tmp_dir = f"/tmp/{dag_id}"
    sql_file_path = f"{tmp_dir}/{DATAPUNT_ENVIRONMENT}/{sql_file}"
    sql_file_new_path = f"{tmp_dir}/{sql_file_new}"

    slack_at_start = MessageOperator(
        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    fetch_sql = SwiftOperator(
        task_id="fetch_sql",
        container=dag_id,
        object_id=f"{DATAPUNT_ENVIRONMENT}/{sql_file}",
        output_path=sql_file_path,
    )

    remove_owner_alters = BashOperator(
        task_id="remove_owner_alters",
        bash_command=f'egrep -v "^ALTER TABLE.*OWNER TO" {sql_file_path} '
        f'| egrep -v "^GRANT SELECT ON" > "{sql_file_new_path}"',
    )

    replace_tablename = BashOperator(
        task_id="replace_tablename",
        bash_command=f'perl -pi -e "s/quota_bbkaartlaagexport/bb_quotum_new/g" '
        f"{sql_file_new_path}",
    )

示例#15

0

显示文件

        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = [
        SwiftOperator(
            task_id=f"download_{key}",
            swift_conn_id="OBJECTSTORE_MILIEUTHEMAS",
            container="Milieuthemas",
            object_id=f"{file}",
            output_path=f"{tmp_dir}/{key}_{file}",
        )
        for key, file in files_to_download.items()
    ]

    # 4. Transform seperator from pipeline to semicolon and set code schema to UTF-8
    change_seperators = [
        BashOperator(
            task_id=f"change_seperator_{key}",
            bash_command=f"cat {tmp_dir}/{key}_{file} | "
            f"sed 's/|/;/g' > {tmp_dir}/seperator_{key} ;"
            f"iconv -f iso-8859-1 -t utf-8  {tmp_dir}/seperator_{key} > "
            f"{tmp_dir}/utf_8_{key}.csv",
        )

示例#16

0

显示文件

    shp_files = dag_config["shp_files"]
    tables = dag_config["tables"]
    rename_tablenames = dag_config["rename_tablenames"]
    tmp_dir = f"/tmp/{dag_id}"

    slack_at_start = MessageOperator(
        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    fetch_zip = SwiftOperator(
        task_id="fetch_zip",
        container=dag_id,
        object_id=zip_file,
        output_path=f"/tmp/{dag_id}/{zip_file}",
    )

    extract_zip = BashOperator(
        task_id="extract_zip",
        bash_command=f'unzip -o "{tmp_dir}/{zip_file}" -d {tmp_dir}',
    )

    for shp_filename, tablename in zip(shp_files, tables):
        extract_shps.append(
            BashOperator(
                task_id=f"extract_{shp_filename}",
                bash_command=f"ogr2ogr -f 'PGDump' -t_srs EPSG:28992 "
                f"-nln {tablename} "
                f"{tmp_dir}/{tablename}.sql {tmp_dir}/Shape/{shp_filename}",

示例#17

0

显示文件

文件： ondergrond.py 项目： Amsterdam/dataservices-airflow

        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = [
        SwiftOperator(
            task_id=f"download_{data_file}",
            swift_conn_id="objectstore_dataruimte",
            container="ondergrond",
            object_id=f"historische_onderzoeken/{data_file}",
            output_path=f"{tmp_dir}/{data_file}",
        )
        for _, data_file in files_to_download.items()
    ]

    # 4. Create the DB target table (as specified in the JSON data schema)
    # if table not exists yet
    create_tables = [
        SqlAlchemyCreateObjectOperator(
            task_id=f"create_{table_name}_based_upon_schema",
            data_schema_name=dag_id,
            data_table_name=f"{dag_id}_{table_name}",
            ind_table=True,
            # when set to false, it doesn't create indexes; only tables

示例#18

0

显示文件

            print("Duplicates found: {}".format(", ".join(duplicates)))


with DAG(dag_id, default_args=default_args,
         description="Parkeervakken") as dag:
    last_date = find_export_date()
    zip_file = "nivo_{}.zip".format(last_date)
    source = pathlib.Path(TMP_DIR)

    mk_tmp_dir = BashOperator(task_id="mk_tmp_dir",
                              bash_command=f"mkdir -p {TMP_DIR}")

    fetch_zip = SwiftOperator(
        task_id="fetch_zip",
        container="tijdregimes",
        object_id=zip_file,
        output_path=f"{TMP_DIR}/{zip_file}",
        conn_id="parkeervakken_objectstore",
    )

    extract_zip = BashOperator(
        task_id="extract_zip",
        bash_command=f'unzip -o "{TMP_DIR}/{zip_file}" -d {TMP_DIR}',
    )

    create_temp_tables = PostgresOperator(
        task_id="create_temp_tables",
        postgres_conn_id=postgres_conn_id,
        sql=SQL_CREATE_TEMP_TABLES,
        params=dict(base_table=f"{dag_id}_{dag_id}"),
    )

示例#19

0

显示文件

文件： spoorlijnen.py 项目： jimleitch01/dataservices-airflow

        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = [
        SwiftOperator(
            task_id=f"download_{file}",
            # if conn is ommitted, it defaults to Objecstore Various Small Datasets
            # swift_conn_id="SWIFT_DEFAULT",
            container="spoorlijnen",
            object_id=str(file),
            output_path=f"{tmp_dir}/{file}",
        ) for files in files_to_download.values() for file in files
    ]

    # 4. Dummy operator acts as an interface between parallel tasks to another parallel tasks with different number of lanes
    #  (without this intermediar, Airflow will give an error)
    Interface = DummyOperator(task_id="interface")

    # 5. Create SQL
    SHP_to_SQL = [
        BashOperator(
            task_id=f"create_SQL_{key}",
            bash_command=f"ogr2ogr -f 'PGDump' "
            f"-s_srs EPSG:28992 -t_srs EPSG:28992 "

示例#20

0

显示文件

    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download sensor data (geojson) from maps.amsterdam.nl
    download_geojson = HttpFetchOperator(
        task_id="download_geojson",
        endpoint="open_geodata/geojson.php?KAARTLAAG=CROWDSENSOREN&THEMA=cmsa",
        http_conn_id="ams_maps_conn_id",
        tmp_file=tmp_dir / "sensors.geojson",
    )

    # 4. Download additional data (beacons.csv, cameras.xlsx)
    fetch_files = [
        SwiftOperator(
            task_id=f"download_{file}",
            # if conn is ommitted, it defaults to Objecstore Various Small Datasets
            # swift_conn_id="SWIFT_DEFAULT",
            container="cmsa",
            object_id=file,
            output_path=tmp_dir / file,
        )
        for file in files_to_download
    ]

    # 5. Create SQL insert statements out of downloaded data
    proces_cmsa = PythonOperator(
        task_id="proces_sensor_data",
        python_callable=import_cmsa,
        op_args=[
            tmp_dir / "cameras.xlsx",
            tmp_dir / "beacons.csv",
            tmp_dir / "sensors.geojson",
            tmp_dir,

示例#21

0

显示文件

        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")

    # 3. Download data
    download_data = [
        SwiftOperator(
            task_id=f"download_{data_file}",
            swift_conn_id="objectstore_dataservices",
            container="Dataservices",
            object_id=f"{dag_id}/{data_file}",
            output_path=f"{tmp_dir}/{data_file}",
        )
        for _, data_file in files_to_download.items()
    ]

    # 4. Create the DB target table (as specified in the JSON data schema)
    # if table not exists yet
    create_tables = [
        SqlAlchemyCreateObjectOperator(
            task_id=f"create_{table_name}_based_upon_schema",
            data_schema_name=f"{dag_id}",
            data_table_name=f"{dag_id}_{table_name}",
            ind_table=True,
            # when set to false, it doesn't create indexes; only tables

示例#22

0

显示文件

文件： overlastgebieden.py 项目： cvriel/dataservices-airflow

    fetch_shp_files = []

    slack_at_start = MessageOperator(
        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    for ext in ("dbf", "prj", "shp", "shx"):
        file_name = f"OOV_gebieden_totaal.{ext}"
        fetch_shp_files.append(
            SwiftOperator(
                task_id=f"fetch_shp_{ext}",
                container=dag_id,
                object_id=file_name,
                output_path=f"/tmp/{dag_id}/{file_name}",
            ))

    extract_shp = BashOperator(
        task_id="extract_shp",
        bash_command=
        f"ogr2ogr -f 'PGDump' -t_srs EPSG:28992 -skipfailures -nln {dag_id}_new "
        f"{tmp_dir}/{dag_id}.sql {tmp_dir}/OOV_gebieden_totaal.shp",
    )

    convert_shp = BashOperator(
        task_id="convert_shp",
        bash_command=f"iconv -f iso-8859-1 -t utf-8  {tmp_dir}/{dag_id}.sql > "
        f"{tmp_dir}/{dag_id}.utf8.sql",
    )

示例#23

0

显示文件

def create_error(*args, **kwargs):
    raise Exception


with DAG(
        "testdag",
        default_args=default_args,
) as dag:

    swift_task = SwiftOperator(
        task_id="swift_task",
        container="Dataservices",
        object_id=
        "beschermde_stads_en_dorpsgezichten/acceptance/beschermde_stadsdorpsgezichten.zip",
        output_path="/tmp/bsd.zip",
        # container="afval",
        # object_id="acceptance/afval_cluster.zip",
        # output_path="/tmp/blaat/out2.zip",
        # conn_id="afval",
        swift_conn_id="objectstore_dataservices",
    )

    count_check = COUNT_CHECK.make_check(
        check_id="count_check",
        pass_value=1587,
        params=dict(table_name="fietspaaltjes"),
        result_checker=operator.ge,
    )

    colname_check = COLNAMES_CHECK.make_check(
        check_id="colname_check",