from airflow import DAG
from airflow.operators.bash import BashOperator
from pendulum import datetime

with DAG(
        dag_id='dbt_source_freshness_daily',
        start_date=datetime(2019, 1, 1),
        schedule_interval='@daily',
        tags=['validate'],
        catchup=False
) as dag:
    BashOperator(
        bash_command='source /opt/dbt-env/bin/activate && '
                     'dbt source snapshot-freshness --project-dir ${DBT_DIR}/finance-data',
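        # Note (added): task_concurrency=1 below caps this task at a single
        # running instance across all active DAG runs, so overlapping
        # freshness checks cannot run concurrently.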
        task_concurrency=1,
        task_id='dbt_source_freshness'
    )
Example #2
        }],
    )
    # [END howto_operator_bigquery_execute_query_save]

    # [START howto_operator_bigquery_get_data]
    get_data = BigQueryGetDataOperator(
        task_id="get_data",
        dataset_id=DATASET_NAME,
        table_id="save_query_result",
        max_results="10",
        selected_fields="value,to_address",
    )
    # [END howto_operator_bigquery_get_data]

    get_data_result = BashOperator(
        task_id="get_data_result",
        bash_command="echo \"{{ task_instance.xcom_pull('get_data') }}\"")

    # [START howto_operator_bigquery_create_external_table]
    create_external_table = BigQueryCreateExternalTableOperator(
        task_id="create_external_table",
        bucket=DATA_SAMPLE_GCS_BUCKET_NAME,
        source_objects=[DATA_SAMPLE_GCS_OBJECT_NAME],
        destination_project_dataset_table="{}.external_table".format(
            DATASET_NAME),
        skip_leading_rows=1,
        schema_fields=[{
            "name": "name",
            "type": "STRING"
        }, {
            "name": "post_abbr",
Example #3
from airflow.models.baseoperator import chain
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago

default_args = {"start_date": days_ago(1)}

with models.DAG(
        dag_id="example_complex",
        default_args=default_args,
        schedule_interval=None,
        tags=['example'],
) as dag:

    # Create
    create_entry_group = BashOperator(task_id="create_entry_group",
                                      bash_command="echo create_entry_group")

    create_entry_group_result = BashOperator(
        task_id="create_entry_group_result",
        bash_command="echo create_entry_group_result")

    create_entry_group_result2 = BashOperator(
        task_id="create_entry_group_result2",
        bash_command="echo create_entry_group_result2")

    create_entry_gcs = BashOperator(task_id="create_entry_gcs",
                                    bash_command="echo create_entry_gcs")

    create_entry_gcs_result = BashOperator(
        task_id="create_entry_gcs_result",
        bash_command="echo create_entry_gcs_result")

from datetime import datetime
from textwrap import dedent

from airflow.models import DAG
from airflow.operators.bash import BashOperator

DEFAULT_DATE = datetime(2016, 1, 1)

args = {
    'owner': 'airflow',
    'start_date': DEFAULT_DATE,
}

dag = DAG(dag_id='test_no_impersonation', default_args=args)

test_command = dedent("""\
    sudo ls
    if [ $? -ne 0 ]; then
        echo 'current uid does not have root privileges!'
        exit 1
    fi
    """)

task = BashOperator(
    task_id='test_superuser',
    bash_command=test_command,
    dag=dag,
)

import datetime as dt
from datetime import timedelta

import pandas as pd

from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator


def csvToJson():
    df = pd.read_csv(
        '/goinfre/sbahaddi/gits/DataEngineering_Infrastructure/data/faker_data.csv'
    )
    for _, r in df.iterrows():
        print(r['name'])
    df.to_json('dags/fromAirflow.json', orient='records')


default_args = {
    'owner': 'sbahaddi',
    'start_date': dt.datetime(2021, 3, 26),
    'retries': 1,
    'retry_delay': dt.timedelta(minutes=5),
}

with DAG(
        'MyCSVDAG',
        default_args=default_args,
        schedule_interval=timedelta(minutes=5),
        # '0 * * * *',
) as dag:
    print_starting = BashOperator(
        task_id='starting',
        bash_command='echo "I am reading the CSV now....."')
    convertTojSON = PythonOperator(task_id='convertCSVtoJson',
                                   python_callable=csvToJson)
    print_starting >> convertTojSON
Example #6
    # 'sla_miss_callback': yet_another_function,
    # 'trigger_rule': 'all_success'
}
dag = DAG(
    'SACoronavirus',
    default_args=default_args,
    description='Extract data from SA Coronavirus daily reports',
    # schedule_interval=timedelta(days=1),
    start_date=days_ago(0),
    tags=['covid'],
)

# t1, t2 and t3 are examples of tasks created by instantiating operators
t1 = BashOperator(
    task_id='extract_images2',
    bash_command='/opt/airflow/scrapy/run.sh ',
    dag=dag,
)

t2 = BashOperator(
    task_id='sleep',
    depends_on_past=False,
    bash_command='sleep 5',
    retries=3,
    dag=dag,
)
dag.doc_md = __doc__

t1.doc_md = """\
#### Task Documentation
You can document your task using the attributes `doc_md` (markdown),
import airflow.utils.dates
from airflow.models import DAG
from airflow.operators.bash import BashOperator


def send_error():
    print("ERROR!")


dag = DAG(
    dag_id="chapter12_task_failure_callback",
    default_args={"on_failure_callback": send_error},
    on_failure_callback=send_error,
    schedule_interval=None,
    start_date=airflow.utils.dates.days_ago(3),
)

failing_task = BashOperator(task_id="failing_task",
                            bash_command="exit 1",
                            dag=dag)
          schedule_interval=timedelta(hours=4),
          catchup=True,
          tags=['A_pnnl_test2'])


def init(**kwargs):
    if datetime.now().hour != 12:
        raise ValueError('Failed')


task_start = PythonOperator(task_id="task_start",
                            python_callable=init,
                            dag=dag)

task_create_folder = BashOperator(
    task_id="task_create_folder",
    bash_command='/opt/airflow/dags/create_folder.sh "{{ execution_date }}" ',
    dag=dag)

task_download = BashOperator(
    task_id='task_download',
    bash_command=
    'python3 /opt/airflow/dags/task_download.py "{{ execution_date }}" ',
    do_xcom_push=True,
    dag=dag)

task_download_zoneTemperature = BashOperator(
    task_id="task_download_zoneTemperature",
    bash_command='echo task_download_zoneTemperature ',
    dag=dag)
task_download_zoneAirflow = BashOperator(
    task_id="task_download_zoneAirflow",
Example #9
def create_dag(dag_id, schedule, window, default_args):
    with DAG(
            dag_id,
            default_args=default_args,
            description='creates sliding windows based on months',
            schedule_interval=schedule,
            start_date=datetime.datetime(2021, 4, 30),
            on_failure_callback=dag_fail_slack_alert,
            on_success_callback=dag_success_slack_alert,
            tags=['selection', 'sliding'],
    ) as dag:

        OUTPUT_DIR = WORKING_DIR + "/data/sliding-windows-bealign/" + '_'.join(
            window)
        default_args["params"]["output-dir"] = OUTPUT_DIR
        default_args["params"][
            "meta-output"] = OUTPUT_DIR + '/master-no-sequences.json'
        default_args["params"]["sequence-output"] = OUTPUT_DIR + '/sequences'

        with open(dag.params["region_cfg"], 'r') as stream:
            regions = yaml.safe_load(stream)

        mk_dir_task = BashOperator(
            task_id='make_directory',
            bash_command='mkdir -p {{params.output}}',
            params={'output': default_args['params']['output-dir']},
            dag=dag,
        )

        export_meta_task = PythonOperator(
            task_id='export_meta',
            python_callable=export_meta,
            op_kwargs={"config": default_args['params']},
            pool='mongo',
            dag=dag,
        )

        export_meta_task.set_upstream(mk_dir_task)
        export_sequences_task = PythonOperator(
            task_id='export_sequences',
            python_callable=export_sequences,
            op_kwargs={"config": default_args['params']},
            pool='mongo',
            dag=dag,
        )

        export_sequences_task.set_upstream(mk_dir_task)

        # For each region
        export_by_gene = []

        for gene in regions.keys():

            filepath_prefix = OUTPUT_DIR + '/sequences.' + gene

            nuc_sequence_output = filepath_prefix + '_nuc.fas'
            uniques_fn = filepath_prefix + '_nuc.uniques.fas'
            duplicate_output = filepath_prefix + '.duplicates.json'

            variants_csv_output = filepath_prefix + '.variants.csv'
            variants_json_output = filepath_prefix + '.variants.json'
            filtered_fasta_output = filepath_prefix + '.compressed.filtered.fas'
            filtered_json_output = filepath_prefix + '.filtered.json'
            output_edits_fn = filepath_prefix + '.filtered.edits.json'

            compressor_duplicate_out = filepath_prefix + '.duplicates.variants.json'

            tree_output = filepath_prefix + '.compressed.filtered.fas.rapidnj.bestTree'
            sto_output = filepath_prefix + '.compressed.filtered.sto'

            slac_output_fn = filepath_prefix + '.SLAC.json'
            fel_output_fn = filepath_prefix + '.FEL.json'
            meme_output_fn = filepath_prefix + '.MEME.json'

            summary_output_fn = filepath_prefix + '.json'

            default_args["params"]["nuc-sequence-output"] = nuc_sequence_output
            default_args["params"]["duplicate-output"] = duplicate_output

            with TaskGroup(f"alignment_{gene}") as alignment:

                export_bealign_task = PythonOperator(
                    task_id=f'export_bealign',
                    python_callable=export_bealign_sequences,
                    op_kwargs={
                        "config": default_args['params'],
                        'nuc_output_fn': nuc_sequence_output,
                        'gene': gene
                    },
                    dag=dag,
                )

                # Occasional errors when cleaning up tmp files, so or'ing true
                cleanup_task = BashOperator(
                    task_id=f'cleanup',
                    bash_command=
                    "sed -i '/^>/! s/[^ACTG-]/N/g' $NUC_OUTPUT_FN || true",
                    env={
                        'NUC_OUTPUT_FN': nuc_sequence_output,
                        **os.environ
                    },
                    dag=dag)

                export_bealign_task >> cleanup_task

            with TaskGroup(f"duplicates_{gene}") as duplicates_group:

                compute_duplicates_task = PythonOperator(
                    task_id=f'write_raw_duplicates',
                    python_callable=write_nuc_raw_duplicates,
                    op_kwargs={
                        "input": nuc_sequence_output,
                        "duplicate_output": duplicate_output,
                        'uniques_output': uniques_fn
                    },
                    dag=dag,
                )

                compute_duplicates_task

            # $HYPHY LIBPATH=$HYPHYLIBPATH $COMPRESSOR --msa ${FILE}.${GENE}.compressed.fas --regexp "epi_isl_([0-9]+)" --duplicates ${FILE}.${GENE}.duplicates.json --output ${FILE}.${GENE}.variants.csv --json ${FILE}.${GENE}.variants.json --duplicate-out ${FILE}.${GENE}.duplicates.variants.json

            with TaskGroup(f"filter_{gene}") as filter:
                COMPRESSOR = """
                {{ params.hyphy }} LIBPATH={{params.hyphy_lib_path}} {{ params.compressor }} --msa $FASTA_FN --regexp "epi_isl_([0-9]+)" --duplicates $DUPLICATE_FN --output $VARIANTS_CSV_FN  --json $VARIANTS_JSON_FN --duplicate-out $COMPRESSOR_DUPLICATE_OUT
                """
                compressor_task = BashOperator(task_id=f'compressor',
                                               bash_command=COMPRESSOR,
                                               env={
                                                   'FASTA_FN': uniques_fn,
                                                   'DUPLICATE_FN':
                                                   duplicate_output,
                                                   'VARIANTS_CSV_FN':
                                                   variants_csv_output,
                                                   'VARIANTS_JSON_FN':
                                                   variants_json_output,
                                                   'COMPRESSOR_DUPLICATE_OUT':
                                                   compressor_duplicate_out,
                                                   **os.environ
                                               },
                                               dag=dag)

                # --output-edits ${FILE}.${GENE}.filtered.edits.json
                COMPRESSOR2 = """
                {{ params.hyphy }} LIBPATH={{params.hyphy_lib_path}} {{ params.compressor2 }} --msa $FASTA_FN --duplicates $DUPLICATE_FN --csv $VARIANTS_CSV_FN  --byseq $VARIANTS_JSON_FN --p 0.95 --output $FILTERED_FASTA_FN --json $FILTERED_JSON_FN --output-edits ${OUTPUT_EDITS}
                """
                compressor_two_task = BashOperator(
                    task_id=f'compressor_two',
                    bash_command=COMPRESSOR2,
                    env={
                        'FASTA_FN': uniques_fn,
                        'DUPLICATE_FN': compressor_duplicate_out,
                        'VARIANTS_CSV_FN': variants_csv_output,
                        'VARIANTS_JSON_FN': variants_json_output,
                        'FILTERED_FASTA_FN': filtered_fasta_output,
                        'FILTERED_JSON_FN': filtered_json_output,
                        'OUTPUT_EDITS': output_edits_fn,
                        **os.environ
                    },
                    dag=dag)

                compressor_task >> compressor_two_task

            INFER_TREE = """
            seqmagick convert $FILTERED_FASTA_FN $STO_OUTPUT;
            rapidnj $STO_OUTPUT -i sth > $TREE_OUTPUT
            sed -i "s/'//g" $TREE_OUTPUT;
            """

            infer_tree_task = BashOperator(task_id=f'infer_tree_{gene}',
                                           bash_command=INFER_TREE,
                                           env={
                                               'FILTERED_FASTA_FN':
                                               filtered_fasta_output,
                                               'STO_OUTPUT': sto_output,
                                               'TREE_OUTPUT': tree_output,
                                               **os.environ
                                           },
                                           dag=dag)

            slac_task = BashOperator(
                task_id=f'slac_{gene}',
                bash_command=
                "{{ params.hyphy }} LIBPATH={{params.hyphy_lib_path}} slac --kill-zero-lengths Constrain ENV='_DO_TREE_REBALANCE_=1' --alignment $FILTERED_FASTA_FN --tree $TREE_OUTPUT --branches All --samples 0 --output $SLAC_OUTPUT",
                env={
                    'FILTERED_FASTA_FN': filtered_fasta_output,
                    'TREE_OUTPUT': tree_output,
                    'SLAC_OUTPUT': slac_output_fn,
                    **os.environ
                },
                dag=dag,
            )

            big_data_flags = '--full-model No'

            fel_task = BashOperator(
                task_id=f'fel_{gene}',
                bash_command=
                "{{ params.hyphy }} LIBPATH={{params.hyphy_lib_path}} fel --kill-zero-lengths Constrain ENV='_DO_TREE_REBALANCE_=1' $BIG_DATA_FLAGS --alignment $FILTERED_FASTA_FN --tree $TREE_OUTPUT --branches Internal --output $FEL_OUTPUT",
                env={
                    'FILTERED_FASTA_FN': filtered_fasta_output,
                    'TREE_OUTPUT': tree_output,
                    'FEL_OUTPUT': fel_output_fn,
                    'BIG_DATA_FLAGS': big_data_flags,
                    **os.environ
                },
                dag=dag,
            )

            meme_task = BashOperator(
                task_id=f'meme_{gene}',
                bash_command=
                "{{ params.hyphy }} LIBPATH={{params.hyphy_lib_path}} meme --kill-zero-lengths Constrain ENV='_DO_TREE_REBALANCE_=1' $BIG_DATA_FLAGS --alignment $FILTERED_FASTA_FN --tree $TREE_OUTPUT --branches Internal --output $MEME_OUTPUT",
                env={
                    'FILTERED_FASTA_FN': filtered_fasta_output,
                    'TREE_OUTPUT': tree_output,
                    'MEME_OUTPUT': meme_output_fn,
                    'BIG_DATA_FLAGS': big_data_flags,
                    **os.environ
                },
                dag=dag,
            )

            annotation_file = filepath_prefix + '.annotation.json'
            copy_annotation_task = BashOperator(
                task_id=f'copy_annotation_{gene}',
                bash_command=
                'cp {{params.working_dir}}/data/comparative-annotation.json {{params.annotation_file}}',
                params={
                    'annotation_file': annotation_file,
                    'working_dir': WORKING_DIR
                },
                dag=dag)

            summarize_gene_task = BashOperator(
                task_id=f'summarize_gene_{gene}',
                bash_command=
                '{{ params.python }} {{params.working_dir}}/python/summarize_gene.py -T {{params.working_dir}}/data/ctl/epitopes.json -B {{params.working_dir}}/data/single_mut_effects.csv -D $MASTERNOFASTA -d $DUPLICATES -s $SLAC_OUTPUT -f $FEL_OUTPUT -m $MEME_OUTPUT -P 0.1 --output  $SUMMARY_OUTPUT -c $COMPRESSED_OUTPUT_FN -E {{params.working_dir}}/data/evo_annotation.json -A {{params.working_dir}}/data/mafs.csv -V {{params.working_dir}}/data/evo_freqs.csv -F $FRAGMENT --frame_shift $ADDSHIFT --fragment_shift $SHIFT -S $OFFSET -O $ANNOTATION',
                params={
                    'python': default_args['params']['python'],
                    'working_dir': WORKING_DIR
                },
                env={
                    'MASTERNOFASTA': default_args["params"]["meta-output"],
                    'DUPLICATES': duplicate_output,
                    'SLAC_OUTPUT': slac_output_fn,
                    'FEL_OUTPUT': fel_output_fn,
                    'MEME_OUTPUT': meme_output_fn,
                    'SUMMARY_OUTPUT': summary_output_fn,
                    'COMPRESSED_OUTPUT_FN': filtered_fasta_output,
                    'FRAGMENT': str(regions[gene]['fragment']),
                    'ADDSHIFT': str(regions[gene]['add_one']),
                    'SHIFT': str(regions[gene]['shift']),
                    'OFFSET': str(regions[gene]['offset']),
                    'ANNOTATION': annotation_file,
                    **os.environ
                },
                dag=dag,
            )

            summarize_gene_task.set_upstream(export_meta_task)
            alignment.set_upstream(export_sequences_task)
            export_by_gene.append(
                alignment >> duplicates_group >> filter >> infer_tree_task >> [
                    slac_task, fel_task, meme_task
                ] >> copy_annotation_task >> summarize_gene_task)

        dag.doc_md = __doc__

        # Add export meta and export sequence tasks to be executed in parallel
        cross_downstream([export_meta_task, export_sequences_task],
                         export_by_gene)
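        # cross_downstream(from_tasks, to_tasks) makes every task in the second
        # list downstream of every task in the first, roughly equivalent to
        # looping `t >> export_by_gene` for each export task.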

        return dag
Example #10
    # 'sla_miss_callback': yet_another_function,
    # 'trigger_rule': 'all_success'
}
with DAG(
    dag_id='create_tokens',
    default_args=default_args,
    description='to create token into wallet',
    schedule_interval=None,
    start_date=days_ago(2),
    catchup=False,
    tags=['wallet', 'treetracker'],
) as dag:

    # t1, t2 and t3 are examples of tasks created by instantiating operators
    t1 = BashOperator(
        task_id='print_date',
        bash_command='date',
    )
    
    
    # define a function
    def create_tokens(ds, **kwargs):
        walletName = kwargs['dag_run'].conf.get('walletName')    
        entityId = kwargs['dag_run'].conf.get('entityId')    
        dryRun = kwargs['dag_run'].conf.get('dryRun')    
        # print them out
        print('walletName:', walletName)
        print('entityId:', entityId)
        print('dryRun:', dryRun)
        # check if wallet exists
        if walletName is None:
            print('walletName is None')
def demo_pipeline():
    """
    ### TaskFlow API Tutorial Documentation
    This is a simple ETL data pipeline example which demonstrates the use of
    the TaskFlow API using three simple tasks for Extract, Transform, and Load.
    Documentation that goes along with the Airflow TaskFlow API tutorial is
    located
    [here](https://airflow.apache.org/docs/stable/tutorial_taskflow_api.html)
    """
    @task()
    def raw_to_edf():
        """
        #### Extract task
        A simple Extract task to get data ready for the rest of the data
        pipeline. In this case, getting data is simulated by reading from a
        hardcoded JSON string.
        """
        # it is not ideal to read from file

        path = Variable.get('path')
        input_file = os.path.join(path, 'raw_input.txt')
        output_file = os.path.join(path, 'edf_output.txt')

        with open(input_file, "r") as in_file:
            line = in_file.read()

        with open(output_file, "w") as out_file:
            out_file.write(line + "World!!!")

        return output_file

    @task()
    def sleep(edf_file):
        """
        #### Transform task
        A simple Transform task which takes in the collection of order data and
        computes the total order value.
        """
        path = Variable.get('path')
        output_path = os.path.join(path, 'sleep_output.txt')
        with open(edf_file, "r") as in_file:
            line = in_file.read()

        with open(output_path, "w") as out_file:
            out_file.write(line + " AND SLEEEP!!!")

        return output_path
        # test
    @task()
    def gait(edf_file):
        """
        #### Load task
        A simple Load task which takes in the result of the Transform task and
        instead of saving it to end user review, just prints it out.
        """
        from macro_gait.WalkingBouts import WalkingBouts
        start_time = '2019-08-22 10:05:16'
        duration = 1000
        l_file = r'/Users/matthewwong/Documents/coding/nimbal/data/OND06_SBH_2891_GNAC_ACCELEROMETER_LAnkle.edf'
        r_file = r'/Users/matthewwong/Documents/coding/nimbal/data/OND06_SBH_2891_GNAC_ACCELEROMETER_RAnkle.edf'
        WalkingBouts(l_file,
                     r_file,
                     start_time=start_time,
                     duration_sec=duration)

        path = Variable.get('path')
        output_path = os.path.join(path, 'gait_output.txt')
        with open(edf_file, "r") as in_file:
            line = in_file.read()

        with open(output_path, "w") as out_file:
            out_file.write(line + " AND GAIT!!!")

        return output_path

    @task()
    def nonwear(edf_file):
        """
        #### Load task
        A simple Load task which takes in the result of the Transform task and
        instead of saving it to end user review, just prints it out.
        """
        path = Variable.get('path')
        output_path = os.path.join(path, 'nw_output.txt')
        with open(edf_file, "r") as in_file:
            line = in_file.read()

        with open(output_path, "w") as out_file:
            out_file.write(line + " AND NONWEAR!!!")

        return output_path

    @task()
    def feedback_form(gait_file, sleep_file, nonwear_file):
        path = Variable.get('path')
        output_path = os.path.join(path, 'feedback_output.txt')

        with open(gait_file, "r") as in_file:
            gait_line = in_file.read()

        with open(sleep_file, "r") as in_file:
            sleep_line = in_file.read()

        with open(nonwear_file, "r") as in_file:
            nonwear_line = in_file.read()

        with open(output_path, "w") as out_file:
            out_file.write(gait_line + sleep_line + nonwear_line)

        return output_path

    requirements = Variable.get('requirements').replace('\n', ' ').replace(
        '\r', ' ')
    install_deps = BashOperator(
        task_id='install_dependencies',
        bash_command=f'python -m pip install -I {requirements}',
    )

    edf_file = raw_to_edf()
    gait_file = gait(edf_file)
    sleep_file = sleep(edf_file)
    nonwear_file = nonwear(edf_file)
    feedback_form(gait_file, sleep_file, nonwear_file)
    install_deps >> edf_file
Example #12
    def test_dryrun(self):
        op = BashOperator(task_id='test_dryrun',
                          bash_command="echo success",
                          dag=self.dag)
        op.dry_run()
Example #13
def test_example():
    task = BashOperator(task_id="test",
                        bash_command="echo 'hello!'",
                        do_xcom_push=True)
    result = task.execute(context={})
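    # BashOperator.execute() returns the last line written to stdout, which is
    # also the value pushed to XCom when do_xcom_push=True.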
    assert result == "hello!"
Example #14
import datetime as dt
from pathlib import Path

import pandas as pd

from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator

dag = DAG(
    dag_id="06_templated_query",
    schedule_interval="@daily",
    start_date=dt.datetime(year=2019, month=1, day=1),
    end_date=dt.datetime(year=2019, month=1, day=5),
)

fetch_events = BashOperator(
    task_id="fetch_events",
    bash_command=("mkdir -p /data/events && "
                  "curl -o /data/events.json "
                  "http://events_api:5000/events?"
                  "start_date={{execution_date.strftime('%Y-%m-%d')}}&"
                  "end_date={{next_execution_date.strftime('%Y-%m-%d')}}"),
    dag=dag,
)


def _calculate_stats(input_path, output_path):
    """Calculates event statistics."""

    events = pd.read_json(input_path)
    stats = events.groupby(["date", "user"]).size().reset_index()

    Path(output_path).parent.mkdir(exist_ok=True)
    stats.to_csv(output_path, index=False)
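

# Hedged sketch (the collected snippet is cut off here): one plausible way to
# wire _calculate_stats into the DAG, passing templated paths through
# op_kwargs so each run writes a date-partitioned output file. The task name
# and the output path pattern are assumptions for illustration.
calculate_stats = PythonOperator(
    task_id="calculate_stats",
    python_callable=_calculate_stats,
    op_kwargs={
        "input_path": "/data/events.json",
        "output_path": "/data/stats/{{ds}}.csv",
    },
    dag=dag,
)

fetch_events >> calculate_stats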
Example #15
    schedule_interval=None,  # Override to match your needs
    tags=['example'],
) as dag:
    # [START howto_operator_create_instance]
    create_instance = CloudMemorystoreCreateInstanceOperator(
        task_id="create-instance",
        location="europe-north1",
        instance_id=INSTANCE_NAME,
        instance=FIRST_INSTANCE,
        project_id=GCP_PROJECT_ID,
    )
    # [END howto_operator_create_instance]

    # [START howto_operator_create_instance_result]
    create_instance_result = BashOperator(
        task_id="create-instance-result",
        bash_command="echo \"{{ task_instance.xcom_pull('create-instance') }}\"",
    )
    # [END howto_operator_create_instance_result]

    create_instance_2 = CloudMemorystoreCreateInstanceOperator(
        task_id="create-instance-2",
        location="europe-north1",
        instance_id=INSTANCE_NAME_2,
        instance=SECOND_INSTANCE,
        project_id=GCP_PROJECT_ID,
    )

    # [START howto_operator_get_instance]
    get_instance = CloudMemorystoreGetInstanceOperator(
        task_id="get-instance", location="europe-north1", instance=INSTANCE_NAME, project_id=GCP_PROJECT_ID
    )
Example #16
    def test_default_retries(self):
        bash_operator = BashOperator(bash_command='echo "stdout"',
                                     task_id='test_default_retries',
                                     dag=None)

        self.assertEqual(bash_operator.retries, 0)
default_args = {
    "owner": "atb",
    "email": [
        "*****@*****.**",
    ]
}
with DAG("covid19_data_processing",
         schedule_interval="@daily",
         default_args=default_args,
         start_date=timezone.datetime(2021, 3, 1),
         tags=["covid19", "odds"]) as dag:

    start = DummyOperator(task_id="start")

    print_prev_ds = BashOperator(
        task_id="print_prev_ds",
        bash_command="echo {{ prev_ds }}",
    )

    check_api = HttpSensor(
        task_id="check_api",
        endpoint="world",
        response_check=lambda response: True
        if len(response.json()) > 0 else False,
    )

    download_covid19_data = PythonOperator(
        task_id="download_covid19_data",
        python_callable=_download_covid19_data,
    )

    create_table = SqliteOperator(task_id="create_db",
Example #18
from datetime import datetime

from airflow.models import DAG
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.operators.subdag import SubDagOperator

DEFAULT_DATE = datetime(2016, 1, 1)

default_args = {'owner': 'airflow', 'start_date': DEFAULT_DATE, 'run_as_user': '******'}

dag = DAG(dag_id='impersonation_subdag', default_args=default_args)


def print_today():
    print(f'Today is {datetime.utcnow()}')


subdag = DAG('impersonation_subdag.test_subdag_operation', default_args=default_args)


PythonOperator(python_callable=print_today, task_id='exec_python_fn', dag=subdag)


BashOperator(task_id='exec_bash_operator', bash_command='echo "Running within SubDag"', dag=subdag)


subdag_operator = SubDagOperator(
    task_id='test_subdag_operation', subdag=subdag, mode='reschedule', poke_interval=1, dag=dag
)
args = {"owner": "airflow", "start_date": START_DATE}

for dag_no in range(1, DAG_COUNT + 1):
    dag = DAG(
        dag_id=safe_dag_id("__".join([
            DAG_PREFIX,
            f"SHAPE={SHAPE.name.lower()}",
            f"DAGS_COUNT={dag_no}_of_{DAG_COUNT}",
            f"TASKS_COUNT=${TASKS_COUNT}",
            f"START_DATE=${START_DATE_ENV}",
            f"SCHEDULE_INTERVAL=${SCHEDULE_INTERVAL_ENV}",
        ])),
        is_paused_upon_creation=False,
        default_args=args,
        schedule_interval=SCHEDULE_INTERVAL,
    )

    tasks = [
        BashOperator(task_id="__".join(["tasks", f"{i}_of_{TASKS_COUNT}"]),
                     bash_command='echo test',
                     dag=dag) for i in range(1, TASKS_COUNT + 1)
    ]
    if SHAPE == DagShape.NO_STRUCTURE:
        # Do nothing
        pass
    elif SHAPE == DagShape.LINEAR:
        chain(*tasks)

    globals()[f"dag_{dag_no}"] = dag
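    # Assigning each DAG to a module-level name via globals() is what lets the
    # Airflow DagBag discover the dynamically generated DAGs in this file.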
Example #20
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
# Operators; we need this to operate!
from airflow.operators.bash import BashOperator
from airflow.utils.dates import days_ago
# These args will get passed on to each operator
# You can override them on a per-task basis during operator initialization

default_args = {
    'owner': 'admin',
    'depends_on_past': False,
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}
dag = DAG(
    'collect_dag',
    default_args=default_args,
    description='A simple tutorial DAG',
    schedule_interval=None,
    start_date=days_ago(2),
    tags=['vk_api'],
)

BashOperator(
    task_id='collect_task',
    bash_command=
    'python3 /opt/DataMiningTasks/Datamining-Course2-2021/DataPiplineTask/DataPiplineTask.py 200 itis_kfu mydatabase.cdwb7v1ldkf1.us-east-1.rds.amazonaws.com postgres qwerty016 postgres 5432',
    dag=dag)
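
# A minimal, hypothetical sketch (not part of the collected example above)
# illustrating the "override them on a per-task basis" note: `retries` is
# inherited from default_args by the first task and overridden at operator
# initialization by the second. The DAG id and task ids are made up.
from datetime import timedelta

from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.utils.dates import days_ago

override_demo_args = {'retries': 1, 'retry_delay': timedelta(minutes=1)}

with DAG(
    'default_args_override_demo',
    default_args=override_demo_args,
    schedule_interval=None,
    start_date=days_ago(1),
) as override_demo_dag:
    uses_defaults = BashOperator(
        task_id='uses_default_retries',
        bash_command='date',  # retries=1 comes from default_args
    )
    overrides_defaults = BashOperator(
        task_id='overrides_retries',
        bash_command='date',
        retries=3,  # the per-task value takes precedence over default_args
    )
    uses_defaults >> overrides_defaults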
Example #21
with DAG(
        dag_id='example_bash_operator',
        default_args=args,
        schedule_interval='0 0 * * *',
        start_date=days_ago(2),
        dagrun_timeout=timedelta(minutes=60),
        tags=['example', 'example2'],
        params={"example_key": "example_value"},
) as dag:

    run_this_last = DummyOperator(task_id='run_this_last', )

    # [START howto_operator_bash]
    run_this = BashOperator(
        task_id='run_after_loop',
        bash_command='echo 1',
    )
    # [END howto_operator_bash]

    run_this >> run_this_last

    for i in range(3):
        task = BashOperator(
            task_id='runme_' + str(i),
            bash_command='echo "{{ task_instance_key_str }}" && sleep 1',
        )
        task >> run_this

    # [START howto_operator_bash_template]
    also_run_this = BashOperator(
        task_id='also_run_this',
Example #22
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago


def run_this_func(**context):
    """
    Print the payload "message" passed to the DagRun conf attribute.

    :param context: The execution context
    :type context: dict
    """
    print("Remotely received value of {} for key=message".format(context["dag_run"].conf["message"]))


with DAG(
    dag_id="example_trigger_target_dag",
    default_args={"owner": "airflow"},
    start_date=days_ago(2),
    schedule_interval=None,
    tags=['example'],
) as dag:

    run_this = PythonOperator(task_id="run_this", python_callable=run_this_func)

    bash_task = BashOperator(
        task_id="bash_task",
        bash_command='echo "Here is the message: $message"',
        env={'message': '{{ dag_run.conf["message"] if dag_run else "" }}'},
    )
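
# Hedged sketch (assumed companion DAG, not part of the collected example): the
# "message" value read above typically arrives from a controller DAG that
# triggers this one with a conf payload via TriggerDagRunOperator, or from the
# CLI: airflow dags trigger --conf '{"message": "Hello"}' example_trigger_target_dag
from airflow import DAG
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from airflow.utils.dates import days_ago

with DAG(
    dag_id="example_trigger_controller_dag_sketch",
    default_args={"owner": "airflow"},
    start_date=days_ago(2),
    schedule_interval="@once",
    tags=['example'],
) as controller_dag:
    trigger = TriggerDagRunOperator(
        task_id="trigger_target",
        trigger_dag_id="example_trigger_target_dag",
        conf={"message": "Hello World"},
    )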
Example #23

def _training_model():
    return randint(1, 10)


with DAG("first_dag",
         start_date=datetime(2021, 1, 1),
         schedule_interval="@daily",
         catchup=False) as dag:

    training_model_A = PythonOperator(task_id="training_model_A",
                                      python_callable=_training_model)

    training_model_B = PythonOperator(task_id="training_model_B",
                                      python_callable=_training_model)

    training_model_C = PythonOperator(task_id="training_model_C",
                                      python_callable=_training_model)

    choose_best_model = BranchPythonOperator(
        task_id="choose_best_model", python_callable=_choose_best_model)

    accurate = BashOperator(task_id="accurate", bash_command="echo 'accurate'")

    inaccurate = BashOperator(task_id="inaccurate",
                              bash_command="echo 'inaccurate'")

    [training_model_A, training_model_B, training_model_C
     ] >> choose_best_model >> [accurate, inaccurate]
Example #24
    bash_manually_pushed_value = ti.xcom_pull(key="manually_pushed_value", task_ids='bash_push')
    print(f"The xcom value pushed by task push via return value is {bash_pushed_via_return_value}")
    print(f"The xcom value pushed by task push manually is {bash_manually_pushed_value}")


with DAG(
    'example_xcom',
    schedule="@once",
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    tags=['example'],
) as dag:
    bash_push = BashOperator(
        task_id='bash_push',
        bash_command='echo "bash_push demo"  && '
        'echo "Manually set xcom value '
        '{{ ti.xcom_push(key="manually_pushed_value", value="manually_pushed_value") }}" && '
        'echo "value_by_return"',
    )

    bash_pull = BashOperator(
        task_id='bash_pull',
        bash_command='echo "bash pull demo" && '
        f'echo "The xcom pushed manually is {bash_push.output["manually_pushed_value"]}" && '
        f'echo "The returned_value xcom is {bash_push.output}" && '
        'echo "finished"',
        do_xcom_push=False,
    )

    python_pull_from_bash = pull_value_from_bash_push()
Example #25
    # 'on_success_callback': some_other_function,
    # 'on_retry_callback': another_function,
    # 'sla_miss_callback': yet_another_function,
    # 'trigger_rule': 'all_success'
}
with DAG(
        'tutorial',
        default_args=default_args,
        description='A simple tutorial DAG',
        schedule_interval=timedelta(days=1),
        start_date=days_ago(2),
        tags=['example'],
) as dag:
    # t1, t2 and t3 are examples of tasks created by instantiating operators
    t1 = BashOperator(
        task_id='print_date',
        bash_command='date',
    )

    t2 = BashOperator(
        task_id='sleep',
        depends_on_past=False,
        bash_command='sleep 5',
        retries=3,
    )
    dag.doc_md = __doc__

    t1.doc_md = dedent(
        """\
    #### Task Documentation
    You can document your task using the attributes `doc_md` (markdown),
    `doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets
Example #26
        start_date=datetime(2021, 1, 1),
        catchup=False,
) as dag:
    # Create
    # [START howto_operator_gcp_datacatalog_create_entry_group]
    create_entry_group = CloudDataCatalogCreateEntryGroupOperator(
        task_id="create_entry_group",
        location=LOCATION,
        entry_group_id=ENTRY_GROUP_ID,
        entry_group={"display_name": "analytics data - jan 2011"},
    )
    # [END howto_operator_gcp_datacatalog_create_entry_group]

    # [START howto_operator_gcp_datacatalog_create_entry_group_result]
    create_entry_group_result = BashOperator(
        task_id="create_entry_group_result",
        bash_command=f"echo {create_entry_group.output['entry_group_id']}",
    )
    # [END howto_operator_gcp_datacatalog_create_entry_group_result]

    # [START howto_operator_gcp_datacatalog_create_entry_group_result2]
    create_entry_group_result2 = BashOperator(
        task_id="create_entry_group_result2",
        bash_command=f"echo {create_entry_group.output}",
    )
    # [END howto_operator_gcp_datacatalog_create_entry_group_result2]

    # [START howto_operator_gcp_datacatalog_create_entry_gcs]
    create_entry_gcs = CloudDataCatalogCreateEntryOperator(
        task_id="create_entry_gcs",
        location=LOCATION,
        entry_group=ENTRY_GROUP_ID,
Example #27
def mean_fare_per_class():
    titanic_dfa = pd.read_csv(get_path('titanic.csv'))
    dfa = titanic_dfa.pivot_table(index=['Pclass'],
                                  values='Fare',
                                  aggfunc='mean').reset_index()
    dfa.to_csv(get_path('titanic_mean_fares.csv'))


# Inside the DAG context we define the set of tasks;
# an instantiated Operator object is itself a task
with DAG(**settings) as dag:

    # BashOperator that runs the given bash command
    first_task = BashOperator(
        task_id='first_task',
        bash_command=
        'echo "Here we start! Info: run_id={{ run_id }} | dag_run={{ dag_run }}"',
        dag=dag,
    )
    # Download the dataset
    create_titanic_dataset = PythonOperator(
        task_id='download_titanic_dataset',
        python_callable=download_titanic_dataset,
        dag=dag,
    )
    # Read, transform and write the dataset
    pivot_titanic_dataset = PythonOperator(
        task_id='pivot_dataset',
        python_callable=pivot_dataset,
        dag=dag,
    )
Example #28
    # ############################## #
    # ### Annotate image example ### #
    # ############################## #

    # [START howto_operator_vision_annotate_image]
    annotate_image = CloudVisionImageAnnotateOperator(
        request=annotate_image_request,
        retry=Retry(maximum=10.0),
        timeout=5,
        task_id='annotate_image')
    # [END howto_operator_vision_annotate_image]

    # [START howto_operator_vision_annotate_image_result]
    annotate_image_result = BashOperator(
        bash_command="echo {{ task_instance.xcom_pull('annotate_image')"
        "['logoAnnotations'][0]['description'] }}",
        task_id='annotate_image_result',
    )
    # [END howto_operator_vision_annotate_image_result]

    # [START howto_operator_vision_detect_text]
    detect_text = CloudVisionDetectTextOperator(
        image=DETECT_IMAGE,
        retry=Retry(maximum=10.0),
        timeout=5,
        task_id="detect_text",
        language_hints="en",
        web_detection_params={'include_geo_results': True},
    )
    # [END howto_operator_vision_detect_text]
Example #29
    )

    purge_queue = CloudTasksQueuePurgeOperator(
        location=LOCATION,
        queue_name=QUEUE_ID,
        task_id="purge_queue",
    )

    get_queue = CloudTasksQueueGetOperator(
        location=LOCATION,
        queue_name=QUEUE_ID,
        task_id="get_queue",
    )

    get_queue_result = BashOperator(
        task_id="get_queue_result",
        bash_command=f"echo {get_queue.output}",
    )

    get_queue >> get_queue_result

    update_queue = CloudTasksQueueUpdateOperator(
        task_queue=Queue(stackdriver_logging_config=dict(sampling_ratio=1)),
        location=LOCATION,
        queue_name=QUEUE_ID,
        update_mask={"paths": ["stackdriver_logging_config.sampling_ratio"]},
        task_id="update_queue",
    )

    list_queue = CloudTasksQueuesListOperator(location=LOCATION,
                                              task_id="list_queue")
Example #30
        'processing_tasks.training_model_c'
    ])
    for accuracy in accuracies:
        if accuracy > 5:
            return 'accurate' #['accurate','inaccurate']
    return 'inaccurate'
    # print(accuracies)

# def _is_accurate():
#     return('accurate')

with DAG('xcom_dag', schedule_interval='@daily', default_args=default_args, catchup=False) as dag:

    downloading_data = BashOperator(
        task_id='downloading_data',
        bash_command='sleep 3',
        do_xcom_push=False
    )

    with TaskGroup('processing_tasks') as processing_tasks:
        training_model_a = PythonOperator(
            task_id='training_model_a',
            python_callable=_training_model
        )

        training_model_b = PythonOperator(
            task_id='training_model_b',
            python_callable=_training_model
        )

        training_model_c = PythonOperator(