Example #1
from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import ShortCircuitOperator
from airflow.utils.helpers import chain
from datetime import datetime, timedelta

seven_days_ago = datetime.combine(datetime.today() - timedelta(7),
                                  datetime.min.time())
args = {
    'owner': 'airflow',
    'start_date': seven_days_ago,
}

dag = DAG(dag_id='example_short_circuit_operator', default_args=args)

cond_true = ShortCircuitOperator(task_id='condition_is_True',
                                 python_callable=lambda: True,
                                 dag=dag)

cond_false = ShortCircuitOperator(task_id='condition_is_False',
                                  python_callable=lambda: False,
                                  dag=dag)

ds_true = [DummyOperator(task_id='true_' + str(i), dag=dag) for i in [1, 2]]
ds_false = [DummyOperator(task_id='false_' + str(i), dag=dag) for i in [1, 2]]

chain(cond_true, *ds_true)
chain(cond_false, *ds_false)
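
When this DAG runs, condition_is_True returns a truthy value so true_1 and true_2 execute, while condition_is_False returns False and both false_1 and false_2 are marked skipped. A minimal sketch of a more realistic gate, reusing the same dag object (the callable name and the weekday rule are illustrative, not part of the original example):

def is_weekday():
    # weekday() is 0-4 for Mon-Fri; returning False on Sat/Sun skips downstream tasks
    return datetime.today().weekday() < 5

weekday_gate = ShortCircuitOperator(task_id='weekday_gate',
                                    python_callable=is_weekday,
                                    dag=dag)
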
Example #2
from datetime import timedelta

from airflow import models
from airflow.operators.python_operator import ShortCircuitOperator

default_dag_args = {
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

with models.DAG(
        dag_id='your_dag_name',
        # Continue to run DAG once per day
        schedule_interval=schedule_interval_dag,
        catchup=True,
        default_args=default_dag_args) as dag:

    email_to_gcs = GmailToGCS.ExtractAttachment(task_id='email_to_gcs',
                                                inbox_name=inbox_name_dag)

    check_for_file = ShortCircuitOperator(task_id='checkforfile',
                                          provide_context=False,
                                          python_callable=checkforfile)

    gcs_to_bq = StorageToBQ.StorageToBigQuery(
        task_id='gcs_to_bq',
        dataset_name=dataset_name_dag,
        bigquery_table_name=bigquery_table_name_dag,
        write_mode=write_mode_dag)

    check_dups = CheckDupBQ.CheckBQDuplication(
        task_id='check_dups',
        dataset_name=dataset_name_dag,
        bigquery_table_name=bigquery_table_name_dag,
        bigquery_table_key=bigquery_table_key_dag,
        date_column=date_column_dag)
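
The checkforfile callable wired into the checkforfile task is not part of this excerpt. One plausible shape (entirely hypothetical; the bucket name and prefix are placeholders, using the google-cloud-storage client) returns True only when an attachment actually landed in Cloud Storage, so gcs_to_bq and check_dups are skipped otherwise:

from google.cloud import storage

def checkforfile():
    # Hypothetical: short-circuit the load when no attachment reached the bucket
    client = storage.Client()
    blobs = list(client.list_blobs('attachment-bucket', prefix='attachments/'))
    return len(blobs) > 0
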
Example #3
from airflow import DAG
from airflow.operators.email_operator import EmailOperator
from airflow.operators.python_operator import PythonOperator, ShortCircuitOperator
from airflow.utils.trigger_rule import TriggerRule

default_args = {
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 0,
}

with DAG(
        'PriceAlerter',
        default_args=default_args,
        schedule_interval='*/5 9-17 * * *',
        catchup=False,
) as dag:
    price_listener = PythonOperator(
        task_id='listener',
        python_callable=listener.listen,
    )
    email_trigger = ShortCircuitOperator(
        task_id='email_trigger',
        python_callable=lambda: bool(listener.trigger),
        trigger_rule=TriggerRule.NONE_FAILED,
    )
    email = EmailOperator(
        task_id='email',
        to=email_service.email_list,
        subject=email_service.get_email_subject(listener.summary),
        html_content=email_service.get_html_content(listener.summary),
    )

price_listener.set_downstream(email_trigger)
email_trigger.set_downstream(email)
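
One caveat in this example: listener.trigger inside the lambda is read at task run time, but get_email_subject(listener.summary) and get_html_content(listener.summary) are evaluated once, when the scheduler parses the DAG file. A common workaround (a sketch, assuming the listener task returns its summary so Airflow stores it in XCom) is to let EmailOperator's templated fields pull the value at run time:

email = EmailOperator(
    task_id='email',
    to=email_service.email_list,
    # subject and html_content are templated, so the XCom pull happens at run time
    subject="Price alert: {{ ti.xcom_pull(task_ids='listener') }}",
    html_content="<p>{{ ti.xcom_pull(task_ids='listener') }}</p>",
)
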
Example #4
import os
import random

from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import ShortCircuitOperator

cmd = 'ls -l'
run_this_first = DummyOperator(task_id='run_this_first', dag=dag)

# Define ld variables
task_name_ld = "/home/etl/APP/UPCSS/LD"
# Define sp variables
task_name_sp = "/home/etl/APP/UPCSS/SP"
# Define ul variables
task_name_ul = "/home/etl/APP/UPCSS/UL"

# List the contents of the SP directory
options = os.listdir(task_name_sp)

branching = ShortCircuitOperator(
    task_id='branching',
    python_callable=lambda: random.choice(options),
    dag=dag)
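
# Note: random.choice(options) returns a directory name, and a non-empty string
# is always truthy, so this ShortCircuitOperator never skips its downstream
# tasks; a callable that can return False (or a BranchPythonOperator, if the
# intent is to pick one branch at random) would be needed for real gating.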
branching.set_upstream(run_this_first)

join = DummyOperator(task_id='join', trigger_rule='all_success', dag=dag)

for option in options:

    # Get ld file path
    file_path_ld = task_name_ld + '/' + option + '/bin/'
    # List the contents of file_path_ld
    file_names_ld = os.listdir(file_path_ld)

    # Get sp file path
    file_path_sp = task_name_sp + '/' + option + '/bin/'
    # List the contents of file_path_sp
Example #5
import pandas as pd

from airflow.hooks.postgres_hook import PostgresHook
from airflow.operators.python_operator import PythonOperator, ShortCircuitOperator


def load_orders_data_to_csv(**kwargs):
    engine = PostgresHook(postgres_conn_id='tutorial_local').get_sqlalchemy_engine()
    # read_csv already returns a DataFrame; fill NaNs by column dtype
    df = pd.read_csv(JOINED_DATA_FILE_PATH)
    df = df.apply(lambda x: x.fillna(0) if x.dtype.kind in 'biufc' else x.fillna(''))
    df.to_sql(
        'orders_table',
        engine,
        if_exists='replace',
        index=False,
    )
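

# The check_postgresql_available callable used below is not shown in this
# excerpt; a plausible (hypothetical) version pings the connection and returns
# False so that postgres_check skips the downstream loads when the database
# is unreachable:
def check_postgresql_available():
    try:
        PostgresHook(postgres_conn_id='tutorial_local').get_conn().close()
        return True
    except Exception:
        return False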


postgres_check = ShortCircuitOperator(
    task_id='postgres_check',
    provide_context=False,
    python_callable=check_postgresql_available,
    dag=dag,
)

get_customers_csv = PythonOperator(
    task_id='get_customers_csv',
    provide_context=True,
    python_callable=export_postgres_data_to_csv,
    op_kwargs={'filepath': CUSTOMERS_FILE_PATH, 'query': CUSTOMERS_QUERY},
    dag=dag,
)

get_goods_csv = PythonOperator(
    task_id='get_goods_csv',
    provide_context=True,
    python_callable=export_postgres_data_to_csv,