dag = DAG( dag_id="FirstScript", schedule_interval="@daily", default_args={ "owner": "airflow training", "start_date": dt.datetime(2018, 8, 1), "depends_on_past": True, "email_on_failure": True, "email": "*****@*****.**", }, ) pgsl_to_gcs = PostgresToGoogleCloudStorageOperator( task_id="postgres_to_gcs", dag=dag, sql= "select * from land_registry_price_paid_uk where transfer_date='{{ds}}'", bucket="airflow-training-knab-asv", filename="land_registry_price_paid_uk/{{ds}}/proerties_{}.json", postgres_conn_id="airflow-training-postgres") dataproc_create_cluster = DataprocClusterCreateOperator( task_id="create_dataproc", cluster_name="analyse-pricing-{{ ds }}", project_id=PROJECT_ID, num_workers=2, zone="europe-west4-a", dag=dag, ) land_registry_prices_to_bigquery = DataFlowPythonOperator( task_id="land_registry_prices_to_bigquery",
"start_date": dt.datetime(2018, 9, 11), "depends_on_past": True, "email_on_failure": True, "email": "*****@*****.**", }, ) def print_exec_date(**context): print(context["execution_date"]) pgsl_to_gcs = PostgresToGoogleCloudStorageOperator( task_id="postgres_to_gcs", postgres_conn_id="airflow-training-postgres", sql="SELECT * FROM land_registry_price_paid_uk WHERE transfer_date = '{{ ds }}'", bucket="airflow-training-knab-jochem", filename="land_registry_price_paid_uk/{{ ds }}/properties_{}.json", dag=dag, ) dataproc_create_cluster = DataprocClusterCreateOperator( task_id="create_dataproc", cluster_name="analyse-pricing-{{ ds }}", project_id="gdd-ea393e48abe0a85089b6b551da", num_workers=2, zone="europe-west4-a", dag=dag, auto_delete_ttl=5 * 60, # Autodelete after 5 minutes )
# project_id = "training-airflow" dag = DAG( dag_id="uk_land_dag2", schedule_interval="30 7 * * *", default_args={ "owner": "airflow", "start_date": dt.datetime(2018, 10, 1), "depends_on_past": True, }, ) pg_2_gcs = PostgresToGoogleCloudStorageOperator( task_id="pg_2_gcs", postgres_conn_id="my_db_connection", sql= "SELECT * FROM land_registry_price_paid_uk WHERE transfer_date = '{{ ds }}'", bucket="airflowbolcom_ghermann_dummybucket", filename="mypgdata_{{ ds }}", dag=dag) zone = "europe-west4-a" dataproc_cluster_name = "my-dp-cluster-{{ ds }}" dataproc_create_cluster = DataprocClusterCreateOperator( task_id="my_create_dp_cluster", cluster_name=dataproc_cluster_name, project_id=project_id, num_workers=2, zone=zone, dag=dag,
"start_date": dt.datetime(2018, 10, 1), "depends_on_past": True, "email_on_failure": True, "email": "*****@*****.**", }, ) def print_exec_date(**context): print(context["execution_date"]) get_data = PostgresToGoogleCloudStorageOperator( task_id="postgres_to_gcs", postgres_conn_id="my_database_connection", sql= "SELECT * FROM land_registry_price_paid_uk WHERE transfer_date = '{{ ds }}'", bucket='airflow_training_bucket', filename='land_registry_price_paid_uk/{{ ds }}/result.json', dag=dag) my_task = PythonOperator(task_id="task_name", python_callable=print_exec_date, provide_context=True, dag=dag) create_cluster = DataprocClusterCreateOperator( task_id="create_dataproc", cluster_name="analyse-pricing-{{ ds }}", project_id='airflowbolcom-20165e4959a78c1d', num_workers=2, zone="europe-west4-a",
"start_date": dt.datetime(2018, 10, 10), "depends_on_past": False, "email_on_failure": True, "email": "*****@*****.**", }, ) as dag: usd_conversion_rate = create_conversion_task(task_id="usd_conversion_rate", target_currency="USD") eur_conversion_rate = create_conversion_task(task_id="eur_conversion_rate", target_currency="EUR") psql_to_gcs = PostgresToGoogleCloudStorageOperator( task_id="read_postgres", postgres_conn_id="postgres_training", sql= "select * from land_registry_price_paid_uk where transfer_date = '{{ ds }}'::date", bucket="airflow-training-simple-dag", filename="training-price-paid-uk/{{ ds }}/land_registry.json") cluster_name = "cluster-{{ ds }}" gcs_project_id = "airflowbolcom-544f36a42f5c0d9d" create_cluster = DataprocClusterCreateOperator(task_id="create_cluster", cluster_name=cluster_name, project_id=gcs_project_id, num_workers=2, zone="europe-west4-a") cloud_analytics = DataProcPySparkOperator( task_id="analyze_data",
    },
)


def print_exec_date(**context):
    print(context["execution_date"])


# my_task = PythonOperator(
#     task_id="task_name", python_callable=print_exec_date, provide_context=True, dag=dag
# )

pgsl_to_gcs = PostgresToGoogleCloudStorageOperator(
    task_id="export_data_to_bucket",
    postgres_conn_id="training_postgres",
    sql="SELECT * FROM land_registry_price_paid_uk WHERE transfer_date = '{{ ds }}'",
    bucket="airflow_training_data",
    filename="data_{{ds_nodash}}/land_registry_price.json",
    dag=dag,
)

dataproc_create_cluster = DataprocClusterCreateOperator(
    task_id="create_dataproc_cluster",
    cluster_name="dataproc-cluster-dag-training-{{ ds }}",
    project_id="airflowbolcom-b9aabd6971d488d9",
    num_workers=2,
    zone="europe-west1-d",
    dag=dag,
)

compute_aggregates = DataProcPySparkOperator(
    task_id="compute_aggregates",
    main=
    num_workers=2,
    zone="europe-west4-a",
    dag=dag,
    pool="dataproc",
)

query = """
SELECT *
FROM land_registry_price_paid_uk
WHERE transfer_date = '{{ ds }}'
"""

pgsl_to_gcs = (
    PostgresToGoogleCloudStorageOperator(
        task_id="pgsl_to_gcs",
        postgres_conn_id="postgres_airflow_training",
        sql=query,
        bucket=BUCKET,
        filename="land_registry_price_paid_uk/{{ ds }}/properties_{}.json",
        dag=dag,
    )
    >> dataproc_create_cluster
)

for currency in {"EUR", "USD"}:
    HttpToGcsOperator(
        task_id="get_currency_" + currency,
        endpoint="airflow-training-transform-valutas?date={{ ds }}&from=GBP&to=" + currency,
        bucket=BUCKET,
        method="GET",
        http_conn_id="airflow-training-currency-http",
        gcs_conn_id="airflow-training-data-tim",
        gcs_path="currency/{{ ds }}-" + currency + ".json",
        dag=dag,
dataproc_create_cluster = DataprocClusterCreateOperator(
    task_id="create_dataproc",
    cluster_name="analyse-pricing-{{ ds }}",
    project_id="gdd-25d677142443a8e2ace1927d48",
    num_workers=2,
    zone="europe-west4-a",
    dag=dag,
)

pgsl_to_gcs = PostgresToGoogleCloudStorageOperator(
    task_id="postgres_to_gcs",
    postgres_conn_id="postgres_airflow_training",
    sql="SELECT * FROM public.land_registry_price_paid_uk WHERE transfer_date = '{{ ds }}'",
    bucket='airflow-training-knab-geert',
    filename='land_registry_price_paid_uk/{{ ds }}/properties_{}.json',
    dag=dag,
) >> dataproc_create_cluster

for currency in {'EUR', 'USD'}:
    HttpToGcsOperator(
        task_id="get_currency_" + currency,
        method="GET",
        endpoint="airflow-training-transform-valutas?date={{ ds }}&from=GBP&to=" + currency,
        http_conn_id="http_airflow_training",
        gcs_conn_id="google_cloud_default",
        gcs_bucket="airflow-training-knab-geert",
        gcs_path="currency/{{ ds }}-" + currency + ".json",
        dag=dag
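# HttpToGcsOperator is not a built-in Airflow operator; in this training material it is a
# custom operator, which is why the two fragments above pass slightly different argument
# names (bucket vs. gcs_bucket). A minimal sketch of what such an operator could look like,
# assuming Airflow 1.10 hooks; the actual training implementation may differ:
import tempfile

from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook
from airflow.hooks.http_hook import HttpHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults


class HttpToGcsOperator(BaseOperator):
    """Call an HTTP endpoint and upload the response body to Google Cloud Storage."""

    template_fields = ("endpoint", "gcs_path")

    @apply_defaults
    def __init__(
        self,
        endpoint,
        gcs_bucket,
        gcs_path,
        method="GET",
        http_conn_id="http_default",
        gcs_conn_id="google_cloud_default",
        *args,
        **kwargs
    ):
        super(HttpToGcsOperator, self).__init__(*args, **kwargs)
        self.endpoint = endpoint
        self.gcs_bucket = gcs_bucket
        self.gcs_path = gcs_path
        self.method = method
        self.http_conn_id = http_conn_id
        self.gcs_conn_id = gcs_conn_id

    def execute(self, context):
        # Fetch the payload over HTTP ...
        http = HttpHook(method=self.method, http_conn_id=self.http_conn_id)
        response = http.run(self.endpoint)

        # ... then write it to a temporary file and upload that file to GCS.
        with tempfile.NamedTemporaryFile() as tmp:
            tmp.write(response.content)
            tmp.flush()
            gcs = GoogleCloudStorageHook(google_cloud_storage_conn_id=self.gcs_conn_id)
            gcs.upload(bucket=self.gcs_bucket, object=self.gcs_path, filename=tmp.name)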
dag = DAG( dag_id="training_dag", schedule_interval="30 7 * * *", default_args={ "owner": "airflow", "start_date": dt.datetime(2018, 10, 1), "depends_on_past": True, "email_on_failure": True, "email": "*****@*****.**", }, ) copy_task = PostgresToGoogleCloudStorageOperator( task_id="copy_postgres_to_gcs", postgres_conn_id="training_postgres", sql= "SELECT * FROM land_registry_price_paid_uk WHERE transfer_date = '{{ ds }}'", bucket="airflow-training", filename="exports/{{ ds }}/land_registry_price.json", dag=dag) dataproc_create_cluster = DataprocClusterCreateOperator( task_id="create_cluster", cluster_name="analyse-pricing-{{ ds }}", project_id="airflowbolcom-1d3b3a0049ce78da", num_workers=2, zone="europe-west4-a", dag=dag) copy_task >> dataproc_create_cluster compute_aggregates = DataProcPySparkOperator(