def test_logging_head_error_request(self, mock_session_send):
    """Check that a failing HEAD poke logs the HTTP reason and the response body.

    ``mock_session_send`` is configured to return a 404 response, the sensor
    is expected to end with ``AirflowSensorTimeout``, and the hook's
    ``log.error`` must have received the reason/body pair six times in a row.
    """

    def resp_check(_):
        return True

    not_found = requests.Response()
    not_found.status_code = 404
    not_found.reason = 'Not Found'
    not_found._content = b'This endpoint doesnt exist'
    mock_session_send.return_value = not_found

    task = HttpSensor(
        dag=self.dag,
        task_id='http_sensor_head_method',
        http_conn_id='http_default',
        endpoint='',
        request_params={},
        method='HEAD',
        response_check=resp_check,
        timeout=5,
        poke_interval=1,
    )

    # Two error records (reason, then body) per failed request; the expected
    # sequence is six such pairs.
    expected_calls = [
        mock.call('HTTP error: %s', 'Not Found'),
        mock.call('This endpoint doesnt exist'),
    ] * 6

    with mock.patch.object(task.hook.log, 'error') as mock_errors:
        with self.assertRaises(AirflowSensorTimeout):
            task.execute(None)

        self.assertTrue(mock_errors.called)
        mock_errors.assert_has_calls(expected_calls)
def test_poke_exception(self, mock_session_send):
    """
    An exception raised by ``response_check`` while poking must propagate
    out of ``execute`` instead of being ignored.
    """
    ok_response = requests.Response()
    ok_response.status_code = 200
    mock_session_send.return_value = ok_response

    def resp_check(_):
        raise AirflowException('AirflowException raised here!')

    sensor_kwargs = {
        'task_id': 'http_sensor_poke_exception',
        'http_conn_id': 'http_default',
        'endpoint': '',
        'request_params': {},
        'response_check': resp_check,
        'timeout': 5,
        'poke_interval': 1,
    }
    task = HttpSensor(**sensor_kwargs)

    # The message raised inside resp_check must surface unchanged.
    with self.assertRaisesRegex(AirflowException, 'AirflowException raised here!'):
        task.execute(context={})
def test_poke_context(self, mock_session_send):
    """Check that ``response_check`` is called with ``execution_date`` from the context.

    The check only succeeds when the ``execution_date`` it receives equals
    ``DEFAULT_DATE``; any other value raises and fails the test.
    """
    ok_response = requests.Response()
    ok_response.status_code = 200
    mock_session_send.return_value = ok_response

    def resp_check(_, execution_date):
        if execution_date != DEFAULT_DATE:
            raise AirflowException('AirflowException raised here!')
        return True

    task = HttpSensor(
        dag=self.dag,
        task_id='http_sensor_poke_exception',
        http_conn_id='http_default',
        endpoint='',
        request_params={},
        response_check=resp_check,
        timeout=5,
        poke_interval=1,
    )

    # Execute with the template context of a task instance dated DEFAULT_DATE.
    task_instance = TaskInstance(task=task, execution_date=DEFAULT_DATE)
    context = task_instance.get_template_context()
    task.execute(context)
def test_head_method(self, mock_session_send):
    """Check that a sensor configured with ``method='HEAD'`` sends a HEAD request.

    The request captured by ``mock_session_send`` is compared, by URL and by
    HTTP method, with a locally prepared ``HEAD`` request for
    ``https://www.httpbin.org``.
    """

    def resp_check(_):
        return True

    task = HttpSensor(
        dag=self.dag,
        task_id='http_sensor_head_method',
        http_conn_id='http_default',
        endpoint='',
        request_params={},
        method='HEAD',
        response_check=resp_check,
        timeout=5,
        poke_interval=1,
    )

    task.execute(context={})

    # Only the positional arguments of the recorded call are needed.
    args, _ = mock_session_send.call_args
    received_request = args[0]

    prep_request = requests.Request('HEAD', 'https://www.httpbin.org', {}).prepare()

    self.assertEqual(prep_request.url, received_request.url)
    # assertEqual, not assertTrue: assertTrue(a, b) uses b as the failure
    # message, so it would pass for any non-empty method string.
    self.assertEqual(prep_request.method, received_request.method)
outdata['rates'][pair] = indata['rates'][pair] with open('/opt/airflow/files/forex_rates.json', 'a') as outfile: json.dump(outdata, outfile) outfile.write('\n') with DAG("forex_data_pipeline", start_date=datetime(2021, 1, 1), schedule_interval="@daily", default_args=default_args, catchup=False) as dag: is_forex_rates_available = HttpSensor( task_id="is_forex_rates_available", http_conn_id="forex_api", endpoint="marclamberti/f45f872dea4dfd3eaa015a4a1af4b39b", response_check=lambda response: "rates" in response.text, poke_interval=5, timeout=20) is_forex_currencies_file_available = FileSensor( task_id="is_forex_currencies_file_available", fs_conn_id="forex_path", filepath="forex_currencies.csv", poke_interval=5, timeout=20) downloading_rates = PythonOperator(task_id="downloading_rates", python_callable=download_rates) is_forex_rates_available >> is_forex_currencies_file_available >> downloading_rates
from datetime import timedelta

from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.providers.http.sensors.http import HttpSensor
from airflow.utils.dates import days_ago

# Example DAG: one HTTP sensor followed by two bash tasks.
with DAG(
    dag_id="SensorExample",
    start_date=days_ago(1),
    schedule_interval="@daily",
    catchup=False,
) as dag:
    # Sensor on endpoint "/" of the "http_conn" connection, configured with
    # retries=5 and a 3-second retry delay.
    sensor = HttpSensor(
        task_id="httpsensor",
        http_conn_id="http_conn",
        endpoint="/",
        retries=5,
        retry_delay=timedelta(seconds=3),
    )

    task1 = BashOperator(
        task_id="task1",
        bash_command="echo hello task1",
    )
    task2 = BashOperator(
        task_id="task2",
        bash_command="echo hello task2",
    )

    # Both bash tasks are direct downstream tasks of the sensor.
    sensor >> [task1, task2]
pg_hook.run(insert_statement, parameters=row) os.remove(tot_name) else: print("No file named {}".format(tot_name)) else: print("No file named {}. No data to load.".format(tot_name)) with DAG('fetch_kc_crime_data-v0.1', schedule_interval='@daily', default_args=default_args, catchup=False) as dag: check_endpoint_availability = HttpSensor( task_id='check_endpoint_availability', http_conn_id='http_data_kcmo_org', endpoint=app_config['endpoint']) download_latest_crime_data = PythonOperator( task_id='download_latest_crime_data', python_callable=download_latest_crime_data) load_data = PythonOperator(task_id='load_data_raw', python_callable=load_data_raw) fetch_crime_window = PythonOperator(task_id='fetch_crime_window', python_callable=fetch_crime_window) check_endpoint_availability >> fetch_crime_window >> download_latest_crime_data >> load_data
# Daily DAG definition; ``default_args`` and ``_download_covid19_data`` are
# defined outside this excerpt.
with DAG(
    "covid19_data_processing",
    schedule_interval="@daily",
    default_args=default_args,
    start_date=timezone.datetime(2021, 3, 1),
    tags=["covid19", "odds"],
) as dag:
    start = DummyOperator(task_id="start")

    # Echoes the rendered ``prev_ds`` template value.
    print_prev_ds = BashOperator(
        task_id="print_prev_ds",
        bash_command="echo {{ prev_ds }}",
    )

    # Succeeds once the "world" endpoint returns a non-empty JSON payload.
    # The comparison already yields a bool, so no conditional expression is needed.
    check_api = HttpSensor(
        task_id="check_api",
        endpoint="world",
        response_check=lambda response: len(response.json()) > 0,
    )

    download_covid19_data = PythonOperator(
        task_id="download_covid19_data",
        python_callable=_download_covid19_data,
    )

    # NOTE(review): the task_id is "create_db" although the statement creates a table.
    create_table = SqliteOperator(
        task_id="create_db",
        sqlite_conn_id="sqlite_default",
        sql=""" CREATE TABLE IF NOT EXISTS covid19 ( NewConfirmed TEXT NOT NULL ); """,
    )
# PUT request carrying a JSON body.
t3 = SimpleHttpOperator(
    dag=dag,
    task_id='put_op',
    method='PUT',
    endpoint='put',
    headers={"Content-Type": "application/json"},
    data=json.dumps({"priority": 5}),
)

# DELETE request carrying a form-encoded body.
t4 = SimpleHttpOperator(
    dag=dag,
    task_id='del_op',
    method='DELETE',
    endpoint='delete',
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    data="some=data",
)

# Sensor whose check passes when the response text contains "httpbin".
sensor = HttpSensor(
    dag=dag,
    task_id='http_sensor_check',
    http_conn_id='http_default',
    endpoint='',
    request_params={},
    poke_interval=5,
    response_check=lambda response: "httpbin" in response.text,
)

# The sensor gates the chain; t1, t2 and t5 are defined outside this excerpt.
sensor >> t1 >> t2 >> t3 >> t4 >> t5
default_args = dict(owner='airflow')

# Monthly DAG; ``download_dataset`` and ``convert_to_parquet`` are defined
# outside this excerpt.
with DAG(
    dag_id='nyc_taxi_2021_dag',
    start_date=dt.datetime(2021, 1, 1),
    schedule_interval='@monthly',
    default_args=default_args,
) as dag:
    # HEAD request against the templated monthly CSV name.
    check_if_exists = HttpSensor(
        task_id='check_if_exists',
        http_conn_id='nyc_yellow_taxi_id',
        method='HEAD',
        endpoint='yellow_tripdata_{{ execution_date.strftime("%Y-%m") }}.csv',
        mode='reschedule',
        poke_interval=60 * 60 * 24,  # every 24 hours
    )

    @task
    def download_file():
        """Download the dataset for the run's ``execution_date`` month (``YYYY-MM``)."""
        run_context = get_current_context()
        month = run_context['execution_date'].strftime('%Y-%m')
        return download_dataset(month)

    @task
    def to_parquet(file_path: str):
        """Convert ``file_path`` to parquet for the run's ``execution_date`` month."""
        run_context = get_current_context()
        month = run_context['execution_date'].strftime('%Y-%m')
        return convert_to_parquet(month, file_path)

    file_path = download_file()
df["date"] = today_date df = df[["city", "temperature", "humidity", "date"]] df = df.sort_values("city").reset_index(drop=True) df.to_csv("ukraine_weather_report_{}.csv".format(today_date), index=False) df = pandas.read_csv("ukraine_weather_report_{}.csv".format(today_date)) with DAG(dag_id="weather_data_pipeline", schedule_interval="0 12 * * *", default_args=default_args, catchup=False) as dag: is_weather_api_available = HttpSensor( task_id="is_weather_api_available", method="GET", http_conn_id="weather_api_conn", endpoint="current?access_key={}&query=Kiev".format(key), response_check=lambda response: "request" in response.json(), poke_interval=5, timeout=20) _create_report = PythonOperator(task_id="create_report", python_callable=create_report, provide_context=True, dag=dag) for query in city_queries.keys(): task_id = "get_weather_{}".format(city_queries[query]) _get_weather = PythonOperator(task_id=task_id, op_kwargs={"query": query}, python_callable=get_weather,
with DAG( 'covid19_data_processing', schedule_interval='@daily', default_args=default_args, # Dont forget to add default_args here start_date=timezone.datetime(2021, 3, 1), tags=['covid19', 'ODDS']) as dag: # Defining Operator task start = DummyOperator(task_id='start') print_prev_ds = BashOperator( task_id='print_prev_ds', bash_command='echo {{ prev_ds }} {{ macros.ds_add("2015-01-01", 5) }}') check_api = HttpSensor( task_id='check_api', endpoint='/world', response_check=lambda response: response.status_code == 200) download_covid19_data = PythonOperator( task_id='download_covid19_data', python_callable=_download_covid19_data) create_table = SqliteOperator(task_id='create_table', sqlite_conn_id='sqlite_default', sql=''' CREATE TABLE IF NOT EXISTS covid19 ( NewConfirmed TEXT NOT NULL ); ''') load_data_to_db = BashOperator(task_id='load_data_to_db',
"customers --hive-import --create-hive-table --hive-table airflow.customers " return f"{cmd1} && {cmd2}" with DAG( dag_id="Customer_360_pipeline", start_date=days_ago(1), schedule_interval="@daily", catchup=False, tags=["customer_360", "aws"] ) as dag: aws_sensor = HttpSensor( task_id="watch_for_order_s3", endpoint="orders.csv", http_conn_id="orders_s3", retries=10, response_check=lambda response: response.status_code == 200, retry_delay=timedelta(seconds=10) ) ssh_edge_download_task = SSHOperator( task_id="download_orders", ssh_conn_id="cloudera", command=download_order_command, ) import_customers_info = SSHOperator( task_id="import_customers_from_sql", ssh_conn_id="cloudera", command=load_customer_info_cmd() )
catchup=False) as dag: creating_table = SqliteOperator(task_id='creating_table', sqlite_conn_id='db_sqlite', sql=''' CREATE TABLE IF NOT EXISTS users( firstname TEXT NOT NULL, lastname TEXT NOT NULL, country TEXT NOT NULL, username TEXT NOT NULL, password TEXT NOT NULL, email TEXT NOT NULL PRIMARY KEY ); ''') is_api_avilabel = HttpSensor(task_id='is_api_avilabel', http_conn_id='user_api', endpoint='api/') extracting_user = SimpleHttpOperator( task_id='extracting_user', http_conn_id='user_api', endpoint='api/', method='GET', response_filter=lambda response: json.loads(response.text), log_response=True) processing_user = PythonOperator( task_id='processing_user', python_callable=_processing_user, )
create_table = SqliteOperator( dag=dag, task_id="create_table", sqlite_conn_id="db_sqlite", sql=f""" CREATE TABLE IF NOT EXISTS {_TABLE_NAME} ( name TEXT NOT NULL, country TEXt NOT NULL ); """, ) check_api_available = HttpSensor( dag=dag, task_id="check_api_available", http_conn_id=_API_NAME, endpoint="api/", ) extract_user = SimpleHttpOperator( dag=dag, task_id="extract_user", http_conn_id=_API_NAME, endpoint="api/", method="GET", response_filter=lambda response: json.loads(response.text), log_response=True, ) process_user = PythonOperator( dag=dag,
creating_table = SqliteOperator(task_id="creating_table", sqlite_conn_id="db_sqlite", sql=""" CREATE TABLE IF NOT EXISTS users( firstname TEXT NOT NULL, lastname TEXT NOT NULL, country TEXT NOT NULL, username TEXT NOT NULL, password TEXT NOT NULL, email TEXT NOT NULL PRIMARY KEY ); """) is_api_available = HttpSensor(task_id="is_api_available", http_conn_id="user_api", endpoint="api/") extracting_user = SimpleHttpOperator( task_id="extracting_user", http_conn_id="user_api", endpoint="api/", method="GET", response_filter=lambda response: json.loads(response.text), log_response=False) processing_user = PythonOperator(task_id="processing_user", python_callable=_processing_user) storing_user = BashOperator( task_id="storing_user",
creating_table = SqliteOperator(task_id='creating_table', sqlite_conn_id='db_sqlite', sql=''' CREATE TABLE IF NOT EXISTS users ( firstname TEXT NOT NULL, lastname TEXT NOT NULL, country TEXT NOT NULL, username TEXT NOT NULL, password TEXT NOT NULL, email TEXT NOT NULL PRIMARY KEY ); ''') is_api_available = HttpSensor(task_id='is_api_available', http_conn_id='user_api', endpoint='api/') extracting_user = SimpleHttpOperator( task_id='extracting_user', http_conn_id='user_api', endpoint='api/', method='GET', response_filter=lambda response: json.loads(response.text), log_response=True) processing_user = PythonOperator(task_id='processing_user', python_callable=_processing_user) storing_user = BashOperator( task_id='storing_user',
# task_id='fail_task', # bash_command='exit 1', # on_failure_callback=slack_failed_task, # provide_context=True # ) with DAG(dag_id='forex_data_pipeline', default_args=default_args, schedule_interval='@daily', catchup=False) as dag: is_forex_rates_available = HttpSensor( task_id='is_forex_rates_available', method='GET', http_conn_id='forex_api', endpoint='latest', response_check=lambda response: 'rates' in response.text, poke_interval=5, timeout=20 ) is_forex_currencies_file_available = FileSensor( task_id='is_forex_currencies_file_available', fs_conn_id='forex_path', filepath='forex_currencies.csv', poke_interval=5, timeout=20 ) downloading_rates = PythonOperator( task_id='downloading_rates',