def run_flow_and_wait_for_completion():
    run_flow_task = SimpleHttpOperator(
        task_id='run_flow',
        endpoint='/v4/jobGroups',
        data=json.dumps({
            "wrangledDataset": {"id": int(recipe_id)},
            "runParameters": {
                "overrides": {
                    "data": [{"key": "region", "value": str(region)}]
                }
            }
        }),
        headers=headers,
        xcom_push=True,
        dag=dag,
    )

    wait_for_flow_run_to_complete = HttpSensor(
        task_id='wait_for_flow_run_to_complete',
        endpoint='/v4/jobGroups/{{ json.loads(ti.xcom_pull(task_ids="run_flow"))["id"] }}?embed=jobs.errorMessage',
        headers=headers,
        response_check=check_flow_run_complete,
        poke_interval=10,
        dag=dag,
    )

    run_flow_task.set_downstream(wait_for_flow_run_to_complete)

    return wait_for_flow_run_to_complete
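# check_flow_run_complete is referenced by the sensor above but not defined in
# this snippet. A minimal sketch, assuming the Dataprep jobGroups endpoint
# reports a top-level "status" of "Complete" / "Failed" (hypothetical helper):
from airflow.exceptions import AirflowException

def check_flow_run_complete(response):
    status = response.json().get('status')
    if status == 'Failed':
        raise AirflowException('Dataprep job group failed: %s' % response.text)
    return status == 'Complete'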
def test_get(self):
    t = SimpleHttpOperator(
        task_id='get_op',
        method='GET',
        endpoint='/search',
        data={"client": "ubuntu", "q": "airflow"},
        headers={},
        dag=self.dag)
    t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_get_response_check(self):
    t = SimpleHttpOperator(
        task_id='get_op',
        method='GET',
        endpoint='/search',
        data={"client": "ubuntu", "q": "airflow"},
        response_check=lambda response: ("airbnb/airflow" in response.text),
        headers={},
        dag=self.dag)
    t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def create_subdag(default_args, subdag_id, job_param_dict, timeout):
    subdag = DAG(dag_id=subdag_id,
                 default_args=default_args,
                 schedule_interval=None,
                 catchup=False)

    trigger_job_http_op = SimpleHttpOperator(
        task_id='http_post_to_databricks',
        http_conn_id='databricks',
        endpoint='/api/2.0/jobs/run-now',
        method='POST',
        headers={'Content-Type': 'application/json'},
        data=json.dumps(job_param_dict),
        xcom_push=True,
        response_check=lambda response: response.json().get('run_id') is not None,
        dag=subdag)

    run_id_extractor = PythonOperator(task_id='extract_run_id',
                                      provide_context=True,
                                      python_callable=extract_run_id,
                                      dag=subdag)

    state_http_sensor = HttpSensor(
        task_id='sensor_job_state',
        http_conn_id='databricks',
        timeout=timeout,
        method='GET',
        endpoint='/api/2.0/jobs/runs/get',
        request_params={'run_id': "{{ ti.xcom_pull(task_ids='extract_run_id') }}"},
        response_check=check_state,
        poke_interval=30,
        dag=subdag)

    fetch_result_http_op = SimpleHttpOperator(
        task_id='http_get_to_databricks',
        http_conn_id='databricks',
        method='GET',
        data={'run_id': "{{ ti.xcom_pull(task_ids='extract_run_id') }}"},
        endpoint='/api/2.0/jobs/runs/get-output',
        xcom_push=True,
        response_check=lambda response: response.json()['metadata']['state'].get('result_state') == 'SUCCESS',
        dag=subdag)

    result_extractor = PythonOperator(task_id='extract_result',
                                      provide_context=True,
                                      python_callable=extract_result,
                                      dag=subdag)

    trigger_job_http_op >> run_id_extractor >> state_http_sensor >> fetch_result_http_op >> result_extractor

    return subdag
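# extract_run_id, check_state, and extract_result are referenced above but not
# defined in this snippet. Minimal sketches of the first two, assuming the
# Databricks REST payload shapes (hypothetical helpers; extract_result would
# mirror extract_run_id against the get-output response):
import json

def extract_run_id(**context):
    # the run-now response was pushed to XCom as a raw JSON string
    response_text = context['ti'].xcom_pull(task_ids='http_post_to_databricks')
    return json.loads(response_text)['run_id']

def check_state(response):
    # /api/2.0/jobs/runs/get reports a top-level "state" object; the sensor
    # succeeds once the run reaches a terminal life-cycle state
    return response.json()['state']['life_cycle_state'] in (
        'TERMINATED', 'SKIPPED', 'INTERNAL_ERROR')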
def test_capture_hook_logs():
    http_task = SimpleHttpOperator(task_id="capture_logs_http_task", endpoint="foo")
    connections = [
        Connection(conn_id="http_default", host="https://mycoolwebsite.com")
    ]
    dagster_op = airflow_operator_to_op(http_task, connections=connections)

    @job
    def my_job():
        dagster_op()

    with instance_for_test() as instance:
        with responses.RequestsMock() as rsps:
            rsps.add(rsps.POST, "https://mycoolwebsite.com/foo", body="foo")
            result = my_job.execute_in_process(instance=instance)
            event_records = [
                lr for lr in instance.event_log_storage.get_logs_for_run(result.run_id)
                if "https://mycoolwebsite.com/foo" in lr.user_message
            ]
            assert len(event_records) == 1
def test_simple_http_operator(test_dag, mocker):
    mocker.patch.object(
        BaseHook,
        "get_connection",
        return_value=Connection(schema="https", host="api.sunrise-sunset.org"))

    def _check_light(sunset_sunrise_response):
        results = sunset_sunrise_response.json()["results"]
        sunrise = datetime.strptime(results["sunrise"][:-6], "%Y-%m-%dT%H:%M:%S")
        sunset = datetime.strptime(results["sunset"][:-6], "%Y-%m-%dT%H:%M:%S")
        if sunrise < datetime.utcnow() < sunset:
            print("It is light!")
        else:
            print("It is dark!")
        return True

    is_it_light = SimpleHttpOperator(
        task_id="is_it_light",
        http_conn_id="my_http_conn",
        endpoint="json",
        method="GET",
        data={"lat": "52.370216", "lng": "4.895168", "formatted": "0"},
        response_check=_check_light,
        dag=test_dag,
    )

    pytest.helpers.run_task(task=is_it_light, dag=test_dag)
def test_simple_http_operator_no_external_call(test_dag, mocker):
    mocker.patch.object(
        BaseHook,
        "get_connection",
        return_value=Connection(schema="https", host="api.sunrise-sunset.org"))
    mock_run = mocker.patch.object(HttpHook, "run")

    is_it_light = SimpleHttpOperator(
        task_id="is_it_light",
        http_conn_id="my_http_conn",
        endpoint="json",
        method="GET",
        data={
            "lat": "52.370216",
            "lng": "4.895168",
            "date": "{{ ds }}",
            "formatted": "0"
        },
        dag=test_dag,
    )

    pytest.helpers.run_task(task=is_it_light, dag=test_dag)

    mock_run.assert_called_once()
    assert mock_run.call_args_list[0][0][1] == {
        "lat": "52.370216",
        "lng": "4.895168",
        "date": test_dag.start_date.strftime("%Y-%m-%d"),
        "formatted": "0",
    }
def test_response_in_logs_after_failed_check(self, m):
    """
    Test that when using SimpleHttpOperator with log_response=True,
    the response is logged even if response_check fails
    """

    def response_check(response):
        return response.text != 'invalid response'

    m.get('http://www.example.com', text='invalid response')
    operator = SimpleHttpOperator(
        task_id='test_HTTP_op',
        method='GET',
        endpoint='/',
        http_conn_id='HTTP_EXAMPLE',
        log_response=True,
        response_check=response_check,
    )

    with mock.patch.object(operator.log, 'info') as mock_info:
        self.assertRaises(AirflowException, operator.execute, None)
        calls = [mock.call('Calling HTTP method'), mock.call('invalid response')]
        mock_info.assert_has_calls(calls, any_order=True)
def on_failure_callback(context):
    """
    Define the callback to post an alert via the weixin HTTP connection
    if a failure is detected in the workflow
    :return: operator.execute
    """
    operator = SimpleHttpOperator(
        task_id='weixin_http',
        http_conn_id='http_weixin',
        method='POST',
        endpoint='',
        data=json.dumps({
            "content": "Airflow af_adt_advertiser_revenue_report_ag exec error!",
            "app": "BigdataMonitor"
        }),
        # data=json.dumps({"content": "Airflow exec successfull!", "app": "HostMonitoring", "username": "******"}),
        headers={"Content-Type": "application/json"},
    )
    return operator.execute(context=context)
def test_response_in_logs(self):
    """
    Test that when using SimpleHttpOperator with 'GET' against the
    HTTP_GOOGLE connection, the log contains 'Google' in it
    """
    operator = SimpleHttpOperator(
        task_id='test_HTTP_op',
        method='GET',
        endpoint='/',
        http_conn_id='HTTP_GOOGLE',
        log_response=True,
    )

    with patch.object(operator.log, 'info') as mock_info:
        operator.execute(None)
        mock_info.assert_called_with(AnyStringWith('Google'))
def get_grupo_dados(dag, previous_task, next_task, dados):
    for dado in dados:
        extracao = SimpleHttpOperator(
            task_id='Extracao_de_dados_{}'.format(dado),
            endpoint='url...',
            method='GET',
            trigger_rule="all_success",
            dag=dag)

        email_erro = EmailOperator(
            task_id='Email_Erro_{}'.format(dado),
            to='*****@*****.**',
            subject='Airflow Alert Erro',
            html_content='Erro ao realizar captura de {}'.format(dado),
            dag=dag,
            trigger_rule="all_failed",
            default_args={
                'email': ['*****@*****.**'],
                'email_on_failure': True,
                'email_on_retry': True,
                'retries': 2,
                'retry_delay': timedelta(minutes=5)
            })

        salvar_base_raw = BranchPythonOperator(
            task_id='Salvar_DB_Raw_{}'.format(dado),
            python_callable=salva_dados_db_raw,
            trigger_rule="all_success",
            dag=dag)

        stop_falha = BranchPythonOperator(
            task_id='Stop_erro_extracao_{}'.format(dado),
            python_callable=salva_dados_db_raw,
            trigger_rule="dummy",
            dag=dag)

        transformacao = BranchPythonOperator(
            task_id='Transformacao_dados_{}'.format(dado),
            python_callable=transforma_dados,
            trigger_rule="one_success",
            dag=dag)

        salvar_base_staging = BranchPythonOperator(
            task_id='Salvar_DB_Staging_{}'.format(dado),
            python_callable=salva_dados_db_staging,
            trigger_rule="all_success",
            dag=dag)

        # defining the flow
        previous_task >> extracao
        extracao >> email_erro
        extracao >> salvar_base_raw
        email_erro >> stop_falha
        stop_falha >> transformacao
        salvar_base_raw >> transformacao
        transformacao >> salvar_base_staging
        salvar_base_staging >> next_task
def test_response_in_logs(self, m):
    """
    Test that when using SimpleHttpOperator with 'GET',
    the mocked response body appears in the log
    """
    m.get('http://www.example.com', text='Example.com fake response')
    operator = SimpleHttpOperator(
        task_id='test_HTTP_op',
        method='GET',
        endpoint='/',
        http_conn_id='HTTP_EXAMPLE',
        log_response=True,
    )

    with mock.patch.object(operator.log, 'info') as mock_info:
        operator.execute(None)
        mock_info.assert_called_with('Example.com fake response')
def run_flow_and_wait_for_completion():
    run_flow_task = SimpleHttpOperator(
        http_conn_id='dataprep',
        method='POST',
        task_id='run_flow',
        endpoint='/v4/jobGroups',
        data=json.dumps({
            "wrangledDataset": {"id": int(output_id)},
            "runParameters": {
                "overrides": {
                    "data": [{"key": "country", "value": region}]
                }
            }
        }),
        headers=headers,
        xcom_push=True,
        dag=dag,
    )
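# headers, output_id, and region are assumed to be defined at module level.
# A minimal sketch of the headers, assuming the Dataprep access token is kept
# in an Airflow Variable named 'dataprep_token' (hypothetical name):
from airflow.models import Variable

headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer {}'.format(Variable.get('dataprep_token')),
}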
def run_function():
    run_flow_task = SimpleHttpOperator(
        http_conn_id='cloud_function',
        method='POST',
        task_id='trigger_function',
        endpoint='run_query',
        data=json.dumps(json_data),
        headers={"Content-Type": "application/json"},
        xcom_push=True,
        dag=dag,
    )
def create_get_config_task():
    t = SimpleHttpOperator(
        endpoint='/api/services/a305b9c3-17e9-4d98-b27b-5323e26fdd6d/schemas/b341dc52-dee4-4d01-81d1-a5c3daab257a/settings',
        http_conn_id='configuration',
        method='GET',
        response_check=lambda r: r.status_code == 200,
        xcom_push=True,
        log_response=True,
        task_id='get-config',
        dag=dag)
    start >> t >> receive_response
    return t
def process_new_accounts(ds, **kwargs):
    """
    The sensor has detected new ids to process, so we call the http operator for each
    """
    select_sql = "SELECT id from audiences where created_at > '{ds}'".format(ds=ds)
    print("running select sql {}".format(select_sql))
    pg_hook = PostgresHook(postgres_conn_id='letterpress-app')
    connection = pg_hook.get_conn()
    cursor = connection.cursor()
    cursor.execute(select_sql)
    account_ids = cursor.fetchall()

    for account_id in account_ids:
        # Create a sub-dag for each new id; this is the child dag name
        export_account_task_name = 'task_process_account_%s' % account_id
        print("starting task: {}".format(export_account_task_name))
        export_account_dag = DAG(
            dag_id=export_account_task_name,
            default_args=default_args,
            schedule_interval='*/5 * * * *'  # '@once'
        )

        # This hits the account export url, _endpoint/account/export?id={ACCOUNT_ID}&token={AUTH_TOKEN}
        account_export_endpoint_task = SimpleHttpOperator(
            task_id='account_export_endpoint_task_%s' % (account_id),
            http_conn_id='application',
            method='GET',
            endpoint='_endpoint/account/export',
            data={
                "id": "{}".format(account_id),
                "token": Variable.get("APPLICATION_ACCESS_TOKEN")
            },  # http params
            response_check=response_check,  # will retry based on default_args if it fails
            dag=export_account_dag)

        print("Created account processing DAG {}".format(export_account_dag.dag_id))
        # register the dynamically created DAG in the global namespace
        globals()[export_account_task_name] = export_account_dag

    return account_ids
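# response_check is referenced above but not defined in this snippet.
# A minimal sketch, assuming the export endpoint answers 2xx on success
# (hypothetical helper; a False return makes the task fail and retry):
def response_check(response):
    return response.ok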
def data_api_call(connection_id=CONNECTION_ID):
    return SimpleHttpOperator(
        task_id=TASK_DATA_API_CALL,
        http_conn_id=connection_id,  # use the parameter rather than the module constant
        method="GET",
        endpoint="/",
        # data="{\"id\":111333222}",
        headers={"Content-Type": "application/json"},
        # response will be pushed to xcom with COLLABORATION_TASK_ID
        xcom_push=True,
        log_response=True,
        extra_options={"verify": False, "cert": None})
def task_solrgetnumdocs(dag, alias_name, taskid, conn_id):
    solr_endpoint_select = '/solr/' + alias_name + '/select'

    return SimpleHttpOperator(
        task_id=taskid,
        method='GET',
        http_conn_id=conn_id,
        endpoint=solr_endpoint_select,
        data={
            "defType": "edismax",
            "facet": "false",
            "indent": "on",
            "q": "*:*",
            "wt": "json",
            "rows": "0"
        },
        headers={},
        xcom_push=True,
        dag=dag)
def swap_sc_alias(dag, sc_conn_id, sc_coll_name, sc_configset_name):
    """Create or point an existing SolrCloud alias at an existing SolrCloud collection."""
    task_instance = SimpleHttpOperator(
        task_id="solr_alias_swap",
        method="GET",
        http_conn_id=sc_conn_id,
        endpoint="solr/admin/collections",
        data={
            "action": "CREATEALIAS",
            "name": sc_configset_name,
            "collections": [sc_coll_name]
        },
        headers={},
        dag=dag,
        log_response=True
    )
    return task_instance
def alert_api_function(context):
    response = json.load(open(json_path, 'r'))

    # get info from the failing task's context
    text_description = '''
    Airflow : airflow
    DAG : {dag_id}
    Task : {task_id}
    error : {error}
    '''.format(dag_id=context['task_instance'].dag_id,
               task_id=context['task_instance'].task_id,
               error=context['exception'].args)
    response['text'] = text_description[:1024]

    return SimpleHttpOperator(
        task_id='alert_api',
        method='POST',
        http_conn_id='test_http',
        endpoint=test_http_endpoint,
        data=json.dumps(response),  # serialize so nested fields survive the POST
        headers={'Content-Type': 'application/json'},
        log_response=True,
        # extra_options={'verify': False}  # disables ssl verification
    ).execute(context)
def test_simple_http_operator(test_dag, mocker):
    """Example test for SimpleHttpOperator"""
    mocker.patch.object(
        BaseHook,
        "get_connection",
        return_value=Connection(schema="https", host="api.sunrise-sunset.org"),
    )

    def _check_light(sunset_sunrise_response):
        results = sunset_sunrise_response.json()["results"]
        # Example: 2019-02-20T06:59:30+00:00
        # Note: there is NO strftime format for +00:00! API docs say the timezone is
        # always UTC, i.e. +00:00, so considered simplest solution to remove and not use
        # 3rd party library. Python 3.7 can also do datetime.datetime.fromisoformat().
        sunrise = datetime.strptime(results["sunrise"][:-6], "%Y-%m-%dT%H:%M:%S")
        sunset = datetime.strptime(results["sunset"][:-6], "%Y-%m-%dT%H:%M:%S")
        if sunrise < datetime.utcnow() < sunset:
            print("It is light!")
        else:
            print("It is dark!")
        return True

    is_it_light = SimpleHttpOperator(
        task_id="is_it_light",
        http_conn_id="my_http_conn",
        endpoint="json",
        method="GET",
        data={"lat": "52.370216", "lng": "4.895168", "formatted": "0"},
        response_check=_check_light,
        dag=test_dag,
    )

    pytest.helpers.run_task(task=is_it_light, dag=test_dag)
def test_http_task():
    http_task = SimpleHttpOperator(task_id="http_task", endpoint="foo")
    connections = [
        Connection(conn_id="http_default", host="https://mycoolwebsite.com")
    ]
    dagster_op = airflow_operator_to_op(http_task, connections=connections)

    @job
    def my_job():
        dagster_op()

    with responses.RequestsMock() as rsps:
        rsps.add(rsps.POST, "https://mycoolwebsite.com/foo", body="foo")
        result = my_job.execute_in_process()
        assert result.success
        assert len(rsps.calls) == 1
        response = rsps.calls[0].response
        assert response.content == b"foo"
def task_solrgetnumdocs(dag, core_name, taskid):
    solr_endpoint_select = '/solr/' + core_name + '/select'

    # Note: connections defined through environment variables use the
    # AIRFLOW_CONN_<CONN_ID> naming scheme, and http_conn_id should reference
    # the id *without* that prefix (i.e. 'SOLR_LEADER'); the prefixed id below
    # only resolves if a connection was literally created under that name.
    t1 = SimpleHttpOperator(
        task_id=taskid,
        method='GET',
        http_conn_id='AIRFLOW_CONN_SOLR_LEADER',
        endpoint=solr_endpoint_select,
        data={
            "defType": "edismax",
            "facet": "false",
            "indent": "on",
            "q": "*:*",
            "wt": "json",
            "rows": "0"
        },
        headers={},
        xcom_push=True,
        dag=dag)
    return t1
def create_sc_collection(dag, sc_conn_id, sc_coll_name, sc_coll_repl, sc_configset_name):
    """Creates a new SolrCloud collection."""
    task_instance = SimpleHttpOperator(
        task_id="create_collection",
        method="GET",
        http_conn_id=sc_conn_id,
        endpoint="solr/admin/collections",
        data={
            "action": "CREATE",
            "name": sc_coll_name,
            "numShards": "1",
            "replicationFactor": sc_coll_repl,
            "maxShardsPerNode": "1",
            "collection.configName": sc_configset_name
        },
        headers={},
        dag=dag,
        log_response=True
    )
    return task_instance
def run_notebook_operator(input_nb, output_nb, dag):
    webhook_connection = {
        'conn_id': 'jupyter_webhook',
        'conn_type': 'http',
        'host': 'notebook',
        'port': 3000
    }

    # HACK: patch bug in puckel/docker
    # passing in connections via env vars does not work, so we create an
    # Airflow Connection object manually to satisfy the 'http_conn_id' prop
    create_airflow_conn(webhook_connection)

    return SimpleHttpOperator(
        http_conn_id="jupyter_webhook",
        task_id=f"run_notebook__{input_nb}",
        method="POST",
        endpoint="/",
        data=json.dumps({
            "input_nb": input_nb,
            "output_nb": output_nb,
        }),
        headers={"Content-Type": "application/json"},
        dag=dag)
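# create_airflow_conn is referenced above but not defined in this snippet.
# A minimal sketch that upserts the Connection row through the metadata-DB
# session (assumption: 1.10-era internals, matching the puckel/docker hack):
from airflow import settings
from airflow.models import Connection

def create_airflow_conn(conn_params):
    session = settings.Session()
    exists = session.query(Connection).filter(
        Connection.conn_id == conn_params['conn_id']).first()
    if not exists:
        session.add(Connection(**conn_params))
        session.commit()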
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('example_http_operator',
          default_args=default_args,
          tags=['example'],
          start_date=datetime.datetime.now() - datetime.timedelta(days=1))
dag.doc_md = __doc__

# task_post_op, task_get_op and task_put_op are examples of tasks created by instantiating operators
# [START howto_operator_http_task_post_op]
task_post_op = SimpleHttpOperator(
    task_id='post_op',
    endpoint='post',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    response_check=lambda response: response.json()['json']['priority'] == 5,
    dag=dag,
)
# [END howto_operator_http_task_post_op]

# [START howto_operator_http_task_post_op_formenc]
task_post_op_formenc = SimpleHttpOperator(
    task_id='post_op_formenc',
    endpoint='post',
    data="name=Joe",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag,
)
# [END howto_operator_http_task_post_op_formenc]

# [START howto_operator_http_task_get_op]
task_get_op = SimpleHttpOperator(
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('example_http_operator', default_args=default_args)
dag.doc_md = __doc__

# t1, t2 and t3 are examples of tasks created by instantiating operators
t1 = SimpleHttpOperator(
    task_id='post_op',
    endpoint='api/v1.0/nodes',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    response_check=lambda response: len(response.json()) == 0,
    dag=dag)

t5 = SimpleHttpOperator(
    task_id='post_op_formenc',
    endpoint='nodes/url',
    data="name=Joe",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag)

t2 = SimpleHttpOperator(
    task_id='get_op',
    method='GET',
    endpoint='api/v1.0/nodes',
    default_args=default_args,
    schedule_interval="0 1 * * 2",  # Weekly at 1 AM on Tuesday
    start_date=days_ago(2),
    tags=['permit'],
) as dag:

    yesterday = (datetime.now() - timedelta(1)).strftime('%Y-%m-%d')
    lastWeek = (datetime.now() - timedelta(8)).strftime('%Y-%m-%d')
    interval = '{lastWeek}/{yesterday}'.format(lastWeek=lastWeek, yesterday=yesterday)
    formattedDate = (datetime.now() - timedelta(1)).strftime('%Y%m%d')

    templateContent = downloadTemplate(templateUrl)
    indexSpec = createIndexSpec(templateContent, permitDataSource, interval,
                                basePath, formattedDate,
                                'nvl("dummyCol1", \'Dallas\')')

    start = DummyOperator(task_id='start')

    index = SimpleHttpOperator(
        task_id='permit-index-' + yesterday,
        method='POST',
        http_conn_id='druid-cluster',
        endpoint='druid/indexer/v1/task',
        headers={"Content-Type": "application/json"},
        data=json.dumps(indexSpec),
        response_check=lambda response: response.status_code == 200)

    start >> index
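# downloadTemplate and createIndexSpec are referenced above but not defined in
# this snippet. Minimal sketches, assuming the template is a Druid ingestion
# spec containing placeholder tokens (the token names here are hypothetical):
import json
import requests

def downloadTemplate(url):
    # fetch the raw index-spec template
    return requests.get(url).text

def createIndexSpec(template, data_source, interval, base_path, date, transform):
    # fill the template's placeholders and return a JSON-ready dict
    spec = (template.replace('{DATA_SOURCE}', data_source)
                    .replace('{INTERVAL}', interval)
                    .replace('{BASE_PATH}', base_path)
                    .replace('{DATE}', date)
                    .replace('{TRANSFORM}', transform))
    return json.loads(spec)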
dag = DAG(
    'ReclameAqui_scraping_scores',
    default_args=default_args,
    description='Collects scores from the Reclame Aqui site and stores them in mongodb',
    schedule_interval=timedelta(days=1),
)

company = 'Cielo'
collection = 'scores'
args_t1 = {"company": company}

t1 = SimpleHttpOperator(
    task_id='consulta_scores_reclameaqui',
    endpoint='http://api_consultareclameaqui:5000/{}'.format(collection),
    method='GET',
    response_check=lambda response: response.status_code == 200,  # response_check must be a callable, not True
    xcom_push=True,
    log_response=True,
    data=args_t1)

t2 = SimpleHttpOperator(
    task_id='persiste_dados_mongodb',
    endpoint='http://api_persistenciadados:5005/{}'.format(collection),
    method='POST',
    headers={'content-type': 'application/json'},
    response_check=lambda response: response.status_code == 200,
    log_response=True,
    data=args_t2,  # 'kwargs' is not an operator argument; 'data' carries the request body
    # data = t1.xcom_pull.re(context=['consulta_scores_reclameaqui'],
    #                        task_ids='consulta_scores_reclameaqui') -- this is not working
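# The commented-out xcom_pull above fails because xcom_pull is only available
# at runtime, through the task instance, not at DAG-definition time. Since
# 'data' is a templated field on SimpleHttpOperator, the usual fix is to let
# Jinja pull t1's pushed response when t2 runs (sketch of the intended handoff):
#
#   data="{{ ti.xcom_pull(task_ids='consulta_scores_reclameaqui') }}",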
with DAG(
        dag_id='omdena-http-pipelines-proof-of-concept',
        default_args=default_args,
        schedule_interval=None,
) as dag:
    detect_face = SimpleHttpOperator(
        task_id='face_detection_haar_cascade',
        endpoint=face_detection_pipeline_endpoint,
        http_conn_id='azure_pipelines_http_endpoint',
        method='POST',
        headers={
            'Authorization': 'Bearer ' + pipeline_token,
            'Content-Type': 'application/json'
        },
        data=json.dumps({
            "ExperimentName": "Face_detection_Haar_cascade_pipeline_REST",
            "RunSource": "SDK",
            "ParameterAssignments": {"sample_num": "1"}
        }),
        log_response=True,
        xcom_push=True)

    wait_face_detection_pipeline = PythonSensor(
        task_id='sense_face_detection_pipeline_end',
        poke_interval=10,
        timeout=60 * 10,  # 10 minutes
        python_callable=wait_till_pipeline_end,
        op_kwargs={