def test_namespace_from_connection(self, mock_create_namespaced_crd, mock_kubernetes_hook):
    op = SparkKubernetesOperator(
        application_file=TEST_VALID_APPLICATION_JSON,
        dag=self.dag,
        kubernetes_conn_id='kubernetes_with_namespace',
        task_id='test_task_id',
    )
    op.execute(None)
    mock_kubernetes_hook.assert_called_once_with()
    mock_create_namespaced_crd.assert_called_with(
        body=TEST_APPLICATION_DICT,
        group='sparkoperator.k8s.io',
        namespace='mock_namespace',
        plural='sparkapplications',
        version='v1beta2',
    )
def test_create_application_from_json_with_api_group_and_version(
    self, mock_create_namespaced_crd, mock_kubernetes_hook
):
    api_group = 'sparkoperator.example.com'
    api_version = 'v1alpha1'
    op = SparkKubernetesOperator(
        application_file=TEST_VALID_APPLICATION_JSON,
        dag=self.dag,
        kubernetes_conn_id='kubernetes_default_kube_config',
        task_id='test_task_id',
        api_group=api_group,
        api_version=api_version,
    )
    op.execute(None)
    mock_kubernetes_hook.assert_called_once_with()
    mock_create_namespaced_crd.assert_called_with(
        body=TEST_APPLICATION_DICT,
        group=api_group,
        namespace='default',
        plural='sparkapplications',
        version=api_version,
    )
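Both test methods receive mock_create_namespaced_crd and mock_kubernetes_hook as parameters, which implies @patch decorators on the methods (the innermost decorator binds to the first mock argument). A minimal sketch of the assumed setup; the exact patch targets are assumptions based on how SparkKubernetesOperator reaches the Kubernetes API:

from unittest.mock import patch

# Assumed patch targets: the hook's client getter and the CustomObjectsApi
# method that creates the SparkApplication custom resource. The decorator
# closest to the function supplies the first mock argument.
@patch('airflow.providers.cncf.kubernetes.hooks.kubernetes.KubernetesHook.get_conn')
@patch('kubernetes.client.api.custom_objects_api.CustomObjectsApi.create_namespaced_custom_object')
def test_namespace_from_connection(self, mock_create_namespaced_crd, mock_kubernetes_hook):
    ...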
dag = DAG(
    'spark_pi',
    default_args=default_args,
    schedule_interval=timedelta(days=1),
    tags=['example'],
)

# spark = open("example_spark_kubernetes_operator_pi.yaml").read()

submit = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="sampletenant",
    application_file="example_spark_kubernetes_operator_pi.yaml",
    kubernetes_conn_id="kubernetes_in_cluster",
    do_xcom_push=True,
    dag=dag,
    api_group="sparkoperator.hpe.com",
)

sensor = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="sampletenant",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_in_cluster",
    dag=dag,
    api_group="sparkoperator.hpe.com",
    attach_log=True,
)
}
# [END default_args]

# [START instantiate_dag]
dag = DAG(
    'spark_pi',
    default_args=default_args,
    description='submit spark-pi as sparkApplication on kubernetes',
    schedule_interval=None,
    start_date=days_ago(1),
    user_defined_macros={'json': json},
)

t1 = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="spark-work",
    application_file="example_spark_kubernetes_spark_pi.yaml",
    kubernetes_conn_id="kubernetes_default",
    do_xcom_push=True,
    dag=dag,
)

t2 = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="spark-work",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_default",
    attach_log=True,
    dag=dag,
)

t1 >> t2
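Each of these DAGs points application_file at a SparkApplication manifest that the operator submits as a Kubernetes custom resource (the body=TEST_APPLICATION_DICT in the tests above is such a document in dict form). A minimal sketch of what that manifest typically contains; the image, main application file, and resource sizing below are illustrative assumptions, not values taken from these examples:

# A minimal SparkApplication body in dict form, matching the
# sparkoperator.k8s.io/v1beta2 CRD schema. Image and sizing are assumed.
spark_pi_application = {
    "apiVersion": "sparkoperator.k8s.io/v1beta2",
    "kind": "SparkApplication",
    "metadata": {"name": "spark-pi", "namespace": "spark-work"},
    "spec": {
        "type": "Scala",
        "mode": "cluster",
        "image": "gcr.io/spark-operator/spark:v3.1.1",
        "mainClass": "org.apache.spark.examples.SparkPi",
        "mainApplicationFile": "local:///opt/spark/examples/jars/spark-examples_2.12-3.1.1.jar",
        "sparkVersion": "3.1.1",
        "driver": {"cores": 1, "memory": "512m", "serviceAccount": "spark"},
        "executor": {"cores": 1, "instances": 1, "memory": "512m"},
    },
}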
}
# [END default_args]

# [START instantiate_dag]
dag = DAG(
    'spark_pi',
    default_args=default_args,
    schedule_interval=timedelta(minutes=5),
    dagrun_timeout=timedelta(minutes=5),
    tags=['example'],
)

# Read the manifest contents and pass them inline; application_file also
# accepts a path, as the other examples show.
spark = open("example_spark_kubernetes_operator_spark_pi.yaml").read()

t1 = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="mycspace",
    application_file=spark,
    kubernetes_conn_id="kubernetes_default",
    do_xcom_push=True,
    dag=dag,
)

t2 = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="mycspace",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_default",
    dag=dag,
)

t1 >> t2
default_args = {
    'owner': 'Matheus Jericó',
    'start_date': datetime(2021, 4, 1),
    'depends_on_past': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=10),
}

with DAG('minio-fifa-spark-operator',
         default_args=default_args,
         schedule_interval='@daily',
         tags=['development', 's3', 'minio', 'spark-operator']) as dag:

    etl_fifa_spark_operator = SparkKubernetesOperator(
        task_id='etl_fifa_spark_operator',
        namespace='processing',
        application_file='etl-fifa.yaml',
        kubernetes_conn_id='minikube',
        do_xcom_push=True,
    )

    monitor_spark_app_status = SparkKubernetesSensor(
        task_id='monitor_spark_app_status',
        namespace="processing",
        application_name="{{ task_instance.xcom_pull(task_ids='etl_fifa_spark_operator')['metadata']['name'] }}",
        kubernetes_conn_id="minikube",
    )

    delete_s3_file_raw_zone = S3DeleteObjectsOperator(
        task_id='delete_s3_file_raw_zone',
        bucket=RAW_ZONE,
        keys='data.csv',
        aws_conn_id='minio',
    )
dag = DAG(
    'spark_pi',
    default_args=default_args,
    schedule_interval=timedelta(days=1),
    tags=['example'],
)

# spark = open("example_spark_kubernetes_operator_pi.yaml").read()

submit = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="spark-operator",
    application_file="example_spark_kubernetes_operator_pi.yaml",
    kubernetes_conn_id="kubernetes_in_cluster",
    do_xcom_push=True,
    dag=dag,
    # api_group="sparkoperator.hpe.com",
    enable_impersonation_from_ldap_user=False,
)

sensor = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="spark-operator",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_in_cluster",
    dag=dag,
    # api_group="sparkoperator.hpe.com",
    attach_log=True,
)
# [END default_args]

# [START instantiate_dag]
dag = DAG(
    'spark_pi',
    default_args=default_args,
    schedule_interval=timedelta(days=1),
    tags=['example'],
)

# spark = open("example_spark_kubernetes_operator_pi.yaml").read()

submit = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="mycspace",
    application_file="example_spark_kubernetes_operator_pi.yaml",
    kubernetes_conn_id="kubernetes_in_cluster",
    do_xcom_push=True,
    dag=dag,
)

sensor = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="mycspace",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_in_cluster",
    dag=dag,
)

submit >> sensor
name="download_txtfile", namespace="airflow", image="cirrusci/wget", cmds=[ "/bin/sh", "-c", "mkdir -p /mnt1/data && mkdir -p /mnt1/results && wget https://norvig.com/big.txt -O /mnt1/data/big.txt" ], task_id="download_txtfile", resources=compute_resources, volumes=[volume], volume_mounts=[volume_mount], get_logs=True, dag=dag) spark_task = SparkKubernetesOperator( task_id="spark-wordcount", namespace="airflow", application_file="spark-wordcount.yaml", kubernetes_conn_id="kubernetes_default", dag=dag, ) spark_sensor = SparkKubernetesSensor(task_id="spark-wordcount-monitor", namespace="airflow", application_name=spark_application_name, attach_log=True, kubernetes_conn_id="kubernetes_default", dag=dag) download_txtfile >> spark_task >> spark_sensor
default_args = {
    "owner": "Airflow",
    "start_date": airflow.utils.dates.days_ago(1),
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "email": "*****@*****.**",
    "retries": 1,
    "retry_delay": timedelta(minutes=1),
}

with DAG(dag_id="ddt-spark-k8s-operator",
         schedule_interval="@hourly",
         default_args=default_args,
         catchup=False) as dag:

    t1 = SparkKubernetesOperator(
        task_id='stage_1_submit',
        namespace="ddt-compute",
        application_file="SparkApplication_stage_1.yaml",
        kubernetes_conn_id="kubernetes_default",
        do_xcom_push=True,
    )

    t2 = SparkKubernetesSensor(
        task_id='stage_1_monitor',
        namespace="ddt-compute",
        application_name="{{ task_instance.xcom_pull(task_ids='stage_1_submit')['metadata']['name'] }}",
        kubernetes_conn_id="kubernetes_default",
    )

    t1 >> t2  # view the Spark operator's logs
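The trailing comment asks how to view the Spark logs. Besides attach_log=True on the sensor, the driver pod's log can be read directly with the kubernetes client. A minimal sketch, assuming the spark-on-k8s-operator's "<application-name>-driver" pod naming convention and a hypothetical application named stage-1:

from kubernetes import client, config

config.load_kube_config()  # use config.load_incluster_config() when running in-cluster
core = client.CoreV1Api()

# Driver pods are named "<application-name>-driver" by the operator;
# "stage-1" is an assumed application name here.
print(core.read_namespaced_pod_log(name="stage-1-driver", namespace="ddt-compute"))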