示例#1
0
    def test_execute_with_docker_conn_id_use_hook(self, operator_client_mock,
                                                  operator_docker_hook):
        """With ``docker_conn_id`` set, the client must be obtained via the hook."""
        # Stub Docker API client: every call the operator makes returns a
        # benign value so no real daemon is required.
        docker_client = mock.Mock(name='DockerOperator.APIClient mock',
                                  spec=APIClient)
        docker_client.configure_mock(**{
            'images.return_value': [],
            'create_container.return_value': {'Id': 'some_id'},
            'attach.return_value': [],
            'pull.return_value': [],
            'wait.return_value': {"StatusCode": 0},
        })
        operator_client_mock.return_value = docker_client

        # Operator under test, configured with a connection id.
        operator_kwargs = {
            'image': 'publicregistry/someimage',
            'owner': 'unittest',
            'task_id': 'unittest',
            'docker_conn_id': 'some_conn_id',
        }
        operator = DockerOperator(**operator_kwargs)

        # Stub hook that hands back the stub client.
        docker_hook = mock.Mock(name='DockerHook mock', spec=DockerHook)
        docker_hook.get_conn.return_value = docker_client
        operator_docker_hook.return_value = docker_hook

        operator.execute(None)

        self.assertEqual(
            operator_client_mock.call_count,
            0,
            msg='Client was called on the operator instead of the hook',
        )
        self.assertEqual(
            operator_docker_hook.call_count,
            1,
            msg='Hook was not called although docker_conn_id configured',
        )
        self.assertEqual(
            docker_client.pull.call_count,
            1,
            msg='Image was not pulled using operator client',
        )
示例#2
0
    def test_execute_no_docker_conn_id_no_hook(self, operator_client_mock):
        """Without ``docker_conn_id`` the operator must not ask for a hook."""
        # Stub Docker API client so that execute() runs without a daemon.
        docker_client = mock.Mock(name='DockerOperator.APIClient mock',
                                  spec=APIClient)
        docker_client.configure_mock(**{
            'images.return_value': [],
            'create_container.return_value': {'Id': 'some_id'},
            'attach.return_value': [],
            'pull.return_value': [],
            'wait.return_value': {"StatusCode": 0},
        })
        operator_client_mock.return_value = docker_client

        # Operator under test: note that no docker_conn_id is passed.
        operator = DockerOperator(
            image='publicregistry/someimage',
            owner='unittest',
            task_id='unittest',
        )

        # Replace get_hook on the instance so any call to it is recorded.
        docker_hook = mock.Mock(name='DockerHook mock', spec=DockerHook)
        docker_hook.get_conn.return_value = docker_client
        get_hook_spy = mock.Mock(
            name='DockerOperator.get_hook mock',
            spec=DockerOperator.get_hook,
            return_value=docker_hook,
        )
        operator.get_hook = get_hook_spy

        operator.execute(None)

        self.assertEqual(
            operator.get_hook.call_count,
            0,
            msg='Hook called though no docker_conn_id configured',
        )
示例#3
0
 def test_execute_container_fails(self):
     """A ``StatusCode`` of 1 from ``wait`` must raise ``AirflowException``."""
     self.client_mock.wait.return_value = {"StatusCode": 1}
     failing_operator = DockerOperator(
         image='ubuntu',
         owner='unittest',
         task_id='unittest',
     )
     self.assertRaises(AirflowException, failing_operator.execute, None)
示例#4
0
    def test_execute_tls(self, tls_class_mock):
        """TLS settings must reach the TLS config class and the API client."""
        tls_config = mock.Mock()
        tls_class_mock.return_value = tls_config

        tls_operator = DockerOperator(
            image='ubuntu',
            owner='unittest',
            task_id='unittest',
            docker_url='tcp://127.0.0.1:2376',
            tls_ca_cert='ca.pem',
            tls_client_cert='cert.pem',
            tls_client_key='key.pem',
        )
        tls_operator.execute(None)

        # The TLS config class is built exactly once from the cert settings.
        expected_tls_kwargs = {
            'assert_hostname': None,
            'ca_cert': 'ca.pem',
            'client_cert': ('cert.pem', 'key.pem'),
            'ssl_version': None,
            'verify': True,
        }
        tls_class_mock.assert_called_once_with(**expected_tls_kwargs)

        # The client is expected to receive an https:// URL and the TLS config.
        expected_client_kwargs = {
            'base_url': 'https://127.0.0.1:2376',
            'tls': tls_config,
            'version': None,
        }
        self.client_class_mock.assert_called_once_with(**expected_client_kwargs)
示例#5
0
    def test_execute_tls(self, client_class_mock, tls_class_mock):
        """TLS settings must reach the TLS config class and the API client."""
        # Stub Docker API client returning benign values for every call.
        docker_client = mock.Mock(spec=APIClient)
        docker_client.configure_mock(**{
            'create_container.return_value': {'Id': 'some_id'},
            'create_host_config.return_value': mock.Mock(),
            'images.return_value': [],
            'attach.return_value': [],
            'pull.return_value': [],
            'wait.return_value': {"StatusCode": 0},
        })
        client_class_mock.return_value = docker_client

        tls_config = mock.Mock()
        tls_class_mock.return_value = tls_config

        tls_operator = DockerOperator(
            image='ubuntu',
            owner='unittest',
            task_id='unittest',
            docker_url='tcp://127.0.0.1:2376',
            tls_ca_cert='ca.pem',
            tls_client_cert='cert.pem',
            tls_client_key='key.pem',
        )
        tls_operator.execute(None)

        # The TLS config class is built exactly once from the cert settings.
        expected_tls_kwargs = {
            'assert_hostname': None,
            'ca_cert': 'ca.pem',
            'client_cert': ('cert.pem', 'key.pem'),
            'ssl_version': None,
            'verify': True,
        }
        tls_class_mock.assert_called_once_with(**expected_tls_kwargs)

        # The client is expected to receive an https:// URL and the TLS config.
        expected_client_kwargs = {
            'base_url': 'https://127.0.0.1:2376',
            'tls': tls_config,
            'version': None,
        }
        client_class_mock.assert_called_once_with(**expected_client_kwargs)
示例#6
0
    def test_auto_remove_container_fails(self):
        """With ``auto_remove`` on, a failing run must still remove the container."""
        self.client_mock.wait.return_value = {"StatusCode": 1}

        failing_operator = DockerOperator(
            image='ubuntu',
            owner='unittest',
            task_id='unittest',
            auto_remove=True,
        )
        failing_operator.container = {'Id': 'some_id'}

        with pytest.raises(AirflowException):
            failing_operator.execute(None)

        remove_container = self.client_mock.remove_container
        remove_container.assert_called_once_with('some_id')
示例#7
0
 def test_privileged(self):
     """The ``privileged`` argument must be forwarded to ``create_host_config``."""
     # A unique sentinel object lets the test check identity, not just equality.
     privileged_flag = mock.Mock()
     DockerOperator(
         task_id='test',
         image='test',
         privileged=privileged_flag,
     ).execute(None)

     self.client_mock.create_container.assert_called_once()

     container_kwargs = self.client_mock.create_container.call_args[1]
     assert 'host_config' in container_kwargs

     host_config_kwargs = self.client_mock.create_host_config.call_args[1]
     assert 'privileged' in host_config_kwargs
     assert privileged_flag is host_config_kwargs['privileged']
示例#8
0
 def test_extra_hosts(self):
     """The ``extra_hosts`` argument must be forwarded to ``create_host_config``."""
     # A unique sentinel object lets the test check identity, not just equality.
     extra_hosts_sentinel = mock.Mock()
     DockerOperator(
         task_id='test',
         image='test',
         extra_hosts=extra_hosts_sentinel,
     ).execute(None)

     self.client_mock.create_container.assert_called_once()

     container_kwargs = self.client_mock.create_container.call_args[1]
     assert 'host_config' in container_kwargs

     host_config_kwargs = self.client_mock.create_host_config.call_args[1]
     assert 'extra_hosts' in host_config_kwargs
     assert extra_hosts_sentinel is host_config_kwargs['extra_hosts']
示例#9
0
    def test_execute(self):
        """Run a fully configured operator and verify each Docker client call."""
        operator_kwargs = {
            'api_version': '1.19',
            'command': 'env',
            'environment': {'UNIT': 'TEST'},
            'private_environment': {'PRIVATE': 'MESSAGE'},
            'image': 'ubuntu:latest',
            'network_mode': 'bridge',
            'owner': 'unittest',
            'task_id': 'unittest',
            'volumes': ['/host/path:/container/path'],
            'entrypoint': '["sh", "-c"]',
            'working_dir': '/container/path',
            'shm_size': 1000,
            'host_tmp_dir': '/host/airflow',
            'container_name': 'test_container',
            'tty': True,
        }
        operator = DockerOperator(**operator_kwargs)
        operator.execute(None)

        # Client construction.
        self.client_class_mock.assert_called_once_with(
            base_url='unix://var/run/docker.sock',
            tls=None,
            version='1.19',
        )

        # Container creation: public and private environment are merged and
        # the string entrypoint is expected as a list.
        expected_container_kwargs = {
            'command': 'env',
            'name': 'test_container',
            'environment': {
                'AIRFLOW_TMP_DIR': '/tmp/airflow',
                'UNIT': 'TEST',
                'PRIVATE': 'MESSAGE',
            },
            'host_config': self.client_mock.create_host_config.return_value,
            'image': 'ubuntu:latest',
            'user': None,
            'entrypoint': ['sh', '-c'],
            'working_dir': '/container/path',
            'tty': True,
        }
        self.client_mock.create_container.assert_called_once_with(
            **expected_container_kwargs
        )

        # Host configuration, including the extra bind for the temp dir.
        expected_host_config_kwargs = {
            'binds': ['/host/path:/container/path', '/mkdtemp:/tmp/airflow'],
            'network_mode': 'bridge',
            'shm_size': 1000,
            'cpu_shares': 1024,
            'mem_limit': None,
            'auto_remove': False,
            'dns': None,
            'dns_search': None,
            'cap_add': None,
            'extra_hosts': None,
            'privileged': False,
        }
        self.client_mock.create_host_config.assert_called_once_with(
            **expected_host_config_kwargs
        )

        # Remaining collaborators.
        self.tempdir_mock.assert_called_once_with(
            dir='/host/airflow', prefix='airflowtmp'
        )
        self.client_mock.images.assert_called_once_with(name='ubuntu:latest')
        self.client_mock.attach.assert_called_once_with(
            container='some_id',
            stdout=True,
            stderr=True,
            stream=True,
        )
        self.client_mock.pull.assert_called_once_with(
            'ubuntu:latest', stream=True, decode=True
        )
        self.client_mock.wait.assert_called_once_with('some_id')

        # operator.cli must behave like the mocked client (this issues one
        # further pull call, so it has to stay after the call-count checks).
        pulled = operator.cli.pull('ubuntu:latest', stream=True, decode=True)
        assert pulled == self.client_mock.pull.return_value
示例#10
0
    def test_execute(self, client_class_mock, tempdir_mock):
        """Run a fully configured operator and verify each Docker client call."""
        host_config_stub = mock.Mock()
        tempdir_mock.return_value.__enter__.return_value = '/mkdtemp'

        # Stub Docker API client returning benign values for every call.
        docker_client = mock.Mock(spec=APIClient)
        docker_client.configure_mock(**{
            'create_container.return_value': {'Id': 'some_id'},
            'create_host_config.return_value': host_config_stub,
            'images.return_value': [],
            'attach.return_value': ['container log'],
            'logs.return_value': ['container log'],
            'pull.return_value': [b'{"status":"pull log"}'],
            'wait.return_value': {"StatusCode": 0},
        })
        client_class_mock.return_value = docker_client

        operator_kwargs = {
            'api_version': '1.19',
            'command': 'env',
            'environment': {'UNIT': 'TEST'},
            'image': 'ubuntu:latest',
            'network_mode': 'bridge',
            'owner': 'unittest',
            'task_id': 'unittest',
            'volumes': ['/host/path:/container/path'],
            'working_dir': '/container/path',
            'shm_size': 1000,
            'host_tmp_dir': '/host/airflow',
            'container_name': 'test_container',
            'tty': True,
        }
        DockerOperator(**operator_kwargs).execute(None)

        # Client construction.
        client_class_mock.assert_called_once_with(
            base_url='unix://var/run/docker.sock',
            tls=None,
            version='1.19',
        )

        # Container creation.
        expected_container_kwargs = {
            'command': 'env',
            'name': 'test_container',
            'environment': {
                'AIRFLOW_TMP_DIR': '/tmp/airflow',
                'UNIT': 'TEST',
            },
            'host_config': host_config_stub,
            'image': 'ubuntu:latest',
            'user': None,
            'working_dir': '/container/path',
            'tty': True,
        }
        docker_client.create_container.assert_called_once_with(
            **expected_container_kwargs
        )

        # Host configuration, including the extra bind for the temp dir.
        expected_host_config_kwargs = {
            'binds': ['/host/path:/container/path', '/mkdtemp:/tmp/airflow'],
            'network_mode': 'bridge',
            'shm_size': 1000,
            'cpu_shares': 1024,
            'mem_limit': None,
            'auto_remove': False,
            'dns': None,
            'dns_search': None,
        }
        docker_client.create_host_config.assert_called_once_with(
            **expected_host_config_kwargs
        )

        # Remaining collaborators.
        tempdir_mock.assert_called_once_with(
            dir='/host/airflow', prefix='airflowtmp'
        )
        docker_client.images.assert_called_once_with(name='ubuntu:latest')
        docker_client.attach.assert_called_once_with(
            container='some_id',
            stdout=True,
            stderr=True,
            stream=True,
        )
        docker_client.pull.assert_called_once_with(
            'ubuntu:latest', stream=True, decode=True
        )
        docker_client.wait.assert_called_once_with('some_id')
示例#11
0
    def test_execute_unicode_logs(self):
        """Non-ASCII container output must not trigger a logging error.

        ``logging.raiseExceptions`` is forced to ``True`` for the duration of
        the run; the test then checks that ``traceback.print_exception`` was
        never invoked while the operator executed.
        """
        self.client_mock.attach.return_value = ['unicode container log 😁']

        original_raise_exceptions = logging.raiseExceptions
        logging.raiseExceptions = True
        try:
            operator = DockerOperator(image='ubuntu', owner='unittest', task_id='unittest')

            with mock.patch('traceback.print_exception') as print_exception_mock:
                operator.execute(None)
                print_exception_mock.assert_not_called()
        finally:
            # Restore the process-wide flag even when execute() or the
            # assertion fails, so the override cannot leak into other tests.
            logging.raiseExceptions = original_raise_exceptions
示例#12
0
    def test_execute_no_docker_conn_id_no_hook(self):
        """Without ``docker_conn_id`` the operator must not ask for a hook."""
        # Operator under test: note that no docker_conn_id is passed.
        operator = DockerOperator(
            image='publicregistry/someimage',
            owner='unittest',
            task_id='unittest',
        )

        # Replace get_hook on the instance so any call to it is recorded.
        docker_hook = mock.Mock(name='DockerHook mock', spec=DockerHook)
        docker_hook.get_conn.return_value = self.client_mock
        get_hook_spy = mock.Mock(
            name='DockerOperator.get_hook mock',
            spec=DockerOperator.get_hook,
            return_value=docker_hook,
        )
        operator.get_hook = get_hook_spy

        operator.execute(None)

        hook_calls = operator.get_hook.call_count
        assert hook_calls == 0, 'Hook called though no docker_conn_id configured'
示例#13
0
    def test_execute_container_fails(self, client_class_mock):
        """A ``StatusCode`` of 1 from ``wait`` must raise ``AirflowException``."""
        # Stub Docker API client whose container exits with status 1.
        docker_client = mock.Mock(spec=APIClient)
        docker_client.configure_mock(**{
            'create_container.return_value': {'Id': 'some_id'},
            'create_host_config.return_value': mock.Mock(),
            'images.return_value': [],
            'attach.return_value': [],
            'pull.return_value': [],
            'wait.return_value': {"StatusCode": 1},
        })
        client_class_mock.return_value = docker_client

        failing_operator = DockerOperator(
            image='ubuntu',
            owner='unittest',
            task_id='unittest',
        )

        self.assertRaises(AirflowException, failing_operator.execute, None)
示例#14
0
 def test_private_environment_is_private(self):
     """``private_environment`` must be stored on ``_private_environment``."""
     operator = DockerOperator(
         task_id='unittest',
         image='ubuntu:latest',
         private_environment={'PRIVATE': 'MESSAGE'},
     )
     stored_environment = operator._private_environment
     assert stored_environment == {'PRIVATE': 'MESSAGE'}, (
         "To keep this private, it must be an underscored attribute."
     )
示例#15
0
def get_docker_operator(label: str, command: str) -> DockerOperator:
    """Build a ``DockerOperator`` for the ``airflow-<label>`` image.

    Args:
        label: Suffix used for both the image name (``airflow-<label>``) and
            the task id (``docker-<label>``).
        command: Command passed to the operator.

    Returns:
        The configured operator, using bridge networking, the module-level
        ``VOLUMES`` and with XCom push disabled.
    """
    image_name = f"airflow-{label}"
    task_name = f"docker-{label}"
    return DockerOperator(
        task_id=task_name,
        image=image_name,
        command=command,
        volumes=VOLUMES,
        network_mode="bridge",
        do_xcom_push=False,
    )
示例#16
0
    def test_execute_with_docker_conn_id_use_hook(self, hook_class_mock):
        """With ``docker_conn_id`` set, the client must be obtained via the hook."""
        # Operator under test, configured with a connection id.
        operator_kwargs = {
            'image': 'publicregistry/someimage',
            'owner': 'unittest',
            'task_id': 'unittest',
            'docker_conn_id': 'some_conn_id',
        }
        operator = DockerOperator(**operator_kwargs)

        # Stub hook that hands back the shared stub client.
        docker_hook = mock.Mock(name='DockerHook mock', spec=DockerHook)
        docker_hook.get_conn.return_value = self.client_mock
        hook_class_mock.return_value = docker_hook

        operator.execute(None)

        direct_client_calls = self.client_class_mock.call_count
        assert direct_client_calls == 0, 'Client was called on the operator instead of the hook'

        hook_calls = hook_class_mock.call_count
        assert hook_calls == 1, 'Hook was not called although docker_conn_id configured'

        pull_calls = self.client_mock.pull.call_count
        assert pull_calls == 1, 'Image was not pulled using operator client'
示例#17
0
 def test_extra_hosts(self):
     """The ``extra_hosts`` argument must be forwarded to ``create_host_config``."""
     # A unique sentinel object lets the test check identity, not just equality.
     extra_hosts_sentinel = mock.Mock()
     DockerOperator(
         task_id='test',
         image='test',
         extra_hosts=extra_hosts_sentinel,
     ).execute(None)

     self.client_mock.create_container.assert_called_once()

     container_kwargs = self.client_mock.create_container.call_args.kwargs
     self.assertIn('host_config', container_kwargs)

     host_config_kwargs = self.client_mock.create_host_config.call_args.kwargs
     self.assertIn('extra_hosts', host_config_kwargs)

     forwarded_hosts = self.client_mock.create_host_config.call_args.kwargs['extra_hosts']
     self.assertIs(extra_hosts_sentinel, forwarded_hosts)
示例#18
0
    def test_execute_unicode_logs(self, client_class_mock):
        """Non-ASCII container output must not trigger a logging error.

        ``logging.raiseExceptions`` is forced to ``True`` for the duration of
        the run; the test then checks that ``traceback.print_exception`` was
        never invoked while the operator executed.
        """
        # Stub Docker API client whose attach stream yields a non-ASCII line.
        client_mock = mock.Mock(spec=APIClient)
        client_mock.create_container.return_value = {'Id': 'some_id'}
        client_mock.create_host_config.return_value = mock.Mock()
        client_mock.images.return_value = []
        client_mock.attach.return_value = ['unicode container log 😁']
        client_mock.pull.return_value = []
        client_mock.wait.return_value = {"StatusCode": 0}

        client_class_mock.return_value = client_mock

        original_raise_exceptions = logging.raiseExceptions
        logging.raiseExceptions = True
        try:
            operator = DockerOperator(image='ubuntu', owner='unittest', task_id='unittest')

            with mock.patch('traceback.print_exception') as print_exception_mock:
                operator.execute(None)
                print_exception_mock.assert_not_called()
        finally:
            # Restore the process-wide flag even when execute() or the
            # assertion fails, so the override cannot leak into other tests.
            logging.raiseExceptions = original_raise_exceptions
示例#19
0
    def test_execute_xcom_behavior(self):
        """``execute`` returns the log line only when ``do_xcom_push`` is true."""
        self.client_mock.pull.return_value = [b'{"status":"pull log"}']

        # Arguments shared by both operators; only do_xcom_push differs.
        shared_kwargs = dict(
            api_version='1.19',
            command='env',
            environment={'UNIT': 'TEST'},
            private_environment={'PRIVATE': 'MESSAGE'},
            image='ubuntu:latest',
            network_mode='bridge',
            owner='unittest',
            task_id='unittest',
            volumes=['/host/path:/container/path'],
            working_dir='/container/path',
            shm_size=1000,
            host_tmp_dir='/host/airflow',
            container_name='test_container',
            tty=True,
        )

        pushing_operator = DockerOperator(do_xcom_push=True, **shared_kwargs)
        silent_operator = DockerOperator(do_xcom_push=False, **shared_kwargs)

        pushed_value = pushing_operator.execute(None)
        silent_value = silent_operator.execute(None)

        self.assertEqual(pushed_value, b'container log')
        self.assertIs(silent_value, None)
示例#20
0
    def test_on_kill():
        """``on_kill`` must stop the container recorded on the operator."""
        docker_client = mock.Mock(spec=APIClient)

        operator = DockerOperator(
            image='ubuntu',
            owner='unittest',
            task_id='unittest',
        )
        # Inject the client and container directly; execute() is not run here.
        operator.cli = docker_client
        operator.container = {'Id': 'some_id'}

        operator.on_kill()

        docker_client.stop.assert_called_once_with('some_id')
示例#21
0
    def test_execute_xcom_behavior(self, client_class_mock, tempdir_mock):
        """``execute`` returns the log line only when ``do_xcom_push`` is true."""
        tempdir_mock.return_value.__enter__.return_value = '/mkdtemp'

        # Stub Docker API client returning benign values for every call.
        docker_client = mock.Mock(spec=APIClient)
        docker_client.configure_mock(**{
            'images.return_value': [],
            'create_container.return_value': {'Id': 'some_id'},
            'attach.return_value': ['container log'],
            'pull.return_value': [b'{"status":"pull log"}'],
            'wait.return_value': {"StatusCode": 0},
        })
        client_class_mock.return_value = docker_client

        # Arguments shared by both operators; only do_xcom_push differs.
        shared_kwargs = dict(
            api_version='1.19',
            command='env',
            environment={'UNIT': 'TEST'},
            private_environment={'PRIVATE': 'MESSAGE'},
            image='ubuntu:latest',
            network_mode='bridge',
            owner='unittest',
            task_id='unittest',
            volumes=['/host/path:/container/path'],
            working_dir='/container/path',
            shm_size=1000,
            host_tmp_dir='/host/airflow',
            container_name='test_container',
            tty=True,
        )

        pushing_operator = DockerOperator(do_xcom_push=True, **shared_kwargs)
        silent_operator = DockerOperator(do_xcom_push=False, **shared_kwargs)

        pushed_value = pushing_operator.execute(None)
        silent_value = silent_operator.execute(None)

        self.assertEqual(pushed_value, b'container log')
        self.assertIs(silent_value, None)
# Task "move_data": sleeps, moves the file named by t_view.output from the
# source to the target location, then echoes the resulting path.
t_move = DockerOperator(
    task_id="move_data",
    dag=dag,
    api_version="1.19",
    docker_url="tcp://localhost:2375",  # replace it with swarm/docker endpoint
    image="centos:latest",
    network_mode="bridge",
    do_xcom_push=True,
    params={
        "source_location": "/your/input_dir/path",
        "target_location": "/your/output_dir/path",
    },
    # Bind mounts for the input and output directories.
    mounts=[
        Mount(
            target="/your/input_dir/path",
            source="/your/host/input_dir/path",
            type="bind",
        ),
        Mount(
            target="/your/output_dir/path",
            source="/your/host/output_dir/path",
            type="bind",
        ),
    ],
    # The shell script is assembled into one string; the {{ params.* }}
    # placeholders are left in place as template text.
    command=[
        "/bin/bash",
        "-c",
        (
            "/bin/sleep 30; /bin/mv {{ params.source_location }}/"
            + str(t_view.output)
            + " {{ params.target_location }};/bin/echo '{{ params.target_location }}/"
            + f"{t_view.output}"
            + "';"
        ),
    ],
)
示例#23
0
    schedule_interval="@weekly",
    start_date=days_ago(30),
) as dag:
    # Sensor pointed at task "download" of DAG "download" (external_dag_id /
    # external_task_id); check_existence=True and timeout=30 are passed through.
    data_sensor = ExternalTaskSensor(
        task_id="data-sensor",
        external_dag_id="download",
        external_task_id="download",
        check_existence=True,
        timeout=30,
    )

    # Runs the "airflow-split" image, passing DATA_RAW_PATH via -l and
    # DATA_SPLIT_PATH via -s; HOST_DATA_DIR is mounted at /data.
    split = DockerOperator(
        image="airflow-split",
        command=f"-l {DATA_RAW_PATH} -s {DATA_SPLIT_PATH}",
        network_mode="bridge",
        task_id="split",
        do_xcom_push=False,
        auto_remove=True,
        volumes=[f"{HOST_DATA_DIR}:/data"],
    )

    # Runs the "airflow-fit-transformer" image, passing DATA_SPLIT_PATH via -l,
    # DATA_TRANSFORMED_PATH via -s and MODEL_PATH via -m; same /data mount.
    fit_transformer = DockerOperator(
        image="airflow-fit-transformer",
        command=f"-l {DATA_SPLIT_PATH} -s {DATA_TRANSFORMED_PATH} -m {MODEL_PATH}",
        network_mode="bridge",
        task_id="fit_transformer",
        do_xcom_push=False,
        auto_remove=True,
        volumes=[f"{HOST_DATA_DIR}:/data"],
    )
示例#24
0
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}

# DAG "docker_sample", scheduled with a 10-minute timedelta interval.
dag = DAG('docker_sample',
          default_args=default_args,
          schedule_interval=timedelta(minutes=10))

# t1: runs the shell command `date`.
t1 = BashOperator(task_id='print_date', bash_command='date', dag=dag)

# t2: runs `sleep 5`; overrides retries to 3 for this task.
t2 = BashOperator(task_id='sleep', bash_command='sleep 5', retries=3, dag=dag)

# t3: runs `/bin/sleep 30` in a centos:latest container via the given docker_url.
t3 = DockerOperator(
    api_version='1.19',
    docker_url='tcp://localhost:2375',  # Set your docker URL
    command='/bin/sleep 30',
    image='centos:latest',
    network_mode='bridge',
    task_id='docker_op_tester',
    dag=dag)

# t4: echoes a greeting.
t4 = BashOperator(task_id='print_hello',
                  bash_command='echo "hello world!!!"',
                  dag=dag)

# Dependencies: t1 is upstream of both t2 and t3; t3 is upstream of t4.
t1 >> t2
t1 >> t3
t3 >> t4
示例#25
0
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

# DAG "ml-example-docker", scheduled with a one-day timedelta interval.
dag = DAG(
    'ml-example-docker',
    default_args=default_args,
    description='A simple ml example',
    schedule_interval=timedelta(days=1),
    start_date=days_ago(2),
    tags=['example'],
)

# t1: runs `python run.py` in the preprocessing image.
t1 = DockerOperator(
    task_id='preprocessing-step',
    image='warvito/preprocessing-airflow:v1',
    command="python run.py",
    dag=dag,
)

# t2: runs `python run.py` in the classifier image.
t2 = DockerOperator(
    task_id='classification-step',
    image='warvito/toy-example-classifier-airflow:v1',
    command="python run.py",
    dag=dag,
)

# Dependency: t1 is upstream of t2.
t1 >> t2
示例#26
0
        filepath='/opt/airflow/data/raw/{{ ds }}/data.csv',
        task_id="await-data",
        poke_interval=10,
        retries=100,
    )
    # File sensor on the model pickle; the path embeds the Airflow variable
    # `model_dir` as a template expression.
    model_await = FileSensor(
        filepath='/opt/airflow/{{ var.value.model_dir }}/model.pkl',
        task_id="await-model",
        poke_interval=10,
        retries=100,
    )
    # Runs the "airflow-preprocess" image in prediction mode; input and output
    # directories are templated on {{ ds }}.
    preprocessing = DockerOperator(
        task_id="preprocessing",
        image="airflow-preprocess",
        command="--input-dir data/raw/{{ ds }} "
        "--output-dir data/processed/for_preds/{{ ds }} "
        "--prediction",
        network_mode="bridge",
        do_xcom_push=False,
        volumes=[DEFAULT_VOLUME])
    # Runs the "airflow-predict" image on the preprocessed data, with the model
    # directory taken from the `model_dir` variable.
    prediction = DockerOperator(
        task_id="prediction",
        image="airflow-predict",
        command="--data-dir data/processed/for_preds/{{ ds }} "
        "--output-dir data/predictions/{{ ds }} "
        "--model-dir {{ var.value.model_dir }}",
        network_mode="bridge",
        do_xcom_push=False,
        volumes=[DEFAULT_VOLUME])
    # Task with id "end-prediction"; how it is wired to the other tasks is not
    # shown in this excerpt.
    end_task = DummyOperator(task_id='end-prediction')
    "retry_delay": timedelta(minutes=5),
}

# !!! HOST folder(NOT IN CONTAINER) replace with yours !!!
# NOTE(review): this value ends with "/", and the volume strings below append
# "/data", so the resulting host path contains "//data".
HOST_DATA_DIR = '/home/stacy/Work/made/prod2/creative-crisis/airflow_ml_dags/'

# DAG "predict_target": scheduled "@daily", start date days_ago(5).
with DAG(
        "predict_target",
        default_args=default_args,
        schedule_interval="@daily",
        start_date=days_ago(5),
) as dag:

    # Runs the "airflow-preprocess" image; input and output directories are
    # templated on {{ ds }} under the /data mount.
    preprocess = DockerOperator(
        image="airflow-preprocess",
        command=
        "--input-dir /data/raw/{{ ds }} --output-dir /data/processed/{{ ds }}",
        task_id="docker-airflow-preprocess",
        do_xcom_push=False,
        volumes=[f"{HOST_DATA_DIR}/data:/data"])

    # Runs the "airflow-predict" image on the processed data; the model
    # directory is taken from the Airflow variable `model`.
    predict = DockerOperator(image="airflow-predict",
                             command="--input-dir /data/processed/{{ ds }} "
                             "--model-dir /data/model/{{ var.value.model }} "
                             "--output-dir /data/predictions/{{ ds }}",
                             task_id="docker-airflow-predict",
                             do_xcom_push=False,
                             volumes=[f"{HOST_DATA_DIR}/data:/data"])

    # Dependency: preprocess is upstream of predict.
    preprocess >> predict
    # File sensor on the day's data.csv (path templated on {{ ds }}).
    data_sensor = FileSensor(task_id="Wait_for_data",
                             poke_interval=10,
                             retries=100,
                             filepath="data/raw/{{ ds }}/data.csv")

    # File sensor on the day's target.csv (path templated on {{ ds }}).
    target_sensor = FileSensor(task_id="Wait_for_target",
                               poke_interval=10,
                               retries=100,
                               filepath="data/raw/{{ ds }}/target.csv")

    # Runs the "airflow-preprocess" image with three positional paths:
    # raw, processed and model directories, all templated on {{ ds }}.
    preprocess = DockerOperator(
        task_id="Data_preprocess",
        image="airflow-preprocess",
        command=
        "/data/raw/{{ ds }} /data/processed/{{ ds }} /data/model/{{ ds }}",
        network_mode="bridge",
        do_xcom_push=False,
        volumes=[VOLUME],
    )

    # Runs the "airflow-split" image with the processed and splitted
    # directories as positional paths.
    split = DockerOperator(
        task_id="Split_data",
        image="airflow-split",
        command="/data/processed/{{ ds }} /data/splitted/{{ ds }}",
        network_mode="bridge",
        do_xcom_push=False,
        volumes=[VOLUME])
    train = DockerOperator(
        task_id="Train_model",
示例#29
0
from datetime import timedelta

from airflow import DAG
from airflow.providers.docker.operators.docker import DockerOperator
from airflow.utils.dates import days_ago
from airflow.models import Variable
# Volume specification in "<host path>:<container path>" form; the host part
# is a machine-specific absolute path.
DATA_PATH = "/Users/mariapopova/Documents/GitHub/chydlife/airflow_ml_dags/data:/data"

# Arguments handed to the DAG below as default_args.
default_args = {
    "owner": "airflow",
    "email": ["*****@*****.**"],
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}

# DAG "pred-data": scheduled "@daily", start date days_ago(5).
with DAG(
        "pred-data",
        default_args=default_args,
        schedule_interval="@daily",
        start_date=days_ago(5),
) as dag:

    # Runs the "airflow-predict" image on the day's processed data.
    predict = DockerOperator(
        image="airflow-predict",
        # The "{{ ds }}" placeholders must reach Airflow with both braces
        # intact. Inside an f-string "{{" is an escape for a single "{", which
        # would emit the literal "{ ds }" and leave the date unrendered, so
        # only the fragment that interpolates the Variable is an f-string.
        # NOTE(review): Variable.get() runs each time this file is parsed.
        command=(
            "--input-dir /data/processed/{{ ds }} "
            "--output-dir /data/predictions/{{ ds }}  "
            f"--model_path {Variable.get('MODEL_PATH')}"
        ),
        task_id="docker-airflow-predict",
        do_xcom_push=False,
        volumes=[DATA_PATH],
    )
示例#30
0
    # 'execution_timeout': timedelta(seconds=300),
    # 'on_failure_callback': some_function,
    # 'on_success_callback': some_other_function,
    # 'on_retry_callback': another_function,
    # 'sla_miss_callback': yet_another_function,
    # 'trigger_rule': 'all_success'
}

# DAG "stock_update_with_docker_operator" with the cron schedule '0 0 * * *'.
dag = DAG(
    'stock_update_with_docker_operator',
    default_args=default_args,
    description='update latest stock price daily.',
    schedule_interval='0 0 * * *',
    start_date=days_ago(2),
    tags=['stock'],
)

# task1: runs the "stock_update" image with the command 'price'.
task1 = DockerOperator(command='price',
                       task_id='update_stock_price_by_crawler',
                       image="stock_update",
                       dag=dag,
                       do_xcom_push=False)

# task2: runs the same image with the command 'stats'.
task2 = DockerOperator(command='stats',
                       task_id='update_revenue_stats',
                       image="stock_update",
                       dag=dag,
                       do_xcom_push=False)

# Dependency: task1 is upstream of task2.
task1 >> task2