def test_return_value(self): bash_operator = BashOperator( bash_command='echo "stdout"', task_id='test_return_value', dag=None ) return_value = bash_operator.execute(context={}) self.assertEqual(return_value, u'stdout')
def test_echo_env_variables(self): """ Test that env variables are exported correctly to the task bash environment. """ now = datetime.utcnow() now = now.replace(tzinfo=timezone.utc) self.dag = DAG( dag_id='bash_op_test', default_args={ 'owner': 'airflow', 'retries': 100, 'start_date': DEFAULT_DATE }, schedule_interval='@daily', dagrun_timeout=timedelta(minutes=60)) self.dag.create_dagrun( run_id='manual__' + DEFAULT_DATE.isoformat(), execution_date=DEFAULT_DATE, start_date=now, state=State.RUNNING, external_trigger=False, ) import tempfile with tempfile.NamedTemporaryFile() as f: fname = f.name t = BashOperator( task_id='echo_env_vars', dag=self.dag, bash_command='echo $AIRFLOW_HOME>> {0};' 'echo $PYTHONPATH>> {0};' 'echo $AIRFLOW_CTX_DAG_ID >> {0};' 'echo $AIRFLOW_CTX_TASK_ID>> {0};' 'echo $AIRFLOW_CTX_EXECUTION_DATE>> {0};' 'echo $AIRFLOW_CTX_DAG_RUN_ID>> {0};'.format(fname) ) original_AIRFLOW_HOME = os.environ['AIRFLOW_HOME'] os.environ['AIRFLOW_HOME'] = 'MY_PATH_TO_AIRFLOW_HOME' t.run(DEFAULT_DATE, DEFAULT_DATE, ignore_first_depends_on_past=True, ignore_ti_state=True) with open(fname, 'r') as fr: output = ''.join(fr.readlines()) self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output) # exported in run_unit_tests.sh as part of PYTHONPATH self.assertIn('tests/test_utils', output) self.assertIn('bash_op_test', output) self.assertIn('echo_env_vars', output) self.assertIn(DEFAULT_DATE.isoformat(), output) self.assertIn('manual__' + DEFAULT_DATE.isoformat(), output) os.environ['AIRFLOW_HOME'] = original_AIRFLOW_HOME
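# A minimal illustrative sketch (an assumption, not part of the test above) of a task
# whose bash command reads the AIRFLOW_CTX_* variables that the test verifies are
# exported into the task environment. The task id is a placeholder; self.dag refers to
# the DAG built inside the test.
echo_ctx = BashOperator(
    task_id='echo_ctx',
    bash_command='echo "dag=$AIRFLOW_CTX_DAG_ID task=$AIRFLOW_CTX_TASK_ID date=$AIRFLOW_CTX_EXECUTION_DATE"',
    dag=self.dag)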
def test_return_value_to_xcom(self): bash_operator = BashOperator( bash_command='echo "stdout"', xcom_push=True, task_id='test_return_value_to_xcom', dag=None ) xcom_return_value = bash_operator.execute(context={}) self.assertEqual(xcom_return_value, u'stdout')
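# A minimal sketch (hypothetical, not part of the test suite above) showing how a
# downstream task could consume the value that xcom_push=True publishes: the bash
# command's final stdout line is pulled back with xcom_pull in a templated command.
# The DAG id and task ids are placeholders.
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.utils.dates import days_ago

demo_dag = DAG(dag_id='xcom_push_demo', start_date=days_ago(1), schedule_interval=None)

producer = BashOperator(
    task_id='producer',
    bash_command='echo "stdout"',
    xcom_push=True,
    dag=demo_dag)

consumer = BashOperator(
    task_id='consumer',
    bash_command="echo pulled: {{ ti.xcom_pull(task_ids='producer') }}",
    dag=demo_dag)

producer >> consumer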
def create_sde_tasks(dag, folder, layer, datasd_name, md, path_to_file, sde_to_shp): """Dynamically create SDE Airflow tasks. dag: DAG defined in _dags file. folder: subfolder in the sde folder on S3. layer: layer name. datasd_name: layer name + _datasd. md: name of md file on Seaboard. path_to_file: poseidon path + datasd_name. sde_to_shp: _jobs specific sde_to_shp function """ #: Latest Only Operator for sde layer sde_latest_only = LatestOnlyOperator(task_id='{layer}_latest_only' .format(layer=layer), dag=dag) #: Convert sde table to shapefile format to_shp = PythonOperator( task_id='{layer}_to_shp'.format(layer=layer), python_callable=sde_to_shp, on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Convert shapefile to GeoJSON format to_geojson = BashOperator( task_id='{layer}_to_geojson'.format(layer=layer), bash_command=shp_to_geojson(path_to_file), on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Convert shapefile to TopoJSON format to_topojson = BashOperator( task_id='{layer}_to_topojson'.format(layer=layer), bash_command=shp_to_topojson(path_to_file), on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Compress shapefile components to_zip = PythonOperator( task_id='{layer}_shp_to_zip'.format(layer=layer), python_callable=shp_to_zip, op_kwargs={'datasd_name': datasd_name}, on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Upload shapefile to S3 shp_to_S3 = S3FileTransferOperator( task_id='{layer}_shp_to_S3'.format(layer=layer), source_base_path=conf['prod_data_dir'], source_key='{datasd_name}.zip'.format(datasd_name=datasd_name), dest_s3_conn_id=conf['default_s3_conn_id'], dest_s3_bucket=conf['dest_s3_bucket'], dest_s3_key='sde/{folder}/{datasd_name}.zip' .format(folder=folder, datasd_name=datasd_name), on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, replace=True, dag=dag) #: Upload geojson to S3 geojson_to_S3 = S3FileTransferOperator( task_id='{layer}_geojson_to_S3'.format(layer=layer), source_base_path=conf['prod_data_dir'], source_key='{datasd_name}.geojson'.format(datasd_name=datasd_name), dest_s3_conn_id=conf['default_s3_conn_id'], dest_s3_bucket=conf['dest_s3_bucket'], dest_s3_key='sde/{folder}/{datasd_name}.geojson' .format(folder=folder, datasd_name=datasd_name), on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, replace=True, dag=dag) #: Upload topojson to S3 topojson_to_S3 = S3FileTransferOperator( task_id='{layer}_topojson_to_S3'.format(layer=layer), source_base_path=conf['prod_data_dir'], source_key='{datasd_name}.topojson'.format(datasd_name=datasd_name), dest_s3_conn_id=conf['default_s3_conn_id'], dest_s3_bucket=conf['dest_s3_bucket'], dest_s3_key='sde/{folder}/{datasd_name}.topojson' .format(folder=folder, datasd_name=datasd_name), on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, replace=True, dag=dag) #: Update portal modified date update_md = get_seaboard_update_dag('{md}.md'.format(md=md), dag) if layer not in no_pbf: #: Convert GeoJSON to Geobuf format to_geobuf = PythonOperator( task_id='{layer}_to_geobuf'.format(layer=layer), python_callable=geojson_to_geobuf, op_kwargs={'path_to_file': path_to_file}, on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Convert geobuf to gzipped geobuf to_gzip = PythonOperator( 
task_id='{layer}_geobuf_to_gzip'.format(layer=layer), python_callable=geobuf_to_gzip, op_kwargs={'datasd_name': datasd_name}, on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Upload geobuf to S3 geobuf_to_S3 = S3FileTransferOperator( task_id='{layer}_geobuf_to_S3'.format(layer=layer), source_base_path=conf['prod_data_dir'], source_key='{datasd_name}.pbf'.format(datasd_name=datasd_name), dest_s3_conn_id=conf['default_s3_conn_id'], dest_s3_bucket=conf['dest_s3_bucket'], dest_s3_key='sde/{folder}/{datasd_name}.pbf' .format(folder=folder, datasd_name=datasd_name), on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, replace=True, use_gzip=True, dag=dag) #: Conversion to geobuf is triggered after conversion to geojson. to_geobuf.set_upstream(to_geojson) #: Compression to gzip is triggered after conversion to geobuf. to_gzip.set_upstream(to_geobuf) #: geobuf upload to S3 is triggered after compression to gzipped geobuf. geobuf_to_S3.set_upstream(to_gzip) #: Github update depends on shapefile S3 upload success. update_md.set_upstream(geobuf_to_S3) #: Execution rules: #: sde_latest_only must run before shp conversion. to_shp.set_upstream(sde_latest_only) #: Conversion to geojson is triggered after conversion to shp. to_geojson.set_upstream(to_shp) #: Conversion to topojson is triggered after conversion to shapefile. to_topojson.set_upstream(to_shp) #: Compression to zip is triggered after conversion to geojson and topojson. to_zip.set_upstream(to_geojson) to_zip.set_upstream(to_topojson) #: shapefile upload to S3 is triggered after conversion to zip. shp_to_S3.set_upstream(to_zip) #: geojson upload to S3 is triggered after conversion to geojson. geojson_to_S3.set_upstream(to_geojson) #: topojson upload to S3 is triggered after conversion to topojson. topojson_to_S3.set_upstream(to_topojson) #: Github update depends on shapefile S3 upload success. update_md.set_upstream(shp_to_S3) update_md.set_upstream(geojson_to_S3) update_md.set_upstream(topojson_to_S3)
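# Hypothetical invocation sketch for create_sde_tasks, following its docstring; the DAG
# object, layer/folder names, md file name, and the sde_to_shp callable below are
# placeholders rather than values from the original pipeline.
create_sde_tasks(
    dag=some_sde_dag,                                 # DAG defined in the _dags file
    folder='parks',                                   # subfolder in the sde folder on S3
    layer='parks',                                    # layer name
    datasd_name='parks_datasd',                       # layer name + _datasd
    md='parks',                                       # name of the md file on Seaboard
    path_to_file=conf['prod_data_dir'] + '/parks_datasd',
    sde_to_shp=sde_to_shp)                            # _jobs specific sde_to_shp function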
start_date = general.start_date['pd_cfs'] dag = DAG( dag_id='pd_cfs', default_args=args, start_date=start_date, schedule_interval=schedule['pd_cfs']) #: Latest Only Operator for pd_cfs pd_cfs_latest_only = LatestOnlyOperator( task_id='pd_cfs_latest_only', dag=dag) #: Get CFS data from FTP and save to temp folder get_cfs_data = BashOperator( task_id='get_cfs_data', bash_command=get_cfs_data(), on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Process CFS data and save result to prod folder process_cfs_data = PythonOperator( task_id='process_cfs_data', python_callable=process_cfs_data, on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Upload prod file to S3 cfs_to_S3 = S3FileTransferOperator(
def MakeCommonDag(name='istio_daily_flow_test', schedule_interval='15 9 * * *', monthly=False): """Creates the shared part of the daily/monthly dags.""" common_dag = DAG( name, catchup=False, default_args=default_args, schedule_interval=schedule_interval, ) def AirflowGetVariableOrBaseCase(var, base): try: return Variable.get(var) except KeyError: return base def GenerateTestArgs(**kwargs): """Loads the configuration that will be used for this Iteration.""" conf = kwargs['dag_run'].conf if conf is None: conf = dict() """ Airflow gives the execution date when the job is supposed to be run, however we dont backfill and only need to run one build therefore use the current date instead of the date that is passed in """ # date = kwargs['execution_date'] date = datetime.datetime.now() timestamp = time.mktime(date.timetuple()) # Monthly releases started in Nov 2017 with 0.3.0, so minor is # of months # from Aug 2017. minor_version = (date.year - 2017) * 12 + (date.month - 1) - 7 major_version = AirflowGetVariableOrBaseCase('major_version', 0) # This code gets information about the latest released version so we know # What version number to use for this round. r_minor = int(AirflowGetVariableOrBaseCase('released_version_minor', 0)) r_patch = int(AirflowGetVariableOrBaseCase('released_version_patch', 0)) # If we have already released a monthy for this mounth then bump # The patch number for the remander of the month. if r_minor == minor_version: patch = r_patch + 1 else: patch = 0 # If version is overriden then we should use it otherwise we use it's # default or monthly value. version = conf.get('VERSION') if monthly and not version: version = '{}.{}.{}'.format(major_version, minor_version, patch) default_conf = environment_config.get_airflow_config( version, timestamp, major=major_version, minor=minor_version, patch=patch, date=date.strftime('%Y%m%d'), rc=date.strftime('%H-%M')) config_settings = dict(VERSION=default_conf['VERSION']) config_settings_name = [ 'PROJECT_ID', 'MFEST_URL', 'MFEST_FILE', 'GCS_STAGING_BUCKET', 'SVC_ACCT', 'GITHUB_ORG', 'GITHUB_REPO', 'GCS_GITHUB_PATH', 'TOKEN_FILE', 'GCR_STAGING_DEST', 'GCR_RELEASE_DEST', 'GCS_MONTHLY_RELEASE_PATH', 'DOCKER_HUB', 'GCS_BUILD_BUCKET', 'RELEASE_PROJECT_ID', ] for name in config_settings_name: config_settings[name] = conf.get(name) or default_conf[name] if monthly: config_settings['MFEST_COMMIT'] = conf.get( 'MFEST_COMMIT') or Variable.get('latest_sha') gcs_path = conf.get('GCS_MONTHLY_STAGE_PATH') if not gcs_path: gcs_path = default_conf['GCS_MONTHLY_STAGE_PATH'] else: config_settings['MFEST_COMMIT'] = conf.get( 'MFEST_COMMIT') or default_conf['MFEST_COMMIT'] gcs_path = conf.get('GCS_DAILY_PATH') or default_conf['GCS_DAILY_PATH'] config_settings['GCS_STAGING_PATH'] = gcs_path config_settings['GCS_BUILD_PATH'] = '{}/{}'.format( config_settings['GCS_BUILD_BUCKET'], gcs_path) config_settings['GCS_RELEASE_TOOLS_PATH'] = '{}/release-tools/{}'.format( config_settings['GCS_BUILD_BUCKET'], gcs_path) config_settings['GCS_FULL_STAGING_PATH'] = '{}/{}'.format( config_settings['GCS_STAGING_BUCKET'], gcs_path) config_settings['ISTIO_REPO'] = 'https://github.com/{}/{}.git'.format( config_settings['GITHUB_ORG'], config_settings['GITHUB_REPO']) return config_settings generate_flow_args = PythonOperator( task_id='generate_workflow_args', python_callable=GenerateTestArgs, provide_context=True, dag=common_dag, ) get_git_commit_cmd = """ {% set settings = task_instance.xcom_pull(task_ids='generate_workflow_args') %} git config --global user.name 
"TestRunnerBot" git config --global user.email "*****@*****.**" git clone {{ settings.MFEST_URL }} green-builds || exit 2 pushd green-builds git checkout {{ settings.MFEST_COMMIT }} || exit 5 SHA=`grep {{ settings.GITHUB_ORG }}/{{ settings.GITHUB_REPO }} {{ settings.MFEST_FILE }} | cut -f 6 -d \\"` || exit 3 if [ -z ${SHA} ]; then echo "SHA not found" exit 6 fi popd git clone {{ settings.ISTIO_REPO }} istio-code pushd istio-code/release git checkout ${SHA} || exit 4 gsutil cp *.sh gs://{{ settings.GCS_RELEASE_TOOLS_PATH }}/data/release/ gsutil cp *.json gs://{{ settings.GCS_RELEASE_TOOLS_PATH }}/data/release/ popd pushd green-builds git rev-parse HEAD """ get_git_commit = BashOperator( task_id='get_git_commit', bash_command=get_git_commit_cmd, xcom_push=True, dag=common_dag) build_template = """ {% set settings = task_instance.xcom_pull(task_ids='generate_workflow_args') %} {% set m_commit = task_instance.xcom_pull(task_ids='get_git_commit') %} gsutil cp gs://{{ settings.GCS_RELEASE_TOOLS_PATH }}/data/release/*.json . gsutil cp gs://{{ settings.GCS_RELEASE_TOOLS_PATH }}/data/release/*.sh . chmod u+x * ./start_gcb_build.sh -w -p {{ settings.PROJECT_ID \ }} -r {{ settings.GCR_STAGING_DEST }} -s {{ settings.GCS_BUILD_PATH }} \ -v "{{ settings.VERSION }}" \ -u "{{ settings.MFEST_URL }}" \ -t "{{ m_commit }}" -m "{{ settings.MFEST_FILE }}" \ -a {{ settings.SVC_ACCT }} """ # NOTE: if you add commands to build_template after start_gcb_build.sh then take care to preserve its return value build = BashOperator( task_id='run_cloud_builder', bash_command=build_template, dag=common_dag) test_command = """ cp /home/airflow/gcs/data/githubctl ./githubctl chmod u+x ./githubctl {% set settings = task_instance.xcom_pull(task_ids='generate_workflow_args') %} git config --global user.name "TestRunnerBot" git config --global user.email "*****@*****.**" ls -l ./githubctl ./githubctl \ --token_file="{{ settings.TOKEN_FILE }}" \ --op=dailyRelQual \ --hub=gcr.io/{{ settings.GCR_STAGING_DEST }} \ --gcs_path="{{ settings.GCS_BUILD_PATH }}" \ --tag="{{ settings.VERSION }}" """ run_release_qualification_tests = BashOperator( task_id='run_release_qualification_tests', bash_command=test_command, retries=0, dag=common_dag) copy_files = GoogleCloudStorageCopyOperator( task_id='copy_files_for_release', source_bucket=GetSettingTemplate('GCS_BUILD_BUCKET'), source_object=GetSettingTemplate('GCS_STAGING_PATH'), destination_bucket=GetSettingTemplate('GCS_STAGING_BUCKET'), dag=common_dag, ) generate_flow_args >> get_git_commit >> build run_release_qualification_tests.set_upstream(build) run_release_qualification_tests >> copy_files return common_dag, copy_files
# Access to http://localhost:8080/admin/ ``` # Commands ``` $ airflow list_dags $ airflow list_tasks -t __job_name__ $ airflow run __job_name__ print_date_0 2015-08-01 ``` """ from airflow import DAG from airflow.operators.bash_operator import BashOperator from datetime import datetime, timedelta default_args = { 'owner': 'inazo', 'start_date': datetime(2016, 1, 1) } dag = DAG('__job_name__', default_args=default_args) # tasks t1 = BashOperator(task_id='print_date_0', bash_command='date', dag=dag) t2 = BashOperator(task_id='sleep', bash_command='sleep 5', dag=dag) t3 = BashOperator(task_id='print_date_1', bash_command='date', dag=dag) # schedule t2.set_upstream(t1) t3.set_upstream(t2)
dag = DAG( dag_id='parking_meters', default_args=args, start_date=start_date, schedule_interval=schedule) #: Latest Only Operator for parking meters parking_meters_latest_only = LatestOnlyOperator( task_id='parking_meters_latest_only', dag=dag) #: Downloads all parking files from FTP get_parking_files = BashOperator( task_id='get_parking_files', bash_command=ftp_download_wget(), on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Joins downloaded files from ftp to production build_prod_file = PythonOperator( task_id='build_prod_file', python_callable=build_prod_file, provide_context=True, on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag)
# ############################## # # ### Annotate image example ### # # ############################## # # [START howto_operator_vision_annotate_image] annotate_image = CloudVisionAnnotateImageOperator( request=annotate_image_request, retry=Retry(maximum=10.0), timeout=5, task_id='annotate_image') # [END howto_operator_vision_annotate_image] # [START howto_operator_vision_annotate_image_result] annotate_image_result = BashOperator( bash_command="echo {{ task_instance.xcom_pull('annotate_image')" "['logoAnnotations'][0]['description'] }}", task_id='annotate_image_result', ) # [END howto_operator_vision_annotate_image_result] # [START howto_operator_vision_detect_text] detect_text = CloudVisionDetectTextOperator( image=DETECT_IMAGE, retry=Retry(maximum=10.0), timeout=5, task_id="detect_text", language_hints="en", web_detection_params={'include_geo_results': True}, ) # [END howto_operator_vision_detect_text]
'owner': 'restic', 'depends_on_past': False, #'start_date': airflow.utils.dates.days_ago(2), 'start_date': datetime(2020, 2, 29), 'retries': 0, } dag = DAG( dag_id='restic_check', schedule_interval=None, dagrun_timeout=timedelta(minutes=60), catchup=False, default_args=args, ) run_this_last = DummyOperator( task_id='run_END', dag=dag, ) run_restic_check = BashOperator( task_id='run_restic_check', bash_command=msys2_bash_invocation("restic-do-check"), dag=dag, ) run_restic_check >> run_this_last if __name__ == "__main__": dag.cli()
"depends_on_past": False, "start_date": datetime.now() - timedelta(days=7), "email": ["*****@*****.**"], "email_on_failure": False, "email_on_retry": False, "retries": 1, "retry_delay": timedelta(minutes=5), # 'queue': 'bash_queue', # 'pool': 'backfill', # 'priority_weight': 10, # 'end_date': datetime(2016, 1, 1), } with DAG("scheduled_bash_dag", default_args=default_args) as dag: # t1, t2 and t3 are examples of tasks created by instantiating operators t1 = BashOperator(task_id="print_date", bash_command="date", dag=dag) t2 = BashOperator(task_id="sleep", bash_command="sleep 1", retries=3, dag=dag) templated_command = """ {% for i in range(5) %} echo "{{ ds }}" echo "{{ macros.ds_add(ds, 7)}}" echo "{{ params.my_param }}" {% endfor %} """ t3 = BashOperator( task_id="templated", bash_command=templated_command,
unload_subjects = PythonOperator( dag=dag, task_id="unload_subjects", python_callable=query_to_local, op_kwargs={ 'sqlFilePath': '/usr/local/airflow/dags/scripts/unload_subjects.sql', 'dest': '/tmp/subjects.csv' }) csv_to_s3_stage = PythonOperator(dag=dag, task_id='CSVs_to_S3_stage', python_callable=csv_to_s3_stage) clean_tmp_dir = BashOperator(dag=dag, task_id='clean_tmp_dir', bash_command='rm /tmp/*.csv') with open('/usr/local/airflow/dags/EMR/steps.json') as steps_file: emr_steps = json.load(steps_file) students_average = EmrAddStepsOperator( dag=dag, task_id='students_average', job_flow_id=Variable.get('emr_cluster_id'), aws_conn_id='aws_default', steps=emr_steps, params={'bucket_name': Variable.get('bucket_name')}) step_checker = EmrStepSensor( dag=dag,
"on_failure_callback": slack.task_fail_slack_alert, "retries": 0, } bucket_base_uri = f"gs://{bucket_name}/" bucket_image_storage_url = f"{GCP_STORAGE_BASE}{bucket_name}/images/" dag = DAG( "2-export_images_to_gcs_dataset", default_args=default_args, catchup=False, schedule_interval=None ) create_data_bucket_cmd = f"gsutil ls -b {bucket_base_uri} || gsutil mb {bucket_base_uri}" create_data_bucket = BashOperator( task_id="create_data_bucket", bash_command=create_data_bucket_cmd, provide_context=True, dag=dag ) set_data_bucket_acl_cmd = f"gsutil defacl ch -u AllUsers:R {bucket_base_uri}" set_data_bucket_acl = BashOperator( task_id="set_data_bucket_acl", bash_command=set_data_bucket_acl_cmd, provide_context=True, trigger_rule="all_success", dag=dag, ) export_images_to_gcs_dataset_cmd = f"gsutil -m cp -r {AIRFLOW_IMAGE_FOLDER} {bucket_base_uri}" export_images_to_gcs_dataset = BashOperator( task_id="export_images_to_gcs", bash_command=export_images_to_gcs_dataset_cmd,
'email_on_retry': False, 'retries': 1, 'retry_delay': timedelta(minutes=5), } with DAG('gcp_example', default_args=default_args) as dag: create_bq_dataset_if_not_exist = """ bq ls {0} if [ $? -ne 0 ]; then bq mk {0} fi """.format(BQ_DATASET_NAME) # Create destination dataset. t1 = BashOperator(task_id='create_destination_dataset', bash_command=create_bq_dataset_if_not_exist, dag=dag) # Create a bigquery table from a .csv file located in a GCS bucket # (gs://example-datasets/game_data_condensed.csv). # Store it in our dataset. t2 = GoogleCloudStorageToBigQueryOperator( task_id='gcs_to_bq', bucket='example-datasets', source_objects=['game_data_condensed.csv'], destination_project_dataset_table='{0}.gcp_example_table'.format( BQ_DATASET_NAME), schema_fields=[ { 'name': 'name', 'type': 'string',
schedule_interval='@once', default_args=default_args) t1 = BashOperator( task_id='print_date', bash_command='date', dag=dag) t2 = PythonOperator( task_id='run_job', python_callable=run_job, op_args=('75588', 300, '8edd9e11f4de44b39f666777ac79bfe1'), retries=1, dag=dag) templated_command = """ {% for i in range(5) %} echo "{{ ds }}" echo "{{ macros.ds_add(ds, 7)}}" echo "{{ params.my_param }}" {% endfor %} """ t3 = BashOperator( task_id='templated', bash_command=templated_command, params={'my_param': 'Parameter I passed in'}, dag=dag) t2.set_upstream(t1) t3.set_upstream(t1)
'email_on_retry': False, 'retries': 1, 'retry_delay': timedelta(minutes=1), # 'queue': 'bash_queue', # 'pool': 'backfill', # 'priority_weight': 10, # 'end_date': datetime(2016, 1, 1), } dag = DAG('design_stock', default_args=default_args, schedule_interval="0 0 1 1 *") a1 = "aa.sh " # t1, t2 and t3 are examples of tasks created by instantiating operators t_analysis = BashOperator(task_id='analysis', bash_command=a1, dag=dag) t_scrap_data = BashOperator(task_id='scrap_data', bash_command=a1, dag=dag) t_run_main_PROJECTNAME = BashOperator(task_id='run_main_PROJECTNAME', bash_command=a1, dag=dag) t_clean_data = BashOperator(task_id='clean_data', bash_command=a1, dag=dag) t_download_data = BashOperator(task_id='download_data', bash_command=a1, dag=dag) t_to_hive = BashOperator(task_id='to_hive', bash_command=a1, dag=dag) feature_analysis = SubDagOperator( task_id='feature_analysis', subdag=subdag(DAG_NAME, 'feature_analysis', default_args), dag=dag,
start_date = general.start_date['tsw_integration'] #: Dag spec dag = DAG(dag_id='tsw_integration', default_args=args, start_date=start_date, schedule_interval=schedule) violations_latest_only = LatestOnlyOperator(task_id='violations_latest_only', dag=dag) # VPM Extraction Support Tasks #: Download VPM dump from FTP get_vpm_violations = BashOperator( task_id='get_vpm_violations', bash_command=get_vpm_violations_wget(), on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Download VPM dump from FTP #get_vpm_dump = BashOperator( # task_id='get_vpm_dump', # bash_command=ftp_download_wget(), # on_failure_callback=notify, # on_retry_callback=notify, # on_success_callback=notify, # dag=dag) # #
    dag_id,
    default_args=args,
    description="number of identified taxi licence plates per day",
) as dag:

    # 1. starting message on Slack
    slack_at_start = MessageOperator(
        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. make temp dir
    mk_tmp_dir = BashOperator(task_id="mk_tmp_dir", bash_command=f"mkdir -p {TMP_PATH}")

    # 3. download the data into temp directory
    download_data = HttpFetchOperator(
        task_id="download",
        endpoint=endpoint,
        http_conn_id=http_conn_id,
        tmp_file=f"{TMP_PATH}/taxi_passages.csv",
        output_type="text",
    )

    create_temp_table = PostgresOperator(
        task_id="create_temp_tables",
        sql=SQL_CREATE_TEMP_TABLE,
        params=dict(base_table=table_id),
    )
    dag=BAKE_OFF_PIPE,
    task_id="{region}_split".format(**locals())
)

freebayes_command = """freebayes -f {{ reference }} --vcf {{ outfile }} --targets {{ region }} {{ opts }} {{ in_bam }}"""

freebayes_operators = {}
for toople in chromosome_split_operators.items():
    region, operator = toople
    outfile = "{WORK_DIR}/{region}.vcf".format(**locals())
    freebayes_by_region = BashOperator(
        bash_command=freebayes_command,
        params={
            'reference': "/path/to/human.fasta",
            'outfile': outfile,
            'region': region,
            'opts': default_args['freebayes'],
            'in_bam': "{WORK_DIR}/{region}.bam".format(**locals())
        },
        dag=BAKE_OFF_PIPE,
        task_id="{region}_freebayes".format(**locals())
    )
    freebayes_operators[region] = freebayes_by_region
    freebayes_by_region.set_upstream(operator)

# now merge
vcf_concat_command = """vcf-concat-parts {{ in_files }} | vcf-sort > {{ outfile }}"""
infiles = []
for toople in freebayes_operators.items():
    region, operator = toople
    infiles.append("{WORK_DIR}/{region}.vcf".format(**locals()))
import json
import pathlib

import airflow
import requests
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator

# Initialising the DAG object
dag = DAG(
    dag_id="download_rocket_launches",
    start_date=airflow.utils.dates.days_ago(14),
    schedule_interval=None,
)

download_launches = BashOperator(
    task_id="download_launches",
    bash_command="curl -o /tmp/launches.json 'https://launchlibrary.net/1.4/launch?next=5&mode=verbose'",
    dag=dag,
)


def _get_pictures():
    pathlib.Path("/tmp/images").mkdir(parents=True, exist_ok=True)
    with open("/tmp/launches.json") as f:
        launches = json.load(f)
    image_urls = [launch["rocket"]["imageURL"] for launch in launches["launches"]]
    for image_url in image_urls:
        response = requests.get(image_url)
        image_filename = image_url.split("/")[-1]
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime.now(),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(minutes=5),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 4, 24),
}

dag = DAG('update_sparksubmit_pyspark', default_args=default_args)  # , schedule_interval=timedelta(0))

pull_git = BashOperator(
    task_id='pull_git',
    bash_command='cd /root/pipeline && git pull',
    dag=dag)

# t1 is an example of a task created by instantiating operators
sparksubmit_pyspark = BashOperator(
    task_id='sparksubmit_pyspark',
    bash_command='spark-submit --master local[*] /root/pipeline/jupyterhub.ml/scripts/pi.py 10',
    dag=dag)

# Setup Airflow DAG
sparksubmit_pyspark.set_upstream(pull_git)
'depends_on_past': False, 'start_date': run_time, 'email': ['*****@*****.**'], 'email_on_failure': False, 'email_on_retry': False, 'retries': 1, 'retry_delay': timedelta(minutes=30), # 'queue': 'bash_queue', # 'pool': 'backfill', # 'priority_weight': 10, # 'end_date': datetime(2016, 1, 1), } dag = DAG('dadan_DFCF', default_args=default_args, schedule_interval="30 21 * * *") # t1, t2 and t3 are examples of tasks created by instantiating operators task1_command = "/home/davidyu/stock/scripts/davidyu_stock/scripts/analysis/dadan_DFCF/shell/run_dadan_dfcf_weekly_dadan_cnt.sh " task2_command = "/home/davidyu/stock/scripts/davidyu_stock/scripts/analysis/dadan_DFCF/shell/run_dadan_dfcf_today_dadan_weekly_dadan_cnt.sh " t1 = BashOperator(task_id='dadan_dfcf_weekly_dadan_cnt', bash_command=task1_command, dag=dag) t2 = BashOperator(task_id='dadan_dfcf_today_dadan_weekly_dadan_cnt', bash_command=task2_command, dag=dag) t2.set_upstream(t1)
# 'queue': 'bash_queue', # 'pool': 'backfill', # 'priority_weight': 10, # 'end_date': datetime(2016, 4, 24), } dag = DAG('undeploy_prediction_pmml', default_args=default_args) # TODO: dockerFileTag and dockerFilePath should be passed in from webhook switch_to_aws = BashOperator( task_id='switch_to_aws', bash_command='sudo kubectl config use-context awsdemo', dag=dag) undeploy_container_aws = BashOperator( task_id='undeploy_container_to_aws', bash_command='sudo kubectl delete prediction-pmml', dag=dag) switch_to_gcp = BashOperator( task_id='switch_to_gcp', bash_command='sudo kubectl config use-context gcpdemo', dag=dag) undeploy_container_gcp = BashOperator( task_id='undeploy_container_gcp', bash_command='sudo kubectl delete prediction-pmml', dag=dag) # Setup Airflow DAG undeploy_container_aws.set_upstream(switch_to_aws) switch_to_gcp.set_upstream(undeploy_container_aws)
from airflow import DAG from airflow.operators.http_operator import SimpleHttpOperator from airflow.sensors.http_sensor import HttpSensor from airflow.operators.bash_operator import BashOperator from airflow.operators.email_operator import EmailOperator from airflow.contrib.operators.databricks_operator import DatabricksSubmitRunOperator from datetime import datetime, timedelta default_args = { 'owner': 'airflow', 'depends_on_past': False, 'start_date': datetime(2020, 3, 15), 'end_date': None, 'email': ['*****@*****.**', '*****@*****.**'], 'email_on_failure': True, 'email_on_retry': True, 'retries': 2, 'retry_delay': timedelta(minutes=5) } dag = DAG('TARGET_INVENTORY', default_args=default_args, schedule_interval='@daily') target_sh = BashOperator( task_id='SHR', bash_command= "cd /home/ec2-user/TARGET_2/target/target/spiders && python3 -m scrapy crawl target_data.py ", queue="pipeline9", dag=dag)
# 'on_success_callback': some_other_function, # 'on_retry_callback': another_function, # 'trigger_rule': 'all_success' } dag = DAG( 'tutorial', default_args=default_args, description='A simple tutorial DAG', schedule_interval=timedelta(days=1), ) # t1, t2 and t3 are examples of tasks created by instantiating operators t1 = BashOperator( task_id='print_date', bash_command='date', dag=dag, ) t1.doc_md = """\ #### Task Documentation You can document your task using the attributes `doc_md` (markdown), `doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets rendered in the UI's Task Instance Details page. ![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import%20soul.png) """ dag.doc_md = __doc__ t2 = BashOperator( task_id='sleep',
# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. """Used for unit tests""" import airflow from airflow.models import DAG from airflow.operators.bash_operator import BashOperator dag = DAG(dag_id='test_utils', schedule_interval=None) task = BashOperator( task_id='sleeps_forever', dag=dag, bash_command="sleep 10000000000", start_date=airflow.utils.dates.days_ago(2), owner='airflow', )
dag=dag) #: Update portal modified date update_code_enf_md = get_seaboard_update_dag('code-enforcement-violations.md', dag) #: Execution rules #: dsd_code_enf_latest_only must run before get_code_enf_files get_code_enf_files.set_upstream(dsd_ce_latest_only) for i in fname_list: #: Create fme shell command build_csv_task = BashOperator( task_id='get_' + i, bash_command=get_bash_command(i), on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Set Task as Downstream for downloading files build_csv_task.set_upstream(get_code_enf_files) #: Create S3 Upload task s3_task = S3FileTransferOperator( task_id='upload_' + i, source_base_path=conf['prod_data_dir'], source_key=i + '_datasd.csv', dest_s3_bucket=conf['dest_s3_bucket'], dest_s3_conn_id=conf['default_s3_conn_id'], dest_s3_key='dsd/' + i + '_datasd.csv',
from airflow import DAG from airflow.operators.bash_operator import BashOperator from airflow.operators.python_operator import PythonOperator from airflow.utils.dates import days_ago def split_execution_date(**kwargs): execution_date = kwargs['execution_date'] print(execution_date, type(execution_date)) args = { 'owner': 'Airflow', 'start_date': days_ago(2), 'provide_context': True, } with DAG('execution_date', schedule_interval='*/5 * * * *', default_args=args, catchup=False) as dag: t0 = BashOperator( task_id='print_execution_date', bash_command='echo {{ ds }} {{ execution_date }} {{ ts }}', ) t1 = PythonOperator( task_id='split_execution_date', python_callable=split_execution_date, op_kwargs={'execution_date': '{{ ds }}'}, ) t0 >> t1
# 'pool': 'backfill', # 'priority_weight': 10, # 'end_date': datetime(2016, 4, 24), } dag = DAG('update_prediction_pmml', default_args=default_args) #, schedule_interval=timedelta(0)) pull_git = BashOperator( task_id='pull_git', bash_command='cd /root/pipeline && git pull', dag=dag) # TODO: dockerFileTag and dockerFilePath should be passed in from webhook build_image = BashOperator( task_id='build_docker_image', bash_command='sudo docker build -t fluxcapacitor/prediction-pmml /root/pipeline/prediction.ml/pmml/', dag=dag) push_image = BashOperator( task_id='push_docker_image', bash_command='sudo docker push fluxcapacitor/prediction-pmml', dag=dag) #switch_to_aws = BashOperator( # task_id='switch_to_aws', # bash_command='sudo kubectl config use-context awsdemo', # dag=dag) update_container_aws = BashOperator( task_id='update_container_aws', bash_command='kubectl rolling-update prediction-pmml --context=awsdemo --image-pull-policy=Always --image=fluxcapacitor/prediction-pmml',
default_args = { 'owner': 'airflow', 'depends_on_past': False, 'start_date': airflow.utils.dates.days_ago(2), 'email': ['*****@*****.**'], 'email_on_failure': False, 'email_on_retry': False, 'retries': 1, 'retry_delay': timedelta(minutes=1), } dag = DAG('openstack_cli', default_args=default_args, schedule_interval=None) # print_date t1 = BashOperator(task_id='print_date', bash_command='date', dag=dag) ## Note that the openrc.sh file needs to be placed on a volume that can be ## accessed by the containers # openstack endpoint list t2 = OpenStackOperator(task_id='endpoint_list_task', openrc_file='/usr/local/airflow/dags/openrc.sh', openstack_command=['openstack', 'endpoint', 'list'], dag=dag) # openstack service list t3 = OpenStackOperator(task_id='service_list_task', openrc_file='/usr/local/airflow/dags/openrc.sh', openstack_command=['openstack', 'service', 'list'], dag=dag)
'owner': 'airflow', 'depends_on_past': False, 'start_date': datetime(2020, 12, 24), 'end_date': None, 'email': ['*****@*****.**', '*****@*****.**'], 'email_on_failure': True, 'email_on_retry': True, 'retries': 2, 'retry_delay': timedelta(minutes=5) } dag = DAG('WHR', default_args=default_args, schedule_interval='1 0,12 * * *') HD_sh = BashOperator( task_id='HD', bash_command= "source /home/ec2-user/Scrapes/.venv/bin/activate && python /home/ec2-user/Scrapes/WHR/PYTHON/Homedepot.py ", queue="pipeline1", dag=dag) HD_LT_sh = BashOperator( task_id='HD_LT', bash_command= "source /home/ec2-user/Scrapes/.venv/bin/activate && python /home/ec2-user/Scrapes/WHR/PYTHON/Homedepot_LT.py ", queue="pipeline1", dag=dag) LOW_sh = BashOperator( task_id='LOW', bash_command= "source /home/ec2-user/Scrapes/.venv/bin/activate && python /home/ec2-user/Scrapes/WHR/PYTHON/Lowes_mb.py ", queue="pipeline1",
# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from airflow import DAG from airflow.operators.bash_operator import BashOperator from datetime import datetime, timedelta default_args = { 'owner': 'airflow', 'depends_on_past': False, 'start_date': datetime(2016,10,5,19), 'email': ['*****@*****.**'], 'email_on_failure': False, 'email_on_retry': False, 'retries': 4, 'retry_delay': timedelta(seconds=0), } dag = DAG('test_retry_handling_job', default_args=default_args, schedule_interval='@once') task1 = BashOperator( task_id='test_retry_handling_op', bash_command='exit 1', dag=dag)
## Subheader Here's a [url](www.airbnb.com) My numbered list: 1. one 1. two My bulleted list: - first - second """ the_task = BashOperator( task_id='the_task', bash_command='echo THE_TASK', dag=dag) the_task.doc_md = """\ # Title Here's a [url](www.airbnb.com) My list: 1. one 1. two My bulleted list: - first - second """
from airflow.operators.bash_operator import BashOperator airflow_home = os.environ.get('AIRFLOW_HOME') default_args = { 'owner': 'airflow', 'start_date': datetime(2018, 6, 16), 'depends_on_past': False, 'retries': 1, 'retry_delay': timedelta(minutes=5) } dag = DAG('bq_stores_sales_processing', schedule_interval='*/10 * * * *', catchup=False, default_args=default_args) bq_store_sales_task = BashOperator(task_id='bq_store_sales_bash', bash_command='bq query --use_legacy_sql=False < ' + airflow_home + '/scripts/store_sales.sql ', dag=dag) #bq_store_sales_task = BigQueryOperator( # task_id='bq_store_sales', # bql="INSERT `retail_demo_warehouse.store_sales` (store_id, store_name, lat_long, transaction_date, total_sales, updated_timestamp) SELECT store.store_id, store.store_name, CONCAT(CAST(store.latitude AS STRING), CONCAT(',', CAST(store.longitude AS STRING))) AS lat_long, sales.transaction_date, sales.store_sales AS total_sales, CURRENT_TIMESTAMP() as updated_timestamp FROM ( SELECT transaction_date, CAST(store_id AS INT64) AS store_id, ROUND(SUM((SELECT SUM(item_price_each * quantity) FROM UNNEST(lineitems))),2) AS store_sales FROM `retail_demo_warehouse.sales_events` GROUP BY transaction_date, store_id ) sales JOIN `retail_demo_warehouse.store` store ON store.store_id = sales.store_id", # sql=None, # destination_dataset_table=False, # bigquery_conn_id='google_cloud_default', # use_legacy_sql=False, # udf_config=False, # dag=dag)
schedule_interval=timedelta(days=1)) #------------------------------------------------------------------------------- # first operator date_operator = BashOperator( task_id='date_task', bash_command='date', dag=dag) #------------------------------------------------------------------------------- # second operator sleep_operator = BashOperator( task_id='sleep_task', depends_on_past=False, bash_command='sleep 5', dag=dag) #------------------------------------------------------------------------------- # third operator def print_hello(): return 'Hello world!' hello_operator = PythonOperator( task_id='hello_task', python_callable=print_hello, dag=dag) #-------------------------------------------------------------------------------
    ContactID , FirstName , MiddleName , LastName , EmailAddress , Phone
    from adventureworks.contact c)
"""

t2 = MySqlOperator(sql=qry_populate_staging,
                   mysql_conn_id='mysql_adventure',
                   task_id='populating_staging',
                   dag=dag)

bash_command = """
spark-submit $AIRFLOW_HOME/dags/spark_jobs/process_etl.py
"""

t3 = BashOperator(task_id='processing_data',
                  depends_on_past=False,
                  bash_command=bash_command,
                  dag=dag)

# t3 = SparkSubmitOperator(
#     task_id='test_spark',
#     application='$AIRFLOW_HOME/dags/spark_jobs/process_etl.py',
#     start_date=datetime(2020, 6, 4)
# )

t1 >> t2 >> t3
# Run a simple PySpark Script pyspark_local_task_one = BashOperator( task_id = "pyspark_local_task_one", bash_command = """spark-submit \ --master {{ params.master }} {{ params.base_path }}/{{ params.filename }} {{ ts }} {{ params.base_path }}""", params = { "master": "local[8]", "filename": "ch02/pyspark_task_one.py", "base_path": "{}/".format(project_home) }, dag=dag ) # Run another simple PySpark Script that depends on the previous one pyspark_local_task_two = BashOperator( task_id = "pyspark_local_task_two", bash_command = """spark-submit \ --master {{ params.master }} {{ params.base_path }}/{{ params.filename }} {{ ts }} {{ params.base_path }}""", params = { "master": "local[8]", "filename": "ch02/pyspark_task_two.py", "base_path": "{}/".format(project_home) }, dag=dag ) # Add the dependency from the second to the first task pyspark_local_task_two.set_upstream(pyspark_local_task_one)
'email_on_retry': False, 'retries': 1, 'retry_delay': timedelta(minutes=2) } batchdate = time.strftime("%Y%m%d") filename = "gs://dw-dev-insurance/ivans/current/IE_NCNU_" + batchdate + ".DAT" schedule_interval = "30 19 * * *" with DAG('DAG_GCP_IVANS_IE_LOAD', schedule_interval=schedule_interval, catchup=False, default_args=default_args) as dag: # t1 = BashOperator( # task_id='T1_COPY_TO_GCS', # bash_command='python /home/airflow/gcs/data/GCPDWH/util/transfer_mountpoint_to_gcs.py --config "config.properties" --productconfig "ivans.properties" --env "dev"' # ) t2 = BashOperator( task_id='T2_GCP_LOAD', bash_command= 'python /home/airflow/gcs/data/GCPDWH/ivans/load_ie_segments_to_bq_dataflow.py --config "config.properties" --productconfig "ivans.properties" --env "dev" --separator "|" --stripheader "0" --stripdelim "0" --addaudit "1" --writeDeposition "WRITE_APPEND" --system "IE" --input "gs://dw-dev-insurance/ivans/current/IE_NCNU_20180925.DAT"' ) # t3 = BashOperator( # task_id='T3_GCP_MOVE', # bash_command='gsutil mv gs://dw-dev-insurance/ivans/current/* gs://dw-dev-insurance/ivans/archive/' # ) # t1 >> t2 >> t3 t2
#COUNTRY='PL' dag = DAG('project-workflow',description='Project Workflow DAG', schedule_interval = '*/5 0 * * *', start_date=datetime(2017,7,1), catchup=False) xlsx_to_csv_task = BashOperator( task_id='xlsx_to_csv', bash_command='"$src"/test.sh "$country" 2nd_param_xlsx', env={'src': SRC, 'country': COUNTRY}, dag=dag) merge_command = SRC + '/test.sh ' + COUNTRY + ' 2nd_param_merge' merge_task = BashOperator( task_id='merge', bash_command=merge_command , dag=dag) my_templated_command = """ {{ params.src }}/test.sh {{ params.country}} 2nd_param_cleansing """ cleansing_task = BashOperator( task_id='cleansing', bash_command=my_templated_command, params={'src': SRC, 'country': COUNTRY}, dag=dag) x1_task = BashOperator( task_id='x1', bash_command='sleep 1 && echo [x1 start]', dag=dag)
import datetime

from dateutil.tz import *

from airflow.models.dag import DAG
from airflow.operators.bash_operator import BashOperator

valid_dag = DAG(
    dag_id="ValidDag",
    description="This is a valid test dag",
    start_date=datetime.datetime(2020, 5, 20, 0, 0),
)

task1 = BashOperator(bash_command="echo 1", task_id="Task1", dag=valid_dag)
task2 = BashOperator(bash_command='echo "2"', task_id="Task2", dag=valid_dag)

task1 >> task2
# 'queue': 'bash_queue', # 'pool': 'backfill', # 'priority_weight': 10, # 'end_date': datetime(2016, 4, 24), } dag = DAG('deploy_prediction_codegen', default_args=default_args) # TODO: dockerFileTag and dockerFilePath should be passed in from webhook build_image = BashOperator( task_id='build_docker_image', bash_command='sudo docker build -t fluxcapacitor/prediction-codegen /root/pipeline/prediction.ml/codegen/', dag=dag) push_image = BashOperator( task_id='push_docker_image', bash_command='sudo docker push fluxcapacitor/prediction-codegen', dag=dag) switch_to_aws = BashOperator( task_id='switch_to_aws', bash_command='sudo kubectl config use-context awsdemo', dag=dag) deploy_container_aws = BashOperator( task_id='deploy_container_aws', bash_command='sudo kubectl create -f /root/pipeline/prediction.ml/codegen-rc.yaml', dag=dag) switch_to_gcp = BashOperator( task_id='switch_to_gcp', bash_command='sudo kubectl config use-context gcpdemo',
from ''' + modeled_dataset + '''.Patient_SQL_4 union all select PATIENT_ID, CASE_ID, SEX, AGE_YRS, AGE_GROUP, WEIGHT, WEIGHT_UNIT from ''' + modeled_dataset + '''.Patient_SQL_5 union all select PATIENT_ID, CASE_ID, SEX, AGE as AGE_YRS, AGE_GROUP, WEIGHT, WEIGHT_UNIT from ''' + modeled_dataset + '''.Patient where AGE is null or AGE_UNIT = "YR" )''' with models.DAG('faers_workflow', schedule_interval=None, default_args=default_dag_args) as dag: create_staging = BashOperator( task_id='create_staging_dataset', bash_command='bq --location=US mk --dataset ' + staging_dataset) create_modeled = BashOperator( task_id='create_modeled_dataset', bash_command='bq --location=US mk --dataset ' + modeled_dataset) load_demo = BashOperator( task_id='load_demo', bash_command='bq --location=US load --autodetect --skip_leading_rows=1 \ --source_format=CSV ' + staging_dataset + '.Demographic \ "gs://cs327e_project_data/drug_data/demo2018q4.csv"\ primaryid:INT64,caseid:INT64,caseversion:INT64,i_f_code:STRING,i_f_code_num:INT64,event_dt:INT64,\ event_dt_num:DATE,mfr_dt:INT64,mfr_dt_num:DATE,init_fda_dt:INT64,init_fda_dt_num:DATE,fda_dt:INT64,fda_dt_num:DATE,\ rept_cod:STRING,rept_cod_num:INT64,auth_num:STRING,mfr_num:STRING,mfr_sndr:STRING,lit_ref:STRING,age:INT64,\
from airflow.models import DAG from datetime import datetime, timedelta five_days_ago = datetime.combine(datetime.today() - timedelta(5), datetime.min.time()) args = { 'owner': 'airflow', 'start_date': five_days_ago, } dag = DAG(dag_id='perf_dag_2', default_args=args, schedule_interval='@daily', dagrun_timeout=timedelta(minutes=60)) task_1 = BashOperator( task_id='perf_task_1', bash_command='sleep 5; echo "run_id={{ run_id }} | dag_run={{ dag_run }}"', dag=dag) for i in range(2, 5): task = BashOperator(task_id='perf_task_{}'.format(i), bash_command=''' sleep 5; echo "run_id={{ run_id }} | dag_run={{ dag_run }}" ''', dag=dag) task.set_upstream(task_1) if __name__ == "__main__": dag.cli()
] to_channels = ['toTwitter_A', 'toTwitter_B', 'toTwitter_C', 'toTwitter_D'] yesterday = date.today() - timedelta(days=1) dt = yesterday.strftime("%Y-%m-%d") # define where you want to store the tweets csv file in your local directory local_dir = "/tmp/" # define the location where you want to store in HDFS hdfs_dir = " /tmp/" for channel in to_channels: file_name = "to_" + channel + "_" + yesterday.strftime("%Y-%m-%d") + ".csv" load_to_hdfs = BashOperator( task_id="put_" + channel + "_to_hdfs", bash_command="HADOOP_USER_NAME=hdfs hadoop fs -put -f " + local_dir + file_name + hdfs_dir + channel + "/", dag=dag) load_to_hdfs.set_upstream(analyze_tweets) load_to_hive = HiveOperator(task_id="load_" + channel + "_to_hive", hql="LOAD DATA INPATH '" + hdfs_dir + channel + "/" + file_name + "' " "INTO TABLE " + channel + " " "PARTITION(dt='" + dt + "')", dag=dag) load_to_hive.set_upstream(load_to_hdfs) load_to_hive.set_downstream(hive_to_mysql) for channel in from_channels:
    'retry_delay': timedelta(minutes=5),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

THE_HUMAN_GENOME = "/Users/mlyons/genomics/reference/human_g1k_v37.fasta"
BAM_DIR = "/Users/mlyons/genomics/1kg/bam"
BIN_DIR = "/Users/mlyons/genomics/bin"

simple_mapping_pipeline = DAG(dag_id="simple_mapping_pipeline",
                              default_args=default_args,
                              schedule_interval=timedelta(minutes=2))

# figure out some sensor to look for a fastq file to map
fastq_sensor = FastqSensor(directory="/Users/mlyons/genomics/1kg/unprocessed_fastq",
                           dag=simple_mapping_pipeline,
                           task_id='fastq_sensor',
                           poke_interval=60)

# Assign the templated command to the name the operator below expects.
BWA_MEM_COMMAND = """bwa mem {{ path_to_reference_file }} {{ ti.xcom_pull('unmapped_fastq') }} > {{ path_to_output }}/{{ task_instance_key_str }}.sam"""

bwa_mem = BashOperator(bash_command=BWA_MEM_COMMAND,
                       params={'path_to_reference_file': THE_HUMAN_GENOME,
                               'path_to_output': BAM_DIR,
                               'bin': BIN_DIR},
                       dag=simple_mapping_pipeline,
                       task_id='bwa_mem',
                       wait_for_downstream=False)

bwa_mem.set_upstream(fastq_sensor)
'sql': pg_movies_dirs, 'filename': pg_csv_filename }, ) upload_pg_file = LocalFilesystemToGCSOperator( task_id="PG_UPLOAD_FILE", src=pg_csv_filename, dst=GCS_FILENAME.format('movies_directors', pg_base_filename), bucket=BUCKET, ) upload_mysql_file = LocalFilesystemToGCSOperator( task_id="MYSQL_UPLOAD_FILE", src=mysql_csv_filename, dst=GCS_FILENAME.format('movies_directors', mysql_base_filename), bucket=BUCKET, ) # t1, t2 and t3 are examples of tasks created by instantiating operators print_date = BashOperator( task_id='print_date', bash_command='date', ) mysql_poc_pull.set_upstream(print_date) pg_poc_pull.set_upstream(print_date) upload_mysql_file.set_upstream(mysql_poc_pull) upload_pg_file.set_upstream(pg_poc_pull)
def my_py_command(ds, **kwargs): # Print out the "foo" param passed in via # `airflow test example_passing_params_via_test_command run_this <date> # -tp '{"foo":"bar"}'` if kwargs["test_mode"]: print(" 'foo' was passed in via test={} command : kwargs[params][foo] \ = {}".format(kwargs["test_mode"], kwargs["params"]["foo"])) # Print out the value of "miff", passed in below via the Python Operator print(" 'miff' was passed in via task params = {}".format(kwargs["params"]["miff"])) return 1 my_templated_command = """ echo " 'foo was passed in via Airflow CLI Test command with value {{ params.foo }} " echo " 'miff was passed in via BashOperator with value {{ params.miff }} " """ run_this = PythonOperator( task_id='run_this', provide_context=True, python_callable=my_py_command, params={"miff":"agg"}, dag=dag) also_run_this = BashOperator( task_id='also_run_this', bash_command=my_templated_command, params={"miff":"agg"}, dag=dag) also_run_this.set_upstream(run_this)
pod_runtime_info_envs = [ PodRuntimeInfoEnv('MY_POD_NAMESPACE', 'metadata.namespace'), PodRuntimeInfoEnv('MY_POD_NAME', 'metadata.name'), PodRuntimeInfoEnv('MY_POD_IP', 'status.podIP') ] args = {'owner': 'Airflow', 'start_date': airflow.utils.dates.days_ago(2)} # base path returned zip dag path base_path = os.path.split(__file__)[0] plain_txt = read_packaged_file(f"{base_path}/plain_files/plain.txt") with DAG(dag_id=DAG_NAME, default_args=args, schedule_interval='30 0 * * *') as dag: # Use the zip binary, which is only found in this special docker image read_local_file = BashOperator(task_id='read_local_file', bash_command=f"echo {plain_txt}") # Limit resources on this operator/task with node affinity & tolerations spark_batch_job_distributed_mode = KubernetesPodOperator( namespace=os.environ['AIRFLOW__KUBERNETES__NAMESPACE'], name="spark_batch_job_distributed_mode", image=docker_image, image_pull_policy="IfNotPresent", cmds=["/bin/sh", "-c"], arguments=[spark_submit_sh], env_vars=envs, service_account_name="airflow", resources={ 'request_memory': "1024Mi", 'request_cpu': "100m" }, task_id="spark_batch_job_distributed_mode",
from airflow.operators.bash_operator import BashOperator from airflow.models import DAG from datetime import timedelta args = { 'owner': 'airflow', 'start_date': airflow.utils.dates.days_ago(3), } dag = DAG( dag_id='perf_dag_1', default_args=args, schedule_interval='@daily', dagrun_timeout=timedelta(minutes=60)) task_1 = BashOperator( task_id='perf_task_1', bash_command='sleep 5; echo "run_id={{ run_id }} | dag_run={{ dag_run }}"', dag=dag) for i in range(2, 5): task = BashOperator( task_id='perf_task_{}'.format(i), bash_command=''' sleep 5; echo "run_id={{ run_id }} | dag_run={{ dag_run }}" ''', dag=dag) task.set_upstream(task_1) if __name__ == "__main__": dag.cli()
dag = DAG(dag_id='impersonation_subdag', default_args=default_args) def print_today(): print('Today is {}'.format(datetime.utcnow())) subdag = DAG('impersonation_subdag.test_subdag_operation', default_args=default_args) PythonOperator( python_callable=print_today, task_id='exec_python_fn', dag=subdag) BashOperator( task_id='exec_bash_operator', bash_command='echo "Running within SubDag"', dag=subdag ) subdag_operator = SubDagOperator(task_id='test_subdag_operation', subdag=subdag, mode='reschedule', poke_interval=1, dag=dag)
from_channels = ['fromTwitter_A', 'fromTwitter_B', 'fromTwitter_C', 'fromTwitter_D'] to_channels = ['toTwitter_A', 'toTwitter_B', 'toTwitter_C', 'toTwitter_D'] yesterday = date.today() - timedelta(days=1) dt = yesterday.strftime("%Y-%m-%d") # define where you want to store the tweets csv file in your local directory local_dir = "/tmp/" # define the location where you want to store in HDFS hdfs_dir = " /tmp/" for channel in to_channels: file_name = "to_" + channel + "_" + yesterday.strftime("%Y-%m-%d") + ".csv" load_to_hdfs = BashOperator( task_id="put_" + channel + "_to_hdfs", bash_command="HADOOP_USER_NAME=hdfs hadoop fs -put -f " + local_dir + file_name + hdfs_dir + channel + "/", dag=dag) load_to_hdfs.set_upstream(analyze_tweets) load_to_hive = HiveOperator( task_id="load_" + channel + "_to_hive", hql="LOAD DATA INPATH '" + hdfs_dir + channel + "/" + file_name + "' " "INTO TABLE " + channel + " " "PARTITION(dt='" + dt + "')", dag=dag) load_to_hive.set_upstream(load_to_hdfs) load_to_hive.set_downstream(hive_to_mysql)
def get_bash_op(pkg_name, dag, configpath='/home/maloss/config/astgen_javascript_smt.config', cache_dir='/home/maloss/metadata', outdir='/home/maloss/result'): return BashOperator( task_id=get_sanitized_pkgname(pkg_name=pkg_name), execution_timeout=timedelta(hours=2), bash_command='cd /home/maloss/src/ && python main.py astfilter --ignore_dep_version -n %s -c %s -d %s -o %s -l javascript' % (pkg_name, configpath, cache_dir, outdir), dag=dag)
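# Hypothetical usage sketch for the get_bash_op factory above; the package name and the
# DAG object are placeholders for whatever the surrounding loop supplies.
astgen_javascript_task = get_bash_op(pkg_name='lodash', dag=dag)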
# 'execution_timeout': timedelta(seconds=300), # 'on_failure_callback': some_function, # 'on_success_callback': some_other_function, # 'on_retry_callback': another_function, # 'trigger_rule': u'all_success' } dag = DAG( 'tutorial', default_args=default_args, description='A simple tutorial DAG', schedule_interval=timedelta(days=1)) # t1, t2 and t3 are examples of tasks created by instantiating operators t1 = BashOperator( task_id='print_date', bash_command='date', dag=dag) t1.doc_md = """\ #### Task Documentation You can document your task using the attributes `doc_md` (markdown), `doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets rendered in the UI's Task Instance Details page. ![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import%20soul.png) """ dag.doc_md = __doc__ t2 = BashOperator( task_id='sleep', depends_on_past=False,
'email_on_failure': False, 'email_on_retry': False, 'retries': 1, 'retry_delay': timedelta(minutes=5), # 'queue': 'bash_queue', # 'pool': 'backfill', # 'priority_weight': 10, # 'end_date': datetime(2016, 1, 1), } dag = DAG( 'sample', default_args=default_args, schedule_interval=timedelta(1)) # t1, t2 and t3 are examples of tasks created by instantiating operators t1 = BashOperator( task_id='print_date', bash_command='date', dag=dag) t2 = BashOperator( task_id='sleep', bash_command='sleep 5', retries=3, dag=dag) templated_command = """ {% for i in range(5) %} echo "{{ ds }}" echo "{{ macros.ds_add(ds, 7)}}" echo "{{ params.my_param }}" {% endfor %} """
args = { 'owner': 'airflow', 'start_date': airflow.utils.dates.days_ago(2) } dag = DAG( dag_id='test_example_bash_operator', default_args=args, schedule_interval='0 0 * * *', dagrun_timeout=timedelta(minutes=60)) cmd = 'ls -l' run_this_last = DummyOperator(task_id='run_this_last', dag=dag) run_this = BashOperator( task_id='run_after_loop', bash_command='echo 1', dag=dag) run_this.set_downstream(run_this_last) for i in range(3): i = str(i) task = BashOperator( task_id='runme_'+i, bash_command='echo "{{ task_instance_key_str }}" && sleep 1', dag=dag) task.set_downstream(run_this) task = BashOperator( task_id='also_run_this', bash_command='echo "run_id={{ run_id }} | dag_run={{ dag_run }}"', dag=dag) task.set_downstream(run_this_last)
"""Demo DAG showing a Hello World example.""" import airflow from airflow.models import DAG from airflow.operators.bash_operator import BashOperator args = { "owner": "godatadriven", "start_date": airflow.utils.dates.days_ago(14) } dag = DAG( dag_id="1_hello_dag", default_args=args, schedule_interval="0 0 * * *", description="Demo DAG showing a hello world example.", ) t1 = BashOperator(task_id="sleep_a_bit", bash_command="sleep 5", dag=dag) t2 = BashOperator(task_id="print_date", bash_command="date", dag=dag) t1 >> t2
'email': ['*****@*****.**'], 'email_on_failure': False, 'email_on_retry': False, 'retries': 1, 'retry_delay': timedelta(minutes=5), # 'queue': 'bash_queue', # 'pool': 'backfill', # 'priority_weight': 10, # 'end_date': datetime(2016, 1, 1), } dag = DAG('test_1', default_args=default_args) # t1, t2 and t3 are examples of tasks created by instantiating operators t1 = BashOperator( task_id='step1', bash_command='echo start', dag=dag) template_command=''' sh step2.sh ''' t2 = BashOperator( task_id='step2', bash_command=template_command, retries=3, dag=dag) t2.set_upstream(t1)
from airflow.operators.python_operator import PythonOperator from airflow.operators.bash_operator import BashOperator def print_hello(): return 'Hello world!' default_args = { 'owner': 'Oliver', 'depends_on_past': False, 'start_date': datetime(2019, 2, 1), 'email': ['*****@*****.**'], 'email_on_failure': False, 'email_on_retry': False, 'retries': 1, 'retry_delay': timedelta(minutes=1) } dag = DAG('helloworld', description='hello world example', default_args=default_args, schedule_interval=timedelta(days=1), catchup=False) dummy_operator = DummyOperator(task_id='dummy_task', retries=3, dag=dag) python_operator = PythonOperator(task_id='python_task', python_callable=print_hello, dag=dag) bash_script = '/usr/local/airflow/scripts/hello_bash.sh' if path.exists(bash_script): bash_operator = BashOperator(task_id='bash_task', bash_command=f"{bash_script} ", dag=dag) bash_operator.set_upstream(python_operator) dummy_operator >> python_operator
def test_external_task_sensor_fn_multiple_execution_dates(self): bash_command_code = """ {% set s=execution_date.time().second %} echo "second is {{ s }}" if [[ $(( {{ s }} % 60 )) == 1 ]] then exit 1 fi exit 0 """ dag_external_id = TEST_DAG_ID + '_external' dag_external = DAG( dag_external_id, default_args=self.args, schedule_interval=timedelta(seconds=1)) task_external_with_failure = BashOperator( task_id="task_external_with_failure", bash_command=bash_command_code, retries=0, dag=dag_external) task_external_without_failure = DummyOperator( task_id="task_external_without_failure", retries=0, dag=dag_external) task_external_without_failure.run( start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + timedelta(seconds=1), ignore_ti_state=True) session = settings.Session() TI = TaskInstance try: task_external_with_failure.run( start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + timedelta(seconds=1), ignore_ti_state=True) # The test_with_failure task is excepted to fail # once per minute (the run on the first second of # each minute). except Exception as e: failed_tis = session.query(TI).filter( TI.dag_id == dag_external_id, TI.state == State.FAILED, TI.execution_date == DEFAULT_DATE + timedelta(seconds=1)).all() if len(failed_tis) == 1 and \ failed_tis[0].task_id == 'task_external_with_failure': pass else: raise e dag_id = TEST_DAG_ID dag = DAG( dag_id, default_args=self.args, schedule_interval=timedelta(minutes=1)) task_without_failure = ExternalTaskSensor( task_id='task_without_failure', external_dag_id=dag_external_id, external_task_id='task_external_without_failure', execution_date_fn=lambda dt: [dt + timedelta(seconds=i) for i in range(2)], allowed_states=['success'], retries=0, timeout=1, poke_interval=1, dag=dag) task_with_failure = ExternalTaskSensor( task_id='task_with_failure', external_dag_id=dag_external_id, external_task_id='task_external_with_failure', execution_date_fn=lambda dt: [dt + timedelta(seconds=i) for i in range(2)], allowed_states=['success'], retries=0, timeout=1, poke_interval=1, dag=dag) task_without_failure.run( start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True) with self.assertRaises(AirflowSensorTimeout): task_with_failure.run( start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
dag=dag) #: Get sidewalks shapefile from Atlas get_sw_shapefiles = PythonOperator( task_id='get_sidewalk_gis', python_callable=get_sidewalk_gis, on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Convert shp to geojson sidewalks_to_geojson = BashOperator( task_id='sidewalks_to_geojson', bash_command=shp_to_geojson(), on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Convert shp to topojson sidewalks_to_topojson = BashOperator( task_id='sidewalks_to_topojson', bash_command=shp_to_topojson(), on_failure_callback=notify, on_retry_callback=notify, on_success_callback=notify, dag=dag) #: Convert geojson to geobuf sidewalks_to_geobuf = PythonOperator(