示例#1
0
文件: cli.py 项目: vmuthuk2/dagster
def construct_scaffolded_file_contents(module_name, pipeline_name, environment_dict):
    '''Render the source of an Airflow DAG file that wraps a dagster pipeline.

    Args:
        module_name: Module name interpolated into the generated
            ``make_airflow_dag`` call; it must be importable wherever the
            generated DAG file is executed.
        pipeline_name: Pipeline name interpolated into the generated call.
        environment_dict: Environment config. It is dumped to block-style YAML
            and embedded in the generated file as the ``ENVIRONMENT`` string.

    Returns:
        The generated file contents, encoded to bytes with ``str.encode()``.
    '''
    printer = IndentingStringIoPrinter(indent_level=4)

    # Module docstring of the generated file. It has to mention both "airflow"
    # and "DAG", otherwise Airflow will not pick the file up as a DAG.
    printer.line('\'\'\'')
    printer.line(
        'The airflow DAG scaffold for {module_name}.{pipeline_name}'.format(
            module_name=module_name, pipeline_name=pipeline_name
        )
    )
    printer.blank_line()
    printer.line('Note that this docstring must contain the strings "airflow" and "DAG" for')
    printer.line('Airflow to properly detect it as a DAG')
    printer.line('See: http://bit.ly/307VMum')
    printer.line('\'\'\'')
    printer.line('import datetime')
    printer.line('import yaml')
    printer.blank_line()
    printer.line('from dagster_airflow.factory import make_airflow_dag')
    printer.blank_line()
    printer.blank_line()

    # The environment is serialized once here and re-parsed by the generated
    # file when Airflow imports it.
    printer.line('ENVIRONMENT = \'\'\'')
    printer.line(yaml.dump(environment_dict, default_flow_style=False))
    printer.line('\'\'\'')
    printer.blank_line()
    printer.blank_line()
    printer.comment('NOTE: these arguments should be edited for your environment')
    printer.line('DEFAULT_ARGS = {')
    with printer.with_indent():
        printer.line("'owner': 'airflow',")
        printer.line("'depends_on_past': False,")
        printer.line("'start_date': datetime.datetime(2019, 5, 7),")
        printer.line("'email': ['*****@*****.**'],")
        printer.line("'email_on_failure': False,")
        printer.line("'email_on_retry': False,")
    printer.line('}')
    printer.blank_line()
    printer.line('dag, tasks = make_airflow_dag(')
    with printer.with_indent():
        printer.comment(
            'NOTE: you must ensure that {module_name} is installed or available on sys.path, '
            'otherwise, this import will fail.'.format(module_name=module_name)
        )
        printer.line('module_name=\'{module_name}\','.format(module_name=module_name))
        printer.line('pipeline_name=\'{pipeline_name}\','.format(pipeline_name=pipeline_name))
        # Emit yaml.safe_load rather than a bare yaml.load: calling yaml.load
        # without a Loader is deprecated since PyYAML 5.1 and raises TypeError
        # on PyYAML 6, which would break the generated DAG file on import.
        printer.line("environment_dict=yaml.safe_load(ENVIRONMENT),")
        printer.line("dag_kwargs={'default_args': DEFAULT_ARGS, 'max_active_runs': 1}")
    printer.line(')')

    return printer.read().encode()
示例#2
0
文件: cli.py 项目: keyz/dagster
def construct_scaffolded_file_contents(module_name, job_name, run_config):
    """Build the text of an Airflow DAG scaffold file for a dagster job.

    The generated file embeds ``run_config`` as a YAML string named
    ``ENVIRONMENT``, declares a ``DEFAULT_ARGS`` dict whose ``start_date`` is
    the day before this function is called, and finishes with a
    ``make_airflow_dag`` call that references ``module_name`` and ``job_name``.

    Returns:
        The generated source, encoded as UTF-8 bytes.
    """
    start = datetime.now() - timedelta(days=1)
    rule = "#" * 80

    out = IndentingStringIoPrinter(indent_level=4)

    def emit_banner(message):
        # Boxed comment: a full-width rule, the message padded above and below
        # by bare comment lines, then a closing rule.
        out.line(rule)
        out.comment("#")
        out.comment(message)
        out.comment("#")
        out.line(rule)

    # Module docstring of the generated file.
    out.line("'''")
    out.line(f"The airflow DAG scaffold for {module_name}.{job_name}")
    out.blank_line()
    for text in (
        'Note that this docstring must contain the strings "airflow" and "DAG" for',
        "Airflow to properly detect it as a DAG",
        "See: http://bit.ly/307VMum",
        "'''",
        "import datetime",
    ):
        out.line(text)
    out.blank_line()
    out.line("import yaml")
    out.line("from dagster_airflow.factory import make_airflow_dag")
    out.blank_line()

    # Run config, serialized as block-style YAML inside a triple-quoted string.
    emit_banner("# This environment is auto-generated from your configs and/or presets")
    out.line("ENVIRONMENT = '''")
    out.line(yaml.dump(run_config, default_flow_style=False))
    out.line("'''")
    out.blank_line()
    out.blank_line()

    # Default Airflow arguments; start_date is rendered from `start`.
    emit_banner("# NOTE: these arguments should be edited for your environment")
    default_arg_entries = (
        "'owner': 'airflow',",
        "'depends_on_past': False,",
        f"'start_date': datetime.datetime({start.year}, {start.month}, {start.day}),",
        "'email': ['*****@*****.**'],",
        "'email_on_failure': False,",
        "'email_on_retry': False,",
    )
    out.line("DEFAULT_ARGS = {")
    with out.with_indent():
        for entry in default_arg_entries:
            out.line(entry)
    out.line("}")
    out.blank_line()

    # The make_airflow_dag(...) invocation.
    call_arguments = (
        f"module_name='{module_name}',",
        f"job_name='{job_name}',",
        "run_config=yaml.safe_load(ENVIRONMENT),",
        "dag_kwargs={'default_args': DEFAULT_ARGS, 'max_active_runs': 1}",
    )
    out.line("dag, tasks = make_airflow_dag(")
    with out.with_indent():
        out.comment(f"NOTE: you must ensure that {module_name} is ")
        out.comment("installed or available on sys.path, otherwise, this import will fail.")
        for argument in call_arguments:
            out.line(argument)
    out.line(")")

    generated_source = out.read()
    return generated_source.encode("utf-8")
示例#3
0
def scaffold(dag_name, module_name, pipeline_name, output_path, environment_file):
    '''Creates a DAG file for a specified dagster pipeline.

    The generated source is written to ``<output_path>/dags/<dag_name>.py``;
    the ``dags`` directory is created if it does not exist yet.

    Args:
        dag_name: Base name (without ``.py``) of the DAG file to write. Also
            passed to ``_construct_yml``.
        module_name: Module name interpolated into the generated
            ``make_airflow_dag`` call; it must be importable wherever the
            generated DAG file is executed.
        pipeline_name: Pipeline name interpolated into the generated call.
        output_path: Directory under which the ``dags`` folder lives.
        environment_file: Passed to ``_construct_yml`` to produce the YAML
            text embedded in the generated file.

    Raises:
        Exception: If ``output_path`` is empty or None.
    '''

    # Validate output path
    if not output_path:
        raise Exception('You must specify --output-path or set AIRFLOW_HOME to use this script.')

    # We construct the YAML environment and then put it directly in the DAG file
    environment_yaml = _construct_yml(environment_file, dag_name)

    printer = IndentingStringIoPrinter(indent_level=4)

    # Module docstring of the generated file. It has to mention both "airflow"
    # and "DAG", otherwise Airflow will not pick the file up as a DAG.
    printer.line('\'\'\'')
    printer.line(
        'The airflow DAG scaffold for {module_name}.{pipeline_name}'.format(
            module_name=module_name, pipeline_name=pipeline_name
        )
    )
    printer.blank_line()
    printer.line('Note that this docstring must contain the strings "airflow" and "DAG" for')
    printer.line('Airflow to properly detect it as a DAG')
    printer.line('See: http://bit.ly/307VMum')
    printer.line('\'\'\'')
    printer.line('import datetime')
    printer.line('import yaml')
    printer.blank_line()
    printer.line('from dagster_airflow.factory import make_airflow_dag')
    printer.blank_line()
    printer.blank_line()
    printer.line('ENVIRONMENT = \'\'\'')
    printer.line(environment_yaml)
    printer.line('\'\'\'')
    printer.blank_line()
    printer.blank_line()
    printer.comment('NOTE: these arguments should be edited for your environment')
    printer.line('DEFAULT_ARGS = {')
    with printer.with_indent():
        printer.line("'owner': 'airflow',")
        printer.line("'depends_on_past': False,")
        printer.line("'start_date': datetime.datetime(2019, 5, 7),")
        printer.line("'email': ['*****@*****.**'],")
        printer.line("'email_on_failure': False,")
        printer.line("'email_on_retry': False,")
    printer.line('}')
    printer.blank_line()
    printer.line('dag, tasks = make_airflow_dag(')
    with printer.with_indent():
        printer.comment(
            'NOTE: you must ensure that {module_name} is installed or available on sys.path, '
            'otherwise, this import will fail.'.format(module_name=module_name)
        )
        printer.line('module_name=\'{module_name}\','.format(module_name=module_name))
        printer.line('pipeline_name=\'{pipeline_name}\','.format(pipeline_name=pipeline_name))
        # Emit yaml.safe_load rather than a bare yaml.load: calling yaml.load
        # without a Loader is deprecated since PyYAML 5.1 and raises TypeError
        # on PyYAML 6, which would break the generated DAG file on import.
        printer.line("environment_dict=yaml.safe_load(ENVIRONMENT),")
        printer.line("dag_kwargs={'default_args': DEFAULT_ARGS, 'max_active_runs': 1}")
    printer.line(')')

    # Ensure output_path/dags exists
    dags_path = os.path.join(output_path, 'dags')
    if not os.path.isdir(dags_path):
        os.makedirs(dags_path)

    # Binary mode: the generated source is encoded explicitly before writing.
    dag_file = os.path.join(dags_path, dag_name + '.py')
    with open(dag_file, 'wb') as f:
        f.write(printer.read().encode())