def construct_scaffolded_file_contents(module_name, pipeline_name, environment_dict):
    '''Render the source text of an Airflow DAG file for a dagster pipeline.

    The generated file embeds ``environment_dict`` as a YAML string constant,
    defines an editable ``DEFAULT_ARGS`` dict, and calls ``make_airflow_dag``
    with the given module and pipeline names.

    Args:
        module_name: Module name interpolated into the generated file. The
            generated comment notes it must be installed or on sys.path.
        pipeline_name: Pipeline name interpolated into the generated file.
        environment_dict: Object dumped with ``yaml.dump`` into the generated
            ``ENVIRONMENT`` constant.

    Returns:
        The generated file contents, i.e. ``printer.read().encode()``.
    '''
    printer = IndentingStringIoPrinter(indent_level=4)

    # Module docstring of the generated file. The wording matters: the text
    # emitted below states that Airflow needs "airflow" and "DAG" to appear.
    printer.line('\'\'\'')
    printer.line(
        'The airflow DAG scaffold for {module_name}.{pipeline_name}'.format(
            module_name=module_name, pipeline_name=pipeline_name
        )
    )
    printer.blank_line()
    printer.line('Note that this docstring must contain the strings "airflow" and "DAG" for')
    printer.line('Airflow to properly detect it as a DAG')
    printer.line('See: http://bit.ly/307VMum')
    printer.line('\'\'\'')

    # Imports of the generated file.
    printer.line('import datetime')
    printer.line('import yaml')
    printer.blank_line()
    printer.line('from dagster_airflow.factory import make_airflow_dag')
    printer.blank_line()
    printer.blank_line()

    # The environment is embedded verbatim as a triple-quoted YAML string.
    printer.line('ENVIRONMENT = \'\'\'')
    printer.line(yaml.dump(environment_dict, default_flow_style=False))
    printer.line('\'\'\'')
    printer.blank_line()
    printer.blank_line()

    printer.comment('NOTE: these arguments should be edited for your environment')
    printer.line('DEFAULT_ARGS = {')
    with printer.with_indent():
        printer.line("'owner': 'airflow',")
        printer.line("'depends_on_past': False,")
        printer.line("'start_date': datetime.datetime(2019, 5, 7),")
        printer.line("'email': ['*****@*****.**'],")
        printer.line("'email_on_failure': False,")
        printer.line("'email_on_retry': False,")
    printer.line('}')
    printer.blank_line()

    printer.line('dag, tasks = make_airflow_dag(')
    with printer.with_indent():
        printer.comment(
            'NOTE: you must ensure that {module_name} is installed or available on sys.path, '
            'otherwise, this import will fail.'.format(module_name=module_name)
        )
        printer.line('module_name=\'{module_name}\','.format(module_name=module_name))
        printer.line('pipeline_name=\'{pipeline_name}\','.format(pipeline_name=pipeline_name))
        # Emit yaml.safe_load instead of bare yaml.load: calling yaml.load
        # without an explicit Loader is deprecated in PyYAML 5.1+ and raises
        # TypeError in PyYAML 6, which would break the generated DAG on import.
        printer.line("environment_dict=yaml.safe_load(ENVIRONMENT),")
        printer.line("dag_kwargs={'default_args': DEFAULT_ARGS, 'max_active_runs': 1}")
    printer.line(')')

    return printer.read().encode()
def _format_subdict(dict_, current_indent=0):
    """Render a str-keyed dict as a brace-delimited block of ``key: value`` lines.

    Each value is rendered with ``_format_item``. Every entry except the last
    is followed by a comma.

    Note that keys are ordered by their first character only; keys sharing a
    first character keep the relative order in which the dict yields them.

    Args:
        dict_: Dict with ``str`` keys (enforced via ``check.dict_param``).
        current_indent: Starting indent handed to the printer.

    Returns:
        Whatever the printer's ``read()`` returns for the rendered block.
    """
    check.dict_param(dict_, "dict_", key_type=str)

    out = IndentingStringIoPrinter(indent_level=2, current_indent=current_indent)
    out.line("{")

    total = len(dict_)
    ordered_keys = sorted(dict_, key=lambda name: name[0])
    for position, key in enumerate(ordered_keys):
        entry = dict_[key]
        with out.with_indent():
            # The nested rendering is produced at the printer's current indent,
            # then stripped of leading spaces and trailing newlines so it can
            # sit on the same line as its key.
            rendered = _format_item(entry, current_indent=out.current_indent)
            rendered = rendered.lstrip(" ").rstrip("\n")
            separator = "" if position == total - 1 else ","
            out.line(
                "{key}: {formatted_value}{comma}".format(
                    key=key, formatted_value=rendered, comma=separator
                )
            )

    out.line("}")
    return out.read()
def _format_subdict(dict_, current_indent=0):
    '''Format a dict with string keys as a multi-line, brace-wrapped listing.

    One ``key: value`` line is written per entry, with values rendered by
    ``_format_item``. A trailing comma follows every entry but the last.

    Ordering: the sort key is the first character of each key, so keys that
    start with the same character are not ordered relative to each other
    beyond the order the dict yields them in.

    Args:
        dict_: Dict whose keys must be ``str`` (checked by ``check.dict_param``).
        current_indent: Initial indent passed to the printer.

    Returns:
        The result of the printer's ``read()``.
    '''
    check.dict_param(dict_, 'dict_', key_type=str)

    writer = IndentingStringIoPrinter(indent_level=2, current_indent=current_indent)
    writer.line('{')

    last_index = len(dict_) - 1
    for index, key in enumerate(sorted(dict_, key=lambda k: k[0])):
        value = dict_[key]
        with writer.with_indent():
            nested = _format_item(value, current_indent=writer.current_indent)
            # Drop the nested block's leading spaces and trailing newlines
            # before placing it after the key.
            nested = nested.lstrip(' ').rstrip('\n')
            if index != last_index:
                comma = ','
            else:
                comma = ''
            writer.line(
                '{key}: {formatted_value}{comma}'.format(
                    key=key, formatted_value=nested, comma=comma
                )
            )

    writer.line('}')
    return writer.read()
def _format_config_sublist(config, current_indent=0):
    '''Render a sequence of config values as a bracket-delimited block.

    Each element is rendered with ``_format_config_item`` and written on its
    own line; every element except the last is followed by a comma.

    Args:
        config: Sized iterable of values to render.
        current_indent: Starting indent handed to the printer.

    Returns:
        Whatever the printer's ``read()`` returns for the rendered block.
    '''
    out = IndentingStringIoPrinter(indent_level=2, current_indent=current_indent)
    out.line('[')

    final_index = len(config) - 1
    for index, element in enumerate(config):
        with out.with_indent():
            # Render at the printer's current indent, then strip leading
            # spaces and trailing newlines before writing the line.
            text = _format_config_item(element, current_indent=out.current_indent)
            text = text.lstrip(' ').rstrip('\n')
            trailing = '' if index == final_index else ','
            out.line(
                '{formatted_value}{comma}'.format(formatted_value=text, comma=trailing)
            )

    out.line(']')
    return out.read()
def _format_sublist(dict_, current_indent=0):
    """Format a sequence as a multi-line, bracket-wrapped listing.

    Elements are rendered with ``_format_item``, one per line, with a comma
    after every element but the last.

    Args:
        dict_: Sized iterable of values to render. Despite the name, it is
            iterated element by element rather than by key.
        current_indent: Initial indent passed to the printer.

    Returns:
        The result of the printer's ``read()``.
    """
    writer = IndentingStringIoPrinter(indent_level=2, current_indent=current_indent)
    writer.line("[")

    count = len(dict_)
    for ordinal, item in enumerate(dict_, start=1):
        with writer.with_indent():
            nested = _format_item(item, current_indent=writer.current_indent)
            # Leading spaces and trailing newlines of the nested rendering are
            # removed before it is written.
            nested = nested.lstrip(" ").rstrip("\n")
            if ordinal != count:
                comma = ","
            else:
                comma = ""
            writer.line(
                "{formatted_value}{comma}".format(formatted_value=nested, comma=comma)
            )

    writer.line("]")
    return writer.read()
def construct_scaffolded_file_contents(module_name, job_name, run_config):
    """Build the source text of an Airflow DAG file that wraps a dagster job.

    The generated file embeds ``run_config`` as a YAML string constant named
    ``ENVIRONMENT``, defines an editable ``DEFAULT_ARGS`` dict whose
    ``start_date`` is the day before generation (from ``datetime.now()``), and
    calls ``make_airflow_dag`` with the given module and job names.

    Args:
        module_name: Module name interpolated into the generated file. The
            generated comment notes it must be installed or on sys.path.
        job_name: Job name interpolated into the generated file.
        run_config: Object dumped with ``yaml.dump`` into ``ENVIRONMENT``.

    Returns:
        The generated file contents encoded as UTF-8 bytes.
    """
    one_day_ago = datetime.now() - timedelta(1)
    out = IndentingStringIoPrinter(indent_level=4)
    banner = "#" * 80

    # Module docstring and imports of the generated file.
    out.line("'''")
    out.line(
        "The airflow DAG scaffold for {module_name}.{job_name}".format(
            module_name=module_name, job_name=job_name
        )
    )
    out.blank_line()
    for text in (
        'Note that this docstring must contain the strings "airflow" and "DAG" for',
        "Airflow to properly detect it as a DAG",
        "See: http://bit.ly/307VMum",
        "'''",
        "import datetime",
    ):
        out.line(text)
    out.blank_line()
    out.line("import yaml")
    out.line("from dagster_airflow.factory import make_airflow_dag")
    out.blank_line()

    # Banner plus the YAML-serialized run config.
    out.line(banner)
    out.comment("#")
    out.comment("# This environment is auto-generated from your configs and/or presets")
    out.comment("#")
    out.line(banner)
    out.line("ENVIRONMENT = '''")
    out.line(yaml.dump(run_config, default_flow_style=False))
    out.line("'''")
    out.blank_line()
    out.blank_line()

    # Banner plus the default Airflow arguments.
    out.line(banner)
    out.comment("#")
    out.comment("# NOTE: these arguments should be edited for your environment")
    out.comment("#")
    out.line(banner)
    out.line("DEFAULT_ARGS = {")
    # start date -> yesterday
    start_date_entry = "'start_date': datetime.datetime(%s, %d, %d)," % (
        one_day_ago.year,
        one_day_ago.month,
        one_day_ago.day,
    )
    with out.with_indent():
        for entry in (
            "'owner': 'airflow',",
            "'depends_on_past': False,",
            start_date_entry,
            "'email': ['*****@*****.**'],",
            "'email_on_failure': False,",
            "'email_on_retry': False,",
        ):
            out.line(entry)
    out.line("}")
    out.blank_line()

    # The make_airflow_dag(...) call.
    out.line("dag, tasks = make_airflow_dag(")
    with out.with_indent():
        out.comment(f"NOTE: you must ensure that {module_name} is ")
        out.comment(
            "installed or available on sys.path, otherwise, this import will fail."
        )
        out.line(f"module_name='{module_name}',")
        out.line(f"job_name='{job_name}',")
        out.line("run_config=yaml.safe_load(ENVIRONMENT),")
        out.line("dag_kwargs={'default_args': DEFAULT_ARGS, 'max_active_runs': 1}")
    out.line(")")

    generated_source = out.read()
    return generated_source.encode("utf-8")
def scaffold(dag_name, module_name, pipeline_name, output_path, environment_file):
    '''Creates a DAG file for a specified dagster pipeline'''
    # NOTE(review): the docstring above is kept verbatim. The error message
    # below refers to --output-path, so this looks like a CLI entry point whose
    # docstring may be shown as help text -- confirm before rewording it.
    #
    # Parameters:
    #   dag_name: used as the output file's base name (<dag_name>.py) and
    #       passed to _construct_yml.
    #   module_name: interpolated into the generated make_airflow_dag call.
    #   pipeline_name: interpolated into the generated make_airflow_dag call.
    #   output_path: directory under which a "dags" subdirectory is created
    #       (if missing) and the DAG file is written.
    #   environment_file: passed to _construct_yml to build the YAML text
    #       embedded in the generated file.
    #
    # Raises:
    #   Exception: if output_path is falsy.

    # Validate output path
    if not output_path:
        raise Exception('You must specify --output-path or set AIRFLOW_HOME to use this script.')

    # We construct the YAML environment and then put it directly in the DAG file
    environment_yaml = _construct_yml(environment_file, dag_name)

    printer = IndentingStringIoPrinter(indent_level=4)

    # Module docstring of the generated file.
    printer.line('\'\'\'')
    printer.line(
        'The airflow DAG scaffold for {module_name}.{pipeline_name}'.format(
            module_name=module_name, pipeline_name=pipeline_name
        )
    )
    printer.blank_line()
    printer.line('Note that this docstring must contain the strings "airflow" and "DAG" for')
    printer.line('Airflow to properly detect it as a DAG')
    printer.line('See: http://bit.ly/307VMum')
    printer.line('\'\'\'')

    # Imports of the generated file.
    printer.line('import datetime')
    printer.line('import yaml')
    printer.blank_line()
    printer.line('from dagster_airflow.factory import make_airflow_dag')
    printer.blank_line()
    printer.blank_line()

    # The environment YAML is embedded verbatim as a triple-quoted string.
    printer.line('ENVIRONMENT = \'\'\'')
    printer.line(environment_yaml)
    printer.line('\'\'\'')
    printer.blank_line()
    printer.blank_line()

    printer.comment('NOTE: these arguments should be edited for your environment')
    printer.line('DEFAULT_ARGS = {')
    with printer.with_indent():
        printer.line("'owner': 'airflow',")
        printer.line("'depends_on_past': False,")
        printer.line("'start_date': datetime.datetime(2019, 5, 7),")
        printer.line("'email': ['*****@*****.**'],")
        printer.line("'email_on_failure': False,")
        printer.line("'email_on_retry': False,")
    printer.line('}')
    printer.blank_line()

    printer.line('dag, tasks = make_airflow_dag(')
    with printer.with_indent():
        printer.comment(
            'NOTE: you must ensure that {module_name} is installed or available on sys.path, '
            'otherwise, this import will fail.'.format(module_name=module_name)
        )
        printer.line('module_name=\'{module_name}\','.format(module_name=module_name))
        printer.line('pipeline_name=\'{pipeline_name}\','.format(pipeline_name=pipeline_name))
        # Emit yaml.safe_load instead of bare yaml.load: calling yaml.load
        # without an explicit Loader is deprecated in PyYAML 5.1+ and raises
        # TypeError in PyYAML 6, which would break the generated DAG on import.
        printer.line("environment_dict=yaml.safe_load(ENVIRONMENT),")
        printer.line("dag_kwargs={'default_args': DEFAULT_ARGS, 'max_active_runs': 1}")
    printer.line(')')

    # Ensure output_path/dags exists
    dags_path = os.path.join(output_path, 'dags')
    if not os.path.isdir(dags_path):
        os.makedirs(dags_path)

    # Written in binary mode because the contents are encoded before writing.
    dag_file = os.path.join(dags_path, dag_name + '.py')
    with open(dag_file, 'wb') as f:
        f.write(printer.read().encode())