def init_attributes(cls, orig_cls): cls.add_config_attributes( [ Attribute( attribute_name="module", parent_fields=["task_parameters"], comment="Name of the module to import from. E.g.: airflow.opeartors.python_operator", ), Attribute( attribute_name="class_name", parent_fields=["task_parameters"], comment="Name of the operator class. E.g.: PythonBranchOperator", ), Attribute( attribute_name="python", parent_fields=["task_parameters"], required=False, comment="Relative path to python file that implements the function", ), Attribute( attribute_name="function", parent_fields=["task_parameters"], required=False, comment="Name of the function", ), ] )
def init_attributes(cls, orig_cls): cls.add_config_attributes([ Attribute(attribute_name="database_type", comment="mysql, postgresql, etc"), Attribute(attribute_name="conn_id"), Attribute(attribute_name="table"), ])
def init_attributes(cls, orig_cls): cls.add_config_attributes( [ Attribute( attribute_name="schema", comment="Leave it empty for system tables" ), Attribute(attribute_name="table"), ] )
def init_attributes(cls, orig_cls): cls.add_config_attributes([ Attribute( attribute_name="folder", format_help= "ID shown at the URL address of the Google Drive folder", ), Attribute(attribute_name="file_name"), ])
def init_attributes(cls, orig_cls): cls.add_config_attributes([ Attribute( attribute_name="s3_protocol", required=False, comment="S3 protocol: s3a/s3/s3n", ), Attribute(attribute_name="bucket"), Attribute(attribute_name="path"), ])
def init_attributes(cls, orig_cls): cls.add_config_attributes([ Attribute(attribute_name="type", auto_value=orig_cls.ref_name), Attribute(attribute_name="description"), Attribute( attribute_name="inputs", format_help="list", comment="Use dagger init-io cli", ), Attribute( attribute_name="outputs", format_help="list", comment="Use dagger init-io cli", ), Attribute(attribute_name="pool", required=False), Attribute(attribute_name="timeout_in_seconds", required=False, format_help="int", validator=int), Attribute( attribute_name="airflow_task_parameters", nullable=True, format_help="dictionary", ), Attribute( attribute_name="template_parameters", nullable=True, format_help="dictionary", ), Attribute(attribute_name="task_parameters", nullable=True), ])
def init_attributes(cls, orig_cls): cls.add_config_attributes([ Attribute( attribute_name="sql", parent_fields=["task_parameters"], comment="Relative path to sql file", ), Attribute( attribute_name="postgres_conn_id", required=False, parent_fields=["task_parameters"], ), ])
def init_attributes(cls, orig_cls): cls.add_config_attributes([ Attribute( attribute_name="sql", parent_fields=["task_parameters"], comment="Relative path to sql file", ), Attribute( attribute_name="aws_conn_id", required=False, parent_fields=["task_parameters"], ), Attribute( attribute_name="s3_tmp_results_location", required=False, parent_fields=["task_parameters"], ), Attribute( attribute_name="s3_output_bucket", required=False, parent_fields=["task_parameters"], ), Attribute( attribute_name="s3_output_path", required=False, parent_fields=["task_parameters"], ), Attribute( attribute_name="workgroup", required=False, parent_fields=["task_parameters"], ), Attribute( attribute_name="is_incremental", required=True, validator=bool, comment="""If set yes then SQL going to be an INSERT INTO\ statement, otherwise a DROP TABLE; CTAS statement""", parent_fields=["task_parameters"], ), Attribute( attribute_name="partitioned_by", required=False, validator=list, comment= "The list of fields to partition by. These fields should come last in the select statement", parent_fields=["task_parameters"], ), Attribute( attribute_name="output_format", required=False, validator=str, comment="Output file format. One of PARQUET/ORC/JSON/CSV", parent_fields=["task_parameters"], ) ])
def init_attributes(cls, orig_cls): cls.add_config_attributes( [ Attribute( attribute_name="executable", parent_fields=["task_parameters"], comment="E.g.: my_code.py", ), Attribute( attribute_name="executable_prefix", nullable=True, parent_fields=["task_parameters"], comment="E.g.: python", ), Attribute(attribute_name="job_name", parent_fields=["task_parameters"], required=False), Attribute(attribute_name="absolute_job_name", parent_fields=["task_parameters"], required=False), Attribute( attribute_name="overrides", parent_fields=["task_parameters"], required=False, validator=dict, comment="Batch overrides dictionary: https://docs.aws.amazon.com/sdkforruby/api/Aws/Batch/Types/ContainerOverrides.html", ), Attribute( attribute_name="aws_conn_id", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="region_name", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="cluster_name", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="job_queue", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="max_retries", parent_fields=["task_parameters"], required=False, ), ] )
def init_attributes(cls, orig_cls): cls.add_config_attributes( [ Attribute( attribute_name="python", parent_fields=["task_parameters"], comment="Relative path to python file that implements the function", ), Attribute( attribute_name="function", parent_fields=["task_parameters"], comment="Name of the function", ), ] )
def init_attributes(cls, orig_cls): cls.add_config_attributes([ Attribute( attribute_name="channel", validator=str, comment= "Name of slack channel or slack id of user E.g.: #airflow-jobs or UN01EL1RU", ), Attribute( attribute_name="mentions", validator=list, nullable=True, comment= "List of slack user ids or slack groups. E.g.: <@UN01EL1RU> for user, @data-eng for slack group", ), ])
def init_attributes(cls, orig_cls): cls.add_config_attributes([ Attribute(attribute_name="type", auto_value=orig_cls.ref_name), Attribute(attribute_name="name"), Attribute( attribute_name="has_dependency", required=False, comment= "Weather this i/o should be added to the dependency graph or not. Default is True", ), Attribute( attribute_name="follow_external_dependency", required=False, comment= "Weather an external task sensor should be created if this dataset" "is created in another pipeline. Default is False", ), ])
def init_attributes(cls, orig_cls): cls.add_config_attributes([ Attribute( attribute_name="sql", nullable=True, parent_fields=["task_parameters"], comment= "Relative path to sql file. If not present default is SELECT * FROM <input_table>", ), Attribute( attribute_name="iam_role", required=False, parent_fields=["task_parameters"], ), Attribute( attribute_name="allow_overwrite", required=False, parent_fields=["task_parameters"], format_help="on/off", comment="Default is on", ), Attribute( attribute_name="postgres_conn_id", required=False, parent_fields=["task_parameters"], ), Attribute( attribute_name="extra_unload_parameters", required=True, nullable=True, parent_fields=["task_parameters"], format_help="dictionary", comment= "Any additional parameter will be added like <key value> \ Check https://docs.aws.amazon.com/redshift/latest/dg/r_UNLOAD.html#unload-parameters", ), ])
def init_attributes(cls, orig_cls): cls.add_config_attributes([ Attribute( attribute_name="sql", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="where", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="columns", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="num_mappers", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="split_by", parent_fields=["task_parameters"], required=True, ), Attribute( attribute_name="delete_target_dir", parent_fields=["task_parameters"], required=False, validator=bool, ), Attribute( attribute_name="format", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="emr_master", parent_fields=["task_parameters"], required=False, ), ])
def init_attributes(cls, orig_cls): cls.add_config_attributes([ Attribute( attribute_name="owner", validator=str, format_help="<team|person>@domain.com", ), Attribute(attribute_name="description", validator=str), Attribute(attribute_name="schedule", format_help="crontab e.g.: 0 3 * * *"), Attribute( attribute_name="start_date", format_help="2019-11-01T03:00", validator=lambda x: datetime.strptime(x, "%Y-%m-%dT%H:%M"), ), Attribute(attribute_name="airflow_parameters"), Attribute( attribute_name="default_args", required=True, nullable=True, validator=dict, parent_fields=["airflow_parameters"], format_help="dictionary", ), Attribute( attribute_name="dag_parameters", required=True, nullable=True, validator=dict, parent_fields=["airflow_parameters"], format_help="dictionary", ), Attribute( attribute_name="alerts", required=True, nullable=True, validator=list, format_help="list", ), ])
def init_attributes(cls, orig_cls): cls.add_config_attributes([ Attribute( attribute_name="spark_engine", parent_fields=["task_parameters"], required=True, comment= "Where to run spark job. Accepted values: emr, batch, glue", ), Attribute(attribute_name="job_file", parent_fields=["task_parameters"], required=True), Attribute(attribute_name="cluster_name", parent_fields=["task_parameters"], required=False), Attribute(attribute_name="job_bucket", parent_fields=["task_parameters"], required=False), Attribute( attribute_name="spark_args", parent_fields=["task_parameters"], required=False, format_help="Dictionary", ), Attribute( attribute_name="spark_conf_args", parent_fields=["task_parameters"], required=False, format_help="Dictionary", ), Attribute( attribute_name="job_file", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="extra_py_files", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="emr_conn_id", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="overrides", parent_fields=["task_parameters"], required=False, validator=dict, comment= "Batch overrides dictionary: https://docs.aws.amazon.com/sdkforruby/api/Aws/Batch/Types/ContainerOverrides.html", ), Attribute( attribute_name="aws_conn_id", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="region_name", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="job_queue", parent_fields=["task_parameters"], required=False, ), Attribute( attribute_name="max_retries", parent_fields=["task_parameters"], required=False, ), ])
def init_attributes(cls, orig_cls): cls.add_config_attributes( [Attribute(attribute_name="type", auto_value=orig_cls.ref_name)])
def init_attributes(cls, orig_cls): cls.add_config_attributes([ Attribute( attribute_name="iam_role", required=False, parent_fields=["task_parameters"], ), Attribute( attribute_name="columns", required=False, parent_fields=["task_parameters"], ), Attribute( attribute_name="incremental", required=True, parent_fields=["task_parameters"], validator=bool, format_help="on/off/yes/no/true/false", auto_value="true", ), Attribute( attribute_name="delete_condition", required=True, nullable=True, parent_fields=["task_parameters"], format_help="SQL where statement", comment="Recommended when doing incremental load", ), Attribute( attribute_name="max_errors", required=False, parent_fields=["task_parameters"], comment="Default is 0", ), Attribute( attribute_name="postgres_conn_id", required=False, parent_fields=["task_parameters"], ), Attribute( attribute_name="extra_load_parameters", required=True, nullable=True, parent_fields=["task_parameters"], format_help="dictionary", comment= "Any additional parameter will be added like <key value> \ Check https://docs.aws.amazon.com/redshift/latest/dg/r_COPY.html", ), Attribute( attribute_name="tmp_table_prefix", required=False, parent_fields=["task_parameters"], format_help="string", comment= "Only valid if job is truncated. If set table will be loaded into a tmp table prefixed " "<tmp_table_prefix> and than it will be moved to it's final destination", ), Attribute( attribute_name="create_table_ddl", required=False, parent_fields=["task_parameters"], format_help="string", comment="Path to the file which contains the create table ddl", ), Attribute( attribute_name="copy_ddl_from", required=False, parent_fields=["task_parameters"], format_help="string {schema}.{table}", comment= "If you have the schema of the table e.g.: in spectrum you can copy the ddl from there", ), Attribute( attribute_name="sort_keys", required=False, parent_fields=["task_parameters"], format_help="Comma separated list of strings. {col1,col2}", comment= "Redshift sort keys. If this is set, interleaved sort_keys must be null.", ), ])