Пример #1
0
    def build(self, dag):
        """
        Populate ``dag`` with the adapter task and the sensor that gates it.

        An ``AdapterOperator`` for ``self.schema`` (one-hour fixed duration
        strategy) is created and registered with a ``TaskSensorService``
        sequential sensor. An "hour is ready" sensor, built by
        ``HourIsReadySensorOperatorBuilder``, is then placed upstream of it.

        :param dag: The DAG to which all relevant "adapter" operators should be added
        :type dag: airflow.models.DAG
        :return: The result of ``hour_is_ready_sensor >> adapter_operator``
                 (the adapter operator, assuming Airflow's ``>>`` returns its
                 right-hand operand)
        :rtype: presidio.operators.fixed_duration_jar_operator.FixedDurationJarOperator
        """
        self.log.debug("populating the %s dag with adapter tasks", dag.dag_id)

        sensor_service = TaskSensorService()
        adapter_task = AdapterOperator(
            fixed_duration_strategy=timedelta(hours=1),
            command=PresidioDagBuilder.presidio_command,
            schema=self.schema,
            dag=dag,
        )
        sensor_service.add_task_sequential_sensor(adapter_task)

        # The readiness sensor is given a timeout of one week, in seconds.
        one_week_in_seconds = 60 * 60 * 24 * 7
        readiness_sensor_builder = HourIsReadySensorOperatorBuilder(
            self.schema,
            timeout=one_week_in_seconds,
            time_to_sleep_in_seconds=60,
        )
        readiness_sensor = readiness_sensor_builder.build(dag)

        return readiness_sensor >> adapter_task
Пример #2
0
    def __init__(self, builder, dag, add_sequential_sensor,
                 short_circuit_operator, *args, **kwargs):
        """
        Run ``builder`` against ``dag`` and record the entry and exit tasks
        it added.

        Tasks that appear in ``dag.tasks`` only after ``builder.build(dag)``
        are treated as the group. Group tasks without upstream tasks become
        the first tasks, and group tasks without downstream tasks become the
        last tasks. ``MultiPointGroupConnector`` instances are excluded from
        both lists.

        :param builder: object whose ``build(dag)`` populates the DAG
        :param dag: the DAG this connector and the built tasks belong to
        :param add_sequential_sensor: when truthy, the first tasks are
               replaced by the result of ``self.add_sensor`` (which receives
               every newly added task, not only the first ones)
        :param short_circuit_operator: when truthy, the first tasks are
               replaced by the result of ``self.add_short_circuit``
        :param args: forwarded to the parent constructor
        :param kwargs: forwarded to the parent constructor
        """
        super(MultiPointGroupConnector, self).__init__(*args, dag=dag,
                                                       **kwargs)

        # Snapshot the task list before building so the additions can be found.
        tasks_before_build = dag.tasks
        builder.build(dag)

        added_tasks = []
        for candidate in dag.tasks:
            if candidate not in tasks_before_build:
                added_tasks.append(candidate)

        self._first_tasks = [
            candidate for candidate in added_tasks
            if not (candidate.upstream_list
                    or isinstance(candidate, MultiPointGroupConnector))
        ]
        self._last_tasks = [
            candidate for candidate in added_tasks
            if not (candidate.downstream_list
                    or isinstance(candidate, MultiPointGroupConnector))
        ]

        sensor_service = TaskSensorService()
        if add_sequential_sensor:
            self._first_tasks = self.add_sensor(added_tasks, sensor_service)
        if short_circuit_operator:
            self._first_tasks = self.add_short_circuit(
                short_circuit_operator, self._first_tasks, sensor_service)
Пример #3
0
    def _build_output_operator(self, smart_record_conf_name, entity_type, dag,
                               smart_operator):
        """
        Add the hourly output task, the entity score task and the short
        circuits that gate them to ``dag``, downstream of ``smart_operator``.

        :param smart_record_conf_name: passed to ``OutputOperator`` and
               ``EntityScoreOperatorBuilder``
        :param entity_type: passed to ``OutputOperator``,
               ``EntityScoreOperatorBuilder`` and ``self._push_forwarding``
        :param dag: the DAG to populate
        :param smart_operator: operator placed directly upstream of the
               output short circuit
        :return: the operator built by ``EntityScoreOperatorBuilder``
        """
        self.log.debug("populating the %s dag with output tasks", dag.dag_id)

        sensor_service = TaskSensorService()

        def _hourly_output_may_run(**kwargs):
            # Validates that output runs in intervals that are no less than
            # hourly and that the dag starts only after the defined gap.
            # Both checks are always evaluated: `&` does not short-circuit.
            interval_is_valid = is_execution_date_valid(
                kwargs['execution_date'], FIX_DURATION_STRATEGY_HOURLY,
                dag.schedule_interval)
            validate_gap = PresidioDagBuilder.validate_the_gap_between_dag_start_date_and_current_execution_date
            gap_is_valid = validate_gap(
                dag, self._min_gap_from_dag_start_date_to_start_scoring,
                kwargs['execution_date'], dag.schedule_interval)
            return interval_is_valid & gap_is_valid

        hourly_gate = self._create_infinite_retry_short_circuit_operator(
            task_id='output_short_circuit',
            dag=dag,
            python_callable=_hourly_output_may_run)

        # Hourly output processor, guarded by a sequential sensor and the
        # hourly short circuit.
        hourly_output_task = OutputOperator(
            fixed_duration_strategy=timedelta(hours=1),
            command=PresidioDagBuilder.presidio_command,
            smart_record_conf_name=smart_record_conf_name,
            entity_type=entity_type,
            dag=dag,
        )
        sensor_service.add_task_sequential_sensor(hourly_output_task)
        sensor_service.add_task_short_circuit(hourly_output_task, hourly_gate)

        # Entity score task.
        entity_score_builder = EntityScoreOperatorBuilder(
            smart_record_conf_name, entity_type)
        entity_score_task = entity_score_builder.build(dag)

        def _daily_entity_score_may_run(**kwargs):
            # Same shape as the hourly check, using the daily strategy and
            # the minimal gap configured for modeling.
            interval_is_valid = is_execution_date_valid(
                kwargs['execution_date'], FIX_DURATION_STRATEGY_DAILY,
                dag.schedule_interval)
            validate_gap = PresidioDagBuilder.validate_the_gap_between_dag_start_date_and_current_execution_date
            gap_is_valid = validate_gap(
                dag,
                EntityScoreOperatorBuilder.get_min_gap_from_dag_start_date_to_start_modeling(
                    PresidioDagBuilder.conf_reader),
                kwargs['execution_date'], dag.schedule_interval)
            return interval_is_valid & gap_is_valid

        # Daily short circuit that wires the output processing to the entity
        # score recalculation.
        daily_gate = self._create_infinite_retry_short_circuit_operator(
            task_id='output_daily_short_circuit',
            dag=dag,
            python_callable=_daily_entity_score_may_run)

        daily_gate >> entity_score_task
        self._push_forwarding(hourly_output_task, daily_gate, dag,
                              entity_type)

        smart_operator >> hourly_gate

        return entity_score_task
Пример #4
0
    def _create_sub_dag_operator(self, sub_dag_builder, sub_dag_id, dag,
                                 short_circuit_operator,
                                 add_sequential_sensor):
        """
        Create a sub dag of ``dag``, fill it using ``sub_dag_builder`` and
        wrap it with a ``SubDagOperator`` that belongs to ``dag``.

        The sub dag is named ``<dag.dag_id>.<sub_dag_id>`` and reuses the
        parent's schedule interval, start date and default args. Retry
        settings for the wrapping operator come from
        ``self._calc_retry_args(sub_dag_id)``.

        :param sub_dag_builder: builder whose ``build(sub_dag)`` result is
               used as the operator's ``subdag``
        :param sub_dag_id: suffix of the sub dag id and task id of the
               wrapping operator
        :param dag: the parent DAG
        :param short_circuit_operator: when truthy, wired to the wrapping
               operator through ``TaskSensorService.add_task_short_circuit``
        :param add_sequential_sensor: when truthy, a sequential sensor is
               added to the wrapping operator
        :return: SubDagOperator
        """
        child_dag_id = '{}.{}'.format(dag.dag_id, sub_dag_id)
        child_dag = DAG(dag_id=child_dag_id,
                        schedule_interval=dag.schedule_interval,
                        start_date=dag.start_date,
                        default_args=dag.default_args)

        retry_conf = self._calc_retry_args(sub_dag_id)

        # Resolve the constructor arguments in the order the operator call
        # lists them.
        populated_child_dag = sub_dag_builder.build(child_dag)
        retries = retry_conf['retries']
        retry_delay = timedelta(seconds=int(retry_conf['retry_delay']))
        exponential_backoff = retry_conf['retry_exponential_backoff']
        max_retry_delay = timedelta(
            seconds=int(retry_conf['max_retry_delay']))

        sub_dag_operator = SubDagOperator(
            subdag=populated_child_dag,
            task_id=sub_dag_id,
            dag=dag,
            retries=retries,
            retry_delay=retry_delay,
            retry_exponential_backoff=exponential_backoff,
            max_retry_delay=max_retry_delay,
        )

        sensor_service = TaskSensorService()
        if add_sequential_sensor:
            sensor_service.add_task_sequential_sensor(sub_dag_operator)
        if short_circuit_operator:
            sensor_service.add_task_short_circuit(sub_dag_operator,
                                                  short_circuit_operator)

        return sub_dag_operator
Пример #5
0
    def _build_smart(self, root_dag_gap_sensor_operator, smart_dag,
                     smart_record_conf_name):
        """
        Add the smart events task, its hourly short circuit and the smart
        model trigger to ``smart_dag``.

        :param root_dag_gap_sensor_operator: operator placed directly
               upstream of the hourly short circuit
        :param smart_dag: the DAG to populate
        :param smart_record_conf_name: passed to ``SmartEventsOperator`` as
               ``smart_events_conf`` and used to look up the smart model dag id
        :return: the ``SmartEventsOperator`` that was created
        """
        sensor_service = TaskSensorService()

        def _hourly_smart_may_run(**kwargs):
            # Both checks are always evaluated: `&` does not short-circuit.
            interval_is_valid = is_execution_date_valid(
                kwargs['execution_date'], FIX_DURATION_STRATEGY_HOURLY,
                smart_dag.schedule_interval)
            validate_gap = PresidioDagBuilder.validate_the_gap_between_dag_start_date_and_current_execution_date
            gap_is_valid = validate_gap(
                smart_dag,
                self._min_gap_from_dag_start_date_to_start_scoring,
                kwargs['execution_date'], smart_dag.schedule_interval)
            return interval_is_valid & gap_is_valid

        hourly_gate = self._create_infinite_retry_short_circuit_operator(
            task_id='ade_scoring_hourly_short_circuit',
            dag=smart_dag,
            python_callable=_hourly_smart_may_run)

        smart_events_task = SmartEventsOperator(
            command=SmartEventsOperator.liors_special_run_command,
            fixed_duration_strategy=FIX_DURATION_STRATEGY_HOURLY,
            smart_events_conf=smart_record_conf_name,
            dag=smart_dag,
        )
        sensor_service.add_task_sequential_sensor(smart_events_task)
        sensor_service.add_task_short_circuit(smart_events_task, hourly_gate)

        root_dag_gap_sensor_operator >> hourly_gate

        model_dag_id = SmartModelDagFactory.get_dag_id(smart_record_conf_name)

        def _daily_trigger_callable(context, dag_run_obj):
            # Hand back the dag run object only when the execution date is
            # valid for the daily strategy; otherwise return None.
            if is_execution_date_valid(context['execution_date'],
                                       FIX_DURATION_STRATEGY_DAILY,
                                       smart_dag.schedule_interval):
                return dag_run_obj
            return None

        model_trigger = self._create_expanded_trigger_dag_run_operator(
            "smart_model_trigger", model_dag_id, smart_dag,
            _daily_trigger_callable)

        set_schedule_interval(model_dag_id, FIX_DURATION_STRATEGY_DAILY)
        smart_events_task >> model_trigger
        return smart_events_task
Пример #6
0
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2015, 6, 1),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}
# DAG that hosts the example tasks below. `default_args` is presumably the
# dictionary closed just above (its opening line is outside this excerpt).
dag = DAG('sensor_example', default_args=default_args)

# Service used below to attach a sequential sensor to each example task.
taskSensorService = TaskSensorService()

# t1, t2 and t3 are examples of tasks created by instantiating operators
# t1 runs the shell `date` command.
t1 = BashOperator(task_id='print_date', bash_command='date', dag=dag)
taskSensorService.add_task_sequential_sensor(t1)

# t2 runs `sleep 5` and sets its own retry count (3) instead of the
# `retries` value listed in the defaults above.
t2 = BashOperator(task_id='sleep', bash_command='sleep 5', retries=3, dag=dag)
taskSensorService.add_task_sequential_sensor(t2)

# Jinja template: five iterations, each echoing `ds`, `ds` plus 7 days and
# `params.my_param`. Where it is used is outside this excerpt (presumably as
# the bash_command of a later task; confirm against the rest of the script).
templated_command = """
    {% for i in range(5) %}
        echo "{{ ds }}"
        echo "{{ macros.ds_add(ds, 7)}}"
        echo "{{ params.my_param }}"
    {% endfor %}
"""
Пример #7
0
    def build(self, dag):
        """
        Populate an indicator DAG with the adapter, input and scoring
        operators and configure the dependencies between them.

        The schema is read from ``dag.default_args['schema']``. When that
        schema is registered with ``InputPreProcessingDagFactory``, an input
        pre-processing trigger and a gap sensor are wired around the input
        operator as well. A model trigger is always placed downstream of the
        input operator.

        :param dag: The indicator DAG to populate
        :type dag: airflow.models.DAG
        :return: The given indicator DAG, after it has been populated
        :rtype: airflow.models.DAG
        """
        self.log.debug("populating the %s dag with input tasks", dag.dag_id)
        schema_name = dag.default_args.get('schema')

        adapter_task = AdapterOperatorBuilder(schema_name).build(dag)

        # Input task, guarded by its own sequential sensor.
        input_sensor_service = TaskSensorService()
        input_task = InputOperator(
            fixed_duration_strategy=timedelta(hours=1),
            command=PresidioDagBuilder.presidio_command,
            schema=schema_name,
            dag=dag,
        )
        input_sensor_service.add_task_sequential_sensor(input_task)

        self.log.debug("populating the %s dag with scoring tasks", dag.dag_id)
        scoring_sensor_service = TaskSensorService()

        feature_aggregations_task = FeatureAggregationsOperator(
            fixed_duration_strategy=FIX_DURATION_STRATEGY_HOURLY,
            command=PresidioDagBuilder.presidio_command,
            data_source=schema_name,
            dag=dag,
        )
        scoring_sensor_service.add_task_sequential_sensor(
            feature_aggregations_task)

        score_aggregations_task = ScoreAggregationsOperator(
            fixed_duration_strategy=FIX_DURATION_STRATEGY_HOURLY,
            command=PresidioDagBuilder.presidio_command,
            data_source=schema_name,
            dag=dag,
        )
        scoring_sensor_service.add_task_sequential_sensor(
            score_aggregations_task)

        def _hourly_scoring_may_run(**kwargs):
            # Both checks are always evaluated: `&` does not short-circuit.
            interval_is_valid = is_execution_date_valid(
                kwargs['execution_date'], FIX_DURATION_STRATEGY_HOURLY,
                get_schedule_interval(dag))
            validate_gap = PresidioDagBuilder.validate_the_gap_between_dag_start_date_and_current_execution_date
            gap_is_valid = validate_gap(
                dag, self._min_gap_from_dag_start_date_to_start_scoring,
                kwargs['execution_date'], get_schedule_interval(dag))
            return interval_is_valid & gap_is_valid

        hourly_gate = self._create_infinite_retry_short_circuit_operator(
            task_id='ade_scoring_hourly_short_circuit',
            dag=dag,
            python_callable=_hourly_scoring_may_run)

        if schema_name in InputPreProcessingDagFactory.get_registered_schemas():
            pre_processing_trigger = self._build_input_pre_processing_trigger_operator(
                dag, schema_name)

            pre_processing_gap_sensor = DagIntervalGapSequentialSensorOperator(
                dag=dag,
                task_id='input_pre_processing_gap_sensor_{0}'.format(
                    schema_name),
                dag_ids=[InputPreProcessingDagFactory.get_dag_id(schema_name)],
                interval=timedelta(hours=1),
                start_time=dag.start_date,
                fixed_duration_strategy=FIX_DURATION_STRATEGY_DAILY,
                poke_interval=5,
            )

            pre_processing_gap_sensor >> input_task >> pre_processing_trigger

        # adapter -> input -> hourly gate -> both aggregation tasks
        adapter_task >> input_task >> hourly_gate
        scoring_sensor_service.add_task_short_circuit(
            feature_aggregations_task, hourly_gate)
        scoring_sensor_service.add_task_short_circuit(
            score_aggregations_task, hourly_gate)

        model_trigger_task = self._build_model_trigger_operator(dag,
                                                                schema_name)
        input_task >> model_trigger_task

        return dag
Пример #8
0
def test_task_sensor_service():
    """Create and return a new ``TaskSensorService`` instance."""
    service = TaskSensorService()
    return service