Пример #1
0
  def create_task(self, main_dag: models.DAG = None, is_retry: bool = False
                 ) -> data_connector_operator.DataConnectorOperator:
    """Builds the Cloud Storage to Google Analytics connector task.

    The bucket, prefix, content type and tracking id are looked up with
    `models.Variable.get` each time this method is called.

    Args:
      main_dag: The DAG that the created task is attached to.
      is_retry: Retry flag; used to derive the task id and forwarded to the
        operator unchanged.

    Returns:
      The configured DataConnectorOperator.
    """
    # Argument groups are declared in call order so that the variable lookups
    # run top to bottom.
    task_identity = {
        'dag_name': _DAG_NAME,
        'task_id': self.get_task_id('gcs_to_ga', is_retry),
        'input_hook': hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
        'output_hook': hook_factory.OutputHookType.GOOGLE_ANALYTICS,
        'is_retry': is_retry,
    }
    monitoring_settings = {
        'return_report': self.dag_enable_run_report,
        'enable_monitoring': self.dag_enable_monitoring,
        'monitoring_dataset': self.monitoring_dataset,
        'monitoring_table': self.monitoring_table,
        'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
    }
    gcs_source = {
        'gcs_bucket': models.Variable.get('gcs_bucket_name', ''),
        'gcs_prefix': models.Variable.get('gcs_bucket_prefix', ''),
        # The content type is upper-cased before it reaches the operator.
        'gcs_content_type': models.Variable.get(
            'gcs_content_type', _GCS_CONTENT_TYPE).upper(),
    }
    ga_destination = {
        'ga_tracking_id': models.Variable.get('ga_tracking_id', ''),
        'ga_base_params': _GA_BASE_PARAMS,
    }
    return data_connector_operator.DataConnectorOperator(
        **task_identity,
        **monitoring_settings,
        **gcs_source,
        **ga_destination,
        dag=main_dag)
Пример #2
0
    def create_task(self,
                    main_dag: Optional[dag.DAG] = None,
                    is_retry: bool = False
                    ) -> data_connector_operator.DataConnectorOperator:
        """Builds the BigQuery to Google Analytics connector task.

        The BigQuery dataset/table ids and the tracking id are looked up with
        `variable.Variable.get` each time this method is called.

        Args:
          main_dag: The DAG that the created task is attached to.
          is_retry: Retry flag; used to derive the task id and forwarded to
            the operator unchanged.

        Returns:
          The configured DataConnectorOperator.
        """
        # Argument groups are declared in call order so that the variable
        # lookups run top to bottom.
        task_identity = {
            'dag_name': self.dag_name,
            'task_id': self.get_task_id('bq_to_ga', is_retry),
            'input_hook': hook_factory.InputHookType.BIG_QUERY,
            'output_hook': hook_factory.OutputHookType.GOOGLE_ANALYTICS,
            'is_retry': is_retry,
        }
        monitoring_settings = {
            'return_report': self.dag_enable_run_report,
            'enable_monitoring': self.dag_enable_monitoring,
            'monitoring_dataset': self.monitoring_dataset,
            'monitoring_table': self.monitoring_table,
            'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
        }
        bq_source = {
            'bq_conn_id': _BQ_CONN_ID,
            'bq_dataset_id': variable.Variable.get('bq_dataset_id', ''),
            'bq_table_id': variable.Variable.get('bq_table_id', ''),
        }
        ga_destination = {
            'ga_tracking_id': variable.Variable.get('ga_tracking_id', ''),
            'ga_base_params': _GA_BASE_PARAMS,
        }
        return data_connector_operator.DataConnectorOperator(
            **task_identity,
            **monitoring_settings,
            **bq_source,
            **ga_destination,
            dag=main_dag)
Пример #3
0
    def create_task(
        self,
        main_dag: models.DAG = None,
        is_retry: bool = False
    ) -> data_connector_operator.DataConnectorOperator:
        """Builds the BigQuery to Google Ads offline conversions task.

        The BigQuery dataset/table ids and the Ads credentials are looked up
        with `models.Variable.get` each time this method is called.

        Args:
          main_dag: The DAG that the created task is attached to.
          is_retry: Retry flag; used to derive the task id and forwarded to
            the operator unchanged.

        Returns:
          The configured DataConnectorOperator.
        """
        # Argument groups are declared in call order so that the variable
        # lookups run top to bottom.
        task_identity = {
            'dag_name': _DAG_NAME,
            'task_id': self.get_task_id('bq_to_ads_oc', is_retry),
            'input_hook': hook_factory.InputHookType.BIG_QUERY,
            'output_hook': (hook_factory.OutputHookType
                            .GOOGLE_ADS_OFFLINE_CONVERSIONS),
            'is_retry': is_retry,
        }
        monitoring_settings = {
            'return_report': self.dag_enable_run_report,
            'enable_monitoring': self.dag_enable_monitoring,
            'monitoring_dataset': self.monitoring_dataset,
            'monitoring_table': self.monitoring_table,
            'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
        }
        bq_source = {
            'bq_conn_id': _BQ_CONN_ID,
            'bq_dataset_id': models.Variable.get('bq_dataset_id', ''),
            'bq_table_id': models.Variable.get('bq_table_id', ''),
        }
        ads_destination = {
            'ads_credentials': models.Variable.get('ads_credentials', ''),
        }
        return data_connector_operator.DataConnectorOperator(
            **task_identity,
            **monitoring_settings,
            **bq_source,
            **ads_destination,
            dag=main_dag)
Пример #4
0
  def create_task(self, main_dag: models.DAG = None, is_retry: bool = False
                 ) -> data_connector_operator.DataConnectorOperator:
    """Builds the Cloud Storage to Google Ads Customer Match connector task.

    The Cloud Storage and Customer Match settings are looked up with
    `models.Variable.get` each time this method is called.

    Args:
      main_dag: The DAG that the created task is attached to.
      is_retry: Retry flag; used to derive the task id and forwarded to the
        operator unchanged.

    Returns:
      The configured DataConnectorOperator.
    """
    # Argument groups are declared in call order so that the variable lookups
    # run top to bottom.
    task_identity = {
        'dag_name': _DAG_NAME,
        'task_id': self.get_task_id('gcs_to_ads_cm', is_retry),
        'input_hook': hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
        'output_hook': hook_factory.OutputHookType.GOOGLE_ADS_CUSTOMER_MATCH,
        'is_retry': is_retry,
    }
    monitoring_settings = {
        'return_report': self.dag_enable_run_report,
        'enable_monitoring': self.dag_enable_monitoring,
        'monitoring_dataset': self.monitoring_dataset,
        'monitoring_table': self.monitoring_table,
        'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
    }
    gcs_source = {
        'gcs_bucket': models.Variable.get('gcs_bucket_name', ''),
        # The content type is upper-cased before it reaches the operator.
        'gcs_content_type': models.Variable.get(
            'gcs_content_type', _GCS_CONTENT_TYPE).upper(),
        'gcs_prefix': models.Variable.get('gcs_bucket_prefix', ''),
    }
    # NOTE(review): if Variable.get returns stored values as strings, a saved
    # 'False' for ads_cm_create_list would be truthy and the lifespan would
    # arrive as text — confirm the consumer parses these values.
    ads_destination = {
        'ads_credentials': models.Variable.get('ads_credentials', ''),
        'ads_upload_key_type': models.Variable.get('ads_upload_key_type', ''),
        'ads_cm_app_id': models.Variable.get('ads_cm_app_id', None),
        'ads_cm_create_list': models.Variable.get('ads_cm_create_list', True),
        'ads_cm_membership_lifespan': models.Variable.get(
            'ads_cm_membership_lifespan', _ADS_MEMBERSHIP_LIFESPAN_DAYS),
        'ads_cm_user_list_name': models.Variable.get(
            'ads_cm_user_list_name', ''),
    }
    return data_connector_operator.DataConnectorOperator(
        **task_identity,
        **monitoring_settings,
        **gcs_source,
        **ads_destination,
        dag=main_dag)
Пример #5
0
    def create_task(self,
                    main_dag: Optional[dag.DAG] = None,
                    is_retry: bool = False
                    ) -> data_connector_operator.DataConnectorOperator:
        """Builds the Cloud Storage to Google Ads offline conversions task.

        The Cloud Storage settings and the Ads credentials are looked up with
        `variable.Variable.get` each time this method is called.

        Args:
          main_dag: The DAG that the created task is attached to.
          is_retry: Retry flag; used to derive the task id and forwarded to
            the operator unchanged.

        Returns:
          The configured DataConnectorOperator.
        """
        # Argument groups are declared in call order so that the variable
        # lookups run top to bottom.
        task_identity = {
            'dag_name': _DAG_NAME,
            'task_id': self.get_task_id('gcs_to_ads_oc', is_retry),
            'input_hook': hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
            'output_hook': (hook_factory.OutputHookType
                            .GOOGLE_ADS_OFFLINE_CONVERSIONS),
            'is_retry': is_retry,
        }
        monitoring_settings = {
            'return_report': self.dag_enable_run_report,
            'enable_monitoring': self.dag_enable_monitoring,
            'monitoring_dataset': self.monitoring_dataset,
            'monitoring_table': self.monitoring_table,
            'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
        }
        gcs_source = {
            'gcs_bucket': variable.Variable.get('gcs_bucket_name', ''),
            # The content type is upper-cased before it reaches the operator.
            'gcs_content_type': variable.Variable.get(
                'gcs_content_type', _GCS_CONTENT_TYPE).upper(),
            'gcs_prefix': variable.Variable.get('gcs_bucket_prefix', ''),
        }
        ads_destination = {
            'ads_credentials': variable.Variable.get('ads_credentials', ''),
        }
        return data_connector_operator.DataConnectorOperator(
            **task_identity,
            **monitoring_settings,
            **gcs_source,
            **ads_destination,
            dag=main_dag)
Пример #6
0
  def create_task(
      self,
      main_dag: Optional[dag.DAG] = None,
      is_retry: bool = False) -> data_connector_operator.DataConnectorOperator:
    """Builds the BigQuery to Campaign Manager offline conversions task.

    The BigQuery dataset/table ids and the Campaign Manager settings are
    looked up with `variable.Variable.get` each time this method is called.

    Args:
      main_dag: The DAG that the created task is attached to.
      is_retry: Retry flag; used to derive the task id and forwarded to the
        operator unchanged.

    Returns:
      The configured DataConnectorOperator.
    """
    # Argument groups are declared in call order so that the variable lookups
    # run top to bottom.
    task_identity = {
        'dag_name': _DAG_NAME,
        'task_id': self.get_task_id('bq_to_cm', is_retry),
        'input_hook': hook_factory.InputHookType.BIG_QUERY,
        'output_hook': (hook_factory.OutputHookType
                        .GOOGLE_CAMPAIGN_MANAGER_OFFLINE_CONVERSIONS),
        'is_retry': is_retry,
    }
    monitoring_settings = {
        'return_report': self.dag_enable_run_report,
        'enable_monitoring': self.dag_enable_monitoring,
        'monitoring_dataset': self.monitoring_dataset,
        'monitoring_table': self.monitoring_table,
        'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
    }
    bq_source = {
        'bq_conn_id': _BQ_CONN_ID,
        'bq_dataset_id': variable.Variable.get('bq_dataset_id', ''),
        'bq_table_id': variable.Variable.get('bq_table_id', ''),
    }
    cm_destination = {
        'cm_service_account': variable.Variable.get('cm_service_account', ''),
        'cm_profile_id': variable.Variable.get('cm_profile_id', ''),
    }
    return data_connector_operator.DataConnectorOperator(
        **task_identity,
        **monitoring_settings,
        **bq_source,
        **cm_destination,
        dag=main_dag)
Пример #7
0
    def create_task(self,
                    main_dag: Optional[dag.DAG] = None,
                    is_retry: bool = False
                    ) -> data_connector_operator.DataConnectorOperator:
        """Builds the BigQuery to Google Analytics 4 connector task.

        The BigQuery dataset/table ids and the GA4 settings are looked up with
        `variable.Variable.get` each time this method is called.

        Args:
          main_dag: The DAG that the created task is attached to.
          is_retry: Retry flag; used to derive the task id and forwarded to
            the operator unchanged.

        Returns:
          The configured DataConnectorOperator.
        """
        # Argument groups are declared in call order so that the variable
        # lookups run top to bottom.
        task_identity = {
            'dag_name': self.dag_name,
            'task_id': self.get_task_id('bq_to_ga4', is_retry),
            'input_hook': hook_factory.InputHookType.BIG_QUERY,
            'output_hook': hook_factory.OutputHookType.GOOGLE_ANALYTICS_4,
            'is_retry': is_retry,
        }
        monitoring_settings = {
            'return_report': self.dag_enable_run_report,
            'enable_monitoring': self.dag_enable_monitoring,
            'monitoring_dataset': self.monitoring_dataset,
            'monitoring_table': self.monitoring_table,
            'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
        }
        bq_source = {
            'bq_conn_id': _BQ_CONN_ID,
            'bq_dataset_id': variable.Variable.get('bq_dataset_id', ''),
            'bq_table_id': variable.Variable.get('bq_table_id', ''),
        }
        ga4_destination = {
            'api_secret': variable.Variable.get('api_secret', ''),
            'payload_type': variable.Variable.get('payload_type', ''),
            'measurement_id': variable.Variable.get('measurement_id', ''),
            'firebase_app_id': variable.Variable.get('firebase_app_id', ''),
        }
        return data_connector_operator.DataConnectorOperator(
            **task_identity,
            **monitoring_settings,
            **bq_source,
            **ga4_destination,
            dag=main_dag)
Пример #8
0
    def create_task(
        self,
        main_dag: models.DAG = None,
        is_retry: bool = False
    ) -> data_connector_operator.DataConnectorOperator:
        """Builds the Cloud Storage to Campaign Manager conversions task.

        The Cloud Storage and Campaign Manager settings are looked up with
        `models.Variable.get` each time this method is called.

        Args:
          main_dag: The DAG that the created task is attached to.
          is_retry: Retry flag; used to derive the task id and forwarded to
            the operator unchanged.

        Returns:
          The configured DataConnectorOperator.
        """
        # Argument groups are declared in call order so that the variable
        # lookups run top to bottom.
        task_identity = {
            'dag_name': _DAG_NAME,
            'task_id': self.get_task_id('gcs_to_cm', is_retry),
            'input_hook': hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
            'output_hook': (hook_factory.OutputHookType
                            .GOOGLE_CAMPAIGN_MANAGER_OFFLINE_CONVERSIONS),
            'is_retry': is_retry,
        }
        monitoring_settings = {
            'return_report': self.dag_enable_run_report,
            'enable_monitoring': self.dag_enable_monitoring,
            'monitoring_dataset': self.monitoring_dataset,
            'monitoring_table': self.monitoring_table,
            'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
        }
        gcs_source = {
            'gcs_bucket': models.Variable.get('gcs_bucket_name', ''),
            'gcs_prefix': models.Variable.get('gcs_bucket_prefix', ''),
            # The content type is upper-cased before it reaches the operator.
            'gcs_content_type': models.Variable.get(
                'gcs_content_type', _GCS_CONTENT_TYPE).upper(),
        }
        cm_destination = {
            'cm_service_account': models.Variable.get(
                'cm_service_account', ''),
            'cm_profile_id': models.Variable.get('cm_profile_id', ''),
        }
        return data_connector_operator.DataConnectorOperator(
            **task_identity,
            **monitoring_settings,
            **gcs_source,
            **cm_destination,
            dag=main_dag)
Пример #9
0
 def test_execute_monitoring_bad_values(self):
   """Operator construction fails when monitoring_dataset is empty.

   With return_report=True and an empty monitoring dataset, building the
   operator is expected to raise MonitoringValueError.
   """
   invalid_monitoring_kwargs = {
       'dag_name': 'dag_name',
       'input_hook': hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
       'output_hook': hook_factory.OutputHookType.GOOGLE_ANALYTICS,
       'return_report': True,
       'monitoring_dataset': '',
   }
   with self.assertRaises(errors.MonitoringValueError):
     data_connector_operator.DataConnectorOperator(
         **invalid_monitoring_kwargs, **self.test_operator_kwargs)
Пример #10
0
 def test_execute_monitoring_use_default_bq_conn_id(self):
   """Checks the bq_conn_id handed to the patched MonitoringHook.

   MonitoringHook is replaced with an autospec mock for the duration of the
   operator construction, and its most recent call is then inspected.
   """
   # NOTE(review): an explicit monitoring_bq_conn_id is supplied, yet the
   # assertion expects 'bigquery_default' — confirm this is intended.
   monitoring_hook_path = ('plugins.pipeline_plugins.hooks.'
                           'monitoring_hook.MonitoringHook')
   operator_kwargs = {
       'dag_name': 'dag_name',
       'input_hook': hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
       'output_hook': hook_factory.OutputHookType.GOOGLE_ANALYTICS,
       'return_report': True,
       'monitoring_dataset': 'test_dataset',
       'monitoring_table': 'test_table',
       'monitoring_bq_conn_id': 'test_monitoring_bq_conn_id',
   }
   with mock.patch(monitoring_hook_path, autospec=True) as patched_hook:
     data_connector_operator.DataConnectorOperator(
         **operator_kwargs, **self.test_operator_kwargs)
     patched_hook.assert_called_with(bq_conn_id='bigquery_default')
 def test_execute_monitoring_use_default_bq_conn_id(self):
     """Checks the keyword arguments the operator passes to MonitoringHook.

     The dataset, table and connection id given to the operator are expected
     to reach MonitoringHook unchanged, with enable_monitoring=True even
     though the test does not pass it; `location` may be any value.
     """
     # Assumes monitoring_hook.MonitoringHook is already a mock when this test
     # runs (e.g. patched during fixture setup) — TODO confirm.
     operator_kwargs = {
         'dag_name': 'dag_name',
         'input_hook': hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
         'output_hook': hook_factory.OutputHookType.GOOGLE_ANALYTICS,
         'return_report': True,
         'monitoring_dataset': 'test_dataset',
         'monitoring_table': 'test_table',
         'monitoring_bq_conn_id': 'test_monitoring_bq_conn_id',
     }
     data_connector_operator.DataConnectorOperator(
         **operator_kwargs, **self.test_operator_kwargs)

     expected_hook_kwargs = {
         'bq_conn_id': 'test_monitoring_bq_conn_id',
         'dag_name': 'dag_name',
         'enable_monitoring': True,
         'location': mock.ANY,
         'monitoring_dataset': 'test_dataset',
         'monitoring_table': 'test_table',
     }
     monitoring_hook.MonitoringHook.assert_called_with(**expected_hook_kwargs)
Пример #12
0
    def create_task(self,
                    main_dag: Optional[dag.DAG] = None,
                    is_retry: bool = False
                    ) -> data_connector_operator.DataConnectorOperator:
        """Builds the BigQuery to Google Ads Customer Match connector task.

        The BigQuery dataset/table ids and the Customer Match settings are
        looked up with `variable.Variable.get` each time this method is
        called.

        Args:
          main_dag: The DAG that the created task is attached to.
          is_retry: Retry flag; used to derive the task id and forwarded to
            the operator unchanged.

        Returns:
          The configured DataConnectorOperator.
        """
        # Argument groups are declared in call order so that the variable
        # lookups run top to bottom.
        task_identity = {
            'dag_name': _DAG_NAME,
            'task_id': self.get_task_id('bq_to_ads_cm', is_retry),
            'input_hook': hook_factory.InputHookType.BIG_QUERY,
            'output_hook': (
                hook_factory.OutputHookType.GOOGLE_ADS_CUSTOMER_MATCH),
            'is_retry': is_retry,
        }
        monitoring_settings = {
            'return_report': self.dag_enable_run_report,
            'enable_monitoring': self.dag_enable_monitoring,
            'monitoring_dataset': self.monitoring_dataset,
            'monitoring_table': self.monitoring_table,
            'monitoring_bq_conn_id': self.monitoring_bq_conn_id,
        }
        bq_source = {
            'bq_conn_id': _BQ_CONN_ID,
            'bq_dataset_id': variable.Variable.get('bq_dataset_id', ''),
            'bq_table_id': variable.Variable.get('bq_table_id', ''),
        }
        # NOTE(review): if Variable.get returns stored values as strings, a
        # saved 'False' for ads_cm_create_list would be truthy and the
        # lifespan would arrive as text — confirm the consumer parses these.
        ads_destination = {
            'ads_credentials': variable.Variable.get('ads_credentials', ''),
            'ads_upload_key_type': variable.Variable.get(
                'ads_upload_key_type', ''),
            'ads_cm_app_id': variable.Variable.get('ads_cm_app_id', None),
            'ads_cm_create_list': variable.Variable.get(
                'ads_cm_create_list', True),
            'ads_cm_membership_lifespan': variable.Variable.get(
                'ads_cm_membership_lifespan', _ADS_MEMBERSHIP_LIFESPAN_DAYS),
            'ads_cm_user_list_name': variable.Variable.get(
                'ads_cm_user_list_name', ''),
        }
        return data_connector_operator.DataConnectorOperator(
            **task_identity,
            **monitoring_settings,
            **bq_source,
            **ads_destination,
            dag=main_dag)
    def setUp(self):
        """Patches hook dependencies and builds the operators under test.

        Three DataConnectorOperator instances are created (with a run report,
        with monitoring disabled, and without `return_report`), followed by a
        sample event and a blob wrapping it.
        """
        super().setUp()
        # Stops every patch started with `.start()` below once the test ends.
        self.addCleanup(mock.patch.stopall)

        # Keyword arguments shared by every operator built in this fixture.
        self.test_operator_kwargs = {
            'task_id': 'test_task_id',
            'tcrm_gcs_to_ga_schedule': '@once',
            'ga_tracking_id': 'UA-12345-67',
            'ga_base_params': {
                'v': '1'
            },
            'gcs_bucket': 'test_bucket',
            'gcs_prefix': 'test_dataset',
            'gcs_content_type': 'JSON',
        }

        # Replace the hook factory functions with autospec mocks; this happens
        # before any operator below is constructed.
        self.mock_hook_factory_input = mock.patch.object(
            hook_factory, 'get_input_hook', autospec=True).start()
        self.mock_hook_factory_output = mock.patch.object(
            hook_factory, 'get_output_hook', autospec=True).start()

        # The base hook initializer is swapped by direct assignment rather
        # than mock.patch, so `mock.patch.stopall` does not undo it; the
        # original is kept on self.
        # NOTE(review): presumably restored in tearDown — confirm.
        self.original_gcp_hook_init = gcp_api_base_hook.GoogleCloudBaseHook.__init__
        gcp_api_base_hook.GoogleCloudBaseHook.__init__ = mock.MagicMock()

        self.mock_monitoring_hook = mock.patch.object(monitoring_hook,
                                                      'MonitoringHook',
                                                      autospec=True).start()
        # Shortcut to what `generate_processed_blobs_ranges` returns on the
        # mocked MonitoringHook instance.
        self.mock_generator = (self.mock_monitoring_hook.return_value.
                               generate_processed_blobs_ranges.return_value)

        # Operator with a run report and the monitoring settings supplied.
        self.dc_operator = data_connector_operator.DataConnectorOperator(
            dag_name='dag_name',
            input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
            output_hook=hook_factory.OutputHookType.GOOGLE_ANALYTICS,
            return_report=True,
            monitoring_dataset='test_dataset',
            monitoring_table='test_table',
            monitoring_bq_conn_id='test_monitoring_bq_conn_id',
            **self.test_operator_kwargs)

        # Same configuration, but with monitoring explicitly switched off.
        self.dc_operator_disable_monitoring = (
            data_connector_operator.DataConnectorOperator(
                dag_name='dag_name',
                input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
                output_hook=hook_factory.OutputHookType.GOOGLE_ANALYTICS,
                return_report=True,
                enable_monitoring=False,
                monitoring_dataset='test_dataset',
                monitoring_table='test_table',
                monitoring_bq_conn_id='test_monitoring_bq_conn_id',
                **self.test_operator_kwargs))

        # `return_report` is not passed here, so the operator's own default
        # applies.
        self.dc_operator_no_report = (
            data_connector_operator.DataConnectorOperator)(
                dag_name='dag_name',
                input_hook=hook_factory.InputHookType.GOOGLE_CLOUD_STORAGE,
                output_hook=hook_factory.OutputHookType.GOOGLE_ANALYTICS,
                monitoring_dataset='test_dataset',
                monitoring_table='test_table',
                monitoring_bq_conn_id='test_monitoring_bq_conn_id',
                **self.test_operator_kwargs)

        # Sample event payload reused by the tests.
        self.event = {
            'cid': '12345.67890',
            'ec': 'ClientID',
            'ea': 'PredictedPayer',
            'el': '20190423',
            'ev': 1,
            'z': '1558517072202080'
        }
        # The blob's events list holds two references to the same event dict.
        self.blob = blob.Blob(events=([self.event] * 2), location='blob')