Example No. 1
    def test_bucket_name_value(self):
        bad_start_bucket_name = '/testing123'
        with self.assertRaises(ValueError):
            gcs_hook.GoogleCloudStorageHook().create_bucket(
                bucket_name=bad_start_bucket_name)

        bad_end_bucket_name = 'testing123/'
        with self.assertRaises(ValueError):
            gcs_hook.GoogleCloudStorageHook().create_bucket(
                bucket_name=bad_end_bucket_name)
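For contrast, a minimal happy-path sketch of the same call with a name that should pass this validation, assuming the optional keyword arguments of the Airflow 1.x hook; the bucket name, storage class, location and project id are placeholder values, not taken from the test:

# Hypothetical valid call; all argument values here are illustrative placeholders.
hook = gcs_hook.GoogleCloudStorageHook()
hook.create_bucket(bucket_name='testing123',   # no leading or trailing slash
                   storage_class='REGIONAL',
                   location='US-CENTRAL1',
                   project_id='my-gcp-project')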
Example No. 2
    def setUp(self):
        with mock.patch(
            GCS_STRING.format('GoogleCloudBaseHook.__init__'),
            new=mock_base_gcp_hook_default_project_id,
        ):
            self.gcs_hook = gcs_hook.GoogleCloudStorageHook(
                google_cloud_storage_conn_id='test')
Example No. 3
def move_to_completion_bucket(target_bucket, target_infix, **kwargs):
    """A utility method to move an object to a target location in GCS."""
    # Here we establish a connection hook to GoogleCloudStorage.
    # Google Cloud Composer automatically provides a google_cloud_storage_default
    # connection id that is used by this hook.
    conn = gcs_hook.GoogleCloudStorageHook()

    # The external trigger (Google Cloud Function) that initiates this DAG
    # provides a dag_run.conf dictionary with event attributes that specify
    # the information about the GCS object that triggered this DAG.
    # We extract the bucket and object name from this dictionary.
    source_bucket = kwargs['dag_run'].conf['bucket']
    source_object = kwargs['dag_run'].conf['name']
    completion_ds = kwargs['ds']

    target_object = os.path.join(target_infix, completion_ds, source_object)

    logging.info('Copying %s to %s',
                 os.path.join(source_bucket, source_object),
                 os.path.join(target_bucket, target_object))
    conn.copy(source_bucket, source_object, target_bucket, target_object)

    logging.info('Deleting %s',
                 os.path.join(source_bucket, source_object))
    conn.delete(source_bucket, source_object)
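A sketch of how a callable like this might be wired into the DAG so that dag_run and ds show up in **kwargs; the dag object, task id, bucket name and infix below are assumptions, not part of the original example:

# Hypothetical wiring (Airflow 1.x style, as used by Cloud Composer at the time).
from airflow.operators import python_operator

move_to_completion = python_operator.PythonOperator(
    task_id='move_to_completion_bucket',
    python_callable=move_to_completion_bucket,
    op_args=['my-completion-bucket', 'processed'],  # target_bucket, target_infix
    provide_context=True,  # makes dag_run, ds, etc. available via **kwargs
    dag=dag)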
Example No. 4
def DPLF_move_into_inv_bucket(target_bucket, **kwargs):
    # Derive the table name from the triggering file name, e.g.
    # "src_20200101_my_table.csv" -> "my_table" (drop the extension, then the
    # first two underscore-separated tokens).
    v_st = kwargs['dag_run'].conf['name']
    v_name_no_ext = v_st[:v_st.rfind(".")]
    v_split = v_name_no_ext.split("_")
    v_table_name = "_".join(v_split[2:])
    # The full object name doubles as the source identifier for logging.
    v_sourceID = kwargs['dag_run'].conf['name']
    # Establish a connection hook to GoogleCloudStorage.
    conn = gcs_hook.GoogleCloudStorageHook()
    # The source bucket and object name are passed in by the triggering Cloud Function via dag_run.conf
    source_bucket = kwargs['dag_run'].conf['bucket']
    source_object = kwargs['dag_run'].conf['name']
    target_object = source_object
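    # Note: the target_bucket argument is overridden below; the file is always moved to g_failed_bucket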
    target_bucket = g_failed_bucket
    # Upon failure of the previous task, the .csv file is moved to the invalid bucket
    DPLF_WriteLog('INFO',
                  "INGESTION-Copying " + source_object + " from " +
                  source_bucket + " into " + target_bucket,
                  p_tablename=v_table_name,
                  p_timestamp=g_tstamp,
                  p_sourceID=v_sourceID,
                  p_jobID=kwargs.get('dag', None),
                  p_taskID=kwargs.get('task', None))
    conn.copy(source_bucket, source_object, target_bucket, target_object)
    DPLF_WriteLog('INFO',
                  "INGESTION-Deleting " + source_object + " from " +
                  source_bucket,
                  p_tablename=v_table_name,
                  p_timestamp=g_tstamp,
                  p_sourceID=v_sourceID,
                  p_jobID=kwargs.get('dag', None),
                  p_taskID=kwargs.get('task', None))
    conn.delete(source_bucket, source_object)
Example No. 5
  def execute(self, context: Mapping[Text, Any]) -> None:
    """Executes operator.

    Args:
      context: Airflow context that contains references to objects related to
        the task instance.

    Raises:
      airflow.AirflowException: Raised when the task fails to call the BigQuery
        API.
    """
    bq_hook = bigquery_hook.BigQueryHook()
    storage_hook = gcs_hook.GoogleCloudStorageHook()
    try:
      logging.info('Starting cleanup routine...')
      bq_hook.delete_table(dataset_id=self._dataset_id, table_id=self._table_id)
    except bigquery_hook.BigQueryApiError as bq_api_error:
      raise exceptions.AirflowException(
          'BigQuery API returned an error while deleting the items table.'
      ) from bq_api_error
    try:
      storage_hook.delete(bucket=self._bucket_id, object='EOF.lock')
      logging.info('Successfully deleted the EOF.lock file.')
    except cloud_exceptions.GoogleCloudError as gcs_api_error:
      raise exceptions.AirflowException(
          'Cloud Storage API returned an error while deleting EOF.lock.'
      ) from gcs_api_error
    logging.info('Clean up task finished!')
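A minimal sketch of how an operator with this execute method might be added to a DAG; the class name and constructor parameters are assumed from the attributes it reads (self._dataset_id, self._table_id, self._bucket_id) and are not taken from the original source:

# Hypothetical instantiation; CleanupOperator and its argument names are assumptions.
cleanup = CleanupOperator(
    task_id='cleanup_items_table',
    dataset_id='my_dataset',  # stored as self._dataset_id
    table_id='items',         # stored as self._table_id
    bucket_id='my-bucket',    # stored as self._bucket_id
    dag=dag)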
Example No. 6
    def test_storage_client_creation(self, mock_client, mock_get_credentials,
                                     mock_project_id):
        hook = gcs_hook.GoogleCloudStorageHook()
        result = hook.get_conn()
        # test that Storage Client is called with required arguments
        mock_client.assert_called_once_with(credentials="CREDENTIALS",
                                            project="PROJECT_ID")
        self.assertEqual(mock_client.return_value, result)
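The mock.patch decorators that feed mock_client, mock_get_credentials and mock_project_id into this test are not part of the snippet; one plausible arrangement is sketched below, with assumed patch targets (decorators apply bottom-up, so the bottom one maps to the first mock argument after self):

    # Hypothetical decorator stack; the exact patch targets depend on the test
    # module and are assumptions here.
    @mock.patch(BASE_STRING.format('GoogleCloudBaseHook.project_id'),
                new_callable=mock.PropertyMock, return_value="PROJECT_ID")
    @mock.patch(BASE_STRING.format('GoogleCloudBaseHook._get_credentials'),
                return_value="CREDENTIALS")
    @mock.patch('google.cloud.storage.Client')
    def test_storage_client_creation(self, mock_client, mock_get_credentials,
                                     mock_project_id):
        ...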
Example No. 7
def update_on_completion(src, dst, **kwargs):
    """Write to GCS on completion of dataflow task.
  Update the completion status. This writes to either success.txt or
  failure.txt. gcs_hook doesn't have update api, so we use copy.
  """
    conn = gcs_hook.GoogleCloudStorageHook()
    bucket = config['completion_status_file_bucket']
    conn.copy(bucket, dst, bucket, src)
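Per the copy call above, the helper overwrites the object named by src with the contents of dst. A plausible (assumed) wiring therefore uses two tasks that each point the status file at the matching marker object; the task ids, object names and trigger rules below are assumptions:

# Hypothetical wiring; object names and trigger rules are placeholders.
from airflow.operators import python_operator

report_success = python_operator.PythonOperator(
    task_id='report_success',
    python_callable=update_on_completion,
    op_args=['completion_status.txt', 'success.txt'],  # status file <- success marker
    trigger_rule='all_success',
    dag=dag)

report_failure = python_operator.PythonOperator(
    task_id='report_failure',
    python_callable=update_on_completion,
    op_args=['completion_status.txt', 'failure.txt'],  # status file <- failure marker
    trigger_rule='one_failed',
    dag=dag)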
Example No. 8
    def setUp(self):
        with mock.patch(BASE_STRING.format('GoogleCloudBaseHook.__init__')):
            self.gcs_hook = gcs_hook.GoogleCloudStorageHook(
                google_cloud_storage_conn_id='test')

        # generate a 384KiB test file (larger than the minimum 256KiB multipart chunk size)
        self.testfile = tempfile.NamedTemporaryFile(delete=False)
        self.testfile.write(b"x" * 393216)
        self.testfile.flush()
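Because setUp creates the temporary file with delete=False, a matching tearDown is needed to remove it; the snippet does not show one, so the sketch below is an assumption:

    # Hypothetical cleanup counterpart to the setUp above (requires `import os`).
    def tearDown(self):
        self.testfile.close()
        os.unlink(self.testfile.name)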
Example No. 9
    def test_storage_client_creation(self):
        with mock.patch('google.cloud.storage.Client') as mock_client:
            gcs_hook_1 = gcs_hook.GoogleCloudStorageHook()
            gcs_hook_1.get_conn()

            # test that Storage Client is called with required arguments
            mock_client.assert_called_once_with(
                client_info=mock.ANY,
                credentials=mock.ANY,
                project=mock.ANY)
Example No. 10
    def execute(self, context):
        hook = gcs_hook.GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self._google_cloud_storage_conn_id,
            delegate_to=self._delegate_to)

        objects = hook.list(self._bucket, prefix=self._directory)

        for obj in objects:
            if not hook.delete(self._bucket, obj):
                raise RuntimeError('Deleting object %s failed.' % obj)
Example No. 11
def move_to_completion_bucket(target_bucket, target_infix, **kwargs):
    """A utility method to move an object to a target location in GCS."""
    # Establish a connection hook to Cloud Storage.
    conn = gcs_hook.GoogleCloudStorageHook()

    # Read the dag_run.conf dictionary passed in by the triggering Cloud Function;
    # it holds the source bucket and object name.
    source_bucket = kwargs["dag_run"].conf["bucket"]
    source_object = kwargs["dag_run"].conf["name"]

    # Build the target object path under the given infix.
    target_object = os.path.join(target_infix, source_object)

    logging.info(
        "Copying %s to %s",
        os.path.join(source_bucket, source_object),
        os.path.join(target_bucket, target_object),
    )
    # copy file to processed bucket
    conn.copy(source_bucket, source_object, target_bucket, target_object)

    logging.info("Deleting %s", os.path.join(source_bucket, source_object))
    # delete from processing bucket
    conn.delete(source_bucket, source_object)
Example No. 12
# Default args that will be applied to all tasks in the DAG
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': start_date,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(seconds=10),
}

# Hooks for connecting to GCS, SA360 API and SA360's sFTP endpoint.
sa360_reporting_hook = sa360_reporting_hook.SA360ReportingHook(
    sa360_report_conn_id=sa360_conn_id)
gcs_hook = gcs_hook.GoogleCloudStorageHook()

# SA360 request builder
request_builder = request_builder.SA360ReportRequestBuilder(
    agency_id, list(elem['advertiserId'] for elem in advertisers))

output_file_header = ','.join(request_builder.get_headers())

# DAG definition
dag = airflow.DAG('mozart_dag',
                  default_args=default_args,
                  schedule_interval=datetime.timedelta(1),
                  concurrency=3)

create_report = create_operator.SA360CreateReportOperator(
    task_id='create_kw_report',
Example No. 13
    def setUp(self):
        with mock.patch(BASE_STRING.format('GoogleCloudBaseHook.__init__')):
            self.gcs_hook = gcs_hook.GoogleCloudStorageHook(
                google_cloud_storage_conn_id='test')