Exemplo n.º 1
0
    def execute(self, context):
        """Export Datastore entities to the configured Cloud Storage bucket.

        When ``overwrite_existing`` is set and a namespace is configured,
        any objects already under that prefix are deleted first so the new
        export is not mixed with stale files. The export is then started
        and polled until it finishes; a non-SUCCESSFUL terminal state
        raises ``AirflowException``.

        :param context: Airflow task execution context (unused here).
        :return: the final operation resource returned by the poll.
        """
        self.log.info('Exporting data to Cloud Storage bucket %s', self.bucket)

        # Clear out any previous export artifacts under the namespace prefix.
        if self.overwrite_existing and self.namespace:
            storage_hook = GoogleCloudStorageHook(self.cloud_storage_conn_id)
            for blob_name in storage_hook.list(self.bucket,
                                               prefix=self.namespace):
                storage_hook.delete(self.bucket, blob_name)

        datastore_hook = DatastoreHook(self.datastore_conn_id,
                                       self.delegate_to)
        operation = datastore_hook.export_to_storage_bucket(
            bucket=self.bucket,
            namespace=self.namespace,
            entity_filter=self.entity_filter,
            labels=self.labels,
            project_id=self.project_id)
        # Block until the long-running export operation reaches a
        # terminal state.
        final_result = datastore_hook.poll_operation_until_done(
            operation['name'], self.polling_interval_in_seconds)

        if final_result['metadata']['common']['state'] != 'SUCCESSFUL':
            raise AirflowException(
                'Operation failed: result={}'.format(final_result))

        return final_result
Exemplo n.º 2
0
    def execute(self, context):
        """Import a Datastore export from Cloud Storage back into Datastore.

        Starts the import from the configured bucket/file, polls the
        resulting long-running operation until it completes, and raises
        ``AirflowException`` if the terminal state is not SUCCESSFUL.

        :param context: Airflow task execution context (unused here).
        :return: the final operation resource returned by the poll.
        """
        self.log.info('Importing data from Cloud Storage bucket %s',
                      self.bucket)
        datastore_hook = DatastoreHook(self.datastore_conn_id,
                                       self.delegate_to)
        operation = datastore_hook.import_from_storage_bucket(
            bucket=self.bucket,
            file=self.file,
            namespace=self.namespace,
            entity_filter=self.entity_filter,
            labels=self.labels)
        # Wait for the long-running import operation to finish.
        final_result = datastore_hook.poll_operation_until_done(
            operation['name'], self.polling_interval_in_seconds)

        if final_result['metadata']['common']['state'] != 'SUCCESSFUL':
            raise AirflowException(
                'Operation failed: result={}'.format(final_result))

        return final_result
Exemplo n.º 3
0
 def setUp(self):
     """Create the hook under test with the base GCP hook init stubbed out."""
     base_init = ('airflow.contrib.hooks.gcp_api_base_hook.'
                  'GoogleCloudBaseHook.__init__')
     # mock_init replaces the constructor so no real credentials are needed.
     with patch(base_init, new=mock_init):
         self.datastore_hook = DatastoreHook()
Exemplo n.º 4
0
class TestDatastoreHook(unittest.TestCase):
    """Unit tests for ``DatastoreHook``.

    Each test patches ``get_conn`` (or the discovery ``build``) and then
    verifies the exact chain of calls the hook issues against the mocked
    Google API client, plus the value the hook returns. The ``*_no_project_id``
    variants check that calling without a project id raises
    ``AirflowException`` mentioning "project_id".
    """

    def setUp(self):
        # Stub the base hook constructor so the hook can be instantiated
        # without real GCP credentials.
        with patch(
                'airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook.__init__',
                new=mock_init):
            self.datastore_hook = DatastoreHook()

    @patch('airflow.gcp.hooks.datastore.DatastoreHook._authorize')
    @patch('airflow.gcp.hooks.datastore.build')
    def test_get_conn(self, mock_build, mock_authorize):
        """get_conn builds a v1 datastore client and caches it on the hook."""
        conn = self.datastore_hook.get_conn()

        mock_build.assert_called_once_with('datastore',
                                           'v1',
                                           http=mock_authorize.return_value,
                                           cache_discovery=False)
        self.assertEqual(conn, mock_build.return_value)
        self.assertEqual(conn, self.datastore_hook.connection)

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_allocate_ids(self, mock_get_conn):
        """allocate_ids issues projects().allocateIds and returns 'keys'."""
        self.datastore_hook.connection = mock_get_conn.return_value
        partial_keys = []

        keys = self.datastore_hook.allocate_ids(partial_keys=partial_keys,
                                                project_id=GCP_PROJECT_ID)

        projects = self.datastore_hook.connection.projects
        projects.assert_called_once_with()
        allocate_ids = projects.return_value.allocateIds
        allocate_ids.assert_called_once_with(projectId=GCP_PROJECT_ID,
                                             body={'keys': partial_keys})
        execute = allocate_ids.return_value.execute
        execute.assert_called_once_with(num_retries=mock.ANY)
        self.assertEqual(keys, execute.return_value['keys'])

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_allocate_ids_no_project_id(self, mock_get_conn):
        """allocate_ids without project_id raises AirflowException."""
        self.datastore_hook.connection = mock_get_conn.return_value
        partial_keys = []

        with self.assertRaises(AirflowException) as err:
            self.datastore_hook.allocate_ids(partial_keys=partial_keys)
        self.assertIn("project_id", str(err.exception))

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_begin_transaction(self, mock_get_conn):
        """begin_transaction returns the 'transaction' field of the response."""
        self.datastore_hook.connection = mock_get_conn.return_value

        transaction = self.datastore_hook.begin_transaction(
            project_id=GCP_PROJECT_ID)

        projects = self.datastore_hook.connection.projects
        projects.assert_called_once_with()
        begin_transaction = projects.return_value.beginTransaction
        begin_transaction.assert_called_once_with(projectId=GCP_PROJECT_ID,
                                                  body={})
        execute = begin_transaction.return_value.execute
        execute.assert_called_once_with(num_retries=mock.ANY)
        self.assertEqual(transaction, execute.return_value['transaction'])

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_begin_transaction_no_project_id(self, mock_get_conn):
        """begin_transaction without project_id raises AirflowException."""
        self.datastore_hook.connection = mock_get_conn.return_value
        with self.assertRaises(AirflowException) as err:
            self.datastore_hook.begin_transaction()
        self.assertIn("project_id", str(err.exception))

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_commit(self, mock_get_conn):
        """commit forwards the body to projects().commit and returns the response."""
        self.datastore_hook.connection = mock_get_conn.return_value
        body = {'item': 'a'}

        resp = self.datastore_hook.commit(body=body, project_id=GCP_PROJECT_ID)

        projects = self.datastore_hook.connection.projects
        projects.assert_called_once_with()
        commit = projects.return_value.commit
        commit.assert_called_once_with(projectId=GCP_PROJECT_ID, body=body)
        execute = commit.return_value.execute
        execute.assert_called_once_with(num_retries=mock.ANY)
        self.assertEqual(resp, execute.return_value)

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_commit_no_project_id(self, mock_get_conn):
        """commit without project_id raises AirflowException."""
        self.datastore_hook.connection = mock_get_conn.return_value
        body = {'item': 'a'}

        with self.assertRaises(AirflowException) as err:
            self.datastore_hook.commit(body=body)
        self.assertIn("project_id", str(err.exception))

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_lookup(self, mock_get_conn):
        """lookup builds the request body from keys/consistency/transaction."""
        self.datastore_hook.connection = mock_get_conn.return_value
        keys = []
        read_consistency = 'ENUM'
        transaction = 'transaction'

        resp = self.datastore_hook.lookup(keys=keys,
                                          read_consistency=read_consistency,
                                          transaction=transaction,
                                          project_id=GCP_PROJECT_ID)

        projects = self.datastore_hook.connection.projects
        projects.assert_called_once_with()
        lookup = projects.return_value.lookup
        lookup.assert_called_once_with(projectId=GCP_PROJECT_ID,
                                       body={
                                           'keys': keys,
                                           'readConsistency': read_consistency,
                                           'transaction': transaction
                                       })
        execute = lookup.return_value.execute
        execute.assert_called_once_with(num_retries=mock.ANY)
        self.assertEqual(resp, execute.return_value)

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_lookup_no_project_id(self, mock_get_conn):
        """lookup without project_id raises AirflowException."""
        self.datastore_hook.connection = mock_get_conn.return_value
        keys = []
        read_consistency = 'ENUM'
        transaction = 'transaction'

        with self.assertRaises(AirflowException) as err:
            self.datastore_hook.lookup(
                keys=keys,
                read_consistency=read_consistency,
                transaction=transaction,
            )
        self.assertIn("project_id", str(err.exception))

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_rollback(self, mock_get_conn):
        """rollback sends the transaction id to projects().rollback."""
        self.datastore_hook.connection = mock_get_conn.return_value
        transaction = 'transaction'

        self.datastore_hook.rollback(transaction=transaction,
                                     project_id=GCP_PROJECT_ID)

        projects = self.datastore_hook.connection.projects
        projects.assert_called_once_with()
        rollback = projects.return_value.rollback
        rollback.assert_called_once_with(projectId=GCP_PROJECT_ID,
                                         body={'transaction': transaction})
        execute = rollback.return_value.execute
        execute.assert_called_once_with(num_retries=mock.ANY)

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_rollback_no_project_id(self, mock_get_conn):
        """rollback without project_id raises AirflowException."""
        self.datastore_hook.connection = mock_get_conn.return_value
        transaction = 'transaction'

        with self.assertRaises(AirflowException) as err:
            self.datastore_hook.rollback(transaction=transaction)
        self.assertIn("project_id", str(err.exception))

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_run_query(self, mock_get_conn):
        """run_query returns the 'batch' field of the runQuery response."""
        self.datastore_hook.connection = mock_get_conn.return_value
        body = {'item': 'a'}

        resp = self.datastore_hook.run_query(body=body,
                                             project_id=GCP_PROJECT_ID)

        projects = self.datastore_hook.connection.projects
        projects.assert_called_once_with()
        run_query = projects.return_value.runQuery
        run_query.assert_called_once_with(projectId=GCP_PROJECT_ID, body=body)
        execute = run_query.return_value.execute
        execute.assert_called_once_with(num_retries=mock.ANY)
        self.assertEqual(resp, execute.return_value['batch'])

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_run_query_no_project_id(self, mock_get_conn):
        """run_query without project_id raises AirflowException."""
        self.datastore_hook.connection = mock_get_conn.return_value
        body = {'item': 'a'}

        with self.assertRaises(AirflowException) as err:
            self.datastore_hook.run_query(body=body)
        self.assertIn("project_id", str(err.exception))

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_get_operation(self, mock_get_conn):
        """get_operation fetches a named operation via projects().operations().get."""
        self.datastore_hook.connection = mock_get_conn.return_value
        name = 'name'

        resp = self.datastore_hook.get_operation(name=name)

        projects = self.datastore_hook.connection.projects
        projects.assert_called_once_with()
        operations = projects.return_value.operations
        operations.assert_called_once_with()
        get = operations.return_value.get
        get.assert_called_once_with(name=name)
        execute = get.return_value.execute
        execute.assert_called_once_with(num_retries=mock.ANY)
        self.assertEqual(resp, execute.return_value)

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_delete_operation(self, mock_get_conn):
        """delete_operation deletes a named operation via operations().delete."""
        self.datastore_hook.connection = mock_get_conn.return_value
        name = 'name'

        resp = self.datastore_hook.delete_operation(name=name)

        projects = self.datastore_hook.connection.projects
        projects.assert_called_once_with()
        operations = projects.return_value.operations
        operations.assert_called_once_with()
        delete = operations.return_value.delete
        delete.assert_called_once_with(name=name)
        execute = delete.return_value.execute
        execute.assert_called_once_with(num_retries=mock.ANY)
        self.assertEqual(resp, execute.return_value)

    # First poll reports PROCESSING, second poll a terminal state, so the
    # hook should sleep exactly once between the two get_operation calls.
    @patch('airflow.gcp.hooks.datastore.time.sleep')
    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_operation',
           side_effect=[{
               'metadata': {
                   'common': {
                       'state': 'PROCESSING'
                   }
               }
           }, {
               'metadata': {
                   'common': {
                       'state': 'NOT PROCESSING'
                   }
               }
           }])
    def test_poll_operation_until_done(self, mock_get_operation,
                                       mock_time_sleep):
        """poll_operation_until_done loops until a non-PROCESSING state."""
        name = 'name'
        polling_interval_in_seconds = 10

        result = self.datastore_hook.poll_operation_until_done(
            name, polling_interval_in_seconds)

        mock_get_operation.assert_has_calls([call(name), call(name)])
        mock_time_sleep.assert_called_once_with(polling_interval_in_seconds)
        self.assertEqual(result,
                         {'metadata': {
                             'common': {
                                 'state': 'NOT PROCESSING'
                             }
                         }})

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_export_to_storage_bucket(self, mock_get_conn):
        """export_to_storage_bucket builds the gs:// outputUrlPrefix and exports."""
        self.datastore_hook.admin_connection = mock_get_conn.return_value
        bucket = 'bucket'
        namespace = None
        entity_filter = {}
        labels = {}

        resp = self.datastore_hook.export_to_storage_bucket(
            bucket=bucket,
            namespace=namespace,
            entity_filter=entity_filter,
            labels=labels,
            project_id=GCP_PROJECT_ID)

        projects = self.datastore_hook.admin_connection.projects
        projects.assert_called_once_with()
        export = projects.return_value.export
        export.assert_called_once_with(
            projectId=GCP_PROJECT_ID,
            body={
                'outputUrlPrefix':
                'gs://' + '/'.join(filter(None, [bucket, namespace])),
                'entityFilter':
                entity_filter,
                'labels':
                labels,
            })
        execute = export.return_value.execute
        execute.assert_called_once_with(num_retries=mock.ANY)
        self.assertEqual(resp, execute.return_value)

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_export_to_storage_bucket_no_project_id(self, mock_get_conn):
        """export_to_storage_bucket without project_id raises AirflowException."""
        self.datastore_hook.admin_connection = mock_get_conn.return_value
        bucket = 'bucket'
        namespace = None
        entity_filter = {}
        labels = {}

        with self.assertRaises(AirflowException) as err:
            self.datastore_hook.export_to_storage_bucket(
                bucket=bucket,
                namespace=namespace,
                entity_filter=entity_filter,
                labels=labels,
            )
        self.assertIn("project_id", str(err.exception))

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_import_from_storage_bucket(self, mock_get_conn):
        """import_from_storage_bucket builds the gs:// inputUrl and imports."""
        self.datastore_hook.admin_connection = mock_get_conn.return_value
        bucket = 'bucket'
        file = 'file'
        namespace = None
        entity_filter = {}
        labels = {}

        resp = self.datastore_hook.import_from_storage_bucket(
            bucket=bucket,
            file=file,
            namespace=namespace,
            entity_filter=entity_filter,
            labels=labels,
            project_id=GCP_PROJECT_ID)

        projects = self.datastore_hook.admin_connection.projects
        projects.assert_called_once_with()
        import_ = projects.return_value.import_
        import_.assert_called_once_with(
            projectId=GCP_PROJECT_ID,
            body={
                'inputUrl':
                'gs://' + '/'.join(filter(None, [bucket, namespace, file])),
                'entityFilter':
                entity_filter,
                'labels':
                labels,
            })
        execute = import_.return_value.execute
        execute.assert_called_once_with(num_retries=mock.ANY)
        self.assertEqual(resp, execute.return_value)

    @patch('airflow.gcp.hooks.datastore.DatastoreHook.get_conn')
    def test_import_from_storage_bucket_no_project_id(self, mock_get_conn):
        """import_from_storage_bucket without project_id raises AirflowException."""
        self.datastore_hook.admin_connection = mock_get_conn.return_value
        bucket = 'bucket'
        file = 'file'
        namespace = None
        entity_filter = {}
        labels = {}

        with self.assertRaises(AirflowException) as err:
            self.datastore_hook.import_from_storage_bucket(
                bucket=bucket,
                file=file,
                namespace=namespace,
                entity_filter=entity_filter,
                labels=labels,
            )
        self.assertIn("project_id", str(err.exception))
Exemplo n.º 5
0
 def setUp(self):
     """Instantiate the hook with the base GCP hook constructor mocked."""
     base_init = 'airflow.gcp.hooks.base.CloudBaseHook.__init__'
     # mock_init stands in for the real __init__ so no credentials load.
     with patch(base_init, new=mock_init):
         self.datastore_hook = DatastoreHook()