Пример #1
0
    def create_cluster(self, project_id, region, cluster):
        """Store a new cluster in ``self.mock_clusters``.

        ``cluster`` may be a ``Cluster`` or a dict of ``Cluster`` keyword
        arguments. Its ``project_id`` is filled in from *project_id* when
        unset, and its status state is set to ``CREATING``.

        Raises:
            InvalidArgument: the cluster carries a different project id, or
                has no cluster name.
            AlreadyExists: a cluster is already stored under the same
                (project_id, region, cluster_name) key.
        """
        self._check_region_matches_endpoint(region)

        # accept plain dicts as well as Cluster objects
        if not isinstance(cluster, Cluster):
            cluster = Cluster(**cluster)

        if not cluster.project_id:
            cluster.project_id = project_id
        elif cluster.project_id != project_id:
            raise InvalidArgument(
                'If provided, CreateClusterRequest.cluster.project_id must'
                ' match CreateClusterRequest.project_id')

        if not cluster.cluster_name:
            raise InvalidArgument('Cluster name is required')

        # every new cluster starts out in the CREATING state
        cluster.status.state = _cluster_state_value('CREATING')

        key = (project_id, region, cluster.cluster_name)
        if key in self.mock_clusters:
            raise AlreadyExists(
                'Already exists: Cluster ' + _cluster_path(*key))

        self.mock_clusters[key] = cluster
Пример #2
0
 def test_create_preexisting_topic_failifexists(self, mock_service):
     """With ``fail_if_exists=True`` an AlreadyExists error becomes a PubSubException."""
     expected_message = 'Topic already exists: %s' % TEST_TOPIC
     mock_service.return_value.create_topic.side_effect = AlreadyExists(expected_message)

     with self.assertRaises(PubSubException) as raised:
         self.pubsub_hook.create_topic(
             project_id=TEST_PROJECT,
             topic=TEST_TOPIC,
             fail_if_exists=True,
         )

     self.assertEqual(str(raised.exception), expected_message)
Пример #3
0
    def test_execute_if_cluster_exists_in_error_state(self, mock_hook):
        """An existing cluster whose state equals ERROR is diagnosed and deleted, and execute() raises."""
        hook = mock_hook.return_value
        hook.create_cluster.side_effect = [AlreadyExists("test")]

        # Make the reported state compare equal to the ERROR value.
        status = hook.get_cluster.return_value.status
        status.state = 0
        status.ERROR = 0

        operator = DataprocCreateClusterOperator(
            task_id=TASK_ID,
            region=GCP_LOCATION,
            project_id=GCP_PROJECT,
            cluster_config=CONFIG,
            labels=LABELS,
            cluster_name=CLUSTER_NAME,
            delete_on_error=True,
            gcp_conn_id=GCP_CONN_ID,
            retry=RETRY,
            timeout=TIMEOUT,
            metadata=METADATA,
            request_id=REQUEST_ID,
        )
        with self.assertRaises(AirflowException):
            operator.execute(context={})

        # Both follow-up calls must target the same cluster.
        cluster_ref = dict(
            region=GCP_LOCATION,
            project_id=GCP_PROJECT,
            cluster_name=CLUSTER_NAME,
        )
        hook.diagnose_cluster.assert_called_once_with(**cluster_ref)
        hook.delete_cluster.assert_called_once_with(**cluster_ref)
Пример #4
0
    def test_execute_if_cluster_exists_in_deleting_state(
        self, mock_hook, mock_get_cluster, mock_create_cluster, mock_generator
    ):
        """Cover the path where the existing cluster's state equals DELETING.

        The first create attempt raises AlreadyExists and the first lookup
        returns a cluster whose state equals DELETING; the second lookup
        raises NotFound and the second create returns a cluster whose state
        equals ERROR. execute() is expected to raise AirflowException.
        """
        deleting_cluster = mock.MagicMock()
        deleting_cluster.status.state = 0
        deleting_cluster.status.DELETING = 0

        error_cluster = mock.MagicMock()
        error_cluster.status.state = 0
        error_cluster.status.ERROR = 0

        mock_create_cluster.side_effect = [AlreadyExists("test"), error_cluster]
        mock_generator.return_value = [0]
        mock_get_cluster.side_effect = [deleting_cluster, NotFound("test")]

        operator = DataprocCreateClusterOperator(
            task_id=TASK_ID,
            region=GCP_LOCATION,
            project_id=GCP_PROJECT,
            cluster_config=CONFIG,
            labels=LABELS,
            cluster_name=CLUSTER_NAME,
            delete_on_error=True,
            gcp_conn_id=GCP_CONN_ID,
        )
        with self.assertRaises(AirflowException):
            operator.execute(context={})

        hook = mock_hook.return_value
        expected_calls = [mock.call(hook), mock.call(hook)]
        mock_get_cluster.assert_has_calls(expected_calls)
        mock_create_cluster.assert_has_calls(expected_calls)
        hook.diagnose_cluster.assert_called_once_with(
            region=GCP_LOCATION, project_id=GCP_PROJECT, cluster_name=CLUSTER_NAME
        )
Пример #5
0
 def test_execute_if_cluster_exists(self, mock_hook):
     """When create_cluster raises AlreadyExists, the cluster is looked up once instead."""
     hook = mock_hook.return_value
     hook.create_cluster.side_effect = [AlreadyExists("test")]

     # Options the operator is expected to forward to both hook calls.
     call_options = dict(retry=RETRY, timeout=TIMEOUT, metadata=METADATA)

     operator = DataprocCreateClusterOperator(
         task_id=TASK_ID,
         region=GCP_LOCATION,
         project_id=GCP_PROJECT,
         cluster=CLUSTER,
         gcp_conn_id=GCP_CONN_ID,
         request_id=REQUEST_ID,
         **call_options,
     )
     operator.execute(context={})

     mock_hook.assert_called_once_with(gcp_conn_id=GCP_CONN_ID)
     hook.create_cluster.assert_called_once_with(
         region=GCP_LOCATION,
         project_id=GCP_PROJECT,
         cluster=CLUSTER,
         request_id=REQUEST_ID,
         **call_options,
     )
     hook.get_cluster.assert_called_once_with(
         region=GCP_LOCATION,
         project_id=GCP_PROJECT,
         cluster_name=CLUSTER_NAME,
         **call_options,
     )
Пример #6
0
 def test_create_subscription_nofailifexists(self, mock_service):
     """Without ``fail_if_exists`` an AlreadyExists error still yields the subscription name."""
     conflict = AlreadyExists(
         'Subscription already exists: %s' % EXPANDED_SUBSCRIPTION)
     mock_service.create_subscription.side_effect = conflict

     created = self.pubsub_hook.create_subscription(
         project_id=TEST_PROJECT,
         topic=TEST_TOPIC,
         subscription=TEST_SUBSCRIPTION,
     )

     self.assertEqual(TEST_SUBSCRIPTION, created)
Пример #7
0
 def test_create_preexisting_topic_failifexists(self, mock_service):
     """With ``fail_if_exists=True`` an AlreadyExists error becomes a PubSubException."""
     expected_message = f'Topic already exists: {TEST_TOPIC}'
     mock_service.return_value.create_topic.side_effect = AlreadyExists(expected_message)

     with pytest.raises(PubSubException) as raised:
         self.pubsub_hook.create_topic(
             project_id=TEST_PROJECT,
             topic=TEST_TOPIC,
             fail_if_exists=True,
         )

     assert str(raised.value) == expected_message
Пример #8
0
 def test_create_subscription_failifexists(self, mock_service):
     """With ``fail_if_exists=True`` an AlreadyExists error becomes a PubSubException."""
     expected_message = 'Subscription already exists: %s' % EXPANDED_SUBSCRIPTION
     mock_service.create_subscription.side_effect = AlreadyExists(expected_message)

     with self.assertRaises(PubSubException) as raised:
         self.pubsub_hook.create_subscription(
             project_id=TEST_PROJECT,
             topic=TEST_TOPIC,
             subscription=TEST_SUBSCRIPTION,
             fail_if_exists=True,
         )

     self.assertEqual(str(raised.exception), expected_message)
Пример #9
0
    def create_instance(self, parent, instance_id, instance, metadata=None):
        """Record the call, then raise a configured error or return the canned response.

        The arguments are stored on ``self._created_instance``. ``Unknown`` is
        raised when ``self._rpc_error`` is truthy, otherwise ``AlreadyExists``
        when ``self._create_instance_conflict`` is truthy.
        """
        from google.api_core import exceptions

        self._created_instance = (parent, instance_id, instance, metadata)

        # The RPC error takes precedence over the conflict.
        if self._rpc_error:
            raise exceptions.Unknown("error")
        if self._create_instance_conflict:
            raise exceptions.AlreadyExists("conflict")

        return self._create_instance_response
Пример #10
0
class TestCloudVisionReferenceImageCreate(unittest.TestCase):
    """Tests for ``CloudVisionCreateReferenceImageOperator.execute``."""

    @mock.patch('airflow.providers.google.cloud.operators.vision.CloudVisionHook')
    def test_minimal_green_path(self, mock_hook):
        """A successful create forwards the operator arguments to the hook."""
        create_reference_image = mock_hook.return_value.create_reference_image
        create_reference_image.return_value = {}

        operator = CloudVisionCreateReferenceImageOperator(
            task_id='id',
            location=LOCATION_TEST,
            product_id=PRODUCT_ID_TEST,
            reference_image=REFERENCE_IMAGE_TEST,
        )
        operator.execute(context=None)

        mock_hook.assert_called_once_with(
            gcp_conn_id=GCP_CONN_ID,
            impersonation_chain=None,
        )
        create_reference_image.assert_called_once_with(
            location=LOCATION_TEST,
            product_id=PRODUCT_ID_TEST,
            reference_image=REFERENCE_IMAGE_TEST,
            reference_image_id=None,
            project_id=None,
            retry=None,
            timeout=None,
            metadata=None,
        )

    @mock.patch(
        'airflow.providers.google.cloud.operators.vision.CloudVisionHook',
        **{
            'return_value.create_reference_image.side_effect': AlreadyExists("MESSAGe"),
        },
    )
    def test_already_exists(self, mock_hook):
        """AlreadyExists from the hook must not escape execute() (idempotence)."""
        operator = CloudVisionCreateReferenceImageOperator(
            task_id='id',
            location=LOCATION_TEST,
            product_id=PRODUCT_ID_TEST,
            reference_image=REFERENCE_IMAGE_TEST,
        )
        # No exception is expected here: the operator's execute() catches it.
        operator.execute(context=None)

        mock_hook.assert_called_once_with(
            gcp_conn_id=GCP_CONN_ID,
            impersonation_chain=None,
        )
        mock_hook.return_value.create_reference_image.assert_called_once_with(
            location=LOCATION_TEST,
            product_id=PRODUCT_ID_TEST,
            reference_image=REFERENCE_IMAGE_TEST,
            reference_image_id=None,
            project_id=None,
            retry=None,
            timeout=None,
            metadata=None,
        )
    def test_create_cluster_already_exists(self, wait_mock, convert_mock, log_mock):
        """AlreadyExists from the client is logged as success and nothing is waited on."""
        from google.api_core.exceptions import AlreadyExists

        # Force the client call to fail with a conflict.
        error_text = 'Already Exists'
        conflict = AlreadyExists(message=error_text)
        self.gke_hook._client.create_cluster.side_effect = conflict

        self.gke_hook.create_cluster({})

        wait_mock.assert_not_called()
        self.assertEqual(convert_mock.call_count, 1)
        log_mock.info.assert_any_call("Assuming Success: %s", error_text)
Пример #12
0
 def test_create_subscription_failifexists(self, mock_service):
     """With ``fail_if_exists=True`` an AlreadyExists error becomes a PubSubException."""
     expected_message = f'Subscription already exists: {EXPANDED_SUBSCRIPTION}'
     mock_service.create_subscription.side_effect = AlreadyExists(expected_message)

     with pytest.raises(PubSubException) as raised:
         self.pubsub_hook.create_subscription(
             project_id=TEST_PROJECT,
             topic=TEST_TOPIC,
             subscription=TEST_SUBSCRIPTION,
             fail_if_exists=True,
         )

     assert str(raised.value) == expected_message
Пример #13
0
 def test_already_exists(self, mock_hook):
     """AlreadyExists from the hook is absorbed and the product set id is returned."""
     # The operator's execute() is expected to catch AlreadyExists (idempotence).
     conflict = AlreadyExists(message='')
     mock_hook.return_value.create_product_set.side_effect = conflict

     operator = CloudVisionCreateProductSetOperator(
         task_id='id',
         location=LOCATION_TEST,
         product_set=PRODUCTSET_TEST,
         product_set_id=PRODUCTSET_ID_TEST,
         project_id='mock-project-id',
     )

     self.assertEqual(PRODUCTSET_ID_TEST, operator.execute(None))
 def test_already_exists(self, create_product_mock, get_conn):
     """AlreadyExists from product creation is absorbed and the product id is returned."""
     get_conn.return_value = {}
     # The operator's execute() is expected to catch AlreadyExists (idempotence).
     create_product_mock.side_effect = AlreadyExists(message='')

     operator = CloudVisionProductCreateOperator(
         task_id='id',
         location=LOCATION_TEST,
         product=PRODUCT_TEST,
         product_id=PRODUCT_ID_TEST,
         project_id='mock-project-id',
     )

     self.assertEqual(PRODUCT_ID_TEST, operator.execute(None))
Пример #15
0
    def create_database(self, parent, create_statement, extra_statements=None,
                        metadata=None):
        """Record the call, then raise a configured error or return the canned response.

        The arguments are stored on ``self._created_database``. The error
        flags are checked in this order: ``_rpc_error`` (``Unknown``),
        ``_create_database_conflict`` (``AlreadyExists``),
        ``_database_not_found`` (``NotFound``).
        """
        from google.api_core import exceptions

        self._created_database = (
            parent,
            create_statement,
            extra_statements,
            metadata,
        )

        if self._rpc_error:
            raise exceptions.Unknown('error')
        if self._create_database_conflict:
            raise exceptions.AlreadyExists('conflict')
        if self._database_not_found:
            raise exceptions.NotFound('not found')

        return self._create_database_response
Пример #16
0
    def test_create_cluster_already_exists(self, wait_mock, convert_mock,
                                           log_mock, mock_get_credentials):
        """AlreadyExists from the client is logged as success and nothing is waited on."""
        from google.api_core.exceptions import AlreadyExists

        # Force the client call to fail with a conflict.
        error_text = 'Already Exists'
        conflict = AlreadyExists(message=error_text)
        self.gke_hook._client.create_cluster.side_effect = conflict

        self.gke_hook.create_cluster(
            cluster={},
            project_id=TEST_GCP_PROJECT_ID,
        )

        wait_mock.assert_not_called()
        assert convert_mock.call_count == 1
        log_mock.info.assert_any_call("Assuming Success: %s", error_text)
Пример #17
0
    def create_cluster(self, project_id, region, cluster):
        """Store a new cluster in ``self.mock_clusters`` after filling in defaults.

        ``cluster`` may be a ``Cluster`` or a dict of ``Cluster`` keyword
        arguments. Defaults applied: project id, boot disk size for each
        populated instance group config, and service account scopes.

        Raises:
            InvalidArgument: the cluster carries a different project id, or
                has no cluster name.
            AlreadyExists: a cluster is already stored under the same
                (project_id, region, cluster_name) key.
        """
        self._check_region_matches_endpoint(region)

        # accept plain dicts as well as Cluster objects
        if not isinstance(cluster, Cluster):
            cluster = Cluster(**cluster)

        if not cluster.project_id:
            cluster.project_id = project_id
        elif cluster.project_id != project_id:
            raise InvalidArgument(
                'If provided, CreateClusterRequest.cluster.project_id must'
                ' match CreateClusterRequest.project_id')

        if not cluster.cluster_name:
            raise InvalidArgument('Cluster name is required')

        # fill in the default disk config for each populated instance group
        for role in ('master', 'worker', 'secondary_worker'):
            group_conf = getattr(cluster.config, role + '_config', None)
            # empty DiskConfigs are still true-ish, hence the str() check
            if not (group_conf and str(group_conf)):
                continue
            if not group_conf.disk_config:
                group_conf.disk_config = DiskConfig()
            if not group_conf.disk_config.boot_disk_size_gb:
                group_conf.disk_config.boot_disk_size_gb = (
                    _DEFAULT_DISK_SIZE_GB)

        gce_config = cluster.config.gce_cluster_config

        # default scopes apply only when none were given; mandatory scopes
        # are always added, and the result is stored sorted
        scopes = set(gce_config.service_account_scopes)
        if not scopes:
            scopes.update(_DEFAULT_SCOPES)
        scopes.update(_MANDATORY_SCOPES)
        gce_config.service_account_scopes[:] = sorted(scopes)

        # every new cluster starts out in the CREATING state
        cluster.status.state = _cluster_state_value('CREATING')

        key = (project_id, region, cluster.cluster_name)
        if key in self.mock_clusters:
            raise AlreadyExists(
                'Already exists: Cluster ' + _cluster_path(*key))

        self.mock_clusters[key] = cluster
Пример #18
0
    def create_instance(self,
                        create_in_production,
                        cluster_name,
                        location_id,
                        nr_nodes,
                        use_ssd_storage,
                        timeout=100):
        """Create the instance together with a single cluster and wait for completion.

        Example configuration:
        - create_in_production=False
        - cluster_name='test_cluster'
        - location_id='us-central1-f'
        - nr_nodes=1
        - use_ssd_storage=False

        Args:
            create_in_production (bool): If True, a PRODUCTION instance is
                requested; otherwise a DEVELOPMENT instance.
            cluster_name (str): passed to ``create_cluster_config``.
            location_id (str): passed to ``create_cluster_config``.
            nr_nodes (int): only used for a production instance; for a
                development instance ``None`` is passed to
                ``create_cluster_config`` instead.
            use_ssd_storage (bool): passed to ``create_cluster_config``.
            timeout (int): passed as ``timeout`` to the create operation's
                ``result()`` call.

        Returns:
            None

        Raises:
            AlreadyExists: if ``self.instance.exists()`` is true.
        """
        if self.instance.exists():
            message = "Instance '{}' already exists.".format(self.instance_id)
            raise AlreadyExists(message)
        logging.info("Creating instance '%s'.", self.instance_id)

        # instance configuration: the node count only applies to production
        if create_in_production:
            instance_type = bt_enums.Instance.Type.PRODUCTION
        else:
            instance_type = bt_enums.Instance.Type.DEVELOPMENT
            nr_nodes = None
        self.instance = self.client.instance(
            instance_id=self.instance_id,
            display_name=self.instance_id,
            instance_type=instance_type,
        )

        # cluster configuration
        cluster = self.create_cluster_config(
            cluster_name, location_id, nr_nodes, use_ssd_storage)

        # create the instance with its cluster and block until it is done
        self.instance.create(clusters=[cluster]).result(timeout=timeout)
Пример #19
0
    def test_create_topic_flow_when_topic_exists(self, mock_json_renderer,
                                                 mock_google_client):
        """An AlreadyExists error from create_topic is logged with "already exists"."""
        logger_name = "google_pubsub_adapter.publisher.google_pubsub_publisher"

        with self.settings(GCLOUD_PUBSUB_PROJECT_ID="pubsub"):
            client = mock_google_client()
            client.topic_path.return_value = "topic_path"
            mock_json_renderer()

            client.create_topic.side_effect = AlreadyExists("Topic Exists!")

            with self.assertLogs(logger_name, level="INFO") as captured:
                GooglePubsubPublisher().create_topic("topic")

                self.assertIn("already exists", captured.output[0])
Пример #20
0
 def test_execute_if_cluster_exists_do_not_use(self, mock_hook):
     """With ``use_if_exists=False`` the AlreadyExists error propagates from execute()."""
     hook = mock_hook.return_value
     hook.create_cluster.side_effect = [AlreadyExists("test")]
     hook.get_cluster.return_value.status.state = 0

     operator = DataprocCreateClusterOperator(
         task_id=TASK_ID,
         region=GCP_LOCATION,
         project_id=GCP_PROJECT,
         cluster=CLUSTER,
         gcp_conn_id=GCP_CONN_ID,
         retry=RETRY,
         timeout=TIMEOUT,
         metadata=METADATA,
         request_id=REQUEST_ID,
         use_if_exists=False,
     )

     with self.assertRaises(AlreadyExists):
         operator.execute(context={})
Пример #21
0
 def test_execute_if_cluster_exists(self, mock_hook, to_dict_mock):
     """When create_cluster raises AlreadyExists, the cluster is fetched once and converted."""
     hook = mock_hook.return_value
     hook.create_cluster.side_effect = [AlreadyExists("test")]
     hook.get_cluster.return_value.status.state = 0

     # Options the operator is expected to forward to both hook calls.
     call_options = dict(retry=RETRY, timeout=TIMEOUT, metadata=METADATA)
     cluster_spec = dict(
         cluster_config=CONFIG,
         labels=LABELS,
         cluster_name=CLUSTER_NAME,
     )

     operator = DataprocCreateClusterOperator(
         task_id=TASK_ID,
         region=GCP_LOCATION,
         project_id=GCP_PROJECT,
         gcp_conn_id=GCP_CONN_ID,
         request_id=REQUEST_ID,
         impersonation_chain=IMPERSONATION_CHAIN,
         **cluster_spec,
         **call_options,
     )
     operator.execute(context={})

     mock_hook.assert_called_once_with(
         gcp_conn_id=GCP_CONN_ID,
         impersonation_chain=IMPERSONATION_CHAIN,
     )
     hook.create_cluster.assert_called_once_with(
         region=GCP_LOCATION,
         project_id=GCP_PROJECT,
         request_id=REQUEST_ID,
         **cluster_spec,
         **call_options,
     )
     hook.get_cluster.assert_called_once_with(
         region=GCP_LOCATION,
         project_id=GCP_PROJECT,
         cluster_name=CLUSTER_NAME,
         **call_options,
     )
     to_dict_mock.assert_called_once_with(hook.get_cluster.return_value)
Пример #22
0
    def submit_job(self, project_id, region, job):
        """Store a new hadoop job in ``self.mock_jobs`` and return a copy of it.

        ``job`` may be a ``Job`` or a dict of ``Job`` keyword arguments. The
        job's reference project id is filled in from *project_id* when unset
        and its status state is set to ``SETUP_DONE``.

        Raises:
            NotImplementedError: *project_id* or the job id is missing, or
                the job is not a hadoop job.
            InvalidArgument: no cluster name in the placement, or the job
                reference carries a different project id.
            NotFound: the target cluster is not in ``self.mock_clusters``.
            AlreadyExists: a job is already stored under the same
                (project_id, region, job_id) key.
        """
        self._check_region_matches_endpoint(region)

        # accept plain dicts as well as Job objects
        if not isinstance(job, Job):
            job = Job(**job)

        if not project_id or not job.reference.job_id:
            raise NotImplementedError('generation of job IDs not implemented')
        job_id = job.reference.job_id

        if not job.placement.cluster_name:
            raise InvalidArgument('Cluster name is required')

        # the target cluster must already exist
        target_cluster = (project_id, region, job.placement.cluster_name)
        if target_cluster not in self.mock_clusters:
            raise NotFound(
                'Not Found: Cluster ' + _cluster_path(*target_cluster))

        if not job.hadoop_job:
            raise NotImplementedError('only hadoop jobs are supported')

        if not job.reference.project_id:
            job.reference.project_id = project_id
        elif job.reference.project_id != project_id:
            raise InvalidArgument(
                'If provided, SubmitJobRequest.job.job_reference'
                '.project_id must match SubmitJobRequest.project_id')

        job.status.state = _job_state_value('SETUP_DONE')

        key = (project_id, region, job_id)
        if key in self.mock_jobs:
            raise AlreadyExists('Already exists: Job ' + _job_path(*key))

        self.mock_jobs[key] = job

        # hand back a copy so callers cannot mutate the stored job
        return deepcopy(job)
class TestCatchHttpException(unittest.TestCase):
    """Checks which exception leaves a method wrapped in ``catch_http_exception``."""

    # pylint:disable=no-method-argument,unused-argument
    @parameterized.expand([
        # (name, exception raised by the fixture, base class,
        #  base class constructor argument, exception expected from the call)
        ("no_exception", None, LoggingMixin, None, None),
        (
            "raise_airflowexception",
            MovedPermanently("MESSAGE"),
            LoggingMixin,
            None,
            AirflowException,
        ),
        (
            "raise_airflowexception",
            RetryError("MESSAGE", cause=Exception("MESSAGE")),
            LoggingMixin,
            None,
            AirflowException,
        ),
        (
            "raise_airflowexception",
            ValueError("MESSAGE"),
            LoggingMixin,
            None,
            AirflowException,
        ),
        (
            "raise_alreadyexists",
            AlreadyExists("MESSAGE"),
            LoggingMixin,
            None,
            AlreadyExists,
        ),
        (
            "raise_http_error",
            HttpError(mock.Mock(**{"reason.return_value": None}), b"CONTENT"),
            BaseHook,
            {"source": None},
            AirflowException,
        ),
    ])
    def test_catch_exception(self, name, exception, base_class,
                             base_class_args, assert_raised):
        """Run the decorated fixture and check what it raises and that it was invoked."""
        self.called = False  # pylint:disable=attribute-defined-outside-init

        class FixtureClass(base_class):
            @hook.GoogleCloudBaseHook.catch_http_exception
            def test_fixture(*args, **kwargs):  # pylint:disable=unused-argument,no-method-argument
                # ``self`` here is the enclosing test case, captured by closure.
                self.called = True  # pylint:disable=attribute-defined-outside-init
                if exception is not None:
                    raise exception

        if assert_raised is not None:
            with self.assertRaises(assert_raised):
                FixtureClass(base_class_args).test_fixture()
        else:
            FixtureClass(base_class_args).test_fixture()

        self.assertTrue(self.called)
Пример #24
0
    def create_cluster(self, project_id, region, cluster):
        """Store a new cluster in ``self.mock_clusters`` after filling in defaults.

        ``cluster`` may be a ``Cluster`` or a dict of ``Cluster`` keyword
        arguments. Defaults applied: project id, boot disk size for each
        populated instance group config, service account scopes, network
        URI, and cluster properties from ``_DEFAULT_CLUSTER_PROPERTIES``.

        Raises:
            InvalidArgument: the cluster carries a different project id, has
                no cluster name, uses region ``'global'`` without a zone, or
                sets both a network URI and a subnetwork URI.
            AlreadyExists: a cluster is already stored under the same
                (project_id, region, cluster_name) key.
        """
        self._check_region_matches_endpoint(region)

        # convert dict to object
        if not isinstance(cluster, Cluster):
            cluster = Cluster(**cluster)

        if cluster.project_id:
            if cluster.project_id != project_id:
                raise InvalidArgument(
                    'If provided, CreateClusterRequest.cluster.project_id must'
                    ' match CreateClusterRequest.project_id')
        else:
            cluster.project_id = project_id

        if not cluster.cluster_name:
            raise InvalidArgument('Cluster name is required')

        # add in default disk config
        for x in ('master', 'worker', 'secondary_worker'):
            field = x + '_config'
            conf = getattr(cluster.config, field, None)
            if conf and str(conf):  # empty DiskConfigs are still true-ish
                if not conf.disk_config:
                    conf.disk_config = DiskConfig()
                if not conf.disk_config.boot_disk_size_gb:
                    conf.disk_config.boot_disk_size_gb = _DEFAULT_DISK_SIZE_GB

        # update gce_cluster_config
        gce_config = cluster.config.gce_cluster_config

        # Work out the region the cluster's resources live in. With the
        # 'global' endpoint that region has to come from the zone.
        if region == 'global':
            if gce_config.zone_uri:
                cluster_region = _zone_to_region(gce_config.zone_uri)
            else:
                raise InvalidArgument(
                    "Must specify a zone in GCE configuration"
                    " when using 'regions/global'")
        else:
            cluster_region = region

        # add in default scopes and sort
        scopes = set(gce_config.service_account_scopes)

        if not scopes:
            scopes.update(_DEFAULT_SCOPES)
        scopes.update(_MANDATORY_SCOPES)

        gce_config.service_account_scopes[:] = sorted(scopes)

        # handle network_uri and subnetwork_uri
        if gce_config.network_uri and gce_config.subnetwork_uri:
            raise InvalidArgument('GceClusterConfiguration cannot contain both'
                                  ' Network URI and Subnetwork URI')

        if not (gce_config.network_uri or gce_config.subnetwork_uri):
            gce_config.network_uri = 'default'

        if gce_config.network_uri:
            gce_config.network_uri = _fully_qualify_network_uri(
                gce_config.network_uri, project_id)

        if gce_config.subnetwork_uri:
            # Qualify with the resolved cluster region rather than the raw
            # endpoint region, which may be 'global'.
            gce_config.subnetwork_uri = _fully_qualify_subnetwork_uri(
                gce_config.subnetwork_uri, project_id, cluster_region)

        # add in default cluster properties, keeping any explicit values
        props = cluster.config.software_config.properties

        for k, v in _DEFAULT_CLUSTER_PROPERTIES.items():
            if k not in props:
                props[k] = v

        # initialize cluster status
        cluster.status.state = _cluster_state_value('CREATING')

        # the storage key keeps the endpoint region as passed by the caller
        cluster_key = (project_id, region, cluster.cluster_name)

        if cluster_key in self.mock_clusters:
            raise AlreadyExists('Already exists: Cluster ' +
                                _cluster_path(*cluster_key))

        self.mock_clusters[cluster_key] = cluster
Пример #25
0
 def test_fixutre(*args, **kwargs):
     """Flag ``self.called`` and raise AlreadyExists; the arguments are ignored."""
     # ``self`` is not a parameter here; presumably it is the enclosing
     # test case captured by closure.
     self.called = True
     conflict = AlreadyExists("MESSAGE")
     raise conflict
Пример #26
0
    def upload(self, metadata_file_name):
        # type: (str) -> None
        """Upload the images described by a JSON metadata file.

        The file must hold a dict mapping image paths to per-image metadata
        dicts with ``'location'``, ``'timestamp'`` and an optional
        ``'category'``. Only the basename of each image path is used: with
        ``'_'`` replaced by ``'/'`` it must split into exactly three parts,
        ``collection_id/upload_id/image_name``, and the image name without
        its extension must parse as an int. The image files themselves are
        read from the directory containing the metadata file.

        For each (collection, upload) group a database document is created,
        every image is uploaded to storage, and the document is then updated
        with the list of images. When ``AlreadyExists`` is raised (by the
        document creation or because an image blob already exists), the
        upload is skipped with a printed message, and the document is
        deleted if this call created it.

        Raises:
            RuntimeError: the metadata is not a dict, only some images have
                a category, or the images of one upload do not share a
                single timestamp.
            AssertionError: an image has a falsy timestamp.
            KeyError: an image's metadata lacks ``'location'`` or
                ``'timestamp'``.
            ValueError: an image basename does not split into three parts,
                or the image id is not an integer.
        """
        metadata_file_name = os.path.abspath(metadata_file_name)
        metadata_directory = os.path.dirname(metadata_file_name)
        with open(metadata_file_name, 'r') as file:
            metadata = json.load(file)
        if not isinstance(metadata, dict):
            raise RuntimeError('Metadata is not a dictionary in file %s' %
                               metadata_file_name)
        # collection_id -> upload_id -> list of Sending
        structured_to_send = {}
        # collection_id -> list of UploadToSend
        uploads_to_send = {}
        nb_no_category = 0
        nb_sendings = 0
        for image_path, image_metadata in sorted(metadata.items()):
            image_basename = os.path.basename(image_path)
            sending = Sending()
            sending.category = image_metadata.get('category', None)
            sending.location = image_metadata['location']
            sending.timestamp = image_metadata['timestamp']
            # The basename encodes "<collection>_<upload>_<image>"; turn it
            # into a slash-separated path and split it back into its parts.
            sending.firebase_path = image_basename.replace('_', '/')
            sending.collection_id, sending.upload_id, image_name = sending.firebase_path.split(
                '/')
            sending.image_id = int(os.path.splitext(image_name)[0])
            sending.local_path = os.path.join(metadata_directory,
                                              image_basename)
            assert sending.timestamp, 'Got an invalid timestamp'
            # Adding a bool counts the images that have no category.
            nb_no_category += sending.category is None
            nb_sendings += 1
            structured_to_send.setdefault(sending.collection_id,
                                          {}).setdefault(
                                              sending.upload_id,
                                              []).append(sending)

        # Check categories.
        # Either no image has a category (then the name of the folder holding
        # the metadata file is used for all of them) or every image has one.
        if nb_no_category == nb_sendings:
            inferred_category = os.path.basename(metadata_directory)
            print('Getting category from metadata containing folder',
                  inferred_category)
            for upload_id_to_sendings in structured_to_send.values():
                for sendings in upload_id_to_sendings.values():
                    for sending in sendings:
                        sending.category = inferred_category
        elif nb_no_category != 0:
            raise RuntimeError(
                'Invalid metadata: all images should either have a category or '
                'no category specified (to be retrieved from metadata folder name).'
            )

        # Check timestamps.
        # All images of one upload must share a single timestamp.
        for collection_id, uploads in structured_to_send.items():
            for upload_id, images in uploads.items():
                timestamps = {sending.timestamp for sending in images}
                if len(timestamps) != 1:
                    raise RuntimeError(
                        'No same timestamp for all images in %s' % upload_id)
                timestamp = images[0].timestamp
                # Update timestamp to current date if timestamp is unknown
                # NOTE(review): the timestamp is compared against
                # ImageInfo.UNKNOWN_CATEGORY - presumably a shared "unknown"
                # sentinel; confirm this is the intended constant.
                if timestamp == ImageInfo.UNKNOWN_CATEGORY:
                    timestamp = datetime.now().isoformat()
                uploads_to_send.setdefault(collection_id, []).append(
                    UploadToSend(upload_id, timestamp, images))

        # Send each upload: create the document, upload the images, then
        # record the images in the document.
        for collection_id, uploads in uploads_to_send.items():
            col = self.__database.collection(collection_id)
            for upload in sorted(uploads, key=lambda u: u.upload_id):
                doc = col.document(upload.upload_id)
                # Tracks whether the document was created by this call, so
                # that only such a document is deleted on failure.
                doc_just_created = False
                try:
                    doc.create({
                        'timestamp':
                        datetime.fromisoformat(upload.timestamp),
                        'images':
                        None
                    })
                    doc_just_created = True
                    # Refuse the whole upload if any image blob already
                    # exists, before uploading anything.
                    for sending in upload.images:  # type: Sending
                        blob = self.__storage.blob(sending.firebase_path)
                        if blob.exists():
                            raise AlreadyExists('An image already exists: %s' %
                                                sending.firebase_path)
                    for sending in sorted(
                            upload.images,
                            key=lambda s: s.image_id):  # type: Sending
                        blob = self.__storage.blob(sending.firebase_path)
                        blob.upload_from_filename(sending.local_path)
                        sending.url = blob.public_url
                        print('UPLOADED', sending.firebase_path)
                    doc.update({
                        'images': [
                            sending.to_upload()
                            for sending in sorted(upload.images,
                                                  key=lambda s: s.image_id)
                        ]
                    })
                    print('CREATED UPLOAD', upload.upload_id)
                except AlreadyExists as exc:
                    # Only AlreadyExists is handled here; any other exception
                    # propagates and leaves a created document in place.
                    print('CANNOT SEND UPLOAD', upload.upload_id, exc)
                    if doc_just_created:
                        doc.delete()
Пример #27
0
class TestCloudDataCatalogCreateEntryOperator(TestCase):
    """Tests for ``CloudDataCatalogCreateEntryOperator.execute``."""

    @mock.patch(
        "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook",
        **{"return_value.create_entry.return_value": TEST_ENTRY},
    )
    def test_assert_valid_hook_call(self, mock_hook) -> None:
        """A successful create forwards the arguments, pushes the entry id and returns the entry dict."""
        call_options = dict(retry=TEST_RETRY, timeout=TEST_TIMEOUT, metadata=TEST_METADATA)
        entry_args = dict(
            location=TEST_LOCATION,
            entry_group=TEST_ENTRY_GROUP_ID,
            entry_id=TEST_ENTRY_ID,
            entry=TEST_ENTRY,
            project_id=TEST_PROJECT_ID,
        )
        connection_args = dict(
            gcp_conn_id=TEST_GCP_CONN_ID,
            impersonation_chain=TEST_IMPERSONATION_CHAIN,
        )

        task = CloudDataCatalogCreateEntryOperator(
            task_id="task_id",
            **entry_args,
            **call_options,
            **connection_args,
        )
        task_instance = mock.MagicMock()
        result = task.execute(context={"task_instance": task_instance})

        mock_hook.assert_called_once_with(**connection_args)
        mock_hook.return_value.create_entry.assert_called_once_with(
            **entry_args,
            **call_options,
        )
        task_instance.xcom_push.assert_called_once_with(key="entry_id", value=TEST_ENTRY_ID)
        self.assertEqual(TEST_ENTRY_DICT, result)

    @mock.patch(
        "airflow.providers.google.cloud.operators.datacatalog.CloudDataCatalogHook",
        **{
            "return_value.create_entry.side_effect": AlreadyExists(message="message"),
            "return_value.get_entry.return_value": TEST_ENTRY,
        },
    )
    def test_assert_valid_hook_call_when_exists(self, mock_hook) -> None:
        """When create_entry raises AlreadyExists, the existing entry is fetched and returned."""
        call_options = dict(retry=TEST_RETRY, timeout=TEST_TIMEOUT, metadata=TEST_METADATA)
        entry_args = dict(
            location=TEST_LOCATION,
            entry_group=TEST_ENTRY_GROUP_ID,
            entry_id=TEST_ENTRY_ID,
            entry=TEST_ENTRY,
            project_id=TEST_PROJECT_ID,
        )
        connection_args = dict(
            gcp_conn_id=TEST_GCP_CONN_ID,
            impersonation_chain=TEST_IMPERSONATION_CHAIN,
        )

        task = CloudDataCatalogCreateEntryOperator(
            task_id="task_id",
            **entry_args,
            **call_options,
            **connection_args,
        )
        task_instance = mock.MagicMock()
        result = task.execute(context={"task_instance": task_instance})

        hook = mock_hook.return_value
        mock_hook.assert_called_once_with(**connection_args)
        hook.create_entry.assert_called_once_with(
            **entry_args,
            **call_options,
        )
        # The lookup passes the entry id under the ``entry`` keyword.
        hook.get_entry.assert_called_once_with(
            location=TEST_LOCATION,
            entry_group=TEST_ENTRY_GROUP_ID,
            entry=TEST_ENTRY_ID,
            project_id=TEST_PROJECT_ID,
            **call_options,
        )
        task_instance.xcom_push.assert_called_once_with(key="entry_id", value=TEST_ENTRY_ID)
        self.assertEqual(TEST_ENTRY_DICT, result)
Пример #28
0
 def test_create_preexisting_topic_nofailifexists(self, mock_service):
     """Without ``fail_if_exists`` an AlreadyExists error must not escape create_topic."""
     conflict = AlreadyExists('Topic already exists: %s' % EXPANDED_TOPIC)
     mock_service.return_value.create_topic.side_effect = conflict

     # Passing means simply that no exception is raised.
     self.pubsub_hook.create_topic(
         project_id=TEST_PROJECT,
         topic=TEST_TOPIC,
     )