Example #1
 def execute(self, context: Dict):
     hook = DataFusionHook(
         gcp_conn_id=self.gcp_conn_id,
         delegate_to=self.delegate_to,
         api_version=self.api_version,
     )
     self.log.info("Creating Data Fusion instance: %s", self.instance_name)
     try:
         operation = hook.create_instance(
             instance_name=self.instance_name,
             instance=self.instance,
             location=self.location,
             project_id=self.project_id,
         )
         instance = hook.wait_for_operation(operation)
         self.log.info("Instance %s created successfully",
                       self.instance_name)
     except HttpError as err:
         if err.resp.status not in (409, '409'):
             raise
         self.log.info("Instance %s already exists", self.instance_name)
         instance = hook.get_instance(instance_name=self.instance_name,
                                      location=self.location,
                                      project_id=self.project_id)
         # Wait for instance to be ready
         for time_to_wait in exponential_sleep_generator(initial=10,
                                                         maximum=120):
             if instance['state'] != 'CREATING':
                 break
             sleep(time_to_wait)
             instance = hook.get_instance(instance_name=self.instance_name,
                                          location=self.location,
                                          project_id=self.project_id)
     return instance
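The pattern above (fall back to get_instance on a 409 conflict, then poll until the resource leaves the CREATING state) can be distilled into a small helper. A minimal sketch, assuming the same exponential_sleep_generator from google.api_core.retry used in the example; wait_until_ready and get_resource are hypothetical names introduced here for illustration:

    from time import sleep
    from google.api_core.retry import exponential_sleep_generator

    def wait_until_ready(get_resource, max_checks=30):
        # get_resource is a hypothetical zero-argument callable returning a
        # dict with a "state" key (e.g. a bound hook.get_instance call).
        resource = get_resource()
        delays = exponential_sleep_generator(initial=10, maximum=120)
        for _, time_to_wait in zip(range(max_checks), delays):
            if resource["state"] != "CREATING":
                break
            sleep(time_to_wait)
            resource = get_resource()
        return resource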
Example #2
    def retry_wrapper(*args, **kwargs):
        sleep_generator = core_retry.exponential_sleep_generator(
            _DEFAULT_INITIAL_DELAY,
            _DEFAULT_MAXIMUM_DELAY,
            _DEFAULT_DELAY_MULTIPLIER,
        )

        for sleep_time in itertools.islice(sleep_generator, retries + 1):
            try:
                result = callback(*args, **kwargs)
                if isinstance(result, tasklets.Future):
                    result = yield result
            except Exception as e:
                # `e` is removed from locals at end of block
                error = e  # See: https://goo.gl/5J8BMK
                if not is_transient_error(error):
                    raise error
            else:
                raise tasklets.Return(result)

            yield tasklets.sleep(sleep_time)

        raise core_exceptions.RetryError(
            "Maximum number of {} retries exceeded while calling {}".format(
                retries, callback),
            cause=error,
        )
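A side note on the `error = e` assignment above: since Python 3, the name bound by `except ... as e` is deleted when the except block ends, so the exception must be copied to another variable to be referenced later (here, as `cause=error` after the loop). A small self-contained illustration:

    error = None
    try:
        1 / 0
    except ZeroDivisionError as e:
        error = e  # keep a reference; `e` itself is unbound after this block
    print(repr(error))   # ZeroDivisionError('division by zero')
    # print(e)           # would raise NameError: name 'e' is not defined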
Example #3
    def _connect_to_instance(self, user, hostname, pkey,
                             proxy_command) -> paramiko.SSHClient:
        self.log.info(
            "Opening remote connection to host: username=%s, hostname=%s",
            user, hostname)
        max_time_to_wait = 10
        for time_to_wait in exponential_sleep_generator(
                initial=1, maximum=max_time_to_wait):
            try:
                client = _GCloudAuthorizedSSHClient(self._compute_hook)
                # Default is RejectPolicy
                # No known host checking since we are not storing privatekey
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

                client.connect(
                    hostname=hostname,
                    username=user,
                    pkey=pkey,
                    sock=paramiko.ProxyCommand(proxy_command)
                    if proxy_command else None,
                    look_for_keys=False,
                )
                return client
            except paramiko.SSHException:
                # exponential_sleep_generator is an infinite generator, so we need to
                # check the end condition.
                if time_to_wait == max_time_to_wait:
                    raise
            self.log.info("Failed to connect. Waiting %ds to retry",
                          time_to_wait)
            time.sleep(time_to_wait)
        raise AirflowException("Caa not connect to instance")
Example #4
    def execute(self, context: "Context") -> dict:
        hook = DataplexHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Creating Dataplex task %s", self.dataplex_task_id)
        DataplexTaskLink.persist(context=context, task_instance=self)

        try:
            operation = hook.create_task(
                project_id=self.project_id,
                region=self.region,
                lake_id=self.lake_id,
                body=self.body,
                dataplex_task_id=self.dataplex_task_id,
                validate_only=self.validate_only,
                retry=self.retry,
                timeout=self.timeout,
                metadata=self.metadata,
            )
            if not self.asynchronous:
                self.log.info("Waiting for Dataplex task %s to be created",
                              self.dataplex_task_id)
                task = hook.wait_for_operation(timeout=self.timeout,
                                               operation=operation)
                self.log.info("Task %s created successfully",
                              self.dataplex_task_id)
            else:
                is_done = operation.done()
                self.log.info("Is operation done already? %s", is_done)
                return is_done
        except HttpError as err:
            if err.resp.status not in (409, '409'):
                raise
            self.log.info("Task %s already exists", self.dataplex_task_id)
            # Wait for task to be ready
            for time_to_wait in exponential_sleep_generator(initial=10,
                                                            maximum=120):
                task = hook.get_task(
                    project_id=self.project_id,
                    region=self.region,
                    lake_id=self.lake_id,
                    dataplex_task_id=self.dataplex_task_id,
                    retry=self.retry,
                    timeout=self.timeout,
                    metadata=self.metadata,
                )
                if task['state'] != 'CREATING':
                    break
                sleep(time_to_wait)

        return Task.to_dict(task)
Example #5
 def wait_for_operation(self, operation: Dict[str, Any]) -> Dict[str, Any]:
     """Waits for long-lasting operation to complete."""
     for time_to_wait in exponential_sleep_generator(initial=10, maximum=120):
         sleep(time_to_wait)
         operation = (
             self.get_conn().projects().locations().operations().get(name=operation.get("name")).execute()
         )
         if operation.get("done"):
             break
     if "error" in operation:
         raise AirflowException(operation["error"])
     return operation["response"]
Example #6
 async def retry_wrapped_func(*args, **kwargs):
     """A wrapper that calls target function with retry."""
     target = functools.partial(func, *args, **kwargs)
     sleep_generator = exponential_sleep_generator(
         self._initial, self._maximum, multiplier=self._multiplier)
     return await retry_target(
         target,
         self._predicate,
         sleep_generator,
         self._deadline,
         on_error=on_error,
     )
Example #7
    def retry_wrapper(*args, **kwargs):
        from google.cloud.ndb import context as context_module

        sleep_generator = core_retry.exponential_sleep_generator(
            _DEFAULT_INITIAL_DELAY,
            _DEFAULT_MAXIMUM_DELAY,
            _DEFAULT_DELAY_MULTIPLIER,
        )

        for sleep_time in itertools.islice(sleep_generator, retries + 1):
            context = context_module.get_context()
            if not context.in_retry():
                # We need to be able to identify if we are inside a nested
                # retry. Here, we set the retry state in the context. This is
                # used for deciding if an exception should be raised
                # immediately or passed up to the outer retry block.
                context.set_retry_state(repr(callback))
            try:
                result = callback(*args, **kwargs)
                if isinstance(result, tasklets.Future):
                    result = yield result
            except exceptions.NestedRetryException as e:
                error = e
            except Exception as e:
                # `e` is removed from locals at end of block
                error = e  # See: https://goo.gl/5J8BMK
                if not is_transient_error(error):
                    # If we are in an inner retry block, use special nested
                    # retry exception to bubble up to outer retry. Else, raise
                    # actual exception.
                    if context.get_retry_state() != repr(callback):
                        message = getattr(error, "message", str(error))
                        raise exceptions.NestedRetryException(message)
                    else:
                        raise error
            else:
                raise tasklets.Return(result)
            finally:
                # No matter what, if we are exiting the top level retry,
                # clear the retry state in the context.
                if context.get_retry_state() == repr(
                        callback):  # pragma: NO BRANCH
                    context.clear_retry_state()

            yield tasklets.sleep(sleep_time)

        raise core_exceptions.RetryError(
            "Maximum number of {} retries exceeded while calling {}".format(
                retries, callback),
            cause=error,
        )
Example #8
            def retry_wrapper(*args, **kwargs):
                sleep_generator = core_retry.exponential_sleep_generator(0.1, 1)
                attempts = 5
                for sleep_time in sleep_generator:  # pragma: NO BRANCH
                    # The pragma is required because the loop never exits
                    # normally; it is always exited by raising.
                    attempts -= 1
                    try:
                        result = yield wrapped(*args, **kwargs)
                        raise tasklets.Return(result)
                    except transient_errors:
                        if not attempts:
                            raise

                    yield tasklets.sleep(sleep_time)
Example #9
 def _wait_for_restore_service(self, hook: DataprocMetastoreHook):
     """
     Workaround to check that the restore service finished successfully.
     We discovered an issue parsing the result into Restore inside the SDK.
     """
     for time_to_wait in exponential_sleep_generator(initial=10, maximum=120):
         sleep(time_to_wait)
         service = hook.get_service(
             region=self.region,
             project_id=self.project_id,
             service_id=self.service_id,
             retry=self.retry,
             timeout=self.timeout,
             metadata=self.metadata,
         )
         activities: MetadataManagementActivity = service.metadata_management_activity
         restore_service: Restore = activities.restores[0]
         if restore_service.state == Restore.State.SUCCEEDED:
             return restore_service
         if restore_service.state == Restore.State.FAILED:
             raise AirflowException("Restoring service FAILED")
Example #10
 def _wait_for_export_metadata(self, hook: DataprocMetastoreHook):
     """
     Workaround to check that the export was created successfully.
     We discovered an issue parsing the result into MetadataExport inside the SDK.
     """
     for time_to_wait in exponential_sleep_generator(initial=10, maximum=120):
         sleep(time_to_wait)
         service = hook.get_service(
             region=self.region,
             project_id=self.project_id,
             service_id=self.service_id,
             retry=self.retry,
             timeout=self.timeout,
             metadata=self.metadata,
         )
         activities: MetadataManagementActivity = service.metadata_management_activity
         metadata_export: MetadataExport = activities.metadata_exports[0]
         if metadata_export.state == MetadataExport.State.SUCCEEDED:
             return metadata_export
         if metadata_export.state == MetadataExport.State.FAILED:
             raise AirflowException(
                 f"Exporting metadata from Dataproc Metastore {metadata_export.name} FAILED"
             )
Example #11
def test_exponential_sleep_generator_base_2(uniform):
    gen = retry.exponential_sleep_generator(1, 60, multiplier=2)

    result = list(itertools.islice(gen, 8))
    assert result == [1, 2, 4, 8, 16, 32, 60, 60]
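The test asserts the core shape of the backoff: delays double from initial until they reach maximum and then stay capped. A simplified sketch of that behavior, assuming no jitter (the real google.api_core implementation may also apply random jitter, which the test's uniform fixture appears to patch out):

    import itertools

    def capped_doubling(initial, maximum, multiplier=2):
        # Yield initial, initial*multiplier, ... capped at maximum.
        delay = initial
        while True:
            yield min(delay, maximum)
            delay *= multiplier

    assert list(itertools.islice(capped_doubling(1, 60), 8)) == [1, 2, 4, 8, 16, 32, 60, 60]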
Example #12
 def _retry_generator(self):
     """Generates retry intervals that exponentially back off."""
     return retry.exponential_sleep_generator(
         initial=self.DEFAULT_INITIAL_DELAY,
         maximum=self.DEFAULT_MAXIMUM_DELAY)