def WaitForWorkflowTemplateOperation(dataproc, operation, timeout_s=None,
                                     poll_period_s=5):
    """Poll a workflow template Operation until it is done or times out.

    Each successful poll parses the operation metadata as WorkflowMetadata and
    hands it to PrintWorkflowMetadata together with the status, operations and
    errors dictionaries kept by this function.

    Args:
      dataproc: wrapper for Dataproc messages, resources, and client.
      operation: Operation, message of the operation to be polled.
      timeout_s: number, seconds to poll with retries before timing out.
        None means poll with no time limit.
      poll_period_s: number, delay in seconds between requests.

    Returns:
      Operation: the return value of the last successful operations.get
      request.

    Raises:
      OperationTimeoutError: if the operation is not done once polling stops.
      OperationError: if the operation, or its recorded createCluster /
        deleteCluster sub-operation, carries an error.
      apitools_exceptions.HttpError: re-raised when IsClientHttpException
        reports the error as a client (4xx) error.
    """
    get_request = dataproc.messages.DataprocProjectsRegionsOperationsGetRequest(
        name=operation.name)
    log.status.Print('Waiting on operation [{0}].'.format(operation.name))
    began_at = time.time()
    sub_operations = {'createCluster': None, 'deleteCluster': None}
    status = {}
    errors = {}

    def _TimeRemains():
        # If no timeout is specified, poll forever.
        return timeout_s is None or timeout_s > (time.time() - began_at)

    while _TimeRemains():
        try:
            operation = dataproc.client.projects_regions_operations.Get(
                get_request)
            workflow_metadata = ParseOperationJsonMetadata(
                operation.metadata, dataproc.messages.WorkflowMetadata)
            PrintWorkflowMetadata(
                workflow_metadata, status, sub_operations, errors)
            if operation.done:
                break
        except apitools_exceptions.HttpError as http_error:
            # Do not retry on 4xx errors; anything else is retried after the
            # sleep below.
            if IsClientHttpException(http_error):
                raise
        time.sleep(poll_period_s)

    # The metadata is parsed once more after polling (as the code always has);
    # the parsed value itself is not used below.
    ParseOperationJsonMetadata(operation.metadata,
                               dataproc.messages.WorkflowMetadata)

    if not operation.done:
        raise exceptions.OperationTimeoutError(
            'Operation [{0}] timed out.'.format(operation.name))
    if operation.error:
        raise exceptions.OperationError('Operation [{0}] failed: {1}.'.format(
            operation.name, FormatRpcError(operation.error)))

    for key in ('createCluster', 'deleteCluster'):
        sub_op = sub_operations.get(key)
        if sub_op is not None and sub_op.error:
            raise exceptions.OperationError(
                'Operation [{0}] failed: {1}.'.format(
                    sub_op.operationId, sub_op.error))

    log.info('Operation [%s] finished after %.3f seconds', operation.name,
             (time.time() - began_at))
    return operation
def Run(self, args):
    """Run a cluster diagnose operation and stream its output to stderr.

    Args:
      args: parsed command arguments. This method reads
        args.CONCEPTS.cluster, args.tarball_access and args.timeout.

    Returns:
      The 'outputUri' value taken from the finished operation's response.

    Raises:
      exceptions.OperationError: if the finished operation has no response,
        or the response has no (or an empty) 'outputUri'.
    """
    dataproc = dp.Dataproc(self.ReleaseTrack())
    cluster_ref = args.CONCEPTS.cluster.Parse()

    # Build the request fields once; the nested DiagnoseClusterRequest is only
    # attached when the caller asked for a specific tarball access level.
    request_kwargs = {
        'clusterName': cluster_ref.clusterName,
        'region': cluster_ref.region,
        'projectId': cluster_ref.projectId,
    }
    if args.tarball_access is not None:
        tarball_access = arg_utils.ChoiceToEnum(
            args.tarball_access,
            dataproc.messages.DiagnoseClusterRequest
            .TarballAccessValueValuesEnum)
        request_kwargs['diagnoseClusterRequest'] = (
            dataproc.messages.DiagnoseClusterRequest(
                tarballAccess=tarball_access))
    request = dataproc.messages.DataprocProjectsRegionsClustersDiagnoseRequest(
        **request_kwargs)

    operation = dataproc.client.projects_regions_clusters.Diagnose(request)
    # TODO(b/36052522): Stream output during polling.
    operation = util.WaitForOperation(
        dataproc,
        operation,
        message='Waiting for cluster diagnose operation',
        timeout_s=args.timeout)

    if not operation.response:
        raise exceptions.OperationError('Operation is missing response')

    properties = encoding.MessageToDict(operation.response)
    # Use .get so an absent key reaches the OperationError below instead of
    # surfacing as a bare KeyError.
    output_uri = properties.get('outputUri')
    if not output_uri:
        raise exceptions.OperationError('Response is missing outputUri')

    log.err.Print('Output from diagnostic:')
    log.err.Print('-----------------------------------------------')
    driver_log_stream = storage_helpers.StorageObjectSeriesStream(output_uri)
    # A single read might not read whole stream. Try a few times.
    read_retrier = retry.Retryer(max_retrials=4, jitter_ms=None)
    try:
        read_retrier.RetryOnResult(
            lambda: driver_log_stream.ReadIntoWritable(log.err),
            sleep_ms=100,
            should_retry_if=lambda *_: driver_log_stream.open)
    except retry.MaxRetrialsException:
        log.warning('Diagnostic finished successfully, '
                    'but output did not finish streaming.')
    log.err.Print('-----------------------------------------------')
    return output_uri
def WaitForOperation(self, operation, message, timeout_s, poll_period_s=5):
    """Poll a Dataproc Operation until it is done or the timeout elapses.

    While polling, warnings found in the parsed operation metadata are logged
    as they appear; only warnings beyond the count already seen are emitted.

    Args:
      operation: Operation, message of the operation to be polled.
      message: str, message to display to user while polling.
      timeout_s: number, seconds to poll with retries before timing out.
      poll_period_s: number, delay in seconds between requests.

    Returns:
      Operation: the return value of the last successful operations.get
      request.

    Raises:
      OperationTimeoutError: if the operation is not done once polling stops.
      OperationError: if the operation finishes with an error.
    """
    get_request = self.messages.DataprocProjectsRegionsOperationsGetRequest(
        name=operation.name)
    log.status.Print('Waiting on operation [{0}].'.format(operation.name))
    began_at = time.time()
    warnings_seen = 0
    interactive = console_io.IsInteractive(error=True)
    separator = '\n' if interactive else ''

    def _EmitUnseenWarnings(all_warnings):
        unseen = all_warnings[warnings_seen:]
        if not unseen:
            return
        # Drop a line to print nicely with the progress tracker.
        log.err.write(separator)
        for text in unseen:
            log.warn(text)

    with progress_tracker.ProgressTracker(message, autotick=True):
        while timeout_s > (time.time() - began_at):
            try:
                operation = self.client.projects_regions_operations.Get(
                    get_request)
                parsed = self.ParseOperationJsonMetadata(operation.metadata)
                _EmitUnseenWarnings(parsed.warnings)
                warnings_seen = len(parsed.warnings)
                if operation.done:
                    break
            except apitools_exceptions.HttpError:
                # Keep trying until we timeout in case error is transient.
                pass
            time.sleep(poll_period_s)

    final_metadata = self.ParseOperationJsonMetadata(operation.metadata)
    _EmitUnseenWarnings(final_metadata.warnings)

    if not operation.done:
        raise exceptions.OperationTimeoutError(
            'Operation [{0}] timed out.'.format(operation.name))
    if operation.error:
        raise exceptions.OperationError(
            'Operation [{0}] failed: {1}.'.format(
                operation.name, util.FormatRpcError(operation.error)))

    log.info('Operation [%s] finished after %.3f seconds', operation.name,
             (time.time() - began_at))
    return operation
def Run(self, args):
    """Run a cluster diagnose operation and stream its output to stderr.

    Args:
      args: parsed command arguments. This method reads args.name and
        args.timeout.

    Returns:
      The 'outputUri' value taken from the finished operation's response.

    Raises:
      exceptions.OperationError: if the finished operation has no response,
        or the response has no (or an empty) 'outputUri'.
    """
    client = self.context['dataproc_client']
    messages = self.context['dataproc_messages']

    cluster_ref = util.ParseCluster(args.name, self.context)
    request = messages.DataprocProjectsRegionsClustersDiagnoseRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId)

    operation = client.projects_regions_clusters.Diagnose(request)
    # TODO(b/36052522): Stream output during polling.
    operation = util.WaitForOperation(
        operation,
        self.context,
        message='Waiting for cluster diagnose operation',
        timeout_s=args.timeout)

    if not operation.response:
        raise exceptions.OperationError('Operation is missing response')

    properties = encoding.MessageToDict(operation.response)
    # Use .get so an absent key reaches the OperationError below instead of
    # surfacing as a bare KeyError.
    output_uri = properties.get('outputUri')
    if not output_uri:
        raise exceptions.OperationError('Response is missing outputUri')

    log.err.Print('Output from diagnostic:')
    log.err.Print('-----------------------------------------------')
    driver_log_stream = storage_helpers.StorageObjectSeriesStream(output_uri)
    # A single read might not read whole stream. Try a few times.
    read_retrier = retry.Retryer(max_retrials=4, jitter_ms=None)
    try:
        read_retrier.RetryOnResult(
            lambda: driver_log_stream.ReadIntoWritable(log.err),
            sleep_ms=100,
            should_retry_if=lambda *_: driver_log_stream.open)
    except retry.MaxRetrialsException:
        log.warn('Diagnostic finished successfully, '
                 'but output did not finish streaming.')
    log.err.Print('-----------------------------------------------')
    return output_uri
def WaitForOperation(operation, context, message, timeout_s=2100,
                     poll_period_s=5):
    """Poll a Dataproc Operation until it is done or the timeout elapses.

    Args:
      operation: Operation, message of the operation to be polled.
      context: dict, dataproc Command context; the 'dataproc_client' and
        'dataproc_messages' entries are read from it.
      message: str, message to display to user while polling.
      timeout_s: number, seconds to poll with retries before timing out.
      poll_period_s: number, delay in seconds between requests.

    Returns:
      Operation: the return value of the last successful operations.get
      request.

    Raises:
      OperationTimeoutError: if the operation is not done once polling stops.
      OperationError: if the operation finishes with an error.
    """
    client = context['dataproc_client']
    messages = context['dataproc_messages']

    get_request = messages.DataprocProjectsRegionsOperationsGetRequest(
        name=operation.name)
    log.status.Print('Waiting on operation [{0}].'.format(operation.name))
    began_at = time.time()

    with progress_tracker.ProgressTracker(message, autotick=True):
        while timeout_s > (time.time() - began_at):
            try:
                operation = client.projects_regions_operations.Get(get_request)
                if operation.done:
                    break
            except apitools_exceptions.HttpError:
                # Keep trying until we timeout in case error is transient.
                pass
            time.sleep(poll_period_s)

    # TODO(user): Parse operation metadata.
    operation_json = encoding.MessageToJson(operation)
    log.debug('Operation:\n' + operation_json)

    if not operation.done:
        raise exceptions.OperationTimeoutError(
            'Operation [{0}] timed out.'.format(operation.name))
    if operation.error:
        raise exceptions.OperationError('Operation [{0}] failed: {1}.'.format(
            operation.name, FormatRpcError(operation.error)))

    log.info('Operation [%s] finished after %.3f seconds', operation.name,
             (time.time() - began_at))
    return operation
def _GetResult(self, session):
    """Check the finished session for errors and return its output URI.

    Error handling for sessions. This happens after the session reaches one of
    the complete states. Overrides.

    Args:
      session: The session resource; a falsy value is treated as the waiter
        having timed out.

    Returns:
      The value of self._GetOutputUri(session) (presumably the session's
      output URI -- confirm against _GetOutputUri).

    Raises:
      OperationTimeoutError: When waiter timed out.
      OperationError: When remote session creation is failed.
    """
    if not session:
        # Session resource is None but polling is considered done.
        # This only happens when the waiter timed out.
        raise exceptions.OperationTimeoutError(
            'Timed out while waiting for session creation.')

    if (session.state ==
            self.dataproc.messages.Session.StateValueValuesEnum.FAILED):
        err_message = 'Session creation is FAILED.'
        if session.stateMessage:
            err_message = '{} Detail: {}'.format(
                err_message, session.stateMessage)
            # Keep the message terminated by exactly one trailing period.
            if not err_message.endswith('.'):
                err_message += '.'
        raise exceptions.OperationError(err_message)

    return self._GetOutputUri(session)