def CreateApp(api_client, project, region):
  """Creates an App Engine application in the given region.

  Shows a progress tracker describing the creation while the API call runs.

  Args:
    api_client: The App Engine Admin API client.
    project: The GCP project the app is created in (used for messages).
    region: The region to create the app in.

  Raises:
    AppAlreadyExistsError: if the API reports a conflict, i.e. the project
      already contains an app.
  """
  creating_template = (
      'Creating App Engine application in project [{project}] and '
      'region [{region}].')
  already_exists_template = (
      'The project [{project}] already contains an App Engine application. '
      'You can deploy your application using `gcloud app deploy`.')

  tracker = progress_tracker.ProgressTracker(
      creating_template.format(project=project, region=region))
  with tracker:
    try:
      api_client.CreateApp(region)
    except api_lib_exceptions.ConflictError:
      # Translate the API conflict into a user-facing error.
      raise AppAlreadyExistsError(
          already_exists_template.format(project=project))
def DeleteVersions(api_client, versions):
  """Deletes each of the given versions, collecting failures.

  Every version is attempted even if an earlier one fails; all failures are
  reported together at the end.

  Args:
    api_client: The App Engine Admin API client.
    versions: Iterable of objects exposing `service` and `id`.

  Raises:
    VersionsDeleteError: if deleting one or more versions failed.
  """
  failures = {}
  for version in versions:
    path = '{0}/{1}'.format(version.service, version.id)
    try:
      with progress_tracker.ProgressTracker('Deleting [{0}]'.format(path)):
        api_client.DeleteVersion(version.service, version.id)
    except (calliope_exceptions.HttpException,
            operations_util.OperationError,
            operations_util.OperationTimeoutError) as err:
      failures[path] = str(err)

  if not failures:
    return

  # One "[service/version]: message" entry per failed version.
  printable = {
      path: '[{0}]: {1}'.format(path, error_msg)
      for path, error_msg in failures.items()
  }
  summary = 'Issue deleting {0}: [{1}]\n\n'.format(
      text.Pluralize(len(printable), 'version'),
      ', '.join(printable.keys()))
  raise VersionsDeleteError(summary + '\n\n'.join(printable.values()))
def testProgressTrackerSameMultiTwice(self):
  """Ticks twice with the same multiline detail message and checks stderr."""
  # 20: arbitrary size that will make a multiline display
  console_size = self.SetConsoleSize(20)
  msg = None

  def _DetailMessage():
    # Reads the enclosing `msg` at call time, so later rebinding is visible.
    return msg

  with progress_tracker.ProgressTracker(
      'tracker', autotick=False,
      detail_message_callback=_DetailMessage) as t:
    msg = 'this is a multiline display message'
    t.Tick()
    msg = 'this is a multiline display message'
    t.Tick()

  # Sequence emitted before each redraw: carriage return plus a blanked line.
  clear_line = '\r' + ' ' * console_size
  expected = (
      clear_line + '\rtracker...' +
      clear_line + '\rtracker... this is a\n'
      ' multiline display m\n'
      'essage.../' +
      clear_line + '\ressage...-' +
      clear_line + '\ressage...done.\n')
  self.AssertErrContains(expected)
def PatchApplication(
    release_track, split_health_checks=None, use_container_optimized_os=None):
  """Updates an App Engine application via the API client.

  If neither setting is supplied, nothing is sent and a status line is
  printed instead.

  Args:
    release_track: The release track of the app update command to run.
    split_health_checks: Boolean, whether to enable split health checks by
      default. None leaves the setting untouched.
    use_container_optimized_os: Boolean, whether to enable Container-Optimized
      OS as Flex base VM image by default. None leaves the setting untouched.
  """
  api_client = appengine_app_update_api_client.GetApiClientForTrack(
      release_track)

  nothing_requested = (
      split_health_checks is None and use_container_optimized_os is None)
  if nothing_requested:
    log.status.Print('Nothing to update.')
    return

  tracker_message = 'Updating the app [{0}]'.format(api_client.project)
  with progress_tracker.ProgressTracker(tracker_message):
    api_client.PatchApplication(
        split_health_checks=split_health_checks,
        use_container_optimized_os=use_container_optimized_os)
def DeleteServices(api_client, services):
  """Deletes each of the given services, collecting failures.

  Every service is attempted even if an earlier one fails; all failures are
  reported together at the end.

  Args:
    api_client: The App Engine Admin API client.
    services: Iterable of objects exposing `id`.

  Raises:
    ServicesDeleteError: if deleting one or more services failed.
  """
  failures = {}
  for service in services:
    try:
      with progress_tracker.ProgressTracker(
          'Deleting [{0}]'.format(service.id)):
        api_client.DeleteService(service.id)
    except (calliope_exceptions.HttpException,
            operations_util.OperationError,
            operations_util.OperationTimeoutError,
            app_exceptions.Error) as err:
      failures[service.id] = str(err)

  if not failures:
    return

  # One "[service]: message" entry per failed service.
  printable = {
      service_id: '[{0}]: {1}'.format(service_id, error_msg)
      for service_id, error_msg in failures.items()
  }
  summary = 'Issue deleting {0}: [{1}]\n\n'.format(
      text.Pluralize(len(printable), 'service'),
      ', '.join(printable.keys()))
  raise ServicesDeleteError(summary + '\n\n'.join(printable.values()))
def LoadOrGenerate(self, directories, verbose=False, warn_on_exceptions=False):
  """Loads the CLI tree or generates it if it's out of date.

  Args:
    directories: Passed to self.FindTreeFile() to locate the tree file.
    verbose: If True, report a missing command or an up-to-date tree.
    warn_on_exceptions: If True, failure to open the tree file for writing is
      logged as a warning instead of being raised.

  Returns:
    The CLI tree: either the one loaded from the existing file when its
    version matches, or the freshly generated one. None if the CLI command
    does not exist.

  Raises:
    IOError: if the tree file cannot be opened for writing and
      warn_on_exceptions is False.
  """
  if not self.CliCommandExists():
    if verbose:
      log.warn(u'Command [{}] not found.'.format(self.cli_name))
    return None

  up_to_date = False
  path, f = self.FindTreeFile(directories)
  if f:
    with f:
      try:
        tree = json.load(f)
      except ValueError:
        # Corrupt JSON -- could have been interrupted.
        tree = None
      if tree:
        version = self.GetVersion()
        up_to_date = tree.get(cli_tree.LOOKUP_CLI_VERSION) == version
  if up_to_date:
    if verbose:
      log.status.Print(u'[{}] CLI tree version [{}] is up to date.'.format(
          self.cli_name, version))
    return tree

  # An existing (stale or corrupt) file means this is an update.
  action = 'Updating' if f else 'Generating'
  with progress_tracker.ProgressTracker(
      u'{} the [{}] CLI tree'.format(action, self.cli_name)):
    tree = self.GenerateTree()
    try:
      f = open(path, 'w')
    except IOError as e:
      if not warn_on_exceptions:
        raise
      log.warn(str(e))
    else:
      with f:
        resource_printer.Print(tree, print_format='json', out=f)
  # Previously the generated tree was dropped and None was returned here. It
  # is returned even when caching it to disk failed, since it is still valid.
  return tree
def Run(self, args):
  """Enables debug mode on the selected instance after user confirmation."""
  api_client = appengine_api_client.GetApiClient()

  def _IsFlexibleVersion(v):
    return util.Environment.IsFlexible(v.environment)

  all_instances = api_client.GetAllInstances(
      args.service, args.version, version_filter=_IsFlexibleVersion)

  try:
    parsed = resources.REGISTRY.Parse(args.instance)
  except Exception:  # pylint:disable=broad-except
    # If parsing fails, use interactive selection or provided instance ID.
    instance = instances_util.GetMatchingInstance(
        all_instances,
        service=args.service,
        version=args.version,
        instance=args.instance)
  else:
    instance = instances_util.GetMatchingInstance(
        all_instances,
        service=parsed.servicesId,
        version=parsed.versionsId,
        instance=parsed.instancesId)

  console_io.PromptContinue(
      'About to enable debug mode for instance [{0}].'.format(instance),
      cancel_on_no=True)
  message = 'Enabling debug mode for instance [{0}]'.format(instance)

  # Build a full resource reference for the instance that was matched.
  instance_params = {
      'versionsId': instance.version,
      'instancesId': instance.id,
      'servicesId': instance.service
  }
  instance_ref = resources.REGISTRY.Parse(
      instance.id,
      params=instance_params,
      collection='appengine.apps.services.'
                 'versions.instances')
  with progress_tracker.ProgressTracker(message):
    api_client.DebugInstance(instance_ref)
def Run(self, args):
  """Starts the matching versions, reporting all failures together.

  Raises:
    VersionsStartError: if starting one or more versions failed.
  """
  # TODO(user): This fails with "module/version does not exist" even
  # when it exists if the scaling mode is set to auto. It would be good
  # to improve that error message.
  api_client = appengine_api_client.GetApiClient()
  services = api_client.ListServices()
  all_versions = api_client.ListVersions(services)
  versions = version_util.GetMatchingVersions(
      all_versions, args.versions, args.service)

  if not versions:
    log.warn('No matching versions found.')
    return

  fmt = 'list[title="Starting the following versions:"]'
  resource_printer.Print(versions, fmt, out=log.status)
  console_io.PromptContinue(cancel_on_no=True)

  errors = {}
  for version in versions:
    tracker_message = 'Starting [{0}]'.format(version)
    try:
      with progress_tracker.ProgressTracker(tracker_message):
        api_client.StartVersion(version.service, version.id)
    except (calliope_exceptions.HttpException,
            operations.OperationError,
            operations.OperationTimeoutError) as err:
      errors[version] = str(err)

  if not errors:
    return

  # Key the report by "[service/version]".
  printable_errors = {}
  for failed_version, error_msg in errors.items():
    short_name = '[{0}/{1}]'.format(failed_version.service, failed_version.id)
    printable_errors[short_name] = '{0}: {1}'.format(short_name, error_msg)
  header = 'Issues starting version(s): {0}\n\n'.format(
      ', '.join(printable_errors.keys()))
  raise VersionsStartError(header + '\n\n'.join(printable_errors.values()))
def Run(self, args):
  """Starts the matching versions, reporting all failures together.

  Raises:
    VersionsStartError: if starting one or more versions failed.
  """
  # TODO(b/36052475): This fails with "module/version does not exist" even
  # when it exists if the scaling mode is set to auto. It would be good
  # to improve that error message.
  api_client = appengine_api_client.GetApiClientForTrack(self.ReleaseTrack())
  services = api_client.ListServices()
  all_versions = api_client.ListVersions(services)
  versions = version_util.GetMatchingVersions(
      all_versions, args.versions, args.service)

  if not versions:
    log.warning('No matching versions found.')
    return

  fmt = 'list[title="Starting the following versions:"]'
  resource_printer.Print(versions, fmt, out=log.status)
  console_io.PromptContinue(cancel_on_no=True)

  errors = {}
  # Sort versions to make behavior deterministic enough for unit testing.
  ordered_versions = sorted(versions, key=str)
  for version in ordered_versions:
    tracker_message = 'Starting [{0}]'.format(version)
    try:
      with progress_tracker.ProgressTracker(tracker_message):
        operations_util.CallAndCollectOpErrors(
            api_client.StartVersion, version.service, version.id)
    except operations_util.MiscOperationError as err:
      errors[version] = six.text_type(err)

  if not errors:
    return

  # Key the report by "[service/version]".
  printable_errors = {}
  for failed_version, error_msg in errors.items():
    short_name = '[{0}/{1}]'.format(failed_version.service, failed_version.id)
    printable_errors[short_name] = '{0}: {1}'.format(short_name, error_msg)
  header = 'Issues starting version(s): {0}\n\n'.format(
      ', '.join(list(printable_errors.keys())))
  raise VersionsStartError(
      header + '\n\n'.join(list(printable_errors.values())))
def _WaitForOperation(client, get_request, message):
  """Waits for an operation to complete, polling with a linear retryer.

  The status function is re-invoked while its result is None. Sleep time,
  wait ceiling and overall limit come from the module constants SLEEP_MS,
  WAIT_CEILING_MS and MAX_WAIT_MS.

  Args:
    client: The client used to make requests.
    get_request: A GetOperationRequest message.
    message: str, The string to print while polling.

  Raises:
    FunctionsError: If the retryer gives up waiting for the operation.
  """
  tracker = console_progress_tracker.ProgressTracker(message, autotick=False)
  with tracker as pt:
    # A multiplier of 1 keeps the sleep constant, i.e. a linear retryer.
    linear_retryer = retry.Retryer(
        exponential_sleep_multiplier=1,
        max_wait_ms=MAX_WAIT_MS,
        wait_ceiling_ms=WAIT_CEILING_MS)
    poll_args = [client, get_request]
    poll_kwargs = {'progress_tracker': pt}
    try:
      linear_retryer.RetryOnResult(
          _GetOperationStatus,
          poll_args,
          poll_kwargs,
          should_retry_if=None,
          sleep_ms=SLEEP_MS)
    except retry.WaitException:
      raise exceptions.FunctionsError(
          'Operation {0} is taking too long'.format(get_request.name))
def WaitForResourceDeletion(request_method, resource_ref, message,
                            timeout_s=60, poll_period_s=5):
  """Poll Dataproc resource until it no longer exists.

  Args:
    request_method: Callable invoked as request_method(resource_ref); it is
      expected to raise HttpNotFoundError once the resource is gone.
    resource_ref: The resource being deleted; passed to request_method.
    message: str, message to display to user while polling.
    timeout_s: number, seconds to poll before timing out.
    poll_period_s: number, delay in seconds between requests.

  Raises:
    OperationTimeoutError: if the resource still exists after timeout_s.
    apitools_exceptions.HttpError: re-raised when IsClientHttpException()
      reports the error as a client error (other than not-found).
  """
  with progress_tracker.ProgressTracker(message, autotick=True):
    start_time = time.time()
    while timeout_s > (time.time() - start_time):
      try:
        request_method(resource_ref)
      except apitools_exceptions.HttpNotFoundError:
        # Object deleted
        return
      except apitools_exceptions.HttpError as error:
        # The template uses str.format placeholders, so format it eagerly.
        # Passing the values as logging args would apply %-formatting and
        # fail to render the message.
        log.debug('Get request for [{0}] failed:\n{1}'.format(
            resource_ref, error))
        # Do not retry on 4xx errors
        if IsClientHttpException(error):
          raise
      time.sleep(poll_period_s)
  raise exceptions.OperationTimeoutError(
      'Deleting resource [{0}] timed out.'.format(resource_ref))
def RunDeploy(
    args, api_client, use_beta_stager=False,
    runtime_builder_strategy=runtime_builders.RuntimeBuilderStrategy.NEVER,
    parallel_build=True,
    flex_image_build_option=FlexImageBuildOptions.ON_CLIENT,
    disable_build_cache=False):
  """Perform a deployment based on the given args.

  Stages the deployables, makes sure an app exists and is usable, asks the
  user to confirm, deploys every service as a new version, then pushes any
  config files through the legacy admin console client.

  Args:
    args: argparse.Namespace, An object that contains the values for the
      arguments specified in the ArgsDeploy() function.
    api_client: api_lib.app.appengine_api_client.AppengineClient, App Engine
      Admin API client.
    use_beta_stager: Use the stager registry defined for the beta track rather
      than the default stager registry.
    runtime_builder_strategy: runtime_builders.RuntimeBuilderStrategy, when to
      use the new CloudBuild-based runtime builders (alternative is old
      externalized runtimes).
    parallel_build: bool, whether to use parallel build and deployment path.
      Only supported in v1beta and v1alpha App Engine Admin API.
    flex_image_build_option: FlexImageBuildOptions, whether a flex deployment
      should upload files so that the server can build the image or build the
      image on client.
    disable_build_cache: bool, disable the build cache.

  Returns:
    A dict on the form `{'versions': new_versions, 'configs': updated_configs}`
    where new_versions is a list of version_util.Version, and updated_configs
    is a list of config file identifiers, see yaml_parsing.ConfigYamlInfo.
  """
  project = properties.VALUES.core.project.Get(required=True)
  deploy_options = DeployOptions.FromProperties(
      runtime_builder_strategy=runtime_builder_strategy,
      parallel_build=parallel_build,
      flex_image_build_option=flex_image_build_option)

  # The staging area is handed to the stager and lives until all services
  # have been deployed.
  with files.TemporaryDirectory() as staging_area:
    stager = _MakeStager(args.skip_staging, use_beta_stager,
                         args.staging_command, staging_area)
    services, configs = deployables.GetDeployables(
        args.deployables, stager, deployables.GetPathMatchers())
    service_infos = [d.service_info for d in services]

    flags.ValidateImageUrl(args.image_url, service_infos)

    # pylint: disable=protected-access
    log.debug(
        'API endpoint: [{endpoint}], API version: [{version}]'.format(
            endpoint=api_client.client.url,
            version=api_client.client._VERSION))
    # The legacy admin console API client.
    # The Admin Console API existed long before the App Engine Admin API, and
    # isn't being improved. We're in the process of migrating all of the calls
    # over to the Admin API, but a few things (notably config deployments)
    # haven't been ported over yet.
    ac_client = appengine_client.AppengineClient(args.server,
                                                 args.ignore_bad_certs)

    app = _PossiblyCreateApp(api_client, project)
    _RaiseIfStopped(api_client, app)
    app = _PossiblyRepairApp(api_client, app)

    # Tell the user what is going to happen, and ask them to confirm.
    version_id = args.version or util.GenerateVersionId()
    deployed_urls = output_helpers.DisplayProposedDeployment(
        app, project, services, configs, version_id, deploy_options.promote)
    console_io.PromptContinue(cancel_on_no=True)
    if service_infos:
      # Do generic app setup if deploying any services.
      # All deployment paths for a service involve uploading source to GCS.
      metrics.CustomTimedEvent(metric_names.GET_CODE_BUCKET_START)
      code_bucket_ref = args.bucket or flags.GetCodeBucket(app, project)
      metrics.CustomTimedEvent(metric_names.GET_CODE_BUCKET)
      log.debug(
          'Using bucket [{b}].'.format(b=code_bucket_ref.ToBucketUrl()))

      # Prepare Flex if any service is going to deploy an image.
      if any([s.RequiresImage() for s in service_infos]):
        if deploy_options.use_service_management:
          deploy_command_util.PossiblyEnableFlex(project)
        else:
          deploy_command_util.DoPrepareManagedVms(ac_client)

      # Map of service ID to existing service, passed on to the deployer.
      all_services = dict([(s.id, s) for s in api_client.ListServices()])
    else:
      # Config-only deployment: no bucket or service listing is needed.
      code_bucket_ref = None
      all_services = {}
    new_versions = []
    deployer = ServiceDeployer(api_client, deploy_options)

    # Track whether a service has been deployed yet, for metrics.
    service_deployed = False
    for service in services:
      if not service_deployed:
        metrics.CustomTimedEvent(
            metric_names.FIRST_SERVICE_DEPLOY_START)
      new_version = version_util.Version(project, service.service_id,
                                         version_id)
      deployer.Deploy(service, new_version, code_bucket_ref, args.image_url,
                      all_services, app.gcrDomain,
                      disable_build_cache=disable_build_cache,
                      flex_image_build_option=flex_image_build_option)
      new_versions.append(new_version)
      log.status.Print('Deployed service [{0}] to [{1}]'.format(
          service.service_id, deployed_urls[service.service_id]))
      if not service_deployed:
        metrics.CustomTimedEvent(metric_names.FIRST_SERVICE_DEPLOY)
      service_deployed = True

  # Deploy config files.
  if configs:
    metrics.CustomTimedEvent(metric_names.UPDATE_CONFIG_START)
    for config in configs:
      message = 'Updating config [{config}]'.format(config=config.name)
      with progress_tracker.ProgressTracker(message):
        ac_client.UpdateConfig(config.name, config.parsed)
    metrics.CustomTimedEvent(metric_names.UPDATE_CONFIG)

  updated_configs = [c.name for c in configs]

  PrintPostDeployHints(new_versions, updated_configs)

  # Return all the things that were deployed.
  return {'versions': new_versions, 'configs': updated_configs}
def WaitForComputeOperations(self, project, zone, operation_ids, message,
                             timeout_s=1200, poll_period_s=5):
  """Poll Compute Operations until their status is done or timeout reached.

  Operations are polled round-robin. Those still in progress, or whose poll
  request failed with an HTTP error, are queued for the next round; the
  function sleeps poll_period_s between rounds.

  Args:
    project: project on which the operation is performed
    zone: zone on which the operation is performed
    operation_ids: list/set of ids of the compute operations to wait for
    message: str, message to display to user while polling.
    timeout_s: number, seconds to poll with retries before timing out.
    poll_period_s: number, delay in seconds between requests.

  Returns:
    Operations: list of the last successful operations.getrequest for each op.

  Raises:
    Error: if the operation times out or finishes with an error.
  """
  operation_ids = deque(operation_ids)
  operations = {}
  errors = []
  with progress_tracker.ProgressTracker(message, autotick=True):
    # Wall-clock time: time.clock() measured CPU time on Unix (so sleeping
    # never advanced the timeout) and was removed in Python 3.8.
    start_time = time.time()
    ops_to_retry = []
    while timeout_s > (time.time() - start_time) and operation_ids:
      op_id = operation_ids.popleft()
      try:
        operation = self.GetComputeOperation(project, zone, op_id)
        operations[op_id] = operation
        if not self.IsComputeOperationFinished(operation):
          # Operation is still in progress. Deliberately no `continue`: it
          # would skip the requeue step below.
          ops_to_retry.append(op_id)
        else:
          log.debug('Operation %s succeeded after %.3f seconds', operation,
                    (time.time() - start_time))
          error = self.GetOperationError(operation)
          if error:
            # Operation Failed!
            msg = 'Operation [{0}] finished with error: {1}'.format(
                op_id, error)
            log.debug(msg)
            errors.append(msg)
      except apitools_exceptions.HttpError as error:
        log.debug('GetComputeOperation failed: %s', error)
        # Keep trying until we timeout in case error is transient.
        # TODO(user): add additional backoff if server is returning 500s
        ops_to_retry.append(op_id)
      if not operation_ids and ops_to_retry:
        # Round finished: start the next one after a pause.
        operation_ids = deque(ops_to_retry)
        ops_to_retry = []
        time.sleep(poll_period_s)

  # Anything still queued at this point did not finish before the timeout.
  operation_ids.extend(ops_to_retry)
  for op_id in operation_ids:
    errors.append('Operation [{0}] is still running'.format(op_id))
  if errors:
    raise util.Error(linesep.join(errors))
  return list(operations.values())
def Deploy(self, service, new_version, code_bucket_ref, image, all_services,
           gcr_domain, flex_image_build_option=False):
  """Deploys the given service as a new version.

  Runs, in order: endpoints configuration, runtime rewrite, image build and
  push, file upload, creation of the new version through the API, and
  finally promotion. Each `_Possibly*` helper decides whether its step
  applies.

  Args:
    service: deployables.Service, service to be deployed.
    new_version: version_util.Version describing where to deploy the service.
    code_bucket_ref: cloud_storage.BucketReference where the service's files
      have been uploaded.
    image: str or None, the URL for the Docker image to be deployed (if image
      already exists).
    all_services: dict of service ID to service_util.Service objects
      corresponding to all pre-existing services (used to determine how to
      promote this version to receive all traffic, if applicable).
    gcr_domain: str, Cloud Registry domain, determines the physical location
      of the image. E.g. `us.gcr.io`.
    flex_image_build_option: FlexImageBuildOptions, whether a flex deployment
      should upload files so that the server can build the image or build the
      image on client.
  """
  service_id = new_version.service
  log.status.Print(
      'Beginning deployment of service [{service}]...'.format(
          service=service_id))

  source_dir = service.upload_dir
  service_info = service.service_info

  endpoints_info = self._PossiblyConfigureEndpoints(
      service_info, source_dir, new_version)
  self._PossiblyRewriteRuntime(service_info)
  build = self._PossiblyBuildAndPush(
      new_version, service_info, source_dir, image, code_bucket_ref,
      gcr_domain, flex_image_build_option)
  manifest = self._PossiblyUploadFiles(
      image, service_info, source_dir, code_bucket_ref,
      flex_image_build_option)

  # Server-side builds need extra settings read from properties.
  if flex_image_build_option == FlexImageBuildOptions.ON_SERVER:
    extra_config_settings = {
        'cloud_build_timeout':
            properties.VALUES.app.cloud_build_timeout.Get(),
        'runtime_root': properties.VALUES.app.runtime_root.Get(),
    }
  else:
    extra_config_settings = None

  # Actually create the new version of the service.
  metrics.CustomTimedEvent(metric_names.DEPLOY_API_START)
  self.api_client.DeployService(
      new_version.service, new_version.id, service_info, manifest, build,
      endpoints_info, extra_config_settings)
  metrics.CustomTimedEvent(metric_names.DEPLOY_API)

  update_message = 'Updating service [{service}]'.format(
      service=new_version.service)
  with progress_tracker.ProgressTracker(update_message):
    self._PossiblyPromote(all_services, new_version)
def WaitFor(poller, operation_ref, message,
            pre_start_sleep_ms=1000,
            max_retrials=None,
            max_wait_ms=300000,
            exponential_sleep_multiplier=1.4,
            jitter_ms=1000,
            wait_ceiling_ms=180000,
            sleep_ms=2000):
  """Waits with retries for an operation to be done, given a poller.

  Args:
    poller: OperationPoller, poller to use during retrials.
    operation_ref: object, passed to operation poller poll method.
    message: str, string to display for progress_tracker.
    pre_start_sleep_ms: int, Time to wait before making first poll request.
    max_retrials: int, max number of retrials before raising RetryException.
    max_wait_ms: int, number of ms to wait before raising WaitException.
    exponential_sleep_multiplier: float, factor to use on subsequent retries.
    jitter_ms: int, random (up to the value) additional sleep between retries.
    wait_ceiling_ms: int, Maximum wait between retries.
    sleep_ms: int or iterable: for how long to wait between trials.

  Returns:
    poller.GetResult(operation).

  Raises:
    AbortWaitError: if ctrl-c was pressed.
    TimeoutError: if retryer has finished without being done.
  """

  def _CtrlCHandler(unused_signal, unused_frame):
    raise AbortWaitError('Ctrl-C aborted wait.')

  def _IsNotDone(polled_operation, unused_state):
    return not poller.IsDone(polled_operation)

  try:
    with execution_utils.CtrlCSection(_CtrlCHandler):
      try:
        with progress_tracker.ProgressTracker(message) as tracker:
          if pre_start_sleep_ms:
            _SleepMs(pre_start_sleep_ms)

          def _StatusUpdate(unused_result, unused_status):
            tracker.Tick()

          retryer = retry.Retryer(
              max_retrials=max_retrials,
              max_wait_ms=max_wait_ms,
              exponential_sleep_multiplier=exponential_sleep_multiplier,
              jitter_ms=jitter_ms,
              wait_ceiling_ms=wait_ceiling_ms,
              status_update_func=_StatusUpdate)
          operation = retryer.RetryOnResult(
              func=poller.Poll,
              args=(operation_ref,),
              should_retry_if=_IsNotDone,
              sleep_ms=sleep_ms)
      except retry.WaitException:
        waited_seconds = int(max_wait_ms / 1000)
        raise TimeoutError(
            'Operation {0} has not finished in {1} seconds'
            .format(operation_ref, waited_seconds))
      except retry.MaxRetrialsException as e:
        raise TimeoutError(
            'Operation {0} has not finished in {1} seconds '
            'after max {2} retrials'
            .format(operation_ref,
                    int(e.state.time_passed_ms / 1000),
                    e.state.retrial))
  except AbortWaitError:
    # Write this out now that progress tracker is done.
    sys.stderr.write('Aborting wait for operation {0}.\n'.format(operation_ref))
    raise

  return poller.GetResult(operation)
def WaitForOperation(cls, sql_client, operation_ref, message,
                     max_wait_seconds=300):
  """Waits for a Cloud SQL operation to complete.

  Sleeps for the pre-start delay, then polls cls.GetOperationStatus with
  exponentially growing sleeps (multiplier 2) capped at the class wait
  ceiling, until the status is truthy or max_wait_seconds elapse.

  Args:
    sql_client: apitools.BaseApiClient, The client used to make requests.
    operation_ref: resources.Resource, A reference for the operation to poll.
    message: str, The string to print while polling.
    max_wait_seconds: integer or None, the number of seconds before the
      poller times out. A falsy value disables the limit.

  Raises:
    OperationError: If the operation takes more than max_wait_seconds when a
      value is specified.
  """

  def ShouldRetryFunc(result, state):
    """Decides whether to poll again; raises exception results."""
    # In case of HttpError, retry for up to _HTTP_MAX_RETRY_MS at most.
    if isinstance(result, base_exceptions.HttpError):
      if state.time_passed_ms <= _BaseOperations._HTTP_MAX_RETRY_MS:
        return True
      raise result
    # In case of other Exceptions, raise them immediately.
    if isinstance(result, Exception):
      raise result
    # Otherwise let the retryer do its job until the Operation is done.
    return not result

  # Set the max wait time.
  max_wait_ms = max_wait_seconds * _MS_PER_SECOND if max_wait_seconds else None

  tracker = console_progress_tracker.ProgressTracker(message, autotick=False)
  with tracker as pt:
    time.sleep(_BaseOperations._PRE_START_SLEEP_SEC)
    retryer = retry.Retryer(
        exponential_sleep_multiplier=2,
        max_wait_ms=max_wait_ms,
        wait_ceiling_ms=_BaseOperations._WAIT_CEILING_MS)
    poll_args = [sql_client, operation_ref]
    poll_kwargs = {'progress_tracker': pt}
    try:
      retryer.RetryOnResult(
          cls.GetOperationStatus,
          poll_args,
          poll_kwargs,
          should_retry_if=ShouldRetryFunc,
          sleep_ms=_BaseOperations._INITIAL_SLEEP_MS)
    except retry.WaitException:
      raise exceptions.OperationError(
          ('Operation {0} is taking longer than expected. You can continue '
           'waiting for the operation by running `{1}`').format(
               operation_ref, cls.GetOperationWaitCommand(operation_ref)))
def testNoOp(self):
  """With the interactive UX style set to OFF, the tracker writes nothing."""
  off_style = properties.VALUES.core.InteractiveUXStyles.OFF.name
  properties.VALUES.core.interactive_ux_style.Set(off_style)

  silent_tracker = progress_tracker.ProgressTracker('tracker', autotick=False)
  with silent_tracker:
    pass

  self.AssertErrEquals('')
def WaitForOperation(operation, context, message, timeout_s, poll_period_s=5):
  """Polls a dataproc Operation until it is done or the timeout is reached.

  Warnings found in the operation metadata are logged as they appear; each
  warning is logged once.

  Args:
    operation: Operation, message of the operation to be polled.
    context: dict, dataproc Command context.
    message: str, message to display to user while polling.
    timeout_s: number, seconds to poll with retries before timing out.
    poll_period_s: number, delay in seconds between requests.

  Returns:
    Operation: the return value of the last successful operations.get request.

  Raises:
    OperationError: if the operation times out or finishes with an error.
  """
  client = context['dataproc_client']
  messages = context['dataproc_messages']

  get_request = messages.DataprocProjectsRegionsOperationsGetRequest(
      name=operation.name)

  log.status.Print('Waiting on operation [{0}].'.format(operation.name))
  start_time = time.time()
  warnings_so_far = 0
  is_tty = console_io.IsInteractive(error=True)
  tracker_separator = '\n' if is_tty else ''

  def _Elapsed():
    return time.time() - start_time

  def _LogWarnings(warnings):
    # Only warnings past the count already reported are new.
    unseen = warnings[warnings_so_far:]
    if not unseen:
      return
    # Drop a line to print nicely with the progress tracker.
    log.err.write(tracker_separator)
    for warning in unseen:
      log.warn(warning)

  with progress_tracker.ProgressTracker(message, autotick=True):
    while timeout_s > _Elapsed():
      try:
        operation = client.projects_regions_operations.Get(get_request)
        metadata = ParseOperationMetadata(operation.metadata, messages)
        _LogWarnings(metadata.warnings)
        warnings_so_far = len(metadata.warnings)
        if operation.done:
          break
      except apitools_exceptions.HttpError:
        # Keep trying until we timeout in case error is transient.
        pass
      time.sleep(poll_period_s)

  # Report anything that arrived with the final state.
  metadata = ParseOperationMetadata(operation.metadata, messages)
  _LogWarnings(metadata.warnings)

  if not operation.done:
    raise exceptions.OperationTimeoutError(
        'Operation [{0}] timed out.'.format(operation.name))
  if operation.error:
    raise exceptions.OperationError('Operation [{0}] failed: {1}.'.format(
        operation.name, FormatRpcError(operation.error)))

  log.info('Operation [%s] finished after %.3f seconds',
           operation.name, _Elapsed())
  return operation
def SetProjectMetadata(self, client, new_metadata):
  """Sets the project metadata to new_metadata under a progress tracker."""
  tracker = progress_tracker.ProgressTracker('Updating project ssh metadata')
  with tracker:
    self._SetProjectMetadata(client, new_metadata)
def Run(self, args):
  """Converts a Terraform plan to CAI assets and validates both forms.

  Sets self.exit_code from the conversion step, or from the validation steps
  (1 if either errored, otherwise 2 if either reported violations).

  Returns:
    The combined list of violations from both validations, or None if the
    conversion step exited with a non-zero code.
  """
  tfplan_to_cai_operation = TerraformToolsTfplanToCaiOperation()
  validate_cai_operation = TerraformToolsValidateOperation()
  validate_tfplan_operation = TerraformToolsValidateOperation()

  env_vars = {
      'GOOGLE_OAUTH_ACCESS_TOKEN':
          GetFreshAccessToken(account=properties.VALUES.core.account.Get()),
      'USE_STRUCTURED_LOGGING':
          'true',
      'GOOGLE_TERRAFORM_VALIDATOR_USERAGENT_EXTENSION':
          metrics.GetUserAgent(),
  }

  with files.TemporaryDirectory() as tempdir:
    cai_assets = os.path.join(tempdir, 'cai_assets.json')

    conversion = tfplan_to_cai_operation(
        command='tfplan-to-cai',
        project=args.project or properties.VALUES.core.project.Get(),
        terraform_plan_json=args.terraform_plan_json,
        verbosity=args.verbosity,
        output_path=cai_assets,
        env=env_vars)
    self.exit_code = conversion.exit_code
    if self.exit_code > 0:
      # The streaming binary backed operation handles its own writing to
      # stdout and stderr, so there's nothing left to do here.
      return None

    with progress_tracker.ProgressTracker(
        message='Validating resources',
        aborted_message='Aborted validation.'):
      cai_response = validate_cai_operation(
          command='validate-cai',
          policy_library=args.policy_library,
          input_file=cai_assets,
          verbosity=args.verbosity,
          env=env_vars)
      tfplan_response = validate_tfplan_operation(
          command='validate-tfplan',
          policy_library=args.policy_library,
          input_file=args.terraform_plan_json,
          verbosity=args.verbosity,
          env=env_vars)

  # exit code 2 from a validate_* command indicates violations; we need to
  # pass that through to users so they can detect this case. However, if
  # either command errors out (exit code 1) return that instead.
  validation_codes = (cai_response.exit_code, tfplan_response.exit_code)
  if 1 in validation_codes:
    self.exit_code = 1
  elif 2 in validation_codes:
    self.exit_code = 2

  # Output from validate commands uses "structured output", same as the
  # streaming output from conversion. The final output should be a combined
  # list of violations.
  labelled_responses = (('CAI', cai_response), ('Terraform', tfplan_response))
  violations = []
  for policy_type, validation in labelled_responses:
    if validation.stdout:
      try:
        msg = binary_operations.ReadStructuredOutput(
            validation.stdout, as_json=True)
      except binary_operations.StructuredOutputError:
        log.warning(
            'Could not parse {} policy validation output.'.format(
                policy_type))
      else:
        violations += msg.resource_body
    if validation.stderr:
      handler = binary_operations.DefaultStreamStructuredErrHandler(None)
      for line in validation.stderr.split('\n'):
        handler(line)

  return violations
def WaitForJobTermination(dataproc, job, job_ref, message, goal_state,
                          error_state=None, stream_driver_log=False,
                          log_poll_period_s=1, dataproc_poll_period_s=10,
                          timeout_s=None):
  """Poll dataproc Job until its status is terminal or timeout reached.

  The loop wakes every log_poll_period_s to drain the driver log (when
  streaming) and re-fetches the job at most every dataproc_poll_period_s,
  except while waiting for the output stream to appear or for the job to be
  reported done after its log stream closed.

  Args:
    dataproc: wrapper for dataproc resources, client and messages
    job: The job to wait to finish.
    job_ref: Parsed dataproc.projects.regions.jobs resource containing a
      projectId, region, and jobId.
    message: str, message to display to user while polling.
    goal_state: JobStatus.StateValueValuesEnum, the state to define success
    error_state: JobStatus.StateValueValuesEnum, the state to define failure
    stream_driver_log: bool, Whether to show the Job's driver's output.
    log_poll_period_s: number, delay in seconds between checking on the log.
    dataproc_poll_period_s: number, delay in seconds between requests to
      the Dataproc API.
    timeout_s: number, time out for job completion. None means no timeout.

  Returns:
    Job: the return value of the last successful jobs.get request.

  Raises:
    JobError: if the job finishes with an error, or reaches a terminal state
      other than goal_state.
    JobTimeoutError: if the loop ends while the job is not in a terminal
      state.
  """
  request = dataproc.messages.DataprocProjectsRegionsJobsGetRequest(
      projectId=job_ref.projectId, region=job_ref.region, jobId=job_ref.jobId)
  driver_log_stream = None
  last_job_poll_time = 0
  job_complete = False
  wait_display = None
  driver_output_uri = None

  def ReadDriverLogIfPresent():
    # Copies whatever the driver log stream has available to stderr.
    if driver_log_stream and driver_log_stream.open:
      # TODO(b/36049794): Don't read all output.
      driver_log_stream.ReadIntoWritable(log.err)

  def PrintEqualsLine():
    # Separator line as wide as the terminal.
    attr = console_attr.GetConsoleAttr()
    log.err.Print('=' * attr.GetTermSize()[0])

  if stream_driver_log:
    # The streamed output takes the place of the progress tracker.
    log.status.Print('Waiting for job output...')
    wait_display = NoOpProgressDisplay()
  else:
    wait_display = progress_tracker.ProgressTracker(message, autotick=True)
  start_time = now = time.time()
  with wait_display:
    while not timeout_s or timeout_s > (now - start_time):
      # Poll logs first to see if it closed.
      ReadDriverLogIfPresent()
      log_stream_closed = driver_log_stream and not driver_log_stream.open
      if (not job_complete and
          job.status.state in dataproc.terminal_job_states):
        job_complete = True
        # Wait an extra 10s to get trailing output; this replaces the
        # caller's timeout from here on.
        timeout_s = now - start_time + 10

      if job_complete and (not stream_driver_log or log_stream_closed):
        # Nothing left to wait for
        break

      regular_job_poll = (
          not job_complete
          # Poll less frequently on dataproc API
          and now >= last_job_poll_time + dataproc_poll_period_s)
      # Poll at regular frequency before output has streamed and after it has
      # finished.
      expecting_output_stream = stream_driver_log and not driver_log_stream
      expecting_job_done = not job_complete and log_stream_closed
      if regular_job_poll or expecting_output_stream or expecting_job_done:
        last_job_poll_time = now
        try:
          job = dataproc.client.projects_regions_jobs.Get(request)
        except apitools_exceptions.HttpError as error:
          log.warning('GetJob failed:\n{}'.format(
              six.text_type(error)))
          # Do not retry on 4xx errors.
          if IsClientHttpException(error):
            raise
        if (stream_driver_log
            and job.driverOutputResourceUri
            and job.driverOutputResourceUri != driver_output_uri):
          # The output URI changed: start streaming from the new location.
          if driver_output_uri:
            PrintEqualsLine()
            log.warning(
                "Job attempt failed. Streaming new attempt's output."
            )
            PrintEqualsLine()
          driver_output_uri = job.driverOutputResourceUri
          driver_log_stream = storage_helpers.StorageObjectSeriesStream(
              job.driverOutputResourceUri)
      time.sleep(log_poll_period_s)
      now = time.time()

  # TODO(b/34836493): Get better test coverage of the next 20 lines.
  state = job.status.state

  # goal_state and error_state will always be terminal
  if state in dataproc.terminal_job_states:
    if stream_driver_log:
      if not driver_log_stream:
        log.warning('Expected job output not found.')
      elif driver_log_stream.open:
        log.warning(
            'Job terminated, but output did not finish streaming.')
    if state is goal_state:
      return job
    if error_state and state is error_state:
      if job.status.details:
        raise exceptions.JobError(
            'Job [{0}] failed with error:\n{1}'.format(
                job_ref.jobId, job.status.details))
      raise exceptions.JobError('Job [{0}] failed.'.format(
          job_ref.jobId))
    if job.status.details:
      log.info('Details:\n' + job.status.details)
    # Terminal, but neither the goal nor the declared error state.
    raise exceptions.JobError(
        'Job [{0}] entered state [{1}] while waiting for [{2}].'.format(
            job_ref.jobId, state, goal_state))
  raise exceptions.JobTimeoutError(
      'Job [{0}] timed out while in state [{1}].'.format(
          job_ref.jobId, state))
def _RunCheck(self, check, first_run=True):
  """Runs one check under a progress tracker and prints its result.

  Args:
    check: Object exposing an `issue` attribute (used in the progress message)
      and a `Check(first_run=...)` method.
    first_run: bool, True shows 'Checking ...', False shows 'Rechecking ...'.
      Also forwarded to check.Check().

  Returns:
    The (result, fixer) pair produced by check.Check().
  """
  verb = 'Checking' if first_run else 'Rechecking'
  tracker_message = '{0} {1}'.format(verb, check.issue)
  with progress_tracker.ProgressTracker(tracker_message):
    outcome = check.Check(first_run=first_run)
    # Unpack inside the tracker so a malformed return value still surfaces
    # while the tracker is active, exactly as a direct tuple assignment would.
    result, fixer = outcome
  self._PrintResult(result)
  return result, fixer
def WaitForOperation(operation):
  """Waits on an operation while displaying a manually ticked progress tracker.

  Args:
    operation: Object with a `name` attribute; handed to
      OperationRetryer.RetryPollOperation().

  Returns:
    Whatever OperationRetryer.RetryPollOperation() returns.
  """
  with tracker.ProgressTracker(
      'Waiting for [{0}] to finish'.format(operation.name),
      autotick=False) as progress:
    # autotick is off: the poller receives the tracker, presumably so it can
    # tick it itself -- confirm against OperationPoller.
    operation_retryer = OperationRetryer()
    operation_poller = OperationPoller(progress)
    return operation_retryer.RetryPollOperation(operation_poller, operation)
def testProgressTrackerDoesNotCrash(self):
  """Smoke test: an autoticking tracker survives a mocked console size of 0."""
  # The second tuple element is a placeholder; only the leading 0 matters here
  # (presumably the console width -- confirm against the mock's target).
  mocked_console_size = (0, 'unused size')
  self.console_size_mock.return_value = mocked_console_size
  tracker_under_test = progress_tracker.ProgressTracker(
      'tracker', autotick=True)
  with tracker_under_test:
    # Stay inside the tracker for a second so autotick has time to run.
    time.sleep(1)
def LoadOrGenerate(self, directories=None, force=False, generate=True,
                   ignore_out_of_date=False, tarball=False, verbose=False,
                   warn_on_exceptions=False):
  """Returns the CLI tree for self.command, loading or regenerating it.

  A tree file located by self.FindTreeFile() is parsed as JSON and returned
  as is when self.IsUpToDate() reports it readonly, or up to date while force
  is unset. Otherwise the tree is rebuilt with self.Generate() and written to
  the located path as JSON.

  Args:
    directories: Forwarded to self.FindTreeFile().
    force: If true, an up-to-date (but not readonly) tree is regenerated.
    generate: If false, raise NoCliTreeForCommand instead of generating.
    ignore_out_of_date: If true, return None for an existing tree that is
      neither readonly nor up to date instead of regenerating it.
    tarball: Not referenced by this method.
    verbose: Forwarded to self.IsUpToDate(); when true, generation is wrapped
      in a progress tracker.
    warn_on_exceptions: If true, failing to open the output file even after
      creating its directory is logged as a warning and None is returned
      rather than raising.

  Returns:
    The loaded or generated tree, or None in the cases described above.

  Raises:
    NoCliTreeForCommand: If a tree would have to be generated but generate is
      false.
    files.Error: If the output file cannot be opened and warn_on_exceptions is
      false.
  """
  tree_file = None
  try:
    path, tree_file = self.FindTreeFile(directories)
    if tree_file:
      try:
        tree = json.load(tree_file)
      except ValueError:
        # Corrupt JSON -- could have been interrupted.
        tree = None
      if tree:
        readonly, up_to_date = self.IsUpToDate(tree, verbose=verbose)
        if readonly or (up_to_date and not force):
          return tree
        if not up_to_date and ignore_out_of_date:
          return None
    # TODO(b/69033748): disable until issues resolved
    elif _DisableLongRunningCliTreeGeneration(self.command):
      return None
  finally:
    if tree_file:
      tree_file.close()

  def _GenerateAndWrite():
    """Generates a CLI tree and, if non-empty, writes it to path as JSON."""
    generated = self.Generate()
    if not generated:
      return generated
    try:
      out = files.FileWriter(path)
    except files.Error as first_error:
      # CLI data config dir may not be initialized yet.
      try:
        files.MakeDir(os.path.dirname(path))
        out = files.FileWriter(path)
      except files.Error:
        if not warn_on_exceptions:
          raise
        # The warning reports the first open failure, not the retry's.
        log.warning(six.text_type(first_error))
        return None
    with out:
      resource_printer.Print(generated, print_format='json', out=out)
    return generated

  # At this point:
  #   (1) the tree is not found or is out of date
  #   (2) the tree is not readonly
  #   (3) we have a generator for the tree
  if not generate:
    raise NoCliTreeForCommand('No CLI tree for [{}].'.format(self.command))
  if verbose:
    # tree_file is closed by now but still truthy if a file had been found.
    action = 'Updating' if tree_file else 'Generating'
    with progress_tracker.ProgressTracker(
        '{} the [{}] CLI tree'.format(action, self.command)):
      return _GenerateAndWrite()
  return _GenerateAndWrite()
def SetInstanceMetadata(self, client, instance, new_metadata):
  """Applies new_metadata to the instance while showing a progress tracker.

  Args:
    client: Forwarded to self._SetInstanceMetadata().
    instance: Forwarded to self._SetInstanceMetadata().
    new_metadata: Forwarded to self._SetInstanceMetadata().
  """
  tracker_message = 'Updating instance ssh metadata'
  with progress_tracker.ProgressTracker(tracker_message):
    self._SetInstanceMetadata(client, instance, new_metadata)
def WaitForJobTermination(job, context, message, goal_state,
                          stream_driver_log=False, log_poll_period_s=1,
                          dataproc_poll_period_s=10, timeout_s=None):
  """Poll dataproc Job until its status is terminal or timeout reached.

  Args:
    job: The job to wait to finish.
    context: dict, dataproc Command context.
    message: str, message to display to user while polling.
    goal_state: JobStatus.StateValueValuesEnum, the state to define success
    stream_driver_log: bool, Whether to show the Job's driver's output.
    log_poll_period_s: number, delay in seconds between checking on the log.
    dataproc_poll_period_s: number, delay in seconds between requests to
        the Dataproc API.
    timeout_s: number, time out for job completion. None means no timeout.

  Returns:
    The job as returned by the last successful jobs.get request (or the job
    passed in if no request succeeded), once it has reached goal_state.

  Raises:
    exceptions.JobError: if the job reaches a terminal state other than
        goal_state.
    exceptions.JobTimeoutError: if the job is not in a terminal state when
        polling stops.
  """
  client = context['dataproc_client']
  job_ref = ParseJob(job.reference.jobId, context)
  request = client.MESSAGES_MODULE.DataprocProjectsRegionsJobsGetRequest(
      projectId=job_ref.projectId,
      region=job_ref.region,
      jobId=job_ref.jobId)
  driver_log_stream = None
  last_job_poll_time = 0
  job_complete = False
  driver_output_uri = None

  def ReadDriverLogIfPresent():
    # Reads the enclosing driver_log_stream, which is rebound below once the
    # job reports an output URI.
    if driver_log_stream and driver_log_stream.open:
      # TODO(b/36049794): Don't read all output.
      driver_log_stream.ReadIntoWritable(log.err)

  def PrintEqualsLine():
    attr = console_attr.GetConsoleAttr()
    log.err.Print('=' * attr.GetTermSize()[0])

  if stream_driver_log:
    log.status.Print('Waiting for job output...')
    wait_display = NoOpProgressDisplay()
  else:
    wait_display = progress_tracker.ProgressTracker(message, autotick=True)
  start_time = now = time.time()
  with wait_display:
    while not timeout_s or timeout_s > (now - start_time):
      # Poll logs first to see if it closed.
      ReadDriverLogIfPresent()
      log_stream_closed = driver_log_stream and not driver_log_stream.open
      if not job_complete and job.status.state in constants.TERMINAL_JOB_STATES:
        job_complete = True
        # Wait an extra 10s to get trailing output.
        timeout_s = now - start_time + 10

      if job_complete and (not stream_driver_log or log_stream_closed):
        # Nothing left to wait for
        break

      regular_job_poll = (
          not job_complete
          # Poll less frequently on dataproc API
          and now >= last_job_poll_time + dataproc_poll_period_s)
      # Poll at regular frequency before output has streamed and after it has
      # finished.
      expecting_output_stream = stream_driver_log and not driver_log_stream
      expecting_job_done = not job_complete and log_stream_closed
      if regular_job_poll or expecting_output_stream or expecting_job_done:
        last_job_poll_time = now
        try:
          job = client.projects_regions_jobs.Get(request)
        except apitools_exceptions.HttpError as error:
          # Format eagerly: the previous '{1}' placeholder with a stray
          # positional argument never rendered the error text.
          log.warn('GetJob failed:\n{0}'.format(error))
          # Keep trying until we timeout in case error is transient.
        if (stream_driver_log
            and job.driverOutputResourceUri
            and job.driverOutputResourceUri != driver_output_uri):
          # A changed output URI after one was already seen means the job was
          # re-attempted; separate the attempts visually.
          if driver_output_uri:
            PrintEqualsLine()
            log.warn("Job attempt failed. Streaming new attempt's output.")
            PrintEqualsLine()
          driver_output_uri = job.driverOutputResourceUri
          driver_log_stream = storage_helpers.StorageObjectSeriesStream(
              job.driverOutputResourceUri)
      time.sleep(log_poll_period_s)
      now = time.time()

  # TODO(b/34836493): Get better test coverage of the next 20 lines.
  state = job.status.state
  if state is not goal_state and job.status.details:
    # Just log details, because the state will be in the error message.
    log.info(job.status.details)

  if state in constants.TERMINAL_JOB_STATES:
    if stream_driver_log:
      if not driver_log_stream:
        log.warn('Expected job output not found.')
      elif driver_log_stream.open:
        log.warn('Job terminated, but output did not finish streaming.')
    if state is goal_state:
      return job
    raise exceptions.JobError(
        'Job [{0}] entered state [{1}] while waiting for [{2}].'.format(
            job_ref.jobId, state, goal_state))
  raise exceptions.JobTimeoutError(
      'Job [{0}] timed out while in state [{1}].'.format(
          job_ref.jobId, state))
def Run(self, args):
  """Enables a personal auth session on the cluster named by args.

  After the user confirms, credentials are injected into the cluster once.
  If args.refresh_credentials is set, injection is then repeated every 30
  seconds until it fails three times in a row or the command is interrupted.
  PersonalAuthError is logged rather than propagated.

  Args:
    args: Parsed arguments; reads CONCEPTS.cluster, access_boundary,
      openssl_command and refresh_credentials.

  Returns:
    None.
  """
  console_io.PromptContinue(
      message=(
          'A personal authentication session will propagate your personal '
          'credentials to the cluster, so make sure you trust the cluster '
          'and the user who created it.'),
      cancel_on_no=True,
      cancel_string='Enabling session aborted by user')

  dataproc = dp.Dataproc(self.ReleaseTrack())
  cluster_ref = args.CONCEPTS.cluster.Parse()
  project = cluster_ref.projectId
  region = cluster_ref.region
  cluster_name = cluster_ref.clusterName
  get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
      projectId=project, region=region, clusterName=cluster_name)
  cluster = dataproc.client.projects_regions_clusters.Get(get_request)
  cluster_uuid = cluster.clusterUuid

  if not args.access_boundary:
    access_boundary_json = flags.ProjectGcsObjectsAccessBoundary(project)
  else:
    with files.FileReader(args.access_boundary, mode='r') as boundary_file:
      access_boundary_json = boundary_file.read()

  openssl_executable = args.openssl_command
  if not openssl_executable:
    try:
      openssl_executable = files.FindExecutableOnPath('openssl')
    except ValueError:
      log.fatal(
          'Could not find openssl on your system. The enable-session '
          'command requires openssl to be installed.')

  operation_poller = waiter.CloudOperationPollerNoResources(
      dataproc.client.projects_regions_operations,
      lambda operation: operation.name)

  def _ClusterKeyOrRaise(current_cluster):
    """Returns the cluster's key; raises PersonalAuthError if it has none."""
    key = clusters.ClusterKey(current_cluster)
    if not key:
      raise exceptions.PersonalAuthError(
          'The cluster {} does not support personal auth.'.format(
              cluster_name))
    return key

  def _Inject(key):
    """Injects credentials into the cluster using the given cluster key."""
    self.inject_credentials(dataproc, project, region, cluster_name,
                            cluster_uuid, key, access_boundary_json,
                            openssl_executable, operation_poller)

  try:
    cluster_key = _ClusterKeyOrRaise(cluster)
    initial_message = (
        'Injecting initial credentials into the cluster {}'.format(
            cluster_name))
    with progress_tracker.ProgressTracker(initial_message, autotick=True):
      _Inject(cluster_key)

    if not args.refresh_credentials:
      return

    update_message = (
        'Periodically refreshing credentials for cluster {}. This'
        ' will continue running until the command is interrupted'
    ).format(cluster_name)

    with progress_tracker.ProgressTracker(update_message, autotick=True):
      try:
        # Cluster keys are periodically regenerated, so fetch the latest
        # each time we inject credentials.
        cluster = dataproc.client.projects_regions_clusters.Get(get_request)
        cluster_key = _ClusterKeyOrRaise(cluster)
        consecutive_failures = 0
        while consecutive_failures < 3:
          try:
            time.sleep(30)
            _Inject(cluster_key)
          except ValueError as err:
            log.error(err)
            consecutive_failures += 1
          else:
            # Any success resets the streak; only back-to-back failures count.
            consecutive_failures = 0
        raise exceptions.PersonalAuthError(
            'Credential injection failed three times in a row, giving up...')
      except (console_io.OperationCancelledError, KeyboardInterrupt):
        return
  except exceptions.PersonalAuthError as err:
    log.error(err)
    return
def Run(self, args):
  """Repairs the App Engine application, showing a progress tracker.

  Args:
    args: Parsed command arguments; not read by this method.
  """
  client = appengine_api_client.AppengineApiClient.GetApiClient('v1beta')
  tracker_message = 'Repairing the app [{0}]'.format(client.project)
  with progress_tracker.ProgressTracker(tracker_message):
    client.RepairApplication()
def RunDeploy(unused_self, args, enable_endpoints=False, app_create=False):
  """Perform a deployment based on the given args.

  Args:
    unused_self: Ignored.
    args: Parsed command arguments; reads version, deployables,
      skip_image_url_validation, image_url, server, ignore_bad_certs, bucket
      and skip_staging.
    enable_endpoints: Forwarded to DeployOptions.FromProperties().
    app_create: Forwarded to DeployOptions.FromProperties().

  Returns:
    A dict with two keys: 'versions', the list of version_util.Version objects
    that were deployed, and 'configs', the list of config names that were
    updated.
  """
  version_id = args.version or util.GenerateVersionId()
  flags.ValidateVersion(version_id)
  project = properties.VALUES.core.project.Get(required=True)
  deploy_options = DeployOptions.FromProperties(enable_endpoints, app_create)

  # Parse existing app.yamls or try to generate a new one if the directory is
  # empty.
  if not args.deployables:
    yaml_path = deploy_command_util.DEFAULT_DEPLOYABLE
    if not os.path.exists(deploy_command_util.DEFAULT_DEPLOYABLE):
      log.warning('Automatic app detection is currently in Beta')
      yaml_path = deploy_command_util.CreateAppYamlForAppDirectory(
          os.getcwd())
    app_config = yaml_parsing.AppConfigSet([yaml_path])
  else:
    app_config = yaml_parsing.AppConfigSet(args.deployables)

  services = app_config.Services()

  if not args.skip_image_url_validation:
    flags.ValidateImageUrl(args.image_url, services)

  # The new API client.
  api_client = appengine_api_client.GetApiClient()
  # pylint: disable=protected-access
  log.debug('API endpoint: [{endpoint}], API version: [{version}]'.format(
      endpoint=api_client.client.url,
      version=api_client.client._VERSION))
  # The legacy admin console API client.
  # The Admin Console API existed long before the App Engine Admin API, and
  # isn't being improved. We're in the process of migrating all of the calls
  # over to the Admin API, but a few things (notably config deployments) haven't
  # been ported over yet.
  ac_client = appengine_client.AppengineClient(
      args.server, args.ignore_bad_certs)

  app = _PossiblyCreateApp(api_client, project, deploy_options.app_create)

  # Tell the user what is going to happen, and ask them to confirm.
  deployed_urls = output_helpers.DisplayProposedDeployment(
      app, project, app_config, version_id, deploy_options.promote)
  console_io.PromptContinue(cancel_on_no=True)
  if services:
    # Do generic app setup if deploying any services.
    # All deployment paths for a service involve uploading source to GCS.
    code_bucket_ref = args.bucket or flags.GetCodeBucket(app, project)
    metrics.CustomTimedEvent(metric_names.GET_CODE_BUCKET)
    log.debug('Using bucket [{b}].'.format(b=code_bucket_ref.ToBucketUrl()))

    # Prepare Flex if any service is going to deploy an image.
    if any(m.RequiresImage() for m in services.values()):
      deploy_command_util.DoPrepareManagedVms(ac_client)

    all_services = {s.id: s for s in api_client.ListServices()}
  else:
    code_bucket_ref = None
    all_services = {}
  new_versions = []
  stager = (
      staging.GetNoopStager() if args.skip_staging else staging.GetStager())
  deployer = ServiceDeployer(api_client, stager, deploy_options)
  # .items() rather than the Python-2-only .iteritems() so the loop runs on
  # both major versions.
  for name, service in services.items():
    new_version = version_util.Version(project, name, version_id)
    deployer.Deploy(service, new_version, code_bucket_ref, args.image_url,
                    all_services)
    new_versions.append(new_version)
    log.status.Print('Deployed service [{0}] to [{1}]'.format(
        name, deployed_urls[name]))

  # Deploy config files.
  for name, config in app_config.Configs().items():
    message = 'Updating config [{config}]'.format(config=name)
    with progress_tracker.ProgressTracker(message):
      ac_client.UpdateConfig(name, config.parsed)

  # list() keeps the returned value a real list on Python 3, where keys() is
  # only a view.
  updated_configs = list(app_config.Configs().keys())

  PrintPostDeployHints(new_versions, updated_configs)

  # Return all the things that were deployed.
  return {'versions': new_versions, 'configs': updated_configs}