def _start_upload(self):
    """Run the GCS object insert; this is the body of the uploader thread.

    The upload runs in a separate thread because the apitools uploader
    insists on taking a stream as input. Happily, this also gives
    asynchronous I/O to GCS. By default the uploader transfers data in
    chunks of 1024 * 1024 bytes at a time, buffering writes until that size
    is reached.

    Any exception raised by the insert is reported to the request-count
    metric, logged, and stored on ``self._upload_thread.last_error`` instead
    of being propagated. ``self._child_conn`` is closed in every case.
    """
    project_number = self._get_project_number(self._bucket)

    # Request-count metric covering the Objects.insert call below.
    bucket_resource = resource_identifiers.GoogleCloudStorageBucket(
        self._bucket)
    metric_labels = {
        monitoring_infos.SERVICE_LABEL: 'Storage',
        monitoring_infos.METHOD_LABEL: 'Objects.insert',
        monitoring_infos.RESOURCE_LABEL: bucket_resource,
        monitoring_infos.GCS_BUCKET_LABEL: self._bucket,
        monitoring_infos.GCS_PROJECT_ID_LABEL: project_number,
    }
    insert_metric = ServiceCallMetric(
        request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
        base_labels=metric_labels)

    try:
        self._client.objects.Insert(
            self._insert_request, upload=self._upload)
        insert_metric.call('ok')
    except Exception as error:  # pylint: disable=broad-except
        insert_metric.call(error)
        _LOGGER.error(
            'Error in _start_upload while inserting file %s: %s',
            self._path,
            traceback.format_exc())
        # Stash the failure on the thread object; it is not re-raised here.
        self._upload_thread.last_error = error
    finally:
        self._child_conn.close()
def _query_metric(self, query_name, status):
    """Report one API request count for a Spanner SQL query.

    Args:
      query_name: name of the query; used both to build the resource
        identifier and as the ``SPANNER_QUERY_NAME`` label.
      status: outcome of the call; it is passed through ``str()`` before
        being reported to the metric.
    """
    query_resource = resource_identifiers.SpannerSqlQuery(
        self._spanner_configuration.project, query_name)

    # Start from the shared labels and add the query-specific ones.
    metric_labels = {**self.base_labels}
    metric_labels[monitoring_infos.RESOURCE_LABEL] = query_resource
    metric_labels[monitoring_infos.SPANNER_QUERY_NAME] = query_name

    ServiceCallMetric(
        request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
        base_labels=metric_labels).call(str(status))
def _table_metric(self, table_id, status):
    """Report one API request count for a Spanner table.

    Args:
      table_id: id of the table; used both to build the resource identifier
        and as the ``SPANNER_TABLE_ID`` label.
      status: outcome of the call; it is passed through ``str()`` before
        being reported to the metric.
    """
    spanner_config = self._spanner_configuration
    database = spanner_config.database
    project = spanner_config.project
    table_resource = resource_identifiers.SpannerTable(
        project, database, table_id)

    # Start from the shared labels and add the table-specific ones.
    metric_labels = {**self.base_labels}
    metric_labels[monitoring_infos.RESOURCE_LABEL] = table_resource
    metric_labels[monitoring_infos.SPANNER_TABLE_ID] = table_id

    ServiceCallMetric(
        request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
        base_labels=metric_labels).call(str(status))
def __init__(self, client, path, buffer_size, get_project_number):
    """Set up download state for the GCS object at ``path``.

    Requests the object's metadata to obtain its size and generation, pins
    the get request to that generation, then issues the initial
    ``objects.Get`` call with a downloader (``auto_transfer=False``) that
    writes into an in-memory stream. Both requests are counted on a single
    ``Objects.get`` request-count metric.

    Args:
      client: storage client whose ``objects.Get`` method is called.
      path: GCS path of the object; split into bucket and object name with
        ``parse_gcs_path``.
      buffer_size: chunk size handed to the downloader.
      get_project_number: callable taking the bucket name; its result is
        used as the project-id label of the metric.

    Raises:
      IOError: with ``errno.ENOENT`` when the metadata request fails with
        HTTP status 404.
      HttpError: for any other HTTP error from the metadata request, and for
        an HTTP error from the initial download request.
    """
    self._client = client
    self._path = path
    self._bucket, self._name = parse_gcs_path(path)
    self._buffer_size = buffer_size
    self._get_project_number = get_project_number

    project_number = self._get_project_number(self._bucket)

    # Create a request count metric
    resource = resource_identifiers.GoogleCloudStorageBucket(self._bucket)
    labels = {
        monitoring_infos.SERVICE_LABEL: 'Storage',
        monitoring_infos.METHOD_LABEL: 'Objects.get',
        monitoring_infos.RESOURCE_LABEL: resource,
        monitoring_infos.GCS_BUCKET_LABEL: self._bucket,
        monitoring_infos.GCS_PROJECT_ID_LABEL: project_number,
    }
    service_call_metric = ServiceCallMetric(
        request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
        base_labels=labels)

    # Get object state.
    self._get_request = storage.StorageObjectsGetRequest(
        bucket=self._bucket, object=self._name)
    try:
        metadata = self._get_object_metadata(self._get_request)
        service_call_metric.call('ok')
    except HttpError as http_error:
        service_call_metric.call(http_error)
        if http_error.status_code == 404:
            raise IOError(errno.ENOENT, 'Not found: %s' % self._path)
        _LOGGER.error(
            'HTTP error while requesting file %s: %s',
            self._path,
            http_error)
        raise

    self._size = metadata.size

    # Ensure read is from file of the correct generation.
    self._get_request.generation = metadata.generation

    # Initialize read buffer state.
    self._download_stream = io.BytesIO()
    self._downloader = transfer.Download(
        self._download_stream,
        auto_transfer=False,
        chunksize=self._buffer_size,
        num_retries=20)

    try:
        self._client.objects.Get(
            self._get_request, download=self._downloader)
        service_call_metric.call('ok')
    except HttpError as http_error:
        service_call_metric.call(http_error)
        # Previously this error was recorded and then silently dropped,
        # leaving a half-initialised downloader behind. Surface it the same
        # way the metadata request above does.
        _LOGGER.error(
            'HTTP error while starting download of file %s: %s',
            self._path,
            http_error)
        raise
def process(self, query, *unused_args, **unused_kwargs):
    """Run ``query`` against Datastore and yield the resulting entities.

    A ``None`` namespace on ``query`` is replaced in place with the empty
    string before the client is created. The request is counted on a
    ``BatchDatastoreRead`` request-count metric: ``'ok'`` is recorded only
    once the result iterator has been fully consumed, and a failure is
    recorded with the error's status before the error is re-raised.

    Args:
      query: query object providing ``project``, ``namespace``, ``limit``
        and ``_to_client_query``.
      *unused_args: ignored.
      **unused_kwargs: ignored.

    Yields:
      The result of ``types.Entity.from_client_entity`` for each entity
      fetched by the client query.

    Raises:
      ClientError, GoogleAPICallError, HttpError: re-raised after the failure
        has been recorded on the metric.
    """
    if query.namespace is None:
        query.namespace = ''
    _client = helper.get_client(query.project, query.namespace)
    client_query = query._to_client_query(_client)

    # Create request count metric
    resource = resource_identifiers.DatastoreNamespace(
        query.project, query.namespace)
    labels = {
        monitoring_infos.SERVICE_LABEL: 'Datastore',
        monitoring_infos.METHOD_LABEL: 'BatchDatastoreRead',
        monitoring_infos.RESOURCE_LABEL: resource,
        monitoring_infos.DATASTORE_NAMESPACE_LABEL: query.namespace,
        monitoring_infos.DATASTORE_PROJECT_ID_LABEL: query.project,
        monitoring_infos.STATUS_LABEL: 'ok',
    }
    service_call_metric = ServiceCallMetric(
        request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
        base_labels=labels)

    try:
        for client_entity in client_query.fetch(query.limit):
            yield types.Entity.from_client_entity(client_entity)
        service_call_metric.call('ok')
    except (ClientError, GoogleAPICallError) as e:
        # e.code.value contains the numeric http status code.
        service_call_metric.call(e.code.value)
        # Re-raise: swallowing the error here would end the generator early
        # and make a partial read look like a complete one.
        raise
    except HttpError as e:
        service_call_metric.call(e)
        raise
def write_mutations(self, throttler, rpc_stats_callback, throttle_delay=1):
    """Writes a batch of mutations to Cloud Datastore.

    If a commit fails, it will be retried up to 5 times. All mutations in the
    batch will be committed again, even if the commit was partially
    successful. If the retry limit is exceeded, the last exception from
    Cloud Datastore will be raised.

    NOTE(review): the retry loop itself is not part of this function body;
    it is presumably applied by a wrapper or decorator -- confirm against
    the enclosing code. This method only prepares for a retry by discarding
    the failed batch so the next call rebuilds it.

    Assumes that the Datastore client library does not perform any retries on
    commits. It has not been determined how such retries would interact with
    the retries and throttler used here.
    See ``google.cloud.datastore_v1.gapic.datastore_client_config`` for
    retry config.

    Args:
      throttler: (``apache_beam.io.gcp.datastore.v1new.adaptive_throttler.
        AdaptiveThrottler``)
        Throttler instance used to select requests to be throttled.
      rpc_stats_callback: a function called with one of the keyword
        arguments ``successes``, ``errors`` or ``throttled_secs``; this is
        called to record successful and failed RPCs to Datastore and time
        spent waiting for throttling.
      throttle_delay: (:class:`float`) time in seconds to sleep when
        throttled.

    Returns:
      (int) The latency of the successful RPC in milliseconds.

    Raises:
      ClientError, GoogleAPICallError, HttpError: re-raised from the commit
        after the failure has been recorded.
    """
    # Client-side throttling.
    while throttler.throttle_request(time.time() * 1000):
        _LOGGER.info(
            "Delaying request for %ds due to previous failures",
            throttle_delay)
        time.sleep(throttle_delay)
        rpc_stats_callback(throttled_secs=throttle_delay)

    if self._batch is None:
        # this will only happen when we re-try previously failed batch
        self._batch = self._client.batch()
        self._batch.begin()
        for element in self._batch_elements:
            self.add_to_batch(element)

    # Create request count metric
    resource = resource_identifiers.DatastoreNamespace(self._project, "")
    labels = {
        monitoring_infos.SERVICE_LABEL: 'Datastore',
        monitoring_infos.METHOD_LABEL: 'BatchDatastoreWrite',
        monitoring_infos.RESOURCE_LABEL: resource,
        monitoring_infos.DATASTORE_NAMESPACE_LABEL: "",
        monitoring_infos.DATASTORE_PROJECT_ID_LABEL: self._project,
        monitoring_infos.STATUS_LABEL: 'ok',
    }
    service_call_metric = ServiceCallMetric(
        request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
        base_labels=labels)

    try:
        start_time = time.time()
        self._batch.commit()
        end_time = time.time()
        service_call_metric.call('ok')

        rpc_stats_callback(successes=1)
        throttler.successful_request(start_time * 1000)
        commit_time_ms = int((end_time - start_time) * 1000)
        return commit_time_ms
    except (ClientError, GoogleAPICallError) as e:
        # Drop the failed batch so that a retry rebuilds it from
        # self._batch_elements.
        self._batch = None
        # e.code.value contains the numeric http status code.
        service_call_metric.call(e.code.value)
        rpc_stats_callback(errors=1)
        raise
    except HttpError as e:
        # Same reset as above: without it a retry would try to commit the
        # batch that has already been through a failed commit.
        self._batch = None
        service_call_metric.call(e)
        rpc_stats_callback(errors=1)
        raise