def remove_key(self, key):
    """Soft-delete the GE Cloud resource identified by ``key``.

    Deletion is performed via a PATCH that flags the resource with
    ``deleted: True`` rather than removing it from the backend.

    Returns:
        True on a 2xx response, False otherwise.

    Raises:
        StoreBackendError: if the HTTP request itself fails.
    """
    if not isinstance(key, tuple):
        key = key.to_tuple()
    ge_cloud_id = key[1]

    payload = {
        "data": {
            "type": self.ge_cloud_resource_type,
            "id": ge_cloud_id,
            "attributes": {
                "deleted": True,
            },
        }
    }
    url = urljoin(
        self.ge_cloud_base_url,
        f"organizations/{self.ge_cloud_credentials['organization_id']}/"
        f"{hyphen(self.ge_cloud_resource_name)}/{ge_cloud_id}",
    )
    try:
        response = requests.patch(url, json=payload, headers=self.auth_headers)
        return response.status_code < 300
    except Exception as e:
        logger.debug(str(e))
        raise StoreBackendError(
            f"Unable to delete object in GE Cloud Store Backend: {e}"
        )
def _set(self, key, value, content_encoding="utf-8", content_type="application/json"):
    """Write ``value`` to S3 under the object key derived from ``key``.

    String values are encoded with ``content_encoding`` and stored with an
    explicit ContentEncoding; bytes-like values are stored as-is.

    Returns:
        The S3 object key the value was written to.

    Raises:
        StoreBackendError: if the S3 put fails with a ClientError.
    """
    import boto3

    s3 = boto3.resource("s3")
    s3_object_key = self._build_s3_object_key(key)
    try:
        result_s3 = s3.Object(self.bucket, s3_object_key)
        if isinstance(value, str):
            result_s3.put(
                Body=value.encode(content_encoding),
                ContentEncoding=content_encoding,
                ContentType=content_type,
            )
        else:
            result_s3.put(Body=value, ContentType=content_type)
    # Bug fix: a boto3 ServiceResource exposes ClientError via
    # meta.client.exceptions, not via s3.exceptions — the old attribute
    # lookup could raise AttributeError while handling the real error.
    # This also matches the newer _set variant elsewhere in this file.
    except s3.meta.client.exceptions.ClientError as e:
        logger.debug(str(e))
        raise StoreBackendError("Unable to set object in s3.")
    return s3_object_key
def _update(self, ge_cloud_id, value, **kwargs):
    """PATCH the GE Cloud resource identified by ``ge_cloud_id`` with ``value``.

    Returns:
        True on a 2xx response, False otherwise.

    Raises:
        StoreBackendError: if the HTTP request itself fails.
    """
    resource_type = self.ge_cloud_resource_type
    organization_id = self.ge_cloud_credentials["organization_id"]
    # Each resource type stores its payload under a type-specific attribute key.
    attributes_key = self.PAYLOAD_ATTRIBUTES_KEYS[resource_type]

    payload = {
        "data": {
            "type": resource_type,
            "id": ge_cloud_id,
            "attributes": {
                attributes_key: value,
                "organization_id": organization_id,
            },
        }
    }
    url = urljoin(
        self.ge_cloud_base_url,
        f"organizations/{organization_id}/"
        f"{hyphen(self.ge_cloud_resource_name)}/{ge_cloud_id}",
    )
    try:
        response = requests.patch(url, json=payload, headers=self.auth_headers)
        return response.status_code < 300
    except Exception as e:
        logger.debug(str(e))
        raise StoreBackendError(
            f"Unable to update object in GE Cloud Store Backend: {e}"
        )
def _set(self, key, value, **kwargs):
    """POST ``value`` as a new checkpoint resource to GE Cloud.

    Returns:
        A GeCloudResourceRef pointing at the newly created resource.

    Raises:
        StoreBackendError: if the request fails or the response cannot
            be parsed.
    """
    payload = {
        "data": {
            "type": self.ge_cloud_resource_type,
            "attributes": {
                "account_id": self.ge_cloud_credentials["account_id"],
                "checkpoint_config": value,
            },
        }
    }
    url = urljoin(
        self.ge_cloud_base_url,
        f"accounts/{self.ge_cloud_credentials['account_id']}/"
        f"{self.ge_cloud_resource_name}",
    )
    try:
        response = requests.post(url, json=payload, headers=self.auth_headers)
        object_id = response.json()["data"]["id"]
        return GeCloudResourceRef(
            resource_type=self.ge_cloud_resource_type,
            ge_cloud_id=object_id,
            url=self.get_url_for_key((object_id,)),
        )
    except Exception as e:
        logger.debug(str(e))
        raise StoreBackendError("Unable to set object in GE Cloud Store Backend.")
def __init__(
    self,
    filepath_template=None,
    filepath_prefix=None,
    filepath_suffix=None,
    forbidden_substrings=None,
    platform_specific_separator=True,
    fixed_length_key=False,
):
    """Initialize a tuple store backend that maps keys onto filepaths.

    Args:
        filepath_template: template with ``{N}`` placeholders used to build
            filepaths from key tuples; mutually exclusive with
            filepath_suffix.
        filepath_prefix: optional path prefix; must not end with a
            forbidden substring.
        filepath_suffix: optional suffix appended to generated filepaths.
        forbidden_substrings: characters that may not appear in key parts;
            defaults to ["/", "\\"].
        platform_specific_separator: whether to join paths with the OS
            separator.
        fixed_length_key: passed through to the parent backend.

    Raises:
        ValueError: if both filepath_template and filepath_suffix are given.
        StoreBackendError: if filepath_prefix ends with a forbidden substring.
    """
    super().__init__(fixed_length_key=fixed_length_key)

    if forbidden_substrings is None:
        forbidden_substrings = ["/", "\\"]
    self.forbidden_substrings = forbidden_substrings
    self.platform_specific_separator = platform_specific_separator

    if filepath_template is not None and filepath_suffix is not None:
        raise ValueError("filepath_suffix may only be used when filepath_template is None")
    self.filepath_template = filepath_template

    # Idiom fix: a non-empty string is simply truthy; len(...) > 0 was redundant.
    if filepath_prefix:
        # Validate that the filepath prefix does not end with a forbidden substring
        if filepath_prefix[-1] in self.forbidden_substrings:
            raise StoreBackendError(
                "Unable to initialize TupleStoreBackend: filepath_prefix may not end with a "
                "forbidden substring. Current forbidden substrings are "
                + str(forbidden_substrings)
            )
    # Bug fix: always assign the attribute. Previously it was only set inside
    # the `if filepath_prefix` branch, so a None/empty prefix left the
    # instance without a `filepath_prefix` attribute, causing AttributeError
    # on any later read.
    self.filepath_prefix = filepath_prefix
    self.filepath_suffix = filepath_suffix

    if filepath_template is not None:
        # key length is the number of unique values to be substituted
        # into the filepath_template
        self.key_length = len(set(re.findall(r"{\d+}", filepath_template)))
        self.verify_that_key_to_filepath_operation_is_reversible()
        self._fixed_length_key = True
def _set(
    self,
    key,
    value,
    content_encoding="utf-8",
    content_type="application/json",
    **kwargs,
):
    """Write ``value`` to S3 under the object key derived from ``key``.

    String values are encoded and stored with an explicit ContentEncoding;
    bytes-like values are stored as-is. Extra put options come from
    ``self.s3_put_options``.

    Returns:
        The S3 object key the value was written to.

    Raises:
        StoreBackendError: if the S3 put fails with a ClientError.
    """
    s3_object_key = self._build_s3_object_key(key)
    s3 = self._create_resource()
    try:
        obj = s3.Object(self.bucket, s3_object_key)
        if isinstance(value, str):
            obj.put(
                Body=value.encode(content_encoding),
                ContentEncoding=content_encoding,
                ContentType=content_type,
                **self.s3_put_options,
            )
        else:
            obj.put(Body=value, ContentType=content_type, **self.s3_put_options)
    except s3.meta.client.exceptions.ClientError as e:
        logger.debug(str(e))
        raise StoreBackendError("Unable to set object in s3.")
    return s3_object_key
def _set(self, key, value, content_encoding="utf-8", content_type="application/json"):
    """Write ``value`` to S3 under a key built from the configured prefix.

    The object key is joined with the OS separator when
    ``platform_specific_separator`` is set, otherwise with "/".
    String values are encoded with ``content_encoding``; bytes-like
    values are stored as-is.

    Returns:
        The S3 object key the value was written to.

    Raises:
        StoreBackendError: if the S3 put fails with a ClientError.
    """
    if self.platform_specific_separator:
        s3_object_key = os.path.join(self.prefix, self._convert_key_to_filepath(key))
    else:
        s3_object_key = "/".join((self.prefix, self._convert_key_to_filepath(key)))

    import boto3

    s3 = boto3.resource("s3")
    try:
        result_s3 = s3.Object(self.bucket, s3_object_key)
        if isinstance(value, str):
            result_s3.put(
                Body=value.encode(content_encoding),
                ContentEncoding=content_encoding,
                ContentType=content_type,
            )
        else:
            result_s3.put(Body=value, ContentType=content_type)
    # Bug fix: ClientError lives on the underlying client
    # (s3.meta.client.exceptions), not on the resource object —
    # matches the newer _set variant elsewhere in this file.
    except s3.meta.client.exceptions.ClientError as e:
        logger.debug(str(e))
        raise StoreBackendError("Unable to set object in s3.")
    return s3_object_key
def set(self, key, value, **kwargs):
    """Validate ``key`` and ``value``, then delegate storage to ``_set``.

    Returns:
        Whatever the implementing ``_set`` returns (e.g. a path or
        resource reference used for its key).

    Raises:
        StoreBackendError: if ``_set`` rejects the input with a ValueError.
    """
    self._validate_key(key)
    self._validate_value(value)
    # Allow the implementing setter to return something (e.g. a path used for its key)
    try:
        return self._set(key, value, **kwargs)
    except ValueError as e:
        logger.debug(str(e))
        # Fix: chain the original ValueError explicitly so the root cause
        # is preserved as __cause__ rather than dropped.
        raise StoreBackendError("ValueError while calling _set on store backend.") from e
def get_public_url_for_key(self, key, protocol=None):
    """Return the public URL for ``key`` on a filesystem-backed store.

    The URL is the configured ``base_public_path`` concatenated with the
    filepath derived from the key.

    Raises:
        StoreBackendError: if no base_public_path was configured.
    """
    if not self.base_public_path:
        raise StoreBackendError(
            f"""Error: No base_public_path was configured!
    - A public URL was requested base_public_path was not configured for the TupleFilesystemStoreBackend
""")
    path = self._convert_key_to_filepath(key)
    return self.base_public_path + path
def _get_container_client(self):
    """Build an Azure container client from the configured connection string.

    Raises:
        StoreBackendError: if no connection string is configured.
    """
    from azure.storage.blob import BlobServiceClient

    # Guard clause: without a connection string there is nothing to build.
    if not self.connection_string:
        raise StoreBackendError(
            "Unable to initialize ServiceClient, AZURE_STORAGE_CONNECTION_STRING should be set"
        )
    service_client = BlobServiceClient.from_connection_string(self.connection_string)
    return service_client.get_container_client(self.container)
def _set(self, key: Tuple[str, ...], value: Any, **kwargs: dict) -> Union[bool, GeCloudResourceRef]:
    """Create a new GE Cloud resource, or update an existing one.

    ``key`` is ``(resource_type, ge_cloud_id)``. When the key already
    carries an id the resource exists and the call is delegated to
    ``_update``. DataContextVariables are a special edge case for the
    Cloud product and always go through ``_update`` (PUT), even without
    an id.

    Returns:
        A GeCloudResourceRef for a newly created resource, or the result
        of ``_update`` on the update path.

    Raises:
        StoreBackendError: if the request fails.
    """
    ge_cloud_resource: GeCloudRESTResource = key[0]
    ge_cloud_id: str = key[1]

    if ge_cloud_id or ge_cloud_resource is GeCloudRESTResource.DATA_CONTEXT_VARIABLES:
        return self._update(ge_cloud_id=ge_cloud_id, value=value, **kwargs)

    resource_type = self.ge_cloud_resource_type
    resource_name = self.ge_cloud_resource_name
    organization_id = self.ge_cloud_credentials["organization_id"]
    # Each resource type has a corresponding attribute key for the POST body.
    attributes_key = self.PAYLOAD_ATTRIBUTES_KEYS[resource_type]

    attributes = {
        "organization_id": organization_id,
        attributes_key: value,
    }
    if self.validate_set_kwargs(kwargs):
        attributes.update(kwargs)
    payload = {"data": {"type": resource_type, "attributes": attributes}}

    url = urljoin(
        self.ge_cloud_base_url,
        f"organizations/{organization_id}/{hyphen(resource_name)}",
    )
    try:
        response = requests.post(url, json=payload, headers=self.auth_headers)
        response_json = response.json()
        object_id = response_json["data"]["id"]
        object_url = self.get_url_for_key((self.ge_cloud_resource_type, object_id))
        return GeCloudResourceRef(
            resource_type=resource_type,
            ge_cloud_id=object_id,
            url=object_url,
        )
    # TODO Show more detailed error messages
    except Exception as e:
        logger.debug(str(e))
        raise StoreBackendError(f"Unable to set object in GE Cloud Store Backend: {e}")
def get_public_url_for_key(self, key, protocol=None):
    """Return the public URL for ``key`` on an S3-backed store.

    Joins ``base_public_path`` and the key's filepath, inserting a "/"
    only when the base path does not already end with one.

    Raises:
        StoreBackendError: if no base_public_path was configured.

    NOTE(review): the error message below appears truncated ("for the ")
    and the prefix question in the original comment is unresolved —
    confirm intended wording/behavior before changing it.
    """
    if not self.base_public_path:
        raise StoreBackendError(
            f"""Error: No base_public_path was configured!
    - A public URL was requested base_public_path was not configured for the
""")
    s3_key = self._convert_key_to_filepath(key)
    # <WILL> What happens if there is a prefix?
    if self.base_public_path[-1] != "/":
        return self.base_public_path + "/" + s3_key
    return self.base_public_path + s3_key
def _get(self, key: Tuple[str, ...]) -> dict:
    """GET the GE Cloud resource for ``key`` and return the parsed JSON body.

    Raises:
        StoreBackendError: if the response body is not valid JSON.
    """
    ge_cloud_url = self.get_url_for_key(key=key)
    try:
        response = requests.get(ge_cloud_url, headers=self.auth_headers)
        return response.json()
    except JSONDecodeError as jsonError:
        # Bug fix: the previous call passed extra positional args to
        # logger.debug while the message had no %-placeholders, so the
        # logging machinery raised "not all arguments converted" and the
        # message was never emitted.
        logger.debug(
            "Failed to parse GE Cloud Response into JSON: %s (%s)",
            response.text,
            jsonError,
        )
        raise StoreBackendError(
            f"Unable to get object in GE Cloud Store Backend: {jsonError}"
        )
def _set(self, key, value, **kwargs):
    """Create a new GE Cloud resource, or update an existing one.

    ``key`` carries an optional ge_cloud_id at index 1; when present, the
    resource already exists and the call is delegated to ``_update``.

    Returns:
        A GeCloudResourceRef for a newly created resource, or the result
        of ``_update`` on the update path.

    Raises:
        StoreBackendError: if the request fails.
    """
    ge_cloud_id = key[1]

    if ge_cloud_id:
        return self._update(ge_cloud_id=ge_cloud_id, value=value, **kwargs)

    resource_type = self.ge_cloud_resource_type
    resource_name = self.ge_cloud_resource_name
    organization_id = self.ge_cloud_credentials["organization_id"]
    # Each resource type has a corresponding attribute key for the POST body.
    attributes_key = self.PAYLOAD_ATTRIBUTES_KEYS[resource_type]

    attributes = {
        "organization_id": organization_id,
        attributes_key: value,
    }
    if self.validate_set_kwargs(kwargs):
        attributes.update(kwargs)
    payload = {"data": {"type": resource_type, "attributes": attributes}}

    url = urljoin(
        self.ge_cloud_base_url,
        f"organizations/{organization_id}/{hyphen(resource_name)}",
    )
    try:
        response = requests.post(url, json=payload, headers=self.auth_headers)
        response_json = response.json()
        object_id = response_json["data"]["id"]
        object_url = self.get_url_for_key((self.ge_cloud_resource_type, object_id))
        return GeCloudResourceRef(
            resource_type=resource_type,
            ge_cloud_id=object_id,
            url=object_url,
        )
    # TODO Show more detailed error messages
    except Exception as e:
        logger.debug(str(e))
        raise StoreBackendError(f"Unable to set object in GE Cloud Store Backend: {e}")
def list_keys(self):
    """Return the key of every resource of this type under the account.

    Each key is a one-element tuple ``(ge_cloud_id,)``.

    Raises:
        StoreBackendError: if the request fails or the response cannot
            be parsed.
    """
    url = urljoin(
        self.ge_cloud_base_url,
        f"accounts/{self.ge_cloud_credentials['account_id']}/"
        f"{self.ge_cloud_resource_name}",
    )
    try:
        response = requests.get(url, headers=self.auth_headers)
        payload = response.json()
        return [(resource["id"],) for resource in payload.get("data")]
    except Exception as e:
        logger.debug(str(e))
        raise StoreBackendError("Unable to list keys in GE Cloud Store Backend.")
def _container_client(self):
    """Build an Azure container client from whichever credential is configured.

    Prefers a connection string; falls back to account_url with
    DefaultAzureCredential.

    Raises:
        StoreBackendError: if neither credential source is configured.
    """
    from azure.identity import DefaultAzureCredential
    from azure.storage.blob import BlobServiceClient

    if self.connection_string:
        service = BlobServiceClient.from_connection_string(self.connection_string)
    elif self.account_url:
        service = BlobServiceClient(
            account_url=self.account_url, credential=DefaultAzureCredential()
        )
    else:
        raise StoreBackendError(
            "Unable to initialize ServiceClient, AZURE_STORAGE_CONNECTION_STRING should be set"
        )
    return service.get_container_client(self.container)
def list_keys(self, prefix: Tuple = ()) -> List[Tuple[str, Any]]:
    """Return the key of every resource of this type in the organization.

    Each key is a ``(resource_type, ge_cloud_id)`` tuple.

    NOTE(review): ``prefix`` is accepted for interface compatibility but
    is not used to filter the results — confirm this is intentional.

    Raises:
        StoreBackendError: if the request fails or the response cannot
            be parsed.
    """
    url = urljoin(
        self.ge_cloud_base_url,
        f"organizations/{self.ge_cloud_credentials['organization_id']}/"
        f"{hyphen(self.ge_cloud_resource_name)}",
    )
    try:
        response = requests.get(url, headers=self.auth_headers)
        payload = response.json()
        return [
            (self.ge_cloud_resource_type, resource["id"])
            for resource in payload.get("data")
        ]
    except Exception as e:
        logger.debug(str(e))
        raise StoreBackendError(f"Unable to list keys in GE Cloud Store Backend: {e}")
def _move(self, source_key, dest_key, **kwargs) -> None:
    """Move a blob within the container by copy-then-delete.

    The Azure Storage SDK has no native move operation, so the blob is
    copied synchronously to the destination and the source is deleted
    once the copy reports success.

    Raises:
        StoreBackendError: if the copy does not complete with status
            "success" (the copy is aborted first).
    """
    src_path = self._convert_key_to_filepath(source_key)
    if not src_path.startswith(self.prefix):
        src_path = os.path.join(self.prefix, src_path)

    dst_path = self._convert_key_to_filepath(dest_key)
    if not dst_path.startswith(self.prefix):
        dst_path = os.path.join(self.prefix, dst_path)

    src_client = self._container_client.get_blob_client(src_path)
    dst_client = self._container_client.get_blob_client(dst_path)

    # requires_sync makes the copy complete before the call returns.
    dst_client.start_copy_from_url(src_client.url, requires_sync=True)
    copy_state = dst_client.get_blob_properties().copy
    if copy_state.status != "success":
        dst_client.abort_copy(copy_state.id)
        raise StoreBackendError(
            f"Unable to copy blob {src_path} with status {copy_state.status}"
        )
    src_client.delete_blob()