def parse_gcs_url(gcs_url):
    """
    Parses and validates a google cloud storage url.

    Returns:
        tuple(bucket_name, blob).
    """
    parsed = urllib.parse.urlparse(gcs_url)
    # A valid gs url must carry both a bucket (netloc) and the `gs` scheme.
    if not parsed.netloc or parsed.scheme != 'gs':
        raise PolyaxonStoresException('Received an invalid url `{}`'.format(gcs_url))
    # The blob path is everything after the bucket, minus the leading slashes.
    return parsed.netloc, parsed.path.lstrip('/')
def download_file(self, blob, local_path, container_name=None, use_basename=True):
    """
    Downloads a file from Azure Blob Storage.

    NOTE(review): the previous docstring said "Google Cloud Storage", but this
    method uses the Azure blob client (`get_blob_to_path`) and wraps
    `AzureHttpError` — it belongs to the Azure store.

    Args:
        blob: `str`. blob to download.
        local_path: `str`. the path to download to.
        container_name: `str`. the name of the container.
        use_basename: `bool`. whether or not to use the basename of the blob.
    """
    # When no container is given, the blob argument is a full wasbs:// url;
    # parse_wasbs_url returns (container, storage_account, path).
    if not container_name:
        container_name, _, blob = self.parse_wasbs_url(blob)
    local_path = os.path.abspath(local_path)
    if use_basename:
        local_path = append_basename(local_path, blob)
    # Fail early if the destination directory does not exist.
    check_dirname_exists(local_path)
    try:
        self.connection.get_blob_to_path(container_name, blob, local_path)
    except AzureHttpError as e:
        # Normalize backend errors to the package exception type.
        raise PolyaxonStoresException(e)
def download_file(self, blob, local_path, bucket_name=None, use_basename=True):
    """
    Downloads a file from Google Cloud Storage.

    Args:
        blob: `str`. blob to download.
        local_path: `str`. the path to download to.
        bucket_name: `str`. the name of the bucket.
        use_basename: `bool`. whether or not to use the basename of the blob.
    """
    # Accept a full gs:// url when no bucket is supplied explicitly.
    if not bucket_name:
        bucket_name, blob = self.parse_gcs_url(blob)

    target = os.path.abspath(local_path)
    if use_basename:
        target = append_basename(target, blob)
    # Fail early if the destination directory does not exist.
    check_dirname_exists(target)

    try:
        remote_obj = self.get_blob(blob=blob, bucket_name=bucket_name)
        remote_obj.download_to_filename(target)
    except (NotFound, GoogleAPIError) as e:
        raise PolyaxonStoresException(e)
def download_file(self, key, local_path, bucket_name=None, use_basename=True):
    """
    Download a file from S3.

    Args:
        key: `str`. S3 key that will point to the file.
        local_path: `str`. the path to download to.
        bucket_name: `str`. Name of the bucket in which to store the file.
        use_basename: `bool`. whether or not to use the basename of the key.
    """
    # Accept a full s3:// url when no bucket is supplied explicitly.
    if not bucket_name:
        bucket_name, key = self.parse_s3_url(key)

    target = os.path.abspath(local_path)
    if use_basename:
        target = append_basename(target, key)
    # Fail early if the destination directory does not exist.
    check_dirname_exists(target)

    try:
        self.client.download_file(bucket_name, key, target)
    except ClientError as e:
        raise PolyaxonStoresException(e)
def delete_file(self, key, bucket_name=None):
    """
    Deletes a blob from Google Cloud Storage.

    Args:
        key: `str`. the blob to delete, or a full gs:// url.
        bucket_name: `str`. the name of the bucket.
    """
    if not bucket_name:
        bucket_name, key = self.parse_gcs_url(key)
    # Bucket lookup happens outside the try so its own errors propagate as-is.
    target_bucket = self.get_bucket(bucket_name)
    try:
        return target_bucket.delete_blob(key)
    except (NotFound, GoogleAPIError) as e:
        raise PolyaxonStoresException(e)
def delete_file(self, key, bucket_name=None):
    """
    Deletes an object from S3.

    Args:
        key: `str`. the S3 key to delete, or a full s3:// url.
        bucket_name: `str`. the name of the bucket.
    """
    if not bucket_name:
        bucket_name, key = self.parse_s3_url(key)
    try:
        self.resource.Object(bucket_name, key).delete()
    except ClientError as e:
        raise PolyaxonStoresException(e)
def __init__(self, store=None, path=None):
    """
    Wraps a concrete store instance, resolving one from `path` or the
    environment when no store is passed explicitly.

    Args:
        store: a `BaseStore` instance, or falsy to auto-resolve.
        path: `str`. optional path used to look up a store definition.

    Raises:
        PolyaxonStoresException: if the resolved value is not a `BaseStore`.
    """
    self._path = path
    # Try a path-specific store first, then fall back to the default store.
    if not store and path:
        store = BaseStore.get_store_for_path(path=path)
    store = store or BaseStore.get_store()
    if not isinstance(store, BaseStore):
        raise PolyaxonStoresException('Received an unrecognised store `{}`.'.format(store))
    self._store = store
def get_gc_credentials(key_path=None, keyfile_dict=None, scopes=None):
    """
    Returns the Credentials object for Google API.

    Resolution order:
      1. Application default credentials, when neither a key path nor a
         keyfile dict is available.
      2. A `.json` service-account key file (`key_path`).
      3. An in-memory service-account payload (`keyfile_dict`), either a
         mapping or a JSON string.

    Args:
        key_path: `str`. path to a `.json` service account key file.
        keyfile_dict: `dict` or `str`. service account info as mapping/JSON.
        scopes: `str`. comma-separated scopes; defaults to DEFAULT_SCOPES.

    Raises:
        PolyaxonStoresException: on an unrecognised key file extension, or
            on an invalid/malformed key payload.
    """
    key_path = key_path or get_key_path()
    keyfile_dict = keyfile_dict or get_keyfile_dict()
    scopes = scopes or get_scopes()

    if scopes is not None:
        scopes = [s.strip() for s in scopes.split(',')]
    else:
        scopes = DEFAULT_SCOPES

    if not key_path and not keyfile_dict:
        logger.info('Getting connection using `google.auth.default()` '
                    'since no key file is defined for hook.')
        credentials, _ = google.auth.default(scopes=scopes)
    elif key_path:
        # Get credentials from a JSON file.
        if key_path.endswith('.json'):
            logger.info('Getting connection using a JSON key file.')
            credentials = Credentials.from_service_account_file(
                os.path.abspath(key_path), scopes=scopes)
        else:
            raise PolyaxonStoresException(
                'Unrecognised extension for key file.')
    else:
        # Get credentials from JSON data.
        try:
            if not isinstance(keyfile_dict, Mapping):
                keyfile_dict = json.loads(keyfile_dict)

            # Convert escaped newlines to actual newlines if any.
            keyfile_dict['private_key'] = keyfile_dict['private_key'].replace(
                '\\n', '\n')

            credentials = Credentials.from_service_account_info(keyfile_dict, scopes=scopes)
        except (ValueError, KeyError, TypeError):
            # ValueError covers json.decoder.JSONDecodeError (absent on py2).
            # KeyError/TypeError cover a payload that parses but is missing
            # `private_key` or has the wrong shape -- previously these escaped
            # as raw exceptions instead of the contract's store exception.
            raise PolyaxonStoresException('Invalid key JSON.')
    return credentials
def parse_wasbs_url(wasbs_url):
    """
    Parses and validates a wasbs url.

    Returns:
        tuple(container, storage_account, path).
    """
    parsed = urllib.parse.urlparse(wasbs_url)
    if parsed.scheme != "wasbs":
        raise PolyaxonStoresException('Received an invalid url `{}`'.format(wasbs_url))
    # netloc must look like `<container>@<account>.blob.core.windows.net`.
    netloc_match = re.match(r"([^@]+)@([^.]+)\.blob\.core\.windows\.net", parsed.netloc)
    if not netloc_match:
        raise PolyaxonStoresException(
            'wasbs_url must be of the form <container>@<account>.blob.core.windows.net')
    # Drop the single leading slash of the url path, if present.
    path = parsed.path
    if path.startswith('/'):
        path = path[1:]
    return netloc_match.group(1), netloc_match.group(2), path
def get_store_for_path(cls, path):
    """
    Resolves a store instance for a given path using the configured
    per-path access keys, falling back to the default store.

    Args:
        path: `str`. the path to resolve a store for.

    Raises:
        PolyaxonStoresException: on a malformed access definition or an
            unreadable access key.
    """
    access = settings.RUN_STORES_ACCESS_KEYS.get(path)
    # No per-path definition: use the default store.
    if not access:
        return cls.get_store()
    # A definition must name both the store type and its secret key.
    if 'store' not in access or 'secret_key' not in access:
        raise PolyaxonStoresException(
            'Received an invalid store access definition.')
    store_type = access['store']
    try:
        access = settings.config.get_dict(access['secret_key'])
    except RheaError:
        raise PolyaxonStoresException(
            'Could not create store for path `{}`,'
            'received a store type `{}` without valid access key.'.format(
                path, store_type))
    return cls.get_store_for_type(store_type=store_type, store_access=access)
def parse_wasbs_url(wasbs_url):
    """
    Parses and validates a wasbs url.

    Delegates the actual parsing to rhea and normalizes its error type.

    Returns:
        tuple(container, storage_account, path).
    """
    try:
        parsed = rhea_parser.parse_wasbs_path(wasbs_url)
        return parsed.container, parsed.storage_account, parsed.path
    except RheaError as e:
        raise PolyaxonStoresException(e)
def parse_gcs_url(gcs_url):
    """
    Parses and validates a google cloud storage url.

    Delegates the actual parsing to rhea and normalizes its error type.

    Returns:
        tuple(bucket_name, blob).
    """
    try:
        parsed = rhea_parser.parse_gcs_path(gcs_url)
        return parsed.bucket, parsed.blob
    except RheaError as e:
        raise PolyaxonStoresException(e)
def parse_s3_url(s3_url):
    """
    Parses and validates an S3 url.

    Delegates the actual parsing to rhea and normalizes its error type.

    Returns:
        tuple(bucket_name, key).
    """
    try:
        parsed = rhea_parser.parse_s3_path(s3_url)
        return parsed.bucket, parsed.key
    except RheaError as e:
        raise PolyaxonStoresException(e)
def get_store(cls, store_type=None, **kwargs):
    """
    Instantiates a store of the requested type.

    The backend modules are imported lazily inside each branch so that
    only the selected backend's dependencies are required.

    Args:
        store_type: `str`. one of the registered store types; `None` means
            the environment value or, failing that, the local store.
        **kwargs: forwarded to the non-local store constructors.

    Raises:
        PolyaxonStoresException: on an unrecognised store type.
    """
    store_type = store_type or get_from_env(['POLYAXON_STORE'])
    # We assume that `None` refers to local store as well
    if store_type is None:
        store_type = cls._LOCAL_STORE
    if store_type not in cls._STORE_TYPES:
        raise PolyaxonStoresException(
            'Received an unrecognised store type `{}`.'.format(store_type))
    if store_type == cls._LOCAL_STORE:
        from polystores.stores.local_store import LocalStore
        return LocalStore()
    elif store_type == cls._AZURE_STORE:
        from polystores.stores.azure_store import AzureStore
        return AzureStore(**kwargs)
    elif store_type == cls._S3_STORE:
        from polystores.stores.s3_store import S3Store
        return S3Store(**kwargs)
    elif store_type == cls._GCS_STORE:
        from polystores.stores.gcs_store import GCSStore
        return GCSStore(**kwargs)
    # Defensive: a registered type with no matching branch.
    raise PolyaxonStoresException(
        'Received an unrecognised store type `{}`.'.format(store_type))
def parse_s3_url(s3_url):
    """
    Parses and validates an S3 url.

    Returns:
        tuple(bucket_name, key).
    """
    parsed = urllib.parse.urlparse(s3_url)
    # The bucket is the netloc; an empty netloc means the url is malformed.
    if not parsed.netloc:
        raise PolyaxonStoresException('Received an invalid url `{}`'.format(s3_url))
    # The key is the path with surrounding slashes removed.
    return parsed.netloc, parsed.path.strip('/')
def get_key(self, key, bucket_name=None):
    """
    Returns a boto3.s3.Object

    Args:
        key: `str`. the path to the key.
        bucket_name: `str`. the name of the bucket.
    """
    if not bucket_name:
        bucket_name, key = self.parse_s3_url(key)
    try:
        s3_object = self.resource.Object(bucket_name, key)
        # `load` issues a HEAD request, so a missing key raises here.
        s3_object.load()
        return s3_object
    except Exception as e:
        raise PolyaxonStoresException(e)
def get_blob(self, blob, bucket_name=None):
    """
    Get a file in Google Cloud Storage.

    Args:
        blob: `str`. the path to the object to check in the Google cloud storage bucket.
        bucket_name: `str`. Name of the bucket in which the file is stored
    """
    if not bucket_name:
        bucket_name, blob = self.parse_gcs_url(blob)
    # google.cloud.storage returns None for a missing blob; raise instead so
    # callers get a consistent error for nonexistent files.
    found = self.get_bucket(bucket_name).get_blob(blob)
    if found is None:
        raise PolyaxonStoresException('File does not exist: {}'.format(blob))
    return found
def upload_file(self, filename, key, bucket_name=None, overwrite=False, encrypt=False, acl=None,
                use_basename=True):
    """
    Uploads a local file to S3.

    Args:
        filename: `str`. name of the file to upload.
        key: `str`. S3 key that will point to the file.
        bucket_name: `str`. Name of the bucket in which to store the file.
        overwrite: `bool`. whether to overwrite an existing key; when False
            and the key exists, an error is raised.
        encrypt: `bool`. If True, the file is encrypted server-side by S3
            and stored encrypted at rest.
        acl: `str`. ACL to use for uploading, e.g. "public-read".
        use_basename: `bool`. whether or not to use the basename of the filename.
    """
    if not bucket_name:
        bucket_name, key = self.parse_s3_url(key)
    if use_basename:
        key = append_basename(key, filename)
    # Guard against clobbering an existing key unless explicitly allowed.
    if not overwrite and self.check_key(key, bucket_name):
        raise PolyaxonStoresException(
            "The key {} already exists.".format(key))

    extra_args = {}
    if encrypt:
        extra_args['ServerSideEncryption'] = self.ENCRYPTION
    if acl:
        extra_args['ACL'] = acl
    self.client.upload_file(filename, bucket_name, key, ExtraArgs=extra_args)
def check_dirname_exists(path, is_dir=False):
    """
    Validates that the directory containing `path` exists.

    Args:
        path: `str`. the path whose parent directory must exist; when
            `is_dir` is True, `path` itself is the directory to check.
        is_dir: `bool`. treat `path` as the directory itself.

    Raises:
        PolyaxonStoresException: if the directory does not exist.
    """
    dirname = path if is_dir else os.path.dirname(os.path.abspath(path))
    if os.path.isdir(dirname):
        return
    raise PolyaxonStoresException(
        'The parent path is not a directory {}'.format(dirname))