def get_registry(self):
    """Load the registry from the local registry file.

    Returns:
        A ``RegistryProto`` parsed from the bytes stored at ``self._filepath``.
        When that file does not exist, an otherwise empty ``RegistryProto``
        whose ``registry_schema_version`` is ``REGISTRY_SCHEMA_VERSION``.
    """
    registry_proto = RegistryProto()
    try:
        # Read directly instead of checking exists() first: the file could be
        # removed between the check and the read.
        serialized = self._filepath.read_bytes()
    except FileNotFoundError:
        registry_proto.registry_schema_version = REGISTRY_SCHEMA_VERSION
    else:
        registry_proto.ParseFromString(serialized)
    return registry_proto
def _write_registry(self, registry_proto: RegistryProto):
    """Stamp ``registry_proto`` and write it to the local registry file.

    The proto is mutated in place: it receives a fresh random ``version_id``
    and its ``last_updated`` field is set from ``datetime.utcnow()`` before
    it is serialized to ``self._filepath``.

    Args:
        registry_proto: The registry to persist.
    """
    registry_proto.version_id = str(uuid.uuid4())
    registry_proto.last_updated.FromDatetime(datetime.utcnow())
    # parents=True so a registry path nested under directories that do not
    # exist yet is created instead of failing with FileNotFoundError.
    self._filepath.parent.mkdir(parents=True, exist_ok=True)
    self._filepath.write_bytes(registry_proto.SerializeToString())
def _write_registry(self, registry_proto: RegistryProto):
    """Stamp ``registry_proto`` and write it to the local registry file.

    The proto is mutated in place: it receives a fresh random ``version_id``
    and its ``last_updated`` field is set from ``datetime.utcnow()`` before
    it is serialized to ``self._filepath``.

    Args:
        registry_proto: The registry to persist.
    """
    registry_proto.version_id = str(uuid.uuid4())
    registry_proto.last_updated.FromDatetime(datetime.utcnow())
    # parents=True so a registry path nested under directories that do not
    # exist yet is created instead of failing with FileNotFoundError.
    self._filepath.parent.mkdir(parents=True, exist_ok=True)
    payload = registry_proto.SerializeToString()
    # Use the default buffered writer: with buffering=0 the raw write() may
    # store fewer bytes than requested, and its return value was ignored.
    # Leaving the ``with`` block flushes and closes the file.
    with open(self._filepath, mode="wb") as f:
        f.write(payload)
def get_registry_proto(self):
    """Download and parse the registry stored in S3.

    Returns:
        The ``RegistryProto`` parsed from object ``self._key`` in bucket
        ``self._bucket``.

    Raises:
        FeastExtrasDependencyImportError: If ``botocore`` cannot be imported.
        S3RegistryBucketNotExist: If the bucket check fails with code 404.
        S3RegistryBucketForbiddenAccess: If the bucket check fails with any
            other client error.
        FileNotFoundError: If downloading the registry object fails with a
            client error.
    """
    try:
        from botocore.exceptions import ClientError
    except ImportError as e:
        from feast.errors import FeastExtrasDependencyImportError

        raise FeastExtrasDependencyImportError("aws", str(e)) from e

    try:
        bucket = self.s3_client.Bucket(self._bucket)
        self.s3_client.meta.client.head_bucket(Bucket=bucket.name)
    except ClientError as e:
        # A 404 means the bucket does not exist. Compare as text: the code is
        # not guaranteed to be numeric, so int() could raise ValueError and
        # hide the real error.
        if str(e.response["Error"]["Code"]) == "404":
            raise S3RegistryBucketNotExist(self._bucket) from e
        raise S3RegistryBucketForbiddenAccess(self._bucket) from e

    registry_proto = RegistryProto()
    try:
        # The context manager closes the temporary file on every path.
        with TemporaryFile() as file_obj:
            bucket.Object(self._key).download_fileobj(file_obj)
            file_obj.seek(0)
            registry_proto.ParseFromString(file_obj.read())
    except ClientError as e:
        raise FileNotFoundError(
            f"Error while trying to locate Registry at path {self._uri.geturl()}"
        ) from e
    return registry_proto
def get_registry_proto(self):
    """Load the registry from the local registry file.

    Returns:
        The ``RegistryProto`` parsed from the bytes stored at
        ``self._filepath``.

    Raises:
        FileNotFoundError: If no file exists at ``self._filepath``.
    """
    try:
        # Read directly instead of checking exists() first: the file could be
        # removed between the check and the read.
        serialized = self._filepath.read_bytes()
    except FileNotFoundError as e:
        raise FileNotFoundError(
            f'Registry not found at path "{self._filepath}". Have you run "feast apply"?'
        ) from e
    registry_proto = RegistryProto()
    registry_proto.ParseFromString(serialized)
    return registry_proto
def _initialize_registry(self):
    """Seed the registry store with an empty registry when none is found.

    Calls ``self._get_registry_proto()``; only if that raises
    ``FileNotFoundError`` is a blank ``RegistryProto`` tagged with
    ``REGISTRY_SCHEMA_VERSION`` and passed to the registry store.
    """
    try:
        self._get_registry_proto()
    except FileNotFoundError:
        # Nothing could be loaded: persist a blank, versioned registry.
        blank_registry = RegistryProto()
        blank_registry.registry_schema_version = REGISTRY_SCHEMA_VERSION
        self._registry_store.update_registry_proto(blank_registry)
def _write_registry(self, registry_proto: RegistryProto):
    """Stamp ``registry_proto`` and upload it to S3.

    The proto is mutated in place: it receives a fresh random ``version_id``
    and its ``last_updated`` field is set from ``datetime.utcnow()`` before
    it is serialized and stored under ``self._key`` in ``self._bucket``.

    Args:
        registry_proto: The registry to persist.
    """
    registry_proto.version_id = str(uuid.uuid4())
    registry_proto.last_updated.FromDatetime(datetime.utcnow())
    # we have already checked the bucket exists so no need to do it again
    # The context manager closes the temporary file even if the upload fails.
    with TemporaryFile() as file_obj:
        file_obj.write(registry_proto.SerializeToString())
        file_obj.seek(0)
        self.s3_client.Bucket(self._bucket).put_object(Body=file_obj, Key=self._key)
def update_registry_proto(self, updater: Callable[[RegistryProto], RegistryProto]):
    """Apply ``updater`` to the stored registry and write the result back.

    Args:
        updater: Receives the current registry and returns the registry to
            persist. When ``get_registry_proto`` raises ``FileNotFoundError``
            it receives an empty ``RegistryProto`` tagged with
            ``REGISTRY_SCHEMA_VERSION`` instead.
    """
    try:
        current = self.get_registry_proto()
    except FileNotFoundError:
        # No registry stored yet: start from a blank, versioned one.
        current = RegistryProto()
        current.registry_schema_version = REGISTRY_SCHEMA_VERSION
    self._write_registry(updater(current))
def _write_registry(self, registry_proto: RegistryProto):
    """Stamp ``registry_proto`` and upload it to Google Cloud Storage.

    The proto is mutated in place: it receives a fresh random ``version_id``
    and its ``last_updated`` field is set from ``datetime.utcnow()`` before
    it is serialized and uploaded to blob ``self._blob`` in ``self._bucket``.

    Args:
        registry_proto: The registry to persist.
    """
    registry_proto.version_id = str(uuid.uuid4())
    registry_proto.last_updated.FromDatetime(datetime.utcnow())
    # we have already checked the bucket exists so no need to do it again
    gs_bucket = self.gcs_client.get_bucket(self._bucket)
    blob = gs_bucket.blob(self._blob)
    # The context manager closes the temporary file even if the upload fails.
    with TemporaryFile() as file_obj:
        file_obj.write(registry_proto.SerializeToString())
        file_obj.seek(0)
        blob.upload_from_file(file_obj)
def _prepare_registry_for_changes(self):
    """Make sure a cached registry is available before it is modified.

    Calls ``self._get_registry_proto(allow_cache=True)``. If that raises
    ``FileNotFoundError``, a blank ``RegistryProto`` tagged with
    ``REGISTRY_SCHEMA_VERSION`` is installed as the cache and the cache
    creation time is set to ``datetime.now()``.

    Returns:
        ``self.cached_registry_proto``.
    """
    try:
        self._get_registry_proto(allow_cache=True)
    except FileNotFoundError:
        # No registry could be loaded: seed the cache with a blank one.
        blank_registry = RegistryProto()
        blank_registry.registry_schema_version = REGISTRY_SCHEMA_VERSION
        self.cached_registry_proto = blank_registry
        self.cached_registry_proto_created = datetime.now()

    return self.cached_registry_proto
def get_registry(self):
    """Load the registry from Google Cloud Storage.

    Returns:
        A ``RegistryProto`` parsed from blob ``self._blob`` when it exists.
        Otherwise an empty ``RegistryProto`` whose
        ``registry_schema_version`` is ``REGISTRY_SCHEMA_VERSION``.

    Raises:
        Exception: If the bucket ``self._bucket`` does not exist.
    """
    from google.cloud import storage
    from google.cloud.exceptions import NotFound

    registry_proto = RegistryProto()
    try:
        bucket = self.gcs_client.get_bucket(self._bucket)
    except NotFound as e:
        raise Exception(
            f"No bucket named {self._bucket} exists; please create it first."
        ) from e

    if storage.Blob(bucket=bucket, name=self._blob).exists(self.gcs_client):
        # Create the temporary file only when there is something to download,
        # and let the context manager close it on every path.
        with TemporaryFile() as file_obj:
            self.gcs_client.download_blob_to_file(self._uri.geturl(), file_obj)
            file_obj.seek(0)
            registry_proto.ParseFromString(file_obj.read())
    else:
        registry_proto.registry_schema_version = REGISTRY_SCHEMA_VERSION
    return registry_proto
def get_registry_proto(self):
    """Download and parse the registry stored in Google Cloud Storage.

    Returns:
        The ``RegistryProto`` parsed from blob ``self._blob`` in bucket
        ``self._bucket``.

    Raises:
        Exception: If the bucket ``self._bucket`` does not exist.
        FileNotFoundError: If the registry blob does not exist.
    """
    from google.cloud import storage
    from google.cloud.exceptions import NotFound

    try:
        bucket = self.gcs_client.get_bucket(self._bucket)
    except NotFound as e:
        raise Exception(
            f"No bucket named {self._bucket} exists; please create it first."
        ) from e

    if not storage.Blob(bucket=bucket, name=self._blob).exists(self.gcs_client):
        raise FileNotFoundError(
            f'Registry not found at path "{self._uri.geturl()}". Have you run "feast apply"?'
        )

    registry_proto = RegistryProto()
    # The context manager closes the temporary file on every path.
    with TemporaryFile() as file_obj:
        self.gcs_client.download_blob_to_file(
            self._uri.geturl(), file_obj, timeout=30
        )
        file_obj.seek(0)
        registry_proto.ParseFromString(file_obj.read())
    return registry_proto
def get_registry_proto(self) -> RegistryProto:
    """Fetch the most recent registry stored in the Postgres table.

    Returns:
        The registry parsed from the row with the highest ``version`` in
        ``self.table_name``. An empty ``RegistryProto`` is returned when the
        query yields no row or the table does not exist.
    """
    latest_registry = RegistryProto()
    table = sql.Identifier(self.table_name)
    latest_row_query = sql.SQL(
        """ SELECT registry FROM {} WHERE version = (SELECT max(version) FROM {}) """
    ).format(table, table)
    try:
        with _get_conn(self.db_config) as conn, conn.cursor() as cursor:
            cursor.execute(latest_row_query)
            newest_row = cursor.fetchone()
            if newest_row:
                latest_registry = latest_registry.FromString(newest_row[0])
    except psycopg2.errors.UndefinedTable:
        # A missing table is treated the same as an empty one.
        pass
    return latest_registry
def update_registry_proto(self, registry_proto: RegistryProto):
    """
    Store the given registry proto as a new row in the registry table.

    The schema (``db_config.db_schema``, falling back to ``db_config.user``)
    is created when it is not listed in ``information_schema.schemata``, and
    the table is created when it does not exist, before the serialized proto
    is inserted.

    Args:
        registry_proto: the new RegistryProto
    """
    target_schema = self.db_config.db_schema or self.db_config.user
    table = sql.Identifier(self.table_name)
    create_table_stmt = sql.SQL(
        """ CREATE TABLE IF NOT EXISTS {} ( version BIGSERIAL PRIMARY KEY, registry BYTEA NOT NULL ); """
    ).format(table)
    insert_stmt = sql.SQL(
        """ INSERT INTO {} (registry) VALUES (%s); """
    ).format(table)

    with _get_conn(self.db_config) as conn, conn.cursor() as cursor:
        cursor.execute(
            """ SELECT schema_name FROM information_schema.schemata WHERE schema_name = %s """,
            (target_schema,),
        )
        if not cursor.fetchone():
            create_schema_stmt = sql.SQL(
                "CREATE SCHEMA IF NOT EXISTS {} AUTHORIZATION {}"
            ).format(
                sql.Identifier(target_schema),
                sql.Identifier(self.db_config.user),
            )
            cursor.execute(create_schema_stmt)

        cursor.execute(create_table_stmt)
        # Do we want to keep track of the history or just keep the latest?
        cursor.execute(insert_stmt, [registry_proto.SerializeToString()])
def _initialize_registry(self):
    """Write a blank registry to the registry store.

    A new ``RegistryProto`` tagged with ``REGISTRY_SCHEMA_VERSION`` is built
    and passed to the registry store unconditionally.
    """
    blank_registry = RegistryProto()
    blank_registry.registry_schema_version = REGISTRY_SCHEMA_VERSION
    self._registry_store.update_registry_proto(blank_registry)