class MultiContents:
    """Commit payload: the commit metadata together with its operations."""

    # Serialized under the camelCase key "commitMeta".
    commit_meta: CommitMeta = desert.ib(
        marshmallow_field=fields.Nested(CommitMetaSchema, data_key="commitMeta"),
    )
    # Each list item is (de)serialized through an OperationsSchema instance.
    operations: List[Operation] = desert.ib(
        marshmallow_field=fields.List(fields.Nested(OperationsSchema())),
    )
class SqlView(Contents):
    """Dataclass for a Nessie SQL view: a SQL dialect plus the view's SQL text."""

    dialect: str = desert.ib(fields.Str())
    # NOTE(review): attribute and wire key are spelled "sql_test"/"sqlTest"; this
    # looks like a typo for "sqlText" -- confirm against the server model before renaming.
    sql_test: str = desert.ib(fields.Str(data_key="sqlTest"))

    def pretty_print(self: "SqlView") -> str:
        """Return the CLI text for this view.

        The heading identifies the content as a SQL view, followed by the
        dialect and the raw SQL on tab-indented lines.
        """
        # todo use a sql parser to pretty print this
        return f"SqlView:\n\tDialect: {self.dialect}\n\tSql: {self.sql_test}"
class CommitMeta:
    """Metadata of a commit: hash, commit time, committer, email and message."""

    # Required fields.
    hash_: str = desert.ib(marshmallow_field=fields.Str(data_key="hash"))
    commitTime: int = desert.ib(marshmallow_field=fields.Int())

    # Optional fields: default to None and accept an explicit null.
    # NOTE(review): "commiter" (single "t") has no data_key, so the attribute name
    # is presumably also the serialized key -- confirm before renaming.
    commiter: str = desert.ib(marshmallow_field=fields.Str(allow_none=True), default=None)
    email: str = desert.ib(marshmallow_field=fields.Str(allow_none=True), default=None)
    message: str = desert.ib(marshmallow_field=fields.Str(allow_none=True), default=None)
class DeltaLakeTable(Contents):
    """Dataclass for a Nessie Delta Lake table: last checkpoint plus location histories."""

    # NOTE(review): the wire key here is "metadataLocation", the same key the
    # Iceberg table uses for its metadata location -- confirm this is intended
    # for the last checkpoint rather than a copy-paste leftover.
    last_checkpoint: str = desert.ib(fields.Str(data_key="metadataLocation"))
    checkpoint_location_history: List[str] = desert.ib(fields.List(fields.Str))
    metadata_location_history: List[str] = desert.ib(fields.List(fields.Str))

    def pretty_print(self: "DeltaLakeTable") -> str:
        """Return the CLI text for this table.

        The heading identifies the content as a Delta Lake table; the two
        histories are rendered one entry per line, separated by a newline and
        two tabs.
        """
        deltas = "\n\t\t".join(self.metadata_location_history)
        checkpoints = "\n\t\t".join(self.checkpoint_location_history)
        return (
            f"DeltaLake table:\n\tLast Checkpoint: {self.last_checkpoint}"
            f"\n\tDelta History: {deltas}\n\tCheckpoint History: {checkpoints}"
        )
class LogEntry:
    """One commit-log entry: commit metadata, optional parent hash and optional operations."""

    commit_meta: CommitMeta = desert.ib(
        marshmallow_field=fields.Nested(CommitMetaSchema, data_key="commitMeta"),
    )
    # Optional; serialized as "parentCommitHash" and allowed to be null.
    parent_commit_hash: str = desert.ib(
        marshmallow_field=fields.Str(data_key="parentCommitHash", allow_none=True),
        default=None,
    )
    # Optional list of operations; defaults to None when absent.
    operations: List[Operation] = desert.ib(
        fields.List(fields.Nested(OperationsSchema()), allow_none=True),
        default=None,
    )
class IcebergView(Content):
    """Nessie content describing an Iceberg view."""

    metadata_location: str = desert.ib(marshmallow_field=fields.Str(data_key="metadataLocation"))
    version_id: int = desert.ib(marshmallow_field=fields.Int(data_key="versionId"))
    schema_id: int = desert.ib(marshmallow_field=fields.Int(data_key="schemaId"))
    dialect: str = desert.ib(marshmallow_field=fields.Str())
    sql_text: str = desert.ib(marshmallow_field=fields.Str(data_key="sqlText"))

    def pretty_print(self: "IcebergView") -> str:
        """Return the CLI text: a heading followed by one tab-indented line per attribute."""
        # todo use a sql parser to pretty print this
        rows = (
            "IcebergView:",
            f"metadata-location: {self.metadata_location}",
            f"version-id: {self.version_id}",
            f"schema-id: {self.schema_id}",
            f"Dialect: {self.dialect}",
            f"Sql: {self.sql_text}",
        )
        return "\n\t".join(rows)
class ReferenceMetadata:
    """Dataclass for Nessie ReferenceMetadata.

    Holds the commit metadata of a reference's HEAD together with optional
    commit counts and the common-ancestor hash. All fields other than
    ``commit_meta_of_head`` default to None and accept an explicit null.
    """

    # Accepts null on the wire, but has no default, so it must still be supplied
    # when constructing the object.
    commit_meta_of_head: CommitMeta = desert.ib(
        fields.Nested(CommitMetaSchema, data_key="commitMetaOfHEAD", allow_none=True))
    num_commits_ahead: int = attr.ib(
        default=None,
        metadata=desert.metadata(fields.Int(allow_none=True, data_key="numCommitsAhead")))
    num_commits_behind: int = attr.ib(
        default=None,
        metadata=desert.metadata(fields.Int(allow_none=True, data_key="numCommitsBehind")))
    common_ancestor_hash: str = attr.ib(
        default=None,
        metadata=desert.metadata(fields.Str(allow_none=True, data_key="commonAncestorHash")))
    # Annotated as int to agree with the Int marshmallow field (and with the
    # sibling commit counters); the explicit field drives (de)serialization.
    num_total_commits: int = attr.ib(
        default=None,
        metadata=desert.metadata(fields.Int(allow_none=True, data_key="numTotalCommits")))
class Operation:
    """Base type for a single operation inside a commit, addressed by a content key."""

    key: ContentKey = desert.ib(marshmallow_field=fields.Nested(ContentKeySchema))

    def pretty_print(self: "Operation") -> str:
        """CLI rendering hook; this base implementation has no body and yields None."""
class Contents:
    """Base type for Nessie contents; carries only the contents id."""

    id: str = desert.ib(marshmallow_field=fields.Str())

    def pretty_print(self: "Contents") -> str:
        """CLI rendering hook; this base implementation has no body and yields None."""
class IcebergTable(Contents):
    """Nessie contents describing an Iceberg table by its metadata location."""

    metadata_location: str = desert.ib(marshmallow_field=fields.Str(data_key="metadataLocation"))

    def pretty_print(self: "IcebergTable") -> str:
        """Return the CLI text: a heading and the metadata location on a tab-indented line."""
        return f"Iceberg table:\n\t{self.metadata_location}"
class Entries:
    """One page of content entries, with paging information."""

    entries: List[Entry] = desert.ib(marshmallow_field=fields.List(fields.Nested(EntrySchema())))
    # Paging: "hasMore" defaults to False, the continuation token to None.
    has_more: bool = desert.ib(
        marshmallow_field=fields.Bool(allow_none=True, data_key="hasMore"),
        default=False,
    )
    token: str = desert.ib(marshmallow_field=fields.Str(allow_none=True), default=None)
class CommitMeta:
    """Metadata of a commit; every field is optional and defaults to None."""

    hash_: str = desert.ib(marshmallow_field=fields.Str(data_key="hash"), default=None)

    # Timestamps are handled by marshmallow DateTime fields.
    commitTime: datetime = desert.ib(marshmallow_field=fields.DateTime(), default=None)
    authorTime: datetime = desert.ib(marshmallow_field=fields.DateTime(), default=None)

    # Free-text attributes; each may also be an explicit null.
    committer: str = desert.ib(marshmallow_field=fields.Str(allow_none=True), default=None)
    author: str = desert.ib(marshmallow_field=fields.Str(allow_none=True), default=None)
    signedOffBy: str = desert.ib(marshmallow_field=fields.Str(allow_none=True), default=None)
    message: str = desert.ib(marshmallow_field=fields.Str(allow_none=True), default=None)

    properties: dict = desert.ib(marshmallow_field=fields.Dict(), default=None)
class ReflogResponse:
    """One page of reflog entries, with paging information."""

    # Serialized under the camelCase key "logEntries".
    log_entries: List[ReflogEntry] = desert.ib(
        marshmallow_field=fields.List(fields.Nested(ReflogEntrySchema()), data_key="logEntries"),
    )
    # Paging: "hasMore" defaults to False, the continuation token to None.
    has_more: bool = desert.ib(
        marshmallow_field=fields.Bool(allow_none=True, data_key="hasMore"),
        default=False,
    )
    token: str = desert.ib(marshmallow_field=fields.Str(allow_none=True), default=None)
class ReferencesResponse:
    """One page of references, with paging information."""

    references: List[Reference] = desert.ib(
        marshmallow_field=fields.List(fields.Nested(ReferenceSchema())),
    )
    # Paging: "hasMore" defaults to False, the continuation token to None.
    has_more: bool = desert.ib(
        marshmallow_field=fields.Bool(allow_none=True, data_key="hasMore"),
        default=False,
    )
    token: str = desert.ib(marshmallow_field=fields.Str(allow_none=True), default=None)
class LogResponse:
    """One page of commit-log results, with paging information."""

    # The list items are commit metadata records.
    operations: List[CommitMeta] = desert.ib(
        marshmallow_field=fields.List(fields.Nested(CommitMetaSchema())),
    )
    # Paging: "hasMore" defaults to False, the continuation token to None.
    has_more: bool = desert.ib(
        marshmallow_field=fields.Bool(allow_none=True, data_key="hasMore"),
        default=False,
    )
    token: str = desert.ib(marshmallow_field=fields.Str(allow_none=True), default=None)
class IcebergTable(Content):
    """Nessie content describing an Iceberg table and its current ids."""

    metadata_location: str = desert.ib(marshmallow_field=fields.Str(data_key="metadataLocation"))
    snapshot_id: int = desert.ib(marshmallow_field=fields.Int(data_key="snapshotId"))
    schema_id: int = desert.ib(marshmallow_field=fields.Int(data_key="schemaId"))
    spec_id: int = desert.ib(marshmallow_field=fields.Int(data_key="specId"))
    sort_order_id: int = desert.ib(marshmallow_field=fields.Int(data_key="sortOrderId"))

    @staticmethod
    def requires_expected_state() -> bool:
        """Always True: Put operations on Iceberg tables should carry the expected state."""
        return True

    def pretty_print(self: "IcebergTable") -> str:
        """Return the CLI text: a heading followed by one tab-indented line per attribute."""
        rows = [
            "Iceberg table:",
            f"metadata-location: {self.metadata_location}",
            f"snapshot-id: {self.snapshot_id}",
            f"schema-id: {self.schema_id}",
            f"partition-spec-id: {self.spec_id}",
            f"default-sort-order-id: {self.sort_order_id}",
        ]
        return "\n\t".join(rows)
class Put(Operation):
    """Commit operation that puts content, optionally with the expected prior content."""

    content: Content = desert.ib(marshmallow_field=fields.Nested(ContentSchema))
    # Optional; defaults to None and may be an explicit null.
    expectedContent: Optional[Content] = desert.ib(
        marshmallow_field=fields.Nested(ContentSchema, allow_none=True),
        default=None,
    )

    def pretty_print(self: "Put") -> str:
        """Return the CLI text: the key's friendly string followed by the content's own rendering."""
        # pylint: disable=E1101
        key_text = self.key.to_string()
        content_text = self.content.pretty_print()
        return f"Put of {key_text} : {content_text}"
class Content:
    """Base type for Nessie content; carries only the content id."""

    id: str = desert.ib(marshmallow_field=fields.Str())

    @staticmethod
    def requires_expected_state() -> bool:
        """Tell whether Put operations must provide the "expected" state; False for the base type."""
        return False

    def pretty_print(self: "Content") -> str:
        """CLI rendering hook; this base implementation has no body and yields None."""
class Reference:
    """A Nessie reference: a name plus an optional hash and optional metadata."""

    name: str = desert.ib(marshmallow_field=fields.Str())
    # Optional; serialized as "hash" and allowed to be null.
    hash_: Optional[str] = desert.ib(
        marshmallow_field=fields.Str(data_key="hash", allow_none=True),
        default=None,
    )
    # Optional nested reference metadata, serialized as "metadata".
    metadata: Optional[ReferenceMetadata] = desert.ib(
        marshmallow_field=fields.Nested(ReferenceMetadataSchema, allow_none=True, data_key="metadata"),
        default=None,
    )
class ArchivingTarget:
    """Settings for one archiving target.

    Two required strings name the transfer process and its argument prefix;
    the remaining path/script settings are optional and default to None.
    """

    transfer_process_name: str
    transfer_process_argument_prefix: str

    # TODO: mutable attribute...
    # String keys mapped to string-or-None values; a fresh dict is created per instance.
    env: Dict[str, Optional[str]] = desert.ib(
        marshmallow.fields.Dict(
            keys=marshmallow.fields.String(),
            values=CustomStringField(allow_none=True),
        ),
        factory=dict,
    )

    disk_space_path: Optional[str] = None
    disk_space_script: Optional[str] = None
    transfer_path: Optional[str] = None
    transfer_script: Optional[str] = None
class DiffEntry:
    """One diff entry: a content key with its "from" and "to" content, either of which may be null."""

    content_key: ContentKey = desert.ib(
        marshmallow_field=fields.Nested(ContentKeySchema, data_key="key"),
    )
    from_content: Content = desert.ib(
        marshmallow_field=fields.Nested(ContentSchema, data_key="from", allow_none=True, default=None),
    )
    to_content: Content = desert.ib(
        marshmallow_field=fields.Nested(ContentSchema, data_key="to", allow_none=True, default=None),
    )

    def pretty_print(self: "DiffEntry") -> str:
        """Return the CLI text: key, FROM section, TO section, then a dashed separator.

        A missing (falsy) side is rendered as an empty string.
        """
        # pylint: disable=E1101
        from_output = ""
        if self.from_content:
            from_output = f"{self.from_content.pretty_print()}"

        to_output = ""
        if self.to_content:
            to_output = f"{self.to_content.pretty_print()}"

        # NOTE(review): the FROM body is tab-indented while the TO body is not;
        # kept as-is here -- confirm whether the asymmetry is intentional.
        header = f"ContentKey: {self.content_key.to_path_string()}"
        body = f"\nFROM:\n\t{from_output}\nTO:\n{to_output}"
        return header + body + "\n----------------------------------------"
class ContentKey:
    """Key of a content object, held as an ordered list of string elements."""

    elements: List[str] = desert.ib(marshmallow_field=fields.List(fields.Str))

    def to_string(self: "ContentKey") -> str:
        """Render the key for the CLI.

        Elements are joined with dots; an element that itself contains a dot is
        wrapped in double quotes.
        """
        # false positives in pylint
        # pylint: disable=E1133
        rendered = []
        for element in self.elements:
            if "." in element:
                rendered.append(f'"{element}"')
            else:
                rendered.append(element)
        return ".".join(rendered)

    def to_path_string(self: "ContentKey") -> str:
        """Render the key as a path string.

        Dots inside an element are replaced by the null character, then the
        elements are joined with dots; a falsy element contributes an empty string.
        """

        def _encode(element: str) -> str:
            if not element:
                return ""
            return element.replace(".", "\00")

        # false positives in pylint
        # pylint: disable=E1133
        return ".".join(_encode(element) for element in self.elements)

    @staticmethod
    def from_path_string(key: str) -> "ContentKey":
        """Build a ContentKey from a dotted string, dropping empty elements."""
        pieces = ContentKey._split_key_based_on_regex(key)
        return ContentKey(list(filter(None, pieces)))

    @staticmethod
    def _split_key_based_on_regex(raw_key: str) -> List[str]:
        """Split ``raw_key`` on dots, treating double-quoted sections as atomic.

        E.g. in a.b."c.d" the quoted part stays one element with its dot intact.
        """

        def _shield(match: "re.Match") -> str:
            # Inside a quoted section: hide dots behind the null char '\00'
            # and strip the surrounding double quotes.
            return match.group(0).replace(".", "\00").replace('"', "")

        quoted_section = re.compile('"[^"]*"')
        shielded = quoted_section.sub(_shield, raw_key)
        # Split on the remaining (unquoted) dots, then restore the hidden ones.
        return [piece.replace("\00", ".") for piece in shielded.split(".")]
class Put(Operation):
    """Commit operation that carries the contents to put."""

    contents: Contents = desert.ib(marshmallow_field=fields.Nested(ContentsSchema))
class A:
    # Single field declared through desert.ib with an explicit marshmallow
    # NaiveDateTime field and an extra caller-supplied metadata entry {"foo": 1}.
    # NOTE(review): the annotation is `str` while the marshmallow field is
    # NaiveDateTime -- presumably deliberate for whatever exercises this class; confirm.
    x: str = desert.ib(marshmallow.fields.NaiveDateTime(), metadata={"foo": 1})
class Operation:
    """Base type for a single operation inside a commit, addressed by a contents key."""

    key: ContentsKey = desert.ib(marshmallow_field=fields.Nested(ContentsKeySchema))
class Entry:
    """A Nessie entry: its kind and its name."""

    # Serialized under the key "type".
    kind: str = desert.ib(marshmallow_field=fields.Str(data_key="type"))
    name: EntryName = desert.ib(marshmallow_field=fields.Nested(EntryNameSchema))
class EntryName:
    """Name of a Nessie entry, held as a list of string elements."""

    elements: List[str] = desert.ib(marshmallow_field=fields.List(fields.Str()))
class Reference:
    """Dataclass for Nessie Reference: a name plus an optional hash."""

    name: str = desert.ib(fields.Str())
    # The annotation is Optional[str], so the field now matches it: it defaults
    # to None and accepts an explicit null. Serialized under the key "hash".
    hash_: Optional[str] = attr.ib(
        default=None,
        metadata=desert.metadata(fields.Str(data_key="hash", allow_none=True)),
    )
class ContentsKey:
    """Key of a contents object, held as a list of string elements."""

    elements: List[str] = desert.ib(marshmallow_field=fields.List(fields.Str))
class MultiContents:
    """Commit payload: the list of operations to apply."""

    # Each list item is (de)serialized through an OperationsSchema instance.
    operations: List[Operation] = desert.ib(
        marshmallow_field=fields.List(fields.Nested(OperationsSchema())),
    )