def _validate_reference_list(reference_list: List[str] = None) -> List[str]:
    """Return ``reference_list`` unchanged after checking it is usable.

    Args:
        reference_list: Candidate ordering list for the CustomListSorter.

    Returns:
        The very same list object that was passed in.

    Raises:
        ge_exceptions.SorterError: If ``reference_list`` is falsy (``None``,
            empty), is not a ``list``, or contains a non-string item.
    """
    # Truthiness is tested before the type, so None and [] are rejected first.
    if not reference_list or not isinstance(reference_list, list):
        raise ge_exceptions.SorterError(
            "CustomListSorter requires reference_list which was not provided."
        )

    for item in reference_list:
        if isinstance(item, str):
            continue
        # The first offending item determines the type reported in the error.
        raise ge_exceptions.SorterError(
            f"Items in reference list for CustomListSorter must have string type (actual type is `{str(type(item))}`)."
        )

    return reference_list
def parse_string_to_datetime(datetime_string: str, datetime_format_string: str) -> datetime.date:
    """Parse ``datetime_string`` with ``datetime_format_string`` and return its date.

    Args:
        datetime_string: Text to parse; must be a ``str``.
        datetime_format_string: ``strptime`` format. A falsy value is not
            rejected by the type check and is handed to ``strptime`` as-is.

    Returns:
        The ``datetime.date`` part of the parsed value (any time component
        is discarded).

    Raises:
        ge_exceptions.SorterError: If ``datetime_string`` is not a string, or
            if ``datetime_format_string`` is truthy but not a string.
    """
    if not isinstance(datetime_string, str):
        raise ge_exceptions.SorterError(
            f"""Source "datetime_string" must have string type (actual type is "{str(type(datetime_string))}"). """
        )

    # Only a truthy format is type-checked; falsy formats skip this guard.
    format_is_acceptable = not datetime_format_string or isinstance(
        datetime_format_string, str
    )
    if not format_is_acceptable:
        raise ge_exceptions.SorterError(
            f"""DateTime parsing formatter "datetime_format_string" must have string type (actual type is "{str(type(datetime_format_string))}"). """
        )

    parsed = datetime.datetime.strptime(datetime_string, datetime_format_string)
    return parsed.date()
def _verify_sorting_directives_and_get_partition_key(self, batch_definition: BatchDefinition) -> Any:
    """Return the sort key for ``batch_definition`` after checking it is sortable.

    The batch definition is sortable by this sorter only when its
    ``partition_definition`` holds a non-``None`` value under ``self.name``.

    Args:
        batch_definition: The batch definition whose key is requested.

    Returns:
        Whatever ``self.get_partition_key`` returns for ``batch_definition``.

    Raises:
        ge_exceptions.SorterError: If the attribute is missing or ``None``.
    """
    attribute_value = batch_definition.partition_definition.get(self.name)
    if attribute_value is not None:
        return self.get_partition_key(batch_definition=batch_definition)

    raise ge_exceptions.SorterError(
        f'Unable to sort batch_definition "{batch_definition}" by attribute "{self.name}".'
    )
def get_batch_key(self, batch_definition: BatchDefinition) -> Any:
    """Return the position of this sorter's batch identifier in the reference list.

    Args:
        batch_definition: Batch definition whose ``batch_identifiers`` mapping
            holds a value under ``self.name``.

    Returns:
        The index of the first occurrence of that value in
        ``self._reference_list``.

    Raises:
        ge_exceptions.SorterError: If the value is not in the reference list.
        KeyError: If ``batch_identifiers`` has no entry for ``self.name``.
    """
    batch_identifiers: dict = batch_definition.batch_identifiers
    batch_value: Any = batch_identifiers[self.name]
    try:
        # One scan: index() both locates the value and detects its absence,
        # instead of a membership test followed by a second search.
        return self._reference_list.index(batch_value)
    except ValueError:
        # `from None` keeps the traceback limited to the SorterError.
        raise ge_exceptions.SorterError(
            f"Source {batch_value} was not found in Reference list. Try again..."
        ) from None
def get_partition_key(self, batch_definition: BatchDefinition) -> Any:
    """Return the position of this sorter's partition value in the reference list.

    Args:
        batch_definition: Batch definition whose ``partition_definition``
            mapping holds a value under ``self.name``.

    Returns:
        The index of the first occurrence of that value in
        ``self._reference_list``.

    Raises:
        ge_exceptions.SorterError: If the value is not in the reference list.
        KeyError: If ``partition_definition`` has no entry for ``self.name``.
    """
    partition_definition: dict = batch_definition.partition_definition
    partition_value: Any = partition_definition[self.name]
    try:
        # One scan: index() both locates the value and detects its absence,
        # instead of a membership test followed by a second search.
        return self._reference_list.index(partition_value)
    except ValueError:
        # `from None` keeps the traceback limited to the SorterError.
        raise ge_exceptions.SorterError(
            f"Source {partition_value} was not found in Reference list. Try again..."
        ) from None
def __init__(self, name: str, orderby: str = "asc"):
    """Store the attribute name and translate ``orderby`` into a reverse flag.

    Args:
        name: Attribute this sorter operates on; stored as ``self._name``.
        orderby: ``"asc"`` or ``None`` for ascending, ``"desc"`` for
            descending.

    Raises:
        ge_exceptions.SorterError: If ``orderby`` is any other value. The
            name has already been stored on the instance at that point.
    """
    self._name = name

    ascending = orderby is None or orderby == "asc"
    if not ascending and not orderby == "desc":
        raise ge_exceptions.SorterError(
            f'Illegal sort order "{orderby}" for attribute "{name}".')

    # Reaching here with ascending False means orderby was "desc".
    self._reverse = not ascending
def __init__(self, name: str, orderby: str = "asc", datetime_format="%Y%m%d") -> None:
    """Initialise the sorter and remember the datetime format to parse with.

    ``name`` and ``orderby`` are forwarded to the parent initializer.

    Args:
        name: Attribute this sorter operates on.
        orderby: Sort direction, forwarded unchanged to the parent.
        datetime_format: Format string stored as ``self._datetime_format``.
            A falsy value is stored without being type-checked.

    Raises:
        ge_exceptions.SorterError: If ``datetime_format`` is truthy but not
            a string.
    """
    super().__init__(name=name, orderby=orderby)

    if datetime_format and not isinstance(datetime_format, str):
        # The message names this initializer's own parameter
        # ("datetime_format"), so the caller can tell which argument to fix.
        raise ge_exceptions.SorterError(
            f"""DateTime parsing formatter "datetime_format" must have string type (actual type is "{str(type(datetime_format))}"). """
        )

    self._datetime_format = datetime_format
def get_batch_key(self, batch_definition: BatchDefinition) -> Any:
    """Return an integer sort key for this sorter's batch identifier.

    The value stored under ``self.name`` in ``batch_definition.batch_identifiers``
    is converted with ``int`` when ``is_int`` accepts it, otherwise with
    ``round(float(...))``.

    Args:
        batch_definition: Batch definition whose ``batch_identifiers`` mapping
            holds a value under ``self.name``.

    Returns:
        The integer key derived from the identifier value.

    Raises:
        ge_exceptions.SorterError: If ``is_numeric`` rejects the value.
        KeyError: If ``batch_identifiers`` has no entry for ``self.name``.
    """
    # is_numeric / is_int are defined outside this block; presumably they also
    # accept numeric-looking strings -- TODO confirm against their definitions.
    batch_value: Any = batch_definition.batch_identifiers[self.name]

    if not is_numeric(value=batch_value):
        # what is the identifying characteristic of batch_definition?
        raise ge_exceptions.SorterError(
            f"""BatchDefinition with IDDict "{self.name}" with value "{batch_value}" has value "{batch_value}" which cannot be part of numeric sort. """
        )

    if is_int(value=batch_value):
        return int(batch_value)

    # The case of strings having floating point number format used as references to partitions should be rare.
    # NOTE(review): round() maps distinct fractional values to the same key
    # (e.g. 1.2 and 1.4 both become 1), so such batches keep their input order.
    return round(float(batch_value))