def remove_fields(self, *fields, multiple: bool = False, inplace: bool = True):
    """Remove the given fields from this struct.

    Args:
        *fields: fields (or their names) to remove; they are normalised with
            ``arg.update`` and ``arg.get_names`` before matching.
        multiple: only used when ``inplace`` is True. If False, stop after
            the first matching field has been removed; if True, remove every
            matching field.
        inplace: if True, mutate the list returned by ``self.get_fields()``
            and return None; otherwise leave it untouched and build a new
            object with ``self.make_new`` from the fields that are kept.

    Returns:
        None when ``inplace`` is True, else the result of ``self.make_new``.
    """
    removing_fields = arg.update(fields)
    removing_field_names = arg.get_names(removing_fields)
    existing_fields = self.get_fields()
    if inplace:
        # Iterate over a snapshot: removing from the list that is being
        # iterated shifts the remaining items, which made the loop skip the
        # element right after every removed one.
        for e in list(existing_fields):
            if arg.get_name(e) in removing_field_names:
                existing_fields.remove(e)
                if not multiple:
                    break
    else:
        new_fields = [f for f in existing_fields if arg.get_name(f) not in removing_field_names]
        return self.make_new(new_fields)
def __init__(self, name: Name, value: Union[Value, Auto] = AUTO, update: bool = False):
    """Set ``name`` and ``value`` unless the instance is already initialized.

    Args:
        name: object whose name is resolved with ``get_name``.
        value: explicit value; when ``self._auto_value`` is truthy it is
            passed through ``Auto.acquire`` with the resolved name.
        update: force re-assignment even if ``self._is_initialized()``
            reports that the instance is already initialized.
    """
    # Guard clause: nothing to do for an initialized instance without update.
    if not update and self._is_initialized():
        return
    resolved_name = get_name(name)
    if self._auto_value:
        value = Auto.acquire(value, resolved_name)
    self.name = resolved_name
    self.value = value
def get_parsed_line(
        self,
        line: str,
        item_type: Union[ItemType, Auto] = AUTO,
        struct: Union[Array, StructInterface, Auto] = AUTO,
) -> Item:
    """Parse one text line into an item of the requested type.

    Args:
        line: raw line to parse.
        item_type: requested item type; resolved with
            ``arg.delayed_acquire`` against ``self.get_default_item_type``.
        struct: optional struct; when it is a ``StructInterface`` its
            converters are applied to the parsed row. When it is not defined
            (per ``arg.is_defined``), column positions are used instead.

    Returns:
        The untouched line for ``ItemType.Line``, the parsed row for
        ``Row``/``Any``/``Auto``, a dict for ``Record`` or the result of
        ``ItemType.StructRow.build`` for ``StructRow``.

    Raises:
        ValueError: for any other item type.
    """
    item_type = arg.delayed_acquire(item_type, self.get_default_item_type)
    if item_type == ItemType.Line:
        return line

    parse = fs.csv_loads(delimiter=self.get_delimiter())
    row = parse(line)
    if isinstance(struct, StructInterface):
        convert_row = self._get_row_converter(converters=struct.get_converters())
        row = convert_row(row)
    if item_type in (ItemType.Row, ItemType.Any, ItemType.Auto):
        return row

    # Without a usable struct fall back to column numbers as field keys.
    if not arg.is_defined(struct, check_name=False):
        struct = list(range(len(row)))

    if item_type == ItemType.Record:
        record = dict()
        for key, value in zip(struct, row):
            record[arg.get_name(key)] = value
        return record
    if item_type == ItemType.StructRow:
        return ItemType.StructRow.build(data=row, struct=struct)
    template = 'item_type {} is not supported for {}.parse_lines()'
    raise ValueError(template.format(item_type, self.__class__.__name__))
def get_one_column_values(self, column: Field, as_list: bool = False) -> Iterable:
    """Return the values of one column taken from ``self.get_records()``.

    The previous version mixed ``yield`` and ``return list(...)`` in one
    body, which made the whole function a generator: with ``as_list=True``
    the list was swallowed as the generator's return value and callers got
    an empty iterator. The values are now produced by a generator
    expression, so this is a plain function and both modes work.

    Args:
        column: field (or field name); resolved with ``arg.get_name``.
        as_list: if True return a list, otherwise a lazy iterator.

    Returns:
        A list or an iterator of ``record.get(column)`` for every record.
    """
    column = arg.get_name(column)
    values = (r.get(column) for r in self.get_records())
    return list(values) if as_list else values
def warning(self, msg: str, category: Optional[Type] = None, stacklevel: Optional[int] = None) -> None:
    """Log ``msg`` with the Warning level.

    Args:
        msg: message text.
        category: optional warning category; its name (via ``arg.get_name``)
            is put into the message, but only when ``stacklevel`` is truthy.
        stacklevel: index into ``inspect.stack()`` of the frame whose file
            name and line number are prepended to the message.
    """
    if stacklevel:
        caller = getframeinfo(stack()[stacklevel][0])
        if category:
            category_name = arg.get_name(category)
        else:
            category_name = ''
        location = (caller.filename, caller.lineno)
        msg = '{}:{}: {} {}'.format(*location, category_name, msg)
    self.log(msg=msg, level=LoggingLevel.Warning)
def get_str_fields_count(self, types: Array = (str, int, float, bool)) -> str:
    """Build a one-line summary of how many fields there are of each type.

    Args:
        types: types to count separately; everything else is reported as
            "other".

    Returns:
        Text like ``'<total> total = <n> <type> + ... + <m> other'``.
    """
    total_count = self.get_fields_count()
    # One (count, type name) pair per requested type, in the given order.
    counted = [
        (self.get_type_count(t), arg.get_name(t, or_callable=False))
        for t in types
    ]
    known_count = sum(count for count, _ in counted)
    parts = ['{} {}'.format(count, type_name) for count, type_name in counted]
    return '{} total = {} + {} other'.format(
        total_count, ' + '.join(parts), total_count - known_count,
    )
def get_type_count(self, field_type: Type = AUTO, by_prefix: bool = True) -> int:
    """Count fields whose type name matches ``field_type``.

    Args:
        field_type: type to look for; its name is resolved with
            ``arg.get_name(..., or_callable=False)``.
        by_prefix: if True a field matches when its type name starts with
            the requested name, otherwise the names must be equal.

    Returns:
        Number of matching fields from ``self.get_fields()``.
    """
    wanted_name = arg.get_name(field_type, or_callable=False)
    if by_prefix:
        return sum(
            1 for field in self.get_fields()
            if field.get_type_name().startswith(wanted_name)
        )
    return sum(
        1 for field in self.get_fields()
        if field.get_type_name() == wanted_name
    )
def get_compatible_expression_tuples(expressions: dict) -> dict:
    """Convert a mapping of expressions into a mapping keyed by names.

    Keys are resolved with ``arg.get_name``. Values are converted depending
    on their kind: lists/tuples go through ``get_selection_tuple``, objects
    accepted by ``is_expression_description`` provide their own
    ``get_selection_tuple()``, anything else goes through
    ``get_name_or_function``.

    Args:
        expressions: mapping of field-like keys to expression descriptions.

    Returns:
        A new dict; the input mapping is not modified.
    """
    def prepare(description):
        # Pick the conversion matching the kind of description.
        if isinstance(description, (list, tuple)):
            return get_selection_tuple(description)
        if is_expression_description(description):
            return description.get_selection_tuple()
        return get_name_or_function(description)

    result = {}
    for key, description in expressions.items():
        name = arg.get_name(key)
        result[name] = prepare(description)
    return result
def get_field_value_from_record(
        field: Union[FieldID, Callable],
        record: Record,
        default: Value = None,
        skip_missing: bool = True,
) -> Value:
    """Extract one value from a record.

    Args:
        field: either a callable applied to the whole record, or a field
            identifier resolved to a key with ``arg.get_name``.
        record: mapping-like record to read from.
        default: value returned for a missing key when ``skip_missing``.
        skip_missing: if True use ``record.get(key, default)``; if False
            index the record directly, so a missing key raises whatever
            ``record[key]`` raises.

    Returns:
        The computed or looked-up value.
    """
    if isinstance(field, Callable):
        return field(record)
    key = arg.get_name(field)
    return record.get(key, default) if skip_missing else record[key]
def validate_about(self, standard: StructInterface, ignore_moved: bool = False) -> Native:
    """Compare own fields with a standard struct and replace them with an annotated result.

    Every field of this struct and every field of the standard ends up in the
    resulting field list with a validity flag; problems are marked by a
    ``[TAG]`` prefix in the field caption. Tags used: ``DUPLICATE_IN_DATA``,
    ``DUPLICATE``, ``MOVED``, ``UNEXPECTED``, ``DUPLICATE_IN_STRUCT`` and
    ``MISSING_IN_FILE``.

    Args:
        standard: expected struct; converted with ``self.convert_to_native``.
        ignore_moved: if True, a field found at a different position than in
            the standard is not tagged as ``MOVED``.

    Returns:
        ``self``, after its fields have been replaced in place.
    """
    expected_struct = self.convert_to_native(standard)
    # Working copy: fields are removed from it as soon as they are seen.
    remaining_struct = expected_struct.copy()
    assert isinstance(expected_struct, FlatStruct)
    assert isinstance(remaining_struct, FlatStruct)
    updated_struct = FlatStruct([])
    # First pass: walk over the fields actually present in this struct.
    for pos_received, f_received in enumerate(self.get_fields()):
        assert isinstance(f_received, AdvancedField)
        f_name = f_received.get_name()
        if f_name in updated_struct.get_field_names():
            # The same name has already been processed: it is a duplicate.
            is_valid = False
            warning = 'DUPLICATE_IN_DATA' if f_name in remaining_struct.get_field_names() else 'DUPLICATE'
            f_expected = updated_struct.get_field_description(f_name)
            f_updated = f_expected.set_valid(is_valid, inplace=False)
        elif f_name in expected_struct.get_field_names():
            # Known field: valid, possibly at another position than expected.
            is_valid = True
            pos_expected = expected_struct.get_field_position(f_name)
            warning = None if pos_received == pos_expected or ignore_moved else 'MOVED'
            f_expected = expected_struct.get_field_description(f_name)
            f_updated = f_expected.set_valid(is_valid, inplace=False)
        else:
            # Field is absent in the standard struct.
            is_valid = False
            warning = 'UNEXPECTED'
            message = 'field has been found in actual struct, but not in expected standard struct'
            caption = '{} ({})'.format(f_received.get_caption(), message)
            f_updated = f_received.set_valid(is_valid, inplace=False).set_caption(caption, inplace=False)
        if warning:
            caption = '[{}] {}'.format(warning, f_updated.get_caption() or '')
            f_updated = f_updated.set_caption(caption, inplace=False)
        updated_struct.append_field(f_updated)
        if f_name in remaining_struct.get_field_names():
            remaining_struct.remove_fields(f_name, inplace=True)
    # Second pass: whatever is left in the standard was not consumed above.
    for f_remaining in remaining_struct.get_columns():
        f_name = arg.get_name(f_remaining)
        is_valid = False
        f_expected = expected_struct.get_field_description(f_name)
        if f_name in updated_struct.get_field_names():
            warning = 'DUPLICATE_IN_STRUCT'
        else:
            warning = 'MISSING_IN_FILE'
        caption = '[{}] {}'.format(warning, f_expected.get_caption() or '')
        f_updated = f_expected.set_valid(is_valid, inplace=False).set_caption(caption, inplace=False)
        updated_struct.append_field(f_updated)
    self.set_fields(updated_struct.get_fields(), inplace=True)
    return self
def get_dialect_type_from_conn_type_name(
        conn_type: Union[DynamicEnum, str],
        default: DialectType = DialectType.Python,
        other: DialectType = DialectType.String,
) -> DialectType:
    """Pick a dialect type by looking at the name of a connection type.

    Args:
        conn_type: connection type or its name; ``None`` selects ``default``.
        default: dialect used when ``conn_type`` is ``None``.
        other: dialect used when the name contains neither ``'Postgres'``
            nor ``'Click'``.

    Returns:
        The chosen dialect; a choice that is not a ``DialectType`` instance
        is passed through ``DialectType.find_instance`` first.
    """
    if conn_type is not None:
        conn_name = get_name(conn_type)
        if 'Postgres' in conn_name:
            chosen = DialectType.Postgres
        elif 'Click' in conn_name:
            chosen = DialectType.Clickhouse
        else:
            chosen = other
    else:
        chosen = default
    if isinstance(chosen, DialectType):
        return chosen
    return DialectType.find_instance(chosen)
def add_column(self, name: Field, values: Iterable, ignore_errors: bool = False) -> Native:
    """Return a stream whose items are extended with one more column.

    Each item from ``self.get_items()`` is merged (``fs.merge_two_items``)
    with ``{name: value}``, taking values in order from ``values``.

    For an in-memory stream with ``ignore_errors=False`` the number of values
    is checked against ``self.get_count()``. That check needs ``len()``, so a
    non-array ``values`` is materialised into a list. This now happens
    BEFORE the lazy ``map`` is built: previously the map kept a reference to
    the original iterable, so a one-shot iterator was drained by the check
    and the resulting stream came out empty.

    Args:
        name: field (or field name) of the new column; resolved with
            ``arg.get_name``.
        values: values of the new column, one per item.
        ignore_errors: skip the items-count check for in-memory streams.

    Returns:
        The new stream; converted with ``to_memory()`` when this stream is
        in memory.

    Raises:
        AssertionError: if the check is enabled and the counts differ.
    """
    name = arg.get_name(name)
    in_memory = self.is_in_memory()
    if in_memory and not ignore_errors:
        if not isinstance(values, ARRAY_TYPES):
            values = list(values)
        msg = 'for add_column() stream and values must have same items count, got {} != {}'
        assert self.get_count() == len(values), msg.format(self.get_count(), len(values))
    items = map(lambda i, v: fs.merge_two_items()(i, {name: v}), self.get_items(), values)
    stream = self.stream(items)
    if in_memory:
        stream = stream.to_memory()
    return stream
def log(
        self,
        msg: Union[str, list, tuple],
        level: Level = arg.AUTO,
        logger: Union[BaseLogger, arg.Auto] = arg.AUTO,
        end: Union[str, arg.Auto] = arg.AUTO,
        verbose: bool = True,
        truncate: bool = True,
        category: Optional[Type] = None,
        stacklevel: Optional[int] = None,
) -> None:
    """Send a message to the logger and/or show it.

    Args:
        msg: text, exception or iterable of message parts.
        level: logging level; when not given it is Info for ``verbose`` and
            Debug otherwise.
        logger: target logger; resolved lazily from
            ``self.get_base_logger``.
        end: passed to ``self.show``.
        verbose: allow showing messages whose level is not suitable for the
            logger.
        truncate: passed to ``self.show``.
        category: optional category; its name is prepended to the message.
        stacklevel: when truthy, ``file:line:`` of the frame at
            ``stacklevel + 1`` in ``inspect.stack()`` is prepended.

    Raises:
        TypeError: if ``msg`` is neither a string nor an iterable.
    """
    fallback_level = LoggingLevel.Info if verbose else LoggingLevel.Debug
    level = arg.acquire(level, fallback_level)
    logger = arg.delayed_acquire(logger, self.get_base_logger)

    # Normalise the message into a list of parts.
    if isinstance(msg, BaseException):
        msg = str(msg)
    if isinstance(msg, str):
        parts = [msg]
    elif isinstance(msg, Iterable):
        parts = list(msg)
    else:
        raise TypeError('Expected msg as str or list[str], got {}'.format(msg))

    if category:
        parts = [arg.get_name(category)] + parts
    if stacklevel:
        # stack() must be called from this very frame to keep the offset.
        caller = getframeinfo(stack()[stacklevel + 1][0])
        # Strip directories for both Windows and POSIX separators.
        file_name_without_path = caller.filename.rpartition('\\')[2].rpartition('/')[2]
        parts = ['{}:{}:'.format(file_name_without_path, caller.lineno)] + parts

    # At this point the parts are always a list, so they are always formatted.
    text = self.format_message(*parts)

    if not isinstance(level, LoggingLevel):
        level = LoggingLevel(level)
    if logger and self.is_suitable_level(level):
        logging_method = getattr(logger, level.get_method_name())
        logging_method(text)
    if verbose and not self.is_suitable_level(level):
        self.show(text, end=end, truncate=truncate)
def detect(cls, obj, default: Union[Optional[DynamicEnum], str] = 'str') -> EnumItem:
    """Detect the dialect type that corresponds to ``obj``.

    Args:
        obj: a ``DialectType`` (returned as is), a name, a class (its
            ``__name__`` is used) or any object accepted by ``get_name``.
        default: fallback used when the name is not a known dialect and
            contains neither ``'Postgres'`` nor ``'Click'``.

    Returns:
        The detected dialect; a result that is not a ``DialectType``
        instance is passed through ``DialectType.find_instance``.
    """
    if isinstance(obj, DialectType):
        return obj

    # Resolve a name to search by.
    if isinstance(obj, str):
        name = obj
    else:
        name = obj.__name__ if isclass(obj) else get_name(obj)

    found = DialectType.find_instance(name)
    if not found:
        if 'Postgres' in name:
            found = DialectType.Postgres
        elif 'Click' in name:
            found = DialectType.Clickhouse
        else:
            found = default
    if isinstance(found, DialectType):
        return found
    return DialectType.find_instance(found)
def set_file(
        self,
        file: Union[File, Name],
        encoding: str = DEFAULT_ENCODING,
        level: Level = DEFAULT_LOGGING_LEVEL,
        formatter: Formatter = DEFAULT_FORMATTER,
        if_not_added: bool = True,
) -> LoggerInterface:
    """Attach a file handler writing to ``file`` to this logger.

    Args:
        file: file object or file name; resolved with ``arg.get_name``.
        encoding: encoding passed to ``logging.FileHandler``.
        level: level of the new handler; resolved with ``arg.get_value``.
        formatter: ``logging.Formatter`` or a format string to build one.
        if_not_added: if True, do nothing when an existing handler already
            writes to a file whose path ends with the file name.

    Returns:
        ``self``, to allow chaining.
    """
    filename = arg.get_name(file)
    level = arg.get_value(level)
    # Fixed typo: ``endswidth`` -> ``endswith``. The misspelled call raised
    # AttributeError as soon as any handler with ``baseFilename`` existed.
    is_added = any(
        hasattr(h, 'baseFilename') and h.baseFilename.endswith(filename)
        for h in self.get_handlers()
    )
    if not (is_added and if_not_added):
        file_handler = logging.FileHandler(filename, encoding=encoding)
        file_handler.setLevel(level)
        if isinstance(formatter, str):
            formatter = logging.Formatter(formatter)
        file_handler.setFormatter(formatter)
        self.add_handler(file_handler)
    return self
def get_field_position(self, field: Field) -> Optional[FieldNo]:
    """Return the position of a field.

    Args:
        field: a ``FieldNo`` (returned unchanged) or a field whose name is
            resolved with ``arg.get_name`` and looked up in the struct.

    Returns:
        The position reported by ``self.get_struct().get_field_position``.
    """
    if isinstance(field, FieldNo):
        return field
    field_name = arg.get_name(field)
    struct = self.get_struct()
    return struct.get_field_position(field_name)
def get_source_name(self) -> Name:
    """Return the name of the source field (resolved with ``arg.get_name``)."""
    source_field = self.get_source_field()
    return arg.get_name(source_field)
def get_one_column_values(self, column: Field) -> Iterable:
    """Return one column of the underlying dataframe.

    Args:
        column: field (or field name); resolved with ``arg.get_name``.

    Returns:
        The result of indexing ``self.get_dataframe()`` by the column name.
    """
    key = arg.get_name(column)
    dataframe = self.get_dataframe()
    return dataframe[key]
def get_target_field_name(self) -> Name:
    """Return the name of the target field stored in ``self._target``."""
    target = self._target
    return arg.get_name(target)
def get_type_name(self) -> str:
    """Return the name of this object's type as a string.

    The value of ``self.get_type()`` (via ``arg.get_value``) is used directly
    when it is a string, otherwise its name is resolved with
    ``arg.get_name``.
    """
    type_value = arg.get_value(self.get_type())
    if isinstance(type_value, str):
        return str(type_value)
    return str(arg.get_name(type_value))
def get_name_or_function(field) -> Union[int, str, Callable]:
    """Return a callable unchanged, anything else as ``arg.get_name(field)``."""
    return field if isinstance(field, Callable) else arg.get_name(field)