Пример #1
0
 def __init__(
         self,
         name: OptName = arg.AUTO,
         level: Level = arg.AUTO,
         formatter: Union[Formatter, arg.Auto] = arg.AUTO,
         loggers: SubLoggers = arg.AUTO,
         context: Context = None,
         file: Optional[FileOrName] = None,
 ):
     """Set up the logger, its level and its named child loggers.

     ``name``, ``level`` and ``formatter`` are passed through ``arg.acquire``
     with DEFAULT_LOGGER_NAME, DEFAULT_LOGGING_LEVEL and DEFAULT_FORMATTER
     as fallbacks.

     :param name: logger name; also the key of the base logger in ``loggers``.
     :param level: a LoggingLevel, or a value accepted by ``LoggingLevel(...)``.
     :param formatter: handed to ``build_base_logger`` when a base logger is built.
     :param loggers: a list of loggers (each must provide ``get_name()``),
         a mapping keyed by logger name, or an undefined value for no children.
     :param context: forwarded to the parent initializer.
     :param file: when truthy, passed to ``set_file`` after initialization.
     """
     name = arg.acquire(name, DEFAULT_LOGGER_NAME)
     level = arg.acquire(level, DEFAULT_LOGGING_LEVEL)
     formatter = arg.acquire(formatter, DEFAULT_FORMATTER)
     if not isinstance(level, LoggingLevel):
         level = LoggingLevel(level)
     if isinstance(loggers, list):
         # Key by name: the membership test and assignment below treat
         # the mapping as name -> logger, not logger -> name.
         loggers = {i.get_name(): i for i in loggers}
     elif not arg.is_defined(loggers):
         loggers = dict()
     if name not in loggers:
         # No child registered under our own name yet: build the base logger.
         level_value = arg.get_value(level)
         base_logger = self.build_base_logger(name, level_value, formatter)
         loggers[name] = base_logger
     self._level = level
     super().__init__(name=name, children=loggers, context=context)
     if file:
         self.set_file(file)
Пример #2
0
 def get_lines(
     self,
     count: Optional[int] = None,
     skip_first: bool = False,
     allow_reopen: bool = True,
     check: bool = True,
     verbose: AutoBool = AUTO,
     message: Union[str, Auto] = AUTO,
     step: AutoCount = AUTO,
 ) -> Iterable:
     """Open the file and return an iterable over its lines.

     :param count: forwarded to ``get_next_lines``; when falsy and progress
         is shown, replaced by ``get_count(allow_slow_gzip=False)``.
     :param skip_first: forwarded to ``get_next_lines``.
     :param allow_reopen: forwarded to ``open``.
     :param check: when true and the file is not gzip, assert it is non-empty.
     :param verbose: resolved via ``arg.acquire`` with ``is_verbose()`` as fallback.
     :param message: progress title; a ``{}`` placeholder receives the file name.
     :param step: forwarded to the logger's ``progress``.
     :return: the lines, wrapped by the logger's ``progress`` when verbose
         is truthy or a message is defined.
     """
     must_be_non_empty = check and not self.is_gzip()
     if must_be_non_empty:
         assert not self.is_empty(), 'for get_lines() file must be non-empty: {}'.format(self)
     self.open(allow_reopen=allow_reopen)
     lines = self.get_next_lines(count=count, skip_first=skip_first, close=True)
     verbose = arg.acquire(verbose, self.is_verbose())
     if not (verbose or arg.is_defined(message)):
         return lines
     # Progress reporting requested: build the title and wrap the iterator.
     message = arg.acquire(message, 'Reading {}')
     if '{}' in message:
         message = message.format(self.get_name())
     logger = self.get_logger()
     assert hasattr(logger, 'progress'), '{} has no progress in {}'.format(self, logger)
     if not count:
         count = self.get_count(allow_slow_gzip=False)
     return self.get_logger().progress(lines, name=message, count=count, step=step)
Пример #3
0
 def to_line_stream(
     self,
     delimiter: Union[str, Auto] = AUTO,
     columns: Columns = AUTO,
     add_title_row: Union[bool, Auto] = AUTO,
 ) -> LineStream:
     """Convert this stream into a LineStream.

     :param delimiter: string used to join each item; the fallback is a tab
         for a RowStream and None otherwise. When falsy, items go through ``str``.
     :param columns: used only for a RecordStream; fallback is ``get_columns``.
     :param add_title_row: used only for a RecordStream; fallback is True.
     :return: the new stream passed through ``_assume_native``.
     """
     stream_type = self.get_stream_type()
     default_delimiter = '\t' if stream_type == StreamType.RowStream else None
     delimiter = arg.acquire(delimiter, default_delimiter)
     source = self
     if source.get_stream_type() == StreamType.RecordStream:
         # Records are converted to rows first so they can be joined below.
         assert isinstance(source, RegularStream) or hasattr(source, 'get_columns'), 'got {}'.format(source)
         columns = arg.acquire(columns, source.get_columns, delayed=True)
         add_title_row = arg.acquire(add_title_row, True)
         source = source.to_row_stream(columns=columns, add_title_row=add_title_row)
     item_to_line = delimiter.join if delimiter else str
     line_stream = self.stream(
         source._get_mapped_items(item_to_line),
         stream_type=StreamType.LineStream,
     )
     return self._assume_native(line_stream)
Пример #4
0
 def set_logger(
     self,
     logger: Union[Logger, Auto] = AUTO,
     selection_logger: Union[Logger, Auto] = AUTO,
 ) -> None:
     """Store the main logger and the selection logger on this object.

     Both values go through ``arg.acquire``; the fallbacks are the
     ``get_logger`` / ``get_selection_logger`` attributes of the ``logger``
     argument, or None when that attribute is absent.

     :param logger: value stored in ``_logger``.
     :param selection_logger: value stored in ``_selection_logger``.
     """
     # NOTE(review): the fallback is the attribute itself (not called here);
     # presumably arg.acquire handles that - confirm against arg.acquire.
     self._logger = arg.acquire(logger, getattr(logger, 'get_logger', None))
     self._selection_logger = arg.acquire(
         selection_logger, getattr(logger, 'get_selection_logger', None))
Пример #5
0
 def get_group_header(self, name: Comment = AUTO, caption: Comment = AUTO, comment: Comment = None) -> Iterable[str]:
     """Yield the header items of the group, skipping undefined ones.

     Order: name, caption, the result of ``get_str_fields_count()`` (only
     when ``name`` was left equal to AUTO), then comment.

     :param name: fallback is ``get_name()``.
     :param caption: fallback is ``get_caption()``.
     :param comment: yielded last when defined.
     """
     # Must be captured before name is resolved to a concrete value.
     is_title_row = name == arg.AUTO
     name = arg.acquire(name, self.get_name())
     caption = arg.acquire(caption, self.get_caption())
     for item in (name, caption):
         if arg.is_defined(item):
             yield item
     if is_title_row:
         yield self.get_str_fields_count()
     if arg.is_defined(comment):
         yield comment
Пример #6
0
 def group_by(
     self,
     *keys,
     values: Columns = None,
     step: AutoCount = AUTO,
     as_pairs: bool = False,
     take_hash: bool = True,
     verbose: bool = True,
 ) -> Stream:
     """Sort the stream by the given keys, then group the sorted stream.

     :param keys: grouping keys, normalized by ``arg.update`` and
         ``arg.get_names``; if the first key has ``get_field_names``, its
         field names replace the whole key list.
     :param values: normalized by ``arg.get_names`` and forwarded to
         ``sorted_group_by``.
     :param step: forwarded to ``sort``; fallback is ``max_items_in_memory``.
     :param as_pairs: when true, the keys themselves are the sort key;
         otherwise ``get_key_function`` builds one.
     :param take_hash: forwarded to ``get_key_function``.
     :param verbose: forwarded to ``sort``.
     :return: the result of ``sorted_group_by`` on the sorted stream.
     """
     keys = arg.update(keys)
     keys = arg.get_names(keys)
     values = arg.get_names(values)
     # Guard the first-key lookup so a call without keys does not raise IndexError.
     if keys and hasattr(keys[0], 'get_field_names'):  # if isinstance(keys[0], FieldGroup)
         keys = keys[0].get_field_names()
     step = arg.acquire(step, self.max_items_in_memory)
     if as_pairs:
         key_for_sort = keys
     else:
         key_for_sort = get_key_function(keys, take_hash=take_hash)
     sorted_stream = self.sort(
         key_for_sort,
         step=step,
         verbose=verbose,
     )
     return sorted_stream.sorted_group_by(
         keys,
         values=values,
         as_pairs=as_pairs,
     )
Пример #7
0
 def to_column_file(
     self,
     filename,
     delimiter='\t',
     encoding=AUTO,
     gzip=False,
     check=AUTO,
     verbose=True,
     return_stream=True,
 ):
     """Write the stream to a delimited text file via a line stream.

     :param filename: forwarded to ``to_text_file``.
     :param delimiter: used for writing and for re-reading as a row stream.
     :param encoding: fallback is ``get_encoding()``.
     :param gzip: forwarded to ``to_text_file``; when false, the 'count'
         entry is dropped from the meta applied to the returned stream.
     :param check: forwarded to ``to_text_file``.
     :param verbose: forwarded to ``to_text_file``.
     :param return_stream: when truthy, return a row stream built from the
         written file with this stream's meta applied; otherwise return None.
     """
     encoding = arg.acquire(encoding, self.get_encoding())
     meta = self.get_meta()
     if not gzip:
         # A default avoids KeyError when the meta has no 'count' entry.
         meta.pop('count', None)
     stream_csv_file = self.to_line_stream(
         delimiter=delimiter,
     ).to_text_file(
         filename,
         encoding=encoding,
         gzip=gzip,
         check=check,
         verbose=verbose,
         return_stream=return_stream,
     )
     if return_stream:
         return stream_csv_file.to_row_stream(
             delimiter=delimiter,
         ).update_meta(**meta)
     return None
Пример #8
0
 def folder(self, name: str, folder_type: Union[FolderType, Auto] = AUTO, **kwargs) -> ConnectorInterface:
     """Create a folder connector with this object as parent, register it and return it.

     :param name: folder name; also used by ``FolderType.detect_by_name`` for the fallback type.
     :param folder_type: explicit folder type; fallback is the detected one.
     :param kwargs: forwarded to the folder class constructor.
     """
     detected_type = FolderType.detect_by_name(name)
     resolved_type = FolderType(arg.acquire(folder_type, detected_type))
     new_folder = resolved_type.get_class()(name, parent=self, **kwargs)
     self.add_folder(new_folder)
     return new_folder
Пример #9
0
 def execute_if_exists(
     self,
     query: str,
     table: Union[Table, Name],
     message_if_yes: Optional[str] = None,
     message_if_no: Optional[str] = None,
     stop_if_no: bool = False,
     verbose: AutoBool = AUTO,
 ) -> Optional[Iterable]:
     """Execute ``query`` only when the table exists.

     A ``{}`` placeholder in ``query`` and in both messages is filled with
     the table name.

     :param query: query text to execute.
     :param table: table object or name, resolved by ``_get_table_name``.
     :param message_if_yes: logged after execution when the table exists.
     :param message_if_no: logged (or raised) when the table is missing.
     :param stop_if_no: raise instead of logging when the table is missing.
     :param verbose: fallback is ``message_if_yes or message_if_no``.
     :return: the result of ``execute``, or None when the table is missing.
     :raises ValueError: when the table is missing and ``stop_if_no`` is true.
     """
     verbose = arg.acquire(verbose, message_if_yes or message_if_no)
     table_name = self._get_table_name(table)
     if not self.exists_table(table_name, verbose=verbose):
         if message_if_no and '{}' in message_if_no:
             message_if_no = message_if_no.format(table_name)
         if stop_if_no:
             raise ValueError(message_if_no)
         if message_if_no:
             self.log(message_if_no, verbose=verbose)
         return None
     if '{}' in query:
         query = query.format(table_name)
     result = self.execute(query, verbose=verbose)
     if message_if_yes:
         if '{}' in message_if_yes:
             message_if_yes = message_if_yes.format(table_name)
         self.log(message_if_yes, verbose=verbose)
     return result
Пример #10
0
 def __init__(
     self,
     data: Iterable,
     name: AutoName = AUTO,
     check: bool = False,
     count: AutoCount = None,
     less_than: AutoCount = None,
     source: Connector = None,
     context: Context = None,
     max_items_in_memory: AutoCount = AUTO,
     tmp_files: TmpMask = AUTO,
 ):
     """Initialize the stream with its data, size hints and temp-file mask.

     :param data: items of the stream.
     :param count: passed through ``arg.get_optional_len(data, count)``.
     :param less_than: when falsy, the resolved count is used instead.
     :param max_items_in_memory: fallback is ``sm.MAX_ITEMS_IN_MEMORY``.
     :param tmp_files: resolved via ``arg.delayed_acquire`` with
         ``sm.get_tmp_mask`` and the stream name.
     Remaining arguments are forwarded unchanged to the parent initializer.
     """
     count = arg.get_optional_len(data, count)
     if not less_than:
         less_than = count
     # Set before the parent initializer runs, as in the original order.
     self.max_items_in_memory = arg.acquire(max_items_in_memory, sm.MAX_ITEMS_IN_MEMORY)
     parent_kwargs = dict(
         data=data, name=name, check=check,
         source=source, context=context,
         count=count, less_than=less_than,
     )
     super().__init__(**parent_kwargs)
     # get_name() is only called after the parent initializer has run.
     self._tmp_files = arg.delayed_acquire(tmp_files, sm.get_tmp_mask, self.get_name())
Пример #11
0
 def create_table(
     self,
     table: Union[Table, Name],
     struct: Struct,
     drop_if_exists: bool = False,
     verbose: AutoBool = AUTO,
 ) -> Table:
     """Create a table with the given structure and return its table object.

     :param table: table object or name.
     :param struct: table structure; rendered by ``_get_table_name_and_struct_str``.
     :param drop_if_exists: call ``drop_table`` before creating.
     :param verbose: fallback is ``self.verbose``; when exactly True, the
         'Creating table:' title is passed to ``execute`` instead.
     :return: ``self.table(table, struct=struct)`` when ``struct`` is truthy,
         else ``self.table(table)``.
     """
     verbose = arg.acquire(verbose, self.verbose)
     table_name, struct_str = self._get_table_name_and_struct_str(table, struct, check_struct=True)
     if drop_if_exists:
         self.drop_table(table_name, verbose=verbose)
     query = 'CREATE TABLE {name} ({struct});'.format(name=table_name, struct=struct_str)
     execute_verbose = 'Creating table:' if verbose is True else verbose
     self.execute(query, get_data=False, commit=True, verbose=execute_verbose)
     self.post_create_action(table_name, verbose=verbose)
     self.log('Table {name} is created.'.format(name=table_name), verbose=verbose)
     table_kwargs = {'struct': struct} if struct else {}
     return self.table(table, **table_kwargs)
Пример #12
0
 def disk_sort(
     self,
     key: UniKey = fs.same(),
     reverse: bool = False,
     step: AutoCount = AUTO,
     verbose: AutoBool = False,
 ) -> Native:
     """Sort the stream through on-disk parts merged back into one stream.

     :param key: sort key, turned into a key function by ``fs.composite_key``.
     :param reverse: forwarded to the splitting and merging steps.
     :param step: forwarded to ``split_to_disk_by_step``; fallback is
         ``max_items_in_memory``.
     :param verbose: forwarded to the splitting step and to ``log``.
     :return: a stream over the merged parts whose count is the sum of the
         part counts (a falsy part count is taken as 0).
     """
     step = arg.acquire(step, self.max_items_in_memory)
     key_function = fs.composite_key(key)
     parts = self.split_to_disk_by_step(
         step=step, sort_each_by=key_function, reverse=reverse, verbose=verbose,
     )
     assert parts, 'streams must be non-empty'
     iterables = [part.get_iter() for part in parts]
     counts = [part.get_count() or 0 for part in parts]
     self.log('Merging {} parts... '.format(len(iterables)), verbose=verbose)
     merged_items = algo.merge_iter(
         iterables,
         key_function=key_function,
         reverse=reverse,
         post_action=self.get_tmp_files().remove_all,
     )
     return self.stream(merged_items, count=sum(counts))
Пример #13
0
 def reset_modification_timestamp(self, timestamp: Union[float, Auto, None] = AUTO) -> Native:
     """Store a modification timestamp on this object and return ``self``.

     :param timestamp: value to store; the fallback is
         ``get_modification_timestamp(reset=False)``, which is evaluated on every call.
     """
     current_ts = self.get_modification_timestamp(reset=False)
     self._modification_ts = arg.acquire(timestamp, current_ts)
     return self
Пример #14
0
 def __init__(self, name: Union[str, arg.Auto] = arg.AUTO, ignore_warnings: bool = False):
     """Store the logger name and the ignore-warnings flag.

     :param name: fallback is DEFAULT_LOGGER_NAME.
     :param ignore_warnings: stored as given in ``_ignore_warnings``.
     """
     resolved_name = arg.acquire(name, DEFAULT_LOGGER_NAME)
     self._name = resolved_name
     self._ignore_warnings = ignore_warnings
Пример #15
0
 def reset_selection_logger(self, name: OptName = arg.AUTO, **kwargs) -> Optional[SelectionLoggerInterface]:
     """Ask the context for a new selection logger and install it.

     :param name: fallback is SELECTION_LOGGER_NAME.
     :param kwargs: forwarded to ``context.get_new_selection_logger``.
     :return: the new selection logger, or None when there is no context
         or the context returned a falsy value.
     """
     name = arg.acquire(name, SELECTION_LOGGER_NAME)
     context = self.get_context()
     if not context:
         return None
     new_logger = context.get_new_selection_logger(name, **kwargs)
     if not new_logger:
         return None
     self.set_selection_logger(new_logger)
     return new_logger
Пример #16
0
 def log(self, msg, level=arg.AUTO, end=arg.AUTO, verbose=arg.AUTO) -> None:
     """Forward a message to this object's logger; do nothing when there is none.

     :param msg: message passed to ``logger.log``.
     :param level: passed through unchanged.
     :param end: passed through unchanged.
     :param verbose: fallback is ``self.verbose``.
     """
     logger = self.get_logger()
     if logger is None:
         return
     log_kwargs = dict(
         logger=self.get_logger(),
         msg=msg,
         level=level,
         end=end,
         verbose=arg.acquire(verbose, self.verbose),
     )
     logger.log(**log_kwargs)
Пример #17
0
def get_year_and_week_from_week_abs(week_abs: int,
                                    min_year: Union[int, arg.Auto] = arg.AUTO
                                    ) -> tuple:
    """Split an absolute week number into a ``(year, week)`` pair.

    :param week_abs: week number counted in units of WEEKS_IN_YEAR per year.
    :param min_year: year added to the whole-year part; fallback is ``_min_year``.
    :return: tuple ``(year, week)``.
    """
    first_year = arg.acquire(min_year, _min_year)
    # int() on the true-division result truncates toward zero.
    years_passed = int(week_abs / WEEKS_IN_YEAR)
    week_in_year = week_abs - years_passed * WEEKS_IN_YEAR
    return first_year + years_passed, week_in_year
Пример #18
0
def get_week_abs_from_year_and_week(
    year: int,
    week: int,
    min_year: Union[int, arg.Auto] = arg.AUTO,
) -> int:
    """Combine a year and a week into an absolute week number.

    :param year: year of the week.
    :param week: week number added to the whole-year part.
    :param min_year: year subtracted from ``year``; fallback is ``get_min_year()``.
    :return: ``(year - min_year) * WEEKS_IN_YEAR + week``.
    """
    first_year = arg.acquire(min_year, get_min_year())
    years_passed = year - first_year
    return years_passed * WEEKS_IN_YEAR + week
Пример #19
0
 def disk_sort_by_key(self, reverse=False, step=AUTO) -> Native:
     """Run ``disk_sort`` using ``self._get_key`` as the sort key.

     :param reverse: forwarded to ``disk_sort``.
     :param step: forwarded to ``disk_sort``; fallback is ``max_items_in_memory``.
     :return: the sorted stream passed through ``_assume_native``.
     """
     resolved_step = arg.acquire(step, self.max_items_in_memory)
     sorted_stream = self.disk_sort(key=self._get_key, reverse=reverse, step=resolved_step)
     return self._assume_native(sorted_stream)
Пример #20
0
 def is_inside_folder(self, folder: Union[str, Connector, Auto] = AUTO) -> bool:
     """Check this object's folder path against the path of ``folder``.

     :param folder: a path string or an object providing ``get_path()``;
         fallback is ``get_folder()``.
     :return: True when ``get_folder_path()`` occurs inside the folder's path.
     """
     folder_obj = arg.acquire(folder, self.get_folder())
     # Anything that is not a string is expected to provide get_path().
     folder_path = folder_obj if isinstance(folder_obj, str) else folder_obj.get_path()
     # NOTE(review): this tests that our folder path is contained in the given
     # path, not the other way round - confirm the intended direction.
     return self.get_folder_path() in folder_path
Пример #21
0
 def get_selection_logger(self, name: OptName = arg.AUTO, **kwargs) -> Optional[SelectionLoggerInterface]:
     """Return the child selection logger, creating it when it is missing.

     :param name: child name; fallback is SELECTION_LOGGER_NAME.
     :param kwargs: applied via ``set_meta`` to an existing logger, or
         forwarded to ``reset_selection_logger`` for a new one.
     """
     name = arg.acquire(name, SELECTION_LOGGER_NAME)
     existing = self.get_child(name)
     if not existing:
         return self.reset_selection_logger(name, **kwargs)
     if kwargs:
         existing.set_meta(name, **kwargs)
     return existing
Пример #22
0
 def get_new_progress(self, name: Name, count: Count = None, context: OptContext = arg.AUTO) -> ProgressInterface:
     """Create a Progress bound to this logger, add it as a child and return it.

     :param name: forwarded to ``Progress``.
     :param count: forwarded to ``Progress``.
     :param context: fallback is ``get_context``, passed with ``delayed=True``.
     """
     progress_context = arg.acquire(context, self.get_context, delayed=True)
     new_progress = Progress(name=name, count=count, logger=self, context=progress_context)
     self.add_child(new_progress, check=False)
     return new_progress
Пример #23
0
 def __init__(
     self,
     name: Union[Name, Auto] = AUTO,
     logger: Union[LoggerInterface, Auto] = AUTO,
     skip_not_implemented: bool = True,
 ):
     """Initialize private state, then call the parent initializer with the name.

     :param name: fallback is NAME.
     :param logger: stored as given (not resolved here) in ``_logger``.
     :param skip_not_implemented: stored as given in ``_skip_not_implemented``.
     """
     # Private attributes are assigned before the parent initializer runs.
     self._logger = logger
     self._local_storage = None
     self._skip_not_implemented = skip_not_implemented
     self._tmp_folder = None
     resolved_name = arg.acquire(name, NAME)
     super().__init__(name=resolved_name)
Пример #24
0
 def to_stream(self, stream_type: AutoStreamType = AUTO, *args, **kwargs) -> Stream:
     """Dispatch to the ``to_<suffix>`` method that matches the stream type.

     :param stream_type: target type; fallback is ``get_stream_type()``.
     :param args: forwarded after ``stream_type`` to the selected method.
     :param kwargs: forwarded to the selected method.
     :return: whatever the selected ``to_<suffix>`` method returns.
     """
     stream_type = arg.acquire(stream_type, self.get_stream_type())
     suffix = StreamType.of(stream_type).get_method_suffix()
     converter = self.__getattribute__('to_{}'.format(suffix))
     return converter(stream_type, *args, **kwargs)
Пример #25
0
 def can_be_in_memory(self, step: AutoCount = AUTO) -> bool:
     """Tell whether the stream may be held in memory.

     :param step: item limit; fallback is ``max_items_in_memory``.
     :return: True when already in memory or the limit is None; False when
         the estimated count is None; otherwise ``count <= step``.
     """
     step = arg.acquire(step, self.max_items_in_memory)
     if self.is_in_memory() or step is None:
         return True
     estimated = self.get_estimated_count()
     if estimated is None:
         return False
     return estimated <= step
Пример #26
0
 def __init__(self, name: Name, value: Union[Value, arg.Auto] = arg.AUTO, update: bool = False):
     """Set ``name`` and ``value`` unless the instance is already initialized.

     :param name: normalized by ``arg.get_name``.
     :param value: when ``_auto_value`` is truthy, resolved via
         ``arg.acquire`` with the name as fallback; otherwise stored as given.
     :param update: assign even when ``_is_initialized()`` is true.
     """
     if not update and self._is_initialized():
         return
     name = arg.get_name(name)
     if self._auto_value:
         value = arg.acquire(value, name)
     self.name = name
     self.value = value
Пример #27
0
 def format_message(
         self, *messages,
         max_len: Union[int, arg.Auto] = arg.AUTO,
         truncate: bool = True,
 ) -> str:
     """Join the messages with SPACE and optionally truncate the result.

     :param messages: parts of the message, normalized by ``arg.update``;
         each part is converted with ``str``.
     :param max_len: length limit; fallback is ``self.max_line_len``.
     :param truncate: when true and the joined message exceeds ``max_len``,
         cut it and append TRUNCATED_SUFFIX.
     :return: the joined (and possibly truncated) message.
     """
     messages = arg.update(messages)
     max_len = arg.acquire(max_len, self.max_line_len)
     message = SPACE.join(map(str, messages))
     if truncate and len(message) > max_len:
         # Reserve room for the actual suffix instead of a hard-coded 2, and
         # never use a negative bound (it would slice from the end instead).
         keep = max(max_len - len(TRUNCATED_SUFFIX), 0)
         message = message[:keep] + TRUNCATED_SUFFIX
     return message
Пример #28
0
 def update_with_step(self, position, step=arg.AUTO):
     """Record the new position and call ``update_now`` when a step or the pool is done.

     :param position: new position, stored in ``self.position``.
     :param step: step size; fallback is DEFAULT_STEP.
     """
     step = arg.acquire(step, DEFAULT_STEP)
     # Measured against the previous position, before it is overwritten.
     cur_increment = position - (self.position or 0)
     self.position = position
     step_passed = (self.position + 1) % step == 0 or (cur_increment >= step)
     expected_count = self.expected_count
     if not arg.is_defined(expected_count):
         expected_count = 0
     # A zero expected count never counts as a finished pool.
     pool_finished = 0 < expected_count < (self.position + 1)
     if not (step_passed or pool_finished):
         return
     self.update_now(position)
Пример #29
0
 def map_to_type(self, function: Callable, stream_type: AutoStreamType = AUTO) -> Stream:
     """Map ``function`` over the items and build a stream of the given type.

     :param function: applied to every item from ``get_items()``.
     :param stream_type: type of the new stream; fallback is ``get_stream_type()``.
     :return: the new stream, passed through ``to_memory()`` when this
         object has ``is_in_memory`` and it returns a truthy value.
     """
     stream_type = arg.acquire(stream_type, self.get_stream_type())
     mapped = self.stream(map(function, self.get_items()), stream_type=stream_type)
     if hasattr(self, 'is_in_memory') and self.is_in_memory():
         return mapped.to_memory()
     return mapped
Пример #30
0
 def map_to(self, function: Callable, stream_type: OptStreamType = AUTO) -> Native:
     """Map ``function`` over the stream and return a stream of the given type.

     :param function: applied to every item from ``get_iter()``.
     :param stream_type: type of the new stream; fallback is
         ``get_stream_type``, passed with ``delayed=True``.
     :return: the new stream (passed through ``to_memory()`` when this one
         is in memory), then through ``_assume_native``.
     """
     resolved_type = arg.acquire(stream_type, self.get_stream_type, delayed=True)
     mapped = self.stream(map(function, self.get_iter()), stream_type=resolved_type)
     if self.is_in_memory():
         mapped = mapped.to_memory()
     return self._assume_native(mapped)