def parse(fname, **kwargs):
    """Parse a byte range of a JSON-lines file into partitioned frames.

    When ``start``/``end`` are supplied, only that slice of the file is read
    and split for ``num_splits`` readers; otherwise the whole file is handed
    to pandas directly (single-worker default).
    """
    num_splits = kwargs.pop("num_splits", None)
    start = kwargs.pop("start", None)
    end = kwargs.pop("end", None)
    if start is None or end is None:
        # This only happens when we are reading with only one worker (Default)
        return pandas.read_json(fname, **kwargs)
    # pop "compression" from kwargs because bio is uncompressed
    bio = FileDispatcher.file_open(
        fname, "rb", kwargs.pop("compression", "infer")
    )
    bio.seek(start)
    chunk_bytes = bio.read(end - start)
    bio.close()
    expected_columns = kwargs.pop("columns")
    pandas_df = pandas.read_json(BytesIO(chunk_bytes), **kwargs)
    if not pandas_df.columns.equals(expected_columns):
        raise NotImplementedError("Columns must be the same across all rows.")
    # Ship row count, dtypes and columns alongside the split partitions so
    # the caller can rebuild the frame's metadata.
    return _split_result_for_readers(1, num_splits, pandas_df) + [
        len(pandas_df),
        pandas_df.dtypes,
        pandas_df.columns,
    ]
def parse(fname, **kwargs):
    """Parse a byte range of a fixed-width file into partitioned frames.

    Reads only the ``start``..``end`` slice (plus the header line when an
    encoding is set) and splits the result for ``num_splits`` readers;
    without a range the whole file is read in one shot.
    """
    num_splits = kwargs.pop("num_splits", None)
    start = kwargs.pop("start", None)
    end = kwargs.pop("end", None)
    index_col = kwargs.get("index_col", None)
    if start is None or end is None:
        # This only happens when we are reading with only one worker (Default)
        return pandas.read_fwf(fname, **kwargs)
    # pop "compression" from kwargs because bio is uncompressed
    bio = FileDispatcher.file_open(
        fname, "rb", kwargs.pop("compression", "infer")
    )
    # Prepend the header line so this partition parses with column names.
    if kwargs.get("encoding", None) is not None:
        header_bytes = bio.readline()
    else:
        header_bytes = b""
    bio.seek(start)
    payload = header_bytes + bio.read(end - start)
    bio.close()
    pandas_df = pandas.read_fwf(BytesIO(payload), **kwargs)
    # With an explicit index column we ship the materialized index;
    # otherwise the row count, which becomes the RangeIndex downstream.
    index = pandas_df.index if index_col is not None else len(pandas_df)
    return _split_result_for_readers(1, num_splits, pandas_df) + [
        index,
        pandas_df.dtypes,
    ]
def parse(chunks, **kwargs):
    """Parse a list of ``(fname, start, end)`` CSV chunks into one frame.

    Each ranged chunk is read as raw bytes (header line prepended when an
    encoding is set) and parsed separately; the pieces are concatenated and
    split for ``num_splits`` readers. A chunk without a range falls back to
    a plain single-worker ``read_csv`` of that file.
    """
    warnings.filterwarnings("ignore")
    num_splits = kwargs.pop("num_splits", None)
    index_col = kwargs.get("index_col", None)
    frames = []
    for fname, start, end in chunks:
        if start is None or end is None:
            # This only happens when we are reading with only one worker (Default)
            return pandas.read_csv(fname, **kwargs)
        # pop "compression" from kwargs because bio is uncompressed
        # (note: popped inside the loop on purpose — later chunks open with
        # the default "infer", matching the original behavior)
        bio = FileDispatcher.file_open(
            fname, "rb", kwargs.pop("compression", "infer")
        )
        if kwargs.get("encoding", None) is not None:
            header = bio.readline()
        else:
            header = b""
        bio.seek(start)
        payload = header + bio.read(end - start)
        bio.close()
        frames.append(pandas.read_csv(BytesIO(payload), **kwargs))
    # Combine read in data.
    if not frames:
        pandas_df = pandas.DataFrame()
    elif len(frames) == 1:
        pandas_df = frames[0]
    else:
        pandas_df = pandas.concat(frames)
    # Set internal index: real index when one was requested, otherwise the
    # row count, which becomes the RangeIndex downstream.
    index = pandas_df.index if index_col is not None else len(pandas_df)
    return _split_result_for_readers(1, num_splits, pandas_df) + [
        index,
        pandas_df.dtypes,
    ]
def parse(cls, fname, **kwargs):
    """Parse a CSV file (or a byte range of it) and store partitions on a GPU.

    Parameters
    ----------
    cls : type
        Parser class exposing ``frame_partition_cls`` whose ``put`` stores a
        partition on a GPU manager.
    fname : str
        Path of the CSV file to read.
    **kwargs : dict
        ``pandas.read_csv`` keyword arguments plus the internal keys
        ``num_splits``, ``start``, ``end`` and ``gpu``.

    Returns
    -------
    list
        Keys of the stored partitions, followed by the index (or row count)
        and the dtypes of the parsed frame.
    """
    warnings.filterwarnings("ignore")
    num_splits = kwargs.pop("num_splits", None)
    start = kwargs.pop("start", None)
    end = kwargs.pop("end", None)
    index_col = kwargs.get("index_col", None)
    gpu_selected = kwargs.pop("gpu", 0)
    # BUGFIX: bind `put_func` unconditionally. It was previously assigned
    # only on the ranged-read branch, so the single-worker fallback below
    # raised NameError when building `key`.
    put_func = cls.frame_partition_cls.put
    if start is not None and end is not None:
        # pop "compression" from kwargs because bio is uncompressed
        bio = FileDispatcher.file_open(
            fname, "rb", kwargs.pop("compression", "infer")
        )
        # Prepend the header line so this partition parses with column names.
        if kwargs.get("encoding", None) is not None:
            header = b"" + bio.readline()
        else:
            header = b""
        bio.seek(start)
        to_read = header + bio.read(end - start)
        bio.close()
        pandas_df = pandas.read_csv(BytesIO(to_read), **kwargs)
    else:
        # This only happens when we are reading with only one worker (Default)
        pandas_df = pandas.read_csv(fname, **kwargs)
        # force num_splits to be 1 here because we don't want it partitioning
        num_splits = 1
    if index_col is not None:
        index = pandas_df.index
    else:
        # The row count becomes the RangeIndex downstream.
        index = len(pandas_df)
    partition_dfs = _split_result_for_readers(1, num_splits, pandas_df)
    key = [
        put_func(GPU_MANAGERS[gpu_selected], partition_df)
        for partition_df in partition_dfs
    ]
    return key + [index, pandas_df.dtypes]
def parse(fname, **kwargs):
    """Parse a byte range of a CSV file, prepending ``header_size`` header lines.

    With ``start``/``end`` given, the partition's bytes are read raw and the
    file's header lines are prepended so pandas parses them with column
    names; without a range the whole file is read in one shot.
    """
    warnings.filterwarnings("ignore")
    num_splits = kwargs.pop("num_splits", None)
    start = kwargs.pop("start", None)
    end = kwargs.pop("end", None)
    header_size = kwargs.pop("header_size", None)
    if start is None or end is None:
        # This only happens when we are reading with only one worker (Default)
        return pandas.read_csv(fname, **kwargs)
    # pop "compression" from kwargs because bio is uncompressed
    bio = FileDispatcher.file_open(
        fname, "rb", kwargs.pop("compression", "infer")
    )
    header = b""
    if kwargs.get("encoding", None) is not None and header_size == 0:
        # The first line can contain a BOM, so include it in `header` for
        # decoding and then skip it via `skiprows`.
        header += bio.readline()
        # `skiprows` can be only None here, so don't check its type
        # and just set it to 1.
        kwargs["skiprows"] = 1
    for _ in range(header_size):
        header += bio.readline()
    bio.seek(start)
    payload = header + bio.read(end - start)
    bio.close()
    pandas_df = pandas.read_csv(BytesIO(payload), **kwargs)
    # Ship the materialized index only when it is not a trivial RangeIndex;
    # otherwise the row count suffices to rebuild it downstream.
    if isinstance(pandas_df.index, pandas.RangeIndex):
        index = len(pandas_df)
    else:
        index = pandas_df.index
    return _split_result_for_readers(1, num_splits, pandas_df) + [
        index,
        pandas_df.dtypes,
    ]