Python file_has_parquet_magic_number примеры использования

Язык программирования: Python

Пространство имен/Пакет: cjwkernel.parquet

Метод/Функция: file_has_parquet_magic_number

Примеров на hotexamples.com: 7

Python file_has_parquet_magic_number — найдено 7 примеров. Это лучшие примеры кода на Python для cjwkernel.parquet.file_has_parquet_magic_number, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

def render_arrow(
    table, params, tab_name, fetch_result: Optional[FetchResult], output_path: Path
) -> RenderResult:
    """
    Turn a stored fetch result into a RenderResult.

    Dispatches on what the fetch result holds:

    * no fetch result at all: an empty table;
    * a file that starts with the Parquet magic number: the deprecated
      "parsed during fetch" format, handled by `_render_deprecated_parquet()`;
    * errors (and no Parquet file): an empty table carrying those errors;
    * anything else: the file is handed to `_render_file()`.

    `table` and `tab_name` are accepted but not used here.
    """
    # The header operation has to happen at render time, in case the header
    # checkbox state changes.

    # Nothing was ever fetched: render an empty table.
    if fetch_result is None:
        return RenderResult(ArrowTable())

    stored_path = fetch_result.path
    if stored_path is not None and parquet.file_has_parquet_magic_number(
        stored_path
    ):
        # Legacy storage: fetch() used to parse the upload and save the
        # parsed table as Parquet, so the original bytes are gone. The
        # oldest users still have such files and must keep working.
        #
        # In that legacy format, parse errors were saved as
        # fetch_result.errors, so they are forwarded along with the file.
        return _render_deprecated_parquet(
            stored_path, fetch_result.errors, output_path, params
        )

    if fetch_result.errors:
        # Errors and data were never stored together, so the presence of
        # errors is taken to mean there is no data.
        return RenderResult(ArrowTable(), fetch_result.errors)

    # Errors and data were never stored together.
    assert not fetch_result.errors
    return _render_file(fetch_result.path, params, output_path)

Пример #2

Показать файл

def render_arrow(
    table: types.ArrowTable,
    params: Dict[str, Any],
    tab_name: str,
    fetch_result: Optional[types.FetchResult],
    output_path: Path,
) -> types.RenderResult:
    """
    Render with `cjwkernel.types` inputs by delegating to `render_pandas()`.

    The Arrow table, the tab outputs found in `params` and (when it is
    empty or Parquet) the fetch result are converted to their Pandas
    counterparts, `render_pandas()` is called, and its result is converted
    back with `to_arrow(output_path)`. Arrow output, if any, goes to
    `output_path`.

    Module authors are encouraged to override this function: Arrow tables
    are simpler and more memory-efficient than Pandas tables. For example,
    this is the ideal signature for a "rename columns" module, because
    Arrow can pass data through without consuming excessive RAM.

    The value returned by render_pandas() is not validated here.
    """
    input_frame = __arrow_to_pandas(table)

    # Map each tab slug to the Pandas form of that tab's output.
    tabs_by_slug = {}
    for tab_output in _find_tab_outputs(params):
        slug = tab_output.tab.slug
        tabs_by_slug[slug] = __arrow_tab_output_to_pandas(tab_output)

    if fetch_result is None:
        legacy_fetch_result = None
    else:
        # A zero-byte file or a Parquet file is read into a Pandas table;
        # any other file is passed to render_pandas() unchanged.
        is_empty_file = fetch_result.path.stat().st_size == 0
        if is_empty_file or parquet.file_has_parquet_magic_number(
            fetch_result.path
        ):
            fetched_frame = __parquet_to_pandas(fetch_result.path)
            converted_errors = [
                ptypes.ProcessResultError.from_arrow(arrow_error)
                for arrow_error in fetch_result.errors
            ]
            legacy_fetch_result = ptypes.ProcessResult(
                fetched_frame, converted_errors
            )
        else:
            legacy_fetch_result = fetch_result

    outcome: ptypes.ProcessResult = render_pandas(
        input_table=input_frame,
        input_table_shape=ptypes.TableShape.from_arrow(table.metadata),
        params=_arrow_param_to_pandas_param(params),
        tab_name=tab_name,
        input_tabs=tabs_by_slug,
        fetch_result=legacy_fetch_result,
    )
    return outcome.to_arrow(output_path)

Пример #3

Показать файл