Exemplo n.º 1
0
def write_table(table,
                sink,
                chunk_size=None,
                version='1.0',
                use_dictionary=True,
                compression='snappy'):
    """
    Serialize a Table to Parquet through a ParquetWriter.

    Parameters
    ----------
    table : pyarrow.Table
        The table to write.
    sink : string or pyarrow.io.NativeFile
        Destination passed as the first argument to ParquetWriter.
    chunk_size : int, default None
        The maximum number of rows in each Parquet RowGroup; forwarded to
        the writer as ``row_group_size``. As a default, a single RowGroup
        is written per file.
    version : {"1.0", "2.0"}, default "1.0"
        The Parquet format version.
    use_dictionary : bool or list
        Whether dictionary encoding is used in general, or the columns for
        which it should be used.
    compression : str or dict
        The compression codec, either on a general basis or per-column.
    """
    # Gather the writer configuration in one place before constructing it.
    writer_options = dict(
        use_dictionary=use_dictionary,
        compression=compression,
        version=version)
    parquet_writer = ParquetWriter(sink, **writer_options)
    parquet_writer.write_table(table, row_group_size=chunk_size)
Exemplo n.º 2
0
def write_table(table, where, row_group_size=None, version='1.0',
                use_dictionary=True, compression='snappy', **kwargs):
    """
    Write a Table to Parquet format

    Parameters
    ----------
    table : pyarrow.Table
        The table to write; its ``schema`` is handed to the writer.
    where: string or pyarrow.io.NativeFile
        Destination passed as the first argument to ParquetWriter.
    row_group_size : int, default None
        The maximum number of rows in each Parquet RowGroup. As a default,
        we will write a single RowGroup per file.
    version : {"1.0", "2.0"}, default "1.0"
        The Parquet format version, defaults to 1.0
    use_dictionary : bool or list
        Specify if we should use dictionary encoding in general or only for
        some columns.
    compression : str or dict
        Specify the compression codec, either on a general basis or per-column.
    **kwargs : optional
        Only ``chunk_size`` is consulted: when present it overrides
        ``row_group_size``. Any other extra keyword is ignored.
    """
    # A `chunk_size` keyword, when supplied, wins over `row_group_size`
    # (presumably kept for callers of an older signature).
    row_group_size = kwargs.get('chunk_size', row_group_size)
    writer = ParquetWriter(where, table.schema,
                           use_dictionary=use_dictionary,
                           compression=compression,
                           version=version)
    try:
        writer.write_table(table, row_group_size=row_group_size)
    finally:
        # Close the writer even when writing the table raises, so the
        # writer is not left open on the error path.
        writer.close()
Exemplo n.º 3
0
def write_metadata(schema,
                   where,
                   version='1.0',
                   use_deprecated_int96_timestamps=False,
                   coerce_timestamps=None):
    """
    Produce a metadata-only Parquet file for the given schema.

    Parameters
    ----------
    schema : pyarrow.Schema
        Schema handed to the ParquetWriter.
    where: string or pyarrow.io.NativeFile
        Destination passed as the first argument to ParquetWriter.
    version : {"1.0", "2.0"}, default "1.0"
        The Parquet format version.
    use_deprecated_int96_timestamps : boolean, default False
        Write nanosecond resolution timestamps to INT96 Parquet format.
    coerce_timestamps : string, default None
        Cast timestamps to a particular resolution.
        Valid values: {None, 'ms', 'us'}
    """
    metadata_writer = ParquetWriter(
        where,
        schema,
        version=version,
        use_deprecated_int96_timestamps=use_deprecated_int96_timestamps,
        coerce_timestamps=coerce_timestamps)
    # No table is written: the writer is closed right after construction.
    metadata_writer.close()
Exemplo n.º 4
0
def write_metadata(schema, where, version='1.0'):
    """
    Produce a metadata-only Parquet file for the given schema.

    Parameters
    ----------
    schema : pyarrow.Schema
        Schema handed to the ParquetWriter.
    where: string or pyarrow.io.NativeFile
        Destination passed as the first argument to ParquetWriter.
    version : {"1.0", "2.0"}, default "1.0"
        The Parquet format version.
    """
    # Construct the writer and close it immediately; no table is written.
    ParquetWriter(where, schema, version=version).close()
Exemplo n.º 5
0
def write_metadata(schema, where, version='1.0'):
    """
    Emit a Parquet file that carries only the metadata of a schema.

    Parameters
    ----------
    schema : pyarrow.Schema
        Schema handed to the ParquetWriter.
    where: string or pyarrow.io.NativeFile
        Destination passed as the first argument to ParquetWriter.
    version : {"1.0", "2.0"}, default "1.0"
        The Parquet format version.
    """
    writer_options = dict(version=version)
    metadata_writer = ParquetWriter(where, schema, **writer_options)
    # The writer is closed without any table having been written to it.
    metadata_writer.close()
Exemplo n.º 6
0
def write_table(table,
                where,
                row_group_size=None,
                version='1.0',
                use_dictionary=True,
                compression='snappy',
                use_deprecated_int96_timestamps=False,
                coerce_timestamps=None,
                **kwargs):
    """
    Write a Table to Parquet format

    If creating the writer or writing the table fails, the writer is closed
    and, when ``where`` is a string path, the file at that path is removed
    on a best-effort basis before the exception is re-raised.

    Parameters
    ----------
    table : pyarrow.Table
        The table to write; its ``schema`` is handed to the writer.
    where: string or pyarrow.io.NativeFile
        Destination passed as the first argument to ParquetWriter.
    row_group_size : int, default None
        The maximum number of rows in each Parquet RowGroup. As a default,
        we will write a single RowGroup per file.
    version : {"1.0", "2.0"}, default "1.0"
        The Parquet format version, defaults to 1.0
    use_dictionary : bool or list
        Specify if we should use dictionary encoding in general or only for
        some columns.
    compression : str or dict
        Specify the compression codec, either on a general basis or per-column.
    use_deprecated_int96_timestamps : boolean, default False
        Write nanosecond resolution timestamps to INT96 Parquet format
    coerce_timestamps : string, default None
        Cast timestamps to a particular resolution.
        Valid values: {None, 'ms', 'us'}
    **kwargs : optional
        Only ``chunk_size`` is consulted: when present it overrides
        ``row_group_size``. Any other extra keyword is ignored.
    """
    # A `chunk_size` keyword, when supplied, wins over `row_group_size`
    # (presumably kept for callers of an older signature).
    row_group_size = kwargs.get('chunk_size', row_group_size)
    options = dict(
        use_dictionary=use_dictionary,
        compression=compression,
        version=version,
        use_deprecated_int96_timestamps=use_deprecated_int96_timestamps,
        coerce_timestamps=coerce_timestamps)

    writer = None
    try:
        writer = ParquetWriter(where, table.schema, **options)
        writer.write_table(table, row_group_size=row_group_size)
    except:  # noqa: E722 - cleanup for any failure; always re-raised below
        try:
            # `writer` is still None when the constructor itself failed.
            if writer is not None:
                writer.close()
        finally:
            # Remove the partial file even if closing the writer failed.
            # Only string paths are removed; file-like sinks are left alone.
            if isinstance(where, six.string_types):
                try:
                    os.remove(where)
                except os.error:
                    # Best effort: the file may never have been created.
                    pass
        raise
    else:
        writer.close()
Exemplo n.º 7
0
def write_metadata(schema, where, version='1.0',
                   use_deprecated_int96_timestamps=False):
    """
    Produce a metadata-only Parquet file for the given schema.

    Parameters
    ----------
    schema : pyarrow.Schema
        Schema handed to the ParquetWriter.
    where: string or pyarrow.io.NativeFile
        Destination passed as the first argument to ParquetWriter.
    version : {"1.0", "2.0"}, default "1.0"
        The Parquet format version.
    use_deprecated_int96_timestamps : boolean, default False
        Forwarded unchanged to ParquetWriter under the same keyword.
    """
    metadata_writer = ParquetWriter(
        where,
        schema,
        version=version,
        use_deprecated_int96_timestamps=use_deprecated_int96_timestamps)
    # No table is written: the writer is closed right after construction.
    metadata_writer.close()
Exemplo n.º 8
0
def write_table(table, where, row_group_size=None, version='1.0',
                use_dictionary=True, compression='snappy',
                use_deprecated_int96_timestamps=False, **kwargs):
    """
    Write a Table to Parquet format

    Parameters
    ----------
    table : pyarrow.Table
        The table to write; its ``schema`` is handed to the writer.
    where: string or pyarrow.io.NativeFile
        Destination passed as the first argument to ParquetWriter.
    row_group_size : int, default None
        The maximum number of rows in each Parquet RowGroup. As a default,
        we will write a single RowGroup per file.
    version : {"1.0", "2.0"}, default "1.0"
        The Parquet format version, defaults to 1.0
    use_dictionary : bool or list
        Specify if we should use dictionary encoding in general or only for
        some columns.
    compression : str or dict
        Specify the compression codec, either on a general basis or per-column.
    use_deprecated_int96_timestamps : boolean, default False
        Forwarded unchanged to ParquetWriter under the same keyword.
    **kwargs : optional
        Only ``chunk_size`` is consulted: when present it overrides
        ``row_group_size``. Any other extra keyword is ignored.
    """
    # A `chunk_size` keyword, when supplied, wins over `row_group_size`
    # (presumably kept for callers of an older signature).
    row_group_size = kwargs.get('chunk_size', row_group_size)
    options = dict(
        use_dictionary=use_dictionary,
        compression=compression,
        version=version,
        use_deprecated_int96_timestamps=use_deprecated_int96_timestamps)
    writer = ParquetWriter(where, table.schema, **options)
    try:
        writer.write_table(table, row_group_size=row_group_size)
    finally:
        # Close the writer even when writing the table raises, so the
        # writer is not left open on the error path.
        writer.close()