Пример #1
0
def write_legacy_pickles():
    """
    Pickle the result of ``create_data()`` into the current directory.

    The output file is named
    ``<pandas version>_<machine>_<system>_<python version>.pickle`` and is
    written with the highest available pickle protocol.

    ``pickle`` and ``create_data`` are not bound inside this function; they
    are expected to be available from the enclosing module.
    """

    # force our cwd to be the first searched
    import sys
    sys.path.insert(0,'.')

    import pandas
    import platform as pl

    print("This script generates a pickle file for the current arch, system, and python version")

    version = pandas.__version__

    # construct a reasonable platform name
    f = '_'.join([ str(version), str(pl.machine()), str(pl.system().lower()), str(pl.python_version()) ])
    pth = '{0}.pickle'.format(f)

    # The context manager closes the file even when create_data() or the
    # dump itself raises, which the manual open()/close() pair did not.
    with open(pth, 'wb') as fh:
        pickle.dump(create_data(), fh, pickle.HIGHEST_PROTOCOL)

    print("created pickle file: %s" % pth)
def to_pickle(obj, path):
    """
    Serialize ``obj`` with pickle and store it at ``path``.

    Parameters
    ----------
    obj : any object
        The object to serialize.
    path : string
        Location of the output file; it is opened in binary write mode.
    """
    highest = pkl.HIGHEST_PROTOCOL
    with open(path, 'wb') as sink:
        pkl.dump(obj, sink, highest)
Пример #3
0
def write_dataframe_to_s3( data_frame, bucket, key ):
    """
    Writes the data_frame (as a pickle) to the given bucket and key

    The frame is pickled into a spooled temporary file, which is then
    uploaded as the contents of ``key`` in ``bucket``.

    If pickling fails, the failure is logged on the ``write2df`` logger
    (together with ``sizeof_df(data_frame)``) and the exception is re-raised,
    so that an empty or partially written pickle is never uploaded.
    """
    conn = boto.connect_s3()
    with tempfile.SpooledTemporaryFile() as t:
        try:
            pkl.dump(data_frame, t)#from actual to_pickle code
        except Exception:
            logger = logging.getLogger('write2df')
            logger.exception("DFsize : %s", sizeof_df( data_frame ))
            # Do not fall through to the upload with a truncated buffer.
            raise
        # rewind so the upload reads the pickle from its first byte
        t.seek(0)
        k = Key(conn.create_bucket(bucket))
        k.key = key
        k.set_contents_from_file( t )
Пример #4
0
def write_legacy_pickles():
    """
    Pickle the result of ``create_data()`` into a directory given on argv.

    Command line: ``<script> <version> <output_dir>``. The output file is
    ``<output_dir>/<version>_<machine>_<system>_<python version>.pickle``.

    Exits via ``SystemExit`` with a usage message when fewer than two
    arguments are supplied.

    ``create_data`` is expected to be available from the enclosing module.
    """

    import sys

    # Both <version> (argv[1]) and <output_dir> (argv[2]) are read below, so
    # three argv entries are required. Checked first so a usage error fails
    # fast, before any heavy import or sys.path change.
    if len(sys.argv) < 3:
        sys.exit("{0} <version> <output_dir>".format(sys.argv[0]))

    # force our cwd to be the first searched
    sys.path.insert(0,'.')

    import os, os.path
    import pandas
    import platform as pl

    # make sure we are < 0.13 compat (in py3)
    try:
        from pandas.compat import cPickle as pickle
    except ImportError:
        import pickle

    sys_version = pandas.__version__
    version = str(sys.argv[1])
    output_dir = str(sys.argv[2])

    print("This script generates a pickle file for the current arch, system, and python version")
    print("  system version: {0}".format(sys_version))
    print("  output version: {0}".format(version))
    print("  output dir    : {0}".format(output_dir))

    # construct a reasonable platform name
    f = '_'.join([ str(version), str(pl.machine()), str(pl.system().lower()), str(pl.python_version()) ])
    pth = '{0}.pickle'.format(f)

    # The context manager closes the file even when create_data() or the
    # dump itself raises.
    with open(os.path.join(output_dir,pth),'wb') as fh:
        pickle.dump(create_data(),fh,pickle.HIGHEST_PROTOCOL)

    print("created pickle file: %s" % pth)
def write_legacy_pickles(output_dir):
    """
    Pickle the result of ``create_pickle_data()`` into ``output_dir``.

    Parameters
    ----------
    output_dir : string
        Directory in which ``<platform_name()>.pickle`` is created.
    """

    # make sure we are < 0.13 compat (in py3)
    try:
        from pandas.compat import zip, cPickle as pickle
    except ImportError:
        import pickle

    version = pandas.__version__

    print("This script generates a storage file for the current arch, system, and python version")
    print("  pandas version: {0}".format(version))
    print("  output dir    : {0}".format(output_dir))
    print("  storage format: pickle")

    pth = '{0}.pickle'.format(platform_name())

    # The context manager closes the file even when create_pickle_data() or
    # the dump itself raises.
    with open(os.path.join(output_dir, pth), 'wb') as fh:
        pickle.dump(create_pickle_data(), fh, pickle.HIGHEST_PROTOCOL)

    print("created pickle file: %s" % pth)
Пример #6
0
def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
    """
    Pickle (serialize) object to input file path

    Parameters
    ----------
    obj : any object
    path : string
        File path
    compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer'
        a string representing the compression to use in the output file

        .. versionadded:: 0.20.0
    protocol : int
        Int which indicates which protocol should be used by the pickler,
        default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
        values for this parameter depend on the version of Python. For Python
        2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
        For Python >= 3.4, 4 is a valid value. A negative value for the
        protocol parameter is equivalent to setting its value to
        HIGHEST_PROTOCOL.

        .. [1] https://docs.python.org/3/library/pickle.html
        .. versionadded:: 0.21.0


    """
    # Normalize the protocol before any handle is requested: if the
    # comparison raises (e.g. a non-numeric protocol), nothing has been
    # opened yet and so nothing is left unclosed.
    if protocol < 0:
        protocol = pkl.HIGHEST_PROTOCOL
    path = _stringify_path(path)
    inferred_compression = _infer_compression(path, compression)
    f, fh = _get_handle(path, 'wb',
                        compression=inferred_compression,
                        is_text=False)
    try:
        pkl.dump(obj, f, protocol=protocol)
    finally:
        # close every handle returned alongside the write target
        for _f in fh:
            _f.close()
Пример #7
0
def write_legacy_pickles(version=None):
    """
    Pickle the result of ``create_data()`` under ``data/legacy_pickle``.

    The file is written to
    ``<dir of this file>/data/legacy_pickle/<version>/<machine>_<system>_<python version>.pickle``;
    the version directory is created if it does not exist yet.

    Parameters
    ----------
    version : optional
        Name of the version sub-directory. Defaults to ``pandas.__version__``
        when None.

    ``pickle`` and ``create_data`` are not bound inside this function; they
    are expected to be available from the enclosing module.
    """

    # force our cwd to be the first searched
    import sys
    sys.path.insert(0,'.')

    import os
    import pandas
    import platform as pl

    print("This script generates a pickle file for the current arch, system, and python version")

    base_dir, _ = os.path.split(os.path.abspath(__file__))
    base_dir = os.path.join(base_dir,'data/legacy_pickle')

    if version is None:
        version = pandas.__version__
    pth = os.path.join(base_dir, str(version))
    try:
        # makedirs also creates a missing data/legacy_pickle parent
        os.makedirs(pth)
    except OSError:
        # An already existing directory is fine; any other failure
        # (permissions, a file in the way, ...) is reported to the caller.
        if not os.path.isdir(pth):
            raise

    # construct a reasonable platform name
    f = '_'.join([ str(pl.machine()), str(pl.system().lower()), str(pl.python_version()) ])
    pth = os.path.abspath(os.path.join(pth,'%s.pickle' % f))

    # The context manager closes the file even when create_data() or the
    # dump itself raises.
    with open(pth,'wb') as fh:
        pickle.dump(create_data(),fh,pickle.HIGHEST_PROTOCOL)

    print("created pickle file: %s" % pth)
Пример #8
0
def to_pickle(obj, path, compression='infer'):
    """
    Serialize ``obj`` with pickle and write it to ``path``.

    Parameters
    ----------
    obj : any object
        The object to serialize.
    path : string
        File path
    compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer'
        a string representing the compression to use in the output file

        .. versionadded:: 0.20.0
    """
    resolved = _infer_compression(path, compression)
    target, opened = _get_handle(
        path, 'wb', compression=resolved, is_text=False)
    try:
        pkl.dump(obj, target, pkl.HIGHEST_PROTOCOL)
    finally:
        # close every handle returned alongside the write target
        for handle in opened:
            handle.close()
Пример #9
0
def write_legacy_pickles(output_dir):
    """
    Pickle the result of ``create_pickle_data()`` into ``output_dir``.

    Parameters
    ----------
    output_dir : string
        Directory in which ``<platform_name()>.pickle`` is created.
    """

    # make sure we are < 0.13 compat (in py3)
    try:
        from pandas.compat import zip, cPickle as pickle  # noqa
    except ImportError:
        import pickle

    version = pandas.__version__

    print("This script generates a storage file for the current arch, system, "
          "and python version")
    print("  pandas version: {0}".format(version))
    print("  output dir    : {0}".format(output_dir))
    print("  storage format: pickle")

    pth = '{0}.pickle'.format(platform_name())

    # The context manager closes the file even when create_pickle_data() or
    # the dump itself raises.
    with open(os.path.join(output_dir, pth), 'wb') as fh:
        pickle.dump(create_pickle_data(), fh, pickle.HIGHEST_PROTOCOL)

    print("created pickle file: %s" % pth)
Пример #10
0
def to_pickle(obj, path, compression='infer'):
    """
    Write a pickled copy of ``obj`` to the file at ``path``.

    Parameters
    ----------
    obj : any object
        The object to serialize.
    path : string
        File path
    compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer'
        a string representing the compression to use in the output file

        .. versionadded:: 0.20.0
    """
    handle_options = dict(compression=_infer_compression(path, compression),
                          is_text=False)
    out, to_close = _get_handle(path, 'wb', **handle_options)
    try:
        pkl.dump(obj, out, pkl.HIGHEST_PROTOCOL)
    finally:
        # close every handle returned alongside the write target
        for opened in to_close:
            opened.close()
Пример #11
0
def read_dataframe_from_s3( bucket, key, encoding=None ):
    """
    Returns a dataframe from the given bucket and key (where df on s3 is a pickle)

    The object is downloaded into a spooled temporary file and unpickled.
    A plain ``pkl.load`` is tried first; if that fails, ``pc.load`` is tried
    with ``compat=False`` and finally with ``compat=True``. ``encoding`` is
    passed only to the ``pc.load`` fallbacks.

    An exception raised by the last fallback propagates to the caller.
    """
    conn = boto.connect_s3()
    with tempfile.SpooledTemporaryFile() as t:
        k = Key(conn.get_bucket(bucket))
        k.key = key
        k.get_contents_to_file( t )
        # NOTE(security): unpickling can execute arbitrary code; only read
        # from buckets whose contents are trusted.
        t.seek(0)
        try:
            return pkl.load(t)
        except Exception:
            # rewind before every retry: a failed load may have consumed
            # part of the buffer
            try:
                t.seek(0)
                return pc.load(t, encoding=encoding, compat=False)
            except Exception:
                t.seek(0)
                return pc.load(t, encoding=encoding, compat=True)
Пример #12
0
def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
    """
    Pickle (serialize) object to file.

    Parameters
    ----------
    obj : any object
        Any python object.
    path : str
        File path where the pickled object will be stored.
    compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer'
        A string representing the compression to use in the output file. By
        default, infers from the file extension in specified path.

        .. versionadded:: 0.20.0
    protocol : int
        Int which indicates which protocol should be used by the pickler,
        default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
        values for this parameter depend on the version of Python. For Python
        2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
        For Python >= 3.4, 4 is a valid value. A negative value for the
        protocol parameter is equivalent to setting its value to
        HIGHEST_PROTOCOL.

        .. [1] https://docs.python.org/3/library/pickle.html
        .. versionadded:: 0.21.0

    See Also
    --------
    read_pickle : Load pickled pandas object (or any object) from file.
    DataFrame.to_hdf : Write DataFrame to an HDF5 file.
    DataFrame.to_sql : Write DataFrame to a SQL database.
    DataFrame.to_parquet : Write a DataFrame to the binary parquet format.

    Examples
    --------
    >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
    >>> original_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")
    >>> unpickled_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9

    >>> import os
    >>> os.remove("./dummy.pkl")
    """
    # Normalize the protocol before any handle is requested: if the
    # comparison raises (e.g. a non-numeric protocol), nothing has been
    # opened yet and so nothing is left unclosed.
    if protocol < 0:
        protocol = pkl.HIGHEST_PROTOCOL
    path = _stringify_path(path)
    inferred_compression = _infer_compression(path, compression)
    f, fh = _get_handle(path,
                        'wb',
                        compression=inferred_compression,
                        is_text=False)
    try:
        pkl.dump(obj, f, protocol=protocol)
    finally:
        # close every handle returned alongside the write target
        for _f in fh:
            _f.close()
Пример #13
0
def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
    """
    Pickle (serialize) object to file.

    Parameters
    ----------
    obj : any object
        Any python object.
    path : str
        File path where the pickled object will be stored.
    compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer'
        A string representing the compression to use in the output file. By
        default, infers from the file extension in specified path.

        .. versionadded:: 0.20.0
    protocol : int
        Int which indicates which protocol should be used by the pickler,
        default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
        values for this parameter depend on the version of Python. For Python
        2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
        For Python >= 3.4, 4 is a valid value. A negative value for the
        protocol parameter is equivalent to setting its value to
        HIGHEST_PROTOCOL.

        .. [1] https://docs.python.org/3/library/pickle.html
        .. versionadded:: 0.21.0

    See Also
    --------
    read_pickle : Load pickled pandas object (or any object) from file.
    DataFrame.to_hdf : Write DataFrame to an HDF5 file.
    DataFrame.to_sql : Write DataFrame to a SQL database.
    DataFrame.to_parquet : Write a DataFrame to the binary parquet format.

    Examples
    --------
    >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
    >>> original_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")
    >>> unpickled_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9

    >>> import os
    >>> os.remove("./dummy.pkl")
    """
    # Normalize the protocol before any handle is requested: if the
    # comparison raises (e.g. a non-numeric protocol), nothing has been
    # opened yet and so nothing is left unclosed.
    if protocol < 0:
        protocol = pkl.HIGHEST_PROTOCOL
    path = _stringify_path(path)
    inferred_compression = _infer_compression(path, compression)
    f, fh = _get_handle(path, 'wb',
                        compression=inferred_compression,
                        is_text=False)
    try:
        pkl.dump(obj, f, protocol=protocol)
    finally:
        # close every handle returned alongside the write target
        for _f in fh:
            _f.close()