def write_legacy_pickles():
    """
    Write a pickle of ``create_data()`` into the current working directory.

    The output file is named
    ``<pandas version>_<machine>_<system>_<python version>.pickle``,
    built from ``pandas.__version__`` and the ``platform`` module.
    """
    # force our cwd to be the first searched
    import sys
    sys.path.insert(0, '.')

    import pandas
    import platform as pl

    print("This script generates a pickle file for the current arch, system, and python version")

    version = pandas.__version__

    # construct a reasonable platform name
    f = '_'.join([
        str(version),
        str(pl.machine()),
        str(pl.system().lower()),
        str(pl.python_version()),
    ])
    pth = '{0}.pickle'.format(f)

    # The context manager closes the handle even when building or dumping
    # the data raises, so no file descriptor is leaked.
    with open(pth, 'wb') as fh:
        pickle.dump(create_data(), fh, pickle.HIGHEST_PROTOCOL)

    print("created pickle file: %s" % pth)
def to_pickle(obj, path):
    """
    Serialize an object with pickle and write it to a file.

    Parameters
    ----------
    obj : any object
        The object to serialize.
    path : string
        Destination file path; the file is opened in binary write mode.
    """
    with open(path, 'wb') as sink:
        # Always use the newest protocol the running interpreter offers.
        newest = pkl.HIGHEST_PROTOCOL
        pkl.dump(obj, sink, protocol=newest)
def write_dataframe_to_s3(data_frame, bucket, key):
    """
    Writes the data_frame (as a pickle) to the given bucket and key.

    The pickle is staged in a ``tempfile.SpooledTemporaryFile`` and then
    uploaded with ``Key.set_contents_from_file``. The bucket is obtained
    through ``conn.create_bucket(bucket)``.

    Parameters
    ----------
    data_frame : object
        Object to pickle; passed unchanged to ``pkl.dump``.
    bucket : string
        Name of the target S3 bucket.
    key : string
        Key under which the pickle is stored.

    Raises
    ------
    Exception
        Whatever ``pkl.dump`` raised. The failure is logged to the
        'write2df' logger and re-raised, so a truncated or empty pickle is
        never uploaded.
    """
    conn = boto.connect_s3()
    with tempfile.SpooledTemporaryFile() as t:
        try:
            pkl.dump(data_frame, t)  # from actual to_pickle code
        except Exception:
            logger = logging.getLogger('write2df')
            logger.exception("DFsize : %s" % sizeof_df(data_frame))
            # Do not fall through: the buffer may hold a partial pickle and
            # uploading it would silently store corrupt data.
            raise
        # Rewind so the upload reads the pickle from its first byte.
        t.seek(0)
        k = Key(conn.create_bucket(bucket))
        k.key = key
        k.set_contents_from_file(t)
def write_legacy_pickles():
    """
    Write a pickle of ``create_data()`` into a directory given on the
    command line.

    Reads two required positional arguments from ``sys.argv``:
    ``<version>`` (used in the output file name) and ``<output_dir>``
    (directory the file is written to). The file is named
    ``<version>_<machine>_<system>_<python version>.pickle``.

    Raises
    ------
    SystemExit
        With a usage message when fewer than two arguments are supplied.
    """
    # force our cwd to be the first searched
    import sys
    sys.path.insert(0, '.')

    # Both <version> and <output_dir> are required, so argv needs at least
    # three entries (program name + 2). Validate before the heavy imports so
    # a usage error is reported immediately.
    if len(sys.argv) < 3:
        sys.exit("{0} <version> <output_dir>".format(sys.argv[0]))
    version = str(sys.argv[1])
    output_dir = str(sys.argv[2])

    import os
    import pandas
    import platform as pl

    # make sure we are < 0.13 compat (in py3)
    try:
        from pandas.compat import zip, cPickle as pickle  # noqa
    except ImportError:
        import pickle

    sys_version = pandas.__version__

    print("This script generates a pickle file for the current arch, system, and python version")
    print(" system version: {0}".format(sys_version))
    print(" output version: {0}".format(version))
    print(" output dir : {0}".format(output_dir))

    # construct a reasonable platform name
    f = '_'.join([
        str(version),
        str(pl.machine()),
        str(pl.system().lower()),
        str(pl.python_version()),
    ])
    pth = '{0}.pickle'.format(f)

    # The context manager closes the handle even if dumping fails.
    with open(os.path.join(output_dir, pth), 'wb') as fh:
        pickle.dump(create_data(), fh, pickle.HIGHEST_PROTOCOL)

    print("created pickle file: %s" % pth)
def write_legacy_pickles(output_dir):
    """
    Write a pickle of ``create_pickle_data()`` into ``output_dir``.

    The file is named ``<platform_name()>.pickle``.

    Parameters
    ----------
    output_dir : string
        Directory the pickle file is written to.
    """
    # make sure we are < 0.13 compat (in py3)
    try:
        from pandas.compat import zip, cPickle as pickle  # noqa
    except ImportError:
        # Only a missing compat shim should trigger the stdlib fallback.
        import pickle

    version = pandas.__version__

    print("This script generates a storage file for the current arch, system, and python version")
    print(" pandas version: {0}".format(version))
    print(" output dir : {0}".format(output_dir))
    print(" storage format: pickle")

    pth = '{0}.pickle'.format(platform_name())

    # The context manager closes the handle even if building or dumping the
    # data raises.
    with open(os.path.join(output_dir, pth), 'wb') as fh:
        pickle.dump(create_pickle_data(), fh, pickle.HIGHEST_PROTOCOL)

    print("created pickle file: %s" % pth)
def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
    """
    Serialize (pickle) an object and write it to the given file path.

    Parameters
    ----------
    obj : any object
    path : string
        File path
    compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer'
        String naming the compression to apply to the output file.

        .. versionadded:: 0.20.0
    protocol : int
        Protocol handed to the pickler, default HIGHEST_PROTOCOL
        (see [1], paragraph 12.1.2). Which values are valid depends on the
        Python version: 0, 1 and 2 for Python 2.x; 3 is also valid for
        Python >= 3.0 and 4 for Python >= 3.4. Any negative value is
        treated as HIGHEST_PROTOCOL.

        .. [1] https://docs.python.org/3/library/pickle.html
        .. versionadded:: 0.21.0
    """
    path = _stringify_path(path)
    codec = _infer_compression(path, compression)
    sink, opened = _get_handle(path, 'wb', compression=codec, is_text=False)

    # A negative protocol is shorthand for "newest available".
    level = pkl.HIGHEST_PROTOCOL if protocol < 0 else protocol

    try:
        pkl.dump(obj, sink, protocol=level)
    finally:
        # Close every handle returned by _get_handle, even on failure.
        for handle in opened:
            handle.close()
def write_legacy_pickles():
    """
    Write a pickle of ``create_data()`` under ``data/legacy_pickle``.

    The output path is
    ``<dir of this file>/data/legacy_pickle/<pandas version>/<machine>_<system>_<python version>.pickle``.
    The version directory (and any missing parents) is created when absent.

    Raises
    ------
    OSError
        If the output directory cannot be created and does not already
        exist.
    """
    # force our cwd to be the first searched
    import sys
    sys.path.insert(0, '.')

    import os
    import pandas
    import platform as pl

    print("This script generates a pickle file for the current arch, system, and python version")

    base_dir, _ = os.path.split(os.path.abspath(__file__))
    base_dir = os.path.join(base_dir, 'data/legacy_pickle')

    # could make this a parameter?
    version = pandas.__version__
    pth = os.path.join(base_dir, str(version))

    # Tolerate only "directory already exists"; any other failure (for
    # example a permission error) is surfaced instead of silently ignored.
    try:
        os.makedirs(pth)
    except OSError:
        if not os.path.isdir(pth):
            raise

    # construct a reasonable platform name
    f = '_'.join([
        str(pl.machine()),
        str(pl.system().lower()),
        str(pl.python_version()),
    ])
    pth = os.path.abspath(os.path.join(pth, '%s.pickle' % f))

    # The context manager closes the handle even if dumping fails.
    with open(pth, 'wb') as fh:
        pickle.dump(create_data(), fh, pickle.HIGHEST_PROTOCOL)

    print("created pickle file: %s" % pth)
def to_pickle(obj, path, compression='infer'):
    """
    Serialize (pickle) an object and write it to the given file path.

    Parameters
    ----------
    obj : any object
    path : string
        File path
    compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer'
        String naming the compression to apply to the output file.

        .. versionadded:: 0.20.0
    """
    codec = _infer_compression(path, compression)
    sink, opened = _get_handle(path, 'wb', compression=codec, is_text=False)
    try:
        # Always written with the newest protocol the interpreter offers.
        pkl.dump(obj, sink, protocol=pkl.HIGHEST_PROTOCOL)
    finally:
        # Close every handle returned by _get_handle, even on failure.
        for handle in opened:
            handle.close()
def write_legacy_pickles(output_dir):
    """
    Write a pickle of ``create_pickle_data()`` into ``output_dir``.

    The file is named ``<platform_name()>.pickle``.

    Parameters
    ----------
    output_dir : string
        Directory the pickle file is written to.
    """
    # make sure we are < 0.13 compat (in py3)
    try:
        from pandas.compat import zip, cPickle as pickle  # noqa
    except ImportError:
        # Only a missing compat shim should trigger the stdlib fallback.
        import pickle

    version = pandas.__version__

    print("This script generates a storage file for the current arch, system, "
          "and python version")
    print(" pandas version: {0}".format(version))
    print(" output dir : {0}".format(output_dir))
    print(" storage format: pickle")

    pth = '{0}.pickle'.format(platform_name())

    # The context manager closes the handle even if building or dumping the
    # data raises.
    with open(os.path.join(output_dir, pth), 'wb') as fh:
        pickle.dump(create_pickle_data(), fh, pickle.HIGHEST_PROTOCOL)

    print("created pickle file: %s" % pth)
def read_dataframe_from_s3(bucket, key, encoding=None):
    """
    Returns a dataframe from the given bucket and key (where df on s3 is a
    pickle).

    The object is downloaded into a ``tempfile.SpooledTemporaryFile`` and
    unpickled with ``pc.load``: first with ``compat=False`` and, if that
    raises, once more with ``compat=True``.

    Parameters
    ----------
    bucket : string
        Name of the S3 bucket to read from.
    key : string
        Key of the pickled object inside the bucket.
    encoding : string or None, default None
        Passed through unchanged to ``pc.load``.

    Returns
    -------
    object
        Whatever ``pc.load`` returns for the downloaded pickle.
    """
    # SECURITY NOTE(review): unpickling executes code embedded in the
    # payload; only read from buckets whose contents are trusted.
    conn = boto.connect_s3()
    with tempfile.SpooledTemporaryFile() as t:
        k = Key(conn.get_bucket(bucket))
        k.key = key
        k.get_contents_to_file(t)
        # Rewind so the loader reads the download from its first byte.
        t.seek(0)
        try:
            return pc.load(t, encoding=encoding, compat=False)
        except Exception:
            # The first attempt may have consumed part of the stream, so
            # rewind before retrying in compat mode.
            t.seek(0)
            return pc.load(t, encoding=encoding, compat=True)
def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
    """
    Serialize (pickle) an object and write it to a file.

    Parameters
    ----------
    obj : any object
        The python object to serialize.
    path : str
        Location of the file the pickled object is written to.
    compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer'
        String naming the compression to apply to the output file. With the
        default, it is inferred from the file extension of the given path.

        .. versionadded:: 0.20.0
    protocol : int
        Protocol handed to the pickler, default HIGHEST_PROTOCOL
        (see [1], paragraph 12.1.2). Which values are valid depends on the
        Python version: 0, 1 and 2 for Python 2.x; 3 is also valid for
        Python >= 3.0 and 4 for Python >= 3.4. Any negative value is
        treated as HIGHEST_PROTOCOL.

        .. [1] https://docs.python.org/3/library/pickle.html
        .. versionadded:: 0.21.0

    See Also
    --------
    read_pickle : Load pickled pandas object (or any object) from file.
    DataFrame.to_hdf : Write DataFrame to an HDF5 file.
    DataFrame.to_sql : Write DataFrame to a SQL database.
    DataFrame.to_parquet : Write a DataFrame to the binary parquet format.

    Examples
    --------
    >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
    >>> original_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")
    >>> unpickled_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9

    >>> import os
    >>> os.remove("./dummy.pkl")
    """
    path = _stringify_path(path)
    sink, opened = _get_handle(
        path,
        'wb',
        compression=_infer_compression(path, compression),
        is_text=False,
    )

    # A negative protocol is shorthand for "newest available".
    if protocol < 0:
        level = pkl.HIGHEST_PROTOCOL
    else:
        level = protocol

    try:
        pkl.dump(obj, sink, protocol=level)
    finally:
        # Close every handle returned by _get_handle, even on failure.
        for handle in opened:
            handle.close()