def test_pickle(self):
    from statsmodels.compatnp.py3k import BytesIO
    fh = BytesIO()
    # test wrapped results load save pickle
    self.res.save(fh)
    fh.seek(0, 0)
    res_unpickled = self.res.__class__.load(fh)
    assert_(type(res_unpickled) is type(self.res))
def test_pickle(self):
    from statsmodels.compatnp.py3k import BytesIO
    fh = BytesIO()
    # test wrapped results load save pickle
    self.res1.save(fh)
    fh.seek(0, 0)
    res_unpickled = self.res1.__class__.load(fh)
    assert_(type(res_unpickled) is type(self.res1))
def test_stata_writer_array():
    buf = BytesIO()
    dta = macrodata.load().data
    dta = DataFrame.from_records(dta)
    dta.columns = ["v%d" % i for i in range(1, 15)]
    writer = StataWriter(buf, dta.values)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta = dta.to_records(index=False)
    assert_array_equal(dta, dta2)
def test_stata_writer_structured():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    dta = dta.astype(np.dtype([('year', int), ('quarter', int)] +
                              dtype.descr[2:]))
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    assert_array_equal(dta, dta2)
def test_stata_writer_pandas():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    # as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(np.dtype([('year', 'i8'), ('quarter', 'i8')] +
                              dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    ptesting.assert_frame_equal(dta.reset_index(),
                                DataFrame.from_records(dta2))
def test_missing_roundtrip():
    buf = BytesIO()
    dta = np.array([(np.nan, np.inf, "")],
                   dtype=[("double_miss", float),
                          ("float_miss", np.float32),
                          ("string_miss", "a1")])
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta = genfromdta(buf, missing_flt=np.nan)
    assert_(isnull(dta[0][0]))
    assert_(isnull(dta[0][1]))
    assert_(dta[0][2] == asbytes(""))

    dta = genfromdta(os.path.join(curdir, "results/data_missing.dta"),
                     missing_flt=-999)
    assert_(np.all([dta[0][i] == -999 for i in range(5)]))
def check_pickle(obj):
    from statsmodels.compatnp.py3k import BytesIO
    fh = BytesIO()
    pickle.dump(obj, fh)
    plen = fh.tell()
    fh.seek(0, 0)
    res = pickle.load(fh)
    fh.close()
    return res, plen
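# A minimal, hypothetical usage sketch for the check_pickle helper above: it
# round-trips any picklable object through an in-memory buffer and also
# returns the pickled length, so a caller can check both equality and size.
# The test name and the example object are illustrative assumptions, not
# part of the original suite.
def test_check_pickle_roundtrip():
    from numpy.testing import assert_
    obj = {'params': [1.0, 2.0, 3.0], 'nobs': 100}
    res, plen = check_pickle(obj)
    assert_(res == obj)
    assert_(plen > 0)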
def test_pickle_wrapper(self):
    from statsmodels.iolib.smpickle import save_pickle, load_pickle
    from statsmodels.compatnp.py3k import BytesIO
    fh = BytesIO()  # use cPickle with binary content

    # test unwrapped results load save pickle
    self.results._results.save(fh)
    fh.seek(0, 0)
    res_unpickled = self.results._results.__class__.load(fh)
    assert_(type(res_unpickled) is type(self.results._results))

    # test wrapped results load save
    fh.seek(0, 0)
    #save_pickle(self.results, fh)
    self.results.save(fh)
    fh.seek(0, 0)
    #res_unpickled = load_pickle(fh)
    res_unpickled = self.results.__class__.load(fh)
    fh.close()
    #print type(res_unpickled)
    assert_(type(res_unpickled) is type(self.results))

    before = sorted(self.results.__dict__.keys())
    after = sorted(res_unpickled.__dict__.keys())
    assert_(before == after, msg='not equal %r and %r' % (before, after))

    before = sorted(self.results._results.__dict__.keys())
    after = sorted(res_unpickled._results.__dict__.keys())
    assert_(before == after, msg='not equal %r and %r' % (before, after))

    before = sorted(self.results.model.__dict__.keys())
    after = sorted(res_unpickled.model.__dict__.keys())
    assert_(before == after, msg='not equal %r and %r' % (before, after))

    before = sorted(self.results._cache.keys())
    after = sorted(res_unpickled._cache.keys())
    assert_(before == after, msg='not equal %r and %r' % (before, after))
def test_pickle():
    import os
    import tempfile
    from numpy.testing import assert_equal
    from statsmodels.iolib.smpickle import save_pickle, load_pickle

    tmpdir = tempfile.mkdtemp(prefix='pickle')
    a = range(10)
    save_pickle(a, tmpdir + '/res.pkl')
    b = load_pickle(tmpdir + '/res.pkl')
    assert_equal(a, b)

    # cleanup, tested on Windows
    try:
        os.remove(tmpdir + '/res.pkl')
        os.rmdir(tmpdir)
    except (OSError, IOError):
        pass
    assert not os.path.exists(tmpdir)

    # test with file handle
    from statsmodels.compatnp.py3k import BytesIO
    fh = BytesIO()
    save_pickle(a, fh)
    fh.seek(0, 0)
    c = load_pickle(fh)
    fh.close()
    assert_equal(a, c)
def test_stata_writer_pandas():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    # as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(np.dtype([('year', 'i8'), ('quarter', 'i8')] +
                              dtype.descr[2:]))
    dta4 = dta.astype(np.dtype([('year', 'i4'), ('quarter', 'i4')] +
                               dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    dta4 = DataFrame.from_records(dta4)
    # dta is int64 'i8' given to Stata writer
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4' returned from Stata reader
    if dta5.dtypes[1] is np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
def test_datetime_roundtrip():
    dta = np.array([(1, datetime(2010, 1, 1), 2),
                    (2, datetime(2010, 2, 1), 3),
                    (4, datetime(2010, 3, 1), 5)],
                   dtype=[('var1', float), ('var2', object), ('var3', float)])
    buf = BytesIO()
    writer = StataWriter(buf, dta, {"var2": "tm"})
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    assert_equal(dta, dta2)

    dta = DataFrame.from_records(dta)
    buf = BytesIO()
    writer = StataWriter(buf, dta, {"var2": "tm"})
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf, pandas=True)
    ptesting.assert_frame_equal(dta, dta2.drop('index', axis=1))
ss5 = '''\
2 - 3\t4.340\t0.691\t7.989\t***
2 - 1\t4.600\t0.951\t8.249\t***
3 - 2\t-4.340\t-7.989\t-0.691\t***
3 - 1\t0.260\t-3.389\t3.909\t-
1 - 2\t-4.600\t-8.249\t-0.951\t***
1 - 3\t-0.260\t-3.909\t3.389\t'''

# accommodate recfromtxt for python 3.2, requires bytes
ss = asbytes(ss)
ss2 = asbytes(ss2)
ss3 = asbytes(ss3)
ss5 = asbytes(ss5)

dta = np.recfromtxt(BytesIO(ss), names=("Rust", "Brand", "Replication"))
dta2 = np.recfromtxt(BytesIO(ss2), names=("idx", "Treatment", "StressReduction"))
dta3 = np.recfromtxt(BytesIO(ss3), names=("Brand", "Relief"))
dta5 = np.recfromtxt(BytesIO(ss5),
                     names=('pair', 'mean', 'lower', 'upper', 'sig'),
                     delimiter='\t')
sas_ = dta5[[1, 3, 2]]

from statsmodels.stats.multicomp import (tukeyhsd, pairwise_tukeyhsd,
                                         MultiComparison)
#import statsmodels.sandbox.stats.multicomp as multi
#print tukeyhsd(dta['Brand'], dta['Rust'])

def get_thsd(mci, alpha=0.05):
    var_ = np.var(mci.groupstats.groupdemean(), ddof=len(mci.groupsunique))
    means = mci.groupstats.groupmean
    nobs = mci.groupstats.groupnobs
    resi = tukeyhsd(means, nobs, var_, df=None, alpha=alpha,