def webuse(data, baseurl='http://www.stata-press.com/data/r11/', as_df=True):
    """
    Download a Stata dataset from the web and return its contents.

    Parameters
    ----------
    data : str
        Name of dataset to fetch. The ``.dta`` extension is appended
        automatically.
    baseurl : str
        The base URL to the stata datasets.
    as_df : bool
        If True, returns a `pandas.DataFrame`

    Returns
    -------
    dta : DataFrame or Record Array
        When `as_df` is True, a `pandas.DataFrame` built from the records
        read by ``genfromdta``; otherwise the ``genfromdta`` result itself
        (a record array containing the Stata dataset).

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any error
    checking in response URLs.
    """
    # lazy imports
    from statsmodels.iolib import genfromdta

    url = urljoin(baseurl, data+'.dta')
    response = urlopen(url)
    try:
        # Read the whole payload into an in-memory buffer so the reader is
        # handed a truly file-like object instead of the raw response.
        buf = StringIO(response.read())
    finally:
        # Always release the connection, even if the read fails.
        response.close()

    # The file is parsed exactly once; only the wrapping differs per branch.
    records = genfromdta(buf)
    if as_df:
        return DataFrame.from_records(records)
    return records
def get_griliches76_data():
    """
    Load ``griliches76.dta`` and build the Y, X, Z arrays used for testing.

    The data file is read from the directory containing this module. One
    indicator column ``D_<year>`` is created for every distinct value of the
    ``year`` column before the design matrices are assembled.

    Returns
    -------
    Y : Series
        The ``lw`` column of the dataset.
    X : DataFrame
        The columns ``s, iq, expr, tenure, rns, smsa`` and the year dummies
        ``D_67 ... D_73``, passed through ``add_constant(..., prepend=True)``.
    Z : DataFrame
        The columns ``expr, tenure, rns, smsa``, the year dummies and
        ``med, kww, age, mrt``, passed through ``add_constant`` with its
        default arguments. Presumably the instrument matrix; verify against
        callers.
    """
    import os

    curdir = os.path.dirname(__file__)
    path = os.path.join(curdir, 'griliches76.dta')
    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    griliches76_data = iolib.genfromdta(path, missing_flt=np.nan, pandas=True)

    # create year dummies
    year = griliches76_data['year']
    for yr in year.unique():
        # Vectorized indicator; cast to float to match the float columns
        # the original zeros-then-assign loop produced.
        griliches76_data['D_%i' % yr] = (year == yr).astype(float)

    griliches76_data['const'] = 1

    X = add_constant(
        griliches76_data[[
            's', 'iq', 'expr', 'tenure', 'rns', 'smsa',
            'D_67', 'D_68', 'D_69', 'D_70', 'D_71', 'D_73',
        ]],
        prepend=True,  # for R comparison
        # prepend=False would be needed for Stata comparison
    )

    Z = add_constant(
        griliches76_data[[
            'expr', 'tenure', 'rns', 'smsa',
            'D_67', 'D_68', 'D_69', 'D_70', 'D_71', 'D_73',
            'med', 'kww', 'age', 'mrt',
        ]]
    )

    Y = griliches76_data['lw']

    return Y, X, Z
def webuse(data, baseurl='http://www.stata-press.com/data/r11/'):
    """
    Download a Stata dataset from the web and return its contents.

    Parameters
    ----------
    data : str
        Name of dataset to fetch. The ``.dta`` extension is appended
        automatically.
    baseurl : str
        The base URL to the stata datasets.

    Returns
    -------
    dta : Record Array
        A record array containing the Stata dataset.

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any error
    checking in response URLs.
    """
    # lazy imports
    from io import BytesIO
    from statsmodels.iolib import genfromdta
    try:  # Python 3 module layout
        from urllib.request import urlopen
        from urllib.parse import urljoin
    except ImportError:  # Python 2 fallback
        from urllib2 import urlopen
        from urlparse import urljoin

    url = urljoin(baseurl, data+'.dta')
    response = urlopen(url)
    try:
        # The payload is binary; buffer it in memory so the reader gets a
        # truly file-like object instead of the raw response.
        buf = BytesIO(response.read())
    finally:
        # Always release the connection, even if the read fails.
        response.close()
    return genfromdta(buf)
def get_griliches76_data():
    """
    Load ``griliches76.dta`` and build the Y, X, Z arrays used for testing.

    The data file is read from the directory containing this module. One
    indicator column ``D_<year>`` is created for every distinct value of the
    ``year`` column before the design matrices are assembled.

    Returns
    -------
    Y : Series
        The ``lw`` column of the dataset.
    X : DataFrame
        The columns ``s, iq, expr, tenure, rns, smsa`` and the year dummies
        ``D_67 ... D_73``, passed through ``add_constant(..., prepend=True)``.
    Z : DataFrame
        The columns ``expr, tenure, rns, smsa``, the year dummies and
        ``med, kww, age, mrt``, passed through ``add_constant`` with its
        default arguments. Presumably the instrument matrix; verify against
        callers.
    """
    import os

    curdir = os.path.dirname(__file__)
    path = os.path.join(curdir, 'griliches76.dta')
    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    griliches76_data = iolib.genfromdta(path, missing_flt=np.nan, pandas=True)

    # create year dummies
    # Built with a vectorized comparison instead of per-row `.ix` access:
    # `DataFrame.ix` no longer exists in pandas >= 1.0.
    year = griliches76_data['year']
    for yr in year.unique():
        # Cast to float to match the float columns the original
        # zeros-then-assign loop produced.
        griliches76_data['D_%i' % yr] = (year == yr).astype(float)

    griliches76_data['const'] = 1

    X = add_constant(
        griliches76_data[[
            's', 'iq', 'expr', 'tenure', 'rns', 'smsa',
            'D_67', 'D_68', 'D_69', 'D_70', 'D_71', 'D_73',
        ]],
        prepend=True,  # for R comparison
        # prepend=False would be needed for Stata comparison
    )

    Z = add_constant(
        griliches76_data[[
            'expr', 'tenure', 'rns', 'smsa',
            'D_67', 'D_68', 'D_69', 'D_70', 'D_71', 'D_73',
            'med', 'kww', 'age', 'mrt',
        ]]
    )

    Y = griliches76_data['lw']

    return Y, X, Z