Пример #1
0
def webuse(data, baseurl='http://www.stata-press.com/data/r11/', as_df=True):
    """
    Parameters
    ----------
    data : str
        Name of dataset to fetch.
    baseurl : str
        The base URL to the stata datasets.
    as_df : bool
        If True, returns a `pandas.DataFrame`

    Returns
    -------
    dta : Record Array
        A record array containing the Stata dataset.

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any
    error checking in response URLs.
    """
    # lazy imports
    from statsmodels.iolib import genfromdta

    url = urljoin(baseurl, data+'.dta')
    dta = urlopen(url)
    dta = StringIO(dta.read())  # make it truly file-like
    if as_df:  # could make this faster if we don't process dta twice?
        return DataFrame.from_records(genfromdta(dta))
    else:
        return genfromdta(dta)
Пример #2
0
def get_griliches76_data():
    import os
    curdir = os.path.split(__file__)[0]
    path = os.path.join(curdir, 'griliches76.dta')
    griliches76_data = iolib.genfromdta(path, missing_flt=np.NaN, pandas=True)

    # create year dummies
    years = griliches76_data['year'].unique()
    N = griliches76_data.shape[0]

    for yr in years:
        griliches76_data['D_%i' % yr] = np.zeros(N)
        for i in range(N):
            if griliches76_data.loc[griliches76_data.index[i], 'year'] == yr:
                griliches76_data.loc[griliches76_data.index[i],
                                     'D_%i' % yr] = 1
            else:
                pass

    griliches76_data['const'] = 1

    X = add_constant(griliches76_data[[
        's', 'iq', 'expr', 'tenure', 'rns', 'smsa', 'D_67', 'D_68', 'D_69',
        'D_70', 'D_71', 'D_73'
    ]],
                     prepend=True)  # for R comparison
    #prepend=False)  # for Stata comparison

    Z = add_constant(griliches76_data[['expr', 'tenure', 'rns', 'smsa', \
                                       'D_67', 'D_68', 'D_69', 'D_70', 'D_71',
                                       'D_73', 'med', 'kww', 'age', 'mrt']])
    Y = griliches76_data['lw']

    return Y, X, Z
Пример #3
0
def webuse(data, baseurl='http://www.stata-press.com/data/r11/'):
    """
    Parameters
    ----------
    data : str
        Name of dataset to fetch.

    Returns
    -------
    dta : Record Array
        A record array containing the Stata dataset.

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any
    error checking in response URLs.
    """
    # lazy imports
    import pandas
    from statsmodels.iolib import genfromdta
    from urllib2 import urlopen
    from urlparse import urljoin
    from StringIO import StringIO

    url = urljoin(baseurl, data+'.dta')
    dta = urlopen(url)
    dta = StringIO(dta.read()) # make it truly file-like
    return genfromdta(dta)
Пример #4
0
def get_griliches76_data():
    import os
    curdir = os.path.split(__file__)[0]
    path = os.path.join(curdir, 'griliches76.dta')
    griliches76_data = iolib.genfromdta(path, missing_flt=np.NaN, pandas=True)

    # create year dummies
    years = griliches76_data['year'].unique()
    N = griliches76_data.shape[0]

    for yr in years:
        griliches76_data['D_%i' %yr] = np.zeros(N)
        for i in range(N):
            if griliches76_data.ix[i, 'year'] == yr:
                griliches76_data.ix[i, 'D_%i' %yr] = 1
            else:
                pass

    griliches76_data['const'] = 1

    X = add_constant(griliches76_data[['s', 'iq', 'expr', 'tenure', 'rns',
                                       'smsa', 'D_67', 'D_68', 'D_69', 'D_70',
                                       'D_71', 'D_73']],
                                       prepend=True)  # for R comparison
                                       #prepend=False)  # for Stata comparison

    Z = add_constant(griliches76_data[['expr', 'tenure', 'rns', 'smsa', \
                                       'D_67', 'D_68', 'D_69', 'D_70', 'D_71',
                                       'D_73', 'med', 'kww', 'age', 'mrt']])
    Y = griliches76_data['lw']

    return Y, X, Z
Пример #5
0
def webuse(data, baseurl='http://www.stata-press.com/data/r11/'):
    """
    Parameters
    ----------
    data : str
        Name of dataset to fetch.

    Returns
    -------
    dta : Record Array
        A record array containing the Stata dataset.

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any
    error checking in response URLs.
    """
    # lazy imports
    import pandas
    from statsmodels.iolib import genfromdta
    from urllib2 import urlopen
    from urlparse import urljoin
    from StringIO import StringIO

    url = urljoin(baseurl, data+'.dta')
    dta = urlopen(url)
    dta = StringIO(dta.read()) # make it truly file-like
    return genfromdta(dta)