Пример #1
0
def webuse(data, baseurl='http://www.stata-press.com/data/r11/'):
    """
    Download a Stata dataset over HTTP and parse it with genfromdta.

    Parameters
    ----------
    data : str
        Name of dataset to fetch. The '.dta' extension is appended
        automatically.
    baseurl : str, optional
        Base URL that the dataset file name is joined onto with
        ``urljoin``. Defaults to 'http://www.stata-press.com/data/r11/'.

    Returns
    -------
    dta : Record Array
        A record array containing the Stata dataset.

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any
    error checking in response URLs.
    """
    # lazy imports
    from contextlib import closing
    from scikits.statsmodels.iolib import genfromdta
    from urllib2 import urlopen
    from urlparse import urljoin
    from StringIO import StringIO

    url = urljoin(baseurl, data + '.dta')
    # closing() guarantees the HTTP response is closed even if read() raises.
    with closing(urlopen(url)) as response:
        raw = response.read()
    # Wrap the downloaded content so genfromdta gets a truly file-like object.
    return genfromdta(StringIO(raw))
Пример #2
0
def open_mortality(path=None):
    """
    Load the compiled life-table file and keep the 1990, 2005 and 2010 rows.

    Parameters
    ----------
    path : str, optional
        Location of the Stata ``.dta`` file to read. When omitted, the
        original hard-coded results file is used.

    Returns
    -------
    mortality : pandas.DataFrame
        The first seven columns of the file, renamed to 'area', 'sex',
        'year_start', 'age_start', 'lower_ci', 'upper_ci' and 'value',
        restricted to rows whose 'year_start' is 1990, 2005 or 2010.
    """
    # Imported locally so the function does not depend on a module-level
    # pandas import.
    import pandas

    # genfromdta is looked up in iolib first and in lib.io as a fallback
    # (presumably an older package layout). Only a failed import should
    # trigger the fallback, so catch ImportError rather than everything.
    try:
        import scikits.statsmodels.iolib as stata_io
    except ImportError:
        import scikits.statsmodels.lib.io as stata_io

    if path is None:
        path = '/home/j/Project/Mortality/GBD Envelopes/04. Lifetables/02. MORTMatch/cluster/results/compiled/iso3_lt_mean_uncertainty.dta'

    # load file
    mortality = pandas.DataFrame(stata_io.genfromdta(path))
    # keep desired variables
    # NOTE(review): .ix is kept for compatibility with the legacy pandas this
    # code targets; newer pandas spells this .iloc[:, 0:7] -- confirm before
    # upgrading.
    mortality = mortality.ix[:, 0:7]
    mortality.columns = ['area', 'sex', 'year_start', 'age_start', 'lower_ci', 'upper_ci', 'value']
    # keep only the rows for the years of interest
    years = mortality['year_start']
    mortality = mortality[(years == 1990) | (years == 2005) | (years == 2010)]
    return mortality
Пример #3
0
def open_mortality(path=None):
    """
    Load the compiled life-table file and keep the 1990, 2005 and 2010 rows.

    Parameters
    ----------
    path : str, optional
        Location of the Stata ``.dta`` file to read. When omitted, the
        original hard-coded results file is used.

    Returns
    -------
    mortality : pandas.DataFrame
        The first seven columns of the file, renamed to 'area', 'sex',
        'year_start', 'age_start', 'lower_ci', 'upper_ci' and 'value',
        restricted to rows whose 'year_start' is 1990, 2005 or 2010.
    """
    # Imported locally so the function does not depend on a module-level
    # pandas import.
    import pandas

    # genfromdta is looked up in iolib first and in lib.io as a fallback
    # (presumably an older package layout). Only a failed import should
    # trigger the fallback, so catch ImportError rather than everything.
    try:
        import scikits.statsmodels.iolib as stata_io
    except ImportError:
        import scikits.statsmodels.lib.io as stata_io

    if path is None:
        path = (
            '/home/j/Project/Mortality/GBD Envelopes/04. Lifetables/02. MORTMatch/cluster/results/compiled/iso3_lt_mean_uncertainty.dta'
        )

    # load file
    mortality = pandas.DataFrame(stata_io.genfromdta(path))
    # keep desired variables
    # NOTE(review): .ix is kept for compatibility with the legacy pandas this
    # code targets; newer pandas spells this .iloc[:, 0:7] -- confirm before
    # upgrading.
    mortality = mortality.ix[:, 0:7]
    mortality.columns = [
        'area', 'sex', 'year_start', 'age_start', 'lower_ci', 'upper_ci',
        'value'
    ]
    # keep only the rows for the years of interest
    years = mortality['year_start']
    mortality = mortality[(years == 1990) |
                          (years == 2005) |
                          (years == 2010)]
    return mortality