Exemplo n.º 1
0
def webuse(data, baseurl='http://www.stata-press.com/data/r11/', as_df=True):
    """
    Download a Stata ``.dta`` dataset and load it into memory.

    Parameters
    ----------
    data : str
        Name of dataset to fetch.
    baseurl : str
        The base URL to the stata datasets.
    as_df : bool
        If True, returns a `pandas.DataFrame`

    Returns
    -------
    dta : pandas.DataFrame or Record Array
        If `as_df` is True, a DataFrame built from the records read by
        ``genfromdta``; otherwise the ``genfromdta`` result itself (a
        record array containing the Stata dataset).

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any
    error checking in response URLs.
    """
    # lazy imports
    from statsmodels.iolib import genfromdta

    url = urljoin(baseurl, data+'.dta')
    response = urlopen(url)
    try:
        # copy the payload into memory so the reader gets a seekable,
        # truly file-like object
        dta = StringIO(response.read())
    finally:
        # release the connection even if reading fails
        response.close()

    records = genfromdta(dta)
    if as_df:
        return DataFrame.from_records(records)
    return records
Exemplo n.º 2
0
    def _coef_table(self):
        """Return the per-equation coefficient tables as one string.

        Every equation contributes a "Results for equation ..." heading
        followed by a table with coefficient, standard error, t-statistic
        and p-value columns; a blank line separates consecutive equations.
        """
        model = self.model
        neqs = model.neqs
        stub_names = self.model.exog_names

        # one (coefficient, std. error, t-stat, prob) tuple per parameter
        coefs = model.params.T.ravel()
        std_errs = model.stderr.T.ravel()
        t_stats = model.tvalues.T.ravel()
        p_vals = model.pvalues.T.ravel()
        rows = lzip(coefs, std_errs, t_stats, p_vals)

        col_titles = ('coefficient', 'std. error', 't-stat', 'prob')

        out = StringIO()
        # number of rows that belong to a single equation
        rows_per_eq = neqs * model.k_ar + model.k_trend
        last_eq = neqs - 1
        for eq in range(neqs):
            heading = "Results for equation %s" % model.names[eq]
            out.write(heading + '\n')

            first = rows_per_eq * eq
            table = SimpleTable(rows[first:first + rows_per_eq],
                                col_titles,
                                stub_names,
                                title=None,
                                txt_fmt=self.default_fmt)
            out.write(str(table) + '\n')

            # blank separator between equations, none after the last one
            if eq != last_eq:
                out.write('\n')

        return out.getvalue()
Exemplo n.º 3
0
    def _coef_table(self):
        """Build the text block holding one coefficient table per equation.

        The flattened parameter statistics are cut into consecutive chunks
        of ``neqs * k_ar + k_trend`` rows, one chunk per equation, and each
        chunk is rendered under its own heading.
        """
        model = self.model
        n_equations = model.neqs
        row_labels = self.model.exog_names

        stats = lzip(model.params.T.ravel(),
                     model.stderr.T.ravel(),
                     model.tvalues.T.ravel(),
                     model.pvalues.T.ravel())
        titles = ('coefficient', 'std. error', 't-stat', 'prob')

        text = StringIO()
        chunk = n_equations * model.k_ar + model.k_trend
        for pos in range(n_equations):
            text.write("Results for equation %s" % model.names[pos] + '\n')

            lo, hi = chunk * pos, chunk * (pos + 1)
            rendered = SimpleTable(stats[lo:hi], titles, row_labels,
                                   title=None, txt_fmt=self.default_fmt)
            text.write(str(rendered) + '\n')

            # separate equations with an empty line, except after the last
            if pos < n_equations - 1:
                text.write('\n')

        return text.getvalue()
Exemplo n.º 4
0
    def make(self, endog_names=None, exog_names=None):
        """
        Summary of VAR model

        Joins the header, statistics, coefficient and residual sections,
        each followed by a newline. `endog_names` and `exog_names` are
        accepted but not used here.
        """
        section_methods = ('_header_table', '_stats_table',
                           '_coef_table', '_resid_info')

        out = StringIO()
        for method_name in section_methods:
            section_text = getattr(self, method_name)()
            out.write(section_text + '\n')

        return out.getvalue()
Exemplo n.º 5
0
def test_formula_labels():
    """Check that the fitted values keep the row labels of the input data."""
    # make sure labels pass through patsy as expected
    # data(Duncan) from car in R
    from pandas import read_csv

    # Every data row has one more field than the header line; pandas is
    # expected to take that leading field (the occupation name) as the index.
    raw = StringIO(""""type","income","education","prestige"\n"accountant","prof",62,86,82\n"pilot","prof",72,76,83\n"architect","prof",75,92,90\n"author","prof",55,90,76\n"chemist","prof",64,86,90\n"minister","prof",21,84,87\n"professor","prof",64,93,93\n"dentist","prof",80,100,90\n"reporter","wc",67,87,52\n"engineer","prof",72,86,88\n"undertaker","prof",42,74,57\n"lawyer","prof",76,98,89\n"physician","prof",76,97,97\n"welfare.worker","prof",41,84,59\n"teacher","prof",48,91,73\n"conductor","wc",76,34,38\n"contractor","prof",53,45,76\n"factory.owner","prof",60,56,81\n"store.manager","prof",42,44,45\n"banker","prof",78,82,92\n"bookkeeper","wc",29,72,39\n"mail.carrier","wc",48,55,34\n"insurance.agent","wc",55,71,41\n"store.clerk","wc",29,50,16\n"carpenter","bc",21,23,33\n"electrician","bc",47,39,53\n"RR.engineer","bc",81,28,67\n"machinist","bc",36,32,57\n"auto.repairman","bc",22,22,26\n"plumber","bc",44,25,29\n"gas.stn.attendant","bc",15,29,10\n"coal.miner","bc",7,7,15\n"streetcar.motorman","bc",42,26,19\n"taxi.driver","bc",9,19,10\n"truck.driver","bc",21,15,13\n"machine.operator","bc",21,20,24\n"barber","bc",16,26,20\n"bartender","bc",16,28,7\n"shoe.shiner","bc",9,17,3\n"cook","bc",14,22,16\n"soda.clerk","bc",12,30,6\n"watchman","bc",17,25,11\n"janitor","bc",7,20,8\n"policeman","bc",34,47,41\n"waiter","bc",8,32,10""")
    frame = read_csv(raw)
    fitted = ols("prestige ~ income + education", frame).fit()
    assert_equal(fitted.fittedvalues.index, frame.index)
Exemplo n.º 6
0
def test_formula_labels():
    """Check that the fitted values keep the row labels of the input data."""
    # make sure labels pass through patsy as expected
    # data(Duncan) from car in R
    from pandas import read_table

    # Every data row has one more field than the header line; pandas is
    # expected to take that leading field (the occupation name) as the index.
    raw = StringIO(""""type" "income" "education" "prestige"\n"accountant" "prof" 62 86 82\n"pilot" "prof" 72 76 83\n"architect" "prof" 75 92 90\n"author" "prof" 55 90 76\n"chemist" "prof" 64 86 90\n"minister" "prof" 21 84 87\n"professor" "prof" 64 93 93\n"dentist" "prof" 80 100 90\n"reporter" "wc" 67 87 52\n"engineer" "prof" 72 86 88\n"undertaker" "prof" 42 74 57\n"lawyer" "prof" 76 98 89\n"physician" "prof" 76 97 97\n"welfare.worker" "prof" 41 84 59\n"teacher" "prof" 48 91 73\n"conductor" "wc" 76 34 38\n"contractor" "prof" 53 45 76\n"factory.owner" "prof" 60 56 81\n"store.manager" "prof" 42 44 45\n"banker" "prof" 78 82 92\n"bookkeeper" "wc" 29 72 39\n"mail.carrier" "wc" 48 55 34\n"insurance.agent" "wc" 55 71 41\n"store.clerk" "wc" 29 50 16\n"carpenter" "bc" 21 23 33\n"electrician" "bc" 47 39 53\n"RR.engineer" "bc" 81 28 67\n"machinist" "bc" 36 32 57\n"auto.repairman" "bc" 22 22 26\n"plumber" "bc" 44 25 29\n"gas.stn.attendant" "bc" 15 29 10\n"coal.miner" "bc" 7 7 15\n"streetcar.motorman" "bc" 42 26 19\n"taxi.driver" "bc" 9 19 10\n"truck.driver" "bc" 21 15 13\n"machine.operator" "bc" 21 20 24\n"barber" "bc" 16 26 20\n"bartender" "bc" 16 28 7\n"shoe.shiner" "bc" 9 17 3\n"cook" "bc" 14 22 16\n"soda.clerk" "bc" 12 30 6\n"watchman" "bc" 17 25 11\n"janitor" "bc" 7 20 8\n"policeman" "bc" 34 47 41\n"waiter" "bc" 8 32 10""")
    frame = read_table(raw, sep=" ")
    fitted = ols("prestige ~ income + education", frame).fit()
    assert_equal(fitted.fittedvalues.index, frame.index)
Exemplo n.º 7
0
def print_ic_table(ics, selected_orders):
    """
    For VAR order selection

    Prints a table with one column per key of `ics` (sorted) and one row
    per value position. For each column, the cell in the row given by
    `selected_orders` gets a trailing ``*``.
    """
    # Can factor this out into a utility method if so desired

    criteria = sorted(ics)

    formatted = [["%#10.4g" % value for value in ics[name]]
                 for name in criteria]
    # transpose so that each criterion becomes a column
    cells = mat(formatted, dtype=object).T

    # star the selected row of every criterion column
    for col_pos, name in enumerate(criteria):
        row_pos = int(selected_orders[name])
        cells[row_pos, col_pos] = cells[row_pos, col_pos] + '*'

    table_fmt = dict(_default_table_fmt, data_fmts=("%s", ) * len(criteria))
    table = SimpleTable(cells,
                        criteria,
                        lrange(len(cells)),
                        title='VAR Order Selection',
                        txt_fmt=table_fmt)

    out = StringIO()
    out.write(str(table) + '\n')
    out.write('* Minimum' + '\n')

    print(out.getvalue())
Exemplo n.º 8
0
def pprint_matrix(values, rlabels, clabels, col_space=None):
    """Format a labelled 2-d table as fixed-width text.

    Parameters
    ----------
    values : indexable by ``[i, j]``
        Cell values; row ``i`` pairs with ``rlabels[i]`` and column ``j``
        with ``clabels[j]``.
    rlabels, clabels : sequence
        Row and column labels.
    col_space : int, optional
        Width used for every column. When omitted, each column is as wide
        as its label plus two, with a minimum of 10.

    Returns
    -------
    str
        A header line followed by one line per row label.
    """
    _n_rows, n_cols = len(rlabels), len(clabels)

    if col_space is None:
        widths = [max(len(str(label)) + 2, 10) for label in clabels]
    else:
        widths = (col_space, ) * n_cols

    # the label column fits the longest row label plus padding
    stub_width = max([len(str(label)) for label in rlabels]) + 2

    out = StringIO()

    header = _pfixed('', stub_width)
    for label, width in zip(clabels, widths):
        header += _pfixed(label, width)
    out.write(header + '\n')

    for row_pos, row_label in enumerate(rlabels):
        line = ('%s' % row_label).ljust(stub_width)
        for col_pos in range(n_cols):
            line += _pfixed(values[row_pos, col_pos], widths[col_pos])
        out.write(line + '\n')

    return out.getvalue()
Exemplo n.º 9
0
    def _resid_info(self):
        """Return the residual correlation matrix as text under a heading."""
        model = self.model
        labels = model.names
        # equation names label both the rows and the columns
        matrix_text = pprint_matrix(model.resid_corr, labels, labels)

        out = StringIO()
        out.write("Correlation matrix of residuals" + '\n')
        out.write(matrix_text + '\n')
        return out.getvalue()
Exemplo n.º 10
0
    def make(self, endog_names=None, exog_names=None):
        """
        Summary of VAR model

        Returns the header, statistics, coefficient and residual sections
        in that order, each terminated by a newline. The two keyword
        arguments are accepted but not used here.
        """
        renderers = (
            self._header_table,
            self._stats_table,
            self._coef_table,
            self._resid_info,
        )

        text = StringIO()
        for render in renderers:
            text.write(render() + '\n')
        return text.getvalue()
Exemplo n.º 11
0
def load_basic_data():
    """Return the embedded ``res``/``qual`` sample as a DataFrame.

    Two columns are added to the parsed CSV: ``conc``, a copy of ``res``,
    and ``censored``, which is True where ``qual`` equals ``'ND'``.
    """
    raw_csv = StringIO(
        "res,qual\n2.00,=\n4.20,=\n4.62,=\n5.00,ND\n5.00,ND\n5.50,ND\n"
        "5.57,=\n5.66,=\n5.75,ND\n5.86,=\n6.65,=\n6.78,=\n6.79,=\n7.50,=\n"
        "7.50,=\n7.50,=\n8.63,=\n8.71,=\n8.99,=\n9.50,ND\n9.50,ND\n9.85,=\n"
        "10.82,=\n11.00,ND\n11.25,=\n11.25,=\n12.20,=\n14.92,=\n16.77,=\n"
        "17.81,=\n19.16,=\n19.19,=\n19.64,=\n20.18,=\n22.97,=\n")
    parsed = pandas.read_csv(raw_csv)
    with_conc = parsed.assign(conc=lambda frame: frame['res'])
    result = with_conc.assign(censored=lambda frame: frame['qual'] == 'ND')
    return result
Exemplo n.º 12
0
def _get_dataset_meta(dataname, package, cache):
    """Return the ``Title`` entry of one dataset from the Rdatasets index.

    The index CSV is fetched through ``_urlopen_cached`` and the row whose
    ``Item`` equals `dataname` and whose ``Package`` equals `package` is
    selected.
    """
    # get the index, you'll probably want this cached because you have
    # to download info about all the data to get info about any of the data...
    index_url = ("https://raw.githubusercontent.com/vincentarelbundock/"
                 "Rdatasets/master/datasets.csv")
    raw, _ = _urlopen_cached(index_url, cache)
    text = raw.decode('utf-8', 'strict')
    catalog = read_csv(StringIO(text))

    is_item = catalog.Item == dataname
    is_package = catalog.Package == package
    selected = catalog.loc[np.logical_and(is_item, is_package)]
    return selected["Title"].item()
Exemplo n.º 13
0
def _get_data(base_url, dataname, cache, extension="csv"):
    """Fetch ``<base_url><dataname>.<extension>`` and return it as text.

    Parameters
    ----------
    base_url : str
        URL prefix the file name is appended to.
    dataname : str
        Name of the dataset (file name without extension).
    cache
        Passed through unchanged to ``_urlopen_cached``.
    extension : str
        File extension, without the leading dot.

    Returns
    -------
    data : StringIO
        The payload decoded as UTF-8, wrapped in a file-like object.
    from_cache
        Second value returned by ``_urlopen_cached``.

    Raises
    ------
    ValueError
        If the request fails with an HTTP error mentioning 404.
    HTTPError
        Any other HTTP error is re-raised unchanged.
    """
    # Format name and extension together: applying ``%`` to
    # ``dataname + ".%s"`` breaks when the name itself contains a '%'.
    url = base_url + "%s.%s" % (dataname, extension)
    try:
        data, from_cache = _urlopen_cached(url, cache)
    except HTTPError as err:
        if '404' in str(err):
            raise ValueError("Dataset %s was not found." % dataname)
        # bare raise keeps the original traceback intact
        raise

    data = data.decode('utf-8', 'strict')
    return StringIO(data), from_cache
Exemplo n.º 14
0
def pprint_matrix(values, rlabels, clabels, col_space=None):
    """Render `values` as an aligned text table with row and column labels.

    `values` is read as ``values[i, j]`` for every row label ``i`` and
    column label ``j``. `col_space`, when given, is the width of every
    column; otherwise each column is sized from its label (label length
    plus two, at least 10). Returns the table as a single string.
    """
    _row_count, col_count = len(rlabels), len(clabels)

    default_min = 10
    if col_space is not None:
        col_widths = (col_space,) * col_count
    else:
        col_widths = [max(len(str(name)) + 2, default_min)
                      for name in clabels]

    label_width = max([len(str(name)) for name in rlabels]) + 2

    # header: empty stub cell followed by the column labels
    pieces = [_pfixed('', label_width)]
    for pos, name in enumerate(clabels):
        pieces.append(_pfixed(name, col_widths[pos]))

    text = StringIO()
    text.write(''.join(pieces) + '\n')

    for r, row_name in enumerate(rlabels):
        row_text = ('%s' % row_name).ljust(label_width)
        for c in range(col_count):
            row_text += _pfixed(values[r, c], col_widths[c])
        text.write(row_text + '\n')

    return text.getvalue()
Exemplo n.º 15
0
def print_ic_table(ics, selected_orders):
    """
    For VAR order selection

    Prints one column per key of `ics` (in sorted order); within each
    column the row indexed by ``selected_orders[key]`` is marked with a
    trailing ``*``.
    """
    # Can factor this out into a utility method if so desired

    names = sorted(ics)

    per_criterion = []
    for name in names:
        per_criterion.append(["%#10.4g" % entry for entry in ics[name]])
    grid = mat(per_criterion, dtype=object).T

    # star the selected entry in each column
    for column, name in enumerate(names):
        where = (int(selected_orders[name]), column)
        grid[where] = grid[where] + '*'

    fmt = dict(_default_table_fmt,
               data_fmts=("%s",) * len(names))
    rendered = str(SimpleTable(grid, names, lrange(len(grid)),
                               title='VAR Order Selection', txt_fmt=fmt))

    text = StringIO()
    text.write(rendered + '\n')
    text.write('* Minimum' + '\n')

    print(text.getvalue())
Exemplo n.º 16
0
    def summary(self):
        """Print one "FEVD for <name>" table per equation."""
        out = StringIO()

        # row labels of every table: 0 .. periods - 1
        period_labels = lrange(self.periods)
        for eq in range(self.neqs):
            table_text = output.pprint_matrix(self.decomp[eq],
                                              period_labels,
                                              self.names)
            heading = 'FEVD for %s\n' % self.names[eq]
            out.write(heading)
            out.write(table_text + '\n')

        print(out.getvalue())
Exemplo n.º 17
0
def check_pickle(obj):
    """Round-trip `obj` through pickle using an in-memory buffer.

    Parameters
    ----------
    obj : object
        Any picklable object.

    Returns
    -------
    res : object
        The object loaded back from the pickled data.
    plen : int
        Buffer position right after dumping, i.e. the size of the
        pickled payload.
    """
    fh = StringIO()
    try:
        cPickle.dump(obj, fh)
        # tell() is part of the generic file API; the ``pos`` attribute only
        # exists on the pure-Python Python 2 StringIO class
        plen = fh.tell()
        fh.seek(0, 0)
        res = cPickle.load(fh)
    finally:
        # release the buffer even if pickling or unpickling fails
        fh.close()
    return res, plen
Exemplo n.º 18
0
    def _resid_info(self):
        """Build the "Correlation matrix of residuals" section as text."""
        eq_names = self.model.names
        corr_text = pprint_matrix(self.model.resid_corr, eq_names, eq_names)

        lines = ["Correlation matrix of residuals" + '\n', corr_text + '\n']

        text = StringIO()
        for chunk in lines:
            text.write(chunk)
        return text.getvalue()
Exemplo n.º 19
0
    def summary(self):
        """Print the decomposition table of every equation, each under a
        "FEVD for <name>" heading."""
        text = StringIO()
        steps = lrange(self.periods)

        eq = 0
        while eq < self.neqs:
            rendered = output.pprint_matrix(self.decomp[eq], steps, self.names)
            text.write('FEVD for %s\n' % self.names[eq])
            text.write(rendered + '\n')
            eq += 1

        print(text.getvalue())
Exemplo n.º 20
0
def get_ic_table(ics, selected_orders):
    '''
    Convert lag-order selection results into a formatted text table.

    :param ics: lag-order results; indexed by column name, each entry is
        iterated to produce the formatted cells of that column
    :param selected_orders: indexed by the same column names; each value
        is the row whose cell gets a trailing '*'
    :return: the rendered table followed by a "* Minimum" legend line
    '''
    table_defaults = {
        'empty_cell': '',
        'colsep': '  ',
        'row_pre': '',
        'row_post': '',
        'table_dec_above': '=',
        'table_dec_below': '=',
        'header_dec_below': '-',
        'header_fmt': '%s',
        'stub_fmt': '%s',
        'title_align': 'c',
        'header_align': 'r',
        'data_aligns': 'r',
        'stubs_align': 'l',
        'fmt': 'txt',
    }
    names = sorted(ics)

    formatted = [["%#10.4g" % entry for entry in ics[name]] for name in names]
    # transpose so that each name becomes a column
    cells = np.array(formatted, dtype=object).T

    # star the selected row in each column
    for col_pos, name in enumerate(names):
        row_pos = int(selected_orders[name])
        cells[row_pos, col_pos] = cells[row_pos, col_pos] + '*'

    table_fmt = dict(table_defaults, data_fmts=("%s", ) * len(names))
    table = SimpleTable(cells,
                        names,
                        lrange(len(cells)),
                        title='VAR Order Selection',
                        txt_fmt=table_fmt)

    out = StringIO()
    out.write(str(table) + '\n')
    out.write('* Minimum' + '\n')
    return out.getvalue()
Exemplo n.º 21
0
# In-memory, whitespace-separated data table wrapped in a file-like object.
# The first line is the header (Days, Duration, Weight, ID) and is followed by
# 60 data rows. The buffer is created once at import time, so reading it
# consumes it.
kidney_table = StringIO("""Days      Duration Weight ID
    0.0      1      1      1
    2.0      1      1      2
    1.0      1      1      3
    3.0      1      1      4
    0.0      1      1      5
    2.0      1      1      6
    0.0      1      1      7
    5.0      1      1      8
    6.0      1      1      9
    8.0      1      1     10
    2.0      1      2      1
    4.0      1      2      2
    7.0      1      2      3
   12.0      1      2      4
   15.0      1      2      5
    4.0      1      2      6
    3.0      1      2      7
    1.0      1      2      8
    5.0      1      2      9
   20.0      1      2     10
   15.0      1      3      1
   10.0      1      3      2
    8.0      1      3      3
    5.0      1      3      4
   25.0      1      3      5
   16.0      1      3      6
    7.0      1      3      7
   30.0      1      3      8
    3.0      1      3      9
   27.0      1      3     10
    0.0      2      1      1
    1.0      2      1      2
    1.0      2      1      3
    0.0      2      1      4
    4.0      2      1      5
    2.0      2      1      6
    7.0      2      1      7
    4.0      2      1      8
    0.0      2      1      9
    3.0      2      1     10
    5.0      2      2      1
    3.0      2      2      2
    2.0      2      2      3
    0.0      2      2      4
    1.0      2      2      5
    1.0      2      2      6
    3.0      2      2      7
    6.0      2      2      8
    7.0      2      2      9
    9.0      2      2     10
   10.0      2      3      1
    8.0      2      3      2
   12.0      2      3      3
    3.0      2      3      4
    7.0      2      3      5
   15.0      2      3      6
    4.0      2      3      7
    9.0      2      3      8
    6.0      2      3      9
    1.0      2      3     10
""")
Exemplo n.º 22
0
class Test_ROS_RNADAdata(CheckROSMixin):
    """Fixture data for the checks inherited from ``CheckROSMixin``.

    Holds an embedded ``res``/``cen`` table, the column names to use and the
    expected results. NOTE(review): how the mixin consumes these attributes
    is not visible here - confirm against ``CheckROSMixin``.
    """
    # presumably the precision used by the mixin's comparisons - TODO confirm
    decimal = 3
    # Whitespace-separated table with a result column and a True/False
    # censoring column. The buffer is read once, at class-definition time,
    # by the ``read_csv`` call below.
    datastring = StringIO(dedent("""\
        res cen
        0.090  True
        0.090  True
        0.090  True
        0.101 False
        0.136 False
        0.340 False
        0.457 False
        0.514 False
        0.629 False
        0.638 False
        0.774 False
        0.788 False
        0.900  True
        0.900  True
        0.900  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000 False
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.000  True
        1.100 False
        2.000 False
        2.000 False
        2.404 False
        2.860 False
        3.000 False
        3.000 False
        3.705 False
        4.000 False
        5.000 False
        5.960 False
        6.000 False
        7.214 False
       16.000 False
       17.716 False
       25.000 False
       51.000 False"""
    ))
    # names of the result and censoring columns in ``df``
    rescol = 'res'
    cencol = 'cen'
    # raw string: '\s' is not a valid escape sequence in a plain literal
    df = pandas.read_csv(datastring, sep=r'\s+')
    # one expected value per data row (50 entries)
    expected_final = numpy.array([
        0.01907990,  0.03826254,  0.06080717,  0.10100000,  0.13600000,
        0.34000000,  0.45700000,  0.51400000,  0.62900000,  0.63800000,
        0.77400000,  0.78800000,  0.08745914,  0.25257575,  0.58544205,
        0.01711153,  0.03373885,  0.05287083,  0.07506079,  0.10081573,
        1.00000000,  0.13070334,  0.16539309,  0.20569039,  0.25257575,
        0.30725491,  0.37122555,  0.44636843,  0.53507405,  0.64042242,
        0.76644378,  0.91850581,  1.10390531,  1.10000000,  2.00000000,
        2.00000000,  2.40400000,  2.86000000,  3.00000000,  3.00000000,
        3.70500000,  4.00000000,  5.00000000,  5.96000000,  6.00000000,
        7.21400000, 16.00000000, 17.71600000, 25.00000000, 51.00000000
    ])

    # expected summary table, four rows per column
    expected_cohn = pandas.DataFrame({
        'nuncen_above': numpy.array([9., 0.0, 18., numpy.nan]),
        'nobs_below': numpy.array([3., 15., 32., numpy.nan]),
        'ncen_equal': numpy.array([3., 3., 17., numpy.nan]),
        'prob_exceedance': numpy.array([0.84, 0.36, 0.36, 0]),
    })
Exemplo n.º 23
0
def hypothesis_test_table(results, title, null_hyp):
    """Return a text report for one hypothesis test.

    The report consists of `title`, a one-row table with the test
    statistic, critical value, p-value and degrees of freedom taken from
    `results`, the `null_hyp` line and a conclusion sentence built from
    ``results['conclusion']`` and ``results['signif']``.
    """
    table_fmt = dict(_default_table_fmt,
                     data_fmts=["%#15.6F", "%#15.6F", "%#15.3F", "%s"])

    row = [results['statistic'],
           results['crit_value'],
           results['pvalue'],
           str(results['df'])]
    col_titles = ['Test statistic', 'Critical Value', 'p-value', 'df']
    table = SimpleTable([row], col_titles, [''], title=None,
                        txt_fmt=table_fmt)

    out = StringIO()
    for line in (title, str(table), null_hyp):
        out.write(line + '\n')

    # the conclusion sentence is not terminated by a newline
    out.write("Conclusion: %s H_0" % results['conclusion'])
    out.write(" at %.2f%% significance level" % (results['signif'] * 100))

    return out.getvalue()
Exemplo n.º 24
0
 def open(self, filename):
     """Return the first member whose name contains `filename`, wrapped
     in a file-like object."""
     candidates = [name for name in self.namelist() if filename in name]
     # raises IndexError when no member name contains `filename`
     first_match = candidates[0]
     contents = self.read(first_match)
     return StringIO(contents)
Exemplo n.º 25
0
# In-memory, whitespace-separated data table wrapped in a file-like object.
# The first line is the header (Days, Duration, Weight, ID) and is followed by
# 60 data rows. The buffer is created once at import time, so reading it
# consumes it.
kidney_table = StringIO("""Days      Duration Weight ID
    0.0      1      1      1
    2.0      1      1      2
    1.0      1      1      3
    3.0      1      1      4
    0.0      1      1      5
    2.0      1      1      6
    0.0      1      1      7
    5.0      1      1      8
    6.0      1      1      9
    8.0      1      1     10
    2.0      1      2      1
    4.0      1      2      2
    7.0      1      2      3
   12.0      1      2      4
   15.0      1      2      5
    4.0      1      2      6
    3.0      1      2      7
    1.0      1      2      8
    5.0      1      2      9
   20.0      1      2     10
   15.0      1      3      1
   10.0      1      3      2
    8.0      1      3      3
    5.0      1      3      4
   25.0      1      3      5
   16.0      1      3      6
    7.0      1      3      7
   30.0      1      3      8
    3.0      1      3      9
   27.0      1      3     10
    0.0      2      1      1
    1.0      2      1      2
    1.0      2      1      3
    0.0      2      1      4
    4.0      2      1      5
    2.0      2      1      6
    7.0      2      1      7
    4.0      2      1      8
    0.0      2      1      9
    3.0      2      1     10
    5.0      2      2      1
    3.0      2      2      2
    2.0      2      2      3
    0.0      2      2      4
    1.0      2      2      5
    1.0      2      2      6
    3.0      2      2      7
    6.0      2      2      8
    7.0      2      2      9
    9.0      2      2     10
   10.0      2      3      1
    8.0      2      3      2
   12.0      2      3      3
    3.0      2      3      4
    7.0      2      3      5
   15.0      2      3      6
    4.0      2      3      7
    9.0      2      3      8
    6.0      2      3      9
    1.0      2      3     10
""")
Exemplo n.º 26
0
def hypothesis_test_table(results, title, null_hyp):
    """Format the outcome of a hypothesis test as text.

    Output layout: `title`, a single-row table (test statistic, critical
    value, p-value, degrees of freedom), `null_hyp`, and a final sentence
    stating the conclusion and the significance level in percent.
    """
    number_formats = ["%#15.6F", "%#15.6F", "%#15.3F", "%s"]
    fmt = dict(_default_table_fmt, data_fmts=number_formats)

    headers = ['Test statistic', 'Critical Value', 'p-value', 'df']
    values = [[
        results['statistic'],
        results['crit_value'],
        results['pvalue'],
        str(results['df']),
    ]]
    rendered = str(SimpleTable(values, headers, [''],
                               title=None,
                               txt_fmt=fmt))

    text = StringIO()
    text.write(title + '\n')
    text.write(rendered + '\n')
    text.write(null_hyp + '\n')

    conclusion = "Conclusion: %s H_0" % results['conclusion']
    level = " at %.2f%% significance level" % (results['signif'] * 100)
    text.write(conclusion)
    text.write(level)

    return text.getvalue()
Exemplo n.º 27
0
])
# Array of string labels, one per observation. Despite the variable name,
# the values are country names (USA, France, Japan, Germany, Sweden).
cyl_labels = np.array([
    'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA',
    'France', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA',
    'Japan', 'USA', 'USA', 'USA', 'Japan', 'Germany', 'France', 'Germany',
    'Sweden', 'Germany', 'USA', 'USA', 'USA', 'USA', 'USA', 'Germany', 'USA',
    'USA', 'France', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA',
    'USA', 'USA', 'Germany', 'Japan', 'USA', 'USA', 'USA', 'USA', 'Germany',
    'Japan', 'Japan', 'USA', 'Sweden', 'USA', 'France', 'Japan', 'Germany',
    'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA',
    'USA', 'USA', 'Germany', 'Japan', 'Japan', 'USA', 'USA', 'Japan', 'Japan',
    'Japan', 'Japan', 'Japan', 'Japan', 'USA', 'USA', 'USA', 'USA', 'Japan',
    'USA', 'USA', 'USA', 'Germany', 'USA', 'USA', 'USA'
])

# Parse the text tables ss, ss2 and ss3 (defined outside this excerpt) into
# arrays whose fields carry the names given here.
# NOTE(review): recent NumPy releases deprecate np.recfromtxt in favour of
# np.genfromtxt - confirm against the NumPy versions this file must support.
dta = np.recfromtxt(StringIO(ss), names=("Rust", "Brand", "Replication"))
dta2 = np.recfromtxt(StringIO(ss2),
                     names=("idx", "Treatment", "StressReduction"))
dta3 = np.recfromtxt(StringIO(ss3), names=("Brand", "Relief"))

from statsmodels.sandbox.stats.multicomp import tukeyhsd
import statsmodels.sandbox.stats.multicomp as multi
#print tukeyhsd(dta['Brand'], dta['Rust'])


def get_thsd(mci):
    var_ = np.var(mci.groupstats.groupdemean(), ddof=len(mci.groupsunique))
    means = mci.groupstats.groupmean
    nobs = mci.groupstats.groupnobs
    resi = tukeyhsd(means,
                    nobs,
Exemplo n.º 28
0
# Module-level debug switch; its consumers are not visible in this excerpt.
DEBUG = False

# Tab-separated table: a header line (agecat, smokes, deaths, pyears)
# followed by ten data rows.
ss = '''\
agecat	smokes	deaths	pyears
1	1	32	52407
2	1	104	43248
3	1	206	28612
4	1	186	12663
5	1	102	5317
1	0	2	18790
2	0	12	10673
3	0	28	5710
4	0	28	2585
5	0	31	1462'''

# Parse the table, cast every column to int and add the natural log of
# ``pyears`` as an extra column.
data = pd.read_csv(StringIO(ss), delimiter='\t')
data = data.astype(int)
data['logpyears'] = np.log(data['pyears'])


class CheckPoissonConstrainedMixin(object):
    def test_basic(self):
        # res1 is indexed as (params, covariance); res2 holds the reference
        # params and bse, restricted to the positions in self.idx
        ours, reference = self.res1, self.res2
        selected = self.idx

        assert_allclose(ours[0], reference.params[selected], rtol=1e-6)

        # see below Stata has nan, we have zero
        our_bse = np.sqrt(np.diag(ours[1]))
        ref_bse = reference.bse[selected]
        # skip entries where we report 0 and the reference reports nan
        keep = ~((our_bse == 0) & np.isnan(ref_bse))
        assert_allclose(our_bse[keep], ref_bse[keep], rtol=1e-6)

    def test_basic_method(self):
Exemplo n.º 29
0
    Means	Simultaneous 95% Confidence Limits	 Sign.
    2 - 3	4.340	0.691	7.989	***
    2 - 1	4.600	0.951	8.249	***
    3 - 2	-4.340	-7.989	-0.691	***
    3 - 1	0.260	-3.389	3.909	 -
    1 - 2	-4.600	-8.249	-0.951	***
    1 - 3	-0.260	-3.909	3.389	'''

    ss5 = '''\
    2 - 3	 4.340	 0.691 	 7.989	***
    2 - 1	 4.600	 0.951	 8.249	***
    3 - 2	-4.340	-7.989	-0.691	***
    3 - 1	 0.260	-3.389	 3.909	 -
    1 - 2	-4.600	-8.249	-0.951	***
    1 - 3	-0.260	-3.909	 3.389	'''

    dta5 = np.recfromtxt(StringIO(ss5),
                         names=('pair', 'mean', 'lower', 'upper', 'sig'),
                         delimiter='\t')

    sas_ = dta5[[1, 3, 2]]
    confint1 = res3[1][4]
    confint2 = sas_[['lower', 'upper']].view(float).reshape((3, 2))
    assert_almost_equal(confint1, confint2, decimal=2)
    reject1 = res3[1][1]
    reject2 = sas_['sig'] == '***'
    assert_equal(reject1, reject2)
    meandiff1 = res3[1][2]
    meandiff2 = sas_['mean']
    assert_almost_equal(meandiff1, meandiff2, decimal=14)
Exemplo n.º 30
0
def setup_module():
    """Swap ``sys.stdout`` for an in-memory buffer.

    The previous stream is kept in the module-level ``_orig_stdout``.
    """
    global _orig_stdout
    # the right-hand side is evaluated first, so the current stream is
    # captured before it is replaced
    _orig_stdout, sys.stdout = sys.stdout, StringIO()