Example #1
    def test_groupby_with_group_named_isnull_pn(self):
        """
        Test case with a group having the same name as a column in TableOne
        """
        df = self.data_pn.copy()

        columns = ['Age', 'SysABP', 'Height', 'Weight', 'ICU']
        groupby = 'ICU'
        group_levels = df[groupby].unique()

        # collect the possible column names
        table = TableOne(df, columns=columns, groupby=groupby, pval=True)
        tableone_columns = list(table.tableone.columns.levels[1])

        table = TableOne(df, columns=columns, groupby=groupby, pval=True, pval_adjust='b')
        tableone_columns = tableone_columns + list(table.tableone.columns.levels[1])
        tableone_columns = np.unique(tableone_columns)
        tableone_columns = [c for c in tableone_columns if c not in group_levels]

        for c in tableone_columns:
            # for each output column name in tableone, try them as a group
            df.loc[0:20,'ICU'] = c
            if 'adjust' in c:
                pval_adjust='b'
            else:
                pval_adjust=None

            with assert_raises(InputError):
                table = TableOne(df, columns=columns, groupby=groupby, pval=True, 
                    pval_adjust=pval_adjust)
Example #2
    def test_check_null_counts_are_correct_pn(self):
        """
        Test that the isnull column correctly reports the number of nulls
        """
        columns = ['Age', 'SysABP', 'Height', 'Weight', 'ICU', 'death']
        categorical = ['ICU', 'death']
        groupby = ['death']

        # test when not grouping
        table = TableOne(self.data_pn, columns=columns, categorical=categorical)

        # get isnull column only
        isnull = table.tableone.iloc[:,0]
        for i, v in enumerate(isnull):
            # skip empty rows by checking value is not a string
            if 'float' in str(type(v)):
                # check each null count is correct
                col = isnull.index[i][0]
                assert self.data_pn[col].isnull().sum() == v

        # test when grouping by a variable
        grouped_table = TableOne(self.data_pn, columns=columns, 
            categorical=categorical, groupby=groupby)

        # get isnull column only
        isnull = grouped_table.tableone.iloc[:,0]
        for i, v in enumerate(isnull):
            # skip empty rows by checking value is not a string
            if 'float' in str(type(v)):
                # check each null count is correct
                col = isnull.index[i][0]
                assert self.data_pn[col].isnull().sum() == v
Example #3
    def test_compute_standardized_mean_difference_categorical(self):
        """
        Test that pairwise standardized mean difference is computed correctly
        for categorical variables.

        # Ref: Introduction to Meta-Analysis. Michael Borenstein,
        # L. V. Hedges, J. P. T. Higgins and H. R. Rothstein
        # Wiley (2011). Chapter 4. Effect Sizes Based on Means.
        """

        t = TableOne(pd.DataFrame([1, 2, 3]))

        # test with the physionet data
        cols = [
            'Age', 'SysABP', 'Height', 'Weight', 'ICU', 'MechVent', 'LOS',
            'death'
        ]
        categorical = ['ICU', 'MechVent', 'death']
        strata = "MechVent"

        t = TableOne(self.data_pn,
                     categorical=categorical,
                     label_suffix=False,
                     groupby=strata,
                     pval=True,
                     htest_name=False,
                     smd=True)

        # consistent with R StdDiff() and R tableone
        exp_smd = {'ICU': '0.747', 'MechVent': 'nan', 'death': '0.017'}

        for k in exp_smd:
            smd = t.tableone.loc[k, 'Grouped by MechVent']['SMD (0,1)'][0]
            assert_equal(smd, exp_smd[k])
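For a binary variable such as death, the standardized mean difference reduces to the pooled-variance form sketched below; variables with more than two levels (such as ICU) use a multivariate generalisation of the same idea. This is an illustrative sketch only, not tableone's internal implementation, and the function name is invented:

import numpy as np

def binary_smd(p1, p2):
    """Standardized mean difference between the proportions of a binary
    variable in two groups, using a pooled variance (illustrative sketch)."""
    pooled_var = (p1 * (1 - p1) + p2 * (1 - p2)) / 2
    return (p1 - p2) / np.sqrt(pooled_var)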
Example #4
    def test_pval_correction(self):
        """
        Test the pval_adjust argument
        """
        df = pd.DataFrame({
            'numbers': [1, 2, 6, 1, 1, 1],
            'other': [1, 2, 3, 3, 3, 4],
            'colors': ['red', 'white', 'blue', 'red', 'blue', 'blue'],
            'even': ['yes', 'no', 'yes', 'yes', 'no', 'yes']
        })

        t1 = TableOne(df, groupby="even", pval=True, pval_adjust="bonferroni")

        # check the multiplier is correct (3 = no. of reported values)
        pvals_expected = {
            'numbers, mean (SD)': '1.000',
            'other, mean (SD)': '1.000',
            'colors, n (%)': '0.669'
        }

        group = 'Grouped by even'
        col = 'P-Value (adjusted)'
        for k in pvals_expected:
            assert_equal(t1.tableone.loc[k][group][col].values[0],
                         pvals_expected[k])

        # catch the pval_adjust=True
        with warnings.catch_warnings(record=False) as w:
            warnings.simplefilter('ignore', category=UserWarning)
            t2 = TableOne(df, groupby="even", pval=True, pval_adjust=True)

        # pval_adjust=True should produce the same adjusted values as "bonferroni"
        for k in pvals_expected:
            assert_equal(t2.tableone.loc[k][group][col].values[0],
                         pvals_expected[k])
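The "multiplier" mentioned in the comment above is just the Bonferroni rule: adjusted p = min(1, m * p), with m = 3 reported tests here. A quick sanity check of that arithmetic (the raw p-value of roughly 0.223 is implied by the adjusted 0.669, not quoted from the test data):

m = 3                            # number of reported p-values
p_raw = 0.669 / m                # ~0.223, implied for illustration only
p_adjusted = min(1.0, m * p_raw)
assert round(p_adjusted, 3) == 0.669
# the '1.000' entries simply mean the raw p-values were at least 1/3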
Example #5
    def test_tableone_columns_in_consistent_order_pn(self):
        """
        Test output columns in TableOne are always in the same order
        """
        df = self.data_pn.copy()
        columns = ['Age', 'SysABP', 'Height', 'Weight', 'ICU', 'death']
        categorical = ['ICU', 'death']
        groupby = ['death']

        table = TableOne(df, columns=columns, groupby=groupby, pval=True)

        assert table.tableone.columns.levels[1][0] == 'isnull'
        assert table.tableone.columns.levels[1][-1] == 'ptest'
        assert table.tableone.columns.levels[1][-2] == 'pval'

        df.loc[df['death'] == 0, 'death'] = 2
        table = TableOne(df,
                         columns=columns,
                         groupby=groupby,
                         pval=True,
                         pval_adjust='bonferroni')

        assert table.tableone.columns.levels[1][0] == 'isnull'
        assert table.tableone.columns.levels[1][-1] == 'ptest'
        assert table.tableone.columns.levels[1][-2] == 'pval (adjusted)'
Example #6
    def test_custom_statistical_tests(self):
        """
        Test that the user can specify custom statistical functions.
        """
        # from the example provided at:
        # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ks_2samp.html

        # define custom test
        func = mytest

        np.random.seed(12345678)
        n1 = 200
        n2 = 300

        # Baseline distribution
        rvs1 = stats.norm.rvs(size=n1, loc=0., scale=1)
        df1 = pd.DataFrame({'rvs': 'rvs1', 'val': rvs1})

        # Different to rvs1
        # stats.ks_2samp(rvs1, rvs2)
        # (0.20833333333333334, 5.129279597781977e-05)
        rvs2 = stats.norm.rvs(size=n2, loc=0.5, scale=1.5)
        df2 = pd.DataFrame({'rvs': 'rvs2', 'val': rvs2})

        # Similar to rvs1
        # stats.ks_2samp(rvs1, rvs3)
        # (0.10333333333333333, 0.14691437867433876)
        rvs3 = stats.norm.rvs(size=n2, loc=0.01, scale=1.0)
        df3 = pd.DataFrame({'rvs': 'rvs3', 'val': rvs3})

        # Identical to rvs1
        # stats.ks_2samp(rvs1, rvs4)
        # (0.07999999999999996, 0.41126949729859719)
        rvs4 = stats.norm.rvs(size=n2, loc=0.0, scale=1.0)
        df4 = pd.DataFrame({'rvs': 'rvs4', 'val': rvs4})

        # Table 1 for different distributions
        # DataFrame.append was removed in pandas 2.0, so use pd.concat
        different = pd.concat([df1, df2], ignore_index=True)
        t1_diff = TableOne(data=different, columns=["val"], pval=True,
                           groupby="rvs", htest={"val": func})

        assert_almost_equal(t1_diff._htest_table['P-Value'].val,
                            stats.ks_2samp(rvs1, rvs2)[1])

        # Table 1 for similar distributions
        similar = pd.concat([df1, df3], ignore_index=True)
        t1_similar = TableOne(data=similar, columns=["val"], pval=True,
                              groupby="rvs", htest={"val": func})

        assert_almost_equal(t1_similar._htest_table['P-Value'].val,
                            stats.ks_2samp(rvs1, rvs3)[1])

        # Table 1 for identical distributions
        identical = pd.concat([df1, df4], ignore_index=True)
        t1_identical = TableOne(data=identical, columns=["val"], pval=True,
                                groupby="rvs", htest={"val": func})

        assert_almost_equal(t1_identical._htest_table['P-Value'].val,
                            stats.ks_2samp(rvs1, rvs4)[1])
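Note that `mytest` is defined elsewhere in the test module and only referenced above via `func = mytest`. A minimal sketch consistent with how it is used here, assuming tableone calls the custom function with the per-group value arrays as positional arguments and expects a p-value back:

from scipy import stats

def mytest(*args):
    """Custom hypothesis test: two-sample Kolmogorov-Smirnov,
    returning only the p-value (sketch, not the original definition)."""
    # args are the value arrays for each group, as supplied by tableone
    _, pval = stats.ks_2samp(*args)
    return pval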
Example #7
    def test_examples_used_in_the_readme_run_without_raising_error(self):

        columns = ['time','age','bili','chol','albumin','copper',
            'alk.phos','ast','trig','platelet','protime',
            'status', 'ascites', 'hepato', 'spiders', 'edema',
            'stage', 'sex']
        catvars = ['status', 'ascites', 'hepato', 'spiders', 'edema','stage', 'sex']
        groupby = 'trt'
        nonnormal = ['bili']
        mytable = TableOne(self.data_pbc, columns, catvars, groupby, nonnormal, pval=False)
        mytable = TableOne(self.data_pbc, columns, catvars, groupby, nonnormal, pval=True)
Example #8
    def test_order_of_order_categorical_columns(self):
        """
        Test that the order of ordered categorical columns is retained.
        """
        day_cat = pd.Categorical(["mon", "wed", "tue", "thu"],
                                 categories=["wed", "thu", "mon", "tue"],
                                 ordered=True)

        alph_cat = pd.Categorical(["a", "b", "c", "a"],
                                  categories=["b", "c", "d", "a"],
                                  ordered=False)

        mon_cat = pd.Categorical(["jan", "feb", "mar", "apr"],
                                 categories=["feb", "jan", "mar", "apr"],
                                 ordered=True)

        data = pd.DataFrame({"A": ["a", "b", "c", "a"]})
        data["day"] = day_cat
        data["alph"] = alph_cat
        data["month"] = mon_cat

        order = {"month": ["jan"], "day": ["mon", "tue", "wed"]}

        # if a custom order is not specified, the categorical order
        # specified above should apply
        t1 = TableOne(data, label_suffix=False)

        t1_expected_order = {
            'month': ["feb", "jan", "mar", "apr"],
            'day': ["wed", "thu", "mon", "tue"]
        }

        for k in order:
            assert_list_equal(t1._order[k], t1_expected_order[k])
            assert_list_equal(t1.tableone.loc[k].index.to_list(),
                              t1_expected_order[k])

        # if a desired order is set, it should override the order
        t2 = TableOne(data, order=order, label_suffix=False)

        t2_expected_order = {
            'month': ["jan", "feb", "mar", "apr"],
            'day': ["mon", "tue", "wed", "thu"]
        }

        for k in order:
            assert_list_equal(t2._order[k], t2_expected_order[k])
            assert_list_equal(t2.tableone.loc[k].index.to_list(),
                              t2_expected_order[k])
Example #9
    def test_tableone_columns_in_consistent_order_pn(self):
        """
        Test output columns in TableOne are always in the same order
        """
        df = self.data_pn.copy()
        columns = ['Age', 'SysABP', 'Height', 'Weight', 'ICU', 'death']
        categorical = ['ICU', 'death']
        groupby = ['death']

        table = TableOne(df,
                         columns=columns,
                         groupby=groupby,
                         pval=True,
                         htest_name=True,
                         overall=False)

        assert table.tableone.columns.levels[1][0] == 'Missing'
        assert table.tableone.columns.levels[1][-1] == 'Test'
        assert table.tableone.columns.levels[1][-2] == 'P-Value'

        df.loc[df['death'] == 0, 'death'] = 2

        # without overall column
        table = TableOne(df,
                         columns=columns,
                         groupby=groupby,
                         pval=True,
                         pval_adjust='bonferroni',
                         htest_name=True,
                         overall=False)

        assert table.tableone.columns.levels[1][0] == 'Missing'
        assert table.tableone.columns.levels[1][-1] == 'Test'
        assert table.tableone.columns.levels[1][-2] == 'P-Value (adjusted)'

        # with overall column
        table = TableOne(df,
                         columns=columns,
                         groupby=groupby,
                         pval=True,
                         pval_adjust='bonferroni',
                         htest_name=True,
                         overall=True)

        assert table.tableone.columns.levels[1][0] == 'Missing'
        assert table.tableone.columns.levels[1][1] == 'Overall'
        assert table.tableone.columns.levels[1][-1] == 'Test'
        assert table.tableone.columns.levels[1][-2] == 'P-Value (adjusted)'
Example #10
    def test_limit_of_categorical_data_pn(self):
        """
        Tests the `limit` keyword arg, which limits the number of categories
        presented
        """
        data_pn = self.data_pn.copy()
        # 6 categories of age based on decade
        data_pn['age_group'] = data_pn['Age'].map(lambda x: int(x / 10))

        # limit
        columns = [
            'age_group', 'Age', 'SysABP', 'Height', 'Weight', 'ICU', 'death'
        ]
        categorical = ['age_group', 'ICU', 'death']

        # test it limits to 3
        table = TableOne(data_pn,
                         columns=columns,
                         categorical=categorical,
                         limit=3,
                         label_suffix=False)
        assert table.tableone.loc['age_group', :].shape[0] == 3

        # test other categories are not affected if limit > num categories
        assert table.tableone.loc['death', :].shape[0] == 2
Example #11
    def test_label_dictionary_input_pn(self):
        """
        Test columns and rows are relabelled with the label argument
        """
        df = self.data_pn.copy()
        columns = ['Age', 'ICU', 'death']
        categorical = ['death', 'ICU']
        groupby = 'death'

        labels = {
            'death': 'mortality',
            'Age': 'Age, years',
            'ICU': 'Intensive Care Unit'
        }

        table = TableOne(df,
                         columns=columns,
                         categorical=categorical,
                         groupby=groupby,
                         labels=labels)

        # check the header column is updated (groupby variable)
        assert table.tableone.columns.levels[0][0] == 'Grouped by mortality'

        # check the categorical rows are updated
        assert 'Intensive Care Unit' in table.tableone.index.levels[0]

        # check the continuous rows are updated
        assert 'Age, years' in table.tableone.index.levels[0]
Example #12
    def test_min_max_for_nonnormal_variables(self):
        """
        Test the min_max argument returns expected results.
        """
        # columns to summarize
        columns = ['Age', 'SysABP', 'Height', 'Weight', 'ICU', 'death']

        # columns containing categorical variables
        categorical = ['ICU']

        # set decimal places for age to 0
        decimals = {"Age": 0}

        # non-normal variables
        nonnormal = ['Age']

        # optionally, a categorical variable for stratification
        groupby = ['death']

        t1 = TableOne(self.data_pn, columns=columns, categorical=categorical,
                      groupby=groupby, nonnormal=nonnormal, decimals=decimals,
                      min_max=['Age'])

        k = "Age, median [min,max]"
        group = "Grouped by death"
        t1_columns = ["Overall", "0", "1"]
        expected = ["68 [16,90]", "66 [16,90]", "75 [26,90]"]
        for c, e in zip(t1_columns, expected):
            cell = t1.tableone.loc[k][group][c].values[0]
            assert_equal(cell, e)
Example #13
    def test_statistical_tests_skipped_if_subgroups_have_zero_observations(self):
        """
        Ensure that the package skips statistical tests when a subgroup has zero observations
        """
        categorical=['likesmarmalade']
        table = TableOne(self.data_sample, categorical=categorical, groupby='bear', pval=True)

        assert table._significance_table.loc['likesmarmalade','testname'] == 'Not tested'
Example #14
    def test_examples_used_in_the_readme_run_without_raising_error_pn(self):

        columns = ['Age', 'SysABP', 'Height', 'Weight', 'ICU', 'death']
        categorical = ['ICU', 'death']
        groupby = ['death']
        nonnormal = ['Age']
        mytable = TableOne(self.data_pn, columns=columns, categorical=categorical,
            groupby=groupby, nonnormal=nonnormal, pval=False)
Example #15
    def test_with_data_as_only_input_argument(self):
        """
        Test with a simple dataset that a table generated with no pre-specified columns
        returns the same results as a table generated with specified columns
        """
        table_no_args = TableOne(self.data_groups)

        columns = ['group','age','weight']
        categorical=['group']
        table_with_args = TableOne(self.data_groups, columns=columns, categorical=categorical)

        assert table_no_args._columns == table_with_args._columns
        assert table_no_args._categorical == table_with_args._categorical
        assert table_no_args._remarks == table_with_args._remarks
        assert (table_no_args.tableone.columns == table_with_args.tableone.columns).all()
        assert (table_no_args.tableone['overall'].values == \
            table_with_args.tableone['overall'].values).all()
        assert (table_no_args.tableone == table_with_args.tableone).all().all()
Example #16
    def test_overall_mean_and_std_as_expected_for_cont_variable(self):

        columns=['normal','nonnormal','height']
        table = TableOne(self.data_sample, columns=columns)

        mean =  table._cont_describe.loc['normal']['mean']['overall']
        std = table._cont_describe.loc['normal']['std']['overall']

        assert abs(mean-self.mu) <= 0.02
        assert abs(std-self.sigma) <= 0.02
Example #17
    def test_fisher_exact_for_small_cell_count(self):
        """
        Ensure that the package runs Fisher's exact test if cell counts are <= 5 and the table is 2x2
        """
        categorical=['group1','group3']
        table = TableOne(self.data_small, categorical=categorical, groupby='group2', pval=True)

        # group1 vs group2 forms a 2x2 table with small counts, so Fisher's exact is used
        # group3 vs group2 is 2x3, so chi-squared is used (with an expected count warning)
        assert table._significance_table.loc['group1','ptest'] == 'Fisher''s exact'
        assert table._significance_table.loc['group3','ptest'] == 'Chi-squared (warning: expected count < 5)'
Example #18
    def test_robust_to_duplicates_in_input_df_index(self):

        d_control = pd.DataFrame(data={'group': [0, 0, 0, 0, 0, 0, 0],
                                 'value': [3, 4, 4, 4, 4, 4, 5]})

        d_case = pd.DataFrame(data={'group': [1, 1, 1], 'value': [1, 2, 3]})
        d = pd.concat([d_case, d_control])

        with assert_raises(InputError):
            t = TableOne(d, ['value'], groupby='group', pval=True)

        d_idx_reset = pd.concat([d_case, d_control], ignore_index=True)
        t2 = TableOne(d_idx_reset, ['value'], groupby='group', pval=True)

        header = "Grouped by group"
        mean_std_0 = t2.tableone[header].at[("value, mean (SD)", ""), "0"]
        mean_std_1 = t2.tableone[header].at[("value, mean (SD)", ""), "1"]

        assert mean_std_0 == '4.0 (0.6)'
        assert mean_std_1 == '2.0 (1.0)'
Example #19
    def test_categorical_cell_count(self):
        """
        Ensure that the package runs Fisher exact if cell counts are <=5 and it's a 2x2
        """
        categorical=list(np.arange(10))
        table = TableOne(self.data_categorical, columns=categorical,categorical=categorical)

        # each of the 10 categorical columns
        for i in np.arange(10):
            # each column should have 100 levels
            assert table._cat_describe['overall'].loc[i].shape[0] == 100
Example #20
    def test_string_data_as_continuous_error(self):
        """
        Test raising an error when continuous columns contain non-numeric data
        """
        try:
            # columns not listed as categorical are treated as continuous,
            # so the non-numeric data should raise an InputError
            table = TableOne(self.data_mixed, categorical=[])
        except InputError as e:
            starts_str = "The following continuous column(s) have non-numeric values"
            assert e.args[0].startswith(starts_str)
        except:
            # unexpected error - raise it
            raise
Example #21
    def test_overall_n_and_percent_as_expected_for_binary_cat_variable_with_nan(self):
        """
        Ignore NaNs when counting the number of values and the overall percentage
        """
        categorical=['likeshoney']
        table = TableOne(self.data_sample, columns=categorical, categorical=categorical)

        lh = table._cat_describe['overall'].loc['likeshoney']
        likefreq = lh.loc[1.0,'freq']
        likepercent = lh.loc[1.0,'percent']

        assert likefreq == 5993
        assert abs(100-likepercent) <= 0.01
Example #22
    def test_categorical_cell_count(self):
        """
        Check the categorical cell counts are correct
        """
        categorical = list(np.arange(10))
        table = TableOne(self.data_categorical, columns=categorical,
                         categorical=categorical)
        df = table.cat_describe
        # drop 'overall' level of column index
        df.columns = df.columns.droplevel(level=1)
        # each of the 10 categorical columns
        for i in np.arange(10):
            # each column should have 100 levels
            assert df.loc[i].shape[0] == 100
Example #23
    def test_tableone_row_sort_pn(self):
        """
        Test sort functionality of TableOne
        """
        df = self.data_pn.copy()
        columns = ['Age', 'SysABP', 'Height', 'Weight', 'ICU', 'death']
        table = TableOne(df, columns=columns)

        # a call to .index.levels[0] automatically sorts the levels
        # instead, call values and use pd.unique as it preserves order
        tableone_rows = pd.unique([x[0] for x in table.tableone.index.values])

        # default should not sort
        for i, c in enumerate(columns):
            # i+1 because we skip the first row, 'n'
            assert tableone_rows[i + 1] == c

        table = TableOne(df, columns=columns, sort=True)
        tableone_rows = pd.unique([x[0] for x in table.tableone.index.values])
        for i, c in enumerate(np.sort(columns)):
            # i+1 because we skip the first row, 'n'
            assert tableone_rows[i + 1] == c
Example #24
    def test_overall_mean_and_std_as_expected_for_cont_variable(self):

        columns = ['normal', 'nonnormal', 'height']
        table = TableOne(self.data_sample, columns=columns)

        mean = table.cont_describe.loc['normal']['mean']['Overall']
        std = table.cont_describe.loc['normal']['std']['Overall']

        print(self.data_sample.mean())
        print(self.data_sample.std())

        assert abs(mean - self.data_sample.normal.mean()) <= 0.02
        assert abs(std - self.data_sample.normal.std()) <= 0.02
Example #25
    def test_sequence_of_cont_table(self):
        """
        Ensure that the columns align with the values
        """
        columns = ['age','weight']
        categorical = []
        groupby = 'group'
        t = TableOne(self.data_groups, columns = columns,
            categorical = categorical, groupby = groupby, isnull = False)

        # n and weight rows are already ordered, so sorting should not alter the order
        assert t.tableone[0][1:] == sorted(t.tableone[0][1:])
        assert t.tableone[1][1:] == ['0.50 (0.71)', '3.50 (1.29)', '8.50 (1.87)', '15.50 (2.45)']
        assert t.tableone[2][1:] == sorted(t.tableone[2][1:])
Example #26
def df_to_table(df, incl_vars, categorical, nonnormal, groupvar, pval, labels,
                order, missing):

    # the list of columns for which Table 1 is generated
    col_list = list(incl_vars)

    my_table = TableOne(df,
                        columns=col_list,
                        groupby=groupvar,
                        nonnormal=nonnormal,
                        categorical=categorical,
                        pval=pval,
                        missing=missing,
                        label_suffix=True,
                        rename=labels,
                        order=order)

    my_table_html = my_table.to_html(
        classes=["table", "table-dark", 'table-sm'])

    return my_table_html
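A hypothetical call to df_to_table, purely for illustration: the DataFrame, its column names, and the label/order values below are invented, and `missing` is assumed to take a boolean as in recent tableone releases:

import pandas as pd

toy = pd.DataFrame({"age": [54, 61, 47, 70, 66, 58],
                    "sex": ["f", "m", "f", "m", "f", "m"],
                    "arm": ["a", "a", "a", "b", "b", "b"]})

html = df_to_table(toy,
                   incl_vars=["age", "sex"],
                   categorical=["sex"],
                   nonnormal=["age"],
                   groupvar="arm",
                   pval=False,
                   labels={"age": "Age, years", "sex": "Sex"},
                   order={"sex": ["f", "m"]},
                   missing=True)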
Example #27
    def test_row_percent_false(self):
        """
        Test row_percent=False displays n(%) for the column.
        """
        # columns to summarize
        columns = ['Age', 'SysABP', 'Height', 'MechVent', 'ICU', 'death']

        # columns containing categorical variables
        categorical = ['ICU', 'MechVent']

        # set decimal places for age to 0
        decimals = {"Age": 0}

        # non-normal variables
        nonnormal = ['Age']

        # optionally, a categorical variable for stratification
        groupby = ['death']
        group = "Grouped by death"

        # row_percent = False
        t1 = TableOne(self.data_pn, columns=columns,
                      categorical=categorical, groupby=groupby,
                      nonnormal=nonnormal, decimals=decimals,
                      row_percent=False)

        row1 = list(t1.tableone.loc["MechVent, n (%)"][group].values[0])
        row1_expect = [0, '540 (54.0)', '468 (54.2)', '72 (52.9)']
        assert_list_equal(row1, row1_expect)

        row2 = list(t1.tableone.loc["MechVent, n (%)"][group].values[1])
        row2_expect = ['', '460 (46.0)', '396 (45.8)', '64 (47.1)']
        assert_list_equal(row2, row2_expect)

        row3 = list(t1.tableone.loc["ICU, n (%)"][group].values[0])
        row3_expect = [0, '162 (16.2)', '137 (15.9)', '25 (18.4)']
        assert_list_equal(row3, row3_expect)

        row4 = list(t1.tableone.loc["ICU, n (%)"][group].values[1])
        row4_expect = ['', '202 (20.2)', '194 (22.5)', '8 (5.9)']
        assert_list_equal(row4, row4_expect)

        row5 = list(t1.tableone.loc["ICU, n (%)"][group].values[2])
        row5_expect = ['', '380 (38.0)', '318 (36.8)', '62 (45.6)']
        assert_list_equal(row5, row5_expect)

        row6 = list(t1.tableone.loc["ICU, n (%)"][group].values[3])
        row6_expect = ['', '256 (25.6)', '215 (24.9)', '41 (30.1)']
        assert_list_equal(row6, row6_expect)
Example #28
    def test_row_percent_true_and_overall_false(self):
        """
        Test row_percent=True displays n(%) for the row rather than the column.
        """
        # columns to summarize
        columns = ['Age', 'SysABP', 'Height', 'MechVent', 'ICU', 'death']

        # columns containing categorical variables
        categorical = ['ICU', 'MechVent']

        # set decimal places for age to 0
        decimals = {"Age": 0}

        # non-normal variables
        nonnormal = ['Age']

        # optionally, a categorical variable for stratification
        groupby = ['death']
        group = "Grouped by death"

        # row_percent = True
        t1 = TableOne(self.data_pn, columns=columns, overall=False,
                      categorical=categorical, groupby=groupby,
                      nonnormal=nonnormal, decimals=decimals,
                      row_percent=True)

        row1 = list(t1.tableone.loc["MechVent, n (%)"][group].values[0])
        row1_expect = [0, '468 (86.7)', '72 (13.3)']
        assert_list_equal(row1, row1_expect)

        row2 = list(t1.tableone.loc["MechVent, n (%)"][group].values[1])
        row2_expect = ['', '396 (86.1)', '64 (13.9)']
        assert_list_equal(row2, row2_expect)

        row3 = list(t1.tableone.loc["ICU, n (%)"][group].values[0])
        row3_expect = [0, '137 (84.6)', '25 (15.4)']
        assert_list_equal(row3, row3_expect)

        row4 = list(t1.tableone.loc["ICU, n (%)"][group].values[1])
        row4_expect = ['', '194 (96.0)', '8 (4.0)']
        assert_list_equal(row4, row4_expect)

        row5 = list(t1.tableone.loc["ICU, n (%)"][group].values[2])
        row5_expect = ['', '318 (83.7)', '62 (16.3)']
        assert_list_equal(row5, row5_expect)

        row6 = list(t1.tableone.loc["ICU, n (%)"][group].values[3])
        row6_expect = ['', '215 (84.0)', '41 (16.0)']
        assert_list_equal(row6, row6_expect)
Example #29
    def test_sequence_of_cont_table(self):
        """
        Ensure that the columns align with the values
        """
        columns = ['age','weight']
        categorical = []
        groupby = 'group'
        t = TableOne(self.data_groups, columns = columns,
            categorical = categorical, groupby = groupby, isnull = False)

        # the n row is already ordered, so sorting should not alter the order,
        # and the age row should line up with the expected group means
        assert (t.tableone.loc['n'].values[0].astype(float) == \
            sorted(t.tableone.loc['n'].values[0].astype(float))).all()
        assert (t.tableone.loc['age'].values[0] == \
            ['0.50 (0.71)', '3.50 (1.29)', '8.50 (1.87)', '15.50 (2.45)']).all()
Example #30
    def test_overall_n_and_percent_as_expected_for_binary_cat_variable(self):

        categorical=['likesmarmalade']
        table = TableOne(self.data_sample, columns=categorical, categorical=categorical)

        lm = table._cat_describe['overall'].loc['likesmarmalade']
        notlikefreq = lm.loc[0,'freq']
        notlikepercent = lm.loc[0,'percent']
        likefreq = lm.loc[1,'freq']
        likepercent = lm.loc[1,'percent']

        assert notlikefreq + likefreq == 10000
        assert abs(100 - notlikepercent - likepercent) <= 0.02
        assert notlikefreq == 8977
        assert likefreq == 1023