def winsorize(df: pd.DataFrame,
              by: Union[str, Iterable[str]],
              p: Iterable[Union[tuple, float]] = (.01, .99)):
    """Return a copy of `df` without rows outside the quantiles `p` of `by`.

    Despite the name, out-of-range rows are removed rather than clipped.

    Args:
        df: Data to filter. The frame passed in is copied, not modified.
        by: Column name, or iterable of column names, to check.
        p: Either a single (lower, upper) pair of quantiles that is applied
            to every column in `by`, or a sequence holding one such pair
            per column (same length as `by`).

    Returns:
        pd.DataFrame: The rows whose value falls inside the inclusive
        quantile bounds for every column in `by`.
    """
    # TODO: Some kind of warning/error if too fine of quantiles are
    #       requested for the number of rows, e.g. .99 with 5 rows.
    frame = df.copy()
    columns = cast(Iterable[str], force_iterable(by))

    # A nested `p` carries one cutoff pair per column; a flat pair is
    # replicated so that every column shares it.
    per_column_cutoffs = hasattr(p[0], '__iter__')
    if per_column_cutoffs:
        assert len(p) == len(columns)
    else:
        p = [p] * len(columns)

    # Start with every row kept and knock rows out column by column.
    keep = np.ones(frame.shape[0], dtype=bool)
    for pos, column in enumerate(columns):
        values = frame[column]
        bounds = values.quantile(p[pos]).values
        in_range = (values >= bounds[0]) & (values <= bounds[1])
        keep = np.logical_and(keep, in_range)

    return frame[keep]
# Example #2
0
def outreg(regs,
           var_names=None,
           var_labels=None,
           digits=4,
           stars=True,
           se="(",
           options=False):
    """Create the guts of a LaTeX tabular environment from regression results.

    Args:
        regs (Results or iterable of Results): Regressions to output to table.
        var_names (str or iterable of str): Variable names to pull from regs.
            If None, uses the index labels of each regression's ``beta``,
            in order of first appearance across ``regs``.
        var_labels (str or iterable of str): Pretty names for variables in
            table. If None, uses ``var_names`` converted to strings.

    Keyword Args:
        digits (int): Defaults to 4. How many digits to include past decimal.
        stars (bool): Defaults to True. If True, adds stars to mark statistical
            significance.
        se (str): Defaults to "(". Marker for standard errors. May
            also choose brackets with ``se="["``.
        options (bool): Defaults to False. If True, also return the ``dict``
            of formatting options that were generated by ``outreg``
            (``name_just``, ``stat_just``, etc.) for additional calls to
            ``table_mainrow`` and ``table_statrow``.

    Returns:
        str: LaTeX fragment meant to be wrapped in a tabular environment.
        When ``options`` is True, a ``(str, dict)`` tuple of that fragment
        and the generated options.
    """

    regs = force_iterable(regs)

    if var_names is None:
        # Union of coefficient names, preserving first-seen order.
        var_names = regs[0].beta.index.tolist()
        for reg in regs[1:]:
            var_names += [
                x for x in reg.beta.index.tolist() if x not in var_names
            ]
    else:
        # A bare string is one variable name, not a sequence of characters.
        var_names = force_iterable(var_names)

    if var_labels is None:
        var_labels = [x if isinstance(x, str) else str(x) for x in var_names]
    else:
        # Likewise, a bare string is a single label.
        var_labels = force_iterable(var_labels)

    opt_dict = _set_options(var_labels, digits, stars)
    # `se` is not part of the generated options, so forward it explicitly;
    # otherwise the caller's choice of marker is silently ignored. The
    # returned `opt_dict` is left untouched.
    row_opts = dict(opt_dict, se=se)
    table_str = ''.join(
        table_mainrow(var_labels[var_idx], varname, regs, **row_opts)
        for var_idx, varname in enumerate(var_names)
    )

    if options:
        return table_str, opt_dict
    return table_str
# Example #3
0
def table_mainrow(rowname, varname, regs,
                  name_just=24, stat_just=12, digits=3, se="(",
                  stars=True):

    """Add a table row of regression coefficients with standard errors.

    Args:
        rowname (str): First cell of table row, i.e., the row's name.
        varname (str): Name of variable to pull from ``Results`` object.
        regs (Results or iterable of Results): Regressions from which
          to pull coefficients named ``varname``. Entries that are not
          ``Results`` instances, or that lack ``varname``, produce blank
          cells.

    Keyword Args:
        name_just (int): Defaults to 24. Forwarded to ``table_statrow``
          for both the coefficient row and the standard-error row.
        stat_just (int): Defaults to 12. Forwarded to ``table_statrow``
          for both rows.
        digits (int): Defaults to 3. Passed to ``_format_nums`` for the
          coefficients and to ``table_statrow`` for the standard errors.
        se (str): Defaults to "(". Passed to ``table_statrow`` as ``sd``
          for the standard-error row.
        stars (bool): Defaults to True. If True, the result of
          ``_sig_level`` on the variable's ``pt`` entry is appended to
          the formatted coefficient.

    Returns:
        str: The coefficient row followed by the standard-error row.
    """

    beta_vals = []
    se_vals = []
    # Extract beta/sig and se values to pass to `table_statrow`
    for reg in force_iterable(regs):
        # `isinstance` (rather than an exact type match) so that subclasses
        # of `Results` are rendered instead of being silently blanked.
        if not isinstance(reg, Results) or varname not in reg.beta:
            beta_vals.append('')
            se_vals.append('')
            continue

        # Coefficients are formatted here because the significance marker
        # is appended to the formatted text.
        this_beta = _format_nums(reg.beta[varname], digits=digits)
        this_sig = _sig_level(reg.pt[varname]) if stars else ''
        beta_vals.append(this_beta + this_sig)
        # Standard errors are handed over unformatted; `digits` is passed
        # along with them to `table_statrow` below.
        se_vals.append(reg.se[varname])

    beta_row = table_statrow(rowname, beta_vals, name_just=name_just,
                             stat_just=stat_just)
    se_row = table_statrow('', se_vals, name_just=name_just,
                           stat_just=stat_just, sd=se, digits=digits)

    return beta_row + se_row
# Example #4
0
def table_mainrow(rowname, varname, regs,
                  name_just=24, stat_just=12, digits=3, se="(",
                  stars=True):

    """Add a table row of regression coefficients with standard errors.

    Args:
        rowname (str): First cell of table row, i.e., the row's name.
        varname (str): Name of variable to pull from ``Results`` object.
        regs (Results or iterable of Results): Regressions from which
          to pull coefficients named ``varname``. Entries that are not
          ``Results`` instances, or that lack ``varname``, produce blank
          cells.

    Keyword Args:
        name_just (int): Defaults to 24. Forwarded to ``table_statrow``
          for both the coefficient row and the standard-error row.
        stat_just (int): Defaults to 12. Forwarded to ``table_statrow``
          for both rows.
        digits (int): Defaults to 3. Passed to ``_format_nums`` for the
          coefficients and to ``table_statrow`` for the standard errors.
        se (str): Defaults to "(". Passed to ``table_statrow`` as ``sd``
          for the standard-error row.
        stars (bool): Defaults to True. If True, the result of
          ``_sig_level`` on the variable's ``pt`` entry is appended to
          the formatted coefficient.

    Returns:
        str: The coefficient row followed by the standard-error row.
    """

    beta_vals = []
    se_vals = []
    # Extract beta/sig and se values to pass to `table_statrow`
    for reg in force_iterable(regs):
        # `isinstance` (rather than an exact type match) so that subclasses
        # of `Results` are rendered instead of being silently blanked.
        if not isinstance(reg, Results) or varname not in reg.beta:
            beta_vals.append('')
            se_vals.append('')
            continue

        # Coefficients are formatted here because the significance marker
        # is appended to the formatted text.
        this_beta = _format_nums(reg.beta[varname], digits=digits)
        this_sig = _sig_level(reg.pt[varname]) if stars else ''
        beta_vals.append(this_beta + this_sig)
        # Standard errors are handed over unformatted; `digits` is passed
        # along with them to `table_statrow` below.
        se_vals.append(reg.se[varname])

    beta_row = table_statrow(rowname, beta_vals, name_just=name_just,
                             stat_just=stat_just)
    se_row = table_statrow('', se_vals, name_just=name_just,
                           stat_just=stat_just, sd=se, digits=digits)

    return beta_row + se_row
# Example #5
0
def outreg(regs, var_names=None, var_labels=None, digits=4, stars=True, se="(",
           options=False):
    """Create the guts of a LaTeX tabular environment from regression results.

    Args:
        regs (Results or iterable of Results): Regressions to output to table.
        var_names (str or iterable of str): Variable names to pull from regs.
            If None, uses the index labels of each regression's ``beta``,
            in order of first appearance across ``regs``.
        var_labels (str or iterable of str): Pretty names for variables in
            table. If None, uses ``var_names`` converted to strings.

    Keyword Args:
        digits (int): Defaults to 4. How many digits to include past decimal.
        stars (bool): Defaults to True. If True, adds stars to mark statistical
            significance.
        se (str): Defaults to "(". Marker for standard errors. May
            also choose brackets with ``se="["``.
        options (bool): Defaults to False. If True, also return the ``dict``
            of formatting options that were generated by ``outreg``
            (``name_just``, ``stat_just``, etc.) for additional calls to
            ``table_mainrow`` and ``table_statrow``.

    Returns:
        str: LaTeX fragment meant to be wrapped in a tabular environment.
        When ``options`` is True, a ``(str, dict)`` tuple of that fragment
        and the generated options.
    """

    regs = force_iterable(regs)

    if var_names is None:
        # Union of coefficient names, preserving first-seen order.
        var_names = regs[0].beta.index.tolist()
        for reg in regs[1:]:
            var_names += [x for x in reg.beta.index.tolist()
                          if x not in var_names]
    else:
        # A bare string is one variable name, not a sequence of characters.
        var_names = force_iterable(var_names)

    if var_labels is None:
        var_labels = [x if isinstance(x, str) else str(x) for x in var_names]
    else:
        # Likewise, a bare string is a single label.
        var_labels = force_iterable(var_labels)

    opt_dict = _set_options(var_labels, digits, stars)
    # `se` is not part of the generated options, so forward it explicitly;
    # otherwise the caller's choice of marker is silently ignored. The
    # returned `opt_dict` is left untouched.
    row_opts = dict(opt_dict, se=se)
    table_str = ''.join(
        table_mainrow(var_labels[var_idx], varname, regs, **row_opts)
        for var_idx, varname in enumerate(var_names)
    )

    if options:
        return table_str, opt_dict
    return table_str
 def test_string(self):
     """A bare string is wrapped whole in a one-element tuple."""
     text = 'abcd'
     wrapped = force_iterable(text)
     assert (text,) == wrapped
 def test_array(self):
     """A NumPy array comes back with the same elements."""
     arr = np.arange(3)
     assert_array_equal(arr, force_iterable(arr))
 def test_tup(self):
     """A tuple input yields a result equal to that tuple."""
     tup = (1, 2, 3)
     assert tup == force_iterable(tup)
 def test_int(self):
     """A scalar int is wrapped in a one-element tuple."""
     number = 10
     wrapped = force_iterable(number)
     assert (number,) == wrapped
 def test_list(self):
     """A list input yields a result equal to that list."""
     items = [1, 2, 3]
     assert items == force_iterable(items)