示例#1
0
def winsorize(df, by, p=(.01, .99)):
    """Drop rows whose values in `by` fall outside the quantiles `p`.

    Despite the name, out-of-range rows are removed rather than clipped.

    Parameters
    ----------
    df : DataFrame-like
        Input data. It is copied first; the caller's object is not modified.
    by : column label or iterable of column labels
        Column(s) to filter on; normalized with `force_iterable`.
    p : pair of quantiles, or sequence of such pairs
        A single ``(lower, upper)`` pair is applied to every column in `by`.
        A sequence of pairs must contain exactly one pair per column.

    Returns
    -------
    DataFrame-like
        The rows whose value in every `by` column lies within that column's
        lower and upper quantile cutoffs, bounds included.

    Raises
    ------
    AssertionError
        If per-column cutoffs are given but their count differs from the
        number of columns in `by`.
    """
    # TODO: Some kind of warning/error if too fine of quantiles are
    #       requested for the number of rows, e.g. .99 with 5 rows.
    df = df.copy()

    by = force_iterable(by)

    # Allow different cutoffs for different variables
    if hasattr(p[0], '__iter__'):
        assert len(p) == len(by), \
            "need exactly one quantile pair per column in `by`"
    else:
        p = [p] * len(by)

    # Give the mask an explicit bool dtype: np.array([True] * 0) is an empty
    # float64 array, which is not treated as a boolean row mask when the
    # frame has no rows.
    survive_winsor = np.ones(df.shape[0], dtype=bool)

    for idx, col in enumerate(by):
        cuts = df[col].quantile(p[idx]).values
        survive_this = (df[col] >= cuts[0]) & (df[col] <= cuts[1])
        # A row survives only if it is inside the cutoffs for every column.
        survive_winsor = survive_winsor & survive_this

    return df[survive_winsor]
示例#2
0
 def test_string(self):
     """Expect a string to come back wrapped in a 1-tuple."""
     text = 'abcd'
     wrapped = force_iterable(text)
     assert_equal((text, ), wrapped)
示例#3
0
 def test_tup(self):
     """Expect a tuple input to compare equal to what is returned."""
     original = (1, 2, 3)
     assert_equal(original, force_iterable(original))
示例#4
0
 def test_array(self):
     """Expect an ndarray input to come back element-wise equal."""
     original = np.arange(3)
     returned = force_iterable(original)
     assert_array_equal(original, returned)
示例#5
0
 def test_int(self):
     """Expect a bare int to come back wrapped in a 1-tuple."""
     number = 10
     wrapped = force_iterable(number)
     assert_equal((number, ), wrapped)
示例#6
0
 def test_list(self):
     """Expect a list input to compare equal to what is returned."""
     original = [1, 2, 3]
     assert_equal(original, force_iterable(original))
示例#7
0
 def test_tup(self):
     """Expect a tuple input to compare equal to what is returned."""
     original = (1, 2, 3)
     returned = force_iterable(original)
     assert original == returned
示例#8
0
 def test_int(self):
     """Expect a bare int to come back wrapped in a 1-tuple."""
     number = 10
     wrapped = force_iterable(number)
     assert (number, ) == wrapped
示例#9
0
 def test_list(self):
     """Expect a list input to compare equal to what is returned."""
     original = [1, 2, 3]
     returned = force_iterable(original)
     assert original == returned