def test_winsorize():
    """Check ``winsorize`` on a single column containing obvious outliers.

    The input has one extreme high value (1053) and one low value (-40).
    Three configurations are exercised:

    * quantile cutoff ``[0.05, 0.95]`` with ``replace_with_cutoff=False``:
      1053 is expected to become 101 and -40 to become -5;
    * std cutoff ``[2, 2]`` with ``replace_with_cutoff=False``:
      only 1053 is expected to change (to 101);
    * std cutoff ``[2, 2]`` with ``replace_with_cutoff=True``:
      1053 is expected to become 556.97961997, which equals
      mean + 2 * sample standard deviation (ddof=1) of the input.
    """
    outlier_test = pd.DataFrame([
        92, 19, 101, 58, 1053, 91, 26, 78, 10, 13,
        -40, 101, 86, 85, 15, 89, 89, 28, -5, 41
    ])

    # Quantile-based cutoff: both tails are replaced by other values that
    # already occur in the data (101 and -5).
    out = winsorize(
        outlier_test,
        cutoff={'quantile': [0.05, .95]},
        replace_with_cutoff=False,
    ).values.squeeze()
    correct_result = np.array([
        92, 19, 101, 58, 101, 91, 26, 78, 10, 13,
        -5, 101, 86, 85, 15, 89, 89, 28, -5, 41
    ])
    # assert_array_equal also verifies the shape and reports which
    # elements differ, unlike counting matches with np.sum.
    np.testing.assert_array_equal(out, correct_result)

    # Std-based cutoff: -40 is left untouched, only 1053 is replaced.
    out = winsorize(
        outlier_test,
        cutoff={'std': [2, 2]},
        replace_with_cutoff=False,
    ).values.squeeze()
    correct_result = np.array([
        92, 19, 101, 58, 101, 91, 26, 78, 10, 13,
        -40, 101, 86, 85, 15, 89, 89, 28, -5, 41
    ])
    np.testing.assert_array_equal(out, correct_result)

    # Std-based cutoff, replacing the outlier with the cutoff value itself.
    out = winsorize(
        outlier_test,
        cutoff={'std': [2, 2]},
        replace_with_cutoff=True,
    ).values.squeeze()
    correct_result = np.array([
        92., 19., 101., 58., 556.97961997, 91., 26., 78., 10., 13.,
        -40., 101., 86., 85., 15., 89., 89., 28., -5., 41.
    ])
    # Compare elementwise: comparing only the rounded means would accept a
    # wrong cutoff value or a replacement at the wrong position.
    np.testing.assert_allclose(out, correct_result, rtol=1e-6)
def test_winsorize():
    """Verify ``winsorize`` results for quantile- and std-based cutoffs.

    A one-column frame with a large outlier (1053) and a low value (-40)
    is winsorized three ways and compared against fixed expectations:

    1. ``{"quantile": [0.05, 0.95]}``, ``replace_with_cutoff=False``:
       1053 -> 101 and -40 -> -5.
    2. ``{"std": [2, 2]}``, ``replace_with_cutoff=False``:
       1053 -> 101, -40 unchanged.
    3. ``{"std": [2, 2]}``, ``replace_with_cutoff=True``:
       1053 -> 556.97961997 (mean + 2 * sample std, ddof=1, of the
       input), -40 unchanged.
    """
    outlier_test = pd.DataFrame(
        [
            92, 19, 101, 58, 1053, 91, 26, 78, 10, 13,
            -40, 101, 86, 85, 15, 89, 89, 28, -5, 41,
        ]
    )

    # Case 1: quantile cutoff, outliers replaced by existing data values.
    out = winsorize(
        outlier_test,
        cutoff={"quantile": [0.05, 0.95]},
        replace_with_cutoff=False,
    ).values.squeeze()
    correct_result = np.array(
        [
            92, 19, 101, 58, 101, 91, 26, 78, 10, 13,
            -5, 101, 86, 85, 15, 89, 89, 28, -5, 41,
        ]
    )
    # Exact comparison that also checks shape and pinpoints mismatches.
    np.testing.assert_array_equal(out, correct_result)

    # Case 2: std cutoff, only the high outlier is outside the bounds.
    out = winsorize(
        outlier_test,
        cutoff={"std": [2, 2]},
        replace_with_cutoff=False,
    ).values.squeeze()
    correct_result = np.array(
        [
            92, 19, 101, 58, 101, 91, 26, 78, 10, 13,
            -40, 101, 86, 85, 15, 89, 89, 28, -5, 41,
        ]
    )
    np.testing.assert_array_equal(out, correct_result)

    # Case 3: std cutoff, the outlier is replaced by the cutoff itself.
    out = winsorize(
        outlier_test,
        cutoff={"std": [2, 2]},
        replace_with_cutoff=True,
    ).values.squeeze()
    correct_result = np.array(
        [
            92.0, 19.0, 101.0, 58.0, 556.97961997, 91.0, 26.0, 78.0, 10.0, 13.0,
            -40.0, 101.0, 86.0, 85.0, 15.0, 89.0, 89.0, 28.0, -5.0, 41.0,
        ]
    )
    # Elementwise tolerance check; a rounded-mean comparison would let a
    # wrong cutoff value or a misplaced replacement slip through.
    np.testing.assert_allclose(out, correct_result, rtol=1e-6)