def test_three_pass_with_ref(self):
    """Sigma-edit a series that rides on a linear reference ramp.

    The sample data are centered on zero, shifted by a 0..N-1 ramp stored
    in the 'ref' column, and then only the 'ser' column is handed to
    sigma_edit_dataframe.  The test passes when all_in_bounds accepts the
    edited 'ser' column at the same threshold.
    """
    # NOTE(review): the test name suggests several editing passes are
    # needed for this data -- confirm against sigma_edit_dataframe.
    sigma_thresh = 2

    raw = pd.Series([-4, -4] + [0, 1, 2] * 4 + [5])
    ref = pd.Series(range(len(raw)))

    # Center the raw samples first, then add the reference ramp.
    shifted = raw - raw.mean() + ref

    frame = pd.DataFrame({'ser': shifted, 'ref': ref})
    edited = sigma_edit_dataframe(sigma_thresh, ['ser'], frame)

    self.assertTrue(all_in_bounds(sigma_thresh, edited['ser']))
def main():
    """Command-line entry point for the sigma-edit tool.

    Parses the command line, loads the input DataFrame via io_lib, runs
    outlier_lib.sigma_edit_dataframe on the requested columns, and writes
    the resulting DataFrame via io_lib.
    """
    description = textwrap.dedent("""
        Remove outliers from DataFrame columns using a recursive sigma-edit
        algorithm. The algorithm will recursively NaN out values greater than
        sigma_thresh standard deviations away from sample mean.

        -----------------------------------------------------------------------
        Examples:

            * Do a 2.5-sigma edit on a gamma distribution and show histogram
                p.rand -n 1000 -t gamma --alpha=3 --beta=.01\\
                | p.df 'df["c1"] = df.c0'\\
                | p.sig_edit -c c1 -t 2.5\\
                | p.df 'pd.melt(df)' --names raw edited\\
                | p.facet_grid --hue variable --map pl.hist\\
                --args value --kwargs 'alpha=.2' 'range=[0, 1000]' 'bins=50'

        -----------------------------------------------------------------------
    """)

    # Build the command-line interface.
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
    )
    arg_lib.add_args(arg_parser, 'io_in', 'io_out', 'example')

    arg_parser.add_argument(
        "-t", "--sigma_thresh",
        help="Sigma threshold",
        nargs=1,
        required=True,
        type=float,
    )
    arg_parser.add_argument(
        "-c", "--cols",
        required=True,
        help="Column(s) to sigma-edit",
        nargs="+",
    )
    arg_parser.add_argument(
        "--max_iter",
        help="Max number of recursions",
        nargs=1,
        type=int,
        default=[20],
    )

    args = arg_parser.parse_args()

    # Load the frame to be edited.
    frame = io_lib.df_from_input(args)

    # Both options are declared with nargs=1, so argparse stores each one
    # as a single-element list; unwrap them before the call.
    threshold = args.sigma_thresh[0]
    iteration_cap = args.max_iter[0]
    edited = outlier_lib.sigma_edit_dataframe(
        threshold, args.cols, frame, max_iter=iteration_cap)

    # Emit the edited frame.
    io_lib.df_to_output(args, edited)
def main():
    """Run the sigma-edit command-line tool.

    Steps: define and parse the command-line options, read the input
    DataFrame through io_lib, apply outlier_lib.sigma_edit_dataframe to
    the selected columns, and send the result to the output through io_lib.
    """
    help_text = textwrap.dedent(
        """
        Remove outliers from DataFrame columns using a recursive sigma-edit
        algorithm. The algorithm will recursively NaN out values greater than
        sigma_thresh standard deviations away from sample mean.

        -----------------------------------------------------------------------
        Examples:

            * Do a 2.5-sigma edit on a gamma distribution and show histogram
                p.rand -n 1000 -t gamma --alpha=3 --beta=.01\\
                | p.df 'df["c1"] = df.c0'\\
                | p.sig_edit -c c1 -t 2.5\\
                | p.df 'pd.melt(df)' --names raw edited\\
                | p.facet_grid --hue variable --map pl.hist\\
                --args value --kwargs 'alpha=.2' 'range=[0, 1000]' 'bins=50'

        -----------------------------------------------------------------------
        """
    )

    # Declare the command-line options.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=help_text,
    )
    arg_lib.add_args(cli, 'io_in', 'io_out', 'example')
    cli.add_argument(
        "-t",
        "--sigma_thresh",
        help="Sigma threshold",
        nargs=1,
        required=True,
        type=float,
    )
    cli.add_argument(
        "-c",
        "--cols",
        required=True,
        help="Column(s) to sigma-edit",
        nargs="+",
    )
    cli.add_argument(
        "--max_iter",
        help="Max number of recursions",
        nargs=1,
        type=int,
        default=[20],
    )
    args = cli.parse_args()

    # Read the input, edit the requested columns, and write the result.
    source_df = io_lib.df_from_input(args)
    # nargs=1 makes argparse store one-element lists, hence the [0] lookups.
    result_df = outlier_lib.sigma_edit_dataframe(
        args.sigma_thresh[0],
        args.cols,
        source_df,
        max_iter=args.max_iter[0],
    )
    io_lib.df_to_output(args, result_df)