Exemplo n.º 1
0
def test_fboxplot_rainbowplot(close_figures):
    # fboxplot and rainbowplot are exercised in a single test because
    # running them together is much faster.
    def harmfunc(t):
        """Return one random curve made of a few harmonic terms."""
        # Outlier switch: 0 with p=0.9, 1 with p=0.1.
        # NOTE: the order of the random draws below determines the expected
        # indices asserted further down; keep it unchanged.
        is_outlier = int(np.random.random() > 0.9)
        amp_sin = np.random.random() * 0.05
        amp_cos = np.random.random() * 0.05
        out_sin = (0.15 - 0.1) * np.random.random() + 0.1
        out_cos = (0.15 - 0.1) * np.random.random() + 0.1

        # Small-amplitude curve (regular case) and larger-amplitude curve
        # (outlier case); the switch selects exactly one of them.
        regular = amp_sin * np.sin(t) + amp_cos * np.cos(t)
        outlier = out_sin * np.sin(t) + out_cos * np.cos(t)
        return (1 - is_outlier) * regular + is_outlier * outlier

    np.random.seed(1234567)
    # Some basic test data, Model 6 from Sun and Genton.
    grid = np.linspace(0, 2 * np.pi, 250)
    curves = []
    for _ in range(20):
        curves.append(harmfunc(grid))

    # fboxplot test
    figure = plt.figure()
    axes = figure.add_subplot(111)
    _, depth, ix_depth, ix_outliers = fboxplot(curves, wfactor=2, ax=axes)

    expected_depth_order = np.array(
        [13, 4, 15, 19, 8, 6, 3, 16, 9, 7, 1, 5, 2, 12, 17, 11, 14, 10, 0, 18])
    assert_equal(ix_depth, expected_depth_order)
    expected_outliers = np.array([2, 11, 17, 18])
    assert_equal(ix_outliers, expected_outliers)

    # rainbowplot test (re-uses the depth computed by fboxplot above)
    xdata = np.arange(curves[0].size)
    figure = rainbowplot(curves, xdata=xdata, depth=depth,
                         cmap=plt.cm.rainbow)
Exemplo n.º 2
0
def test_fboxplot_rainbowplot(close_figures):
    # Both plotting functions are covered by one test; testing them
    # together is much faster.
    def harmfunc(t):
        """Build a random test curve from sine and cosine terms."""
        # Draw order matters: the seeded random stream fixes the expected
        # results checked below, so the five draws stay in this sequence.
        # Flag is 0 with p=0.9 and 1 with p=0.1 - used to create outliers.
        flag = int(np.random.random() > 0.9)
        small_a = np.random.random() * 0.05
        small_b = np.random.random() * 0.05
        large_a = (0.15 - 0.1) * np.random.random() + 0.1
        large_b = (0.15 - 0.1) * np.random.random() + 0.1

        sin_t = np.sin(t)
        cos_t = np.cos(t)
        return (1 - flag) * (small_a * sin_t + small_b * cos_t) + \
            flag * (large_a * sin_t + large_b * cos_t)

    np.random.seed(1234567)
    # Some basic test data, Model 6 from Sun and Genton.
    t = np.linspace(0, 2 * np.pi, 250)
    n_curves = 20
    data = [harmfunc(t) for _ in range(n_curves)]

    # fboxplot test
    fig = plt.figure()
    ax = fig.add_subplot(111)
    _, depth, ix_depth, ix_outliers = fboxplot(data, wfactor=2, ax=ax)

    assert_equal(
        ix_depth,
        np.array([13, 4, 15, 19, 8, 6, 3, 16, 9, 7,
                  1, 5, 2, 12, 17, 11, 14, 10, 0, 18]))
    assert_equal(ix_outliers, np.array([2, 11, 17, 18]))

    # rainbowplot test (re-uses depth variable)
    n_points = data[0].size
    fig = rainbowplot(data,
                      xdata=np.arange(n_points),
                      depth=depth,
                      cmap=plt.cm.rainbow)
Exemplo n.º 3
0
def main(argstr=None):
    """Parse the command line, load the result files and show the plots.

    Parameters
    ----------
    argstr : str, optional
        Command line given as one string; it is split with ``shlex.split``.
        When ``None``, ``argparse`` reads the arguments from ``sys.argv``.
    """
    import argparse

    # helper for boolean flags
    # based on http://stackoverflow.com/a/9236426/344821
    class ActionNoYes(argparse.Action):
        """Boolean option exposed as a ``--name`` / ``--no-name`` pair."""

        def __init__(self,
                     opt_name,
                     off_name=None,
                     dest=None,
                     default=True,
                     required=False,
                     help=None):

            if off_name is None:
                off_name = 'no-' + opt_name
            # Remembered so __call__ can tell which spelling was used.
            self.off_name = '--' + off_name

            if dest is None:
                dest = opt_name.replace('-', '_')

            super(ActionNoYes,
                  self).__init__(['--' + opt_name, '--' + off_name],
                                 dest,
                                 nargs=0,
                                 const=None,
                                 default=default,
                                 required=required,
                                 help=help)

        def __call__(self, parser, namespace, values, option_string=None):
            # True for the positive spelling, False for the "off" spelling.
            setattr(namespace, self.dest, option_string != self.off_name)

    parser = argparse.ArgumentParser()
    parser.add_argument('files', nargs='+')

    # --over-random and --absolute write the same destination; the default
    # is spelled out on both so it does not depend on registration order.
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument('--over-random', action='store_true', default=False)
    mode.add_argument('--absolute', action='store_false', dest='over_random',
                      default=False)

    # The patterns are compiled by argparse (type=re.compile), so the
    # default has to be a list of compiled patterns as well.
    parser.add_argument('--key-regexes',
                        '--keys',
                        nargs='*',
                        metavar='RE',
                        default=[re.compile('.*')],
                        type=re.compile)
    parser.add_argument('--key-exclude-regexes',
                        '--skip-keys',
                        nargs='*',
                        default=[],
                        type=re.compile,
                        metavar='RE')

    # A tuple (not a set) keeps the order stable in help and error messages.
    parser.add_argument('--legend',
                        default='outside',
                        choices=('outside', 'inside'))

    plot_types = parser.add_argument_group('Plot Types')
    plot_types._add_action(ActionNoYes('rmses', default=False))
    plot_types._add_action(ActionNoYes('rmse-fboxplots', default=False))
    plot_types._add_action(ActionNoYes('auc', default=True))
    plot_types._add_action(ActionNoYes('predaucs', default=False))
    plot_types._add_action(ActionNoYes('predauc-auc', default=False))
    plot_types.add_argument('--ge-cutoff', nargs='+', type=float)
    plot_types.add_argument('--ge-cutoff-auc', nargs='+', type=float)

    if argstr is not None:
        import shlex
        args = parser.parse_args(shlex.split(argstr))
    else:
        args = parser.parse_args()

    # pyplot is imported only after the arguments have been parsed.
    import matplotlib.pyplot as plt

    res = load_data(args.files,
                    do_rmse=args.rmses,
                    do_rmse_auc=args.auc,
                    do_predauc=args.predaucs,
                    do_predauc_auc=args.predauc_auc,
                    do_cutoffs=args.ge_cutoff,
                    do_cutoff_aucs=args.ge_cutoff_auc,
                    rmse_over_random=args.over_random,
                    ret_rmse_traces=args.rmse_fboxplots)
    # load_data returns an extra element when the RMSE traces are requested.
    if args.rmse_fboxplots:
        data, rmse_traces = res
    else:
        data = res
    ns = data['ns']

    def filter_keys(d):
        """Keep entries whose key matches an include and no exclude regex."""
        return {
            k: v
            for k, v in d.items()
            if any(r.search(k) for r in args.key_regexes) and not any(
                r.search(k) for r in args.key_exclude_regexes)
        }

    over_random = ' over_random' if args.over_random else ''

    # rmse stuff
    if args.rmses:
        plt.figure()
        plot_lines(ns, filter_keys(data['rmse']), 'RMSE' + over_random)
        show_legend(args.legend)

    if args.rmse_fboxplots:
        from statsmodels.graphics.functional import fboxplot
        for name, trace in filter_keys(rmse_traces).items():
            fboxplot(trace, xdata=ns)
            plt.hlines(0, *plt.xlim(), color='k')
            plt.title(KEY_NAMES.get(name, name))
            plt.xlabel("# of rated elements")
            plt.ylabel('RMSE' + over_random)

    if args.auc:
        plt.figure()
        plot_aucs(filter_keys(data['rmse_auc']),
                  'AUC ({})'.format('RMSE' + over_random))

    # prediction auc stuff
    if args.predaucs:
        plt.figure()
        plot_lines(ns, filter_keys(data['predauc']),
                   'Prediction AUC' + over_random)
        show_legend(args.legend)

    # Gated on its own flag (previously on args.auc), matching the
    # do_predauc_auc value that was passed to load_data above.
    if args.predauc_auc:
        plt.figure()
        plot_aucs(filter_keys(data['predauc_auc']),
                  'AUC ({})'.format('Prediction AUC' + over_random))

    # # >= cutoff stuff
    if args.ge_cutoff:
        for cutoff in args.ge_cutoff:
            plt.figure()
            plot_lines(ns, filter_keys(data['cutoffs'][cutoff]),
                       '# >= {}'.format(cutoff))
            show_legend(args.legend)

    if args.ge_cutoff_auc:
        for cutoff in args.ge_cutoff_auc:
            plt.figure()
            plot_aucs(filter_keys(data['cutoff_aucs'][cutoff]),
                      'AUC (# >= {})'.format(cutoff))

    plt.show()