Пример #1
0
def plot_prediction_interpolated(lons, lats, ghfs):
    """ Plots predicted GHF values as an interpolated pseudocolor plot for
        Greenland.

        Args:
            lons: one-dimensional list (native, numpy, or pandas) of longitudes.
            lons: similar, latitudes.
            ghfs: similar, GHF values.
    """
    m = Basemap(**GREENLAND_BASEMAP_ARGS)
    _mark_ice_cores(m, GREENLAND.lon.as_matrix(), GREENLAND.lat.as_matrix(),
                    GREENLAND.ghf.as_matrix())
    pcolor_args = {'cmap': SPECTRAL_CMAP}
    colorbar_args = {'location': 'right', 'pad': '5%'}
    plot_values_on_map_pcolormesh_interp(m,
                                         lons,
                                         lats,
                                         ghfs,
                                         parallel_step=5.,
                                         meridian_step=10.,
                                         colorbar_args=colorbar_args,
                                         pcolor_args=pcolor_args)

    m.drawparallels(np.arange(-80., 81., 5.),
                    labels=[1, 0, 0, 0],
                    fontsize=10,
                    color='#c6c6c6')
    m.drawmeridians(np.arange(-180., 181., 10.),
                    labels=[0, 0, 0, 1],
                    fontsize=10,
                    color='#c6c6c6')

    m.drawcoastlines(color='grey', linewidth=0.5)
    m.drawmapboundary(color='grey')
Пример #2
0
def exp_sensitivity(data):
    """ Evaluates sensitivity of GBRT and linear regression to perturbations in
        training GHF. Plot is saved to <OUT_DIR>/sensitivity.png.
    """
    radius = GREENLAND_RADIUS
    roi_density = 11.3 # Greenland
    noise_amps = np.arange(0.025, .31, .025)
    ncenters = 50
    dumpfile = 'sensitivity.txt'
    plotfile = 'sensitivity.png'
    plot_sensitivity_analysis(data, roi_density, radius, noise_amps, ncenters, dumpfile=dumpfile, replot=False)
    save_cur_fig(plotfile, title='GBRT prediction sensitivity to noise in training GHF', set_title_for=None)
Пример #3
0
def exp_error_by_density(data):
    """ Evaluates prediction error (normalized rmse and r2) for GBRT, linear
        regression and constant predictor by using increasingly large sample
        densities in ROIs, constrained to the specified region, with radius
        equal to that of Greenland. Plot is saved to <OUT_DIR>/error_by_density[<region>].png.
    """
    densities = np.append(np.array([1]), np.arange(5, 51, 5))
    radius = GREENLAND_RADIUS
    ncenters = 50
    # region constraints: 'NA-WE', 'NA', 'WE', or None (i.e all)
    region = 'NA-WE'
    dumpfile = 'error_by_density[%s].txt' % region
    plotfile = 'error_by_density[%s].png' % region
    plot_error_by_density(data, densities, radius, ncenters, region=region, dumpfile=dumpfile, replot=False)
    save_cur_fig(plotfile)
Пример #4
0
def exp_error_by_radius(data):
    """ Evaluates prediction error (normalized rmse and r2) for GBRT, linear
        regression and constant predictor by using increasingly large radii for
        ROIs, constrained to the specified region, with sample density equal to
        that of Greenland. Plot is saved to <OUT_DIR>/error_by_radius[<region>].png.
    """
    radius = GREENLAND_RADIUS
    roi_density = 11.3 # Greenland
    ncenters = 50
    radii = np.arange(500, 4001, 500)
    region = 'NA-WE'
    dumpfile = 'error_by_radius[%s].txt' % region
    plotfile = 'error_by_radius[%s].png' % region

    sys.stderr.write('=> Experiment: Error by Radius (region: %s, no. centers: %d, no. radii: %d)\n' % (region, ncenters, len(radii)))
    plot_error_by_radius(data, roi_density, radii, ncenters, region=region, dumpfile=dumpfile, replot=False)
    save_cur_fig(plotfile)
Пример #5
0
def plot_error_by_density(data, roi_densities, radius, ncenters, region='NA-WE',
                          replot=False, dumpfile=None, **gbrt_params):
    """ ncenters random centers are picked and over all given ROI densities.
        Cross-validation errors (normalized RMSE and r2) are averaged over
        ncenters. One standard deviation mark is shown by a shaded region.
    """
    sys.stderr.write('=> Experiment: Error by Density (region: %s, no. centers: %d, no. densities: %d)\n' %
                     (region, ncenters, len(roi_densities)))
    fig = plt.figure(figsize=(11,5))
    ax_rmse, ax_r2 = fig.add_subplot(1, 2, 1), fig.add_subplot(1, 2, 2)

    if replot:
        results = pickle_load(dumpfile)
    else:
        centers = [
            random_prediction_ctr(data, radius, region=region, min_density=max(roi_densities))
            for _ in range(ncenters)
        ]
        shape = (ncenters, len(roi_densities))
        # blank error matrix (keyed by center number and roi density index),
        # used to initialize multiple components of the results dictionary.
        blank = np.zeros(shape)

        results = {
            'ncenters': ncenters,
            'roi_densities': roi_densities,
            'errors': {
                'gbrt': {'rmse': blank.copy(), 'r2': blank.copy()},
                'linear': {'rmse': blank.copy(), 'r2': blank.copy()},
                'constant': {'rmse': blank.copy(), 'r2': blank.copy()},
            },
        }
        for idx_density, roi_density in enumerate(roi_densities):
            for idx_ctr, center in enumerate(centers):
                sys.stderr.write('# density = %.2f, center %d/%d ' % (roi_density, idx_ctr + 1, ncenters))
                comp = compare_models(data, roi_density, radius, center, **gbrt_params)
                for k in results['errors'].keys():
                    # k is one of gbrt, linear, or constant
                    results['errors'][k]['r2'][idx_ctr][idx_density] = comp[k][0]
                    results['errors'][k]['rmse'][idx_ctr][idx_density] = comp[k][1]
        if dumpfile:
            pickle_dump(dumpfile, results, comment='GBRT performance results')

    errors = results['errors']
    roi_densities = results['roi_densities']
    ncenters = results['ncenters']
    num_sigma = 1

    # Plot GBRT results
    kw = {'alpha': .9, 'lw': 1, 'marker': 'o', 'markersize': 4, 'color': 'b'}
    mean_rmse = errors['gbrt']['rmse'].mean(axis=0)
    sd_rmse = np.sqrt(errors['gbrt']['rmse'].var(axis=0))
    lower_rmse = mean_rmse - num_sigma * sd_rmse
    higher_rmse = mean_rmse + num_sigma * sd_rmse
    ax_rmse.plot(roi_densities, mean_rmse, label='GBRT', **kw)
    ax_rmse.fill_between(roi_densities, lower_rmse, higher_rmse, facecolor='b', edgecolor='b', alpha=.3)

    mean_r2 = errors['gbrt']['r2'].mean(axis=0)
    sd_r2 = np.sqrt(errors['gbrt']['r2'].var(axis=0))
    lower_r2 = mean_r2 - num_sigma * sd_r2
    higher_r2 = mean_r2 + num_sigma * sd_r2
    ax_r2.plot(roi_densities, errors['gbrt']['r2'].mean(axis=0), **kw)
    ax_r2.fill_between(roi_densities, lower_r2, higher_r2, facecolor='b', edgecolor='b', alpha=.2)

    # Plot Linear Regression results
    kw = {'alpha': .7, 'lw': 1, 'marker': 'o', 'markersize': 4, 'markeredgecolor': 'r', 'color': 'r'}
    mean_rmse = errors['linear']['rmse'].mean(axis=0)
    sd_rmse = np.sqrt(errors['linear']['rmse'].var(axis=0))
    lower_rmse = mean_rmse - num_sigma * sd_rmse
    higher_rmse = mean_rmse + num_sigma * sd_rmse
    ax_rmse.plot(roi_densities, mean_rmse, label='linear regression', **kw)
    ax_rmse.fill_between(roi_densities, lower_rmse, higher_rmse, facecolor='r', edgecolor='r', alpha=.3)

    mean_r2 = errors['linear']['r2'].mean(axis=0)
    sd_r2 = np.sqrt(errors['linear']['r2'].var(axis=0))
    lower_r2 = mean_r2 - num_sigma * sd_r2
    higher_r2 = mean_r2 + num_sigma * sd_r2
    ax_r2.plot(roi_densities, errors['linear']['r2'].mean(axis=0), **kw)
    ax_r2.fill_between(roi_densities, lower_r2, higher_r2, facecolor='r', edgecolor='r', alpha=.2)

    # Plot constant predictor results
    kw = {'alpha': .7, 'lw': 1, 'ls': '--', 'marker': 'o', 'markersize': 4, 'color': 'k', 'markeredgecolor': 'k'}
    ax_rmse.plot(roi_densities, errors['constant']['rmse'].mean(axis=0), label='constant predictor', **kw)
    ax_r2.plot(roi_densities, errors['constant']['r2'].mean(axis=0), **kw)

    # Style plot
    ax_rmse.set_ylabel('Normalized RMSE', fontsize=14)
    ax_r2.set_ylabel('$r^2$', fontsize=16)
    ax_r2.set_ylim(-.05, 1)
    ax_r2.set_xlim(min(roi_densities) - 5, max(roi_densities) + 5)
    ax_r2.set_yticks(np.arange(0, 1.01, .1))
    ax_rmse.set_ylim(0, .5)
    ax_rmse.set_yticks(np.arange(0, .51, .05))
    ax_rmse.set_xlim(*ax_r2.get_xlim())
    for ax in [ax_rmse, ax_r2]:
        # FIXME force xlims to be the same
        ax.set_xlabel('density of training points in ROI ($10^{-6}$ km $^{-2}$)',
                      fontsize=14)
        ax.grid(True)
    ax_rmse.legend(prop={'size':15}, numpoints=1)
    fig.tight_layout()
Пример #6
0
def plot_generalization_analysis(data, roi_density, radius, ncenters,
                                 ns_estimators, replot=False, dumpfile=None):
    """ For all given values for n_estimators (number of trees) for GBRT,
        perform cross-validation over ncenters ROIs with given radius and
        sample density. The average training and validation error for each
        number of trees is plotted. This is the standard plot to detect
        overfitting defined as the turning point beyond which validation error
        starts increasing while training error is driven down to zero. As
        expected, GBRT does not overfit (validation error plateaus).

        One standard deviation is indicated by a shaded region.
    """
    fig, ax = plt.subplots()

    if replot:
        res = pickle_load(dumpfile)
        for v in ['roi_density', 'radius', 'ns_estimators', 'train_rmses', 'test_rmses']:
            exec('%s = res["%s"]' % (v, v))
        assert len(train_rmses) == len(test_rmses), \
               'array length (# of centers) should be the same for training and test'
    else:
        sys.stderr.write('=> Experiment: Generalization ' + \
                         '(roi_density: %.2f, radius: %.2f,' % (roi_density, radius) +
                         ' no. centers: %d, no. of n_estimators: %d)\n' % (ncenters, len(ns_estimators)))
        centers = [random_prediction_ctr(data, radius, min_density=roi_density)
                   for _ in range(ncenters)]

        train_rmses = np.zeros([ncenters, len(ns_estimators)])
        test_rmses = np.zeros([ncenters, len(ns_estimators)])
        for center_idx, center in enumerate(centers):
            sys.stderr.write('# center %d/%d\n' % (center_idx + 1, ncenters))
            X_train, y_train, X_test, y_test = \
                split_with_circle(data, center, roi_density=roi_density, radius=radius)
            X_train = X_train.drop(['lat', 'lon'], axis=1)
            X_test = X_test.drop(['lat', 'lon'], axis=1)
            assert not X_test.empty

            for n_idx, n in enumerate(ns_estimators):
                sys.stderr.write('  # n_estimators: %d ' % n)
                gbrt = train_gbrt(X_train, y_train, n_estimators=n)
                _, train_rmse = error_summary(y_train, gbrt.predict(X_train))
                _, test_rmse  = error_summary(y_test, gbrt.predict(X_test))
                train_rmses[center_idx][n_idx] = train_rmse
                test_rmses[center_idx][n_idx] = test_rmse

        if dumpfile:
            res = {'roi_density': roi_density,
                   'radius': radius,
                   'ns_estimators': ns_estimators,
                   'train_rmses': train_rmses,
                   'test_rmses': test_rmses}
            pickle_dump(dumpfile, res, comment='generalization errors')

    num_sigma = 1

    mean_rmse = test_rmses.mean(axis=0)
    sd_rmse = np.sqrt(test_rmses.var(axis=0))
    lower_rmse = mean_rmse - num_sigma * sd_rmse
    higher_rmse = mean_rmse + num_sigma * sd_rmse
    ax.plot(ns_estimators, mean_rmse, 'r', marker='o', markersize=3, alpha=.9, label='validation')
    ax.fill_between(ns_estimators, lower_rmse, higher_rmse, facecolor='r', edgecolor='r', alpha=.3)

    mean_rmse = train_rmses.mean(axis=0)
    sd_rmse = np.sqrt(train_rmses.var(axis=0))
    lower_rmse = mean_rmse - num_sigma * sd_rmse
    higher_rmse = mean_rmse + num_sigma * sd_rmse
    ax.plot(ns_estimators, mean_rmse, 'g', marker='o', markersize=3, alpha=.9, label='training')
    ax.fill_between(ns_estimators, lower_rmse, higher_rmse, facecolor='g', edgecolor='g', alpha=.3)

    ax.grid(True)
    ax.set_xlim(ns_estimators[0] - 100, ns_estimators[-1] + 100)
    ax.set_ylim(0, .3)
    ax.set_yticks(np.arange(0, .31, .05))
    ax.set_xlabel('Number of trees')
    ax.set_ylabel('Normalized RMSE')
    ax.legend(prop={'size':12.5})
    fig.tight_layout()
Пример #7
0
def plot_sensitivity_analysis(data, roi_density, radius, noise_amps, ncenters,
                              replot=False, dumpfile=None):
    """ For each given noise amplitude, performs cross-validation on ncenters
        with given radius and density, the average over ncenters of
        normalized rmse between noise-free predictions and predictions based on
        noisy GHF is calculated. This perturbation in predictions is plotted
        against the expected absolute value of applied noise (amplitude).

        Both GBRT and linear regression are considered.
        One standard deviation is indicated by a shaded region.
        The case of Greenland is considered separately and overlayed.
    """
    fig = plt.figure(figsize=(10, 5))
    ax_gbrt = fig.add_subplot(1, 2, 1)
    ax_lin = fig.add_subplot(1, 2, 2)

    def _predict(X_train, y_train, X_test, noise_amp):
        # If noise ~ N(0, s^2), then mean(|noise|) = s * sqrt(2/pi),
        # cf. https://en.wikipedia.org/wiki/Half-normal_distribution
        # To get noise with mean(|noise|) / mean(y) = noise_ampl, we need to
        # have noise ~ N(0, s*^2) with s* = mean(y) * noise_ampl * sqrt(pi/2).
        noise = np.mean(y_train) * noise_amp * np.sqrt(np.pi/ 2) * np.random.randn(len(y_train))
        gbrt = train_gbrt(X_train.drop(['lat', 'lon'], axis=1),
                          y_train + noise)
        lin_reg = train_linear(X_train.drop(['lat', 'lon'], axis=1),
                               y_train + noise)
        gbrt_pred = gbrt.predict(X_test.drop(['lat', 'lon'], axis=1))
        lin_pred = lin_reg.predict(X_test.drop(['lat', 'lon'], axis=1))
        return gbrt_pred, lin_pred

    if replot:
        res = pickle_load(dumpfile)
        rmses_gbrt, rmses_lin = res['rmses_gbrt'], res['rmses_lin']
        noise_amps = res['noise_amps']
    else:
        centers = [random_prediction_ctr(data, radius, min_density=roi_density)
                   for _ in range(ncenters)]
        y0 = []
        centers = [None] + centers # one extra "center" (Greenland)
        rmses_gbrt = np.zeros((len(centers), len(noise_amps)))
        rmses_lin = np.zeros((len(centers), len(noise_amps)))
        for idx_ctr, center in enumerate(centers):
            if center is None:
                # Greenland case
                X_train, y_train, X_test = greenland_train_test_sets()
            else:
                X_train, y_train, X_test, _ = \
                    split_with_circle(data, center, roi_density=roi_density, radius=radius)
            sys.stderr.write('(ctr %d) noise_amp = 0.00 ' % (idx_ctr + 1))
            y0_gbrt, y0_lin = _predict(X_train, y_train, X_test, 0)
            for idx_noise, noise_amp in enumerate(noise_amps):
                sys.stderr.write('(ctr %d) noise_amp = %.2f ' % (idx_ctr + 1, noise_amp))
                y_gbrt, y_lin = _predict(X_train, y_train, X_test, noise_amp)
                rmse_gbrt = sqrt(mean_squared_error(y0_gbrt, y_gbrt)) / np.mean(y0_gbrt)
                rmse_lin = sqrt(mean_squared_error(y0_lin, y_lin)) / np.mean(y0_lin)
                rmses_gbrt[idx_ctr][idx_noise] = rmse_gbrt
                rmses_lin[idx_ctr][idx_noise] = rmse_lin

        if dumpfile:
            res = {'rmses_lin': rmses_lin, 'rmses_gbrt': rmses_gbrt, 'noise_amps': noise_amps}
            pickle_dump(dumpfile, res, 'sensitivity analysis')

    kw = dict(alpha=.6, lw=2, marker='o', color='k', label='global average')
    noise_amps = np.append([0], noise_amps)

    num_sigma = 1
    mean_rmse = rmses_lin[1:].mean(axis=0)
    sd_rmse = np.sqrt(rmses_lin[1:].var(axis=0))
    lower_rmse = np.append([0], mean_rmse - num_sigma * sd_rmse)
    higher_rmse = np.append([0], mean_rmse + num_sigma * sd_rmse)
    mean_rmse = np.append([0], mean_rmse)
    ax_lin.plot(noise_amps, mean_rmse, **kw)
    ax_lin.fill_between(noise_amps, lower_rmse, higher_rmse, facecolor='k', edgecolor='k', alpha=.2)

    mean_rmse = rmses_gbrt[1:].mean(axis=0)
    sd_rmse = np.sqrt(rmses_gbrt[1:].var(axis=0))
    lower_rmse = np.append([0], mean_rmse - num_sigma * sd_rmse)
    higher_rmse = np.append([0], mean_rmse + num_sigma * sd_rmse)
    mean_rmse = np.append([0], mean_rmse)
    ax_gbrt.plot(noise_amps, mean_rmse, **kw)
    ax_gbrt.fill_between(noise_amps, lower_rmse, higher_rmse, facecolor='k', edgecolor='k', alpha=.2)

    # Greenland case
    kw = dict(color='g', alpha=.5, lw=2.5, marker='o',
              markeredgewidth=0.0, label='Greenland')
    ax_lin.plot(noise_amps, np.append([0], rmses_lin[0]), **kw)
    ax_gbrt.plot(noise_amps, np.append([0], rmses_gbrt[0]), **kw)

    for ax in [ax_gbrt, ax_lin]:
        ax.set_xlabel('Relative magnitude of noise in training GHF', fontsize=12)
        ax.set_xlim(0, max(noise_amps) * 1.1)
        ax.set_aspect('equal')
        ax.grid(True)
        ax.set_xticks(np.arange(0, .35, .05))
        ax.set_yticks(np.arange(0, .35, .05))
        ax.set_xlim(-.025, .325)
        ax.set_ylim(-.025, .325)
        ax.legend(loc=1, fontsize=12)
    ax_gbrt.set_ylabel(r'Normalized RMSE difference in $\widehat{GHF}_{\mathrm{GBRT}}$', fontsize=12)
    ax_lin.set_ylabel(r'Normalized RMSE difference in $\widehat{GHF}_{\mathrm{lin}}$', fontsize=12)

    fig.tight_layout()
Пример #8
0
def plot_error_by_radius(data, roi_density, radii, ncenters, region='NA-WE',
                         replot=False, dumpfile=None, **gbrt_params):
    """ ncenters random centers are picked and over all given radii.
        Cross-validation errors (normalized RMSE and r2) are averaged over
        ncenters. One standard deviation mark is shown by a shaded region.
    """
    fig = plt.figure(figsize=(11,5))
    ax_rmse, ax_r2 = fig.add_subplot(1, 2, 1), fig.add_subplot(1, 2, 2)

    if replot:
        results = pickle_load(dumpfile)
    else:
        centers = [
            # HACK there's no easy way to check if for a given center the
            # demanded density is attainable for circles of all desired radii.
            # Ask for twice the density we need on the largest radius and hope
            # for the best!
            random_prediction_ctr(data, max(radii), region=region, min_density=2*roi_density)
            for _ in range(ncenters)
        ]
        shape = (ncenters, len(radii))
        # blank error matrix (keyed by center number and roi density index),
        # used to initialize multiple components of the results dictionary.
        blank = np.zeros(shape)

        results = {
            'ncenters': ncenters,
            'radii': radii,
            'errors': {
                'gbrt': {'rmse': blank.copy(), 'r2': blank.copy()},
                'linear': {'rmse': blank.copy(), 'r2': blank.copy()},
                'constant': {'rmse': blank.copy(), 'r2': blank.copy()},
            },
        }
        for idx_radius, radius in enumerate(radii):
            for idx_ctr, center in enumerate(centers):
                sys.stderr.write('# radius = %.0f, center %d/%d ' % (radius, idx_ctr + 1, ncenters))
                comp = compare_models(data, roi_density, radius, center, **gbrt_params)
                for k in results['errors'].keys():
                    # k is one of gbrt, linear, or constant
                    results['errors'][k]['r2'][idx_ctr][idx_radius] = comp[k][0]
                    results['errors'][k]['rmse'][idx_ctr][idx_radius] = comp[k][1]
        if dumpfile:
            pickle_dump(dumpfile, results, comment='GBRT performance results')

    errors = results['errors']
    radii = results['radii']
    ncenters = results['ncenters']

    num_sigma = 1

    # Plot GBRT results
    kw = {'alpha': .9, 'lw': 1, 'marker': 'o', 'markersize': 4, 'color': 'b'}
    mean_rmse = errors['gbrt']['rmse'].mean(axis=0)
    sd_rmse = np.sqrt(errors['gbrt']['rmse'].var(axis=0))
    lower_rmse = mean_rmse - num_sigma * sd_rmse
    higher_rmse = mean_rmse + num_sigma * sd_rmse
    ax_rmse.plot(radii, mean_rmse, label='GBRT', **kw)
    ax_rmse.fill_between(radii, lower_rmse, higher_rmse, facecolor='b', edgecolor='b', alpha=.3)

    mean_r2 = errors['gbrt']['r2'].mean(axis=0)
    sd_r2 = np.sqrt(errors['gbrt']['r2'].var(axis=0))
    lower_r2 = mean_r2 - num_sigma * sd_r2
    higher_r2 = mean_r2 + num_sigma * sd_r2
    ax_r2.plot(radii, errors['gbrt']['r2'].mean(axis=0), **kw)
    ax_r2.fill_between(radii, lower_r2, higher_r2, facecolor='b', edgecolor='b', alpha=.2)

    # Plot Linear Regression results
    kw = {'alpha': .7, 'lw': 1, 'marker': 'o', 'markersize': 4, 'markeredgecolor': 'r', 'color': 'r'}
    mean_rmse = errors['linear']['rmse'].mean(axis=0)
    sd_rmse = np.sqrt(errors['linear']['rmse'].var(axis=0))
    lower_rmse = mean_rmse - num_sigma * sd_rmse
    higher_rmse = mean_rmse + num_sigma * sd_rmse
    ax_rmse.plot(radii, mean_rmse, label='linear regression', **kw)
    ax_rmse.fill_between(radii, lower_rmse, higher_rmse, facecolor='r', edgecolor='r', alpha=.3)

    mean_r2 = errors['linear']['r2'].mean(axis=0)
    sd_r2 = np.sqrt(errors['linear']['r2'].var(axis=0))
    lower_r2 = mean_r2 - num_sigma * sd_r2
    higher_r2 = mean_r2 + num_sigma * sd_r2
    ax_r2.plot(radii, errors['linear']['r2'].mean(axis=0), **kw)
    ax_r2.fill_between(radii, lower_r2, higher_r2, facecolor='r', edgecolor='r', alpha=.2)

    # Plot constant predictor results
    kw = {'alpha': .7, 'lw': 1, 'ls': '--', 'marker': 'o', 'markersize': 4, 'color': 'k', 'markeredgecolor': 'k'}
    ax_rmse.plot(radii, errors['constant']['rmse'].mean(axis=0), label='constant predictor', **kw)
    ax_r2.plot(radii, errors['constant']['r2'].mean(axis=0), **kw)

    # Style plot
    ax_rmse.set_ylabel('Normalized RMSE', fontsize=14)
    ax_r2.set_ylabel('$r^2$', fontsize=16)
    ax_r2.set_ylim(-.05, 1)
    ax_r2.set_xlim(min(radii) - 100, max(radii) + 100)
    ax_r2.set_yticks(np.arange(0, 1.01, .1))
    ax_rmse.set_ylim(0, .5)
    ax_rmse.set_yticks(np.arange(0, .51, .05))
    ax_rmse.set_xlim(*ax_r2.get_xlim())
    for ax in [ax_rmse, ax_r2]:
        # FIXME force xlims to be the same
        ax.set_xlabel('radius of ROI (km)', fontsize=14)
        ax.grid(True)
    ax_rmse.legend(prop={'size':15}, numpoints=1)
    fig.tight_layout()