def check_fit_bivariate_normal(sigma1, sigma2, mu, alpha, N=1000):
    """Check that fit_bivariate_normal recovers the parameters of a sample.

    Draws ``N`` points with ``bivariate_normal(mu, sigma1, sigma2, alpha, N)``,
    fits them with ``fit_bivariate_normal`` and asserts that every fitted
    parameter agrees with the corresponding input.

    Parameters
    ----------
    sigma1, sigma2
        Widths handed to ``bivariate_normal`` and compared with the fit.
    mu
        Centre handed to ``bivariate_normal`` and compared with the fit.
    alpha
        Rotation angle in radians, compared through ``sin(2 * alpha)``.
    N : int, optional
        Number of points drawn; it also sets the tolerance of the checks.

    Raises
    ------
    AssertionError
        If a fitted parameter is further from its input than the tolerance.
    """
    # poisson stats: the tolerance is 2 * sqrt(N) / N
    rtol = 2 * np.sqrt(N) / N

    x, y = bivariate_normal(mu, sigma1, sigma2, alpha, N).T
    mu_fit, sigma1_fit, sigma2_fit, alpha_fit = fit_bivariate_normal(x, y)

    # Shift the fitted angle by pi when it lies outside [-pi/2, pi/2].
    # sin(2 * alpha) is unchanged by that shift.
    if alpha_fit > np.pi / 2:
        alpha_fit -= np.pi
    elif alpha_fit < -np.pi / 2:
        alpha_fit += np.pi

    # Circular degeneracy in alpha: test sin(2*alpha) instead
    assert_allclose(np.sin(2 * alpha_fit), np.sin(2 * alpha), atol=2 * rtol)

    # assert_allclose takes (actual, desired) and scales rtol by `desired`,
    # so the fitted value goes first in every comparison.
    assert_allclose(mu_fit, mu, rtol=rtol)
    assert_allclose(sigma1_fit, sigma1, rtol=rtol)
    assert_allclose(sigma2_fit, sigma2, rtol=rtol)
# it's easier to visualize the x and y sigmas with alpha=0.0
alpha_deg = 0.0  # used to be 45 deg...
alpha = alpha_deg * np.pi / 180

#------------------------------------------------------------
# Draw N points from a multivariate normal distribution
#
#   we use the bivariate_normal function from astroML.  A more
#   general function for this is numpy.random.multivariate_normal(),
#   which requires the user to specify the full covariance matrix.
#   bivariate_normal() generates this covariance matrix for the
#   given inputs
np.random.seed(0)
X = bivariate_normal(mu, sigma1, sigma2, alpha, N)

#------------------------------------------------------------
# Create the figure showing the fits
fig = plt.figure(figsize=(8, 8))
fig.subplots_adjust(left=0.1, right=0.90, wspace=0.25,
                    bottom=0.1, top=0.9, hspace=0.3)

# Subplot indices are 1-based; index 0 is rejected by current matplotlib.
# Older releases treated 0 as the last cell of the grid, so 4 keeps this
# panel in the position where it used to be drawn.
ax = fig.add_subplot(2, 2, 4)

# unpack the sample into its two coordinate arrays
x, y = X.T

# scatter the points
sigma2 = 1.0
mu = [10, 10]
alpha_deg = 45.0
alpha = alpha_deg * np.pi / 180

#------------------------------------------------------------
# Sample N points from the bivariate normal defined above.
#
#   astroML's bivariate_normal() is called with the centre, the two
#   widths and the rotation angle.  numpy.random.multivariate_normal()
#   is the more general alternative, but it needs the full covariance
#   matrix from the caller.
np.random.seed(0)
X = bivariate_normal(mu, sigma1, sigma2, alpha, N)

#------------------------------------------------------------
# Build the figure that will show the fits
fig = plt.figure(figsize=(10, 5))
fig.subplots_adjust(left=0.07, right=0.95, wspace=0.05,
                    bottom=0.1, top=0.95)

# One panel per contamination level
for i, f in enumerate([0.05, 0.15]):
    ax = fig.add_subplot(1, 2, i + 1)

    # Overwrite the leading fraction f of the sample with outliers drawn
    # from a second bivariate normal.
    n_outliers = int(f * N)
    X[:n_outliers] = bivariate_normal((10, 10), 2, 5,
                                      45 * np.pi / 180., n_outliers)
    x, y = X.T
# Define the mean, principal axes, and rotation of the ellipse
mean = np.array([0, 0])
sigma_1 = 2
sigma_2 = 1
alpha = np.pi / 4

#------------------------------------------------------------
# Draw 10^5 points from a multivariate normal distribution
#
#   we use the bivariate_normal function from astroML.  A more
#   general function for this is numpy.random.multivariate_normal(),
#   which requires the user to specify the full covariance matrix.
#   bivariate_normal() generates this covariance matrix for the
#   given inputs.
np.random.seed(0)
x, cov = bivariate_normal(mean, sigma_1, sigma_2, alpha, size=100000,
                          return_cov=True)

# Marginal widths and the off-diagonal term of the returned covariance
sigma_x = np.sqrt(cov[0, 0])
sigma_y = np.sqrt(cov[1, 1])
sigma_xy = cov[0, 1]

#------------------------------------------------------------
# Plot the results
fig = plt.figure(figsize=(5, 5))
ax = fig.add_subplot(111)

# plot a 2D histogram/hess diagram of the points
H, bins = np.histogramdd(x, bins=2 * [np.linspace(-4.5, 4.5, 51)])

# histogramdd bins the first column of x along axis 0, whereas imshow draws
# axis 0 vertically.  Transpose so that the first coordinate runs along the
# horizontal axis, matching the (x-range, y-range) order given in `extent`.
ax.imshow(H.T, origin='lower', cmap=plt.cm.binary, interpolation='nearest',
          extent=[bins[0][0], bins[0][-1], bins[1][0], bins[1][-1]])
# Parameters of the distributions
Nbootstraps = 5000
N = 1000

sigma1 = 2.0
sigma2 = 1.0
mu = (10.0, 10.0)
alpha_deg = 45.0
alpha = alpha_deg * np.pi / 180
f = 0.01

#------------------------------------------------------------
# Draw the sample twice over: X is the clean sample, X_out is a copy
# whose leading fraction f has been replaced by outliers.
np.random.seed(0)
X = bivariate_normal(mu, sigma1, sigma2, alpha, N)

n_outliers = int(f * N)
X_out = X.copy()
X_out[:n_outliers] = bivariate_normal(mu, 2, 5,
                                      45 * np.pi / 180., n_outliers)

# Reference values of rho (pearson/spearman r) and tau.
# The tau expression comes from Eq. 41 of arXiv:1011.2009
rho_true = 0.6
tau_true = 2 / np.pi * np.arcsin(rho_true)

#------------------------------------------------------------
# Create a function to compute the statistics.  Since this
#  takes a while, we'll use the "pickle_results" decorator
#  to save the results of the computation to disk
sigma1 = 2.0
sigma2 = 1.0
mu = (10.0, 10.0)
alpha_deg = 45.0
alpha = alpha_deg * np.pi / 180

# Outlier population: fraction of points replaced, its widths and rotation
fracout = 0.01
sigmaout1 = 2.
sigmaout2 = 5.
alphaout = alpha

#------------------------------------------------------------
# Draw the sample twice over: dataXY is the clean sample, dataXY_out is a
# copy whose leading fraction fracout has been replaced by outliers.
np.random.seed(0)
dataXY = bivariate_normal(mu, sigma1, sigma2, alpha, Npts)

n_outliers = int(fracout * Npts)
dataXY_out = dataXY.copy()
dataXY_out[:n_outliers] = bivariate_normal(mu, sigmaout1, sigmaout2,
                                           alphaout, n_outliers)

# Reference values of rho (pearson/spearman r) and tau.
# The tau expression comes from Eq. 41 of arXiv:1011.2009
rho_true = 0.6
tau_true = 2 / np.pi * np.arcsin(rho_true)

#------------------------------------------------------------
# Create a function to compute the statistics.  Since this
#  takes a while, we'll use the "pickle_results" decorator
#  to save the results of the computation to disk