# Set bounding box used by DEFT bbox = [-5.0, 10.0] # Perform density estimation using DEFT start_time = time.clock() # # DO THIS If all you want is the distribution function # #Q_star = deft_1d(xs, G=G, alpha=alpha, bbox=bbox) # # DO THIS If all you want details of the computation # Q_star, results = deft_1d(xs, G=G, alpha=alpha, bbox=bbox, details=True) s = 'deft_1d with G=%d and alpha=%d took %.2f sec' % (G, alpha, time.clock() - start_time) ### ### Plot results ### # Close existing figure and create new figure plt.close('all') plt.figure() # Plot histogram plt.hist(xs, G,
G = 100

# Specify N and alphas
N = 100
alpha = 2
num_samples = 20

# Draw data, rescaled to give an xint of length L = 10, centered on 0
xint = sp.array([-5.0, 5.0])
[xis, xint, Q_true, Q_true_details] = draw_from_gaussian_mix(
    N=N, Nx=G, gaussians=gaussians, xint=xint)

# Describe the interval as midpoint + half-span so that the variants below
# can widen it symmetrically around its centre.
xmid = sp.mean(xint)
xspan = xint - xmid
gaussians = Q_true_details.gaussians

# Perform DEFT density estimation.
# Baseline run; uses the num_samples setting defined above instead of
# repeating the literal.
Q_star, Q_star_details = deft_1d(
    xis, xmid + xspan, alpha=alpha, G=G, details=True,
    num_samples=num_samples, tf_shift=-3, verbose=True)

# Wider bounding boxes; G is scaled by the same factor as the box width.
Q_star_wide3, Q_star_wide3_details = deft_1d(
    xis, xmid + 3*xspan, alpha=alpha, G=3*G, details=True, verbose=True)
# Widen by scaling the half-span (not the raw interval), matching the
# "wide3" run; scaling xint itself would shift the box whenever xmid != 0.
Q_star_wide10, Q_star_wide10_details = deft_1d(
    xis, xmid + 10*xspan, alpha=alpha, G=10*G, details=True, verbose=True)

# Same bounding box, finer / coarser grids.
Q_star_fine, Q_star_fine_details = deft_1d(
    xis, xmid + xspan, alpha=alpha, G=3*G, details=True, verbose=True)
Q_star_coarse, Q_star_coarse_details = deft_1d(
    xis, xmid + xspan, alpha=alpha, G=int(G/3), details=True, verbose=True)

# Same bounding box and grid, different alpha.
Q_star_alpha1, Q_star_alpha1_details = deft_1d(
    xis, xmid + xspan, alpha=1, G=G, details=True, verbose=True)
Q_star_alpha3, Q_star_alpha3_details = deft_1d(
    xis, xmid + xspan, alpha=3, G=G, details=True, verbose=True)

# Design plotting grid
xs = sp.linspace(xint[0], xint[1], 10000)

# Save everything
things = {}
things['Q_star_details'] = Q_star_details
things['Q_true_details'] = Q_true_details
things['Q_star_wide3_details'] = Q_star_wide3_details
# Set bounding box used by DEFT
bbox = [-5.0, 10.0]

# Perform density estimation using DEFT.
# time.clock() was removed in Python 3.8; use time.perf_counter() where it
# exists and fall back to time.clock() only on interpreters that lack it.
_timer = getattr(time, "perf_counter", None) or time.clock
start_time = _timer()

#
# DO THIS If all you want is the distribution function
#
# Q_star = deft_1d(xs, G=G, alpha=alpha, bbox=bbox)
#
# DO THIS If all you want details of the computation
#
Q_star, results = deft_1d(xs, G=G, alpha=alpha, bbox=bbox, details=True)
s = "deft_1d with G=%d and alpha=%d took %.2f sec" % (
    G, alpha, _timer() - start_time)

###
### Plot results
###

# Close existing figure and create new figure
plt.close("all")
plt.figure()

# Plot histogram, normalised to unit area. `density=True` replaces the
# `normed` keyword, which current Matplotlib no longer accepts.
plt.hist(xs, G, density=True, histtype="stepfilled", edgecolor="none",
         facecolor="gray")

# Plot estimated density and true density
def get_pdfs_from_data(data, method="deft", G=200, alpha=3, bbox="adjust",
                       factor=0.5, verbose=False):
    """
    Performs a non-parametric estimation of the densities in data and returns
    a list compatible with the npfi function.

    If DEFT is used for the estimates, it uses the same bounding box for all
    PDFs. This bounding box should be used when calling npfi.

    Args:
        data: A list of arrays containing the sample data.
        method: Either "deft" or "gaussian_kde" for the non-parametric
            estimation method.
        G: parameter to be passed to DEFT if used [2].
        alpha: parameter to be passed to DEFT if used [2].
        bbox: Either "adjust" or a bounding box (tuple with two values).
            Used for DEFT only; with "gaussian_kde" the returned box is
            always (-inf, inf).
        factor: If bbox is "adjust", by which factor to adjust.
        verbose: If set to true, print out debug info such as run times.

    Returns:
        pdfs: a list of the estimated pdfs, one per entry of data
        bbox: the appropriate bounding box

    Raises:
        AssertionError: if an argument fails validation.
        Exception: if method is "deft" but DEFT is not available.
    """
    # Imported locally so the block does not depend on a new file-level
    # import. The abstract numeric types replace the Python-2-only `long`
    # and additionally accept NumPy scalar integers / floats.
    import numbers

    assert hasattr(data, '__iter__')
    assert isinstance(method, str) and method in ("deft", "gaussian_kde")
    if method == "deft" and not HAS_DEFT:
        raise Exception("DEFT has been disabled.")
    assert isinstance(G, numbers.Integral)
    assert isinstance(alpha, numbers.Integral) and alpha > 0
    assert isinstance(factor, numbers.Real) and factor > 0
    if isinstance(bbox, str):
        # The only accepted string; other strings (including two-character
        # ones that would satisfy the length test below) are rejected.
        assert bbox == "adjust"
    else:
        assert len(bbox) == 2 and bbox[0] < bbox[1]

    # Track time
    if verbose:
        start = timeit.default_timer()

    # Strings are compared by value (==); identity (`is`) only works when the
    # interpreter happens to intern both operands.
    use_kde = method == "gaussian_kde"

    # Get the bounding box if necessary
    if use_kde:
        bbox = (-np.inf, np.inf)
    elif isinstance(bbox, str):
        # bbox == "adjust" (validated above): derive one box from all the data.
        bbox = get_bbox(data, multi_dim=True, factor=factor)

    # Estimate the PDFs
    if use_kde:
        pdfs = [gaussian_kde(d) for d in data]
    else:
        pdfs = [deft_1d(d, G=G, alpha=alpha, bbox=bbox) for d in data]

    if verbose:
        print("PDF estimation took %.2f with %s" %
              (timeit.default_timer() - start, method))

    return pdfs, bbox
# Draw data from mixture
[xis, xgrid, Q_true, other] = draw_from_gaussian_mix(N=N, Nx=G, gaussians=gaussians)

# Compute data range and grid for fine-grained analysis
xmin = min(xgrid)
xmax = max(xgrid)
xint = [xmin, xmax]
xs = sp.linspace(xmin, xmax, plot_grid_size)
dx = xs[1]-xs[0]

# Interpolate Q_true for plotting
Q_true_vals = Q_true(xs)

# Perform DEFT density estimation
Q_star1_vals = deft_1d(xis, xint, alpha=1, G=G, verbose=False)(xs)
Q_star2_vals = deft_1d(xis, xint, alpha=2, G=G, verbose=False)(xs)
Q_star3_vals = deft_1d(xis, xint, alpha=3, G=G, verbose=False)(xs)

# Perform GKDE density estimation, renormalised so that it integrates to 1
# over the plotting grid
gkde = gaussian_kde(xis)
Q_gkde_vals = gkde(xs)/sum(gkde(xs)*dx)

# Perform GMM density estimation using BIC
max_K = 10
bic_values = sp.zeros([max_K])
Qs_gmm = sp.zeros([max_K, plot_grid_size])
for k in sp.arange(1, max_K+1):
    gmm = mixture.GMM(int(k))
    gmm.fit(xis)
    # Normalisation over the plotting grid, computed once per fitted model
    # rather than on every evaluation of Qgmm.
    gmm_norm = sum(sp.exp(gmm.score(xs))*dx)
    # `lambda x:` instead of `lambda(x):` -- the parenthesised parameter
    # form is a SyntaxError on Python 3.
    Qgmm = lambda x: sp.exp(gmm.score(x))/gmm_norm
# Draw data from mixture [xis, xgrid, Q_true, other] = draw_from_gaussian_mix(N=N, Nx=G, gaussians=gaussians) # Compute data range and grid for fine-graned analysis xmin = min(xgrid) xmax = max(xgrid) xint = [xmin, xmax] xs = sp.linspace(xmin, xmax, plot_grid_size) dx = xs[1] - xs[0] # Interpolate Q_true for plotting Q_true_vals = Q_true(xs) # Perform DEFT density estimation Q_star1_vals = deft_1d(xis, xint, alpha=1, G=G, verbose=False)(xs) Q_star2_vals = deft_1d(xis, xint, alpha=2, G=G, verbose=False)(xs) Q_star3_vals = deft_1d(xis, xint, alpha=3, G=G, verbose=False)(xs) # Perform GKDE denstiy estimation gkde = gaussian_kde(xis) Q_gkde_vals = gkde(xs) / sum(gkde(xs) * dx) # Perform GMM denstiy estimation using BIC max_K = 10 bic_values = sp.zeros([max_K]) Qs_gmm = sp.zeros([max_K, plot_grid_size]) for k in sp.arange(1, max_K + 1): gmm = mixture.GMM(int(k)) gmm.fit(xis) Qgmm = lambda (x): sp.exp(gmm.score(x)) / sum(
###
### Perform density estimation using deft_1d
###

# Set number of grid points used by DEFT
G = 100

# Set power of derivative to constrain
alpha = 3

# Set bounding box used by DEFT
bbox = [-5.0, 10.0]

# Perform density estimation using DEFT.
# time.clock() was removed in Python 3.8; use time.perf_counter() where it
# exists and fall back to time.clock() only on interpreters that lack it.
_timer = getattr(time, 'perf_counter', None) or time.clock
start_time = _timer()
Q_star = deft_1d(xs, G=G, alpha=alpha, bbox=bbox)
s = 'deft_1d with G=%d and alpha=%d took %.2f sec' % (
    G, alpha, _timer() - start_time)

###
### Plot results
###

# Close existing figure and create new figure
plt.close('all')
plt.figure()

# Plot histogram, normalised to unit area. `density=True` replaces the
# `normed` keyword, which current Matplotlib no longer accepts.
plt.hist(xs, G, density=True, histtype='stepfilled', edgecolor='none',
         facecolor='gray')

# Plot estimated density and true density
xgrid = sp.linspace(bbox[0], bbox[1], 1000)