def test_biweight_location_constant_axis_3d():
    """Constant 3D input must give a location of 1 along each single axis."""
    shape = (10, 5, 2)
    data = np.ones(shape)
    for axis in range(len(shape)):
        # The reduced axis disappears from the result shape.
        reduced_shape = tuple(
            size for dim, size in enumerate(shape) if dim != axis
        )
        location = biweight_location(data, axis=axis)
        assert_allclose(location, np.ones(reduced_shape))
def test_biweight_location_ignore_nan():
    """Check the ``ignore_nan`` keyword on 1D and 2D input containing a NaN."""
    values = np.array([1, 3, 5, 500, 2, np.nan])
    stacked = np.array([values, values])

    # Without ignore_nan the NaN propagates to the result.
    assert np.isnan(biweight_location(values, ignore_nan=False))

    # With ignore_nan the result must equal the location of the finite part.
    expected = biweight_location(values[:-1], ignore_nan=False)
    assert_equal(biweight_location(values, ignore_nan=True), expected)

    assert_equal(
        biweight_location(stacked, axis=0, ignore_nan=True),
        values,
    )
    assert_equal(
        biweight_location(stacked, axis=1, ignore_nan=True),
        [expected, expected],
    )
def test_biweight_location_constant_axis_2d():
    """Constant 2D input, and constant rows inside non-constant data."""
    shape = (10, 5)
    ones = np.ones(shape)
    assert_allclose(biweight_location(ones, axis=0), np.ones(shape[1]))
    assert_allclose(biweight_location(ones, axis=1), np.ones(shape[0]))

    # Overwrite two rows with a constant; their location along axis=1 must
    # be that constant.
    constant_rows = {2: 100., 7: 2.}
    data = np.arange(50).reshape(10, 5)
    for row, value in constant_rows.items():
        data[row] = value

    row_locations = biweight_location(data, axis=1)
    for row, value in constant_rows.items():
        assert_allclose(row_locations[row], value)
def test_biweight_location_axis_3d():
    """Compare axis=0 on a 3D array with per-column 1D results."""
    with NumpyRNGContext(12345):
        nz, ny, nx = 3, 4, 5
        data = np.random.normal(5, 2, (nz, ny, nx))

        bw = biweight_location(data, axis=0)
        assert bw.shape == (ny, nx)

        # Recompute the first row of the result one 1D slice at a time.
        y = 0
        expected_row = np.array(
            [biweight_location(data[:, y, i]) for i in range(nx)]
        )
        assert_allclose(bw[y], expected_row)
def get_mean_cov(X, robust=True):
    """Estimate the mean vector and covariance matrix of a sample.

    The mean is taken along ``axis=0``, so each row of ``X`` is treated as
    one observation and each column as one variable.  Both branches return
    a covariance matrix over the same variables as the mean.

    Parameters
    ----------
    X : numpy.ndarray
        2D data, one observation per row.  ``X.T`` is used in the robust
        branch, so the input must provide a ``.T`` attribute.
    robust : bool
        If True (default), use astropy's ``biweight_location`` and
        ``biweight_midcovariance``.  If False, use ``np.mean`` and
        ``np.cov``.

    Returns
    -------
    mean_tot : array-like
        Mean (or biweight location) of every column of ``X``.
    cov_tot : array-like
        Covariance (or biweight midcovariance) between the columns of ``X``.
    """
    if robust:
        # Imported lazily so the classical path does not require astropy.
        from astropy.stats.biweight import (
            biweight_midcovariance,
            biweight_location,
        )

        mean_tot = biweight_location(X, axis=0)
        cov_tot = biweight_midcovariance(X.T)
    else:
        mean_tot = np.mean(X, axis=0)
        # np.cov treats rows as variables by default; rowvar=False makes the
        # columns the variables, matching the axis=0 mean and the robust
        # branch above.
        cov_tot = np.cov(X, rowvar=False)
    return mean_tot, cov_tot
def xbin_ybwt(x, y, xbins, Nmin=1):
    """
    Bin (x, y) pairs in x and summarise y in each bin with the biweight.

    Input:
      x, y  : paired samples of equal length
      xbins : bin edges, passed to np.digitize
      Nmin  : minimum number of points a bin must hold to be evaluated
              (default 1); bins with fewer points are left as nan

    Return:
      bin centers, biweight location per bin, biweight scale per bin
    """
    assert len(x) == len(y)

    centers = (xbins[1:] + xbins[:-1]) / 2
    n_bins = len(centers)
    locations = np.full(n_bins, np.nan)
    scales = np.full(n_bins, np.nan)

    # Output slot k collects the points that np.digitize labelled k + 1.
    labels = np.digitize(x, xbins)
    for slot in range(n_bins):
        in_bin = y[labels == slot + 1]
        if len(in_bin) >= Nmin:
            locations[slot] = biweight_location(in_bin)
            scales[slot] = biweight_scale(in_bin)

    return centers, locations, scales
def test_biweight_location_nan():
    """NaN handling for plain and masked input without ``ignore_nan``."""
    values = np.array([1, 3, 5, 500, 2, np.nan])
    nan_row = np.full_like(values, np.nan)
    stacked = np.array([values, values, nan_row])

    values_masked = np.ma.masked_invalid(values)
    # Plant a NaN that is NOT covered by the mask.
    values_masked.data[0] = np.nan
    stacked_masked = np.ma.masked_invalid(stacked)

    assert np.isnan(biweight_location(values))

    location = biweight_location(values_masked)
    assert not isinstance(location, np.ma.MaskedArray)

    assert np.isnan(biweight_location(stacked))

    for axis in (0, 1):
        plain_result = biweight_location(stacked, axis=axis)
        assert np.all(np.isnan(plain_result))
        masked_result = biweight_location(stacked_masked, axis=axis)
        assert isinstance(masked_result, np.ma.MaskedArray)
def test_biweight_location_axis():
    """Compare the axis keyword on a 2D array with per-slice 1D results."""
    with NumpyRNGContext(12345):
        ny, nx = 100, 200
        data = np.random.normal(5, 2, (ny, nx))

        # axis=0: one value per column.
        per_column = np.array(
            [biweight_location(data[:, i]) for i in range(nx)]
        )
        assert_allclose(biweight_location(data, axis=0), per_column)

        # axis=1: one value per row.
        per_row = np.array(
            [biweight_location(data[i, :]) for i in range(ny)]
        )
        assert_allclose(biweight_location(data, axis=1), per_row)
def filter_columns(data_2d):
    """Return the biweight location of every column of a 2D array.

    Each column is converted to float and reduced with
    ``biweight.biweight_location``; the result is a 1D float array with one
    entry per column.
    """
    _, n_cols = data_2d.shape
    column_locations = np.zeros(n_cols)
    for col in range(n_cols):
        column = data_2d[:, col].astype(float)
        column_locations[col] = biweight.biweight_location(column)
    return column_locations
def find_resolution(multispec_fname, initial_fwhm=.05,
                    usepercentile=True, percentiles=(60, 80, 95), Rguess=None,
                    full_output=False, useclip=True, findpeak=False,
                    makeplot=True):
    """
    Estimate the resolving power R of every order of an arc spectrum.

    The arcs are read with ``Spectrum1D.read(multispec_fname, flux_ext=4)``.
    For each order, every line of the bundled ``thar_list`` that lies inside
    the order's wavelength range is fit with a Gaussian in a window of
    +/- 5 * ``initial_fwhm``.  Fits with positive amplitude and a center
    within 0.05 of the listed position are kept, and
    R = center / (2.355 * sigma) is computed for each of them.  Orders
    without any accepted fit are skipped entirely.

    Parameters
    ----------
    multispec_fname : str
        File passed to ``Spectrum1D.read``.
    initial_fwhm : float
        Initial FWHM guess; sets the fit window and the initial sigma.
    usepercentile : bool
        If True (and ``findpeak`` is False), summarise R with
        ``np.percentile(R, percentiles)``; the error is the pair
        ``(mid - low, high - mid)``.
    percentiles : sequence of 3 numbers
        Low, middle and high percentiles used when ``usepercentile`` is True.
    Rguess : float or None
        Initial guess handed to ``find_distribution_peak``.  If None, the
        ``np.nanmedian`` of the first processed order is used for all orders.
    full_output : bool
        If True, additionally return the per-line R values, the arcs and the
        accepted line fits.
    useclip : bool
        If True, pass R through ``sigma_clip`` before summarising.
    findpeak : bool
        If True, summarise R with ``find_distribution_peak`` (falling back to
        ``np.median`` on failure).  No error estimate is produced in this
        mode, so the reported error is NaN.
    makeplot : bool
        If True, show an errorbar plot of R against mid-order wavelength.

    Returns
    -------
    allRmed, allRerr, allwmid
        Per-order summary of R, its error, and the mid-order wavelength.
        With ``full_output`` also ``allR, arcs, alllinefits``.
    """
    from .spectrum import Spectrum1D
    from astropy.stats import sigma_clip, biweight
    from ..robust_polyfit import gaussfit
    from ..utils import find_distribution_peak

    arcs = Spectrum1D.read(multispec_fname, flux_ext=4)
    line_centers = np.loadtxt(os.path.dirname(__file__) + "/../data/linelists/thar_list", usecols=0)

    alllinefits = []
    allRmed = []
    allRerr = []
    allwmid = []
    allR = []
    for i, arc in enumerate(arcs):
        linefits = []
        wave = arc.dispersion
        flux = arc.flux
        wmin, wmax = wave[0], wave[-1]
        wmid = (wmin + wmax) / 2.
        lcs = line_centers[(line_centers > wmin) & (line_centers < wmax)]
        for lc in lcs:
            fwhm = initial_fwhm
            # get subpiece of arc
            ii = (wave > lc - 5 * fwhm) & (wave < lc + 5 * fwhm)
            if not np.any(ii):
                # No pixels in the window: np.max below would raise.
                continue
            _x, _y = wave[ii], flux[ii]
            # guess amplitude, center, sigma
            p0 = [np.max(_y), lc, fwhm / 2.355]
            try:
                popt = gaussfit(_x, _y, p0)
            except Exception:
                # Best effort: a line that cannot be fit is simply skipped.
                continue
            if popt[0] > 0 and abs(popt[1] - lc) < .05:
                linefits.append(popt)

        try:
            A, w, s = np.array(linefits).T
        except ValueError:
            # Raised when there is nothing to unpack (no accepted fits).
            print("This order did not have any good lines I guess")
            continue
        alllinefits.append(linefits)

        R = w / (s * 2.355)
        if useclip:
            # NOTE(review): sigma_clip returns a masked array by default, and
            # np.percentile / np.median below may not honour the mask --
            # confirm that clipped values are really excluded.
            R = sigma_clip(R)

        if findpeak:
            if Rguess is None:
                Rguess = np.nanmedian(R)
            try:
                Rmed = find_distribution_peak(R, Rguess)
            except (ValueError, RuntimeError):
                print("--Could not find peak for arc {:02}".format(i))
                print("--{}".format(sys.exc_info()))
                Rmed = np.median(R)
            # This mode has no error estimate; previously Rerr was left
            # undefined here and the append below raised NameError.
            Rerr = np.nan
        elif usepercentile:
            assert len(percentiles) == 3
            Rlo, Rmed, Rhi = np.percentile(R, percentiles)
            Rerr = (Rmed - Rlo, Rhi - Rmed)
        else:
            Rmed = biweight.biweight_location(R)
            Rerr = biweight.biweight_scale(R)

        allR.append(R)
        allRmed.append(Rmed)
        allRerr.append(Rerr)
        allwmid.append(wmid)

    if usepercentile:
        # (n_orders, 2) -> (2, n_orders), the layout errorbar expects for
        # asymmetric errors.  A 1D array is unchanged by the transpose.
        allRerr = np.array(allRerr).T

    if makeplot:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots()
        ax.errorbar(allwmid, allRmed, yerr=allRerr, fmt='o')
        plt.show()

    if full_output:
        return allRmed, allRerr, allwmid, allR, arcs, alllinefits
    return allRmed, allRerr, allwmid
def test_biweight_location_small():
    """Five-point sample with one outlier: location is about 2.745."""
    sample = [1, 3, 5, 500, 2]
    expected = 2.745
    tolerance = 1e-3
    location = biweight_location(sample)
    assert abs(location - expected) < tolerance
def test_biweight_location_constant():
    """The location of an all-ones array, with no axis given, is exactly 1."""
    all_ones = np.ones((10, 5))
    assert biweight_location(all_ones) == 1.
def test_biweight_location_small():
    """Five-point sample with one outlier, checked with assert_allclose."""
    sample = [1, 3, 5, 500, 2]
    expected = 2.7456117
    assert_allclose(biweight_location(sample), expected)
def test_biweight_location_masked():
    """Masked input must agree with ``ignore_nan=True`` on plain input."""
    values = np.array([1, 3, 5, 500, 2, np.nan])
    stacked = np.array([values, values])
    values_masked = np.ma.masked_invalid(values)
    stacked_masked = np.ma.masked_invalid(stacked)

    # No axis: masked input and ignore_nan give the same result.
    assert_equal(
        biweight_location(values, ignore_nan=True),
        biweight_location(values_masked),
    )
    assert_equal(
        biweight_location(stacked, ignore_nan=True),
        biweight_location(stacked_masked),
    )

    # axis=1: every row has valid data, so nothing in the result is masked.
    plain_rows = biweight_location(stacked, ignore_nan=True, axis=1)
    masked_rows = biweight_location(stacked_masked, axis=1)
    assert isinstance(masked_rows, np.ma.MaskedArray)
    assert ~np.any(masked_rows.mask)  # mask is all False
    assert_equal(plain_rows, masked_rows.data)

    # axis=0: the last column is entirely masked, so its result is masked.
    plain_cols = biweight_location(stacked, ignore_nan=True, axis=0)
    masked_cols = biweight_location(stacked_masked, axis=0)
    assert_equal(masked_cols.data[:-1], plain_cols[:-1])
    assert masked_cols.mask[-1]  # last mask element is True

    values_masked.data[0] = np.nan  # unmasked NaN
    assert biweight_location(values_masked) is np.ma.masked
    assert_equal(
        biweight_location(values_masked, ignore_nan=True),
        biweight_location(values[1:], ignore_nan=True),
    )

    # ensure that input masked array is not modified
    assert np.isnan(values_masked[0])
def test_biweight_location():
    """Smoke test: 10000 standard-normal draws have a location near 0."""
    with NumpyRNGContext(12345):
        draws = np.random.randn(10000)
        location = biweight_location(draws)
        assert abs(location - 0) < 1e-2
def test_biweight_location_axis_tuple():
    """Test a 3D array with a tuple axis keyword."""
    data = np.arange(24).reshape(2, 3, 4)
    data[0, 0] = 100.

    # Each pair of axis specifications must yield identical results.
    equivalent_axes = [
        (0, (0,)),
        (-1, (2,)),
        ((0, 1), (1, 0)),
        ((0, 2), (0, -1)),
        ((0, 1, 2), (2, 0, 1)),
        ((0, 1, 2), None),
    ]
    for first, second in equivalent_axes:
        assert_equal(
            biweight_location(data, axis=first),
            biweight_location(data, axis=second),
        )
def histogauss(SAMPLE, domain=(0, 0), outfile='temp.png'):
    """
    Histogram data and overlay it with a fitted Gaussian.

    Uses "MPFIT" to perform the fitting, which can be done on a restricted
    domain.  The figure is written to ``outfile``.

    CALLING SEQUENCE:
        model_params = histogauss.histogauss(mydata)
        model_params = histogauss.histogauss(mydata, [lower, upper])

    INPUT:
        SAMPLE  = nparray of data values (near-normally distributed) to be
                  histogrammed; it is copied, not modified
        domain  = optional 2-valued sequence specifying domain endpoints for
                  the Gaussian fit; the default (0, 0) fits the Gaussian to
                  the entire domain of data values
        outfile = file name handed to plt.savefig (default 'temp.png')

    OUTPUT:
        list of coefficients of the Gaussian fit:
            [0] = the normalization ("height") of the fitted Gaussian
            [1] = the mean of the fitted Gaussian
            [2] = the standard deviation of the fitted Gaussian
            [3] = the half-width of the 95% confidence interval
                  of the biweight mean (not fitted)
        The integer 0 is returned instead when the data are degenerate
        (too many identical values) or when mpfit reports a failure.
    """
    # REVISION HISTORY:
    #       Written, H. Freudenreich, STX, 12/89
    #       More quantities returned in A, 2/94, HF
    #       Added NOPLOT keyword and print if Gaussian, 3/94
    #       Stopped printing confidence limits on normality 3/31/94 HF
    #       Added CHARSIZE keyword, changed annotation format, 8/94 HF
    #       Simplified calculation of Gaussian height, 5/95 HF
    #       Convert to V5.0, use T_CVF instead of STUDENT_T, GAUSSFIT instead of
    #           FITAGAUSS  W. Landsman April 2002
    #       Correct call to T_CVF for calculation of A[3], 95% confidence interval
    #                P. Broos/W. Landsman   July 2003
    #       Allow FONT keyword to be passed.  T. Robishaw Apr. 2006
    #       Use Coyote Graphics for plotting W.L. Mar 2011
    #       Better formatting of text output W.L. May 2012
    # (Crudely) converted to Python3 by N. Grogin July 2020

    DATA = SAMPLE.copy()
    N = len(DATA)

    # Make sure that not everything is in the same bin. If most
    # data = 0, reject zeroes. If they are all some other value,
    # complain and give up.
    DATA.sort()
    # boundaries of first and third quartiles
    N3 = int(0.75*N)-1
    N1 = int(0.25*N)-1
    if DATA[N3] == DATA[N1]:
        if DATA[int(N/2)-1] == 0:
            if np.count_nonzero(DATA != 0) > 15:
                print('Suppressing Zeroes')
                DATA = DATA[DATA != 0]
                N = len(DATA)
            else:
                print('Too Few Non-0 Values!')
                return 0
        else:
            print('Too Many Identical Values: '+str(DATA[int(N/2)]))
            return 0

    # legacy structure from the IDL version; A[0] was an effective height
    A = np.zeros(4)
    NTOT = len(DATA)
    # Outlier-resistant estimator of sample "mean":
    A[1] = biweight_location(DATA)
    # Outlier-resistant estimator of sample "standard deviation":
    A[2] = biweight_scale(DATA)
    # Compute the 95% confidence half-interval on the above mean:
    M = 0.7*(NTOT-1)  # appropriate for a biweighted mean
    CL = 0.95
    two_tail_area = 1 - CL
    A[3] = abs(sp_stats.t.ppf(1 - (two_tail_area)/2.0, M)) * A[2] / np.sqrt(NTOT)

    # clear the figure
    plt.clf()

    # Plot the histogram with a fixed 200 bins; density=True is important
    # for uniform normalization.
    histy, histx, ignored = hist(DATA, bins=200, histtype='stepfilled', density=True)
    histx = np.asarray(histx)
    histy = np.asarray(histy)

    # Midpoints of the histogram bins
    xmid = (histx[:-1] + histx[1:]) / 2

    # (0, 0) in any sequence form (list, tuple, ndarray) means "no restriction";
    # comparing an ndarray directly against [0, 0] would raise in an `if`.
    use_full_domain = domain is None or tuple(domain) == (0, 0)

    # trim the histogram-fitting region, if a domain is specified
    if use_full_domain:
        fitx = xmid
        fity = histy
    else:
        in_domain = (xmid >= domain[0]) & (xmid <= domain[1])
        fitx = xmid[in_domain]
        fity = histy[in_domain]

    # Initial guess of Gaussian params: normalization; mean; sigma
    # !!! POOR RESULTS IF INPUT DATA ARE HIGHLY NON-GAUSSIAN !!!
    p0 = [max(histy)*(A[2] * np.sqrt(2 * np.pi)), A[1], A[2]]

    # Uniform weighting of each histogram bin value, in Gaussian fit
    err = np.ones(len(fitx))

    # dictionary of histogram information, for the fitting
    fa = {'x': fitx, 'y': fity, 'err': err}

    # perform the Gaussian fit to the histogram
    m = mpfit.mpfit(gaussian_fitter, p0, functkw=fa)

    # post-fitting diagnostics
    print('mpfit status = ', m.status)
    if m.status <= 0:
        print('mpfit error message = ', m.errmsg)
        return 0
    print('mpfit parameters = ', m.params)
    [norm, mu, sigma] = m.params

    def fitted_gaussian(x):
        # Model curve for the fitted (norm, mu, sigma).
        return norm/(sigma * np.sqrt(2 * np.pi)) * np.exp(- (x - mu)**2 / (2 * sigma**2))

    # plot the model-fit Gaussian at finely-spaced locations spanning the
    # data bins; the fitted region is green, the ignored region(s) red
    finex = histx[0] + np.arange(1000) * (histx[-1]-histx[0])/1000
    if use_full_domain:
        plt.plot(finex, fitted_gaussian(finex), linewidth=2, color='g')
    else:
        xsec = finex[finex < domain[0]]
        if len(xsec) > 0:
            plt.plot(xsec, fitted_gaussian(xsec), linewidth=2, color='r')
        xsec = finex[(finex >= domain[0]) & (finex <= domain[1])]
        plt.plot(xsec, fitted_gaussian(xsec), linewidth=2, color='g')
        xsec = finex[finex > domain[1]]
        if len(xsec) > 0:
            plt.plot(xsec, fitted_gaussian(xsec), linewidth=2, color='r')

    # The figure is dumped to a file rather than shown interactively.
    plt.savefig(outfile)

    return [norm, mu, sigma, A[3]]