def test_biweight_32bit_runtime_warnings():
    """Regression test for #6905."""
    with NumpyRNGContext(12345):
        data = np.random.random(100).astype(np.float32)
        data[50] = 30000.
        biweight_scale(data)
        biweight_midvariance(data)
def test_biweight_scale():
    # NOTE: biweight_scale is covered by biweight_midvariance tests
    data = [1, 3, 5, 500, 2]
    scl = biweight_scale(data)
    var = biweight_midvariance(data)
    assert_allclose(scl, np.sqrt(var))

    data = np.ma.masked_invalid([1, 3, 5, 500, 2, np.nan])
    data[0] = np.nan
    scl = biweight_scale(data, ignore_nan=True)
    var = biweight_midvariance(data, ignore_nan=True)
    assert_allclose(scl, np.sqrt(var))
def test_biweight_32bit_runtime_warnings():
    """Regression test for #6905."""
    with NumpyRNGContext(12345):
        data = np.random.random(100).astype(np.float32)
        data[50] = 30000.

        with catch_warnings(RuntimeWarning) as warning_lines:
            biweight_scale(data)
            assert len(warning_lines) == 0

        with catch_warnings(RuntimeWarning) as warning_lines:
            biweight_midvariance(data)
            assert len(warning_lines) == 0
def xbin_ybwt(x, y, xbins, Nmin=1):
    """
    Take x,y pairs. Bin in x, find biweight location and scale.

    Input:
        x and y, xbins
        Nmin : default 1
            minimum number of points per bin to be used (otherwise nan)
    Return:
        xbin centers, yloc, yscale
    """
    assert len(x) == len(y)
    xp = (xbins[1:] + xbins[:-1]) / 2
    Nbins = len(xp)
    yloc = np.zeros(Nbins) + np.nan
    yscale = np.zeros(Nbins) + np.nan
    bin_nums = np.digitize(x, xbins)
    for ibin in range(Nbins):
        bin_num = ibin + 1
        vals = y[bin_nums == bin_num]
        if len(vals) < Nmin:
            continue
        yloc[ibin] = biweight_location(vals)
        yscale[ibin] = biweight_scale(vals)
    return xp, yloc, yscale
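# A minimal usage sketch for xbin_ybwt above (not part of the original source).
# It assumes numpy and astropy.stats' biweight_location / biweight_scale are
# imported, as the function requires; the synthetic data are purely illustrative.
import numpy as np
from astropy.stats import biweight_location, biweight_scale

rng = np.random.default_rng(0)
x = rng.uniform(0, 10, 1000)
y = 2.0 * x + rng.normal(0, 1, 1000)      # noisy linear relation
xbins = np.linspace(0, 10, 11)            # 10 equal-width bins in x
centers, yloc, yscale = xbin_ybwt(x, y, xbins, Nmin=5)
# yloc traces the robust per-bin trend (~2*x); yscale the robust per-bin scatter (~1)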
def velocity_disp_proj(self, pc, radius):
    v = pc[2][pc[0] <= radius]
    self.sigma_s = np.std(v, ddof=1)
    self.sigma_g = gapper(v)
    self.sigma_b = biweight.biweight_scale(v)
    # , c=9.0, M=biweight.biweight_location(v, M=np.mean(v)))
    # if len(v) > 1:
    #     self.sigma_b = aux_bwt*np.sqrt(len(v)/(len(v)-1))
    # else:
    #     self.sigma_b = aux_bwt
    return self.sigma_s, self.sigma_g, self.sigma_b
def los_sigmas_sph(self, proj_info, r):
    v = proj_info[:, 4][proj_info[:, 1] <= r]
    self.sigma_los_sph_s = np.std(v, ddof=1)
    self.sigma_los_sph_g = gapper(v)
    self.sigma_los_sph_b = biweight.biweight_scale(v)
    # , c=9.0, M=biweight.biweight_location(v, M=np.mean(v)))
    # if len(v) > 1:
    #     self.sigma_los_sph_b = aux_bwt*np.sqrt(len(v)/(len(v)-1))
    # else:
    #     self.sigma_los_sph_b = aux_bwt
    # self.sigma_los_sph_b = biweight.biweight_scale(v)  # , c=9.0, M=biweight.biweight_location(v, M=np.mean(v)))*np.sqrt(len(v)/(len(v)-1))

    # ERRORS
    self.e_sigma_los_sph_s, self.e_sigma_los_sph_g, self.e_sigma_los_sph_b = errors_estim(v)
    # pair each dispersion estimate with its bootstrap error
    return ((self.sigma_los_sph_s, self.e_sigma_los_sph_s),
            (self.sigma_los_sph_g, self.e_sigma_los_sph_g),
            (self.sigma_los_sph_b, self.e_sigma_los_sph_b))
def errors_estim(v):
    ss = []
    sg = []
    sb = []
    for ib in range(100):
        np.random.seed(1986 * ib)
        vv = np.random.choice(v, len(v), replace=True)
        ss = np.append(ss, np.std(vv, ddof=1))
        sg = np.append(sg, gapper(vv))
        sb = np.append(sb, biweight.biweight_scale(vv))
        # , c=9.0, M=biweight.biweight_location(vv, M=np.mean(vv))))
        # if len(v) > 1:
        #     sb = np.append(sb, aux_sb*np.sqrt(len(vv)/(len(vv)-1)))
        # else:
        #     sb = np.append(sb, aux_sb)
        # sb = np.append(sb, biweight.biweight_scale(vv, c=9.0, M=biweight.biweight_location(vv, M=np.mean(vv))))  # *np.sqrt(len(vv)/(len(vv)-1)))
    e_ss = np.std(ss)
    e_sg = np.std(sg)
    e_sb = np.std(sb)
    return e_ss, e_sg, e_sb
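# A minimal usage sketch for errors_estim above (not part of the original source).
# It assumes numpy, astropy.stats.biweight, and a gapper() dispersion estimator are
# available, as the function requires; the mock velocities are purely illustrative.
import numpy as np
from astropy.stats import biweight

v = np.random.normal(loc=0.0, scale=200.0, size=50)   # mock line-of-sight velocities, km/s
e_std, e_gap, e_bwt = errors_estim(v)
# Each value is the bootstrap (100 resamples with replacement) uncertainty of the
# corresponding dispersion estimator: sample std, gapper, and biweight scale.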
def test_biweight_scale_nan():
    data1d = np.array([1, 3, 5, 500, 2, np.nan])
    all_nan = data1d.copy()
    all_nan[:] = np.nan
    data2d = np.array([data1d, data1d, all_nan])
    data1d_masked = np.ma.masked_invalid(data1d)
    data1d_masked.data[0] = np.nan
    data2d_masked = np.ma.masked_invalid(data2d)

    assert np.isnan(biweight_scale(data1d))
    bw_scl = biweight_scale(data1d_masked)
    assert not isinstance(bw_scl, np.ma.MaskedArray)
    assert np.isnan(bw_scl)
    assert np.isnan(biweight_scale(data2d))
    assert_allclose(biweight_scale(data2d_masked), 1.709926, atol=1e-5)
    for axis in (0, 1):
        assert np.all(np.isnan(biweight_scale(data2d, axis=axis)))
        assert isinstance(biweight_scale(data2d_masked, axis=axis),
                          np.ma.MaskedArray)
def test_biweight_scale_axis_tuple():
    """Test a 3D array with a tuple axis keyword."""
    data = np.arange(24).reshape(2, 3, 4)
    data[0, 0] = 100.

    assert_equal(biweight_scale(data, axis=0),
                 biweight_scale(data, axis=(0,)))
    assert_equal(biweight_scale(data, axis=-1),
                 biweight_scale(data, axis=(2,)))
    assert_equal(biweight_scale(data, axis=(0, 1)),
                 biweight_scale(data, axis=(1, 0)))
    assert_equal(biweight_scale(data, axis=(0, 2)),
                 biweight_scale(data, axis=(0, -1)))
    assert_equal(biweight_scale(data, axis=(0, 1, 2)),
                 biweight_scale(data, axis=(2, 0, 1)))
    assert_equal(biweight_scale(data, axis=(0, 1, 2)),
                 biweight_scale(data, axis=None))
    assert_equal(biweight_scale(data, axis=(0, 2), modify_sample_size=True),
                 biweight_scale(data, axis=(0, -1), modify_sample_size=True))
def measure_mike_velocities(template, blue_fname, red_fname,
                            outfname_fig=None, outfname_data=None,
                            vmin=-500, vmax=500, vspanmin=10,
                            dvlist=[10, 1, .1],
                            norm_kwargs={"exclude": None, "function": "spline",
                                         "high_sigma_clip": 1.0, "include": None,
                                         "knot_spacing": 20, "low_sigma_clip": 2.0,
                                         "max_iterations": 5, "order": 2,
                                         "scale": 1.0},
                            telluric_regions=[]):
    name = blue_fname.split("/")[-1].split("blue")[0]
    name2 = red_fname.split("/")[-1].split("red")[0]

    if not isinstance(template, spectrum.Spectrum1D):
        # Assume it's a filename to load
        template = spectrum.Spectrum1D.read(template)

    ## Blue
    orders = spectrum.Spectrum1D.read(blue_fname)
    orders = mask_tellurics(orders, telluric_regions)
    try:
        v_helio, v_bary = motions.corrections_from_headers(orders[0].metadata)
        vhelcorr = v_helio.to("km/s").value
    except Exception as e:
        print("vhel failed:")
        print(e)
        vhelcorr = np.nan
    rv_output1 = measure_order_velocities_2(orders, template, norm_kwargs,
                                            vmin=vmin, vmax=vmax,
                                            vspanmin=vspanmin, dvlist=dvlist)

    ## Red
    orders = spectrum.Spectrum1D.read(red_fname)
    orders = mask_tellurics(orders, telluric_regions)
    rv_output2 = measure_order_velocities_2(orders, template, norm_kwargs,
                                            vmin=vmin, vmax=vmax,
                                            vspanmin=vspanmin, dvlist=dvlist)

    o_all = np.array(list(rv_output1[:, 0]) + list(rv_output2[:, 0]))
    v_all = np.array(list(rv_output1[:, 1]) + list(rv_output2[:, 1]))
    e_all = np.array(list(rv_output1[:, 2]) + list(rv_output2[:, 2]))
    w_all = np.array(list((rv_output1[:, 3] + rv_output1[:, 4]) / 2.) +
                     list((rv_output2[:, 3] + rv_output2[:, 4]) / 2.))

    keep = np.isfinite(e_all) & np.isfinite(v_all) & (o_all <= 95) & (o_all >= 50)
    for iter_clip in range(5):
        w = e_all[keep]**-2
        v_avg = np.sum(w * v_all[keep]) / np.sum(w)
        v_err = (np.sum(w))**-0.5
        v_std = biweight_scale(v_all[keep])
        v_med = np.median(v_all[keep])
        new_keep = keep & (np.abs(v_all - v_med) < 5 * v_std)
        print("===============iter_clip={}, {}->{}".format(
            iter_clip + 1, keep.sum(), new_keep.sum()))
        if keep.sum() == new_keep.sum():
            break
        keep = new_keep

    v_blue = np.median(v_all[keep & (o_all > 70)])
    v_red = np.median(v_all[keep & (o_all <= 70)])

    if outfname_fig is not None:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(figsize=(8, 4))
        yrange = max(5, 5 * v_std)
        wave1 = (rv_output1[:, 3] + rv_output1[:, 4]) / 2
        wave2 = (rv_output2[:, 3] + rv_output2[:, 4]) / 2
        ax.errorbar(wave1, rv_output1[:, 1], yerr=rv_output1[:, 2],
                    fmt='o', color='b', ecolor='b')
        ax.errorbar(wave2, rv_output2[:, 1], yerr=rv_output2[:, 2],
                    fmt='o', color='r', ecolor='r')
        ax.plot(w_all[keep], v_all[keep], 'ko', mfc='none', mec='k',
                mew=2, ms=10)
        ax.set_ylim(v_avg - yrange, v_avg + yrange)
        ax.yaxis.set_minor_locator(plt.MultipleLocator(0.5))
        ax.yaxis.set_major_locator(plt.MultipleLocator(2))
        ax.axhline(v_avg, color='k', zorder=-9)
        fig.tight_layout()
        fig.savefig(outfname_fig)
        plt.close(fig)

    if outfname_data is not None:
        np.save(outfname_data,
                [(v_avg, v_med, v_err, v_std, v_blue, v_red, vhelcorr),
                 (o_all, v_all, e_all, w_all, keep),
                 rv_output1, rv_output2])

    return v_avg, v_med, v_err, v_std, v_blue, v_red, vhelcorr
def test_biweight_scale():
    # NOTE: biweight_scale is covered by biweight_midvariance tests
    data = [1, 3, 5, 500, 2]
    scl = biweight_scale(data)
    var = biweight_midvariance(data)
    assert_allclose(scl, np.sqrt(var))
def sigmas200(self, vv):
    # v = vv[0]
    v = vv[:, 0]
    self.sigma_x_s = np.std(v, ddof=1)
    self.sigma_x_g = gapper(v)
    self.sigma_x_b = biweight.biweight_scale(v)
    # , c=9.0, M=biweight.biweight_location(v, M=np.mean(v)))
    # if len(v) > 1:
    #     self.sigma_x_b = aux_bwt_x*np.sqrt(len(v)/(len(v)-1))
    # else:
    #     self.sigma_x_b = aux_bwt_x
    # ERRORS
    e_ssx, e_sgx, e_sbx = errors_estim(v)

    # v = vv[1]
    v = vv[:, 1]
    self.sigma_y_s = np.std(v, ddof=1)
    self.sigma_y_g = gapper(v)
    self.sigma_y_b = biweight.biweight_scale(v)
    # , c=9.0, M=biweight.biweight_location(v, M=np.mean(v)))*np.sqrt(len(v)/(len(v)-1))
    # aux_bwt_y = biweight.biweight_scale(v)  # , c=9.0, M=biweight.biweight_location(v, M=np.mean(v)))
    # if len(v) > 1:
    #     self.sigma_y_b = aux_bwt_y*np.sqrt(len(v)/(len(v)-1))
    # else:
    #     self.sigma_y_b = aux_bwt_y
    # ERRORS
    e_ssy, e_sgy, e_sby = errors_estim(v)

    # v = vv[2]
    v = vv[:, 2]
    self.sigma_z_s = np.std(v, ddof=1)
    self.sigma_z_g = gapper(v)
    self.sigma_z_b = biweight.biweight_scale(v)
    # , c=9.0, M=biweight.biweight_location(v, M=np.mean(v)))
    # if len(v) > 1:
    #     self.sigma_z_b = aux_bwt_z*np.sqrt(len(v)/(len(v)-1))
    # else:
    #     self.sigma_z_b = aux_bwt_z
    # ERRORS
    e_ssz, e_sgz, e_sbz = errors_estim(v)

    self.sigma3d_s = np.sqrt(self.sigma_x_s**2 + self.sigma_y_s**2 + self.sigma_z_s**2)
    self.e_sigma3d_s = np.sqrt((self.sigma_x_s * e_ssx)**2 +
                               (self.sigma_y_s * e_ssy)**2 +
                               (self.sigma_z_s * e_ssz)**2) / self.sigma3d_s
    self.sigma1d_s = self.sigma3d_s / np.sqrt(3.)
    self.e_sigma1d_s = self.e_sigma3d_s / np.sqrt(3.)

    self.sigma3d_g = np.sqrt(self.sigma_x_g**2 + self.sigma_y_g**2 + self.sigma_z_g**2)
    self.e_sigma3d_g = np.sqrt((self.sigma_x_g * e_sgx)**2 +
                               (self.sigma_y_g * e_sgy)**2 +
                               (self.sigma_z_g * e_sgz)**2) / self.sigma3d_g
    self.sigma1d_g = self.sigma3d_g / np.sqrt(3.)
    self.e_sigma1d_g = self.e_sigma3d_g / np.sqrt(3.)

    self.sigma3d_b = np.sqrt(self.sigma_x_b**2 + self.sigma_y_b**2 + self.sigma_z_b**2)
    self.e_sigma3d_b = np.sqrt((self.sigma_x_b * e_sbx)**2 +
                               (self.sigma_y_b * e_sby)**2 +
                               (self.sigma_z_b * e_sbz)**2) / self.sigma3d_b
    self.sigma1d_b = self.sigma3d_b / np.sqrt(3.)
    self.e_sigma1d_b = self.e_sigma3d_b / np.sqrt(3.)

    return ((self.sigma3d_s, self.e_sigma3d_s),
            (self.sigma3d_g, self.e_sigma3d_g),
            (self.sigma3d_b, self.e_sigma3d_b),
            (self.sigma1d_s, self.e_sigma1d_s),
            (self.sigma1d_g, self.e_sigma1d_g),
            (self.sigma1d_b, self.e_sigma1d_b))
def find_resolution(multispec_fname, initial_fwhm=.05,
                    usepercentile=True, percentiles=[60, 80, 95],
                    Rguess=None, full_output=False, useclip=True,
                    findpeak=False, makeplot=True):
    """
    Estimate the spectral resolution R of each order by fitting Gaussians
    to ThAr arc lines in a multispec file.
    """
    from .spectrum import Spectrum1D
    from astropy.stats import sigma_clip, biweight
    from ..robust_polyfit import gaussfit
    from ..utils import find_distribution_peak
    import time

    arcs = Spectrum1D.read(multispec_fname, flux_ext=4)
    line_centers = np.loadtxt(os.path.dirname(__file__) +
                              "/../data/linelists/thar_list", usecols=0)

    start = time.time()
    alllinefits = []
    allRmed = []
    allRerr = []
    allwmid = []
    allR = []
    for i, arc in enumerate(arcs):
        linefits = []
        wave = arc.dispersion
        flux = arc.flux
        wmin, wmax = wave[0], wave[-1]
        wmid = (wmin + wmax) / 2.
        lcs = line_centers[(line_centers > wmin) & (line_centers < wmax)]
        for lc in lcs:
            fwhm = initial_fwhm
            # get subpiece of arc
            ii = (wave > lc - 5 * fwhm) & (wave < lc + 5 * fwhm)
            _x, _y = wave[ii], flux[ii]
            # guess amplitude, center, sigma
            p0 = [np.max(_y), lc, fwhm / 2.355]
            try:
                popt = gaussfit(_x, _y, p0)
            except Exception:
                pass
            else:
                if popt[0] > 0 and abs(popt[1] - lc) < .05:
                    linefits.append(popt)
        try:
            A, w, s = np.array(linefits).T
        except ValueError:
            print("This order did not have any good lines I guess")
            # allR.append(np.nan); allRmed.append(np.nan); allRerr.append(np.nan); allwmid.append(wmid)
            continue
        alllinefits.append(linefits)

        R = w / (s * 2.355)
        if useclip:
            R = sigma_clip(R)
        if findpeak:
            if Rguess is None:
                Rguess = np.nanmedian(R)
            try:
                Rmed = find_distribution_peak(R, Rguess)
            except (ValueError, RuntimeError):
                print("--Could not find peak for arc {:02}".format(i))
                print("--{}".format(sys.exc_info()))
                Rmed = np.median(R)
        elif usepercentile:
            assert len(percentiles) == 3
            Rlo, Rmed, Rhi = np.percentile(R, percentiles)
            # Rerr = max(Rhi-Rmed, Rmed-Rlo)
            Rerr = (Rmed - Rlo, Rhi - Rmed)
        else:
            Rmed = biweight.biweight_location(R)
            Rerr = biweight.biweight_scale(R)

        allR.append(R)
        allRmed.append(Rmed)
        allRerr.append(Rerr)
        allwmid.append(wmid)

    if usepercentile:
        allRerr = np.array(allRerr).T

    if makeplot:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots()
        ax.errorbar(allwmid, allRmed, yerr=allRerr, fmt='o')
        plt.show()

    if full_output:
        return allRmed, allRerr, allwmid, allR, arcs, alllinefits
    return allRmed, allRerr, allwmid
def histogauss(SAMPLE, domain=[0, 0]):
    #+
    # NAME:
    #       HISTOGAUSS
    #
    # PURPOSE:
    #       Histograms data and overlays it with a fitted Gaussian.
    #       Uses "MPFIT" to perform the fitting, which can be done on a
    #       restricted domain.
    #
    # CALLING SEQUENCE:
    #       model_params = histogauss.histogauss(mydata)
    #       model_params = histogauss.histogauss(mydata, [lower, upper])
    #
    # INPUT:
    #       mydata = nparray of data values (near-normally distributed) to be
    #                histogrammed
    #       domain = optional 2-valued list, specifying domain endpoints for the
    #                Gaussian fit; default is to fit the Gaussian to the entire
    #                domain of data values
    #
    # OUTPUT ARGUMENTS:
    #       model_param = list of coefficients of the Gaussian fit:
    #
    #       model_param[0] = the normalization ("height") of the fitted Gaussian
    #       model_param[1] = the mean of the fitted Gaussian
    #       model_param[2] = the standard deviation of the fitted Gaussian
    #       model_param[3] = the half-width of the 95% confidence interval
    #                        of the biweight mean (not fitted)
    #
    # REVISION HISTORY:
    #       Written, H. Freudenreich, STX, 12/89
    #       More quantities returned in A, 2/94, HF
    #       Added NOPLOT keyword and print if Gaussian, 3/94
    #       Stopped printing confidence limits on normality 3/31/94 HF
    #       Added CHARSIZE keyword, changed annotation format, 8/94 HF
    #       Simplified calculation of Gaussian height, 5/95 HF
    #       Convert to V5.0, use T_CVF instead of STUDENT_T, GAUSSFIT instead of
    #         FITAGAUSS  W. Landsman April 2002
    #       Correct call to T_CVF for calculation of A[3], 95% confidence interval
    #         P. Broos/W. Landsman  July 2003
    #       Allow FONT keyword to be passed.  T. Robishaw  Apr. 2006
    #       Use Coyote Graphics for plotting  W.L.  Mar 2011
    #       Better formatting of text output  W.L.  May 2012
    #-
    #       (Crudely) converted to Python3 by N. Grogin July 2020
    #
    DATA = SAMPLE.copy()
    N = len(DATA)

    # Make sure that not everything is in the same bin. If most
    # data = 0, reject zeroes. If they are all some other value,
    # complain and give up.
    DATA.sort()

    # boundaries of first and third quartiles
    N3 = int(0.75 * N) - 1
    N1 = int(0.25 * N) - 1

    if DATA[N3] == DATA[N1]:
        if DATA[int(N / 2) - 1] == 0:
            if sum(DATA != 0) > 15:
                print('Suppressing Zeroes')
                DATA = DATA[np.where(DATA != 0)]
                N = len(DATA)
            else:
                print('Too Few Non-0 Values!')
                return 0
        else:
            print('Too Many Identical Values: ' + str(DATA[int(N / 2)]))
            return 0

    # legacy structure from the IDL version; A[0] was an effective height
    A = np.zeros(4)
    NTOT = len(DATA)

    # Outlier-resistant estimator of sample "mean":
    A[1] = biweight_location(DATA)

    # Outlier-resistant estimator of sample "standard deviation":
    A[2] = biweight_scale(DATA)

    # Compute the 95% confidence half-interval on the above mean:
    M = 0.7 * (NTOT - 1)  # appropriate for a biweighted mean
    CL = 0.95
    two_tail_area = 1 - CL
    A[3] = abs(sp_stats.t.ppf(1 - two_tail_area / 2.0, M)) * A[2] / np.sqrt(NTOT)

    # clear the figure
    plt.clf()

    # Plot the histogram (important to have density=True for uniform normalization).
    # Also determines 'optimal' bin sizing, based on the Freedman algorithm.
    histy, histx, ignored = hist(DATA, bins=200, histtype='stepfilled', density=True)

    # Compute the midpoints of the histogram bins
    xmid = np.zeros(len(histy))
    for i in range(len(histy)):
        xmid[i] = (histx[i] + histx[i + 1]) / 2

    # trim the histogram-fitting region, if a domain is specified
    if domain == [0, 0]:
        fitx = xmid
        fity = histy
    else:
        fitx = xmid[(xmid >= domain[0]) & (xmid <= domain[1])]
        fity = histy[(xmid >= domain[0]) & (xmid <= domain[1])]

    # Array containing the initial guess of Gaussian params:
    # normalization; mean; sigma
    # !!! POOR RESULTS IF INPUT DATA ARE HIGHLY NON-GAUSSIAN !!!
    p0 = [max(histy) * (A[2] * np.sqrt(2 * np.pi)), A[1], A[2]]

    # Uniform weighting of each histogram bin value, in Gaussian fit
    err = np.ones(len(fitx))

    # prepare the dictionary of histogram information, for the fitting
    fa = {'x': fitx, 'y': fity, 'err': err}

    # perform the Gaussian fit to the histogram
    m = mpfit.mpfit(gaussian_fitter, p0, functkw=fa)

    # post-fitting diagnostics
    print('mpfit status = ', m.status)
    if m.status <= 0:
        print('mpfit error message = ', m.errmsg)
        return 0
    print('mpfit parameters = ', m.params)
    [norm, mu, sigma] = m.params

    # plot the model-fit Gaussian at finely-spaced locations spanning the data bins;
    # color the model-fitted region green, and the ignored region(s) red
    finex = histx[0] + np.arange(1000) * (histx[-1] - histx[0]) / 1000
    if domain == [0, 0]:
        plt.plot(finex,
                 norm / (sigma * np.sqrt(2 * np.pi)) *
                 np.exp(-(finex - mu)**2 / (2 * sigma**2)),
                 linewidth=2, color='g')
    else:
        xsec = finex[(finex < domain[0])]
        if len(xsec) > 0:
            plt.plot(xsec,
                     norm / (sigma * np.sqrt(2 * np.pi)) *
                     np.exp(-(xsec - mu)**2 / (2 * sigma**2)),
                     linewidth=2, color='r')
        xsec = finex[(finex >= domain[0]) & (finex <= domain[1])]
        plt.plot(xsec,
                 norm / (sigma * np.sqrt(2 * np.pi)) *
                 np.exp(-(xsec - mu)**2 / (2 * sigma**2)),
                 linewidth=2, color='g')
        xsec = finex[(finex > domain[1])]
        if len(xsec) > 0:
            plt.plot(xsec,
                     norm / (sigma * np.sqrt(2 * np.pi)) *
                     np.exp(-(xsec - mu)**2 / (2 * sigma**2)),
                     linewidth=2, color='r')

    # NAG's devel environment does not easily allow plot-windowing,
    # so the 'plt.show' is commented out, in favor of file-dumping
    # plt.show()

    # !!! BEWARE HARDCODED OUTPUT-FILENAME, BELOW !!!
    # SUGGEST ADDING OUTPUT-FILENAME AS ADDITIONAL PARAMETER PASSED TO HISTOGAUSS
    plt.savefig('temp.png')

    return [norm, mu, sigma, A[3]]
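# A minimal usage sketch for histogauss above (not part of the original source).
# It assumes the module-level names used by the function (numpy, matplotlib.pyplot
# as plt, hist, scipy.stats as sp_stats, mpfit, gaussian_fitter, and astropy's
# biweight_location / biweight_scale) are already imported; the sample data are
# purely illustrative.
import numpy as np

sample = np.random.normal(loc=5.0, scale=2.0, size=5000)
norm, mu, sigma, ci95 = histogauss(sample)          # fit over the full data domain
# norm, mu, sigma describe the fitted Gaussian; ci95 is the half-width of the 95%
# confidence interval on the biweight mean. To restrict the fit to a sub-domain:
# histogauss(sample, domain=[0.0, 10.0])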