def ls_period(time, magnitude, error, prange=(0.2, 1.4), **kwargs):
    """Fit a fast Lomb-Scargle model and return its best period.

    Parameters
    ----------
    time, magnitude, error
        Observation times, values and uncertainties passed to
        ``LombScargleFast.fit``.
    prange : tuple
        ``(min, max)`` period range assigned to the model's optimizer
        before the best period is requested.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    tuple
        ``(best_period, (periods, power))`` where ``periods`` and ``power``
        come from ``periodogram_auto(nyquist_factor=100)``.
    """
    # NOTE(review): ctx.silence_stdout presumably mutes the optimizer's
    # console output -- confirm against its definition.
    with ctx.silence_stdout(SILENCE):
        fitted = LombScargleFast().fit(time, magnitude, error)
        periods, power = fitted.periodogram_auto(nyquist_factor=100)
        fitted.optimizer.period_range = prange
        best = fitted.best_period
        return best, (periods, power)
def getPeriod(HJD, TMag, TdMag, p_range=None, makeGraph=None, giveP=None):
    """Return the best Lomb-Scargle period of a light curve.

    Parameters
    ----------
    HJD, TMag, TdMag
        Times, magnitudes and magnitude errors passed to the fit.
    p_range : tuple, optional
        ``(p_min, p_max)`` search range; ``(.01, 2)`` when omitted.
    makeGraph : bool, optional
        When truthy, plot the model score over 1000 evenly spaced periods.
    giveP : bool, optional
        When truthy, print the period surrounded by blank lines.

    Returns
    -------
    The ``best_period`` of the fitted model.
    """
    low, high = (.01, 2) if p_range is None else p_range
    show_plot = False if makeGraph is None else makeGraph
    announce = False if giveP is None else giveP

    # The search range has to be set before best_period is requested.
    fitter = LombScargleFast()
    fitter.optimizer.period_range = (low, high)
    fitter.fit(HJD, TMag, TdMag)
    period = fitter.best_period

    if announce:
        print("\n")
        print("Period: " + str(period))
        print("\n")

    if show_plot:
        grid = np.linspace(low, high, 1000)
        plt.plot(grid, fitter.score(grid))
        plt.xlabel("Period")
        plt.ylabel("Probability")
        plt.show()

    return period
def Periodogram(self, madVar=True):
    """Compute the power spectral density of the stored time series.

    Reads ``self.time_fix`` and ``self.flux_fix`` and writes ``self.freq``,
    ``self.power`` and ``self.bin_width``.

    Parameters
    ----------
    madVar : bool
        When true the flux variance is ``mad_std(flux)**2``; otherwise
        ``np.std(flux)**2``.
    """
    steps = np.diff(self.time_fix)
    dtav = np.mean(steps)      # mean value of time differences (s)
    dtmed = np.median(steps)   # median value of time differences (s)
    if dtmed == 0:
        dtmed = dtav

    # Regular frequency grid starting at zero.
    fmin = 0
    N = len(self.time_fix)
    # Integer floor division: the same count sizes both the power grid and
    # the frequency axis, so they cannot disagree in length for odd N
    # (``N / 2`` is a float under true division).
    n_freq = N // 2
    df = 1. / (dtmed * N)      # bin width (1/Tobs) (in Hz)

    model = LombScargleFast().fit(self.time_fix, self.flux_fix, np.ones(N))
    power = model.score_frequency_grid(fmin, df, n_freq)
    freqs = fmin + df * np.arange(n_freq)   # grid the scores refer to (Hz)

    # The variance of the flux.
    if madVar:
        var = mad_std(self.flux_fix) ** 2
    else:
        var = np.std(self.flux_fix) ** 2

    # Convert to a PSD.
    power /= np.sum(power)   # make the power sum to unity (dimensionless)
    power *= var             # Parseval's theorem: ppm -> ppm^2
    power /= df * 1e6        # ppm^2 -> ppm^2 muHz^-1

    # Defensive alignment, kept from the original implementation.
    n_common = min(len(freqs), len(power))
    freqs = freqs[:n_common]
    power = power[:n_common]

    self.freq = freqs * 1e6       # muHz
    self.power = power            # ppm^2 muHz^-1
    self.bin_width = df * 1e6     # muHz
def psearch(self):
    """Multiply the Lomb-Scargle and AoV periodograms and find the maximum.

    Reads ``self.t``, ``self.m`` and ``self.merr``. Stores the
    Lomb-Scargle result in ``self.periods``/``self.power``, the AoV result
    in ``self.aov``/``self.fr``, their element-wise product in
    ``self.pgram``, and the peak in ``self.pgram_max``, ``self.fbest``,
    ``self.pbest`` and ``self.pbest_signif``.
    """
    model = LombScargleFast().fit(self.t, self.m, self.merr)
    self.periods, self.power = model.periodogram_auto(nyquist_factor=200)

    # The AoV frequency grid is derived from the Lomb-Scargle period grid.
    self.aov, self.fr, _ = pyaov.aovw(self.t, self.m, self.merr,
                                      fstop=max(1 / self.periods),
                                      fstep=1 / self.periods[0])
    # Drop the first AoV sample.
    # NOTE(review): presumably this aligns the two grids -- confirm; the
    # product below requires equal lengths.
    self.aov = self.aov[1:]
    self.fr = self.fr[1:]

    # A formatted print() call gives the same output on Python 2 and 3;
    # the former ``print a, b`` statement is a syntax error on Python 3.
    print("{} {}".format(len(self.aov), len(self.power)))

    self.pgram = self.power * self.aov
    self.pgram_max = max(self.pgram)
    self.fbest = self.fr[np.argmax(self.pgram)]
    self.pbest = 1 / self.fbest
    self.pbest_signif = ((self.pgram_max - np.median(self.pgram))
                         / np.std(self.pgram))
    print("best period at {:.3f} days, {:.2f} sigma from the median".format(
        self.pbest, self.pbest_signif))
def FlarePer(time, minper=0.1, maxper=30.0, nper=20000):
    '''
    Search for periodicity in flare occurrence times.

    Every flare is given unit "energy", so the result resembles the window
    function of the flare times.

    Parameters
    ----------
    time : array of flare times
    minper, maxper : shortest and longest period searched
    nper : number of points on the (linear in frequency) search grid

    Returns
    -------
    (peak period, peak power)
    '''
    unit_energy = np.ones_like(time)

    model = LombScargleFast(fit_offset=False)
    model.optimizer.set(period_range=(minper, maxper))
    model = model.fit(time, unit_energy - np.nanmedian(unit_energy))

    # Linear frequency grid running from 1/maxper towards 1/minper.
    f0 = 1. / maxper
    df = (1. / minper - 1. / maxper) / nper
    pwr = model.score_frequency_grid(f0, df, nper)
    per = 1. / (f0 + df * np.arange(nper))

    peak_period = per[np.argmax(pwr)]
    peak_power = np.max(pwr)
    return peak_period, peak_power
def lombscargle(self):
    """Find the best Lomb-Scargle period and plot periodogram and phase curve.

    Reads ``self.d`` (times), ``self.m`` (magnitudes) and ``self.length``.
    A constant uncertainty of 0.1 is used for the fit and the period search
    is limited to ``(0.02, min(10, self.length))``.

    Returns
    -------
    The best period found by the model.
    """
    print(self.length)
    model = LombScargleFast(silence_warnings=True).fit(self.d, self.m, 0.1)
    model.optimizer.period_range = (0.02, min(10, self.length))
    period = model.best_period

    # Formatted print() calls reproduce the old ``print "text ", value``
    # output (including the double space) on both Python 2 and 3; the
    # statement form is a syntax error on Python 3.
    print("period is  {}".format(period))
    sco = model.score(period)
    print("the score is  {}".format(sco))

    # A large nyquist_factor extends the automatic grid to high frequencies.
    periods, power = model.periodogram_auto(nyquist_factor=2000)

    fig, axes = plt.subplots(1, 2, figsize=(8, 8))
    ax1, ax2 = axes.flatten()

    ax1.plot(periods, power, color="red", linewidth=2)
    ax1.set_xlim(0, 10)
    ax1.set_xlabel("period (days)")
    ax1.set_ylabel("Lomb-Scargle Power")
    ax1.legend()

    # Fold the observation times on the best period.
    phase = (self.d / period) % 1
    ax2.plot(phase, self.m, 'ro')
    ax2.set_xlabel("phase")
    ax2.set_ylabel("Mag")
    ax2.legend()

    plt.show()
    return period
def get_period(t, mag, dmag, n_periods=1):
    """Return the ``n_periods`` best Lomb-Scargle periods and their scores.

    The search range runs from the shortest period of the automatic
    periodogram grid (``nyquist_factor=200``) up to the smaller of the
    longest grid period and 2.

    Returns
    -------
    tuple
        ``(best_period, best_period_scores)`` as produced by the optimizer's
        ``find_best_periods(..., return_scores=True)``.
    """
    model = LombScargleFast().fit(t, mag, dmag)
    grid_periods, _ = model.periodogram_auto(nyquist_factor=200)

    shortest = grid_periods.min()
    longest = numpy.min([grid_periods.max(), 2])

    search = model.optimizer
    search.period_range = (shortest, longest)
    search.quiet = True

    best_period, best_period_scores = search.find_best_periods(
        model, n_periods=n_periods, return_scores=True)
    return best_period, best_period_scores
def ls_period(self, time, magnitude, error, prange=(0.2, 1.4), **kwargs):
    """Fit a Lomb-Scargle model and report its best period.

    ``LombScargleFast`` is used when there are more than 50 observations,
    ``LombScargle`` otherwise. ``**kwargs`` is accepted but unused.

    Returns
    -------
    tuple
        ``(model class name, best_period, periods, power)`` where
        ``periods`` and ``power`` come from
        ``periodogram_auto(nyquist_factor=100)``.
    """
    model_cls = LombScargleFast if len(time) > 50 else LombScargle

    model = model_cls().fit(time, magnitude, error)
    periods, power = model.periodogram_auto(nyquist_factor=100)
    model.optimizer.period_range = prange

    return type(model).__name__, model.best_period, periods, power
def FindPeriod(time, flux, minper=0.1, maxper=30, debug=False):
    '''
    Find the period of a time series with the Lomb-Scargle method.

    The optimizer is quiet unless ``debug`` is true, in which case the best
    period is also printed. The search is limited to ``(minper, maxper)``.

    Returns the best period of the fitted model.
    '''
    optimizer_options = {"quiet": not debug}
    pgram = LombScargleFast(fit_period=True, optimizer_kwds=optimizer_options)
    pgram.optimizer.period_range = (minper, maxper)
    pgram.fit(time, flux)

    best = pgram.best_period
    if debug:
        print ("Best period:", best)
    return best
def period_search_ls(t, mag, magerr, data_out, remove_harmonics = True):
    """Run a Lomb-Scargle period search and store the results in ``data_out``.

    The periodogram is scored on a linear frequency grid that starts at
    ``1/T`` (``T`` being the time baseline), has spacing ``1/(2*T)`` and
    stops below ``fmax = 10``. The period of the highest peak is reported
    with a significance estimate and a false-alarm probability from
    ``FAP_estimated``.

    When ``remove_harmonics`` equals ``True`` the period is replaced by
    ``-99`` if it lies within 0.005 of the baseline, or within 0.005 of one
    of 1/5, 1/4, 1/3, 1/2, 1 or 2 while the false-alarm probability
    exceeds 0.001.

    Returns
    -------
    ``data_out`` with the keys ``period``, ``significance``, ``freqs``,
    ``powers``, ``power``, ``Nztfobs`` and ``fap_Neff`` filled in.
    """
    T = np.max(t) - np.min(t)
    N = len(t)

    ls = LombScargleFast(silence_warnings=True)
    ls.optimizer.period_range = (0.1, T)
    ls.fit(t, mag, magerr)

    # Frequency grid, following
    # https://github.com/astroML/gatspy/blob/master/examples/FastLombScargle.ipynb
    oversampling = 2
    df = 1. / (oversampling * T)   # frequency grid spacing
    fmin = 1 / T
    fmax = 10
    Nf = (fmax - fmin) // df
    freqs = fmin + df * np.arange(Nf)
    periods = 1 / freqs
    powers = ls._score_frequency_grid(fmin, df, Nf)

    # Highest peak of the periodogram.
    ind_best = np.argsort(powers)[-1]
    period = periods[ind_best]
    power = powers[ind_best]

    # False alarm probability (FAP); the original author notes that
    # fap_Neff is an underestimate.
    fap_Neff = FAP_estimated(power, N, fmax, t, normalization='standard')

    # Peak height relative to half the 16th-84th percentile spread.
    spread = (np.percentile(powers, 84) - np.percentile(powers, 16)) / 2
    significance = power / spread

    if remove_harmonics == True:
        # In some cases, the period search is not successful.
        if abs(period - T) < 0.005:
            period = -99
        else:
            for harmonic in np.array([1/5, 1/4, 1/3, 1/2, 1., 2.]):
                if abs(period - harmonic) < 0.005 and fap_Neff > 0.001:
                    period = -99

    data_out["period"] = period
    data_out["significance"] = significance
    data_out["freqs"] = freqs
    data_out["powers"] = powers
    data_out["power"] = power
    data_out["Nztfobs"] = N
    data_out["fap_Neff"] = fap_Neff
    return data_out
def normalised_lombscargle(ts, ys, dys, oversampling=5, nyquist_factor=3):
    """Return ``(fs, psd)`` from an automatically gridded Lomb-Scargle fit.

    The periodogram is rescaled so that its trapezoidal integral over
    frequency equals the time-averaged variance of ``ys`` (also computed
    with the trapezoidal rule over ``ts``).
    """
    fitted = LombScargleFast().fit(ts, ys, dys)
    pers, pows = fitted.periodogram_auto(oversampling=oversampling,
                                         nyquist_factor=nyquist_factor)
    fs = 1.0 / pers

    # Time-weighted mean and variance of the signal.
    T = np.max(ts) - np.min(ts)
    mu = 1 / T * np.trapz(ys, ts)
    s2 = 1 / T * np.trapz(np.square(ys - mu), ts)

    scaled = s2 * pows
    area = np.trapz(pows, fs)
    return fs, scaled / area
def fit_lomb_scargle(self):
    """Fit a fast Lomb-Scargle model and store its best period and score.

    Reads ``self.times``, ``self.measurements`` and ``self.errors``. The
    period search covers 0.5 % to 95 % of the time span. Writes
    ``self.best_period`` and ``self.best_score``.
    """
    from gatspy.periodic import LombScargleFast

    span = max(self.times) - min(self.times)
    optimizer_options = {
        'period_range': (0.005 * span, 0.95 * span),
        'quiet': True,
    }

    model_gat = LombScargleFast(fit_period=True,
                                silence_warnings=True,
                                optimizer_kwds=optimizer_options)
    model_gat.fit(self.times, self.measurements, self.errors)

    best = model_gat.best_period
    self.best_period = best
    self.best_score = model_gat.score(best).item()
def fast_find_spectrum(self, f_samps, mag_low, mag_high, raw=False):
    """
    Finds the Lomb-Scargle periodogram of the concentration time series,
    frequency units are in hz.

    This method uses the Gatspy package, which has a fast fft based
    periodogram calculator.

    f_samps -- int; number of frequency samples to be used in the fitting.
        They are spaced LINEARLY between the two limits, so many points
        may be needed to resolve the low-frequency end.
    mag_low -- float;
    mag_high -- float; the sampled frequency range is
        [10^mag_low, 10^mag_high)
    raw -- Boolean; if true, self.t_raw / self.c_raw are used, otherwise
        self.t / self.c

    Stores ``self.rel_spectrum`` (frequencies and relative power) and
    ``self.spectrum`` (frequencies and absolute amplitude). Returns None.
    """
    assert mag_low < mag_high

    if raw:
        abs_time = self.t_raw
        gas_conc = self.c_raw
    else:
        abs_time = self.t
        gas_conc = self.c

    # Seconds elapsed since the module-level EPOCH. Built in one pass;
    # appending to an array inside a loop copies it on every iteration.
    t0 = EPOCH
    rel_time = np.array([(t - t0).total_seconds() for t in abs_time],
                        dtype=float)

    fmin = 10**mag_low
    fmax = 10**mag_high
    df = (fmax - fmin) / f_samps

    model = LombScargleFast().fit(rel_time, gas_conc)
    rel_power = model.score_frequency_grid(fmin, df, f_samps)
    freqs = fmin + df * np.arange(f_samps)

    # Scale the relative power by the standard deviation of the signal.
    abs_amplitude = gas_conc.std() * np.sqrt(2 * rel_power)

    self.rel_spectrum = np.array([freqs, rel_power])
    self.spectrum = np.array([freqs, abs_amplitude])
    return None
def run_periodogram(self, oversampling=5):
    """
    Compute a periodogram of ``self.l_curve`` with gatspy.LombScargleFast.

    The result is stored in ``self.periods`` and ``self.powers``.

    Parameters
    ----------
    oversampling : int, optional
        The oversampling factor for the periodogram.
    """
    fitted = LombScargleFast().fit(self.l_curve.times,
                                   self.l_curve.fluxes,
                                   self.l_curve.flux_errs)
    periods, powers = fitted.periodogram_auto(oversampling=oversampling)
    self.periods = periods
    self.powers = powers
def psearch(self):
    """Multiply the Lomb-Scargle and AoV periodograms and find the maximum.

    Reads ``self.t``, ``self.m`` and ``self.merr``. Stores the
    Lomb-Scargle result in ``self.periods``/``self.power``, the AoV result
    in ``self.aov``/``self.fr``, their element-wise product in
    ``self.pgram``, and the peak in ``self.pgram_max``, ``self.fbest``,
    ``self.pbest`` and ``self.pbest_signif``.
    """
    model = LombScargleFast().fit(self.t, self.m, self.merr)
    self.periods, self.power = model.periodogram_auto(nyquist_factor=200)

    # The AoV frequency grid is derived from the Lomb-Scargle period grid.
    self.aov, self.fr, _ = pyaov.aovw(self.t, self.m, self.merr,
                                      fstop=max(1/self.periods),
                                      fstep=1/self.periods[0])
    # Drop the first AoV sample.
    # NOTE(review): presumably this aligns the two grids -- confirm; the
    # product below requires equal lengths.
    self.aov = self.aov[1:]
    self.fr = self.fr[1:]

    # A formatted print() call gives the same output on Python 2 and 3;
    # the former ``print a, b`` statement is a syntax error on Python 3.
    print("{} {}".format(len(self.aov), len(self.power)))

    self.pgram = self.power*self.aov
    self.pgram_max = max(self.pgram)
    self.fbest = self.fr[np.argmax(self.pgram)]
    self.pbest = 1/self.fbest
    self.pbest_signif = ((self.pgram_max - np.median(self.pgram))
                         / np.std(self.pgram))
    print("best period at {:.3f} days, {:.2f} sigma from the median".format(
        self.pbest, self.pbest_signif))
def periodogram(datax, datay, min_per, max_per, nyquist):
    """Plot a Lomb-Scargle periodogram and return the best period.

    The periodogram is drawn on a logarithmic period axis in the current
    matplotlib figure. The best period is searched within
    ``(min_per, max_per)``, printed and returned. ``nyquist`` is passed to
    ``periodogram_auto`` as ``nyquist_factor``.
    """
    model = LombScargleFast().fit(datax, datay)
    grid_periods, grid_power = model.periodogram_auto(nyquist_factor=nyquist)

    # Plotting. NOTE(review): 'Lomb-Scargle Periodogram' was assigned to an
    # unused local in the original and was never applied as a plot title.
    plt.xlabel('Period')
    plt.ylabel('Power')
    plt.semilogx(grid_periods, grid_power)

    # Restrict the optimizer and look up the best period.
    model.optimizer.period_range = (min_per, max_per)
    best = model.best_period
    print("period = {0}".format(best))
    return best
def fAnalysis(self, rr_samples):
    """
    Frequency analysis of the heart rate; returns the LF/HF ratio.

    ``rr_samples`` holds the sample positions of the detected R peaks.
    The method stores the spectrum as ``(self.f_hr_axis, self.f_hr)``, the
    summed power of the bins in the 0.04-0.15 band as ``self.lf`` and of
    the bins in the 0.15-0.4 band as ``self.hf``.
    """
    # Discrete values from the RR intervals.
    # NOTE(review): the /1000 presumably converts ms to s -- confirm
    # against self._intervals.
    self.hr_discrete = self._intervals(rr_samples) / 1000
    # Positions of those values in time.
    self.t_hr_discrete = [i / self.fs for i in rr_samples[1:]]
    # Interpolating function approximating hr(t).
    self.hr_func = interp1d(self.t_hr_discrete, self.hr_discrete)

    # 1000 samples are used for the linear time array and for the
    # frequency grid.
    nsamp = 1000
    # Linear time array, skipping the first and last discrete sample.
    self.t_hr_linear = np.linspace(
        self.t_hr_discrete[1],
        self.t_hr_discrete[len(self.t_hr_discrete) - 2],
        num=nsamp)
    # Heart rate linearly approximated between the discrete samples.
    self.hr_linear = self.hr_func(self.t_hr_linear)

    # Lomb-Scargle on the unevenly sampled data with a constant
    # uncertainty of 1E-2.
    model = LombScargleFast().fit(self.t_hr_discrete, self.hr_discrete, 1E-2)
    fmax = 1
    fmin = 0.01
    df = (fmax - fmin) / nsamp
    self.f_hr = model.score_frequency_grid(fmin, df, nsamp)
    self.f_hr_axis = fmin + df * np.arange(nsamp)

    self.lf = 0
    self.hf = 0
    # Only the lower half of the grid is inspected.
    for i in range(0, int(nsamp / 2)):
        f = self.f_hr_axis[i]
        if (f >= 0.04) and (f <= 0.15):
            self.lf = self.lf + self.f_hr[i]
        if (f >= 0.15) and (f <= 0.4):
            self.hf = self.hf + self.f_hr[i]

    return self.lf / self.hf
def calculate_ls_period(t_np, y_np, dy_np) -> Period:
    """Return the best Lomb-Scargle period wrapped in a ``Period``.

    The search covers periods from 0.01 up to the full time span of the
    data. When that span is 0.01 or less nothing is fitted and ``None`` is
    returned. The ``Period`` is created with the label ``"LS"``.
    """
    span = np.max(t_np) - np.min(t_np)
    if span <= 0.01:
        return

    optimizer_options = {"quiet": True, "period_range": (0.01, span)}
    model = LombScargleFast(
        optimizer_kwds=optimizer_options,
        silence_warnings=True,
        fit_period=True,
    )
    fitted = model.fit(t_np, y_np, dy_np)
    return Period(fitted.best_period, "LS")
def plot_phase_diagram(tuple, comparison_stars, suffix='', period=None):
    """Save a double phase diagram (phase -1 to 1) for one star.

    Parameters
    ----------
    tuple
        ``(star_description, curve)``. ``curve`` is indexed with the
        columns ``'JD'``, ``'V-C'`` and ``'s1'``; nothing is plotted when
        it is ``None``.
    comparison_stars
        Not used by this function.
    suffix : str
        Appended to the output file name.
    period : float, optional
        Folding period. When omitted it is estimated with
        ``LombScargleFast`` over ``(0.01, time span)``; if the time span is
        0.01 or less the function returns without plotting.

    The figure is written to ``settings.phasedir`` as
    ``<zero-padded star id>_phase<suffix>`` and then closed.
    """
    star_description = tuple[0]
    curve = tuple[1]
    coords = star_description.coords
    star = star_description.local_id

    star_match, _ = star_description.get_match_string("VSX")
    match_string = f" ({star_match})" if star_match is not None else ''
    upsilon_match = star_description.get_catalog('Upsilon')
    upsilon_text = (upsilon_match.get_upsilon_string()
                    if upsilon_match is not None else '')

    if curve is None:
        logging.info("Curve of star {} is None".format(star))
        return

    t_np = curve['JD'].to_numpy()
    y_np = curve['V-C'].to_numpy()
    dy_np = curve['s1'].to_numpy()

    if period is None:
        period_max = np.max(t_np) - np.min(t_np)
        if period_max <= 0.01:
            return
        # NOTE(review): the fit does not use the uncertainties dy_np --
        # confirm this is intended.
        ls = LombScargleFast(
            optimizer_kwds={'quiet': True, 'period_range': (0.01, period_max)},
            silence_warnings=True,
        ).fit(t_np, y_np)
        period = ls.best_period

    fig = plt.figure(figsize=(18, 16), dpi=80, facecolor='w', edgecolor='k')
    plt.xlabel("Phase", labelpad=TITLE_PAD)
    plt.ylabel("Magnitude", labelpad=TITLE_PAD)
    plt.title(
        f"Star {star}{match_string}, p: {period:.5f} d{upsilon_text}\n{get_hms_dms(coords)}",
        pad=TITLE_PAD)
    plt.tight_layout()

    # 'Double' phase diagram: the folded curve is shown once shifted by -1
    # and once unshifted. Plain array arithmetic replaces the former
    # np.vectorize(lambda) call, which loops in Python and raises on empty
    # input.
    phased_t = np.fmod(t_np / period, 1)
    phased_t_final = np.append(phased_t - 1, phased_t)
    phased_lc_final = np.append(y_np, y_np)
    # Error values are clipped to +0.5 and -0.5.
    phased_err = np.clip(np.append(dy_np, dy_np), -0.5, 0.5)

    plt.gca().invert_yaxis()
    plt.errorbar(phased_t_final, phased_lc_final, yerr=phased_err,
                 linestyle='none', marker='o', ecolor='gray', elinewidth=1)
    fig.savefig(settings.phasedir + str(star).zfill(5) + '_phase' + suffix)
    plt.close(fig)
def l_gatspy():
    """Print the automatic periodogram bins with periods between 0.55 and 0.57.

    A ``LombScargleFast`` model is fitted to an embedded 50-point light
    curve; for every grid point whose period lies strictly between 0.55
    and 0.57 the index, period and power are printed.
    """
    from gatspy.periodic import LombScargleFast
    from gatspy.periodic.lomb_scargle_fast import lomb_scargle_fast

    t = [
        2455263.5230960627, 2455263.5481076366, 2455263.5490798587,
        2455263.583848377, 2455263.584832174, 2455263.625989581,
        2455263.6269849516, 2455265.520839118, 2455265.5218113405,
        2455265.5837789327, 2455265.5847627292, 2455265.6254340257,
        2455265.6264293958, 2455266.5401562476, 2455266.541140044,
        2455266.54212384, 2455266.548802081, 2455266.5840335623,
        2455266.585028933, 2455267.524554396, 2455267.525538192,
        2455267.5493344883, 2455267.584103007, 2455267.5850868034,
        2455267.6254340257, 2455267.6264178217, 2455268.517725692,
        2455268.518697914, 2455268.542575229, 2455268.5435474515,
        2455268.5839062477, 2455268.584890044, 2455268.6254455997,
        2455268.6264293958, 2455269.5231192107, 2455269.524103007,
        2455269.542528933, 2455269.5435243035, 2455269.584126155,
        2455269.5850983774, 2455269.6254918957, 2455269.6264756923,
        2455269.751776618, 2455269.7527604145, 2455269.7921701367,
        2455269.793153933, 2455269.834126155, 2455269.8351099514,
        2455269.8758738404, 2455269.8768576365
    ]
    y = [
        13.52, 13.62, 13.67, 13.84, 13.81, 13.96, 13.98, 14.45, 14.43,
        14.42, 14.4, 14.53, 14.5, 14.299999999999999, 14.33, 14.33, 14.36,
        14.389999999999999, 14.36, 13.85, 13.86, 13.93, 14.07, 14.09, 14.2,
        14.229999999999999, 13.37, 13.35, 13.319999999999999, 13.3, 13.54,
        13.55, 13.76, 13.77, 14.42, 14.42, 14.5, 14.51, 14.52, 14.53,
        13.870000000000001, 13.870000000000001, 13.79, 13.77, 13.92, 13.92,
        14.08, 14.11, 14.26, 14.26
    ]

    fitted = LombScargleFast().fit(t, y)
    periods, power = fitted.periodogram_auto()

    for index, (grid_period, grid_power) in enumerate(zip(periods, power)):
        if 0.55 < grid_period < 0.57:
            print(index, grid_period, grid_power)
def Calc_Period(time, flux, error):
    """Return the best Lomb-Scargle period between 10 and 500.

    A ``LombScargleFast`` model is fitted to the data, the automatic
    periodogram is evaluated with ``nyquist_factor=100`` (its result is
    not used further), and the optimizer is restricted to ``(10, 500)``
    before the best period is read.
    """
    fitted = LombScargleFast().fit(time, flux, error)
    _periods, _power = fitted.periodogram_auto(nyquist_factor=100)

    # Set the search range and find the period.
    fitted.optimizer.period_range = (10, 500)
    return fitted.best_period
def get_timing_lombscargle(n, ofac, hfac):
    """Time one ``score_frequency_grid`` call on a random signal.

    A signal from ``generate_random_signal(n)`` is fitted, then the
    periodogram is scored on ``floor(0.5 * len(x) * ofac * hfac)``
    frequencies spaced ``1 / (ofac * time span)`` apart, starting at one
    spacing. Only the scoring call is timed; the elapsed value of
    ``time()`` is returned.
    """
    x, y, dy = generate_random_signal(n)

    span = max(x) - min(x)
    df = 1. / (ofac * span)
    n_freq = int(floor(0.5 * len(x) * ofac * hfac))

    model = LombScargleFast(silence_warnings=True)
    model.fit(x, y, dy)

    started = time()
    model.score_frequency_grid(df, df, n_freq)
    return time() - started
def normalised_lombscargle(ts, ys, dys, fmin=None, fmax=None, oversampling=1):
    """Returns ``(fs, psd)``, an array of frequencies and a Lomb-Scargle
    estimate of the one-sided power spectral density.

    ``fmin`` defaults to one over the time span and ``fmax`` to one over
    twice the smallest gap between sorted time stamps. The grid spacing is
    ``fmin / oversampling``. The periodogram is rescaled so that its
    trapezoidal integral over frequency equals the time-averaged variance
    of ``ys``.
    """
    T = np.max(ts) - np.min(ts)

    if fmin is None:
        fmin = 1.0 / T
    if fmax is None:
        smallest_gap = np.min(np.diff(np.sort(ts)))
        fmax = 1.0 / (2.0 * smallest_gap)

    df = fmin / oversampling
    N = int(round((fmax - fmin) / df))

    fitted = LombScargleFast().fit(ts, ys, dys)
    pows = fitted.score_frequency_grid(fmin, df, N)
    fs = fmin + df * np.arange(N)

    # Time-weighted mean and variance of the signal.
    mu = np.trapz(ys, ts) / T
    var = np.trapz(np.square(ys - mu), ts) / T

    scaled = var * pows
    area = np.trapz(pows, fs)
    return fs, scaled / area
action="store_true") args = parser.parse_args() #print("Input file:") #print(args.inputfile) datain=pd.read_csv(args.inputfile,sep=r'\s*',header=0) from gatspy.periodic import LombScargleFast dmag=0.000005 nyquist_factor=40 model = LombScargleFast().fit(datain['BJD'], datain['mag'], dmag) periods, power = model.periodogram_auto(nyquist_factor) model.optimizer.period_range=(0.2, 10) period = model.best_period enablePrint() print(period) if args.plot: import matplotlib.pyplot as plt plt.plot(datain['BJD'],datain['mag']) plt.show() if args.foldedplot: import matplotlib.pyplot as plt
def FitSin(time, flux, error, maxnum=5, nper=20000,
           minper=0.1, maxper=30.0, plim=0.25,
           returnmodel=True, debug=False, per2=False):
    '''
    Use Lomb Scargle to find periods, fit sines, remove, repeat.

    Parameters
    ----------
    time, flux, error : arrays describing the light curve
    maxnum : int, optional
        number of find-and-subtract iterations
    nper : int, optional
        number of periods to search over with Lomb Scargle
    minper, maxper : float, optional
        period range of the search
    plim : float, optional
        minimum peak power required before a sine is fitted and removed
    returnmodel : bool, optional
        if True return the accumulated sine model (plus the median flux),
        otherwise return the flux with the fitted sines removed
    debug : bool, optional
        print progress information
    per2 : bool, optional
        fit with ``_sinfunc2`` (7 parameters) instead of ``_sinfunc`` (4)

    Returns
    -------
    The sine model or the residual flux, depending on ``returnmodel``.
    '''
    flux_out = np.array(flux, copy=True)
    sin_out = np.zeros_like(flux)   # accumulated sine model

    # total baseline of the time window
    dt = np.nanmax(time) - np.nanmin(time)
    medflux = np.nanmedian(flux)

    for k in range(0, maxnum):
        # Periodogram of the current residuals (fast gatspy version).
        pgram = LombScargleFast(fit_offset=False)
        pgram.optimizer.set(period_range=(minper, maxper))
        pgram = pgram.fit(time, flux_out - medflux, error)

        df = (1. / minper - 1. / maxper) / nper
        f0 = 1. / maxper
        pwr = pgram.score_frequency_grid(f0, df, nper)
        per = 1. / (f0 + df * np.arange(nper))

        # The peak period is only taken from periods inside the baseline;
        # the peak power is taken over the whole grid.
        pok = np.where((per < dt) & (per > minper))
        pk = per[pok][np.argmax(pwr[pok])]
        pp = np.max(pwr)

        if debug is True:
            print('trial (k): ' + str(k) + '. peak period (pk):' + str(pk) +
                  '. peak power (pp):' + str(pp))

        # Nothing to remove unless a period with enough power is detected.
        if not (pp > plim):
            continue

        resid = flux_out - medflux
        if per2 is True:
            model_fn = _sinfunc2
            p0 = [pk, 3.0 * np.nanstd(resid), 0.0,
                  pk / 2., 1.5 * np.nanstd(resid), 0.1, 0.0]
            fallback = [pk, 0., 0., 0., 0., 0., 0.]
            fail_msg = 'Curve_Fit2 no good'
        else:
            model_fn = _sinfunc
            p0 = [pk, 3.0 * np.nanstd(resid), 0.0, 0.0]
            fallback = [pk, 0., 0., 0.]
            fail_msg = 'Curve_Fit no good'

        try:
            pfit, pcov = curve_fit(model_fn, time, resid, p0=p0)
            if per2 is True and debug is True:
                print('>>', pfit)
        except RuntimeError:
            # Failed fit: fall back to a zero-amplitude model.
            pfit = fallback
            if debug is True:
                print(fail_msg)

        # Subtract the fitted curve from the data and add it to the model.
        component = model_fn(time, *pfit)
        flux_out = flux_out - component
        sin_out = sin_out + component

    # add the median flux for this window BACK in
    # NOTE(review): placed after the loop; the collapsed source does not
    # show whether this line was inside it -- confirm.
    sin_out = sin_out + medflux

    if returnmodel is True:
        return sin_out
    else:
        return flux_out
def k2_test(style=None):
    """
    Demonstrate the technique for a K2 EB.

    Loads the light curve of EPIC 212012387 and its catalog entry from
    ``villanova-db.csv``, masks out the eclipses, and compares
    Lomb-Scargle periodograms and interpolated ACFs of the full and the
    masked light curve. The figure is saved as ``k2_removal_demo.pdf``.

    Parameters
    ----------
    style : str, optional
        The name of a matplotlib style sheet.
    """
    nova = pd.read_csv("villanova-db.csv")

    # Load Aigrain et al. (2015) pipeline light curve. The file is opened
    # in a context manager so the handle is released; boolean indexing and
    # arithmetic produce new arrays before the file is closed.
    with fits.open("hlsp_k2sc_k2_llc_212012387-c05_kepler_v1_lc.fits") as hdu:
        table = hdu[1].data

        # Add back stellar variability correction.
        flux = table["flux"] + table["trend_t"] - \
            np.nanmedian(table["trend_t"])
        flux /= np.nanmedian(flux)

        # Mask out bad fluxes
        finite = np.isfinite(flux)
        time = table["time"][finite]
        cadence = table["cadence"][finite]
        flux = flux[finite]

    # Load parameters from Villanova catalog
    epic = 212012387
    eb = nova[nova["KIC"] == epic]
    p_orb = eb.period.values[0]
    t_0 = eb.bjd0.values[0]
    p_width = eb.pwidth.values[0]
    s_width = eb.swidth.values[0]
    sep = eb.sep.values[0]

    # Keep only points outside the primary and secondary eclipse windows.
    phase = ((time - t_0) % p_orb) / p_orb
    window = 1.0
    mask = ((phase > p_width * window) & (phase < 1 - p_width * window)) & \
        ((phase > sep + s_width * window) | (phase < sep - s_width * window))

    timecut = time[mask]
    fluxcut = flux[mask]
    cadencecut = cadence[mask]

    model = LombScargleFast().fit(time, flux)
    period, power = model.periodogram_auto(oversampling=5)
    model = LombScargleFast().fit(timecut, fluxcut)
    period_c, power_c = model.periodogram_auto(oversampling=5)

    lag, acf = interpacf.interpolated_acf(time, flux - np.median(flux),
                                          cadence)
    lag_c, acf_c = interpacf.interpolated_acf(timecut,
                                              fluxcut - np.median(fluxcut),
                                              cadencecut)

    if style is not None:
        plt.style.use(style)
    plt.rcParams["font.size"] = 22

    fig = plt.figure(figsize=(15, 12))
    ax1 = plt.subplot2grid((2, 2), (0, 0), colspan=2)
    ax2 = plt.subplot2grid((2, 2), (1, 0))
    ax3 = plt.subplot2grid((2, 2), (1, 1))

    # Plot lightcurve
    offset = 2304.
    ax1.plot(time - offset, flux, lw=1, color="k", alpha=0.4)
    ax1.plot(timecut - offset, fluxcut, lw=1, color="k")
    ax1.set_ylim(0.95, 1.01)
    ax1.set_xlabel("BJD - {0:.0f}".format(2454833 + offset))
    ax1.set_ylabel("Normalized Flux")
    ax1.minorticks_on()

    # Plot periodograms (the full-curve power is drawn doubled).
    ax2.plot(period, 2 * power, color="r", lw=1.25, alpha=1.0)
    ax2.plot(period_c, power_c, color="k", lw=1.25)
    ax2.axvline(p_orb, color="k", lw=1.5, linestyle=":")
    ax2.axvline(p_orb / 2., color="k", lw=1.5, linestyle=":")
    # The labels are positioned from the y-limits in effect at this point,
    # i.e. before set_ylim below.
    ax2.text(p_orb, ax2.get_ylim()[1] + 0.01, "$P_{orb}$", ha="center")
    ax2.text(p_orb / 2., ax2.get_ylim()[1] + 0.01, "$P_{orb}/2$",
             ha="center")
    ax2.set_xlabel("Period (days)")
    ax2.set_ylabel("Normalized Power")
    ax2.minorticks_on()
    ax2.set_xlim(0.01, 10)
    ax2.set_ylim(0, 0.6)

    # Plot ACFs, each normalised to its maximum.
    ax3.plot(lag, acf / acf.max(), color="r", lw=1.5, alpha=1.0)
    ax3.plot(lag_c, acf_c / acf_c.max(), color="k", lw=1.5)
    ax3.axvline(p_orb, color="k", lw=1.5, linestyle=":")
    ax3.axvline(p_orb / 2., color="k", lw=1.5, linestyle=":")
    ax3.text(p_orb, 1.05, "$P_{orb}$", ha="center")
    ax3.text(p_orb / 2., 1.05, "$P_{orb}/2$", ha="center")
    ax3.minorticks_on()
    ax3.set_xlim(0.01, 10)
    ax3.set_ylim(-0.55, 1)
    ax3.set_xlabel("Lag (days)")
    ax3.set_ylabel("ACF")

    fig.suptitle("EPIC {0}".format(epic), fontsize=26, y=0.94)
    fig.subplots_adjust(wspace=0.3, hspace=0.35)
    plt.savefig("k2_removal_demo.pdf")
def multi_night(sources, unique_nights, night,
                brightest_mag, mags, mag_err,
                uniform_ylim=True):
    """
    Plot magnitude vs time data for several sources over several nights.

    One figure is created per source, with one panel per night plus a
    final summary panel showing a bar proportional to the maximum
    Lomb-Scargle power and a dot whose size depends on the source's median
    magnitude relative to ``brightest_mag``.

    Parameters
    ----------
    sources
        Iterable of objects providing ``id`` (1-based row into ``mags`` /
        ``mag_err``) and ``bjd_tdb`` (observation times).
    unique_nights
        The nights to plot, one panel each.
    night
        Per-observation night label, compared against ``unique_nights``.
    brightest_mag
        Reference magnitude passed to ``plot_magnitudes`` and used to
        scale the summary dot.
    mags, mag_err
        Magnitudes and their errors, indexed by ``source.id - 1``.
    uniform_ylim : bool
        When true, all night panels share a y range centred on the
        source's median magnitude (at least +/- 0.1 mag) and the inner
        tick labels are hidden.
    """
    number_of_nights = len(unique_nights)

    for source in sources:
        f = plt.figure(figsize=(5 * number_of_nights, 5))

        night_means = []
        night_stds = []
        night_bins = []

        source_mags = mags[source.id - 1]
        finite_mags = source_mags[np.isfinite(source_mags)]
        # Median handles outliers. It is computed unconditionally because
        # the summary dot below needs it even when uniform_ylim is False
        # (that case previously raised a NameError).
        source_median = np.median(finite_mags)

        if uniform_ylim:
            # Median absolute deviation as a robust measure of scatter.
            source_variation = 3 * mad_std(finite_mags)
            # Ensure y range will be at least 0.2 magnitudes
            half_range = 0.1 if source_variation < 0.1 else source_variation
            y_range = (source_median - half_range,
                       source_median + half_range)
        else:
            # Empty so that automatic limits will be used.
            y_range = []

        last_axis = None
        for i, this_night in enumerate(unique_nights):
            last_axis = plt.subplot(1, number_of_nights + 1, i + 1,
                                    sharey=last_axis)
            night_mask = (night == this_night)
            night_mean, night_std = plot_magnitudes(
                mags=mags[source.id - 1][night_mask],
                errors=mag_err[source.id - 1][night_mask],
                times=source.bjd_tdb[night_mask],
                source=source.id,
                night=this_night,
                ref_mag=brightest_mag,
                y_range=y_range)
            night_means.append(night_mean)
            night_stds.append(night_std)
            night_bins.append(this_night)

        # Summary panel.
        plt.subplot(1, number_of_nights + 1, number_of_nights + 1)

        if uniform_ylim:
            f.subplots_adjust(wspace=0)
            plt.setp([a.get_yticklabels() for a in f.axes[1:]],
                     visible=False)

        # Plot indicators of variation, and information about this source.
        # For simplicity, make the x and y range of this plot be 0 to 1.
        x = np.array([0., 1])
        y = x

        # Add invisible line so the legend shows the source label.
        plt.plot(x, y, alpha=0, label='source {}'.format(source.id))
        night_means = np.array(night_means)

        # Plot bar proportional to Lomb-Scargle power, using only points
        # with finite magnitude and finite error.
        good_mags = (np.isfinite(mags[source.id - 1]) &
                     np.isfinite(mag_err[source.id - 1]))
        model = LombScargleFast().fit(source.bjd_tdb[good_mags],
                                      mags[source.id - 1][good_mags],
                                      mag_err[source.id - 1][good_mags])
        periods, power = model.periodogram_auto(nyquist_factor=100,
                                                oversampling=3)
        max_pow = power.max()

        if max_pow > 0.5:
            color = 'green'
        elif max_pow > 0.4:
            color = 'cyan'
        else:
            color = 'gray'

        bar_x = 0.25
        plt.plot([bar_x, bar_x], [0, max_pow],
                 color=color, linewidth=10, label='LS power')
        plt.legend()

        # Add dot for magnitude of star.
        size = 10000. / np.abs(10**((source_median - brightest_mag) / 2.5))
        plt.scatter([0.8], [0.2], c='red', marker='o', s=size)
        plt.ylim(0, 1)
periods <= maximum] period = periods[np.argmax(periodogram)] extra = '' ################################################################################ # gastpy/LombScargleFast ################################################################################ elif options.algorithm == 'gatspy-fast': from gatspy.periodic import LombScargleFast oversampling = 4.0 hifac = 200 starttime = datetime.now() model = LombScargleFast().fit(t, m, dm) periods, periodogram = model.periodogram_auto( oversampling=oversampling, nyquist_factor=hifac) endtime = datetime.now() # Restrict range periods, periodogram = periods[periods <= maximum], periodogram[ periods <= maximum] period = periods[np.argmax(periodogram)] extra = '' ################################################################################ ################################################################################ # Conditional entropy (adaptive grid) ################################################################################
periods, periodogram = periods[periods <= maximum], periodogram[periods <= maximum] period = periods[np.argmax(periodogram)] extra = '' ################################################################################ # gastpy/LombScargleFast ################################################################################ elif options.algorithm == 'gatspy-fast': from gatspy.periodic import LombScargleFast oversampling = 4.0 hifac = 200 starttime = datetime.now() model = LombScargleFast().fit(t, m, dm) periods, periodogram = model.periodogram_auto(oversampling = oversampling, nyquist_factor = hifac) endtime = datetime.now() # Restrict range periods, periodogram = periods[periods <= maximum], periodogram[periods <= maximum] period = periods[np.argmax(periodogram)] extra = '' ################################################################################ ################################################################################ # Conditional entropy (adaptive grid) ################################################################################ elif options.algorithm == 'ce-adaptive':
def periodogram(self, time, flux, err, p_fold=None, plt_color='k',
                max_days=100.0, oversampling=5):
    """
    Plot the light curve, its periodogram, and optionally the phase-folded
    light curve.

    ``self.p_orb`` and its aliases (divided by 0.5, 1, 2, 3 and 4) are
    marked in red on the periodogram; the same aliases of ``p_fold`` are
    marked in blue when it is given.

    Parameters
    ----------
    time : array_like
        Observation times in days.
    flux : array_like
        Fluxes.
    err : array_like
        Flux errors.
    p_fold : float, optional
        Plot the light curve folded at this period (in days), and indicate
        the period and likely aliases on the periodogram plot.
    plt_color : str, optional
        The line and scatter plot color.
    max_days : float, optional
        The maximum number of days to plot in the light curve and
        periodogram.
    oversampling: int, optional
        The oversampling factor for the periodogram.
    """
    fitted = LombScargleFast().fit(time, flux, err)
    period, power = fitted.periodogram_auto(oversampling=oversampling)

    fig1, (ax1, ax2) = plt.subplots(nrows=2, figsize=(7, 14))

    # Light curve.
    first_day = time.min()
    ax1.plot(time, flux, color=plt_color)
    ax1.set_xlim(first_day, time.min() + max_days)
    ax1.set_xlabel('Time (days)')
    ax1.set_ylabel('Relative Flux')

    # Periodogram.
    ax2.plot(period, power, color=plt_color)
    ax2.set_xlim(0.1, max_days)
    ax2.set_xlabel('Period (days)')
    ax2.set_ylabel('Power')

    # Mark the most likely aliases of the eclipse period.
    factors = [0.5, 1, 2, 3, 4]
    for divisor in factors:
        ax2.axvline(self.p_orb / divisor, color='r')
    if p_fold is not None:
        for divisor in factors:
            ax2.axvline(p_fold / divisor, color='b')

    title = 'KIC {0:d} --- p_orb = {1:3.5f} days'.format(self.kic, self.p_orb)
    fig1.suptitle(title)

    if p_fold is not None:
        # Phase-folded light curve.
        fig2, ax3 = plt.subplots()
        folded = (time % p_fold) / p_fold
        ax3.scatter(folded, flux, color=plt_color, s=0.1)
        ax3.set_xlim(0, 1)
        ax3.set_xlabel('Phase')
        ax3.set_ylabel('Relative Flux')

    plt.show()
def power_spectrum(self, verbose=False, noise=0.0, length=-1):
    '''
    Compute the power spectrum of the stored time series.

    If no time series has been read yet (``self.time`` is empty),
    ``self.read_timeseries`` is called first.  The spectrum is a fast
    Lomb-Scargle periodogram evaluated on a regular frequency grid starting
    at zero with spacing ``1 / (median cadence * N)``.  It is rescaled so
    that it sums to the variance of the flux and then divided by
    ``df * 1e6``.  Results are stored in ``self.freq`` and ``self.power``.

    Parameters
    ----------------
    verbose: Bool(False)
        Print the frequency resolution and the highest frequency.

    noise: Float
        If greater than zero, Gaussian noise with this standard deviation
        is added IN PLACE to ``self.flux_fix`` (restricted to the selected
        subset) before the spectrum is computed.

    length: Int
        Number of points to use, taken from the start of the time series.
        ``-1`` (the default) uses the whole series.  Any other value is
        applied as the slice ``[:length]``.

    Returns
    ----------------
    NA

    Notes
    ----------------
    ``self.freq`` is the grid multiplied by 1e6 -- presumably Hz to muHz,
    assuming ``self.time_fix`` is in seconds (TODO confirm).

    Examples
    ----------------
    To read in a data set and create the power spectrum one need only run:

    >>> import K2data
    >>> star = K2data.Dataset(2001122017, '/home/davies/Data/ktwo_2001122017_llc.pow')
    >>> star.power_spectrum()
    '''
    if len(self.time) < 1:
        self.read_timeseries(verbose=True)

    # length == -1 means "everything".  A plain ``[:length]`` slice would
    # silently drop the final sample in that case.
    window = slice(None) if length == -1 else slice(0, length)

    time = self.time_fix[window]
    if noise > 0.0:
        self.flux_fix[window] += np.random.randn(len(time)) * noise
    flux = self.flux_fix[window]

    # Use the median cadence; fall back to the mean if the median is zero.
    cadence = np.diff(time)
    dtmed = np.median(cadence)
    if dtmed == 0:
        dtmed = np.mean(cadence)

    fmin = 0
    N = len(time)                 # n-points
    df = 1.0 / dtmed / N          # bin width
    n_freq = N // 2               # integer, so both grids share one length

    model = LombScargleFast().fit(time, flux, np.ones(N))
    power = model.score_frequency_grid(fmin, df, n_freq)
    freqs = fmin + df * np.arange(n_freq)

    # Rescale so the spectrum sums to the flux variance, then divide by the
    # bin width expressed in units of 1e-6.
    var = np.std(flux) ** 2
    power /= np.sum(power)
    power *= var
    power /= df * 1e6

    # Defensive: keep both arrays at their common length.
    n_keep = min(len(freqs), len(power))
    freqs = freqs[:n_keep]
    power = power[:n_keep]

    self.freq = freqs * 1e6
    self.power = power
    if verbose:
        print("Frequency resolution : {}".format(self.freq[1]))
        print("Nyquist : ~{}".format(self.freq.max()))
if Method == 0: #If method is 0, we don't phase fold if hole_index == 1: t=np.ma.array(t,mask=to_mask) signal=np.ma.array(signal,mask=to_mask) #We choose the frequency range we want to check. An angular frequency is required #for the L-S diagram. fmin=0.01*f_real fmax=10.*f_real Nf= N df = (fmax - fmin) / Nf f = 2.0*np.pi*np.linspace(fmin, fmax, Nf) #We take the L-S of the signal. if hole_index == 0: pgram = LombScargleFast().fit(t, signal,sdev) elif hole_index == 1: pgram = LombScargleFast().fit(t[~t.mask], signal[~signal.mask],sdev) power = pgram.score_frequency_grid(fmin,df,Nf) #We plot the signal and the L-S fig, ax = plt.subplots(2, 1) ax[0].plot(t,signal, 'o', ms = 1.5) ax[0].set_xlim([0.,T_tot]) ax[0].set_xlabel('Time') ax[0].set_ylabel('Flux') ax[0].grid() #We want the frequency in Hz. ax[1].plot(f/2.0/np.pi/f_real, power, 'o') ax[1].set_xlim([fmin/2.0/np.pi/f_real,6])
def periodogram(self, time, flux, err, p_fold=None, plt_color='k',
                max_days=100.0, oversampling=5, plot=False,
                cut_eclipses=True, best_period=True,
                period_range=(.05, 45)):
    """
    Compute (and optionally plot) the Lomb-Scargle periodogram.

    NOTE: the `time`, `flux` and `err` arguments are always overridden.
    With ``cut_eclipses=True`` they are replaced by the three values
    returned by ``self.curve_cut()``; otherwise by ``self.time``,
    ``self.flux`` and ``self.error``.  They are kept in the signature for
    backward compatibility.

    When `plot` is true, the light curve and periodogram are drawn.  Red
    lines on the periodogram mark ``self.p_orb`` divided by 0.5, 1, 2, 3
    and 4; blue lines mark the same ratios of `p_fold` if given, and a
    second figure shows the light curve folded at `p_fold`.

    Parameters
    ----------
    time : array_like
        Ignored (see note above).
    flux : array_like
        Ignored (see note above).
    err : array_like
        Ignored (see note above).
    p_fold : float, optional
        Period (in days) at which to fold the light curve when plotting.
    plt_color : str, optional
        The line and scatter plot color.
    max_days : float, optional
        The maximum number of days to plot in the light curve and
        periodogram.
    oversampling : int, optional
        The oversampling factor for the periodogram.
    plot : bool, optional
        Draw the figures and call ``plt.show()``.
    cut_eclipses : bool, optional
        Use the data returned by ``self.curve_cut()`` instead of the
        stored light curve.
    best_period : bool, optional
        Also determine the model's best period inside `period_range` and
        return it.
    period_range : tuple of float, optional
        ``(min, max)`` period range given to the optimizer when
        `best_period` is true.

    Returns
    -------
    period, power : arrays
        Output of ``periodogram_auto``.
    best : float
        The best period; only returned when `best_period` is true, so the
        result is a 3-tuple in that case and a 2-tuple otherwise.
    """
    if cut_eclipses:
        time, flux, err = self.curve_cut()
    else:
        time = self.time
        flux = self.flux
        # The stored errors must be bound to `err`, the name that is fitted
        # below.  Binding them to an unused `error` local left the caller's
        # `err` paired with self.time / self.flux.
        err = self.error

    model = LombScargleFast().fit(time, flux, err)
    period, power = model.periodogram_auto(oversampling=oversampling)

    if best_period:
        model.optimizer.period_range = period_range
        peak_period = model.best_period

    if plot:
        fig1, (ax1, ax2) = plt.subplots(nrows=2, figsize=(7, 14))

        ax1.plot(time, flux, color=plt_color)
        ax1.set_xlim(time.min(), time.min() + max_days)
        ax1.set_xlabel('Time (days)')
        ax1.set_ylabel('Relative Flux')

        ax2.plot(period, power, color=plt_color)
        ax2.set_xlim(0.1, max_days)
        ax2.set_xlabel('Period (days)')
        ax2.set_ylabel('Power')

        # Plot the most likely aliases of the eclipse period.
        factors = [0.5, 1, 2, 3, 4]
        for factor in factors:
            ax2.axvline(self.p_orb / factor, color='r')
        if p_fold is not None:
            for factor in factors:
                ax2.axvline(p_fold / factor, color='b')

        fig1.suptitle('KIC {0:d} --- p_orb = {1:3.5f} days'.
                      format(self.kic, self.p_orb))

        if p_fold is not None:
            fig2, ax3 = plt.subplots()
            phase = (time % p_fold) / p_fold
            ax3.scatter(phase, flux, color=plt_color, s=0.1)
            ax3.set_xlim(0, 1)
            ax3.set_xlabel('Phase')
            ax3.set_ylabel('Relative Flux')

        plt.show()

    # The return arity depends on `best_period`; callers rely on this.
    if best_period:
        return period, power, peak_period
    return period, power
def estimate(self, observedLC):
    """!
    Estimate intrinsicFlux, period, eccentricity, omega, tau, & a2sini

    The estimates are derived in sequence, each using the previous ones:

    1. intrinsic flux: scan 98 trial values between the minimum and maximum
       unmasked flux and keep the one whose folded, spline-integrated
       transformed light curve has the smallest absolute integral;
    2. period: best Lomb-Scargle period of the transformed light curve;
    3. eccentricity and omega: from the extrema and partial integrals of a
       smoothing spline fitted to one folded period;
    4. tau: a root of the spline fitted to the offset-subtracted curve;
    5. a2sini: from the semi-amplitude, period and eccentricity.

    observedLC must expose t, y, yerr, mask, numCadences, T, copy() and
    fold(); self must expose Day, c, twoPi and Parsec.

    Returns the tuple (intrinsicFluxEst, periodEst, eccentricityEst,
    omega1Est, tauEst, a2sinInclinationEst).

    NOTE(review): uses xrange, so this runs on Python 2 only as written.
    """
    ## intrinsicFluxEst
    maxPeriodFactor = 10.0
    # First-pass period estimate from the raw light curve.
    model = LombScargleFast().fit(observedLC.t, observedLC.y, observedLC.yerr)
    # NOTE(review): periods and power are not used afterwards.
    periods, power = model.periodogram_auto(nyquist_factor = observedLC.numCadences)
    # Search between twice the mean sampling interval and maxPeriodFactor*T.
    model.optimizer.period_range = (2.0*np.mean(observedLC.t[1:] - observedLC.t[:-1]), maxPeriodFactor*observedLC.T)
    periodEst = model.best_period
    numIntrinsicFlux = 100
    # NOTE(review): lowestFlux and highestFlux are not used afterwards; the
    # same expressions are recomputed inline on the next statement.
    lowestFlux = np.min(observedLC.y[np.where(observedLC.mask == 1.0)])
    highestFlux = np.max(observedLC.y[np.where(observedLC.mask == 1.0)])
    intrinsicFlux = np.linspace(np.min(observedLC.y[np.where(observedLC.mask == 1.0)]), np.max(observedLC.y[np.where(observedLC.mask == 1.0)]), num = numIntrinsicFlux)
    intrinsicFluxList = list()
    totalIntegralList = list()
    # The two end points of the grid (the minimum and maximum flux) are skipped.
    for f in xrange(1, numIntrinsicFlux - 1):
        # Normalise the observed flux by the trial intrinsic flux.
        beamedLC = observedLC.copy()
        beamedLC.x = np.require(np.zeros(beamedLC.numCadences), requirements=['F', 'A', 'W', 'O', 'E'])
        for i in xrange(beamedLC.numCadences):
            beamedLC.y[i] = observedLC.y[i]/intrinsicFlux[f]
            beamedLC.yerr[i] = observedLC.yerr[i]/intrinsicFlux[f]
        # Raise to the power 1/3.44 with propagated error
        # (presumably inverting a Doppler-boost exponent of 3.44 -- TODO confirm).
        dopplerLC = beamedLC.copy()
        dopplerLC.x = np.require(np.zeros(dopplerLC.numCadences), requirements=['F', 'A', 'W', 'O', 'E'])
        for i in xrange(observedLC.numCadences):
            dopplerLC.y[i] = math.pow(beamedLC.y[i], 1.0/3.44)
            dopplerLC.yerr[i] = (1.0/3.44)*math.fabs(dopplerLC.y[i]*(beamedLC.yerr[i]/beamedLC.y[i]))
        # y -> 1 - 1/y with propagated error.
        dzdtLC = dopplerLC.copy()
        dzdtLC.x = np.require(np.zeros(dopplerLC.numCadences), requirements=['F', 'A', 'W', 'O', 'E'])
        for i in xrange(observedLC.numCadences):
            dzdtLC.y[i] = 1.0 - (1.0/dopplerLC.y[i])
            dzdtLC.yerr[i] = math.fabs((-1.0*dopplerLC.yerr[i])/math.pow(dopplerLC.y[i], 2.0))
        foldedLC = dzdtLC.fold(periodEst)
        foldedLC.x = np.require(np.zeros(foldedLC.numCadences), requirements=['F', 'A', 'W', 'O', 'E'])
        # Weighted cubic spline through the unmasked folded points; its
        # absolute integral over the fold is the score for this trial flux.
        integralSpline = UnivariateSpline(foldedLC.t[np.where(foldedLC.mask == 1.0)], foldedLC.y[np.where(foldedLC.mask == 1.0)], 1.0/foldedLC.yerr[np.where(foldedLC.mask == 1.0)], k = 3, s = None, check_finite = True)
        totalIntegral = math.fabs(integralSpline.integral(foldedLC.t[0], foldedLC.t[-1]))
        intrinsicFluxList.append(intrinsicFlux[f])
        totalIntegralList.append(totalIntegral)
    # Keep the trial flux with the smallest absolute integral (first one on ties).
    intrinsicFluxEst = intrinsicFluxList[np.where(np.array(totalIntegralList) == np.min(np.array(totalIntegralList)))[0][0]]
    ## periodEst
    # Redo the three transformations with the chosen intrinsic flux, reusing
    # the light-curve objects left over from the last loop iteration.
    for i in xrange(beamedLC.numCadences):
        beamedLC.y[i] = observedLC.y[i]/intrinsicFluxEst
        beamedLC.yerr[i] = observedLC.yerr[i]/intrinsicFluxEst
        dopplerLC.y[i] = math.pow(beamedLC.y[i], 1.0/3.44)
        dopplerLC.yerr[i] = (1.0/3.44)*math.fabs(dopplerLC.y[i]*(beamedLC.yerr[i]/beamedLC.y[i]))
        dzdtLC.y[i] = 1.0 - (1.0/dopplerLC.y[i])
        dzdtLC.yerr[i] = math.fabs((-1.0*dopplerLC.yerr[i])/math.pow(dopplerLC.y[i], 2.0))
    model = LombScargleFast().fit(dzdtLC.t, dzdtLC.y, dzdtLC.yerr)
    periods, power = model.periodogram_auto(nyquist_factor = dzdtLC.numCadences)
    model.optimizer.period_range = (2.0*np.mean(dzdtLC.t[1:] - dzdtLC.t[:-1]), maxPeriodFactor*dzdtLC.T)
    periodEst = model.best_period
    ## eccentricityEst & omega2Est
    # First find a full period going from rising to falling.
    risingSpline = UnivariateSpline(dzdtLC.t[np.where(dzdtLC.mask == 1.0)], dzdtLC.y[np.where(dzdtLC.mask == 1.0)], 1.0/dzdtLC.yerr[np.where(dzdtLC.mask == 1.0)], k = 3, s = None, check_finite = True)
    # NOTE(review): indexing roots [0] and [1] raises IndexError when the
    # spline has fewer roots than that; firstRoot is not used afterwards.
    risingSplineRoots = risingSpline.roots()
    firstRoot = risingSplineRoots[0]
    # Positive first derivative at the first root means it is a rising
    # crossing; otherwise take the second root.
    if risingSpline.derivatives(risingSplineRoots[0])[1] > 0.0:
        tRising = risingSplineRoots[0]
    else:
        tRising = risingSplineRoots[1]
    # Now fold the LC starting at tRising and going for a full period.
    foldedLC = dzdtLC.fold(periodEst, tStart = tRising)
    foldedLC.x = np.require(np.zeros(foldedLC.numCadences), requirements=['F', 'A', 'W', 'O', 'E'])
    # Fit the folded LC with a spline to figure out alpha and beta
    fitLC = foldedLC.copy()
    foldedSpline = UnivariateSpline(foldedLC.t[np.where(foldedLC.mask == 1.0)], foldedLC.y[np.where(foldedLC.mask == 1.0)], 1.0/foldedLC.yerr[np.where(foldedLC.mask == 1.0)], k = 3, s = 2*foldedLC.numCadences, check_finite = True)
    for i in xrange(fitLC.numCadences):
        fitLC.x[i] = foldedSpline(fitLC.t[i])
    # Now get the roots and find the falling root
    tZeros = foldedSpline.roots()
    if tZeros.shape[0] == 1: # We have found just tFalling
        tFalling = tZeros[0]
        tRising = fitLC.t[0]
        startIndex = 0
        tFull = fitLC.t[-1]
        stopIndex = fitLC.numCadences
    elif tZeros.shape[0] == 2: # We have found tFalling and one of tRising or tFull
        # NOTE(review): a derivative of exactly 0.0 at tZeros[0] matches
        # neither branch and leaves tFalling/tFull/startIndex/stopIndex unset.
        if foldedSpline.derivatives(tZeros[0])[1] < 0.0:
            tFalling = tZeros[0]
            tFull = tZeros[1]
            stopIndex = np.where(fitLC.t < tFull)[0][-1]
            tRising = fitLC.t[0]
            startIndex = 0
        elif foldedSpline.derivatives(tZeros[0])[1] > 0.0:
            if foldedSpline.derivatives(tZeros[1])[1] < 0.0:
                tRising = tZeros[0]
                startIndex = np.where(fitLC.t > tRising)[0][0]
                tFalling = tZeros[1]
                tFull = fitLC.t[-1]
                stopIndex = fitLC.numCadences
            else:
                raise RuntimeError('Could not determine alpha & omega correctly because the first root is rising but the second root is not falling!')
    elif tZeros.shape[0] == 3:
        tRising = tZeros[0]
        startIndex = np.where(fitLC.t > tRising)[0][0]
        tFalling = tZeros[1]
        tFull = tZeros[2]
        stopIndex = np.where(fitLC.t < tFull)[0][-1]
    else:
        raise RuntimeError('Could not determine alpha & omega correctly because tZeros has %d roots!'%(tZeros.shape[0]))
    # One full period now goes from tRising to periodEst. The maxima occurs
    # between tRising and tFalling while the minima occurs between tFalling
    # and tRising + periodEst
    # Find the minima and maxima
    # alpha / beta: absolute spline value at the maximum / minimum inside
    # [startIndex:stopIndex]; peakLoc / troughLoc: the matching times.
    alpha = math.fabs(fitLC.x[np.where(np.max(fitLC.x[startIndex:stopIndex]) == fitLC.x)[0][0]])
    beta = math.fabs(fitLC.x[np.where(np.min(fitLC.x[startIndex:stopIndex]) == fitLC.x)[0][0]])
    peakLoc = fitLC.t[np.where(np.max(fitLC.x[startIndex:stopIndex]) == fitLC.x)[0][0]]
    troughLoc = fitLC.t[np.where(np.min(fitLC.x[startIndex:stopIndex]) == fitLC.x)[0][0]]
    KEst = 0.5*(alpha + beta)
    # delta2 averages the outer areas (tRising..peak, trough..tFull);
    # delta1 averages the inner ones (peak..tFalling, tFalling..trough).
    delta2 = (math.fabs(foldedSpline.integral(tRising, peakLoc)) + math.fabs(foldedSpline.integral(troughLoc, tFull)))/2.0
    delta1 = (math.fabs(foldedSpline.integral(peakLoc, tFalling)) + math.fabs(foldedSpline.integral(tFalling, troughLoc)))/2.0
    eCosOmega2 = (alpha - beta)/(alpha + beta)
    eSinOmega2 = ((2.0*math.sqrt(alpha*beta))/(alpha + beta))*((delta2 - delta1)/(delta2 + delta1))
    eccentricityEst = math.sqrt(math.pow(eCosOmega2, 2.0) + math.pow(eSinOmega2, 2.0))
    # NOTE(review): the divisions below raise ZeroDivisionError when
    # eCosOmega2 or eSinOmega2 is exactly 0.
    tanOmega2 = math.fabs(eSinOmega2/eCosOmega2)
    # Choose the quadrant of omega2 (in degrees) from the signs of
    # eCosOmega2 and eSinOmega2.
    if (eCosOmega2/math.fabs(eCosOmega2) == 1.0) and (eSinOmega2/math.fabs(eSinOmega2) == 1.0):
        omega2Est = math.atan(tanOmega2)*(180.0/math.pi)
    if (eCosOmega2/math.fabs(eCosOmega2) == -1.0) and (eSinOmega2/math.fabs(eSinOmega2) == 1.0):
        omega2Est = 180.0 - math.atan(tanOmega2)*(180.0/math.pi)
    if (eCosOmega2/math.fabs(eCosOmega2) == -1.0) and (eSinOmega2/math.fabs(eSinOmega2) == -1.0):
        omega2Est = 180.0 + math.atan(tanOmega2)*(180.0/math.pi)
    if (eCosOmega2/math.fabs(eCosOmega2) == 1.0) and (eSinOmega2/math.fabs(eSinOmega2) == -1.0):
        omega2Est = 360.0 - math.atan(tanOmega2)*(180.0/math.pi)
    omega1Est = omega2Est - 180.0
    ## tauEst
    zDot = KEst*(1.0 + eccentricityEst)*(eCosOmega2/eccentricityEst)
    # Subtract the constant zDot so that the spline roots below mark the
    # times at which the curve equals zDot.
    zDotLC = dzdtLC.copy()
    for i in xrange(zDotLC.numCadences):
        zDotLC.y[i] = zDotLC.y[i] - zDot
    zDotSpline = UnivariateSpline(zDotLC.t[np.where(zDotLC.mask == 1.0)], zDotLC.y[np.where(zDotLC.mask == 1.0)], 1.0/zDotLC.yerr[np.where(zDotLC.mask == 1.0)], k = 3, s = 2*zDotLC.numCadences, check_finite = True)
    for i in xrange(zDotLC.numCadences):
        zDotLC.x[i] = zDotSpline(zDotLC.t[i])
    zDotZeros = zDotSpline.roots()
    # NOTE(review): this folds dzdtLC, not the offset-subtracted zDotLC --
    # confirm that is intended.
    zDotFoldedLC = dzdtLC.fold(periodEst)
    zDotFoldedSpline = UnivariateSpline(zDotFoldedLC.t[np.where(zDotFoldedLC.mask == 1.0)], zDotFoldedLC.y[np.where(zDotFoldedLC.mask == 1.0)], 1.0/zDotFoldedLC.yerr[np.where(zDotFoldedLC.mask == 1.0)], k = 3, s = 2*zDotFoldedLC.numCadences, check_finite = True)
    for i in xrange(zDotFoldedLC.numCadences):
        zDotFoldedLC.x[i] = zDotFoldedSpline(zDotFoldedLC.t[i])
    # tC / tE: times of the maximum / minimum of the folded spline.
    tC = zDotFoldedLC.t[np.where(np.max(zDotFoldedLC.x) == zDotFoldedLC.x)[0][0]]
    nuC = (360.0 - omega2Est)%360.0
    tE = zDotFoldedLC.t[np.where(np.min(zDotFoldedLC.x) == zDotFoldedLC.x)[0][0]]
    nuE = (180.0 - omega2Est)%360.0
    # tau is the first root after whichever of tC / tE has its angle
    # (nuC / nuE) closer to 360 degrees.
    # NOTE(review): raises IndexError if no root lies after that time.
    if math.fabs(360.0 - nuC) < math.fabs(360 - nuE):
        tauEst = zDotZeros[np.where(zDotZeros > tC)[0][0]]
    else:
        tauEst = zDotZeros[np.where(zDotZeros > tE)[0][0]]
    ## a2sinInclinationEst
    a2sinInclinationEst = ((KEst*periodEst*self.Day*self.c*math.sqrt(1.0 - math.pow(eccentricityEst, 2.0)))/self.twoPi)/self.Parsec
    return intrinsicFluxEst, periodEst, eccentricityEst, omega1Est, tauEst, a2sinInclinationEst
def find_cycle(feature, strain, mouse=None, bin_width=15,
               methods='LombScargleFast', disturb_t=False, gen_doc=False,
               plot=True, search_range_fit=None, nyquist_factor=3,
               n_cycle=10, search_range_find=(2, 26), sig=np.array([0.05])):
    """
    Find the most significant periods of a feature with Lomb-Scargle.

    The data of one mouse, or of every mouse of a strain when `mouse` is
    None, are binned and fitted with a Lomb-Scargle model from the gatspy
    package.  The `n_cycle` periods with the highest Lomb-Scargle power
    inside `search_range_find` are returned with their power and p-values,
    and the periodogram can be plotted.

    Note that the periodogram used for plotting and the search for the
    best cycles use different period grids: the former uses
    `search_range_fit` (or an automatic grid), the latter the optimizer's
    own grid search restricted to `search_range_find`.

    Parameters
    ----------
    feature: string in {"AS", "F", "M_AS", "M_IS", "W", "Distance"}
        "AS": Active state probability
        "F": Food consumed (g)
        "M_AS": Movement outside homebase
        "M_IS": Movement inside homebase
        "W": Water consumed (g)
        "Distance": Distance traveled
    strain: int
        nonnegative integer indicating the strain number
    mouse: int, default is None
        nonnegative integer indicating the mouse number.  If None, the
        series of all mice of `strain` are concatenated, each with its own
        time axis starting at 0.
    bin_width: int, minute unit, default is 15 minutes
        number of minutes, the time interval for data aggregation
    methods: string in {"LombScargleFast", "LombScargle"}
        the model used to determine the periods and the best cycles.
        If 'LombScargle' is chosen, `disturb_t` must be True.
    disturb_t: boolean, default is False
        If true, add uniformly distributed noise (at most a tenth of a bin
        either way) to the time sequence used to fit the model.  This
        avoids the singular matrix error that can happen sometimes.
    gen_doc: boolean, default is False
        If true, also return the values needed to draw the power versus
        period plot (see Returns).
    plot: boolean, default is True
        If true, call ``lombscargle_visualize`` to plot power versus
        period.  Stars mark significance: three for p-values in
        [0, 0.001], two for [0.001, 0.01], one for [0.01, 0.05].
        Horizontal lines show the power at the significance levels `sig`.
    search_range_fit: list or numpy array, hours unit, default is None
        periods at which to evaluate the Lomb-Scargle power for the plot.
        Too few points give a poor curve, although the starred best
        periods are always right.  The default None lets the model choose
        the grid and is recommended.
    nyquist_factor: int
        Used only when `search_range_fit` is None:
        5 * nyquist_factor * length(time sequence) / 2 periods are
        evaluated.
    n_cycle: int, default is 10
        number of best periods to return.
    search_range_find: list, tuple or numpy array of length 2, hours unit
        range of periods searched for the best cycles.  The minimum must
        be strictly larger than 0 to avoid 1/0 issues.
    sig: list or numpy array, default is [0.05].
        significance levels used for the horizontal lines of the plot.

    Returns
    -------
    cycle: numpy array of length 'n_cycle'
        The periods with the highest Lomb-Scargle power.
    cycle_power: numpy array of length 'n_cycle'
        The corresponding Lomb-Scargle power of 'cycle'.
    cycle_pvalue: numpy array of length 'n_cycle'
        The corresponding p-value of 'cycle'.

    When `gen_doc` is true the tuple returned is instead
    ``(periods, power, sig, N, cycle, cycle_power, cycle_pvalue)`` where
    `periods` and `power` are the periodogram grid and its power, and `N`
    is the length of the fitted time sequence.

    Raises
    ------
    ValueError
        If `feature` or `methods` is not one of the accepted values.

    Examples
    -------
    >>> a,b,c = find_cycle(feature='F', strain = 0,mouse = 0, plot=False,)
    >>> print(a,b,c)
    >>> [ 23.98055016   4.81080233  12.00693952   6.01216335   8.0356203
           3.4316698    2.56303353   4.9294791   21.37925713   3.5697756 ]
        [ 0.11543449  0.05138839  0.03853218  0.02982237  0.02275952
          0.0147941   0.01151601  0.00998443  0.00845883  0.0082382 ]
        [  0.00000000e+00   3.29976046e-10   5.39367189e-07   8.10528027e-05
           4.71001953e-03   3.70178834e-01   9.52707020e-01   9.99372657e-01
           9.99999981e-01   9.99999998e-01]
    """
    if feature not in ALL_FEATURES:
        raise ValueError(
            'Input value must in {"AS", "F", "M_AS", "M_IS", "W", "Distance"}')
    if methods not in METHOD:
        raise ValueError(
            'Input value must in {"LombScargleFast","LombScargle"}')

    # Width of one bin in hours.  The float literal keeps true division on
    # Python 2 as well.
    step = bin_width / 60.0

    # get data
    if mouse is None:
        data_all = aggregate_data(feature=feature, bin_width=bin_width)
        strain_rows = data_all.loc[data_all['strain'] == strain]
        # Iterate over the ids that are really present instead of assuming
        # they are exactly 0 .. n-1.
        mouse_ids = sorted(set(strain_rows['mouse']))
        data = []
        t = []
        for mouse_id in mouse_ids:
            series = strain_rows.loc[strain_rows['mouse'] == mouse_id][feature]
            data.extend(series)
            # np.arange(n) * step always has exactly n points, whereas
            # np.arange(0, n * step, step) can gain one through rounding.
            t.extend(np.arange(len(series)) * step)
        N = len(data)
        t = np.asarray(t)
    else:
        if feature == 'Distance':
            data = aggregate_movement(strain=strain, mouse=mouse,
                                      bin_width=bin_width)
        else:
            data = aggregate_interval(strain=strain, mouse=mouse,
                                      feature=feature, bin_width=bin_width)
        N = len(data)
        t = np.arange(N) * step

    y = data

    # fit model
    if disturb_t:
        t = t + np.random.uniform(-bin_width / 600.0, bin_width / 600.0, N)

    if methods == 'LombScargleFast':
        model = LombScargleFast(fit_period=False).fit(t=t, y=y)
    elif methods == 'LombScargle':
        model = LombScargle(fit_period=False).fit(t=t, y=y)

    # calculate periods' LS power
    if search_range_fit is None:
        periods, power = model.periodogram_auto(nyquist_factor=nyquist_factor)
    else:
        periods = search_range_fit
        power = model.periodogram(periods=search_range_fit)

    # find best cycle
    model.optimizer.period_range = search_range_find
    cycle, cycle_power = model.find_best_periods(return_scores=True,
                                                 n_periods=n_cycle)
    cycle_pvalue = 1 - (1 - np.exp(cycle_power / (-2) * (N - 1))) ** (2 * N)

    # visualization
    if plot:
        lombscargle_visualize(periods=periods, power=power, sig=sig, N=N,
                              cycle_power=cycle_power,
                              cycle_pvalue=cycle_pvalue, cycle=cycle)

    if gen_doc:
        return periods, power, sig, N, cycle, cycle_power, cycle_pvalue
    return cycle, cycle_power, cycle_pvalue
import numpy as np
import matplotlib.pyplot as plt
from astroML.datasets import fetch_LINEAR_sample
from gatspy.periodic import LombScargleFast

# Pull one light curve out of the LINEAR sample.  The transposed table
# unpacks into times, magnitudes and magnitude errors.
star_id = 10040133
LINEAR_data = fetch_LINEAR_sample()
light_curve = LINEAR_data.get_light_curve(star_id)
t, mag, dmag = light_curve.T

# Optional: look at the raw light curve first.
# fig, ax = plt.subplots()
# ax.errorbar(t, mag, dmag, fmt='.k', ecolor='gray')
# ax.set(xlabel='Time (days)', ylabel='magitude',
#        title='LINEAR object {0}'.format(star_id))
# ax.invert_yaxis()
# plt.show()

# Fast Lomb-Scargle periodogram on an automatically chosen period grid.
model = LombScargleFast().fit(t, mag, dmag)
periods, power = model.periodogram_auto(nyquist_factor=100)

# Plot power against period, zoomed on the 0.2 - 1.4 day window.
axis_settings = {
    'xlim': (0.2, 1.4),
    'ylim': (0, 0.8),
    'xlabel': 'period (days)',
    'ylabel': 'Lomb-Scargle Power',
}
fig, ax = plt.subplots()
ax.plot(periods, power)
ax.set(**axis_settings)
plt.show()
def FitSin(time, flux, error, maxnum=5, nper=20000, minper=0.1, maxper=30.0,
           plim=0.25, per2=False, returnmodel=True, debug=False):
    '''
    Iteratively find and remove periodic signals with Lomb-Scargle.

    On each pass a Lomb-Scargle periodogram of the median-subtracted
    residual flux is computed.  If the strongest peak among the searchable
    periods exceeds `plim`, a sine model is fitted at that period and
    subtracted.  The loop stops as soon as no significant period is found,
    or after `maxnum` passes.

    Searchable periods are those on the grid between `minper` and `maxper`
    that are also shorter than the time baseline of the data.  The
    significance test uses the power AT the selected period, so a strong
    peak outside the searchable range cannot trigger a fit at an unrelated
    period.

    Note: this is where major issues were found in the light curve fitting
    as of Davenport (2016), where the iterative fitting was not adequately
    subtracting "pointy" features, such as RR Lyr or EBs. Upgrades to the
    fitting step are needed! Or, don't use iterative sine fitting...

    Idea for future: if L-S returns a significant P, use a median fit of
    the phase-folded data at that P instead of a sine fit...

    Parameters
    ----------
    time : 1-d numpy array
    flux : 1-d numpy array
    error : 1-d numpy array
    maxnum : int, optional
        maximum number of iterations to try finding periods at
        (default=5)
    nper : int, optional
        number of periods to search over with Lomb Scargle
        (default=20000)
    minper : float, optional
        minimum period (in units of time array, nominally days) to search
        for periods over (default=0.1)
    maxper : float, optional
        maximum period (in units of time array, nominally days) to search
        for periods over (default=30.0)
    plim : float, optional
        Lomb-Scargle power threshold needed to define a "significant"
        period (default=0.25)
    per2 : bool, optional
        if True, use the 2-sine model fit at each period. if False, use
        normal 1-sine model (default=False)
    returnmodel : bool, optional
        if True, return the combined sine model. If False, return the
        data - model (default=True)
    debug : bool, optional
        used to print out troubleshooting things (default=False)

    Returns
    -------
    If returnmodel=True, output = combined sine model plus the median flux
    If returnmodel=False, output = (data - model)
    '''
    flux_out = np.array(flux, copy=True)
    sin_out = np.zeros_like(flux)  # return the sin function!

    # total baseline of time window
    dt = np.nanmax(time) - np.nanmin(time)
    medflux = np.nanmedian(flux)

    # The frequency grid does not change between passes: build it once.
    df = (1. / minper - 1. / maxper) / nper
    f0 = 1. / maxper
    per = 1. / (f0 + df * np.arange(nper))
    pok = np.where((per < dt) & (per > minper))

    # With no searchable period there is nothing to fit.
    n_pass = maxnum if pok[0].size > 0 else 0

    for k in range(0, n_pass):
        # Use Jake Vanderplas faster version!
        pgram = LombScargleFast(fit_offset=False)
        pgram.optimizer.set(period_range=(minper, maxper))
        pgram = pgram.fit(time, flux_out - medflux, error)
        pwr = pgram.score_frequency_grid(f0, df, nper)

        # Peak period and ITS power, both taken from the searchable range.
        ipk = np.argmax(pwr[pok])
        pk = per[pok][ipk]
        pp = pwr[pok][ipk]

        if debug:
            print('trial (k): ' + str(k) + '.  peak period (pk):' + str(pk) +
                  '.  peak power (pp):' + str(pp))

        # No significant period: the residuals would be unchanged, so every
        # further pass would repeat this one.  Stop here.
        if not pp > plim:
            break

        # fit sin curve to window and subtract
        if per2:
            p0 = [pk, 3.0 * np.nanstd(flux_out - medflux), 0.0,
                  pk / 2., 1.5 * np.nanstd(flux_out - medflux), 0.1, 0.0]
            try:
                pfit, pcov = curve_fit(_sinfunc2, time, flux_out - medflux,
                                       p0=p0)
                if debug:
                    print('>>', pfit)
            except RuntimeError:
                # Fit did not converge: fall back to zero amplitudes.
                pfit = [pk, 0., 0., 0., 0., 0., 0.]
                if debug:
                    print('Curve_Fit2 no good')
            component = _sinfunc2(time, *pfit)
        else:
            p0 = [pk, 3.0 * np.nanstd(flux_out - medflux), 0.0, 0.0]
            try:
                pfit, pcov = curve_fit(_sinfunc, time, flux_out - medflux,
                                       p0=p0)
            except RuntimeError:
                # Fit did not converge: fall back to zero amplitudes.
                pfit = [pk, 0., 0., 0.]
                if debug:
                    print('Curve_Fit no good')
            component = _sinfunc(time, *pfit)

        flux_out = flux_out - component
        sin_out = sin_out + component

    # add the median flux for this window BACK in
    sin_out = sin_out + medflux

    if returnmodel:
        return sin_out
    return flux_out
def find_cycle(feature, strain, mouse=None, bin_width=15,
               methods='LombScargleFast', disturb_t=False, gen_doc=False,
               plot=True, search_range_fit=None, nyquist_factor=3,
               n_cycle=10, search_range_find=(2, 26), sig=np.array([0.05])):
    """
    Find the most significant periods of a feature with Lomb-Scargle.

    The data of one mouse, or of every mouse of a strain when `mouse` is
    None, are binned and fitted with a Lomb-Scargle model from the gatspy
    package.  The `n_cycle` periods with the highest Lomb-Scargle power
    inside `search_range_find` are returned with their power and p-values,
    and the periodogram can be plotted.

    Note that the periodogram used for plotting and the search for the
    best cycles use different period grids: the former uses
    `search_range_fit` (or an automatic grid), the latter the optimizer's
    own grid search restricted to `search_range_find`.

    Parameters
    ----------
    feature: string in {"AS", "F", "M_AS", "M_IS", "W", "Distance"}
        "AS": Active state probability
        "F": Food consumed (g)
        "M_AS": Movement outside homebase
        "M_IS": Movement inside homebase
        "W": Water consumed (g)
        "Distance": Distance traveled
    strain: int
        nonnegative integer indicating the strain number
    mouse: int, default is None
        nonnegative integer indicating the mouse number.  If None, the
        series of all mice of `strain` are concatenated, each with its own
        time axis starting at 0.
    bin_width: int, minute unit, default is 15 minutes
        number of minutes, the time interval for data aggregation
    methods: string in {"LombScargleFast", "LombScargle"}
        the model used to determine the periods and the best cycles.
        If 'LombScargle' is chosen, `disturb_t` must be True.
    disturb_t: boolean, default is False
        If true, add uniformly distributed noise (at most a tenth of a bin
        either way) to the time sequence used to fit the model.  This
        avoids the singular matrix error that can happen sometimes.
    gen_doc: boolean, default is False
        If true, also return the values needed to draw the power versus
        period plot (see Returns).
    plot: boolean, default is True
        If true, call ``lombscargle_visualize`` to plot power versus
        period.  Stars mark significance: three for p-values in
        [0, 0.001], two for [0.001, 0.01], one for [0.01, 0.05].
        Horizontal lines show the power at the significance levels `sig`.
    search_range_fit: list or numpy array, hours unit, default is None
        periods at which to evaluate the Lomb-Scargle power for the plot.
        Too few points give a poor curve, although the starred best
        periods are always right.  The default None lets the model choose
        the grid and is recommended.
    nyquist_factor: int
        Used only when `search_range_fit` is None:
        5 * nyquist_factor * length(time sequence) / 2 periods are
        evaluated.
    n_cycle: int, default is 10
        number of best periods to return.
    search_range_find: list, tuple or numpy array of length 2, hours unit
        range of periods searched for the best cycles.  The minimum must
        be strictly larger than 0 to avoid 1/0 issues.
    sig: list or numpy array, default is [0.05].
        significance levels used for the horizontal lines of the plot.

    Returns
    -------
    cycle: numpy array of length 'n_cycle'
        The periods with the highest Lomb-Scargle power.
    cycle_power: numpy array of length 'n_cycle'
        The corresponding Lomb-Scargle power of 'cycle'.
    cycle_pvalue: numpy array of length 'n_cycle'
        The corresponding p-value of 'cycle'.

    When `gen_doc` is true the tuple returned is instead
    ``(periods, power, sig, N, cycle, cycle_power, cycle_pvalue)`` where
    `periods` and `power` are the periodogram grid and its power, and `N`
    is the length of the fitted time sequence.

    Raises
    ------
    ValueError
        If `feature` or `methods` is not one of the accepted values.

    Examples
    -------
    >>> a,b,c = find_cycle(feature='F', strain = 0,mouse = 0, plot=False,)
    >>> print(a,b,c)
    >>> [ 23.98055016   4.81080233  12.00693952   6.01216335   8.0356203
           3.4316698    2.56303353   4.9294791   21.37925713   3.5697756 ]
        [ 0.11543449  0.05138839  0.03853218  0.02982237  0.02275952
          0.0147941   0.01151601  0.00998443  0.00845883  0.0082382 ]
        [  0.00000000e+00   3.29976046e-10   5.39367189e-07   8.10528027e-05
           4.71001953e-03   3.70178834e-01   9.52707020e-01   9.99372657e-01
           9.99999981e-01   9.99999998e-01]
    """
    if feature not in ALL_FEATURES:
        raise ValueError(
            'Input value must in {"AS", "F", "M_AS", "M_IS", "W", "Distance"}')
    if methods not in METHOD:
        raise ValueError(
            'Input value must in {"LombScargleFast","LombScargle"}')

    # Width of one bin in hours.  The float literal keeps true division on
    # Python 2 as well.
    step = bin_width / 60.0

    # get data
    if mouse is None:
        data_all = aggregate_data(feature=feature, bin_width=bin_width)
        strain_rows = data_all.loc[data_all['strain'] == strain]
        # Iterate over the ids that are really present instead of assuming
        # they are exactly 0 .. n-1.
        mouse_ids = sorted(set(strain_rows['mouse']))
        data = []
        t = []
        for mouse_id in mouse_ids:
            series = strain_rows.loc[strain_rows['mouse'] == mouse_id][feature]
            data.extend(series)
            # np.arange(n) * step always has exactly n points, whereas
            # np.arange(0, n * step, step) can gain one through rounding.
            t.extend(np.arange(len(series)) * step)
        N = len(data)
        t = np.asarray(t)
    else:
        if feature == 'Distance':
            data = aggregate_movement(strain=strain, mouse=mouse,
                                      bin_width=bin_width)
        else:
            data = aggregate_interval(strain=strain, mouse=mouse,
                                      feature=feature, bin_width=bin_width)
        N = len(data)
        t = np.arange(N) * step

    y = data

    # fit model
    if disturb_t:
        t = t + np.random.uniform(-bin_width / 600.0, bin_width / 600.0, N)

    if methods == 'LombScargleFast':
        model = LombScargleFast(fit_period=False).fit(t=t, y=y)
    elif methods == 'LombScargle':
        model = LombScargle(fit_period=False).fit(t=t, y=y)

    # calculate periods' LS power
    if search_range_fit is None:
        periods, power = model.periodogram_auto(nyquist_factor=nyquist_factor)
    else:
        periods = search_range_fit
        power = model.periodogram(periods=search_range_fit)

    # find best cycle
    model.optimizer.period_range = search_range_find
    cycle, cycle_power = model.find_best_periods(return_scores=True,
                                                 n_periods=n_cycle)
    cycle_pvalue = 1 - (1 - np.exp(cycle_power / (-2) * (N - 1))) ** (2 * N)

    # visualization
    if plot:
        lombscargle_visualize(periods=periods, power=power, sig=sig, N=N,
                              cycle_power=cycle_power,
                              cycle_pvalue=cycle_pvalue, cycle=cycle)

    if gen_doc:
        return periods, power, sig, N, cycle, cycle_power, cycle_pvalue
    return cycle, cycle_power, cycle_pvalue