def lm_cost(x):
    n = (x.shape[0] - 1) // 4
    tk, xk, yk, thetak = split_state(x)
    xk = np.concatenate(([start_x], xk, [goal_x]))
    yk = np.concatenate(([start_y], yk, [goal_y]))
    thetak = np.concatenate(([start_theta], thetak, [goal_theta]))

    time_cost = np.sum(tk)

    # constant curvature path constraint
    path_cost = np.fabs(get_ceq(x))

    # vel, acc, radius limits
    ineq_cost = np.fabs(np.minimum(0, get_c(x)))

    # discourage sharp turns (a bit hacky, but it keeps the paths from getting jagged)
    dist = (xk[1:] - xk[:-1])**2 + (yk[1:] - yk[:-1])**2

    # The TEB paper says the path cost should be much larger than the rest,
    # but these weights (and the whole cost function) could probably use some tuning.
    # cost = .1*time_cost + np.sum(5*path_cost) + 5*np.sum(ineq_cost) + .1*np.sum(dist)
    cost = np.concatenate(
        ([0.01 * time_cost], 10 * path_cost, 10 * ineq_cost, 0.1 * dist))
    # cost = 0.001*time_cost + np.sum(5000*path_cost) + 10*np.sum(ineq_cost) + 100*np.sum(dist)
    return cost
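# Hedged usage sketch (added, not part of the original code): lm_cost returns a
# vector of residuals rather than a scalar, so it matches the interface expected
# by scipy.optimize.least_squares. The initial guess x0 is assumed to pack the
# timesteps and intermediate poses in the layout that split_state expects.
def _solve_teb_sketch(x0):
    from scipy.optimize import least_squares
    return least_squares(lm_cost, x0, method='trf')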
def train(self, subsample_idcs=None, ridge=1e-9, max_storage=1e8):
    self.idcs = np.sort(subsample_idcs)
    self.X_ind = self.X[self.idcs, :]

    self.chunk_size = int(max_storage / float(self.idcs.shape[0]))
    if self.chunk_size == 0:
        self.chunk_size = 1
    csz = self.chunk_size

    # can handle len(idcs)**2 memory, len(idcs)**3 time cost
    Kxx = self.k(self.X[self.idcs, :])
    Ysum = np.zeros((self.idcs.shape[0], self.Y.shape[1]))
    Ksum = np.zeros((self.idcs.shape[0], self.idcs.shape[0]))

    pbar = ProgressBar('Chunk sum', 0, self.X.shape[0])
    j = 0
    csz = self.idcs.shape[0]
    while j * csz < self.X.shape[0]:
        pbar.update(j * csz)
        Kchunk = self.k(self.X[j * csz:(j + 1) * csz, :], self.X[self.idcs, :])
        Ychunk = self.Y[j * csz:(j + 1) * csz, :]
        Ysum += Kchunk.T.dot(Ychunk)
        Ksum += Kchunk.T.dot(Kchunk)
        j += 1
    pbar.finish()

    self.V = Ksum + self.lvar * Kxx
    self.V += ridge * np.fabs(self.V).max() * np.eye(self.V.shape[0])
    self.alpha = np.linalg.solve(self.V, Ysum)
    self.V /= self.lvar
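# Hedged sketch (added, assumed interface, not in the original class): under the
# subset-of-regressors approximation, the quantities computed in train() give a
# predictive mean at test inputs Xs of k(Xs, X_ind) @ alpha, matching how
# pre_alpha is applied in pretrain()/_objective() below.
def _sr_predictive_mean_sketch(model, Xs):
    # model is assumed to expose k, X_ind, and alpha as set up by train()
    return np.dot(model.k(Xs, model.X_ind), model.alpha)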
def posttrain(self, ridge=1e-9):
    # compute constants for prediction
    Kxi = self.k(self.X_ind)
    self.C = Kxi.copy()

    j = 0
    KY = np.zeros((self.X_ind.shape[0], self.Y.shape[1]))
    KK = np.zeros((self.X_ind.shape[0], self.X_ind.shape[0]))
    csz = max(self.sridcs.shape[0], self.X_ind.shape[0])
    while j * csz < self.X.shape[0]:
        Kchunk = self.k(self.X[j * csz:(j + 1) * csz, :], self.X_ind)
        KY += np.dot(Kchunk.T, self.Y[j * csz:(j + 1) * csz, :])
        KK += np.dot(Kchunk.T, Kchunk)
        j += 1

    self.C += KK / self.lvar
    self.C += ridge * np.fabs(self.C).max() * np.eye(self.C.shape[0])
    self.alpha = np.linalg.solve(self.C, KY) / self.lvar

    Kxi += ridge * np.fabs(Kxi).max() * np.eye(Kxi.shape[0])
    self.C = np.linalg.inv(self.C) - np.linalg.inv(Kxi)
def get_c(x):
    n = (x.shape[0] - 1) // 4
    tk, xk, yk, thetak = split_state(x)
    xk = np.concatenate(([start_x], xk, [goal_x]))
    yk = np.concatenate(([start_y], yk, [goal_y]))
    thetak = np.concatenate(([start_theta], thetak, [goal_theta]))

    dx = xk[1:] - xk[0:-1]
    dy = yk[1:] - yk[0:-1]
    dtheta = thetak[1:] - thetak[0:-1]
    dk = np.array([dx, dy, np.zeros(n + 1)]).T

    # turning radius
    mask = np.fabs(dtheta) > 1e-5
    dtheta_stable = np.where(mask, dtheta, 1)
    turn_rad = np.where(
        mask,
        np.linalg.norm(dk, axis=1) / np.fabs(2 * np.sin(dtheta_stable / 2)),
        min_turning_radius)
    c_rad = turn_rad - min_turning_radius

    # linear velocity
    qk = np.array([np.cos(thetak[:-1]),
                   np.sin(thetak[:-1]),
                   np.zeros(n + 1)]).T
    proj_q_d = np.sum(qk * dk, axis=1)
    sign_v = sign(proj_q_d)
    vk = np.linalg.norm(dk, axis=1) * sign_v / tk
    c_vel = 0.9 * max_vel - np.fabs(vk)

    # angular velocity
    wk = dtheta / tk
    c_w = 0.8 * max_ang_vel - np.fabs(wk)

    vk = np.concatenate(([start_vel], vk))

    # acceleration (finite differences)
    ak = (vk[1:] - vk[0:-1]) / tk
    c_acc = np.where(ak < 0, ak + 0.5 * max_dec, 0.5 * max_acc - ak)

    c = np.concatenate((c_rad, c_vel, c_w, c_acc))
    return c
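# Illustrative check (added, not from the original code): the turning-radius
# expression in get_c uses the chord-length relation for a circular arc,
# |d| = 2 * r * sin(|dtheta| / 2), so dividing the chord length by
# |2 * sin(dtheta / 2)| recovers the arc radius r.
def _turn_radius_check(r=2.0, dtheta=0.3):
    chord = 2 * r * np.sin(dtheta / 2)
    return chord / np.fabs(2 * np.sin(dtheta / 2))  # equals r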
def pretrain(self, subsample_idcs, ridge=1e-9):
    self.sridcs = np.sort(subsample_idcs)

    # get matrices necessary to compute khat(x,x') and muhat(x)
    # using subset of regressors
    srgp = SubsetRegressorsGP(self.X, self.Y, self.k, self.lvar)
    srgp.train(self.sridcs, ridge)
    self.pre_alpha = srgp.alpha
    self.V = srgp.V
    self.V += ridge * np.fabs(self.V).max() * np.eye(self.sridcs.shape[0])
def fitFlare(x, y, yerr, tstart, tstop, skew_fac=10):
    mask = (x > tstart) & (x < tstop)
    mu0 = (tstart + tstop) / 2
    sig0 = (tstop - tstart) / 2
    A0 = np.max(y) * 100
    skew = 0

    try:
        # Fit a gaussian to the segment
        popt1, pcov1 = curve_fit(fh.gaussian, x[mask], y[mask],
                                 p0=(mu0, sig0, A0), sigma=yerr[mask])
        y_model = fh.gaussian(x[mask], popt1[0], popt1[1], popt1[2])
        chi1 = fh.redChiSq(y_model, y[mask], yerr[mask], len(y[mask]) - 3)

        # Fit the Davenport 2014 flare model to the segment
        popt2, pcov2 = curve_fit(fh.aflare1, x[mask], y[mask],
                                 p0=(mu0, sig0, A0), sigma=yerr[mask])
        y_model = fh.aflare1(x[mask], popt2[0], popt2[1], popt2[2])
        chi2 = fh.redChiSq(y_model, y[mask], yerr[mask], len(y[mask]) - 3)

        # If the flare model fit worked, calculate the skew by centering on the
        # peak of the aflare model. Use a window scaled to the FWHM of the
        # flare model for the integration.
        mu = popt2[0]  # np.trapz(x[mask]*A*y[mask], x[mask])
        f_hwhm = popt2[1] / 2
        t1_skew, t2_skew = mu - skew_fac * f_hwhm, mu + skew_fac * f_hwhm
        skew_mask = (x > t1_skew) & (x < t2_skew)

        # Measure the skew by treating time = x and flux = p(x). Calculate the
        # third moment of p(x).
        A = 1 / np.trapz(y[skew_mask], x[skew_mask])
        var = np.trapz((x[skew_mask] - mu)**2 * A * y[skew_mask], x[skew_mask])
        stddev = np.sqrt(np.fabs(var))
        skew = np.trapz((x[skew_mask] - mu)**3 * A * y[skew_mask],
                        x[skew_mask]) / stddev**3
    except:
        traceback.print_exc()
        empty = np.zeros(3)
        return empty, empty, -1, empty, empty, -1, 0, 0

    n_pts = len(x[mask])
    n_pts_true = np.floor(((tstop - tstart) * u.d).to(u.min).value / 2)
    coverage = n_pts / n_pts_true

    return popt1, np.sqrt(pcov1.diagonal()), chi1, \
        popt2, np.sqrt(pcov2.diagonal()), chi2, skew, coverage
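# Illustrative sketch (added, not from the original code): the same moment-based
# skew measurement used in fitFlare, applied to a toy profile with a fast rise
# and slow decay. Unlike fitFlare, mu here is the first moment of the profile
# rather than the fitted peak time of the flare model.
def _skew_sketch():
    t = np.linspace(-1, 5, 2000)
    flux = np.where(t < 0, np.exp(5 * t), np.exp(-t))  # sharp rise, exponential decay
    A = 1 / np.trapz(flux, t)
    mu = np.trapz(t * A * flux, t)
    var = np.trapz((t - mu)**2 * A * flux, t)
    return np.trapz((t - mu)**3 * A * flux, t) / np.sqrt(var)**3  # > 0: skewed toward the decay tail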
def test_correlated_fit():
    num_samples = 400
    N = 10
    x = norm.rvs(size=(N, num_samples))

    r = np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            r[i, j] = np.exp(-0.8 * np.fabs(i - j))

    errl = np.sqrt([3.4, 2.5, 3.6, 2.8, 4.2, 4.7, 4.9, 5.1, 3.2, 4.2])
    for i in range(N):
        for j in range(N):
            r[i, j] *= errl[i] * errl[j]

    c = cholesky(r, lower=True)
    y = np.dot(c, x)
    x = np.arange(N)

    for linear in [True, False]:
        data = []
        for i in range(N):
            if linear:
                data.append(pe.Obs([[i + 1 + o for o in y[i]]], ['ens']))
            else:
                data.append(
                    pe.Obs([[
                        np.exp(-(i + 1)) + np.exp(-(i + 1)) * o for o in y[i]
                    ]], ['ens']))

        [o.gamma_method() for o in data]

        if linear:
            def fitf(p, x):
                return p[1] + p[0] * x
        else:
            def fitf(p, x):
                return p[1] * np.exp(-p[0] * x)

        fitp = pe.least_squares(x, data, fitf, expected_chisquare=True)
        fitpc = pe.least_squares(x, data, fitf, correlated_fit=True)
        for i in range(2):
            diff = fitp[i] - fitpc[i]
            diff.gamma_method()
            assert diff.is_zero_within_error(sigma=5)
def get_ceq(x):
    n = (x.shape[0] - 1) // 4
    tk, xk, yk, thetak = split_state(x)
    xk = np.concatenate(([start_x], xk, [goal_x]))
    yk = np.concatenate(([start_y], yk, [goal_y]))
    thetak = np.concatenate(([start_theta], thetak, [goal_theta]))

    dx = xk[1:] - xk[0:-1]
    dy = yk[1:] - yk[0:-1]
    dk = np.array([dx, dy, np.zeros(n + 1)]).T

    # constant curvature (trapezoidal collocation, see teb paper)
    ceq = np.array([
        np.cos(thetak[0:-1]) + np.cos(thetak[1:]),
        np.sin(thetak[0:-1]) + np.sin(thetak[1:]),
        np.zeros(n + 1)
    ]).T
    ceq = np.fabs(np.cross(ceq, dk, axisa=1, axisb=1)[:, 2])
    return ceq
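# Sanity sketch (added, not from the original code): for two consecutive poses
# lying on a circular arc, the heading bisector (cos0 + cos1, sin0 + sin1) is
# parallel to the displacement, so the constant-curvature residual above is
# zero; poses that violate the constant-curvature assumption give a nonzero value.
def _ceq_arc_check(r=2.0, th0=0.0, th1=0.1):
    dx = r * (np.sin(th1) - np.sin(th0))
    dy = -r * (np.cos(th1) - np.cos(th0))
    bisector = np.array([np.cos(th0) + np.cos(th1), np.sin(th0) + np.sin(th1), 0.0])
    d = np.array([dx, dy, 0.0])
    return np.cross(bisector, d)[2]  # ~0 up to floating point error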
def sign(v):
    # smooth (tanh-based) approximation of the sign function; values within
    # the tolerance band around zero are mapped to exactly 0
    tol = 1e-5
    k = 5e3
    res = np.where(np.fabs(v) <= tol, 0, np.tanh(k * v))
    return res
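# Quick illustration (added, not from the original code): sign() acts as a
# differentiable stand-in for np.sign, so near-zero projections in get_c don't
# abruptly flip the velocity sign.
def _sign_demo():
    return sign(np.array([-0.1, -2e-6, 0.0, 2e-6, 0.1]))  # approx [-1, 0, 0, 0, 1]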
def _objective(self, X_i, itr, ridge, compute_constant=False):
    # chunk size
    csz = max(self.sridcs.shape[0], self.Zshape[0])
    # inducing pt size
    isz = self.Zshape[0]
    # subsample size
    srsz = self.sridcs.shape[0]

    # reshape inducing pt matrix (scipy.minimize uses a flattened version)
    X_i = X_i.reshape(self.Zshape)

    # compute useful constants
    Kxi = self.k(X_i)
    Kxi += ridge * np.fabs(Kxi).max() * np.eye(isz)
    Kxixsr = self.k(X_i, self.X[self.sridcs, :])
    #
    #print(np.linalg.cond(self.V))
    #print(np.linalg.cond(Kxi))
    #
    Khxi = np.dot(Kxixsr, np.linalg.solve(self.V, Kxixsr.T))
    Kxi_inv_muxi = np.linalg.solve(Kxi, np.dot(Kxixsr, self.pre_alpha))

    KxxiTKxxi = np.zeros((isz, isz))
    KxxsrTKxxi = np.zeros((srsz, isz))
    dYxi_chunks = []  # need this because autograd doesn't support array assignment
    KdY = np.zeros((isz, self.Y.shape[1]))
    KdYi = np.zeros((isz, self.Y.shape[1]))
    j = 0
    while j * csz < self.X.shape[0]:
        Kchunk_sr = self.k(self.X[j * csz:(j + 1) * csz, :],
                           self.X[self.sridcs, :])
        Kchunk_i = self.k(self.X[j * csz:(j + 1) * csz, :], X_i)
        KxxiTKxxi += np.dot(Kchunk_i.T, Kchunk_i)
        KxxsrTKxxi += np.dot(Kchunk_sr.T, Kchunk_i)
        dYxi_chunk = np.dot(Kchunk_i, Kxi_inv_muxi) - self.Y[j * csz:(j + 1) * csz, :]
        dYxi_chunks.append(dYxi_chunk)
        KdY += np.dot(Kchunk_i.T, self.dYx[j * csz:(j + 1) * csz, :])
        KdYi += np.dot(Kchunk_i.T, dYxi_chunk)
        j += 1
    dYxi = np.vstack(dYxi_chunks)

    Kxi_inv_KxxiTKxxi = np.linalg.solve(Kxi, KxxiTKxxi)
    B = np.linalg.solve(Kxi.T, Kxi_inv_KxxiTKxxi.T).T / self.lvar
    B = 0.5 * (B + B.T)  # enforce symmetry (lin solver doesn't guarantee it)
    #
    #print(np.linalg.cond(np.eye(isz) + np.dot(B, Kxi)))
    #
    Xi = np.linalg.solve(np.eye(isz) + np.dot(B, Kxi), B)
    Xi = 0.5 * (Xi + Xi.T)  # enforce symmetry (lin solver doesn't guarantee it)

    ##################
    ## Compute ||L||^2
    ##################
    # compute C3 (used for S3 = Kx,xi*C3*Kxi,x)
    C3 = np.dot(Xi, np.dot(Kxi, Xi)) - 2. * Xi
    L = np.trace(
        np.dot(KxxsrTKxxi.T, np.linalg.solve(self.V, np.dot(KxxsrTKxxi, C3))))
    L += np.trace(np.dot(KdY.T, np.dot(C3, KdY)))

    # this code computes objective function constants that aren't needed for optimization
    if compute_constant:
        Kx = self.k(self.X)
        Kxxsr = self.k(self.X, self.X[self.sridcs, :])
        Khxx = np.dot(Kxxsr, np.linalg.solve(self.V, Kxxsr.T))
        L += np.trace(np.dot(Khxx, Kx))
        L += np.dot(self.dYx.T, np.dot(Kx, self.dYx))

    ##################
    ## Compute ||L_i||^2
    ##################
    # compute C1 (used for S1 = Kxxi*C1*Kxix)
    A1 = np.linalg.solve(Kxi, np.eye(isz) - np.dot(Kxi, Xi))
    C1 = np.dot(A1, np.dot(Kxi, A1.T))
    L_i = np.trace(
        np.dot(Khxi,
               np.dot(Kxi_inv_KxxiTKxxi, np.dot(C1, Kxi_inv_KxxiTKxxi.T))))
    L_i += np.trace(np.dot(KdYi.T, np.dot(C1, KdYi)))

    ##################
    ## Compute <L, L_i>
    ##################
    # compute C2 (used for S2 = Kxxi*C2*Kxix)
    A2 = np.eye(isz) - np.dot(Xi, Kxi)
    C2 = np.dot(A2, np.linalg.solve(Kxi, A2.T).T)
    L_L_i = np.trace(
        np.dot(
            np.linalg.solve(self.V.T, Kxixsr.T).T,
            np.dot(KxxsrTKxxi, np.dot(C2, Kxi_inv_KxxiTKxxi.T))))
    L_L_i += np.trace(np.dot(KdY.T, np.dot(C2, KdYi)))

    # .sum() converts a 1x1 array to a scalar (a 1x1 array causes problems for scipy.minimize)
    return (L + L_i - 2 * L_L_i).sum()
def procFlares(prefix, filenames, path, clobberGP=False, makePlots=False, writeLog=True):
    if makePlots:
        plots_path = path + 'plots/'
        if not os.path.exists(plots_path):
            os.makedirs(plots_path)

    gp_path = path + 'gp/'
    #if not os.path.exists(gp_path):
    #    os.makedirs(gp_path)

    log_path = path + 'log/'
    #if not os.path.exists(log_path):
    #    os.makedirs(log_path)

    if writeLog:
        if os.path.exists(log_path + prefix + '.log'):
            os.remove(log_path + prefix + '.log')

    # Columns for flare table
    FL_files = np.array([])
    FL_TICs = np.array([])
    FL_id = np.array([])
    FL_t0 = np.array([])
    FL_t1 = np.array([])
    FL_f0 = np.array([])
    FL_f1 = np.array([])
    FL_ed = np.array([])
    FL_ed_err = np.array([])
    FL_skew = np.array([])
    FL_cover = np.array([])
    FL_mu = np.array([])
    FL_std = np.array([])
    FL_g_amp = np.array([])
    FL_mu_err = np.array([])
    FL_std_err = np.array([])
    FL_g_amp_err = np.array([])
    FL_tpeak = np.array([])
    FL_fwhm = np.array([])
    FL_f_amp = np.array([])
    FL_tpeak_err = np.array([])
    FL_fwhm_err = np.array([])
    FL_f_amp_err = np.array([])
    FL_g_chisq = np.array([])
    FL_f_chisq = np.array([])
    FL_g_fwhm_win = np.array([])
    FL_f_fwhm_win = np.array([])

    # Columns for param table
    P_median = np.array([])
    P_s_window = np.array([])
    P_acf_1dt = np.array([])
    P_acf_amp = np.array([])

    failed_files = []

    for k in range(len(filenames)):
        start_time = timing.time()
        filename = filenames[k]
        TIC = int(filename.split('-')[-3])
        file = path + filename

        if makePlots:
            fig, axes = plt.subplots(figsize=(16, 16), nrows=4, sharex=True)

        print('Processing ' + filename)
        gp_data_file = gp_path + filename + '.gp'
        gp_param_file = gp_path + filename + '.gp.par'

        median = -1
        s_window = -1
        acf_1dt = -1
        acf_amp = -1

        with fits.open(file, mode='readonly') as hdulist:
            try:
                tess_bjd = hdulist[1].data['TIME']
                quality = hdulist[1].data['QUALITY']
                pdcsap_flux = hdulist[1].data['PDCSAP_FLUX']
                pdcsap_flux_error = hdulist[1].data['PDCSAP_FLUX_ERR']
            except:
                P_median = np.append(P_median, median)
                P_s_window = np.append(P_s_window, s_window)
                P_acf_1dt = np.append(P_acf_1dt, acf_1dt)
                P_acf_amp = np.append(P_acf_amp, acf_amp)

                failed_files.append(filename)
                np.savetxt(gp_data_file, ([]))
                print('Reading file ' + filename + ' failed')
                continue

        if makePlots:
            axes[0].plot(tess_bjd, pdcsap_flux)

        # Cut out poor quality points
        ok_cut = (quality == 0) & (~np.isnan(tess_bjd)) & (~np.isnan(pdcsap_flux)) \
            & (~np.isnan(pdcsap_flux_error))

        tbl = Table([tess_bjd[ok_cut], pdcsap_flux[ok_cut],
                     pdcsap_flux_error[ok_cut]],
                    names=('TIME', 'PDCSAP_FLUX', 'PDCSAP_FLUX_ERR'))
        df_tbl = tbl.to_pandas()

        median = np.nanmedian(df_tbl['PDCSAP_FLUX'])

        # Estimate the period of the LC with autocorrelation
        acf = fh.autocorr_estimator(tbl['TIME'], tbl['PDCSAP_FLUX'] / median,
                                    yerr=tbl['PDCSAP_FLUX_ERR'] / median,
                                    min_period=0.1, max_period=27, max_peaks=2)
        if len(acf['peaks']) > 0:
            acf_1dt = acf['peaks'][0]['period']
            acf_amp = acf['autocorr'][1][np.where(
                acf['autocorr'][0] == acf_1dt)]
            mask = np.where(
                (acf['autocorr'][0] == acf['peaks'][0]['period']))[0]
            acf_1pk = acf['autocorr'][1][mask][0]
            s_window = int(acf_1dt /
                           np.fabs(np.nanmedian(np.diff(df_tbl['TIME']))) / 6)
        else:
            acf_1dt = (tbl['TIME'][-1] - tbl['TIME'][0]) / 2
            acf_amp = 0
            s_window = 128

        P_median = np.append(P_median, median)
        P_s_window = np.append(P_s_window, s_window)
        P_acf_1dt = np.append(P_acf_1dt, acf_1dt)
        P_acf_amp = np.append(P_acf_amp, acf_amp)

        # Run GP fit on the lightcurve if we haven't already
        if os.path.exists(gp_data_file) and not clobberGP:
            # Failed GP regression will produce an empty file
            if os.path.getsize(gp_data_file) == 0:
                print(file + ' failed (previously) during GP regression')
                failed_files.append(filename)
                continue

            print('GP file already exists, loading...')
            times, smo, var = np.loadtxt(gp_data_file)
        else:
            smo = np.zeros(len(df_tbl['TIME']))
            try:
                if makePlots:
                    ax = axes[1]
                else:
                    ax = None
                times, smo, var, params = iterGP_rotation(
                    df_tbl['TIME'].values,
                    df_tbl['PDCSAP_FLUX'].values / median,
                    df_tbl['PDCSAP_FLUX_ERR'].values / median,
                    acf_1dt, acf_1pk, ax=ax)

                #np.savetxt(gp_param_file, params['logs2'], params['logamp'], params['logperiod'], \
                #           params['logq0'], params['logdeltaq'], params['mix'], params['period'])
                np.savetxt(gp_param_file, params)
                np.savetxt(gp_data_file, (times, smo, var))

            except:
                traceback.print_exc()
                failed_files.append(filename)
                np.savetxt(gp_data_file, ([]))
                print(filename + ' failed during GP fitting')
                continue

        # The GP is produced from a downsampled lightcurve. Need to interpolate
        # to compare the GP against the full LC.
        smo_int = np.interp(tbl['TIME'], times, smo)

        # Search for flares in the smoothed lightcurve
        x = np.array(tbl['TIME'])
        y = np.array(tbl['PDCSAP_FLUX'] / median - smo_int)
        yerr = np.array(tbl['PDCSAP_FLUX_ERR'] / median)

        FL = fh.FINDflare(y, yerr, avg_std=True, std_window=s_window,
                          N1=3, N2=1, N3=3)

        if makePlots:
            axes[3].plot(x, y, zorder=1)
            for j in range(len(FL[0])):
                s1, s2 = FL[0][j], FL[1][j] + 1
                axes[3].scatter(x[s1:s2], y[s1:s2], zorder=2)

        # Measure properties of detected flares
        if makePlots:
            fig_fl, axes_fl = plt.subplots(figsize=(16, 16), nrows=4, ncols=4)

        for j in range(len(FL[0])):
            s1, s2 = FL[0][j], FL[1][j] + 1
            tstart, tstop = x[s1], x[s2]
            dx_fac = 10
            dx = tstop - tstart
            x1 = tstart - dx * dx_fac / 2
            x2 = tstop + dx * dx_fac / 2
            mask = (x > x1) & (x < x2)

            # Mask out other flare detections when fitting models
            other_mask = np.ones(len(x), dtype=bool)
            for i in range(len(FL[0])):
                s1other, s2other = FL[0][i], FL[1][i] + 1
                if i == j:
                    continue
                other_mask[s1other:s2other] = 0

            popt1, pstd1, g_chisq, popt2, pstd2, f_chisq, skew, cover = \
                fitFlare(x[other_mask], y[other_mask], yerr[other_mask], x1, x2)

            mu, std, g_amp = popt1[0], popt1[1], popt1[2]
            mu_err, std_err, g_amp_err = pstd1[0], pstd1[1], pstd1[2]

            tpeak, fwhm, f_amp = popt2[0], popt2[1], popt2[2]
            tpeak_err, fwhm_err, f_amp_err = pstd2[0], pstd2[1], pstd2[2]

            f_fwhm_win = fwhm / (x2 - x1)
            g_fwhm_win = std / (x2 - x1)

            ed, ed_err = measureED(x, y, yerr, tpeak, fwhm)

            FL_files = np.append(FL_files, filename)
            FL_TICs = np.append(FL_TICs, TIC)
            FL_t0 = np.append(FL_t0, x1)
            FL_t1 = np.append(FL_t1, x2)
            FL_f0 = np.append(FL_f0, np.nanmedian(tbl['PDCSAP_FLUX'][s1:s2]))
            FL_f1 = np.append(FL_f1, np.nanmax(tbl['PDCSAP_FLUX'][s1:s2]))
            FL_ed = np.append(FL_ed, ed)
            FL_ed_err = np.append(FL_ed_err, ed_err)
            FL_skew = np.append(FL_skew, skew)
            FL_cover = np.append(FL_cover, cover)
            FL_mu = np.append(FL_mu, mu)
            FL_std = np.append(FL_std, std)
            FL_g_amp = np.append(FL_g_amp, g_amp)
            FL_mu_err = np.append(FL_mu_err, mu_err)
            FL_std_err = np.append(FL_std_err, std_err)
            FL_g_amp_err = np.append(FL_g_amp_err, g_amp_err)
            FL_tpeak = np.append(FL_tpeak, tpeak)
            FL_fwhm = np.append(FL_fwhm, fwhm)
            FL_f_amp = np.append(FL_f_amp, f_amp)
            FL_tpeak_err = np.append(FL_tpeak_err, tpeak_err)
            FL_fwhm_err = np.append(FL_fwhm_err, fwhm_err)
            FL_f_amp_err = np.append(FL_f_amp_err, f_amp_err)
            FL_g_chisq = np.append(FL_g_chisq, g_chisq)
            FL_f_chisq = np.append(FL_f_chisq, f_chisq)
            FL_g_fwhm_win = np.append(FL_g_fwhm_win, g_fwhm_win)
            FL_f_fwhm_win = np.append(FL_f_fwhm_win, f_fwhm_win)

            if makePlots and j < 15:
                row_idx = j // 4
                col_idx = j % 4
                axes_fl[row_idx][col_idx].errorbar(x[mask], y[mask],
                                                   yerr=yerr[mask])
                axes_fl[row_idx][col_idx].scatter(x[s1:s2], y[s1:s2])
                if popt1[0] > 0:
                    xmodel = np.linspace(x1, x2)
                    ymodel = fh.aflare1(xmodel, tpeak, fwhm, f_amp)
                    axes_fl[row_idx][col_idx].plot(
                        xmodel, ymodel,
                        label=r'$\chi_{f}$ = ' + '{:.3f}'.format(f_chisq)
                        + '\n FWHM/window = ' + '{:.2f}'.format(f_fwhm_win))
                    ymodel = fh.gaussian(xmodel, mu, std, g_amp)
                    axes_fl[row_idx][col_idx].plot(
                        xmodel, ymodel,
                        label=r'$\chi_{g}$ = ' + '{:.3f}'.format(g_chisq)
                        + '\n FWHM/window = ' + '{:.2f}'.format(g_fwhm_win))
                    axes_fl[row_idx][col_idx].axvline(tpeak - fwhm / 2,
                                                      linestyle='--')
                    axes_fl[row_idx][col_idx].axvline(tpeak + fwhm / 2,
                                                      linestyle='--')
                    axes_fl[row_idx][col_idx].legend()
                    axes_fl[row_idx][col_idx].set_title(
                        'Skew = ' + '{:.3f}'.format(skew))

        if makePlots:
            fig.suptitle(filename)
            axes[0].set_xlabel('Time [BJD - 2457000, days]')
            axes[0].set_ylabel('Flux [e-/s]')
            axes[1].set_xlabel('Time [BJD - 2457000, days]')
            axes[1].set_ylabel('Normalized Flux')
            axes[2].set_xlabel('Time [BJD - 2457000, days]')
            axes[2].set_ylabel('Rolling STD of GP')
            axes[3].set_xlabel('Time [BJD - 2457000, days]')
            axes[3].set_ylabel('Normalized Flux - GP')
            fig.savefig(plots_path + filename + '.png', format='png')

            if len(FL[0]) > 0:
                fig_fl.suptitle(filename)
                fig_fl.savefig(plots_path + filename + '_flares.png',
                               format='png')
            plt.clf()

        if writeLog:
            with open(log_path + prefix + '.log', 'a') as f:
                time_elapsed = timing.time() - start_time
                num_flares = len(FL[0])
                f.write('{:^15}'.format(str(k + 1) + '/' + str(len(filenames)))
                        + '{:<60}'.format(filename)
                        + '{:<20}'.format(time_elapsed)
                        + '{:<10}'.format(num_flares) + '\n')

        # Periodically write to the flare table file and param table file
        l = k + 1
        ALL_TIC = pd.Series(filenames).str.split(
            '-', expand=True).iloc[:, -3].astype('int')
        ALL_FILES = pd.Series(filenames).str.split(
            '/', expand=True).iloc[:, -1]

        flare_out = pd.DataFrame(data={
            'file': FL_files, 'TIC': FL_TICs, 't0': FL_t0, 't1': FL_t1,
            'med_flux': FL_f0, 'peak_flux': FL_f1, 'ed': FL_ed,
            'ed_err': FL_ed_err, 'skew': FL_skew, 'cover': FL_cover,
            'mu': FL_mu, 'std': FL_std, 'g_amp': FL_g_amp, 'mu_err': FL_mu_err,
            'std_err': FL_std_err, 'g_amp_err': FL_g_amp_err, 'tpeak': FL_tpeak,
            'fwhm': FL_fwhm, 'f_amp': FL_f_amp, 'tpeak_err': FL_tpeak_err,
            'fwhm_err': FL_fwhm_err, 'f_amp_err': FL_f_amp_err,
            'f_chisq': FL_f_chisq, 'g_chisq': FL_g_chisq,
            'f_fwhm_win': FL_f_fwhm_win, 'g_fwhm_win': FL_g_fwhm_win})
        flare_out.to_csv(log_path + prefix + '_flare_out.csv', index=False)

        param_out = pd.DataFrame(data={
            'file': ALL_FILES[:l], 'TIC': ALL_TIC[:l], 'med': P_median[:l],
            's_window': P_s_window[:l], 'acf_1dt': P_acf_1dt[:l],
            'acf_amp': P_acf_amp[:l]})
        param_out.to_csv(log_path + prefix + '_param_out.csv', index=False)

    for k in range(len(failed_files)):
        print(failed_files[k])
def coreset_single(N, D, dist, algn):
    # sys.stderr.write('n: ' + str(N) + ' d: ' + str(D) + ' dist: ' + str(dist) + ' salgn: ' + str(algn) + '\n')
    x, mu0, Sig0, Sig = gendata(N, D, dist)
    Sig0inv = np.linalg.inv(Sig0)
    Siginv = np.linalg.inv(Sig)
    mup, Sigp = weighted_post(mu0, np.linalg.inv(Sig0), np.linalg.inv(Sig), x,
                              np.ones(x.shape[0]))
    anm, alg = algn
    coreset = alg(x, mu0, Sig0, Sig)

    # incremental M tests
    prev_err = np.inf
    for m in range(1, N + 1):
        coreset.build(m)
        muw, Sigw = weighted_post(mu0, Sig0inv, Siginv, x, coreset.weights())
        w = coreset.weights()

        # check if coreset for 1 datapoint is immediately optimal
        if x.shape[0] == 1:
            assert np.fabs(w - np.array([1])) < tol, \
                anm + " failed: coreset not immediately optimal with N = 1. weights: " + \
                str(coreset.weights()) + " mup = " + str(mup) + " Sigp = " + str(Sigp) + \
                " muw = " + str(muw) + " Sigw = " + str(Sigw)

        # check if coreset is valid
        assert (w > 0.).sum() <= m, anm + " failed: coreset size > m"
        assert (w > 0.).sum() == coreset.size(), \
            anm + " failed: sum of coreset.weights()>0 not equal to size(): sum = " + \
            str((coreset.weights() > 0).sum()) + " size(): " + str(coreset.size())
        assert np.all(w >= 0.), anm + " failed: coreset has negative weights"

        # check if actual output error is monotone
        err = weighted_post_KL(mu0, Sig0inv, Siginv, x, w,
                               reverse=True if 'Reverse' in anm else False)
        assert err - prev_err < tol, \
            anm + " failed: error is not monotone decreasing, err = " + str(err) + \
            " prev_err = " + str(prev_err)

        # check if coreset is computing error properly
        assert np.fabs(coreset.error() - err) < tol, \
            anm + " failed: error est is not close to true err: est err = " + \
            str(coreset.error()) + ' true err = ' + str(err)
        prev_err = err

    # save incremental M result
    w_inc = coreset.weights()

    # check reset
    coreset.reset()
    err = weighted_post_KL(mu0, Sig0inv, Siginv, x, np.zeros(x.shape[0]),
                           reverse=True if 'Reverse' in anm else False)
    assert coreset.M == 0 and np.all(np.fabs(coreset.weights()) == 0.) and \
        np.fabs(coreset.error() - err) < tol and not coreset.reached_numeric_limit, \
        anm + " failed: reset() did not properly reset"

    # check build up to N all at once vs incremental
    # do this test for all except bin, where symmetries can cause instabilities
    # in the choice of vector / weights
    if dist != 'bin':
        coreset.build(N)
        w = coreset.weights()
        err = weighted_post_KL(mu0, Sig0inv, Siginv, x, w,
                               reverse=True if 'Reverse' in anm else False)
        err_inc = weighted_post_KL(mu0, Sig0inv, Siginv, x, w_inc,
                                   reverse=True if 'Reverse' in anm else False)
        assert np.sqrt(((w - w_inc)**2).sum()) < tol, \
            anm + " failed: incremental build up to N doesn't produce the same result as one run at N: \n error = " + \
            str(err) + " error_inc = " + str(err_inc)

    # check if coreset with all_data_wts is optimal
    coreset._update_weights(coreset.all_data_wts)
    assert coreset.error() < tol, \
        anm + " failed: coreset with all_data_wts does not have error 0"