def standard_normalizer(self, x):
    """Build a row-wise standardizing function for ``x`` and its inverse.

    Means and standard deviations are taken along ``axis=1`` with NaNs
    ignored.  ``x`` is indexed as a 2-D array and is modified in place:
    every NaN entry is overwritten with the mean of its row.

    ``self`` is not used by this function.

    Parameters
    ----------
    x : numpy array (2-D)
        Data whose rows are standardized.  NaN entries are replaced in place.

    Returns
    -------
    normalizer : callable
        Maps ``data`` to ``(data - row_means) / row_stds``.
    inverse_normalizer : callable
        Maps ``data`` to ``data * row_stds + row_means``.
    """
    # compute the mean and standard deviation of each row, ignoring NaNs
    x_means = np.nanmean(x, axis=1)[:, np.newaxis]
    x_stds = np.nanstd(x, axis=1)[:, np.newaxis]

    # rows whose spread is below the small threshold get 1.0 added to their
    # standard deviation so the division below stays well behaved
    x_stds[x_stds < 10**(-2)] += 1.0

    # fill in any nan values with the mean of their row; done in a single
    # vectorized assignment so no size-1 array is coerced to a scalar
    nan_rows, nan_cols = np.nonzero(np.isnan(x))
    x[nan_rows, nan_cols] = x_means[nan_rows, 0]

    # create standard normalizer function
    def normalizer(data):
        return (data - x_means) / x_stds

    # create inverse standard normalizer
    def inverse_normalizer(data):
        return data * x_stds + x_means

    return normalizer, inverse_normalizer
def standardize(data, mask):
    """Standardize ``data`` along axis 0 using only the unmasked entries.

    Entries where ``mask`` is False are overwritten with NaN **in place** in
    ``data`` and are excluded from the mean / standard deviation.  They stay
    NaN in the returned array.

    Parameters
    ----------
    data : numpy float array
        Values to standardize; modified in place (masked entries become NaN).
    mask : numpy bool array, same shape as ``data``
        True where ``data`` is valid.

    Returns
    -------
    y : numpy array
        ``(data - mean) / std`` with statistics taken over axis 0.

    Raises
    ------
    AssertionError
        If any *unmasked* entry of the result is not finite (for example a
        slice along axis 0 whose valid values have zero spread).
    """
    data[~mask] = np.nan
    m = np.nanmean(data, axis=0)
    s = np.nanstd(data, axis=0)

    # slices with no valid entry at all have an undefined spread; use 1 so
    # the division itself is harmless (their outputs are NaN regardless)
    s[~np.any(mask, axis=0)] = 1
    y = (data - m) / s

    # Masked entries are NaN by construction, so only the valid entries are
    # checked; checking everything would reject every partially masked input.
    assert np.all(np.isfinite(y[mask])), \
        "non-finite value among unmasked standardized entries"
    return y
def sigma_clip(t, y, yerr, mask=None):
    """Iteratively reject high outliers in ``y`` and return the keep-mask.

    On each pass the mean and standard deviation of the currently kept
    points are computed (NaNs ignored) and a point is kept when
    ``y - mean < 3 * std``.  The clip is one-sided: only points lying too far
    *above* the mean are rejected.  Iteration stops when the mask no longer
    changes.

    Parameters
    ----------
    t : sequence
        Only its length is used, to size the default mask.
    y : numpy array
        Values to clip.
    yerr : any
        Not used by this function.
    mask : numpy bool array, optional
        Points that are False here are never used and are never returned as
        kept.  The array itself is not modified.

    Returns
    -------
    m : numpy bool array
        True for the points that survive the clipping.
    """
    if mask is not None:
        allowed = np.array(mask, dtype=bool)
    else:
        allowed = np.ones(len(t), dtype=bool)

    m = allowed.copy()
    while True:
        mu = np.nanmean(y[m])
        sig = np.nanstd(y[m])
        # keep the caller's exclusions in force on every pass; without this
        # the threshold test alone would let excluded points back in
        m0 = allowed & (y - mu < 3 * sig)
        if np.all(m0 == m):
            break
        m = m0
    return m
# NOTE(review): the line below is the tail of a call that opens above this
# chunk; it and the three statements after it presumably belong to the body
# of `experiment` (they use `sname` and `seed`) -- confirm against the full file.
jac=True, options={ 'maxiter': 5000, 'disp': True, 'ftol': 0 }, callback=callback0)
    # Write this run's results to <ROOT_PATH>/MMR_IVs/results/<sname>/,
    # creating the directory if needed; one file per seed.
    PATH = ROOT_PATH + "/MMR_IVs/results/" + sname + "/"
    os.makedirs(PATH, exist_ok=True)
    np.save(PATH + 'LMO_errs_{}_nystr.npy'.format(seed), [opt_params, prev_norm, opt_test_err])


# Script entry point: run every (scenario, seed) experiment, then summarize
# the stored test errors.
# NOTE(review): the original indentation was lost; the summary is laid out
# here as running once per scenario, inside the `sname` loop -- confirm.
if __name__ == '__main__':
    snames = ['mnist_z', 'mnist_x', 'mnist_xz']
    for sname in snames:
        for seed in range(100):
            experiment(sname, seed)

        # Collect the last stored entry of every result file that exists
        # for this scenario, skipping runs whose last entry is None.
        PATH = ROOT_PATH + "/MMR_IVs/results/" + sname + "/"
        ress = []
        for seed in range(100):
            filename = PATH + 'LMO_errs_{}_nystr.npy'.format(seed)
            if os.path.exists(filename):
                # allow_pickle=True: presumably needed because the saved list
                # is stored as an object array; only load files written by
                # this script, since unpickling untrusted files is unsafe.
                res = np.load(filename, allow_pickle=True)
                if res[-1] is not None:
                    ress += [res[-1]]
        ress = np.array(ress)
        # `remove_outliers` is defined elsewhere in the project.
        ress = remove_outliers(ress)
        # Report NaN-ignoring mean and standard deviation of what remains.
        print(np.nanmean(ress), np.nanstd(ress))
def FINDflare(flux, error, N1=3, N2=1, N3=3,
              avg_std=False, std_window=7,
              returnbinary=False, debug=False):
    '''
    The algorithm for local changes due to flares defined by
    S. W. Chang et al. (2015), Eqn. 3a-d
    http://arxiv.org/abs/1510.01005

    Note: these equations originally in magnitude units, i.e. smaller
    values are increases in brightness. The signs have been changed, but
    coefficients have not been adjusted to change from log(flux) to flux.

    Note: this algorithm originally ran over sections without "changes" as
    defined by Change Point Analysis. May have serious problems for data
    with dramatic starspot activity. If possible, remove starspot first!

    Parameters
    ----------
    flux : numpy array
        data to search over
    error : numpy array
        errors corresponding to data.
    N1 : int, optional
        Coefficient from original paper (Default is 3)
        How many times above the stddev is required.
    N2 : int, optional
        Coefficient from original paper (Default is 1)
        How many times above the stddev and uncertainty is required
    N3 : int, optional
        Coefficient from original paper (Default is 3)
        The number of consecutive points required to flag as a flare
    avg_std : bool, optional
        Should the "sigma" in this data be computed by the median of
        the rolling().std()? (Default is False)
        (Not part of original algorithm)
    std_window : int, optional
        If avg_std=True, how big of a window should it use?
        (Default is 7 data points)
        (Not part of original algorithm)
    returnbinary : bool, optional
        Should code return the start and stop indices of flares (default,
        set to False) or a binary array where 1=flares (set to True)
        (Not part of original algorithm)
    debug : bool, optional
        If True, print the median, the sigma and the number of points
        passing each cut. (Default is False)

    Returns
    -------
    istart_i, istop_i : numpy int arrays
        Start and stop indices of each flare candidate
        (when returnbinary is False).
    bin_out : numpy int array
        Array shaped like ``flux`` holding 1 from each start index through
        its stop index inclusive, 0 elsewhere (when returnbinary is not
        False).
    '''
    med_i = np.nanmedian(flux)
    if debug is True:
        print("DEBUG: med_i = {}".format(med_i))

    if avg_std is False:
        sig_i = np.nanstd(flux)  # just the stddev of the window
    else:
        # take the median of the rolling stddev in the window.
        # better for windows w/ significant starspots being removed
        sig_i = np.nanmedian(
            pd.Series(flux).rolling(std_window, center=True).std())
    if debug is True:
        print("DEBUG: sig_i = {}".format(sig_i))

    ca = flux - med_i
    cb = np.abs(flux - med_i) / sig_i
    cc = np.abs(flux - med_i - error) / sig_i

    if debug is True:
        print("DEBUG: N0={}, N1={}, N2={}".format(np.sum(ca > 0),
                                                  np.sum(cb > N1),
                                                  np.sum(cc > N2)))

    # pass cuts from Eqns 3a,b,c
    ctmp = np.where((ca > 0) & (cb > N1) & (cc > N2))

    cindx = np.zeros_like(flux)
    cindx[ctmp] = 1

    # Need to find cumulative number of points that pass "ctmp"
    # Count in reverse!
    ConM = np.zeros_like(flux)
    # this requires a full pass thru the data -> bottleneck
    # (each entry is the length of the run of passing points that starts
    # there; the first and the last entries are left at 0 by this range)
    for k in range(2, len(flux)):
        ConM[-k] = cindx[-k] * (ConM[-(k - 1)] + cindx[-k])

    # these only defined between dl[i] and dr[i]
    # find flare start where values in ConM switch from 0 to >=N3
    istart_i = np.where((ConM[1:] >= N3) &
                        (ConM[0:-1] - ConM[1:] < 0))[0] + 1

    # use the value of ConM to determine how many points away stop is
    istop_i = istart_i + (ConM[istart_i] - 1)

    istart_i = np.array(istart_i, dtype='int')
    istop_i = np.array(istop_i, dtype='int')

    if returnbinary is False:
        return istart_i, istop_i

    bin_out = np.zeros_like(flux, dtype='int')
    for start, stop in zip(istart_i, istop_i):
        bin_out[start:stop + 1] = 1
    return bin_out