def peak_shape_qc(window_values, quality_flags):
    """
    Apply a QC check on each peak window to determine if the peak is of an
    acceptable shape, flagging windows whose fitted quadratic or linear
    coefficients are too large, then un-flagging low-signal windows.

    :param window_values: sequence of per-peak windows of signal values
    :param quality_flags: list of per-peak QC flags, updated in place
    :return: the updated quality_flags
    """
    first_slopes = []
    second_slopes = []

    for x in window_values:
        median = npmedian(x)
        normalised = [y / median for y in x]
        # polyfit returns coefficients highest order first: fit[0] is the
        # quadratic coefficient, fit[1] is the linear coefficient
        fit = polyfit(range(len(x)), normalised, 2)
        first_slopes.append(fit[0])
        second_slopes.append(fit[1])

    for i, x in enumerate(first_slopes):
        if abs(x) > 0.005:
            quality_flags[i] = 5
        # clear a previous flag once the quadratic coefficient is back
        # within bounds
        elif abs(x) <= 0.005 and quality_flags[i] == 5:
            quality_flags[i] = 1
        elif abs(second_slopes[i]) > 0.009:
            quality_flags[i] = 5
        elif abs(second_slopes[i]) < 0.009 and quality_flags[i] == 5:
            quality_flags[i] = 1

    # low-signal windows are marked good rather than suspect
    for i, x in enumerate(window_values):
        if npmedian(x) < 3800:
            quality_flags[i] = 1

    return quality_flags
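The snippets below rely on np*- and sp*-prefixed aliases for numpy and scipy functions. A minimal sketch of the import block they appear to assume follows; the exact aliasing in the original modules may differ (for example, both MPI and pi_value are used for numpy's pi, and both npstd and npstddev for numpy's std):

# Sketch of the assumed alias imports (an assumption, not the original):
from numpy import (
    array as nparray, arange as nparange, median as npmedian,
    mean as npmean, std as npstd, std as npstddev, abs as npabs,
    sum as npsum, diff as npdiff, roll as nproll, exp as npexp,
    sign as npsign, sqrt as npsqrt, full_like as npfull_like,
    isfinite as npisfinite, percentile as nppercentile,
    digitize as npdigitize, unique as npunique, argsort as npargsort,
    argmax as npargmax, floor as npfloor, cos as npcos, sin as npsin,
    min as npmin, max as npmax, nonzero as npnonzero, diag as npdiag,
    var as npvar, nan as npnan, inf as npinf, pi as MPI, pi as pi_value,
    polyfit,
)
from scipy.stats import skew as spskew, kurtosis as spkurtosis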
def lightcurve_moments(ftimes, fmags, ferrs):
    '''This calculates the weighted mean, stdev, median, MAD, percentiles,
    skew, kurtosis, fraction of LC beyond 1-stdev, and IQR.

    Parameters
    ----------

    ftimes,fmags,ferrs : np.array
        The input mag/flux time-series with all non-finite elements removed.

    Returns
    -------

    dict
        A dict with all of the light curve moments calculated.

    '''

    ndet = len(fmags)

    if ndet > 9:

        # now calculate the various things we need
        series_median = npmedian(fmags)
        series_wmean = (
            npsum(fmags * (1.0 / (ferrs * ferrs))) /
            npsum(1.0 / (ferrs * ferrs))
        )
        series_mad = npmedian(npabs(fmags - series_median))
        series_stdev = 1.483 * series_mad
        series_skew = spskew(fmags)
        series_kurtosis = spkurtosis(fmags)

        # get the beyond1std fraction
        series_above1std = len(fmags[fmags > (series_median + series_stdev)])
        series_below1std = len(fmags[fmags < (series_median - series_stdev)])

        # this is the fraction beyond 1 stdev
        series_beyond1std = (series_above1std + series_below1std) / float(ndet)

        # get the magnitude percentiles
        series_mag_percentiles = nppercentile(
            fmags,
            [5.0, 10, 17.5, 25, 32.5, 40, 60, 67.5, 75, 82.5, 90, 95]
        )

        return {
            'median': series_median,
            'wmean': series_wmean,
            'mad': series_mad,
            'stdev': series_stdev,
            'skew': series_skew,
            'kurtosis': series_kurtosis,
            'beyond1std': series_beyond1std,
            'mag_percentiles': series_mag_percentiles,
            'mag_iqr': series_mag_percentiles[8] - series_mag_percentiles[3],
        }

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate light curve moments')
        return None
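A short usage sketch, assuming the alias imports above and synthetic inputs (all names here are illustrative):

# Hypothetical usage: moments of a noisy sinusoidal light curve.
import numpy as np

t = np.linspace(0.0, 30.0, 300)
m = (12.0 + 0.05 * np.sin(2.0 * np.pi * t / 1.7) +
     np.random.normal(0.0, 0.01, t.size))
e = np.full_like(m, 0.01)

moments = lightcurve_moments(t, m, e)
if moments is not None:
    print(moments['median'], moments['stdev'], moments['mag_iqr'])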
def stetson_jindex(ftimes, fmags, ferrs, weightbytimediff=False):
    '''This calculates the Stetson index for the magseries, based on
    consecutive pairs of observations.

    Based on Nicole Loncke's work for her Planets and Life certificate at
    Princeton.

    This requires finite times, mags, and errs.

    If weightbytimediff is True, the Stetson index for any pair of mags will
    be reweighted by the difference in times between them using the scheme in
    Fruth+ 2012 and Zhang+ 2003 (as seen in Sokolovsky+ 2017)::

        w_i = exp(- (t_i+1 - t_i)/ delta_t )

    '''

    ndet = len(fmags)

    if ndet > 9:

        # get the median and ndet
        medmag = npmedian(fmags)

        # get the stetson index elements
        delta_prefactor = (ndet / (ndet - 1))
        sigma_i = delta_prefactor * (fmags - medmag) / ferrs

        # Nicole's clever trick to advance indices by 1 and do x_i*x_(i+1)
        sigma_j = nproll(sigma_i, 1)

        if weightbytimediff:

            time_i = ftimes
            time_j = nproll(ftimes, 1)
            difft = npdiff(ftimes)
            deltat = npmedian(difft)

            weights_i = npexp(-difft / deltat)
            products = (weights_i * sigma_i[1:] * sigma_j[1:])

        else:
            # ignore first elem since it's actually x_0*x_n
            products = (sigma_i * sigma_j)[1:]

        stetsonj = (
            npsum(npsign(products) * npsqrt(npabs(products)))
        ) / ndet

        return stetsonj

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson J index')
        return npnan
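A sketch comparing the unweighted and time-weighted variants on the synthetic series from the example above:

# Hypothetical comparison; inputs must already be finite.
j_plain = stetson_jindex(t, m, e, weightbytimediff=False)
j_weighted = stetson_jindex(t, m, e, weightbytimediff=True)
print(j_plain, j_weighted)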
def stetson_kindex(fmags, ferrs):
    '''
    This calculates the Stetson K index (robust measure of the kurtosis).

    Requires finite mags and errs.

    '''

    # use a fill in value for the errors if they're none
    if ferrs is None:
        ferrs = npfull_like(fmags, 0.005)

    ndet = len(fmags)

    if ndet > 9:

        # get the median and ndet
        medmag = npmedian(fmags)

        # get the stetson index elements
        delta_prefactor = (ndet / (ndet - 1))
        sigma_i = delta_prefactor * (fmags - medmag) / ferrs

        stetsonk = (
            npsum(npabs(sigma_i)) /
            (npsqrt(npsum(sigma_i * sigma_i))) *
            (ndet ** (-0.5))
        )

        return stetsonk

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson K index')
        return npnan
def median(numbers):
    if numpy:
        return npmedian(numbers)
    elif py3statistics:
        return p3median(numbers)

    quotient, remainder = divmod(len(numbers), 2)
    if remainder:
        return sorted(numbers)[quotient]
    return sum(sorted(numbers)[quotient - 1:quotient + 1]) / 2.
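The module-level numpy and py3statistics names read like import availability guards. A plausible setup for the surrounding module (an assumption, not the original):

# Hypothetical import guards assumed by median() above:
try:
    from numpy import median as npmedian
    numpy = True
except ImportError:
    numpy = False

try:
    from statistics import median as p3median
    py3statistics = True
except ImportError:
    py3statistics = False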
def median_along_line(critical_points, line):
    # project each critical point onto the line's direction and take the
    # median of the projected values
    v = list()
    for p in critical_points:
        v.append(line.direction_value_on_point(p))
    med_value = npmedian(array(v))

    # build the perpendicular line at the median value and intersect it
    # with the original line to get the median point on the line
    codirect = line.direc
    perp_line = Line2D(None, None,
                       coefs=[codirect.get_x(), codirect.get_y(), med_value])
    result = perp_line.intersection(line)
    assert (result is not None)
    assert (line.is_point_on_line(result))
    return result
def pwd_phasebin(phases, mags, binsize=0.002, minbin=9):
    '''
    This bins the phased mag series using the given binsize.

    '''

    bins = np.arange(0.0, 1.0, binsize)
    binnedphaseinds = npdigitize(phases, bins)

    binnedphases, binnedmags = [], []

    for x in npunique(binnedphaseinds):

        thisbin_inds = binnedphaseinds == x
        thisbin_phases = phases[thisbin_inds]
        thisbin_mags = mags[thisbin_inds]

        # require more than minbin points in this bin; note thisbin_inds is
        # a boolean mask over the full series, so the occupancy of the bin
        # (not the mask's size) is what must be checked here
        if thisbin_phases.size > minbin:
            binnedphases.append(npmedian(thisbin_phases))
            binnedmags.append(npmedian(thisbin_mags))

    return np.array(binnedphases), np.array(binnedmags)
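A quick usage sketch; phases and mags are assumed to be numpy arrays of a phased light curve:

# Bin a phased series to 0.01 in phase, keeping bins with >9 points.
binned_phases, binned_mags = pwd_phasebin(phases, mags,
                                          binsize=0.01, minbin=9)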
def window_medians(window_values):
    """
    Calculates the window medians for each peak window

    :param window_values: sequence of per-peak windows of signal values
    :return: window_medians, a list of the median of each window
    """
    # take the median across each row (one row per window)
    return list(npmedian(nparray(window_values), axis=1))
def _fourier_func(fourierparams, phase, mags):
    '''This returns a summed Fourier cosine series.

    Parameters
    ----------

    fourierparams : list
        This MUST be a flat list of the following form (matching how the
        coefficients are sliced out below)::

            [amplitude_1, amplitude_2, amplitude_3, ..., amplitude_X,
             phase_1, phase_2, phase_3, ..., phase_X]

        where X is the Fourier order.

    phase,mags : np.array
        The input phase and magnitude arrays to use as the basis for the
        cosine series. The phases are used directly to generate the values
        of the function, while the mags array is used to generate the zeroth
        order amplitude coefficient.

    Returns
    -------

    np.array
        The Fourier cosine series function evaluated over `phase`.

    '''

    # figure out the order from the length of the Fourier param list
    order = int(len(fourierparams) / 2)

    # get the amplitude and phase coefficients
    f_amp = fourierparams[:order]
    f_pha = fourierparams[order:]

    # calculate all the individual terms of the series
    f_orders = [
        f_amp[x] * npcos(2.0 * pi_value * x * phase + f_pha[x])
        for x in range(order)
    ]

    # this is the zeroth order coefficient - a constant equal to median mag
    total_f = npmedian(mags)

    # sum the series
    for fo in f_orders:
        total_f += fo

    return total_f
def stetson_kindex(fmags, ferrs):
    '''This calculates the Stetson K index (a robust measure of the kurtosis).

    Parameters
    ----------

    fmags,ferrs : np.array
        The input mag/flux time-series to process. Must have no non-finite
        elems.

    Returns
    -------

    float
        The Stetson K variability index.

    '''

    # use a fill in value for the errors if they're none
    if ferrs is None:
        ferrs = npfull_like(fmags, 0.005)

    ndet = len(fmags)

    if ndet > 9:

        # get the median and ndet
        medmag = npmedian(fmags)

        # get the stetson index elements
        delta_prefactor = (ndet/(ndet - 1))
        sigma_i = delta_prefactor*(fmags - medmag)/ferrs

        stetsonk = (
            npsum(npabs(sigma_i))/(npsqrt(npsum(sigma_i*sigma_i))) *
            (ndet**(-0.5))
        )

        return stetsonk

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson K index')
        return npnan
def sine_series_sum(fourierparams, times, mags, errs):
    '''This generates a sinusoidal light curve using a sine series.

    The series is generated using the coefficients provided in
    fourierparams. This is a sequence like so::

        [period,
         epoch,
         [ampl_1, ampl_2, ampl_3, ..., ampl_X],
         [pha_1, pha_2, pha_3, ..., pha_X]]

    where X is the Fourier order.

    '''

    period, epoch, famps, fphases = fourierparams

    # figure out the order from the length of the Fourier param list
    forder = len(famps)

    # phase the times with this period
    iphase = (times - epoch) / period
    iphase = iphase - npfloor(iphase)

    phasesortind = npargsort(iphase)
    phase = iphase[phasesortind]
    ptimes = times[phasesortind]
    pmags = mags[phasesortind]
    perrs = errs[phasesortind]

    # calculate all the individual terms of the series
    fseries = [
        famps[x] * npsin(2.0 * MPI * x * phase + fphases[x])
        for x in range(forder)
    ]

    # this is the zeroth order coefficient - a constant equal to median mag
    modelmags = npmedian(mags)

    # sum the series
    for fo in fseries:
        modelmags += fo

    return modelmags, phase, ptimes, pmags, perrs
def get_median_bisector(angle_to_bisector_map):
    """
    :type angle_to_bisector_map: dict
    :rtype : Line2D
    """
    # np.median needs a sequence, not a dict keys view
    med_angle = npmedian(array(list(angle_to_bisector_map.keys())))

    if len(angle_to_bisector_map) % 2 == 1:
        # odd count: the median angle is an actual key in the map
        bisector = angle_to_bisector_map[med_angle]
        return bisector
    else:
        # even count: average the directions of the two lines whose angles
        # bracket the median angle
        angle_below = max([a for a in angle_to_bisector_map.keys()
                           if a < med_angle])
        angle_above = min([a for a in angle_to_bisector_map.keys()
                           if a > med_angle])
        line_below = angle_to_bisector_map[angle_below]
        line_above = angle_to_bisector_map[angle_above]

        def invert_if_x_less_zero(vector):
            return vector.inverted() if vector.get_x() < 0.0 else vector

        direction_below = invert_if_x_less_zero(line_below.direc.normalized())
        direction_above = invert_if_x_less_zero(line_above.direc.normalized())
        med_line_direction = direction_above.sum(direction_below)

        return Line2D(point1=Vector2D(0.0, 0.0), point2=med_line_direction)
def _fourier_func(fourierparams, phase, mags):
    '''
    This returns a summed Fourier series generated using fourierparams.

    fourierparams is a sequence like so::

        [ampl_1, ampl_2, ampl_3, ..., ampl_X,
         pha_1, pha_2, pha_3, ..., pha_X]

    where X is the Fourier order.

    mags and phase MUST NOT have any nans.

    '''

    # figure out the order from the length of the Fourier param list
    order = int(len(fourierparams) / 2)

    # get the amplitude and phase coefficients
    f_amp = fourierparams[:order]
    f_pha = fourierparams[order:]

    # calculate all the individual terms of the series
    f_orders = [
        f_amp[x] * npcos(2.0 * MPI * x * phase + f_pha[x])
        for x in range(order)
    ]

    # this is the zeroth order coefficient - a constant equal to median mag
    total_f = npmedian(mags)

    # sum the series
    for fo in f_orders:
        total_f += fo

    return total_f
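A sketch of evaluating a second-order model with the flat parameter layout (amplitudes first, then phases), assuming the alias imports above:

# Hypothetical evaluation over a phase grid; the zeroth-order level
# comes from npmedian(base_mags) inside _fourier_func.
import numpy as np

phase_grid = np.linspace(0.0, 1.0, 500)
base_mags = np.full(500, 12.0)
params = [0.10, 0.05, 0.0, 1.5]  # [ampl_1, ampl_2, pha_1, pha_2]
model = _fourier_func(params, phase_grid, base_mags)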
def epd_magseries(times, mags, errs,
                  fsv, fdv, fkv, xcc, ycc, bgv, bge, iha, izd,
                  magsarefluxes=False,
                  epdsmooth_sigclip=3.0,
                  epdsmooth_windowsize=21,
                  epdsmooth_func=smooth_magseries_savgol,
                  epdsmooth_extraparams=None):
    '''Detrends a magnitude series using External Parameter Decorrelation.

    The HAT light-curve-specific external parameters are:

    S: the 'fsv' column,
    D: the 'fdv' column,
    K: the 'fkv' column,
    x coords: the 'xcc' column,
    y coords: the 'ycc' column,
    background: the 'bgv' column,
    background error: the 'bge' column,
    hour angle: the 'iha' column,
    zenith distance: the 'izd' column

    epdsmooth_windowsize is the number of points to smooth over to generate a
    smoothed light curve to train the regressor against.

    epdsmooth_func sets the smoothing filter function to use. A
    Savitsky-Golay filter is used to smooth the light curve by default.

    epdsmooth_extraparams is a dict of any extra filter params to supply to
    the smoothing function.

    NOTE: The errs are completely ignored and returned unchanged (except for
    sigclip and finite filtering).

    '''

    finind = np.isfinite(times) & np.isfinite(mags) & np.isfinite(errs)
    ftimes, fmags, ferrs = (times[::][finind],
                            mags[::][finind],
                            errs[::][finind])
    ffsv, ffdv, ffkv, fxcc, fycc, fbgv, fbge, fiha, fizd = (
        fsv[::][finind],
        fdv[::][finind],
        fkv[::][finind],
        xcc[::][finind],
        ycc[::][finind],
        bgv[::][finind],
        bge[::][finind],
        iha[::][finind],
        izd[::][finind],
    )

    stimes, smags, serrs, separams = sigclip_magseries_with_extparams(
        times, mags, errs,
        [fsv, fdv, fkv, xcc, ycc, bgv, bge, iha, izd],
        sigclip=epdsmooth_sigclip,
        magsarefluxes=magsarefluxes
    )
    sfsv, sfdv, sfkv, sxcc, sycc, sbgv, sbge, siha, sizd = separams

    # smooth the signal
    if isinstance(epdsmooth_extraparams, dict):
        smoothedmags = epdsmooth_func(smags,
                                      epdsmooth_windowsize,
                                      **epdsmooth_extraparams)
    else:
        smoothedmags = epdsmooth_func(smags, epdsmooth_windowsize)

    # initial fit coeffs
    initcoeffs = np.zeros(22)

    # fit the smoothed mags and find the EPD function coefficients
    leastsqfit = leastsq(_epd_residual,
                         initcoeffs,
                         args=(smoothedmags,
                               sfsv, sfdv, sfkv, sxcc,
                               sycc, sbgv, sbge, siha, sizd),
                         full_output=True)

    # if the fit succeeds, then get the EPD mags
    if leastsqfit[-1] in (1, 2, 3, 4):

        fitcoeffs = leastsqfit[0]
        epdfit = _epd_function(fitcoeffs,
                               ffsv, ffdv, ffkv, fxcc, fycc,
                               fbgv, fbge, fiha, fizd)

        epdmags = npmedian(fmags) + fmags - epdfit

        retdict = {'times': ftimes,
                   'mags': epdmags,
                   'errs': ferrs,
                   'fitcoeffs': fitcoeffs,
                   'fitinfo': leastsqfit,
                   'fitmags': epdfit}

        return retdict

    # if the solution fails, return nothing
    else:

        LOGERROR('EPD fit did not converge')
        return None
def plot_phased_mag_series(times, mags, period, errs=None, epoch='min',
                           outfile=None, sigclip=30.0, phasewrap=True,
                           phasesort=True, phasebin=None,
                           plotphaselim=[-0.8, 0.8], yrange=None):
    '''This plots a phased magnitude time series using the period provided.

    If epoch is None, uses the min(times) as the epoch.

    If epoch is a string 'min', then fits a cubic spline to the phased light
    curve using min(times), finds the magnitude minimum from the fitted light
    curve, then uses the corresponding time value as the epoch.

    If epoch is a float, then uses that directly to phase the light curve and
    as the epoch of the phased mag series plot.

    If outfile is None, then plots to a matplotlib interactive window. If
    outfile is a string denoting a filename, uses that to write a png/eps/pdf
    figure.

    '''

    if errs is not None:

        # remove nans
        find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
        ftimes, fmags, ferrs = times[find], mags[find], errs[find]

        # get the median and stdev = 1.483 x MAD
        median_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

        # sigclip next
        if sigclip:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

            LOGINFO('sigclip = %s: before = %s observations, '
                    'after = %s observations' %
                    (sigclip, len(times), len(stimes)))

        else:

            stimes = ftimes
            smags = fmags
            serrs = ferrs

    else:

        # remove nans
        find = npisfinite(times) & npisfinite(mags)
        ftimes, fmags, ferrs = times[find], mags[find], None

        # get the median and stdev = 1.483 x MAD
        median_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

        # sigclip next
        if sigclip:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = None

            LOGINFO('sigclip = %s: before = %s observations, '
                    'after = %s observations' %
                    (sigclip, len(times), len(stimes)))

        else:

            stimes = ftimes
            smags = fmags
            serrs = None

    # figure out the epoch, if it's None, use the min of the time
    if epoch is None:
        epoch = npmin(stimes)

    # if the epoch is 'min', then fit a spline to the light curve phased
    # using the min of the time, find the fit mag minimum and use the time
    # for that as the epoch
    elif isinstance(epoch, str) and epoch == 'min':
        spfit = spline_fit_magseries(stimes, smags, serrs, period)
        epoch = spfit['fitepoch']

    # now phase (and optionally, phase bin the light curve)
    if errs is not None:

        # phase the magseries
        phasedlc = phase_magseries_with_errs(stimes, smags, serrs,
                                             period, epoch,
                                             wrap=phasewrap,
                                             sort=phasesort)
        plotphase = phasedlc['phase']
        plotmags = phasedlc['mags']
        ploterrs = phasedlc['errs']

        # if we're supposed to bin the phases, do so
        if phasebin:
            binphasedlc = phase_bin_magseries_with_errs(plotphase,
                                                        plotmags,
                                                        ploterrs,
                                                        binsize=phasebin)
            plotphase = binphasedlc['binnedphases']
            plotmags = binphasedlc['binnedmags']
            ploterrs = binphasedlc['binnederrs']

    else:

        # phase the magseries
        phasedlc = phase_magseries(stimes, smags, period, epoch,
                                   wrap=phasewrap, sort=phasesort)
        plotphase = phasedlc['phase']
        plotmags = phasedlc['mags']
        ploterrs = None

        # if we're supposed to bin the phases, do so
        if phasebin:
            binphasedlc = phase_bin_magseries(plotphase, plotmags,
                                              binsize=phasebin)
            plotphase = binphasedlc['binnedphases']
            plotmags = binphasedlc['binnedmags']
            ploterrs = None

    # finally, make the plots

    # initialize the plot
    fig = plt.figure()
    fig.set_size_inches(7.5, 4.8)

    plt.errorbar(plotphase, plotmags, fmt='bo', yerr=ploterrs,
                 markersize=2.0, markeredgewidth=0.0, ecolor='#B2BEB5',
                 capsize=0)

    # make a grid
    plt.grid(color='#a9a9a9',
             alpha=0.9,
             zorder=0,
             linewidth=1.0,
             linestyle=':')

    # make lines for phase 0.0, 0.5, and -0.5
    plt.axvline(0.0, alpha=0.9, linestyle='dashed', color='g')
    plt.axvline(-0.5, alpha=0.9, linestyle='dashed', color='g')
    plt.axvline(0.5, alpha=0.9, linestyle='dashed', color='g')

    # fix the ticks to use no offsets
    plt.gca().get_yaxis().get_major_formatter().set_useOffset(False)
    plt.gca().get_xaxis().get_major_formatter().set_useOffset(False)

    # get the yrange
    if yrange and isinstance(yrange, list) and len(yrange) == 2:
        ymin, ymax = yrange
    else:
        ymin, ymax = plt.ylim()
    plt.ylim(ymax, ymin)

    # set the x axis limit
    if not plotphaselim:
        plot_xlim = plt.xlim()
        plt.xlim((npmin(plotphase) - 0.1,
                  npmax(plotphase) + 0.1))
    else:
        plt.xlim((plotphaselim[0], plotphaselim[1]))

    # set up the labels
    plt.xlabel('phase')
    plt.ylabel('magnitude')
    plt.title('using period: %.6f d and epoch: %.6f' % (period, epoch))

    # make the figure
    if outfile and isinstance(outfile, str):
        plt.savefig(outfile, bbox_inches='tight')
        plt.close()
        return os.path.abspath(outfile)
    else:
        plt.show()
        plt.close()
        return
def aov_theta(times, mags, errs, frequency,
              binsize=0.05, minbin=9):
    '''Calculates the Schwarzenberg-Czerny AoV statistic at a test frequency.

    Parameters
    ----------

    times,mags,errs : np.array
        The input time-series and associated errors.

    frequency : float
        The test frequency to calculate the theta statistic at.

    binsize : float
        The phase bin size to use.

    minbin : int
        The minimum number of items in a phase bin to consider in the
        calculation of the statistic.

    Returns
    -------

    theta_aov : float
        The value of the AoV statistic at the specified `frequency`.

    '''

    period = 1.0/frequency

    fold_time = times[0]

    phased = phase_magseries(times,
                             mags,
                             period,
                             fold_time,
                             wrap=False,
                             sort=True)

    phases = phased['phase']
    pmags = phased['mags']
    bins = nparange(0.0, 1.0, binsize)
    ndets = phases.size

    binnedphaseinds = npdigitize(phases, bins)

    bin_s1_tops = []
    bin_s2_tops = []
    binndets = []
    goodbins = 0

    all_xbar = npmedian(pmags)

    for x in npunique(binnedphaseinds):

        thisbin_inds = binnedphaseinds == x
        thisbin_mags = pmags[thisbin_inds]

        if thisbin_mags.size > minbin:

            thisbin_ndet = thisbin_mags.size
            thisbin_xbar = npmedian(thisbin_mags)

            # get s1
            thisbin_s1_top = (
                thisbin_ndet *
                (thisbin_xbar - all_xbar) *
                (thisbin_xbar - all_xbar)
            )

            # get s2
            thisbin_s2_top = npsum((thisbin_mags - all_xbar) *
                                   (thisbin_mags - all_xbar))

            bin_s1_tops.append(thisbin_s1_top)
            bin_s2_tops.append(thisbin_s2_top)
            binndets.append(thisbin_ndet)
            goodbins = goodbins + 1

    # turn the quantities into arrays
    bin_s1_tops = nparray(bin_s1_tops)
    bin_s2_tops = nparray(bin_s2_tops)
    binndets = nparray(binndets)

    # calculate s1 first
    s1 = npsum(bin_s1_tops)/(goodbins - 1.0)

    # then calculate s2
    s2 = npsum(bin_s2_tops)/(ndets - goodbins)

    theta_aov = s1/s2

    return theta_aov
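A sketch of a brute-force period scan using this statistic; times, mags, and errs are hypothetical finite numpy arrays, and phase_magseries is assumed available from the same module:

# Scan aov_theta over a coarse frequency grid and take the peak.
import numpy as np

freqs = np.linspace(1.0 / 10.0, 1.0 / 0.1, 2000)
thetas = np.array([aov_theta(times, mags, errs, f, binsize=0.05, minbin=9)
                   for f in freqs])
best_period = 1.0 / freqs[np.argmax(thetas)]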
def gaussianeb_fit_magseries(
        times, mags, errs,
        ebparams,
        param_bounds=None,
        scale_errs_redchisq_unity=True,
        sigclip=10.0,
        plotfit=False,
        magsarefluxes=False,
        verbose=True,
        curve_fit_kwargs=None,
):
    '''This fits a double inverted gaussian EB model to a magnitude time
    series.

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to fit the EB model to.

    ebparams : list of float
        This is a list containing the eclipsing binary parameters::

            ebparams = [period (time),
                        epoch (time),
                        pdepth (mags),
                        pduration (phase),
                        psdepthratio,
                        secondaryphase]

        `period` is the period in days.

        `epoch` is the time of primary minimum in JD.

        `pdepth` is the depth of the primary eclipse:

        - for magnitudes -> `pdepth` should be < 0
        - for fluxes     -> `pdepth` should be > 0

        `pduration` is the length of the primary eclipse in phase.

        `psdepthratio` is the ratio of the secondary eclipse depth to that of
        the primary eclipse.

        `secondaryphase` is the phase at which the minimum of the secondary
        eclipse is located. This effectively parameterizes eccentricity.

        If `epoch` is None, this function will do an initial spline fit to
        find an approximate minimum of the phased light curve using the given
        period.

        The `pdepth` provided is checked against the value of
        `magsarefluxes`. If `magsarefluxes = True`, the `pdepth` is forced to
        be > 0; if `magsarefluxes = False`, the `pdepth` is forced to be < 0.

    param_bounds : dict or None
        This is a dict of the upper and lower bounds on each fit
        parameter. Should be of the form::

            {'period':         (lower_bound_period, upper_bound_period),
             'epoch':          (lower_bound_epoch, upper_bound_epoch),
             'pdepth':         (lower_bound_pdepth, upper_bound_pdepth),
             'pduration':      (lower_bound_pduration, upper_bound_pduration),
             'psdepthratio':   (lower_bound_psdepthratio,
                                upper_bound_psdepthratio),
             'secondaryphase': (lower_bound_secondaryphase,
                                upper_bound_secondaryphase)}

        - To indicate that a parameter is fixed, use 'fixed' instead of a
          tuple providing its lower and upper bounds.

        - To indicate that a parameter has no bounds, don't include it in the
          param_bounds dict.

        If this is None, the default value of this kwarg will be::

            {'period':(0.0,np.inf),      # period is between 0 and inf
             'epoch':(0.0, np.inf),      # epoch is between 0 and inf
             'pdepth':(-np.inf,np.inf),  # pdepth between -np.inf and np.inf
             'pduration':(0.0,1.0),      # pduration is between 0.0 and 1.0
             'psdepthratio':(0.0,1.0),   # psdepthratio between 0.0 and 1.0
             'secondaryphase':(0.0,1.0)} # secondaryphase between 0.0 and 1.0

    scale_errs_redchisq_unity : bool
        If True, the standard errors on the fit parameters will be scaled to
        make the reduced chi-sq = 1.0. This sets the ``absolute_sigma`` kwarg
        for the ``scipy.optimize.curve_fit`` function to False.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed
        using the number provided as the sigma-multiplier to cut out from the
        input time-series.

        If a list of two ints/floats is provided, the function will perform
        an 'asymmetric' sigma-clip. The first element in this list is the
        sigma value to use for fainter flux/mag values; the second element in
        this list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly
        set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    magsarefluxes : bool
        If True, will treat the input values of `mags` as fluxes for purposes
        of plotting the fit and sig-clipping.

    plotfit : str or False
        If this is a string, this function will make a plot for the fit to
        the mag/flux time-series and writes the plot to the path specified
        here.

    verbose : bool
        If True, will indicate progress and warn of any problems.

    curve_fit_kwargs : dict or None
        If not None, this should be a dict containing extra kwargs to pass to
        the scipy.optimize.curve_fit function.

    Returns
    -------

    dict
        This function returns a dict containing the model fit parameters, the
        minimized chi-sq value and the reduced chi-sq value. The form of this
        dict is mostly standardized across all functions in this module::

            {
                'fittype':'gaussianeb',
                'fitinfo':{
                    'initialparams':the initial EB params provided,
                    'finalparams':the final model fit EB params,
                    'finalparamerrs':formal errors in the params,
                    'fitmags': the model fit mags,
                    'fitepoch': the epoch of minimum light for the fit,
                },
                'fitchisq': the minimized value of the fit's chi-sq,
                'fitredchisq':the reduced chi-sq value,
                'fitplotfile': the output fit plot if fitplot is not None,
                'magseries':{
                    'times':input times in phase order of the model,
                    'phase':the phases of the model mags,
                    'mags':input mags/fluxes in the phase order of the model,
                    'errs':errs in the phase order of the model,
                    'magsarefluxes':input value of magsarefluxes kwarg
                }
            }

    '''

    stimes, smags, serrs = sigclip_magseries(times, mags, errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    # get rid of zero errs
    nzind = npnonzero(serrs)
    stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]

    # check the ebparams
    ebperiod, ebepoch, ebdepth = ebparams[0:3]

    # check if we have a ebepoch to use
    if ebepoch is None:

        if verbose:
            LOGWARNING('no ebepoch given in ebparams, '
                       'trying to figure it out automatically...')

        # do a spline fit to figure out the approximate min of the LC
        try:
            spfit = spline_fit_magseries(times, mags, errs, ebperiod,
                                         sigclip=sigclip,
                                         magsarefluxes=magsarefluxes,
                                         verbose=verbose)
            ebepoch = spfit['fitinfo']['fitepoch']

        # if the spline-fit fails, try a savgol fit instead
        except Exception:
            sgfit = savgol_fit_magseries(times, mags, errs, ebperiod,
                                         sigclip=sigclip,
                                         magsarefluxes=magsarefluxes,
                                         verbose=verbose)
            ebepoch = sgfit['fitinfo']['fitepoch']

        # if everything failed, then bail out and ask for the ebepoch
        finally:

            if ebepoch is None:

                LOGERROR("couldn't automatically figure out the eb epoch, "
                         "can't continue. please provide it in ebparams.")

                # assemble the returndict
                returndict = {
                    'fittype': 'gaussianeb',
                    'fitinfo': {
                        'initialparams': ebparams,
                        'finalparams': None,
                        'finalparamerrs': None,
                        'fitmags': None,
                        'fitepoch': None,
                    },
                    'fitchisq': npnan,
                    'fitredchisq': npnan,
                    'fitplotfile': None,
                    'magseries': {
                        'phase': None,
                        'times': None,
                        'mags': None,
                        'errs': None,
                        'magsarefluxes': magsarefluxes,
                    },
                }

                return returndict

            else:

                if ebepoch.size > 1:
                    if verbose:
                        LOGWARNING('could not auto-find a single minimum '
                                   'for ebepoch, using the first one '
                                   'returned')
                    ebparams[1] = ebepoch[0]

                else:
                    if verbose:
                        LOGWARNING(
                            'using automatically determined ebepoch = %.5f' %
                            ebepoch
                        )
                    ebparams[1] = ebepoch.item()

    # next, check the ebdepth and fix it to the form required
    if magsarefluxes:
        if ebdepth < 0.0:
            ebparams[2] = -ebdepth
    else:
        if ebdepth > 0.0:
            ebparams[2] = -ebdepth

    # finally, do the fit
    try:

        # set up the fit parameter bounds
        if param_bounds is None:

            curvefit_bounds = (nparray([0.0, 0.0, -npinf, 0.0, 0.0, 0.0]),
                               nparray([npinf, npinf, npinf, 1.0, 1.0, 1.0]))
            fitfunc_fixed = {}

        else:

            # figure out the bounds
            lower_bounds = []
            upper_bounds = []
            fitfunc_fixed = {}

            for ind, key in enumerate(('period', 'epoch', 'pdepth',
                                       'pduration', 'psdepthratio',
                                       'secondaryphase')):

                # handle fixed parameters
                if (key in param_bounds and
                        isinstance(param_bounds[key], str) and
                        param_bounds[key] == 'fixed'):

                    lower_bounds.append(ebparams[ind] - 1.0e-7)
                    upper_bounds.append(ebparams[ind] + 1.0e-7)
                    fitfunc_fixed[key] = ebparams[ind]

                # handle parameters with lower and upper bounds
                elif key in param_bounds and isinstance(param_bounds[key],
                                                        (tuple, list)):

                    lower_bounds.append(param_bounds[key][0])
                    upper_bounds.append(param_bounds[key][1])

                # handle no parameter bounds
                else:

                    lower_bounds.append(-npinf)
                    upper_bounds.append(npinf)

            # generate the bounds sequence in the required format
            curvefit_bounds = (nparray(lower_bounds),
                               nparray(upper_bounds))

        #
        # set up the curve fit function
        #
        curvefit_func = partial(eclipses.invgauss_eclipses_curvefit_func,
                                zerolevel=npmedian(smags),
                                fixed_params=fitfunc_fixed)

        #
        # run the fit
        #
        if curve_fit_kwargs is not None:

            finalparams, covmatrix = curve_fit(
                curvefit_func,
                stimes, smags,
                p0=ebparams,
                sigma=serrs,
                bounds=curvefit_bounds,
                absolute_sigma=(not scale_errs_redchisq_unity),
                **curve_fit_kwargs
            )

        else:

            finalparams, covmatrix = curve_fit(
                curvefit_func,
                stimes, smags,
                p0=ebparams,
                sigma=serrs,
                bounds=curvefit_bounds,
                absolute_sigma=(not scale_errs_redchisq_unity),
            )

    except Exception:
        LOGEXCEPTION("curve_fit returned an exception")
        finalparams, covmatrix = None, None

    # if the fit succeeded, then we can return the final parameters
    if finalparams is not None and covmatrix is not None:

        # calculate the chisq and reduced chisq
        fitmags, phase, ptimes, pmags, perrs = (
            eclipses.invgauss_eclipses_func(finalparams,
                                            stimes, smags, serrs)
        )
        fitchisq = npsum(
            ((fitmags - pmags) * (fitmags - pmags)) / (perrs * perrs)
        )
        fitredchisq = fitchisq / (len(pmags) -
                                  len(finalparams) -
                                  len(fitfunc_fixed))

        stderrs = npsqrt(npdiag(covmatrix))

        if verbose:
            LOGINFO('final fit done. chisq = %.5f, reduced chisq = %.5f' %
                    (fitchisq, fitredchisq))

        # get the fit epoch
        fperiod, fepoch = finalparams[:2]

        # assemble the returndict
        returndict = {
            'fittype': 'gaussianeb',
            'fitinfo': {
                'initialparams': ebparams,
                'finalparams': finalparams,
                'finalparamerrs': stderrs,
                'fitmags': fitmags,
                'fitepoch': fepoch,
            },
            'fitchisq': fitchisq,
            'fitredchisq': fitredchisq,
            'fitplotfile': None,
            'magseries': {
                'phase': phase,
                'times': ptimes,
                'mags': pmags,
                'errs': perrs,
                'magsarefluxes': magsarefluxes,
            },
        }

        # make the fit plot if required
        if plotfit and isinstance(plotfit, str):

            make_fit_plot(phase, pmags, perrs, fitmags,
                          fperiod, ptimes.min(), fepoch,
                          plotfit,
                          magsarefluxes=magsarefluxes)

            returndict['fitplotfile'] = plotfit

        return returndict

    # if the leastsq fit failed, return nothing
    else:

        LOGERROR('eb-fit: least-squared fit to the light curve failed!')

        # assemble the returndict
        returndict = {
            'fittype': 'gaussianeb',
            'fitinfo': {
                'initialparams': ebparams,
                'finalparams': None,
                'finalparamerrs': None,
                'fitmags': None,
                'fitepoch': None,
            },
            'fitchisq': npnan,
            'fitredchisq': npnan,
            'fitplotfile': None,
            'magseries': {
                'phase': None,
                'times': None,
                'mags': None,
                'errs': None,
                'magsarefluxes': magsarefluxes,
            },
        }

        return returndict
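A hypothetical call sketch; times, mags, errs, the period value, and the output path are all illustrative:

# Fit an EB model with a known period, finding the epoch automatically
# (epoch=None) and keeping the period fixed during the fit.
ebparams = [2.2, None, -0.3, 0.12, 0.5, 0.5]
ebfit = gaussianeb_fit_magseries(times, mags, errs, ebparams,
                                 param_bounds={'period': 'fixed'},
                                 sigclip=10.0,
                                 magsarefluxes=False)
if ebfit['fitinfo']['finalparams'] is not None:
    print('reduced chi-sq:', ebfit['fitredchisq'])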
def rfepd_magseries(times, mags, errs,
                    externalparam_arrs,
                    magsarefluxes=False,
                    epdsmooth=True,
                    epdsmooth_sigclip=3.0,
                    epdsmooth_windowsize=201,
                    epdsmooth_func=smooth_magseries_savgol,
                    epdsmooth_extraparams=None,
                    rf_subsample=1.0,
                    rf_ntrees=300,
                    rf_extraparams={'criterion': 'mse',
                                    'oob_score': False,
                                    'n_jobs': -1}):
    '''This uses a RandomForestRegressor to de-correlate the given magseries.

    times, mags, errs are ndarrays of time and magnitude values to filter.

    externalparam_arrs is a list of ndarrays of external parameters to
    decorrelate against. These should all be the same size as times, mags,
    errs.

    epdsmooth = True sets the training light curve to be a smoothed version
    of the sigma-clipped light curve.

    epdsmooth_windowsize is the number of points to smooth over to generate a
    smoothed light curve to train the regressor against.

    epdsmooth_func sets the smoothing filter function to use. A
    Savitsky-Golay filter is used to smooth the light curve by default.

    epdsmooth_extraparams is a dict of any extra filter params to supply to
    the smoothing function.

    rf_subsample is the fraction of the size of the mags array to use for
    training the random forest regressor.

    rf_ntrees is the number of trees to use for the RandomForestRegressor.

    rf_extraparams is any extra params to provide to the
    RandomForestRegressor instance as a dict.

    Returns a dict with decorrelated mags and the usual info from the
    RandomForestRegressor: variable importances, etc.

    '''

    # get finite times, mags, errs
    finind = np.isfinite(times) & np.isfinite(mags) & np.isfinite(errs)
    ftimes, fmags, ferrs = (times[::][finind],
                            mags[::][finind],
                            errs[::][finind])
    finalparam_arrs = []
    for ep in externalparam_arrs:
        finalparam_arrs.append(ep[::][finind])

    stimes, smags, serrs, eparams = sigclip_magseries_with_extparams(
        times, mags, errs,
        externalparam_arrs,
        sigclip=epdsmooth_sigclip,
        magsarefluxes=magsarefluxes
    )

    # smoothing is optional for RFR because we train on a fraction of the mag
    # series and so should not require a smoothed input to fit a function to
    if epdsmooth:

        # smooth the signal
        if isinstance(epdsmooth_extraparams, dict):
            smoothedmags = epdsmooth_func(smags,
                                          epdsmooth_windowsize,
                                          **epdsmooth_extraparams)
        else:
            smoothedmags = epdsmooth_func(smags, epdsmooth_windowsize)

    else:

        smoothedmags = smags

    # set up the regressor
    if isinstance(rf_extraparams, dict):
        RFR = RandomForestRegressor(n_estimators=rf_ntrees,
                                    **rf_extraparams)
    else:
        RFR = RandomForestRegressor(n_estimators=rf_ntrees)

    # collect the features
    features = np.column_stack(eparams)

    # fit, then generate the predicted values, then get corrected values
    # we fit on a randomly selected subsample of all the mags
    if rf_subsample < 1.0:

        featureindices = np.arange(smoothedmags.size)

        # these are sorted because time-order should be important
        training_indices = np.sort(
            npr.choice(featureindices,
                       size=int(rf_subsample*smoothedmags.size),
                       replace=False)
        )

    else:

        training_indices = np.arange(smoothedmags.size)

    RFR.fit(features[training_indices, :], smoothedmags[training_indices])

    # predict on the full feature set
    flux_corrections = RFR.predict(np.column_stack(finalparam_arrs))
    corrected_fmags = npmedian(fmags) + fmags - flux_corrections

    retdict = {'times': ftimes,
               'mags': corrected_fmags,
               'errs': ferrs,
               'feature_importances': RFR.feature_importances_,
               'regressor': RFR}

    return retdict
def aov_periodfind(times, mags, errs,
                   magsarefluxes=False,
                   startp=None, endp=None,
                   stepsize=1.0e-4,
                   autofreq=True,
                   normalize=True,
                   phasebinsize=0.05,
                   mindetperbin=9,
                   nbestpeaks=5,
                   periodepsilon=0.1,
                   sigclip=10.0,
                   nworkers=None,
                   verbose=True):
    '''This runs a parallelized Analysis-of-Variance (AoV) period search.

    NOTE: `normalize = True` here as recommended by Schwarzenberg-Czerny
    1996, i.e. mags will be normalized to zero and rescaled so their variance
    = 1.0.

    Parameters
    ----------

    times,mags,errs : np.array
        The mag/flux time-series with associated measurement errors to run
        the period-finding on.

    magsarefluxes : bool
        If the input measurement values in `mags` and `errs` are in fluxes,
        set this to True.

    startp,endp : float or None
        The minimum and maximum periods to consider for the transit search.

    stepsize : float
        The step-size in frequency to use when constructing a frequency grid
        for the period search.

    autofreq : bool
        If this is True, the value of `stepsize` will be ignored and the
        :py:func:`astrobase.periodbase.get_frequency_grid` function will be
        used to generate a frequency grid based on `startp`, and `endp`. If
        these are None as well, `startp` will be set to 0.1 and `endp` will
        be set to `times.max() - times.min()`.

    normalize : bool
        This sets if the input time-series is normalized to 0.0 and rescaled
        such that its variance = 1.0. This is the recommended procedure by
        Schwarzenberg-Czerny 1996.

    phasebinsize : float
        The bin size in phase to use when calculating the AoV theta statistic
        at a test frequency.

    mindetperbin : int
        The minimum number of elements in a phase bin to consider it valid
        when calculating the AoV theta statistic at a test frequency.

    nbestpeaks : int
        The number of 'best' peaks to return from the periodogram results,
        starting from the global maximum of the periodogram peak values.

    periodepsilon : float
        The fractional difference between successive values of 'best' periods
        when sorting by periodogram power to consider them as separate
        periods (as opposed to part of the same periodogram peak). This is
        used to avoid broad peaks in the periodogram and make sure the 'best'
        periods returned are all actually independent.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed
        using the number provided as the sigma-multiplier to cut out from the
        input time-series.

        If a list of two ints/floats is provided, the function will perform
        an 'asymmetric' sigma-clip. The first element in this list is the
        sigma value to use for fainter flux/mag values; the second element in
        this list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly
        set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    nworkers : int
        The number of parallel workers to use when calculating the
        periodogram.

    verbose : bool
        If this is True, will indicate progress and details about the
        frequency grid used for the period search.

    Returns
    -------

    dict
        This function returns a dict, referred to as an `lspinfo` dict in
        other astrobase functions that operate on periodogram results. This
        is a standardized format across all astrobase period-finders, and is
        of the form below::

            {'bestperiod': the best period value in the periodogram,
             'bestlspval': the periodogram peak associated with the best
                           period,
             'nbestpeaks': the input value of nbestpeaks,
             'nbestlspvals': nbestpeaks-size list of best period peak values,
             'nbestperiods': nbestpeaks-size list of best periods,
             'lspvals': the full array of periodogram powers,
             'periods': the full array of periods considered,
             'method':'aov' -> the name of the period-finder method,
             'kwargs':{ dict of all of the input kwargs for record-keeping}}

    '''

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

        # get the frequencies to use
        if startp:
            endf = 1.0/startp
        else:
            # default start period is 0.1 day
            endf = 1.0/0.1

        if endp:
            startf = 1.0/endp
        else:
            # default end period is length of time series
            startf = 1.0/(stimes.max() - stimes.min())

        # if we're not using autofreq, then use the provided frequencies
        if not autofreq:
            frequencies = nparange(startf, endf, stepsize)
            if verbose:
                LOGINFO(
                    'using %s frequency points, '
                    'start P = %.3f, end P = %.3f' %
                    (frequencies.size, 1.0/endf, 1.0/startf)
                )
        else:
            # this gets an automatic grid of frequencies to use
            frequencies = get_frequency_grid(stimes,
                                             minfreq=startf,
                                             maxfreq=endf)
            if verbose:
                LOGINFO(
                    'using autofreq with %s frequency points, '
                    'start P = %.3f, end P = %.3f' %
                    (frequencies.size,
                     1.0/frequencies.max(),
                     1.0/frequencies.min())
                )

        # map to parallel workers
        if (not nworkers) or (nworkers > NCPUS):
            nworkers = NCPUS
            if verbose:
                LOGINFO('using %s workers...' % nworkers)

        pool = Pool(nworkers)

        # renormalize the working mags to zero and scale them so that the
        # variance = 1 for use with our LSP functions
        if normalize:
            nmags = (smags - npmedian(smags))/npstd(smags)
        else:
            nmags = smags

        tasks = [(stimes, nmags, serrs, x, phasebinsize, mindetperbin)
                 for x in frequencies]

        lsp = pool.map(_aov_worker, tasks)

        pool.close()
        pool.join()
        del pool

        lsp = nparray(lsp)
        periods = 1.0/frequencies

        # find the nbestpeaks for the periodogram:
        # 1. sort the lsp array by highest value first
        # 2. go down the values until we find five values that are separated
        #    by at least periodepsilon in period

        # make sure to filter out non-finite values
        finitepeakind = npisfinite(lsp)
        finlsp = lsp[finitepeakind]
        finperiods = periods[finitepeakind]

        # make sure that finlsp has finite values before we work on it
        try:

            bestperiodind = npargmax(finlsp)

        except ValueError:

            LOGERROR('no finite periodogram values '
                     'for this mag series, skipping...')
            return {'bestperiod': npnan,
                    'bestlspval': npnan,
                    'nbestpeaks': nbestpeaks,
                    'nbestlspvals': None,
                    'nbestperiods': None,
                    'lspvals': None,
                    'periods': None,
                    'method': 'aov',
                    'kwargs': {'startp': startp,
                               'endp': endp,
                               'stepsize': stepsize,
                               'normalize': normalize,
                               'phasebinsize': phasebinsize,
                               'mindetperbin': mindetperbin,
                               'autofreq': autofreq,
                               'periodepsilon': periodepsilon,
                               'nbestpeaks': nbestpeaks,
                               'sigclip': sigclip}}

        sortedlspind = npargsort(finlsp)[::-1]
        sortedlspperiods = finperiods[sortedlspind]
        sortedlspvals = finlsp[sortedlspind]

        # now get the nbestpeaks
        nbestperiods, nbestlspvals, peakcount = (
            [finperiods[bestperiodind]],
            [finlsp[bestperiodind]],
            1
        )
        prevperiod = sortedlspperiods[0]

        # find the best nbestpeaks in the lsp and their periods
        for period, lspval in zip(sortedlspperiods, sortedlspvals):

            if peakcount == nbestpeaks:
                break

            perioddiff = abs(period - prevperiod)
            bestperiodsdiff = [abs(period - x) for x in nbestperiods]

            # print('prevperiod = %s, thisperiod = %s, '
            #       'perioddiff = %s, peakcount = %s' %
            #       (prevperiod, period, perioddiff, peakcount))

            # this ensures that this period is different from the last
            # period and from all the other existing best periods by
            # periodepsilon to make sure we jump to an entire different peak
            # in the periodogram
            if (perioddiff > (periodepsilon*prevperiod) and
                    all(x > (periodepsilon*period)
                        for x in bestperiodsdiff)):
                nbestperiods.append(period)
                nbestlspvals.append(lspval)
                peakcount = peakcount + 1

            prevperiod = period

        return {'bestperiod': finperiods[bestperiodind],
                'bestlspval': finlsp[bestperiodind],
                'nbestpeaks': nbestpeaks,
                'nbestlspvals': nbestlspvals,
                'nbestperiods': nbestperiods,
                'lspvals': lsp,
                'periods': periods,
                'method': 'aov',
                'kwargs': {'startp': startp,
                           'endp': endp,
                           'stepsize': stepsize,
                           'normalize': normalize,
                           'phasebinsize': phasebinsize,
                           'mindetperbin': mindetperbin,
                           'autofreq': autofreq,
                           'periodepsilon': periodepsilon,
                           'nbestpeaks': nbestpeaks,
                           'sigclip': sigclip}}

    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        return {'bestperiod': npnan,
                'bestlspval': npnan,
                'nbestpeaks': nbestpeaks,
                'nbestlspvals': None,
                'nbestperiods': None,
                'lspvals': None,
                'periods': None,
                'method': 'aov',
                'kwargs': {'startp': startp,
                           'endp': endp,
                           'stepsize': stepsize,
                           'normalize': normalize,
                           'phasebinsize': phasebinsize,
                           'mindetperbin': mindetperbin,
                           'autofreq': autofreq,
                           'periodepsilon': periodepsilon,
                           'nbestpeaks': nbestpeaks,
                           'sigclip': sigclip}}
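A usage sketch; times, mags, and errs are hypothetical numpy arrays:

# Run the parallel AoV search over 0.1-10 day periods and inspect the
# best few peaks from the returned lspinfo dict.
aov = aov_periodfind(times, mags, errs,
                     startp=0.1, endp=10.0,
                     normalize=True, nbestpeaks=5)
print(aov['bestperiod'], aov['nbestperiods'])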
def invgauss_eclipses_func(ebparams, times, mags, errs):
    '''This returns a double eclipse shaped function. Suitable for first
    order modeling of eclipsing binaries.

    ebparams = [period (time),
                epoch (time),
                pdepth (mags),
                pduration (phase),
                psdepthratio,
                secondaryphase]

    period is the period in days

    epoch is the time of minimum in JD

    pdepth is the depth of the primary eclipse:

    - for magnitudes -> pdepth should be < 0
    - for fluxes     -> pdepth should be > 0

    pduration is the length of the primary eclipse in phase

    psdepthratio is the ratio in the eclipse depths:
    depth_secondary/depth_primary. This is generally the same as the ratio of
    the Teffs of the two stars.

    secondaryphase is the phase at which the minimum of the secondary eclipse
    is located. This effectively parameterizes eccentricity.

    All of these will then have fitted values after the fit is done.

    '''

    (period, epoch, pdepth,
     pduration, depthratio, secondaryphase) = ebparams

    # generate the phases
    iphase = (times - epoch)/period
    iphase = iphase - npfloor(iphase)

    phasesortind = npargsort(iphase)
    phase = iphase[phasesortind]
    ptimes = times[phasesortind]
    pmags = mags[phasesortind]
    perrs = errs[phasesortind]

    zerolevel = npmedian(pmags)
    modelmags = npfull_like(phase, zerolevel)

    primaryecl_amp = -pdepth
    secondaryecl_amp = -pdepth*depthratio

    # we use 5-sigma as full-width -> duration
    primaryecl_std = pduration/5.0
    # secondary eclipse has the same duration
    secondaryecl_std = pduration/5.0

    halfduration = pduration/2.0

    # phase indices
    primary_eclipse_ingress = (
        (phase >= (1.0 - halfduration)) & (phase <= 1.0)
    )
    primary_eclipse_egress = (
        (phase >= 0.0) & (phase <= halfduration)
    )

    secondary_eclipse_phase = (
        (phase >= (secondaryphase - halfduration)) &
        (phase <= (secondaryphase + halfduration))
    )

    # put in the eclipses
    modelmags[primary_eclipse_ingress] = (
        zerolevel + _gaussian(phase[primary_eclipse_ingress],
                              primaryecl_amp,
                              1.0,
                              primaryecl_std)
    )
    modelmags[primary_eclipse_egress] = (
        zerolevel + _gaussian(phase[primary_eclipse_egress],
                              primaryecl_amp,
                              0.0,
                              primaryecl_std)
    )
    modelmags[secondary_eclipse_phase] = (
        zerolevel + _gaussian(phase[secondary_eclipse_phase],
                              secondaryecl_amp,
                              secondaryphase,
                              secondaryecl_std)
    )

    return modelmags, phase, ptimes, pmags, perrs
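invgauss_eclipses_func calls a module-level _gaussian helper that is not shown here. A minimal sketch consistent with its (x, amp, loc, std) usage above, as an assumption rather than the original:

# Hypothetical _gaussian helper assumed by invgauss_eclipses_func:
def _gaussian(x, amp, loc, std):
    '''Evaluates a gaussian with peak amplitude amp, centered at loc,
    with standard deviation std, over the array x.'''
    return amp * npexp(-((x - loc) * (x - loc)) / (2.0 * std * std))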
def aov_theta(times, mags, errs, frequency,
              binsize=0.05, minbin=9):
    '''Calculates the Schwarzenberg-Czerny AoV statistic at a test frequency.

    '''

    period = 1.0/frequency
    fold_time = times[0]

    phased = phase_magseries(times,
                             mags,
                             period,
                             fold_time,
                             wrap=False,
                             sort=True)

    phases = phased['phase']
    pmags = phased['mags']
    bins = np.arange(0.0, 1.0, binsize)
    nbins = bins.size
    ndets = phases.size

    binnedphaseinds = npdigitize(phases, bins)

    bin_s1_tops = []
    bin_s2_tops = []
    binndets = []
    goodbins = 0

    all_xbar = npmedian(pmags)

    for x in npunique(binnedphaseinds):

        thisbin_inds = binnedphaseinds == x
        thisbin_phases = phases[thisbin_inds]
        thisbin_mags = pmags[thisbin_inds]

        if thisbin_mags.size > minbin:

            thisbin_ndet = thisbin_mags.size
            thisbin_xbar = npmedian(thisbin_mags)

            # get s1
            thisbin_s1_top = (thisbin_ndet *
                              (thisbin_xbar - all_xbar) *
                              (thisbin_xbar - all_xbar))

            # get s2
            thisbin_s2_top = npsum(
                (thisbin_mags - all_xbar) *
                (thisbin_mags - all_xbar)
            )

            bin_s1_tops.append(thisbin_s1_top)
            bin_s2_tops.append(thisbin_s2_top)
            binndets.append(thisbin_ndet)
            goodbins = goodbins + 1

    # turn the quantities into arrays
    bin_s1_tops = nparray(bin_s1_tops)
    bin_s2_tops = nparray(bin_s2_tops)
    binndets = nparray(binndets)

    # calculate s1 first
    s1 = npsum(bin_s1_tops)/(goodbins - 1.0)

    # then calculate s2
    s2 = npsum(bin_s2_tops)/(ndets - goodbins)

    theta_aov = s1/s2

    return theta_aov
def plot_mag_series(times, mags, errs=None, outfile=None,
                    sigclip=30.0, timebin=None, yrange=None):
    '''This plots a magnitude time series.

    If outfile is None, then plots to a matplotlib interactive window. If
    outfile is a string denoting a filename, uses that to write a png/eps/pdf
    figure.

    timebin is either a float indicating binsize in seconds, or None
    indicating no time-binning is required.

    '''

    if errs is not None:

        # remove nans
        find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
        ftimes, fmags, ferrs = times[find], mags[find], errs[find]

        # get the median and stdev = 1.483 x MAD
        median_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

        # sigclip next
        if sigclip:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

            LOGINFO('sigclip = %s: before = %s observations, '
                    'after = %s observations' %
                    (sigclip, len(times), len(stimes)))

        else:

            stimes = ftimes
            smags = fmags
            serrs = ferrs

    else:

        # remove nans
        find = npisfinite(times) & npisfinite(mags)
        ftimes, fmags, ferrs = times[find], mags[find], None

        # get the median and stdev = 1.483 x MAD
        median_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

        # sigclip next
        if sigclip:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = None

            LOGINFO('sigclip = %s: before = %s observations, '
                    'after = %s observations' %
                    (sigclip, len(times), len(stimes)))

        else:

            stimes = ftimes
            smags = fmags
            serrs = None

    # now we proceed to binning
    if timebin and errs is not None:

        binned = time_bin_magseries_with_errs(stimes, smags, serrs,
                                              binsize=timebin)
        btimes, bmags, berrs = (binned['binnedtimes'],
                                binned['binnedmags'],
                                binned['binnederrs'])

    elif timebin and errs is None:

        binned = time_bin_magseries(stimes, smags, binsize=timebin)
        btimes, bmags, berrs = (binned['binnedtimes'],
                                binned['binnedmags'],
                                None)

    else:

        btimes, bmags, berrs = stimes, smags, serrs

    # finally, proceed with plotting
    fig = plt.figure()
    fig.set_size_inches(7.5, 4.8)

    plt.errorbar(btimes, bmags, fmt='go', yerr=berrs,
                 markersize=2.0, markeredgewidth=0.0, ecolor='grey',
                 capsize=0)

    # make a grid
    plt.grid(color='#a9a9a9',
             alpha=0.9,
             zorder=0,
             linewidth=1.0,
             linestyle=':')

    # fix the ticks to use no offsets
    plt.gca().get_yaxis().get_major_formatter().set_useOffset(False)
    plt.gca().get_xaxis().get_major_formatter().set_useOffset(False)

    # get the yrange
    if yrange and isinstance(yrange, list) and len(yrange) == 2:
        ymin, ymax = yrange
    else:
        ymin, ymax = plt.ylim()
    plt.ylim(ymax, ymin)

    plt.xlim(npmin(btimes) - 0.001*npmin(btimes),
             npmax(btimes) + 0.001*npmin(btimes))

    plt.xlabel('time [JD]')
    plt.ylabel('magnitude')

    if outfile and isinstance(outfile, str):
        plt.savefig(outfile, bbox_inches='tight')
        plt.close()
        return os.path.abspath(outfile)
    else:
        plt.show()
        plt.close()
        return
def lightcurve_ptp_measures(ftimes, fmags, ferrs):
    '''This calculates various point-to-point measures (`eta` in Kim+ 2014).

    Parameters
    ----------

    ftimes,fmags,ferrs : np.array
        The input mag/flux time-series with all non-finite elements removed.

    Returns
    -------

    dict
        A dict with values of the point-to-point measures, including the
        `eta` variability index (often used as its inverse `inveta` to have
        the same sense as increasing variability index -> more likely a
        variable star).

    '''

    ndet = len(fmags)

    if ndet > 9:

        timediffs = npdiff(ftimes)

        # get rid of stuff with time diff = 0.0
        nzind = npnonzero(timediffs)
        ftimes, fmags, ferrs = ftimes[nzind], fmags[nzind], ferrs[nzind]

        # recalculate ndet and diffs
        ndet = ftimes.size
        timediffs = npdiff(ftimes)

        # calculate the point to point measures
        p2p_abs_magdiffs = npabs(npdiff(fmags))
        p2p_squared_magdiffs = npdiff(fmags) * npdiff(fmags)

        robstd = npmedian(npabs(fmags - npmedian(fmags))) * 1.483
        robvar = robstd * robstd

        # these are eta from the Kim+ 2014 paper - ratio of point-to-point
        # difference to the variance of the entire series

        # this is the robust version
        eta_robust = npmedian(p2p_abs_magdiffs) / robvar
        eta_robust = eta_robust / (ndet - 1.0)

        # this is the usual version
        eta_normal = npsum(p2p_squared_magdiffs) / npvar(fmags)
        eta_normal = eta_normal / (ndet - 1.0)

        timeweights = 1.0 / (timediffs * timediffs)

        # this is eta_e modified for uneven sampling from the Kim+ 2014 paper
        eta_uneven_normal = (
            (npsum(timeweights * p2p_squared_magdiffs) /
             (npvar(fmags) * npsum(timeweights))) *
            npmean(timeweights) *
            (ftimes.max() - ftimes.min()) *
            (ftimes.max() - ftimes.min())
        )

        # this is robust eta_e modified for uneven sampling from the
        # Kim+ 2014 paper
        eta_uneven_robust = (
            (npsum(timeweights * p2p_abs_magdiffs) /
             (robvar * npsum(timeweights))) *
            npmedian(timeweights) *
            (ftimes[-1] - ftimes[0]) *
            (ftimes[-1] - ftimes[0])
        )

        return {
            'eta_normal': eta_normal,
            'eta_robust': eta_robust,
            'eta_uneven_normal': eta_uneven_normal,
            'eta_uneven_robust': eta_uneven_robust
        }

    else:

        return None
def sigclip_magseries(times, mags, errs, sigclip=None, iterative=False, niterations=None, meanormedian='median', magsarefluxes=False): ''' Select the finite times, magnitudes (or fluxes), and errors from the passed values, and apply symmetric or asymmetric sigma clipping to them. Returns sigma-clipped times, mags, and errs. Args: times (np.array): ... mags (np.array): numpy array to sigma-clip. Does not assume all values are finite. Does not assume anything about whether they're positive/negative. errs (np.array): ... iterative (bool): True if you want iterative sigma-clipping. If niterations is not set and this is True, sigma-clipping is iterated until no more points are removed. niterations (int): maximum number of iterations to perform for sigma-clipping. If None, the iterative arg takes precedence, and iterative=True will sigma-clip until no more points are removed. If niterations is not None and iterative is False, niterations takes precedence and iteration will occur. meanormedian (string): either 'mean' for sigma-clipping based on the mean value, or 'median' for sigma-clipping based on the median value. Default is 'median'. magsarefluxes (bool): True if your "mags" are in fact fluxes, i.e. if "dimming" corresponds to your "mags" getting smaller. sigclip (float or list): If float, apply symmetric sigma clipping. If list, e.g., [10., 3.], will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. Returns: stimes, smags, serrs: (sigmaclipped values of each). ''' returnerrs = True # fake the errors if they don't exist # this is inconsequential to sigma-clipping # we don't return these dummy values if the input errs are None if errs is None: # assume 0.1% errors if not given # this should work for mags and fluxes errs = 0.001 * mags returnerrs = False # filter the input times, mags, errs; do sigclipping and normalization find = npisfinite(times) & npisfinite(mags) & npisfinite(errs) ftimes, fmags, ferrs = times[find], mags[find], errs[find] # get the center value and stdev if meanormedian == 'median': # stddev = 1.483 x MAD center_mag = npmedian(fmags) stddev_mag = (npmedian(npabs(fmags - center_mag))) * 1.483 elif meanormedian == 'mean': center_mag = npmean(fmags) stddev_mag = npstddev(fmags) else: LOGWARNING("unrecognized meanormedian value given to " "sigclip_magseries: %s, defaulting to 'median'" % meanormedian) meanormedian = 'median' center_mag = npmedian(fmags) stddev_mag = (npmedian(npabs(fmags - center_mag))) * 1.483 # sigclip next for a single sigclip value if sigclip and isinstance(sigclip, (float, int)): if not iterative and niterations is None: sigind = (npabs(fmags - center_mag)) < (sigclip * stddev_mag) stimes = ftimes[sigind] smags = fmags[sigind] serrs = ferrs[sigind] else: # # iterative version adapted from scipy.stats.sigmaclip # # First, if niterations is not set, iterate until covergence if niterations is None: delta = 1 this_times = ftimes this_mags = fmags this_errs = ferrs while delta: if meanormedian == 'mean': this_center = npmean(this_mags) this_stdev = npstddev(this_mags) elif meanormedian == 'median': this_center = npmedian(this_mags) this_stdev = (npmedian( npabs(this_mags - this_center))) * 1.483 this_size = this_mags.size # apply the sigclip tsi = ((npabs(this_mags - this_center)) < (sigclip * this_stdev)) # update the arrays this_times = this_times[tsi] this_mags = 
this_mags[tsi] this_errs = this_errs[tsi] # update delta and go to the top of the loop delta = this_size - this_mags.size else: # If iterating only a certain number of times this_times = ftimes this_mags = fmags this_errs = ferrs iter_num = 0 delta = 1 while iter_num < niterations and delta: if meanormedian == 'mean': this_center = npmean(this_mags) this_stdev = npstddev(this_mags) elif meanormedian == 'median': this_center = npmedian(this_mags) this_stdev = (npmedian( npabs(this_mags - this_center))) * 1.483 this_size = this_mags.size # apply the sigclip tsi = ((npabs(this_mags - this_center)) < (sigclip * this_stdev)) # update the arrays this_times = this_times[tsi] this_mags = this_mags[tsi] this_errs = this_errs[tsi] # update the number of iterations and delta and # go to the top of the loop delta = this_size - this_mags.size iter_num += 1 # final sigclipped versions stimes, smags, serrs = this_times, this_mags, this_errs # this handles sigclipping for asymmetric +ve and -ve clip values elif sigclip and isinstance(sigclip, list) and len(sigclip) == 2: # sigclip is passed as [dimmingclip, brighteningclip] dimmingclip = sigclip[0] brighteningclip = sigclip[1] if not iterative and niterations is None: if magsarefluxes: nottoodimind = ((fmags - center_mag) > (-dimmingclip * stddev_mag)) nottoobrightind = ((fmags - center_mag) < (brighteningclip * stddev_mag)) else: nottoodimind = ((fmags - center_mag) < (dimmingclip * stddev_mag)) nottoobrightind = ((fmags - center_mag) > (-brighteningclip * stddev_mag)) sigind = nottoodimind & nottoobrightind stimes = ftimes[sigind] smags = fmags[sigind] serrs = ferrs[sigind] else: # # iterative version adapted from scipy.stats.sigmaclip # if niterations is None: delta = 1 this_times = ftimes this_mags = fmags this_errs = ferrs while delta: if meanormedian == 'mean': this_center = npmean(this_mags) this_stdev = npstddev(this_mags) elif meanormedian == 'median': this_center = npmedian(this_mags) this_stdev = (npmedian( npabs(this_mags - this_center))) * 1.483 this_size = this_mags.size if magsarefluxes: nottoodimind = ((this_mags - this_center) > (-dimmingclip * this_stdev)) nottoobrightind = ((this_mags - this_center) < (brighteningclip * this_stdev)) else: nottoodimind = ((this_mags - this_center) < (dimmingclip * this_stdev)) nottoobrightind = ((this_mags - this_center) > (-brighteningclip * this_stdev)) # apply the sigclip tsi = nottoodimind & nottoobrightind # update the arrays this_times = this_times[tsi] this_mags = this_mags[tsi] this_errs = this_errs[tsi] # update delta and go to top of the loop delta = this_size - this_mags.size else: # If iterating only a certain number of times this_times = ftimes this_mags = fmags this_errs = ferrs iter_num = 0 delta = 1 while iter_num < niterations and delta: if meanormedian == 'mean': this_center = npmean(this_mags) this_stdev = npstddev(this_mags) elif meanormedian == 'median': this_center = npmedian(this_mags) this_stdev = (npmedian( npabs(this_mags - this_center))) * 1.483 this_size = this_mags.size if magsarefluxes: nottoodimind = ((this_mags - this_center) > (-dimmingclip * this_stdev)) nottoobrightind = ((this_mags - this_center) < (brighteningclip * this_stdev)) else: nottoodimind = ((this_mags - this_center) < (dimmingclip * this_stdev)) nottoobrightind = ((this_mags - this_center) > (-brighteningclip * this_stdev)) # apply the sigclip tsi = nottoodimind & nottoobrightind # update the arrays this_times = this_times[tsi] this_mags = this_mags[tsi] this_errs = this_errs[tsi] # update the number of 
iterations and delta # and go to top of the loop delta = this_size - this_mags.size iter_num += 1 # final sigclipped versions stimes, smags, serrs = this_times, this_mags, this_errs else: stimes = ftimes smags = fmags serrs = ferrs if returnerrs: return stimes, smags, serrs else: return stimes, smags, None
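A minimal usage sketch for sigclip_magseries (hypothetical synthetic data; assumes numpy is importable as np alongside this module's own imports):

import numpy as np

rng = np.random.RandomState(42)
times = np.linspace(0.0, 10.0, 1000)
mags = 12.0 + 0.01 * rng.randn(1000)
mags[::97] += 0.5                        # inject outliers to be clipped
errs = np.full_like(mags, 0.01)

# symmetric 3-sigma clip about the median
stimes, smags, serrs = sigclip_magseries(times, mags, errs, sigclip=3.0)

# asymmetric clip: keep up to 10-sigma dimmings but only 3-sigma brightenings
stimes2, smags2, serrs2 = sigclip_magseries(times, mags, errs,
                                            sigclip=[10.0, 3.0],
                                            magsarefluxes=False)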
def extract_features(filename, is_url=False): '''Extracts features to be used in text image classifier. :param filename: input image :param is_url: is input image a url or a file path on disk :return: tuple of features: (average_slope, median_slope, average_tilt, median_tilt, median_differences, average_differences, nr_straight_lines) Most relevant ones are average_slope, average_differences and nr_straight_lines. ''' if is_url: filedata = urllib2.urlopen(filename).read() imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1) cv.SetData(imagefiledata, filedata, len(filedata)) src = cv.DecodeImageM(imagefiledata, cv.CV_LOAD_IMAGE_GRAYSCALE) else: src = cv.LoadImage(filename, cv.CV_LOAD_IMAGE_GRAYSCALE) # normalize size normalized_size = 400 # smaller dimension will be 400, longer dimension will be proportional orig_size = cv.GetSize(src) max_dim_idx = max(enumerate(orig_size), key=lambda l: l[1])[0] min_dim_idx = [idx for idx in [0, 1] if idx != max_dim_idx][0] new_size = [0, 0] new_size[min_dim_idx] = normalized_size new_size[max_dim_idx] = int( float(orig_size[max_dim_idx]) / orig_size[min_dim_idx] * normalized_size) dst = cv.CreateImage(new_size, 8, 1) cv.Resize(src, dst) # cv.SaveImage("/tmp/resized.jpg",dst) src = dst dst = cv.CreateImage(cv.GetSize(src), 8, 1) color_dst = cv.CreateImage(cv.GetSize(src), 8, 3) storage = cv.CreateMemStorage(0) cv.Canny(src, dst, 50, 200, 3) cv.CvtColor(dst, color_dst, cv.CV_GRAY2BGR) slopes = [] # difference between xs or ys - variant of slope tilts = [] # x coordinates of horizontal lines horizontals = [] # y coordinates of vertical lines verticals = [] if USE_STANDARD: coords = cv.HoughLines2(dst, storage, cv.CV_HOUGH_STANDARD, 1, pi / 180, 50, 50, 10) lines = [] for coord in coords: (rho, theta) = coord a = cos(theta) b = sin(theta) x0 = a * rho y0 = b * rho pt1 = (cv.Round(x0 + 1000 * (-b)), cv.Round(y0 + 1000 * (a))) pt2 = (cv.Round(x0 - 1000 * (-b)), cv.Round(y0 - 1000 * (a))) lines += [(pt1, pt2)] else: lines = cv.HoughLines2(dst, storage, cv.CV_HOUGH_PROBABILISTIC, 1, pi / 180, 50, 50, 10) # eliminate duplicates - there are many especially with the standard version # first round the coordinates to integers divisible with 5 (to eliminate different but really close ones) # TODO # lines = list(set(map(lambda l: tuple([int(p) - int(p)%5 for p in l]), lines))) nr_straight_lines = 0 for line in lines: (pt1, pt2) = line # compute slope, rotate the line so that the slope is smallest # (slope is either delta x/ delta y or the reverse) # add smoothing term in denominator in case of 0 slope = min( abs(pt1[1] - pt2[1]), (abs(pt1[0] - pt2[0]))) / (max(abs(pt1[1] - pt2[1]), (abs(pt1[0] - pt2[0]))) + 0.01) # if slope < 0.1: # if slope < 5: if slope < 0.05: if abs(pt1[0] - pt2[0]) < abs(pt1[1] - pt2[1]): # means it's a horizontal line horizontals.append(pt1[0]) else: verticals.append(pt1[1]) if slope < 0.05: # if slope < 5: # if slope < 0.1: nr_straight_lines += 1 slopes.append(slope) tilts.append(min(abs(pt1[1] - pt2[1]), (abs(pt1[0] - pt2[0])))) # print slope average_slope = sum(slopes) / float(len(slopes)) median_slope = npmedian(nparray(slopes)) average_tilt = sum(tilts) / float(len(tilts)) median_tilt = npmedian(nparray(tilts)) differences = [] horizontals = sorted(horizontals) verticals = sorted(verticals) print "x_differences:" for (i, x) in enumerate(horizontals): if i > 0: # print abs(horizontals[i] - horizontals[i-1]) differences.append(abs(horizontals[i] - horizontals[i - 1])) print "y_differences:" for (i, y) in enumerate(verticals): if i 
> 0: # print abs(verticals[i] - verticals[i-1]) differences.append(abs(verticals[i] - verticals[i - 1])) print filename print "average_slope:", average_slope print "median_slope:", median_slope print "average_tilt:", average_tilt print "median_tilt:", median_tilt median_differences = npmedian(nparray(differences)) print "median_differences:", median_differences if not differences: # big random number for average difference average_differences = 50 else: average_differences = sum(differences) / float(len(differences)) print "average_differences:", average_differences print "nr_lines:", nr_straight_lines # print "sorted xs:", sorted(lines) return (average_slope, median_slope, average_tilt, median_tilt, median_differences, average_differences, nr_straight_lines)
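A hedged call sketch for extract_features; like the function itself it needs Python 2 and the legacy OpenCV 1.x `cv` bindings, and 'page.png' below is only an illustrative path. Note the function assumes at least one line is detected (otherwise len(slopes) is zero and the averages divide by zero):

# Python 2 + legacy OpenCV 'cv' bindings, matching the function above
features = extract_features('page.png', is_url=False)
(average_slope, median_slope, average_tilt, median_tilt,
 median_differences, average_differences, nr_straight_lines) = features
# text/document scans tend to give a small average_slope and many straight lines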
def stellingwerf_pdm(times, mags, errs, magsarefluxes=False, autofreq=True, startp=None, endp=None, normalize=False, stepsize=1.0e-4, phasebinsize=0.05, mindetperbin=9, nbestpeaks=5, periodepsilon=0.1, # 0.1 sigclip=10.0, nworkers=None, verbose=True): '''This runs a parallel Stellingwerf PDM period search. ''' # get rid of nans first and sigclip stimes, smags, serrs = sigclip_magseries(times, mags, errs, magsarefluxes=magsarefluxes, sigclip=sigclip) # make sure there are enough points to calculate a spectrum if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9: # get the frequencies to use if startp: endf = 1.0/startp else: # default start period is 0.1 day endf = 1.0/0.1 if endp: startf = 1.0/endp else: # default end period is length of time series startf = 1.0/(stimes.max() - stimes.min()) # if we're not using autofreq, then use the provided frequencies if not autofreq: frequencies = np.arange(startf, endf, stepsize) if verbose: LOGINFO( 'using %s frequency points, start P = %.3f, end P = %.3f' % (frequencies.size, 1.0/endf, 1.0/startf) ) else: # this gets an automatic grid of frequencies to use frequencies = get_frequency_grid(stimes, minfreq=startf, maxfreq=endf) if verbose: LOGINFO( 'using autofreq with %s frequency points, ' 'start P = %.3f, end P = %.3f' % (frequencies.size, 1.0/frequencies.max(), 1.0/frequencies.min()) ) # map to parallel workers if (not nworkers) or (nworkers > NCPUS): nworkers = NCPUS if verbose: LOGINFO('using %s workers...' % nworkers) pool = Pool(nworkers) # renormalize the working mags to zero and scale them so that the # variance = 1 for use with our LSP functions if normalize: nmags = (smags - npmedian(smags))/npstd(smags) else: nmags = smags tasks = [(stimes, nmags, serrs, x, phasebinsize, mindetperbin) for x in frequencies] lsp = pool.map(stellingwerf_pdm_worker, tasks) pool.close() pool.join() del pool lsp = nparray(lsp) periods = 1.0/frequencies # find the nbestpeaks for the periodogram: 1. sort the lsp array by # lowest value first 2. go down the values until we find five values # that are separated by at least periodepsilon in period # make sure to filter out the non-finite values of lsp finitepeakind = npisfinite(lsp) finlsp = lsp[finitepeakind] finperiods = periods[finitepeakind] # finlsp might not have any finite values if the period finding # failed. if so, argmin will return a ValueError. 
try: bestperiodind = npargmin(finlsp) except ValueError: LOGERROR('no finite periodogram values for ' 'this mag series, skipping...') return {'bestperiod':npnan, 'bestlspval':npnan, 'nbestpeaks':nbestpeaks, 'nbestlspvals':None, 'nbestperiods':None, 'lspvals':None, 'periods':None, 'method':'pdm', 'kwargs':{'startp':startp, 'endp':endp, 'stepsize':stepsize, 'normalize':normalize, 'phasebinsize':phasebinsize, 'mindetperbin':mindetperbin, 'autofreq':autofreq, 'periodepsilon':periodepsilon, 'nbestpeaks':nbestpeaks, 'sigclip':sigclip}} sortedlspind = np.argsort(finlsp) sortedlspperiods = finperiods[sortedlspind] sortedlspvals = finlsp[sortedlspind] prevbestlspval = sortedlspvals[0] # now get the nbestpeaks nbestperiods, nbestlspvals, peakcount = ( [finperiods[bestperiodind]], [finlsp[bestperiodind]], 1 ) prevperiod = sortedlspperiods[0] # find the best nbestpeaks in the lsp and their periods for period, lspval in zip(sortedlspperiods, sortedlspvals): if peakcount == nbestpeaks: break perioddiff = abs(period - prevperiod) bestperiodsdiff = [abs(period - x) for x in nbestperiods] # print('prevperiod = %s, thisperiod = %s, ' # 'perioddiff = %s, peakcount = %s' % # (prevperiod, period, perioddiff, peakcount)) # this ensures that this period is different from the last # period and from all the other existing best periods by # periodepsilon to make sure we jump to an entire different peak # in the periodogram if (perioddiff > (periodepsilon*prevperiod) and all(x > (periodepsilon*prevperiod) for x in bestperiodsdiff)): nbestperiods.append(period) nbestlspvals.append(lspval) peakcount = peakcount + 1 prevperiod = period return {'bestperiod':finperiods[bestperiodind], 'bestlspval':finlsp[bestperiodind], 'nbestpeaks':nbestpeaks, 'nbestlspvals':nbestlspvals, 'nbestperiods':nbestperiods, 'lspvals':lsp, 'periods':periods, 'method':'pdm', 'kwargs':{'startp':startp, 'endp':endp, 'stepsize':stepsize, 'normalize':normalize, 'phasebinsize':phasebinsize, 'mindetperbin':mindetperbin, 'autofreq':autofreq, 'periodepsilon':periodepsilon, 'nbestpeaks':nbestpeaks, 'sigclip':sigclip}} else: LOGERROR('no good detections for these times and mags, skipping...') return {'bestperiod':npnan, 'bestlspval':npnan, 'nbestpeaks':nbestpeaks, 'nbestlspvals':None, 'nbestperiods':None, 'lspvals':None, 'periods':None, 'method':'pdm', 'kwargs':{'startp':startp, 'endp':endp, 'stepsize':stepsize, 'normalize':normalize, 'phasebinsize':phasebinsize, 'mindetperbin':mindetperbin, 'autofreq':autofreq, 'periodepsilon':periodepsilon, 'nbestpeaks':nbestpeaks, 'sigclip':sigclip}}
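A usage sketch for stellingwerf_pdm on a hypothetical sinusoidal variable (assumes numpy as np plus this module's imports; since the function spawns a multiprocessing Pool, run it under an `if __name__ == '__main__':` guard on spawn-based platforms):

import numpy as np

rng = np.random.RandomState(0)
times = np.sort(rng.uniform(0.0, 100.0, 2000))
mags = (12.0 + 0.05 * np.sin(2.0 * np.pi * times / 3.2165)
        + 0.01 * rng.randn(times.size))
errs = np.full_like(mags, 0.01)

pdm = stellingwerf_pdm(times, mags, errs, startp=0.5, endp=10.0, verbose=False)
if np.isfinite(pdm['bestperiod']):
    # PDM minimizes the phase-binned variance statistic, so the best period
    # should land near the injected 3.2165 days
    print('best PDM period: %.5f' % pdm['bestperiod'])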
def autocorr_magseries(times, mags, errs, maxlags=1000, func=_autocorr_func3, fillgaps=0.0, filterwindow=11, forcetimebin=None, sigclip=3.0, magsarefluxes=False, verbose=True): '''This calculates the ACF of a light curve. This will pre-process the light curve to fill in all the gaps and normalize everything to zero. If `fillgaps = 'noiselevel'`, fills the gaps with the noise level obtained via the smoothing procedure described under `fillgaps` below. If `fillgaps = 'nan'`, fills the gaps with `np.nan`. Parameters ---------- times,mags,errs : np.array The measurement time-series and associated errors. maxlags : int The maximum number of lags to calculate. func : Python function This is the function used to calculate the autocorrelation value at each lag. fillgaps : 'noiselevel' or float This sets what to use to fill in gaps in the time series. If this is 'noiselevel', will smooth the light curve using a point window size of `filterwindow` (this should be an odd integer), subtract the smoothed LC from the actual LC and estimate the RMS. This RMS will be used to fill in the gaps. Other useful values here are 0.0 and npnan. filterwindow : int The light curve's smoothing filter window size to use if `fillgaps='noiselevel'`. forcetimebin : None or float This is used to force a particular cadence in the light curve other than the automatically determined cadence. This effectively rebins the light curve to this cadence. This should be in the same time units as `times`. sigclip : float or int or sequence of two floats/ints or None If a single float or int, a symmetric sigma-clip will be performed using the number provided as the sigma-multiplier to cut out from the input time-series. If a list of two ints/floats is provided, the function will perform an 'asymmetric' sigma-clip. The first element in this list is the sigma value to use for fainter flux/mag values; the second element in this list is the sigma value to use for brighter flux/mag values. For example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. If `sigclip` is None, no sigma-clipping will be performed, and the time-series (with non-finite elems removed) will be passed through to the output. magsarefluxes : bool If your input measurements in `mags` are actually fluxes instead of mags, set this to True. verbose : bool If True, will indicate progress and report errors. Returns ------- dict A dict of the following form is returned:: {'itimes': the interpolated time values after gap-filling, 'imags': the interpolated mag/flux values after gap-filling, 'ierrs': the interpolated err values after gap-filling, 'cadence': the cadence of the output mag/flux time-series, 'minitime': the minimum value of the interpolated times array, 'lags': the lags used to calculate the auto-correlation function, 'acf': the value of the ACF at each lag used} ''' # get the gap-filled timeseries interpolated = fill_magseries_gaps(times, mags, errs, fillgaps=fillgaps, forcetimebin=forcetimebin, sigclip=sigclip, magsarefluxes=magsarefluxes, filterwindow=filterwindow, verbose=verbose) if not interpolated: LOGERROR('failed to interpolate light curve to minimum cadence!') return None itimes, imags = interpolated['itimes'], interpolated['imags'] # calculate the lags up to maxlags if maxlags: lags = nparange(0, maxlags) else: lags = nparange(itimes.size) # the gap-filled mags are normalized to zero, so this is 1.483 x MAD about zero series_stdev = 1.483 * npmedian(npabs(imags)) if func != _autocorr_func3: # get the autocorrelation as a function of the lag of the mag series autocorr = nparray( [func(imags, x, imags.size, 0.0, series_stdev) for x in lags]) # this doesn't need a lags array else: autocorr = _autocorr_func3(imags, lags[0], imags.size, 0.0, series_stdev) # return only the maximum number of lags if maxlags is not None: autocorr = autocorr[:maxlags] interpolated.update({ 'minitime': itimes.min(), 'lags': lags, 'acf': autocorr }) return interpolated
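A usage sketch for autocorr_magseries, reusing the synthetic times/mags/errs from the PDM sketch above (assumes fill_magseries_gaps is importable, as in the original module):

acfres = autocorr_magseries(times, mags, errs, maxlags=500,
                            fillgaps=0.0, verbose=False)
if acfres:
    cadence = acfres['cadence']           # the derived (or forced) time bin
    lags_in_time_units = acfres['lags'] * cadence
    acf_at_lag1 = acfres['acf'][1]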
def prewhiten_magseries(times, mags, errs, whitenperiod, whitenparams, sigclip=3.0, magsarefluxes=False, plotfit=None, plotfitphasedlconly=True, rescaletomedian=True): '''Removes a periodic sinusoidal signal generated using whitenparams from the input magnitude time series. whitenparams are the Fourier amplitude and phase coefficients: [ampl_1, ampl_2, ampl_3, ..., ampl_X, pha_1, pha_2, pha_3, ..., pha_X] where X is the Fourier order. These are usually the output of a previous Fourier fit to the light curve (from varbase.lcfit.fourier_fit_magseries for example). if rescaletomedian is True, then we add back the constant median term of the magnitudes to the final pre-whitened mag series. ''' stimes, smags, serrs = sigclip_magseries(times, mags, errs, sigclip=sigclip, magsarefluxes=magsarefluxes) median_mag = npmedian(smags) # phase the mag series using the given period and epoch = min(stimes) mintime = npmin(stimes) # calculate the unsorted phase, then sort it iphase = ((stimes - mintime) / whitenperiod - npfloor( (stimes - mintime) / whitenperiod)) phasesortind = npargsort(iphase) # these are the final quantities to use for the Fourier fits phase = iphase[phasesortind] pmags = smags[phasesortind] perrs = serrs[phasesortind] # get the times sorted in phase order (useful to get the fit mag minimum # with respect to phase -- the light curve minimum) ptimes = stimes[phasesortind] # get the Fourier order fourierorder = int(len(whitenparams) / 2) # now subtract the harmonic series from the phased LC # these are still in phase order wmags = pmags - _fourier_func(whitenparams, phase, pmags) # resort everything by time order wtimeorder = npargsort(ptimes) wtimes = ptimes[wtimeorder] wphase = phase[wtimeorder] wmags = wmags[wtimeorder] werrs = perrs[wtimeorder] if rescaletomedian: wmags = wmags + median_mag # prepare the returndict returndict = { 'wtimes': wtimes, # these are in the new time order 'wphase': wphase, 'wmags': wmags, 'werrs': werrs, 'whitenparams': whitenparams, 'whitenperiod': whitenperiod } # make the fit plot if required if plotfit and (isinstance(plotfit, str) or isinstance(plotfit, strio)): if plotfitphasedlconly: plt.figure(figsize=(10, 4.8)) else: plt.figure(figsize=(16, 9.6)) if plotfitphasedlconly: # phased series before whitening plt.subplot(121) plt.plot(phase, pmags, marker='.', color='k', linestyle='None', markersize=2.0, markeredgewidth=0) if not magsarefluxes: plt.gca().invert_yaxis() plt.ylabel('magnitude') else: plt.ylabel('fluxes') plt.xlabel('phase') plt.title('phased LC before pre-whitening') # phased series after whitening plt.subplot(122) plt.plot(wphase, wmags, marker='.', color='g', linestyle='None', markersize=2.0, markeredgewidth=0) if not magsarefluxes: plt.gca().invert_yaxis() plt.ylabel('magnitude') else: plt.ylabel('fluxes') plt.xlabel('phase') plt.title('phased LC after pre-whitening') else: # time series before whitening plt.subplot(221) plt.plot(stimes, smags, marker='.', color='k', linestyle='None', markersize=2.0, markeredgewidth=0) if not magsarefluxes: plt.gca().invert_yaxis() plt.ylabel('magnitude') else: plt.ylabel('fluxes') plt.xlabel('JD') plt.title('LC before pre-whitening') # time series after whitening plt.subplot(222) plt.plot(wtimes, wmags, marker='.', color='g', linestyle='None', markersize=2.0, markeredgewidth=0) if not magsarefluxes: plt.gca().invert_yaxis() plt.ylabel('magnitude') else: plt.ylabel('fluxes') plt.xlabel('JD') plt.title('LC after pre-whitening with period: %.6f' % whitenperiod) # phased series before whitening 
plt.subplot(223) plt.plot(phase, pmags, marker='.', color='k', linestyle='None', markersize=2.0, markeredgewidth=0) if not magsarefluxes: plt.gca().invert_yaxis() plt.ylabel('magnitude') else: plt.ylabel('fluxes') plt.xlabel('phase') plt.title('phased LC before pre-whitening') # phased series after whitening plt.subplot(224) plt.plot(wphase, wmags, marker='.', color='g', linestyle='None', markersize=2.0, markeredgewidth=0) if not magsarefluxes: plt.gca().invert_yaxis() plt.ylabel('magnitude') else: plt.ylabel('fluxes') plt.xlabel('phase') plt.title('phased LC after pre-whitening') plt.tight_layout() plt.savefig(plotfit, format='png', pad_inches=0.0) plt.close() if isinstance(plotfit, str) or isinstance(plotfit, strio): returndict['fitplotfile'] = plotfit return returndict
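A usage sketch for prewhiten_magseries, again reusing the synthetic series above; the order-1 whitenparams here are a hypothetical guess, standing in for the output of a prior Fourier fit:

pw = prewhiten_magseries(times, mags, errs,
                         whitenperiod=3.2165,
                         whitenparams=[0.05, 0.0],   # [ampl_1, pha_1]
                         magsarefluxes=False, plotfit=None)
wtimes, wmags, werrs = pw['wtimes'], pw['wmags'], pw['werrs']
# wmags is the residual series with the sinusoid removed and (by default)
# the median magnitude added back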
def lightcurve_flux_measures(ftimes, fmags, ferrs, magsarefluxes=False): '''This calculates percentiles and percentile ratios of the flux. Parameters ---------- ftimes,fmags,ferrs : np.array The input mag/flux time-series with all non-finite elements removed. magsarefluxes : bool If the `fmags` array actually contains fluxes, will not convert `mags` to fluxes before calculating the percentiles. Returns ------- dict A dict with all of the light curve flux percentiles and percentile ratios calculated. ''' ndet = len(fmags) if ndet > 9: # get the fluxes if magsarefluxes: series_fluxes = fmags else: series_fluxes = 10.0**(-0.4 * fmags) series_flux_median = npmedian(series_fluxes) # get the percent_amplitude for the fluxes series_flux_percent_amplitude = (npmax(npabs(series_fluxes)) / series_flux_median) # get the flux percentiles series_flux_percentiles = nppercentile( series_fluxes, [5.0, 10, 17.5, 25, 32.5, 40, 60, 67.5, 75, 82.5, 90, 95]) series_frat_595 = (series_flux_percentiles[-1] - series_flux_percentiles[0]) series_frat_1090 = (series_flux_percentiles[-2] - series_flux_percentiles[1]) series_frat_175825 = (series_flux_percentiles[-3] - series_flux_percentiles[2]) series_frat_2575 = (series_flux_percentiles[-4] - series_flux_percentiles[3]) series_frat_325675 = (series_flux_percentiles[-5] - series_flux_percentiles[4]) series_frat_4060 = (series_flux_percentiles[-6] - series_flux_percentiles[5]) # calculate the flux percentile ratios series_flux_percentile_ratio_mid20 = series_frat_4060 / series_frat_595 series_flux_percentile_ratio_mid35 = series_frat_325675 / series_frat_595 series_flux_percentile_ratio_mid50 = series_frat_2575 / series_frat_595 series_flux_percentile_ratio_mid65 = series_frat_175825 / series_frat_595 series_flux_percentile_ratio_mid80 = series_frat_1090 / series_frat_595 # calculate the ratio of F595/median flux series_percent_difference_flux_percentile = (series_frat_595 / series_flux_median) series_percentile_magdiff = -2.5 * nplog10( series_percent_difference_flux_percentile) return { 'flux_median': series_flux_median, 'flux_percent_amplitude': series_flux_percent_amplitude, 'flux_percentiles': series_flux_percentiles, 'flux_percentile_ratio_mid20': series_flux_percentile_ratio_mid20, 'flux_percentile_ratio_mid35': series_flux_percentile_ratio_mid35, 'flux_percentile_ratio_mid50': series_flux_percentile_ratio_mid50, 'flux_percentile_ratio_mid65': series_flux_percentile_ratio_mid65, 'flux_percentile_ratio_mid80': series_flux_percentile_ratio_mid80, 'percent_difference_flux_percentile': series_percentile_magdiff, } else: LOGERROR('not enough detections in this magseries ' 'to calculate flux measures') return None
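A usage sketch for lightcurve_flux_measures (reusing the synthetic series above; the interpretation comment states a standard Gaussian reference value, not something computed by the function):

fluxfeatures = lightcurve_flux_measures(times, mags, errs, magsarefluxes=False)
if fluxfeatures:
    # ratio of the 25th-75th percentile flux spread to the 5th-95th spread;
    # about 0.41 for Gaussian scatter, smaller for heavy-tailed distributions
    mid50 = fluxfeatures['flux_percentile_ratio_mid50']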
def find_redundant_points(points): bisector_to_pair = make_bisector_to_pair_map(points) angle_to_bisector = make_angle_to_bisector_map(bisector_to_pair) median_bisector = get_median_bisector(angle_to_bisector) lines_pairs = form_lines_pairs(angle_to_bisector, median_bisector) intersection_to_lines_pair = make_intersection_to_pair_map(lines_pairs) ys_critical = [p.get_both()['y'] for p in intersection_to_lines_pair.keys()] y_med = npmedian(array(ys_critical)) y_separation_line = get_y_separation_line(median_bisector, y_med) aim_centre_y_side = determine_enclosure_centre_side(points, y_separation_line) crucial_points = find_crucial_points(intersection_to_lines_pair.keys(), y_separation_line, aim_centre_y_side) if not crucial_points: return [] xs_critical = [p.get_both()['x'] for p in crucial_points] x_med = npmedian(array(xs_critical)) x_separation_line = Line2D(point1=Vector2D(x_med, 0.0), point2=Vector2D(x_med, 1.0)) aim_centre_x_side = determine_enclosure_centre_side(points, x_separation_line) final_points = find_crucial_points(crucial_points, x_separation_line, aim_centre_x_side) south_vector, west_vector = Vector2D(0.0, -1.0), Vector2D(-1.0, 0.0) south_vector = define_side_direction_vector(y_separation_line, south_vector) south_point = y_separation_line.first.sum(south_vector) south = y_separation_line.define_point_side(south_point) * aim_centre_y_side > 0 west_vector = define_side_direction_vector(x_separation_line, west_vector) west_point = x_separation_line.first.sum(west_vector) west = x_separation_line.define_point_side(west_point) * aim_centre_x_side > 0 final_lines = list() for intersection in final_points: line_pair = intersection_to_lines_pair[intersection] angle0 = abs(y_axis.angle(line_pair[0])) angle1 = abs(y_axis.angle(line_pair[1])) if (south and west) or (not south and not west): if angle0 > angle1: final_lines.append(line_pair[0]) else: final_lines.append(line_pair[1]) else: # south-east or north-west if angle0 < angle1: final_lines.append(line_pair[0]) else: final_lines.append(line_pair[1]) assert final_lines rejected_points = set() comparison_point = point_in_centre_containing_area(x_separation_line, y_separation_line, Vector2D(x_med, y_med), south, west) for line in final_lines: comparison_side = line.define_point_side(comparison_point) pair = bisector_to_pair[line] # renamed from 'points' to avoid shadowing the argument side0 = line.define_point_side(pair[0]) side1 = line.define_point_side(pair[1]) if side0 * comparison_side > 0: rejected_points.add(pair[0]) elif side1 * comparison_side > 0: rejected_points.add(pair[1]) else: print "x-med, y-med point side: ", comparison_side assert False return rejected_points
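A hedged call sketch for find_redundant_points; Vector2D and the geometric helpers it uses (make_bisector_to_pair_map, Line2D, and so on) are assumed to come from the same module, and the coordinates are arbitrary:

pts = [Vector2D(0.1, 0.2), Vector2D(0.9, 0.3), Vector2D(0.5, 0.8),
       Vector2D(0.4, 0.4), Vector2D(0.2, 0.7), Vector2D(0.8, 0.6)]
redundant = find_redundant_points(pts)
# returns the subset of pts that this pruning step marks as irrelevant to
# the enclosure-centre search; discard them and iterate on the remainder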
def stetson_jindex(ftimes, fmags, ferrs, weightbytimediff=False): '''This calculates the Stetson index for the magseries, based on consecutive pairs of observations. Based on Nicole Loncke's work for her Planets and Life certificate at Princeton in 2014. Parameters ---------- ftimes,fmags,ferrs : np.array The input mag/flux time-series with all non-finite elements removed. weightbytimediff : bool If this is True, the Stetson index for any pair of mags will be reweighted by the difference in times between them using the scheme in Fruth+ 2012 and Zhang+ 2003 (as seen in Sokolovsky+ 2017):: w_i = exp(-(t_(i+1) - t_i) / delta_t) Returns ------- float The calculated Stetson J variability index. ''' ndet = len(fmags) if ndet > 9: # get the median and ndet medmag = npmedian(fmags) # get the stetson index elements delta_prefactor = (ndet / (ndet - 1)) sigma_i = delta_prefactor * (fmags - medmag) / ferrs # Nicole's clever trick to advance indices by 1 and do x_i*x_(i+1) sigma_j = nproll(sigma_i, 1) if weightbytimediff: difft = npdiff(ftimes) deltat = npmedian(difft) weights_i = npexp(-difft / deltat) products = (weights_i * sigma_i[1:] * sigma_j[1:]) else: # ignore first elem since it's actually x_0*x_n products = (sigma_i * sigma_j)[1:] stetsonj = (npsum(npsign(products) * npsqrt(npabs(products)))) / ndet return stetsonj else: LOGERROR('not enough detections in this magseries ' 'to calculate stetson J index') return npnan
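A small usage sketch for stetson_jindex (hypothetical series; at least 10 points are required, and numpy is assumed importable as np):

import numpy as np

rng = np.random.RandomState(1)
t = np.arange(50, dtype=float)
e = np.full(50, 0.01)
noise = 10.0 + 0.01 * rng.randn(50)
signal = 10.0 + 0.05 * np.sin(2.0 * np.pi * t / 10.0) + 0.01 * rng.randn(50)

j_noise = stetson_jindex(t, noise, e)    # uncorrelated scatter -> J near 0
j_var = stetson_jindex(t, signal, e)     # smooth variability -> J well above 0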
def bls_snr(blsdict, times, mags, errs, magsarefluxes=False, sigclip=10.0, perioddeltapercent=10, npeaks=None, assumeserialbls=False, verbose=True): '''Calculates the signal to noise ratio for each best peak in the BLS periodogram. This calculates two values of SNR: SNR = transit model depth / RMS of light curve with transit model subtracted altSNR = transit model depth / RMS of light curve inside transit blsdict is the output of either bls_parallel_pfind or bls_serial_pfind. times, mags, errs are ndarrays containing the magnitude series. perioddeltapercent controls the period interval used by a bls_serial_pfind run around each peak period to figure out the transit depth, duration, and ingress/egress bins for eventual calculation of the SNR of the peak. npeaks controls how many of the periods in blsdict['nbestperiods'] to find the SNR for. If it's None, then this will calculate the SNR for all of them. If it's an integer between 1 and len(blsdict['nbestperiods']), will calculate for only the specified number of peak periods, starting from the best period. If assumeserialbls is True, will not rerun bls_serial_pfind to figure out the transit depth, duration, and ingress/egress bins for eventual calculation of the SNR of the peak. This is normally False because we assume that the user will be using bls_parallel_pfind, which works on chunks of frequency space so returns multiple values of transit depth, duration, ingress/egress bin specific to those chunks. These may not be valid for the global best peaks in the periodogram, so we need to rerun bls_serial_pfind around each peak in blsdict['nbestperiods'] to get correct values for these. FIXME: for now, we're only doing simple RMS. Need to calculate red and white-noise RMS as outlined below: - calculate the white noise rms and the red noise rms of the residual. - the white noise rms is just the rms of the residual - the red noise rms = sqrt(binnedrms^2 - expectedbinnedrms^2) - calculate the SNR using: sqrt(delta^2 / ((sigma_w ^2 / nt) + (sigma_r ^2 / Nt)))) where: delta = transit depth sigma_w = white noise rms sigma_r = red noise rms nt = number of in-transit points Nt = number of distinct transits sampled ''' # figure out how many periods to work on if (npeaks and (0 < npeaks < len(blsdict['nbestperiods']))): nperiods = npeaks else: if verbose: LOGWARNING('npeaks not specified or invalid, ' 'getting SNR for all %s BLS peaks' % len(blsdict['nbestperiods'])) nperiods = len(blsdict['nbestperiods']) nbestperiods = blsdict['nbestperiods'][:nperiods] # get rid of nans first and sigclip stimes, smags, serrs = sigclip_magseries(times, mags, errs, magsarefluxes=magsarefluxes, sigclip=sigclip) # make sure there are enough points to calculate a spectrum if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9: nbestsnrs = [] nbestasnrs = [] transitdepth, transitduration = [], [] # get these later whitenoise, rednoise = [], [] nphasebins, transingressbin, transegressbin = [], [], [] # keep these around for diagnostics allsubtractedmags = [] allphasedmags = [] allphases = [] allblsmodels = [] for ind, period in enumerate(nbestperiods): # get the period interval startp = period - perioddeltapercent * period / 100.0 endp = period + perioddeltapercent * period / 100.0 # see if we need to rerun bls_serial_pfind if not assumeserialbls: # run bls_serial_pfind with the kwargs copied over from the # initial run. 
replace only the startp, endp, and verbose kwarg # values prevkwargs = blsdict['kwargs'].copy() prevkwargs['verbose'] = verbose prevkwargs['startp'] = startp prevkwargs['endp'] = endp blsres = bls_serial_pfind(times, mags, errs, **prevkwargs) else: blsres = blsdict thistransdepth = blsres['blsresult']['transdepth'] thistransduration = blsres['blsresult']['transduration'] thisbestperiod = blsres['bestperiod'] thistransingressbin = blsres['blsresult']['transingressbin'] thistransegressbin = blsres['blsresult']['transegressbin'] thisnphasebins = blsdict['kwargs']['nphasebins'] # get the minimum light epoch using a spline fit try: spfit = spline_fit_magseries(times, mags, errs, thisbestperiod, magsarefluxes=magsarefluxes, verbose=verbose) thisminepoch = spfit['fitinfo']['fitepoch'] except ValueError: LOGEXCEPTION('could not fit a spline to find a minimum of ' 'the phased LC, trying SavGol fit instead...') # fit a Savitsky-Golay instead and get its minimum savfit = savgol_fit_magseries(times, mags, errs, thisbestperiod, magsarefluxes=magsarefluxes, verbose=verbose) thisminepoch = savfit['fitinfo']['fitepoch'] if isinstance(thisminepoch, np.ndarray): if verbose: LOGWARNING('minimum epoch is actually an array:\n' '%s\n' 'instead of a float, ' 'are there duplicate time values ' 'in the original input? ' 'will use the first value in this array.' % repr(thisminepoch)) thisminepoch = thisminepoch[0] # phase using this epoch phased_magseries = phase_magseries_with_errs(stimes, smags, serrs, thisbestperiod, thisminepoch, wrap=False, sort=True) tphase = phased_magseries['phase'] tmags = phased_magseries['mags'] terrs = phased_magseries['errs'] # use the transit depth and duration to subtract the BLS transit # model from the phased mag series. we're centered about 0.0 as the # phase of the transit minimum so we need to look at stuff from # [0.0, transitphase] and [1.0-transitphase, 1.0] transitphase = thistransduration / 2.0 transitindices = ((tphase < transitphase) | (tphase > (1.0 - transitphase))) # this is the BLS model # constant = median(tmags) outside transit # constant = thistransitdepth inside transit blsmodel = npfull_like(tmags, npmedian(tmags)) if magsarefluxes: blsmodel[transitindices] = (blsmodel[transitindices] + thistransdepth) else: blsmodel[transitindices] = (blsmodel[transitindices] - thistransdepth) # this is the residual of mags - model subtractedmags = tmags - blsmodel # calculate the rms of this residual subtractedrms = npstd(subtractedmags) # the SNR is the transit depth divided by the rms of the residual thissnr = npabs(thistransdepth / subtractedrms) # alt SNR = expected transit depth / rms of timeseries in transit altsnr = npabs(thistransdepth / npstd(tmags[transitindices])) # tell user about stuff if verbose = True if verbose: LOGINFO('peak %s: new best period: %.6f, ' 'fit center of transit: %.5f' % (ind + 1, thisbestperiod, thisminepoch)) LOGINFO('transit ingress phase = %.3f to %.3f' % (1.0 - transitphase, 1.0)) LOGINFO('transit egress phase = %.3f to %.3f' % (0.0, transitphase)) LOGINFO('npoints in transit: %s' % tmags[transitindices].size) LOGINFO('transit depth (delta): %.5f, ' 'frac transit length (q): %.3f, ' ' SNR: %.3f, altSNR: %.3f' % (thistransdepth, thistransduration, thissnr, altsnr)) # update the lists with results from this peak nbestsnrs.append(thissnr) nbestasnrs.append(altsnr) transitdepth.append(thistransdepth) transitduration.append(thistransduration) transingressbin.append(thistransingressbin) transegressbin.append(thistransegressbin) 
nphasebins.append(thisnphasebins) # update the diagnostics allsubtractedmags.append(subtractedmags) allphasedmags.append(tmags) allphases.append(tphase) allblsmodels.append(blsmodel) # update these when we figure out how to do it # nphasebins.append(thisnphasebins) # transingressbin.append(thisingressbin) # transegressbin.append(thisegressbin) # done with working on each peak # if there aren't enough points in the mag series, bail out else: LOGERROR('no good detections for these times and mags, skipping...') nbestsnrs, whitenoise, rednoise = None, None, None transitdepth, transitduration = None, None nphasebins, transingressbin, transegressbin = None, None, None allsubtractedmags, allphases, allphasedmags = None, None, None return { 'npeaks': npeaks, 'period': nbestperiods, 'snr': nbestsnrs, 'altsnr': nbestasnrs, 'whitenoise': whitenoise, 'rednoise': rednoise, 'transitdepth': transitdepth, 'transitduration': transitduration, 'nphasebins': nphasebins, 'transingressbin': transingressbin, 'transegressbin': transegressbin, 'allblsmodels': allblsmodels, 'allsubtractedmags': allsubtractedmags, 'allphasedmags': allphasedmags, 'allphases': allphases }
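A usage sketch for bls_snr; it assumes a prior BLS run from this module's bls_parallel_pfind on the same arrays (transit-bearing data; the period range below is illustrative):

blsdict = bls_parallel_pfind(times, mags, errs, startp=1.0, endp=10.0)
snrres = bls_snr(blsdict, times, mags, errs, npeaks=3, verbose=False)
# snrres['snr'] holds depth / residual-RMS for the top 3 peak periods,
# snrres['altsnr'] holds depth / in-transit RMS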
def fourier_fit_magseries( times, mags, errs, period, fourierorder=None, fourierparams=None, fix_period=True, scale_errs_redchisq_unity=True, sigclip=3.0, magsarefluxes=False, plotfit=False, ignoreinitfail=True, verbose=True, curve_fit_kwargs=None, ): '''This fits a Fourier series to a mag/flux time series. Parameters ---------- times,mags,errs : np.array The input mag/flux time-series to fit a Fourier cosine series to. period : float The period to use for the Fourier fit. fourierorder : None or int If this is an int, will be interpreted as the Fourier order of the series to fit to the input mag/flux times-series. If this is None and `fourierparams` is specified, `fourierparams` will be used directly to generate the fit Fourier series. If `fourierparams` is also None, this function will try to fit a Fourier cosine series of order 3 to the mag/flux time-series. fourierparams : list of floats or None If this is specified as a list of floats, it must be of the form below:: [fourier_amp1, fourier_amp2, fourier_amp3,...,fourier_ampN, fourier_phase1, fourier_phase2, fourier_phase3,...,fourier_phaseN] to specify a Fourier cosine series of order N. If this is None and `fourierorder` is specified, the Fourier order specified there will be used to construct the Fourier cosine series used to fit the input mag/flux time-series. If both are None, this function will try to fit a Fourier cosine series of order 3 to the input mag/flux time-series. fix_period : bool If True, will fix the period with fitting the sinusoidal function to the phased light curve. scale_errs_redchisq_unity : bool If True, the standard errors on the fit parameters will be scaled to make the reduced chi-sq = 1.0. This sets the ``absolute_sigma`` kwarg for the ``scipy.optimize.curve_fit`` function to False. sigclip : float or int or sequence of two floats/ints or None If a single float or int, a symmetric sigma-clip will be performed using the number provided as the sigma-multiplier to cut out from the input time-series. If a list of two ints/floats is provided, the function will perform an 'asymmetric' sigma-clip. The first element in this list is the sigma value to use for fainter flux/mag values; the second element in this list is the sigma value to use for brighter flux/mag values. For example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. If `sigclip` is None, no sigma-clipping will be performed, and the time-series (with non-finite elems removed) will be passed through to the output. magsarefluxes : bool If True, will treat the input values of `mags` as fluxes for purposes of plotting the fit and sig-clipping. plotfit : str or False If this is a string, this function will make a plot for the fit to the mag/flux time-series and writes the plot to the path specified here. ignoreinitfail : bool If this is True, ignores the initial failure to find a set of optimized Fourier parameters using the global optimization function and proceeds to do a least-squares fit anyway. verbose : bool If True, will indicate progress and warn of any problems. curve_fit_kwargs : dict or None If not None, this should be a dict containing extra kwargs to pass to the scipy.optimize.curve_fit function. 
Returns ------- dict This function returns a dict containing the model fit parameters, the minimized chi-sq value and the reduced chi-sq value. The form of this dict is mostly standardized across all functions in this module:: { 'fittype':'fourier', 'fitinfo':{ 'finalparams': the list of final model fit params, 'finalparamerrs': list of errs for each model fit param, 'fitmags': the model fit mags, 'fitperiod': the fit period if this wasn't set to fixed, 'fitepoch': this is times.min() for this fit type, 'actual_fitepoch': time of minimum light from fit model ... other fit function specific keys ... }, 'fitchisq': the minimized value of the fit's chi-sq, 'fitredchisq':the reduced chi-sq value, 'fitplotfile': the output fit plot if fitplot is not None, 'magseries':{ 'times':input times in phase order of the model, 'phase':the phases of the model mags, 'mags':input mags/fluxes in the phase order of the model, 'errs':errs in the phase order of the model, 'magsarefluxes':input value of magsarefluxes kwarg } } NOTE: the returned value of 'fitepoch' in the 'fitinfo' dict returned by this function is the time value of the first observation since this is where the LC is folded for the fit procedure. To get the actual time of minimum epoch as calculated by a spline fit to the phased LC, use the key 'actual_fitepoch' in the 'fitinfo' dict. ''' stimes, smags, serrs = sigclip_magseries(times, mags, errs, sigclip=sigclip, magsarefluxes=magsarefluxes) # get rid of zero errs nzind = npnonzero(serrs) stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind] phase, pmags, perrs, ptimes, mintime = (get_phased_quantities( stimes, smags, serrs, period)) # get the fourier order either from the scalar order kwarg... if fourierorder and fourierorder > 0 and not fourierparams: fourieramps = [0.6] + [0.2] * (fourierorder - 1) fourierphas = [0.1] + [0.1] * (fourierorder - 1) fourierparams = fourieramps + fourierphas # or from the fully specified coeffs vector elif not fourierorder and fourierparams: fourierorder = int(len(fourierparams) / 2) else: LOGWARNING('specified both/neither Fourier order AND Fourier coeffs, ' 'using default Fourier order of 3') fourierorder = 3 fourieramps = [0.6] + [0.2] * (fourierorder - 1) fourierphas = [0.1] + [0.1] * (fourierorder - 1) fourierparams = fourieramps + fourierphas if verbose: LOGINFO('fitting Fourier series of order %s to ' 'mag series with %s observations, ' 'using period %.6f, folded at %.6f' % (fourierorder, len(phase), period, mintime)) # initial minimize call to find global minimum in chi-sq initialfit = spminimize(_fourier_chisq, fourierparams, args=(phase, pmags, perrs)) # make sure this initial fit succeeds before proceeding if initialfit.success or ignoreinitfail: if verbose: LOGINFO('initial fit done, refining...') leastsqparams = initialfit.x try: curvefit_params = npconcatenate((nparray([period]), leastsqparams)) # set up the bounds for the fit parameters if fix_period: curvefit_bounds = ([period - 1.0e-7] + [-npinf] * fourierorder + [-npinf] * fourierorder, [period + 1.0e-7] + [npinf] * fourierorder + [npinf] * fourierorder) else: curvefit_bounds = ([0.0] + [-npinf] * fourierorder + [-npinf] * fourierorder, [npinf] + [npinf] * fourierorder + [npinf] * fourierorder) curvefit_func = partial( sinusoidal.fourier_curvefit_func, zerolevel=npmedian(smags), epoch=mintime, fixed_period=period if fix_period else None, ) if curve_fit_kwargs is not None: finalparams, covmatrix = curve_fit( curvefit_func, stimes, smags, p0=curvefit_params, sigma=serrs, 
bounds=curvefit_bounds, absolute_sigma=(not scale_errs_redchisq_unity), **curve_fit_kwargs) else: finalparams, covmatrix = curve_fit( curvefit_func, stimes, smags, p0=curvefit_params, sigma=serrs, bounds=curvefit_bounds, absolute_sigma=(not scale_errs_redchisq_unity), ) except Exception: LOGEXCEPTION("curve_fit returned an exception") finalparams, covmatrix = None, None # if the fit succeeded, then we can return the final parameters if finalparams is not None and covmatrix is not None: # this is the fit period fperiod = finalparams[0] phase, pmags, perrs, ptimes, mintime = (get_phased_quantities( stimes, smags, serrs, fperiod)) # calculate the chisq and reduced chisq fitmags = _fourier_func(finalparams[1:], phase, pmags) fitchisq = npsum( ((fitmags - pmags) * (fitmags - pmags)) / (perrs * perrs)) n_free_params = len(pmags) - len(finalparams) if fix_period: n_free_params -= 1 fitredchisq = fitchisq / n_free_params stderrs = npsqrt(npdiag(covmatrix)) if verbose: LOGINFO('final fit done. chisq = %.5f, reduced chisq = %.5f' % (fitchisq, fitredchisq)) # figure out the time of light curve minimum (i.e. the fit epoch) # this is when the fit mag is maximum (i.e. the faintest) # or if magsarefluxes = True, then this is when fit flux is minimum if not magsarefluxes: fitmagminind = npwhere(fitmags == npmax(fitmags)) else: fitmagminind = npwhere(fitmags == npmin(fitmags)) if len(fitmagminind[0]) > 1: fitmagminind = (fitmagminind[0][0], ) # assemble the returndict returndict = { 'fittype': 'fourier', 'fitinfo': { 'fourierorder': fourierorder, # return coeffs only for backwards compatibility with # existing functions that use the returned value of # fourier_fit_magseries 'finalparams': finalparams[1:], 'finalparamerrs': stderrs, 'initialfit': initialfit, 'fitmags': fitmags, 'fitperiod': finalparams[0], # the 'fitepoch' is just the minimum time here 'fitepoch': mintime, # the actual fit epoch is calculated as the time of minimum # light OF the fit model light curve 'actual_fitepoch': ptimes[fitmagminind] }, 'fitchisq': fitchisq, 'fitredchisq': fitredchisq, 'fitplotfile': None, 'magseries': { 'times': ptimes, 'phase': phase, 'mags': pmags, 'errs': perrs, 'magsarefluxes': magsarefluxes }, } # make the fit plot if required if plotfit and isinstance(plotfit, str): make_fit_plot(phase, pmags, perrs, fitmags, fperiod, mintime, mintime, plotfit, magsarefluxes=magsarefluxes) returndict['fitplotfile'] = plotfit return returndict # if the leastsq fit did not succeed, return Nothing else: LOGERROR( 'fourier-fit: least-squared fit to the light curve failed') return { 'fittype': 'fourier', 'fitinfo': { 'fourierorder': fourierorder, 'finalparams': None, 'finalparamerrs': None, 'initialfit': initialfit, 'fitmags': None, 'fitperiod': None, 'fitepoch': None, 'actual_fitepoch': None, }, 'fitchisq': npnan, 'fitredchisq': npnan, 'fitplotfile': None, 'magseries': { 'times': ptimes, 'phase': phase, 'mags': pmags, 'errs': perrs, 'magsarefluxes': magsarefluxes } } # if the fit didn't succeed, we can't proceed else: LOGERROR('initial Fourier fit did not succeed, ' 'reason: %s, returning scipy OptimizeResult' % initialfit.message) return { 'fittype': 'fourier', 'fitinfo': { 'fourierorder': fourierorder, 'finalparams': None, 'finalparamerrs': None, 'initialfit': initialfit, 'fitmags': None, 'fitperiod': None, 'fitepoch': None, 'actual_fitepoch': None, }, 'fitchisq': npnan, 'fitredchisq': npnan, 'fitplotfile': None, 'magseries': { 'times': ptimes, 'phase': phase, 'mags': pmags, 'errs': perrs, 'magsarefluxes': magsarefluxes } }
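A usage sketch for fourier_fit_magseries, fitting an order-3 cosine series at the period recovered above (reusing the synthetic series; keyword values are illustrative):

ffit = fourier_fit_magseries(times, mags, errs, 3.2165,
                             fourierorder=3, sigclip=3.0,
                             magsarefluxes=False, plotfit=False,
                             verbose=False)
if ffit['fitinfo']['finalparams'] is not None:
    amps = ffit['fitinfo']['finalparams'][:3]    # [ampl_1, ampl_2, ampl_3]
    print('fit period %.6f, reduced chi-sq %.3f' %
          (ffit['fitinfo']['fitperiod'], ffit['fitredchisq']))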
def run(dataset, k, it_max=1000, min_variation=1.0e-4, labels=None, full_output=True): # Init centers_coord centers_id = random.randint(dataset.shape[0], size=k) centers_coord = dataset[centers_id, :] # Each instance will be put on an initial random cluster inst_cluster_id = random.randint(k, size=dataset.shape[0]) # Auxiliary vectors to keep code cleaner auxvec_cur_distances = array([0.0] * k) prev_variation = 1.0 + min_variation it = 0 while it < it_max and prev_variation >= min_variation: it += 1 # reset the variation each iteration; otherwise the max() below can only # grow and the convergence test never fires prev_variation = 0.0 for inst_id in range(dataset.shape[0]): for center_id in range(k): # For each instance, calculate the distance between # each center auxvec_cur_distances[center_id] = \ Kmedians.__euclideandist__( dataset[inst_id, :], centers_coord[center_id, :]) # For each instance, let it be part of the nearest # cluster inst_cluster_id[inst_id] = argmin(auxvec_cur_distances) # For each cluster, calculate the new center coordinates # using the median for center_id in range(k): new_cur_cluster_coords = npmedian( dataset[inst_cluster_id == center_id, :], axis=0) # Calculate variation between previous centers_coord and # new ones (using infinite norm) prev_variation = max( prev_variation, max(abs(centers_coord[center_id] - new_cur_cluster_coords))) centers_coord[center_id] = new_cur_cluster_coords # Build up answer ans = { "inst_cluster_id": inst_cluster_id, "centers_coord": centers_coord, } if full_output: ans = { "clustering_method": "K-Medians", "k": k, **ans, **ClusterMetrics.runall(dataset=dataset, centers_coord=centers_coord, inst_cluster_id=inst_cluster_id, labels=labels), } return ans
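A usage sketch for the K-medians run function on two hypothetical 2-D blobs; it assumes the numpy names the module imports (random, vstack) and that the Kmedians helper class referenced inside run is importable. full_output=False avoids the ClusterMetrics dependency:

from numpy import random, vstack

random.seed(7)
data = vstack([random.randn(100, 2),
               random.randn(100, 2) + [8.0, 8.0]])

ans = run(data, k=2, full_output=False)
centers = ans['centers_coord']         # per-dimension medians of each cluster
cluster_ids = ans['inst_cluster_id']   # cluster id per instance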
def sigclip_magseries_with_extparams(times, mags, errs, extparams, sigclip=None, iterative=False, magsarefluxes=False): '''Select the finite times, magnitudes (or fluxes), and errors from the passed values, and apply symmetric or asymmetric sigma clipping to them. Returns sigma-clipped times, mags, and errs. Uses the same indices to filter out the values of all arrays in the extparams list. Args: times (np.array): ... mags (np.array): numpy array to sigma-clip. Does not assume all values are finite. Does not assume anything about whether they're positive/negative. errs (np.array): ... extparams (list of np.arrays): external parameters to also filter using the same indices as used for sigma-clipping. iterative (bool): True if you want iterative sigma-clipping. magsarefluxes (bool): True if your "mags" are in fact fluxes, i.e. if "dimming" corresponds to your "mags" getting smaller. sigclip (float or list): If float, apply symmetric sigma clipping. If list, e.g., [10., 3.], will sigclip out greater than 10-sigma dimmings and greater than 3-sigma brightenings. Here the meaning of "dimming" and "brightening" is set by *physics* (not the magnitude system), which is why the `magsarefluxes` kwarg must be correctly set. Returns: stimes, smags, serrs: (sigmaclipped values of each). ''' returnerrs = True # fake the errors if they don't exist # this is inconsequential to sigma-clipping # we don't return these dummy values if the input errs are None if errs is None: # assume 0.1% errors if not given # this should work for mags and fluxes errs = 0.001 * mags returnerrs = False # filter the input times, mags, errs; do sigclipping and normalization find = npisfinite(times) & npisfinite(mags) & npisfinite(errs) ftimes, fmags, ferrs = times[find], mags[find], errs[find] # apply the same indices to the external parameters for epi, eparr in enumerate(extparams): extparams[epi] = eparr[find] # get the median and stdev = 1.483 x MAD median_mag = npmedian(fmags) stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483 # sigclip next for a single sigclip value if sigclip and isinstance(sigclip, (float, int)): if not iterative: sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag) stimes = ftimes[sigind] smags = fmags[sigind] serrs = ferrs[sigind] # apply the same indices to the external parameters for epi, eparr in enumerate(extparams): extparams[epi] = eparr[sigind] else: # # iterative version adapted from scipy.stats.sigmaclip # delta = 1 this_times = ftimes this_mags = fmags this_errs = ferrs while delta: this_median = npmedian(this_mags) this_stdev = (npmedian(npabs(this_mags - this_median))) * 1.483 this_size = this_mags.size # apply the sigclip tsi = (npabs(this_mags - this_median)) < (sigclip * this_stdev) # update the arrays this_times = this_times[tsi] this_mags = this_mags[tsi] this_errs = this_errs[tsi] # apply the same indices to the external parameters for epi, eparr in enumerate(extparams): extparams[epi] = eparr[tsi] # update delta and go to the top of the loop delta = this_size - this_mags.size # final sigclipped versions stimes, smags, serrs = this_times, this_mags, this_errs # this handles sigclipping for asymmetric +ve and -ve clip values elif sigclip and isinstance(sigclip, list) and len(sigclip) == 2: # sigclip is passed as [dimmingclip, brighteningclip] dimmingclip = sigclip[0] brighteningclip = sigclip[1] if not iterative: if magsarefluxes: nottoodimind = ((fmags - median_mag) > (-dimmingclip * stddev_mag)) nottoobrightind = ((fmags - median_mag) < (brighteningclip * stddev_mag)) 
else: nottoodimind = ((fmags - median_mag) < (dimmingclip * stddev_mag)) nottoobrightind = ((fmags - median_mag) > (-brighteningclip * stddev_mag)) sigind = nottoodimind & nottoobrightind stimes = ftimes[sigind] smags = fmags[sigind] serrs = ferrs[sigind] # apply the same indices to the external parameters for epi, eparr in enumerate(extparams): extparams[epi] = eparr[sigind] else: # # iterative version adapted from scipy.stats.sigmaclip # delta = 1 this_times = ftimes this_mags = fmags this_errs = ferrs while delta: this_median = npmedian(this_mags) this_stdev = (npmedian(npabs(this_mags - this_median))) * 1.483 this_size = this_mags.size if magsarefluxes: nottoodimind = ((this_mags - this_median) > (-dimmingclip * this_stdev)) nottoobrightind = ((this_mags - this_median) < (brighteningclip * this_stdev)) else: nottoodimind = ((this_mags - this_median) < (dimmingclip * this_stdev)) nottoobrightind = ((this_mags - this_median) > (-brighteningclip * this_stdev)) # apply the sigclip tsi = nottoodimind & nottoobrightind # update the arrays this_times = this_times[tsi] this_mags = this_mags[tsi] this_errs = this_errs[tsi] # apply the same indices to the external parameters for epi, eparr in enumerate(extparams): extparams[epi] = eparr[tsi] # update delta and go to top of the loop delta = this_size - this_mags.size # final sigclipped versions stimes, smags, serrs = this_times, this_mags, this_errs else: stimes = ftimes smags = fmags serrs = ferrs if returnerrs: return stimes, smags, serrs, extparams else: return stimes, smags, None, extparams
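A usage sketch for sigclip_magseries_with_extparams (reusing the synthetic series above; the extparams arrays here are hypothetical). Note that, unlike sigclip_magseries, it returns four values and filters the passed-in extparams list in place:

airmass = np.ones_like(mags)           # hypothetical external parameter
xccd = np.zeros_like(mags)             # another hypothetical parameter
stimes, smags, serrs, extp = sigclip_magseries_with_extparams(
    times, mags, errs, [airmass, xccd], sigclip=3.0)
clipped_airmass = extp[0]              # now the same length as smags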
def L2(input_path_L1, input_path_Compensate, output_path, E0_const): try: input_fp_Compensate = open(input_path_Compensate, 'r') except IOError: print "IO error;Check the input File: ", input_path_Compensate except: print "Unexpected Open Error: ", input_path_Compensate input_fp_Compensate.close() try: input_fp_L1 = open(input_path_L1, 'r') except IOError: print "IO error;Check the input File: ", input_path_L1 except: print "Unexpected Open Error: ", input_path_L1 input_fp_L1.close() #output file path output_file_path = os.path.join(output_path, 'ResultL2.csv') try: output_fp = open(output_file_path, 'w+') except IOError: print "IO error;Check the output File: ", output_file_path return 'L2 failed' # output_plot_1: Reynold-Taylor equation regression output output_plot_2_file_path = os.path.join(output_path, 'Plot_L2_2.csv') try: output_plot_2_fp = open(output_plot_2_file_path, 'w+') except IOError: print "IO error;Check the output File: ", output_plot_2_file_path return 'L2 failed' try: Compensate_csv = csv.reader(input_fp_Compensate, delimiter = ',') except csv.Error: print "Parse Error;Check the input File: ", input_path_Compensate except StandardError: print "Unexpected Read Error: ", input_path_Compensate try: L1_csv = csv.reader(input_fp_L1, delimiter = ',') except csv.Error: print "Parse Error;Check the input File: ", input_path_L1 except StandardError: print "Unexpected Read Error: ", input_path_L1 n_Compensate = 0 n_L1 = 0 data_Compensate = [] data_L1 = [] for row in Compensate_csv: data_Compensate.append(row) n_Compensate = n_Compensate + 1 for row in L1_csv: data_L1.append(row) n_L1 = n_L1 + 1 #Data count check if(n_Compensate != n_L1): print 'Count Error;Process count mismatch between Compensate and L1' return 'L2 failed' #initialize date = [] rsdn = npzeros(n_Compensate) Ta = npzeros(n_Compensate) h2o = npzeros(n_Compensate) #press = npzeros(n_Compensate) #Read Input Data i = 0 for row in data_Compensate: rsdn[i] = float(row[0]) Ta[i] = float(row[1]) h2o[i] = float(row[2]) i = i + 1 press = 998.0 #initialize Fs = npzeros(n_L1) Fc = npzeros(n_L1) Fsc = npzeros(n_L1) Hs = npzeros(n_L1) Hc = npzeros(n_L1) Hsc = npzeros(n_L1) LEs = npzeros(n_L1) LEc = npzeros(n_L1) LEsc = npzeros(n_L1) co2 = npzeros(n_L1) ustar = npzeros(n_L1) itime = npzeros(n_L1) iustar = npzeros(n_L1) date = [] i = 0 for row in data_L1: date.append(row[0]) Fs[i] = float(row[1]) Fc[i] = float(row[2]) Fsc[i] = float(row[3]) Hs[i] = float(row[4]) Hc[i] = float(row[5]) Hsc[i] = float(row[6]) LEs[i] = float(row[7]) LEc[i] = float(row[8]) LEsc[i] = float(row[9]) co2[i] = float(row[10]) ustar[i] = float(row[14]) itime[i] = float(row[15]) iustar[i] = float(row[16]) i = i + 1 # Define constants and parameters for gap filling #-------------------------------------------------------------------------- num_day = 28 ni = 36 nd = 10 n1 = 2 # how many of the largest points are considered for respiration # DO NOT Modify! #num_point_per_day = 24 # number of data points per day (48 -> 30 min avg time) #avgtime = 30 # determine num_point_per_day automatically using the datetime module date_1st = datetime.datetime.strptime(date[0], "%Y-%m-%d %H:%M") date_2nd = datetime.datetime.strptime(date[1], "%Y-%m-%d %H:%M") date_diff = date_2nd - date_1st avgtime = int(date_diff.seconds / 60) # averaging time (minutes) num_point_per_day = 1440 / avgtime # number of data points per day (1440 : minutes of a day) num_segment = num_point_per_day * num_day num_avg = int(n_L1 / num_segment) num_day_2 = 7 # nday_re = 20 # noverlap = 5 num_day_re = 20 noverlap = 5 ni = int(num_point_per_day * 3 / 4) # the data point that night starts nd = 300 / avgtime # how many of the largest points are considered for respiration (300 : minutes of 5 hours) #-------------------------------------------------------------------------- #E0_const = True # Do you want to use constant E0 for one year? Y/N beta0 = nparray([2, 200]) Tref = 10.0 T0 = -46.02 gap_limit = 0.025 ustar_limit = 0.5 upper_Fc = 0.35 # upper limit of nighttime CO2 flux (mg/m2/s) Fc_limit = 0.005 ## Information for MLT drsdn = 50.0 # W/m2 dta = 2.5 # oC dvpd = 5.0 # 5 hPa rv = 461.51 #-------------------------------------------------------------------------- upper_co2 = 1000.0 # upper limit of CO2 concent. (mg/m3) upper_h2o = 60.0 # upper limit of H2O concent. (g/m3) upper_Ta = 60.0 # upper limit of air temperature (oC) lower_Fc = -3.0 # lower limit of daytime CO2 flux (mg/m2/s) lower_LE = -200 # lower limit of LE (W/m2) lower_H = -300 # lower limit of H (W/m2) upper_Fc = 3 # upper limit of nighttime CO2 flux (mg/m2/s) upper_LE = 800 # upper limit of LE (W/m2) upper_H = 800 # upper limit of H (W/m2) upper_agc = 95.0 # upper limit of AGC value ustar_limit = 0.03 # minimum ustar for filtering out nighttime fluxes Fc_limit = 0.005 # lower limit of Re (ecosystem respiration) (mg/m2/s) gap_limit = 0.025 # 0.025 --> 95% confidence interval Tak = npzeros(len(Ta)) tr = npzeros(len(Ta)) ea = npzeros(len(Ta)) es = npzeros(len(Ta)) vpd = npzeros(len(Ta)) #-------------------------------------------------------------------------- # calculation of vapor pressure deficit a = [13.3185, 1.9760, 0.6445, 0.1299] for i in range(n_Compensate): Tak[i] = Ta[i] + 273.15 tr[i] = 1.0-(373.15/Tak[i]) es[i] = 1013.25*exp(a[0]*tr[i]-a[1]*(tr[i]**2)-(a[2]*(tr[i]**3))-a[3]*(tr[i]**4)) # hPa for i in range(n_L1): ea[i] = h2o[i] vpd[i] = float(es[i]) - float(ea[i]) #unit is hPa Fc_filled = copy.deepcopy(Fsc) print 'Gap Filling Process' print 'Before running this program, ' print ' please make sure that you correctly set all parameters' #print 'E0_const',
E0_const #print 'nn', nn #print 'num_point_per_day', num_point_per_day #print 'num_day_2', num_day_2 #print 'num_day_re', num_day_re #print 'noverlap', noverlap #print 'drsdn', drsdn #print 'dta', dta #print 'dvpd', dvpd #print '-------------------------------------------------------------------' index = [] for main_j in range(num_avg): # loop for gap-filling of CO2 fluxes seg_start_i = main_j * num_segment seg_fin_i = seg_start_i + num_segment if((seg_start_i + 2 * num_segment) > n_L1): seg_fin_i = n_L1 x2 = [] x3 = [] #-------------------------------------------------------------------------- if(main_j == 0): print 'Application of modified lookup table method' #-------------------------------------------------------------------------- for i in range(seg_start_i, seg_fin_i): if(itime[i] == 1): ii = 0 if(isnan(Fsc[i]) == True): jj = 0 while ((ii < 1) and (jj <= 4)): ta_f = Ta[i] rsdn_f = rsdn[i] vpd_f = vpd[i] i0 = i - jj * num_day_2 * num_point_per_day i1 = i + jj * num_day_2 * num_point_per_day+1 if(i0 < 1): i0 = 0 i1 = 2 * jj * num_day_2 * num_point_per_day+1 if(i1 >= n_L1): i0 = n_L1 - 2 * jj * num_day_2 * num_point_per_day i1 = n_L1 if(i0 < 1): i0 = 0 ks = 0 for j in range(i0, i1): if((fabs(vpd_f - vpd[j]) < dvpd) and \ (fabs(rsdn_f - rsdn[j]) < drsdn) and \ (fabs(ta_f - Ta[j]) < dta) and \ (isnan(Fsc[j]) == False)): ks = ks + 1 x3_temp = [] x2.append(Fsc[j]) x3_temp.append(j) x3_temp.append(vpd[j]) x3_temp.append(rsdn[j]) x3_temp.append(Ta[j]) x3_temp.append(ks) x3.append(x3_temp) ii = ks #index_temp = [] #index_temp.append(i) #index_temp.append(i0) #index_temp.append(i1) #index_temp.append(ks) #index_temp.append(main_j) #index.append(index_temp) if(ks >= 1): Fc_filled[i] = npmedian(nparray(x2)) jj = jj + 1 x2 = [] x3 = [] if(ii < 1): jj = 0 while(ii < 1): rsdn_f = rsdn[i] i0 = i - jj * num_day_2 * num_point_per_day i1 = i + jj * num_day_2 * num_point_per_day+1 if(i0 < 1): i0 = 0 i1 = 2 * jj * num_day_2 * num_point_per_day+1 if(i1 >= n_L1): i0 = n_L1 - 2 * jj * num_day_2 * num_point_per_day i1 = n_L1 if(i0 < 0): i0 = 0 ks = 0 for j in range(i0, i1): if((fabs(rsdn_f - rsdn[j]) < drsdn) and \ (isnan(Fsc[j]) == False)): ks = ks + 1 x3_temp = [] x2.append(Fsc[j]) x3_temp.append(j) x3_temp.append(vpd[j]) x3_temp.append(rsdn[j]) x3_temp.append(Ta[j]) x3_temp.append(ks) x3.append(x3_temp) ii = ks #index_temp = [] #index_temp.append(i) #index_temp.append(i0) #index_temp.append(i1) #index_temp.append(ks) #index_temp.append(main_j) #index.append(index_temp) if(ks >= 1): Fc_filled[i] = npmedian(nparray(x2)) jj = jj + 1 x2 = [] x3 = [] x2 = [] x3 = [] ks = 0 d = npzeros((n_L1, 5)) d2 = npzeros((n_L1, 5)) dd = npzeros((n_L1, 5)) x4 = [] #Regression to Lloyd-Taylor equation print 'Regression to Lloyd-Taylor equation' if(E0_const == True): for i in range(ni-1, n_Compensate, num_point_per_day): t1 = npzeros(nd) for j in range(nd): t1[j] = Fsc[i + j] #Set to 'descend' t2, IX = Common.matlab_sort(t1) k2 = 0 for k in range(nd-1): if((isnan(t2[k]) == False) and \ (t2[k] < upper_Fc) and \ (t2[k+1] > Fc_limit)): k2 = k2 + 1 if(k2 >= 2): for j in range(nd-1): if((itime[i+1 + IX[j]] == 0) and \ (isnan(t2[j]) == False) and \ (isnan(Ta[i+1 + IX[j]]) == False) and \ (t2[j] < upper_Fc) and \ (t2[j + 1] > Fc_limit) and \ (iustar[i + IX[j]] == 0) and \ (iustar[i + IX[j+1]] == 0)): x3.append(t2[j]) x3.append(t2[j+1]) x2.append(Ta[i + IX[j]]) x2.append(Ta[i + IX[j+1]]) x4.append(date[i + IX[j]]) x4.append(date[i + IX[j+1]]) ks = ks + n1 break TC = copy.deepcopy(nparray(x2)) PV = copy.deepcopy(nparray(x3)) betafit = 
    # --------------------------------------------------------------------------
    # Regression to the Lloyd-Taylor equation (nighttime respiration)
    # --------------------------------------------------------------------------
    print 'Regression to Lloyd-Taylor equation'
    if E0_const:
        # long-term fit of both Rref and E0 over the whole record
        for i in range(ni - 1, n_Compensate, num_point_per_day):
            t1 = npzeros(nd)
            for j in range(nd):
                t1[j] = Fsc[i + j]
            # sort to 'descend'
            t2, IX = Common.matlab_sort(t1)
            k2 = 0
            for k in range(nd - 1):
                if ((not isnan(t2[k])) and
                        (t2[k] < upper_Fc) and
                        (t2[k + 1] > Fc_limit)):
                    k2 = k2 + 1
            if k2 >= 2:
                for j in range(nd - 1):
                    if ((itime[i + 1 + IX[j]] == 0) and
                            (not isnan(t2[j])) and
                            (not isnan(Ta[i + 1 + IX[j]])) and
                            (t2[j] < upper_Fc) and
                            (t2[j + 1] > Fc_limit) and
                            (iustar[i + IX[j]] == 0) and
                            (iustar[i + IX[j + 1]] == 0)):
                        x3.append(t2[j])
                        x3.append(t2[j + 1])
                        x2.append(Ta[i + IX[j]])
                        x2.append(Ta[i + IX[j + 1]])
                        x4.append(date[i + IX[j]])
                        x4.append(date[i + IX[j + 1]])
                        ks = ks + n1
                        break
        TC = copy.deepcopy(nparray(x2))
        PV = copy.deepcopy(nparray(x3))
        betafit = spfmin(Common.Reco, beta0, args=(TC, PV), disp=False)
        A = betafit[0]
        B = betafit[1]
        yfit = npzeros(len(TC))
        for i in range(len(TC)):
            yfit[i] = A * exp(B * (1 / (10 + 46.02) - 1 / (TC[i] + 46.02)))
        E0 = betafit[1]
        E0l = copy.deepcopy(E0)

    # (commented-out MATLAB plotting/regression residue removed here:
    #  figure(1)/figure(5) plots of air temperature vs. ecosystem respiration
    #  and an lsqcurvefit version of the same fit)

    x2 = []
    x3 = []
    t1 = []
    t2 = []
    TC = []
    PV = []
    yfit = npzeros(len(TC))

    # Moving-window (short-term) regression to the Lloyd-Taylor equation:
    # num_day_re-day windows advanced by noverlap days
    delta = (60 / avgtime) * 24 * num_day_re
    dnoverlap = (60 / avgtime) * 24 * noverlap
    jj = 0
    sday = []
    Rref = []
    RE_limit = []
    stdev_E0 = []
    E0v = []
    REs = []
    Taylor_date = []
    yfit_array = []
    for i in range(0, n_L1, dnoverlap):
        i0 = int(i - delta / 2)
        i1 = int(i + delta / 2)
        if i0 < 1:
            i0 = 0
            i1 = int(i0 + delta)
        if i1 >= n_L1:
            i0 = int(n_L1 - delta) - 1
            i1 = n_L1
        ks = 1
        for j in range(i0 + ni - 1, i1, num_point_per_day):
            t1 = npzeros(nd)
            for k in range(nd):
                t1[k] = Fsc[j + k]
            # sort to 'descend'
            t2, IX = Common.matlab_sort(t1)
            k2 = 1
            for k in range(nd - 1):
                if ((not isnan(t2[k])) and
                        (t2[k] < upper_Fc) and
                        (t2[k + 1] > Fc_limit)):
                    k2 = k2 + 1
            if k2 >= n1:
                for k in range(nd - 1):
                    if ((itime[j + 1 + IX[k]] == 0) and
                            (not isnan(t2[k])) and
                            (not isnan(Ta[j + IX[k]])) and
                            (t2[k] < upper_Fc) and
                            (t2[k + 1] > Fc_limit) and
                            (iustar[j + IX[k]] == 0) and
                            (iustar[j + IX[k + 1]] == 0)):
                        x3.append(t2[k])
                        x3.append(t2[k + 1])
                        x2.append(Ta[j + IX[k]])
                        x2.append(Ta[j + IX[k + 1]])
                        Taylor_date.append(str(date[j + IX[k]]))
                        Taylor_date.append(str(date[j + IX[k + 1]]))
                        ks = ks + n1
                        break
        ks = ks - 1
        if ks < 6:
            # too few pairs in this window: no fit
            if E0_const:
                Rref.append(float('NaN'))
                RE_limit.append(float('NaN'))
                jj = jj + 1
            else:
                Rref.append(float('NaN'))
                E0v.append(float('NaN'))
                stdev_E0.append(float('NaN'))
                RE_limit.append(float('NaN'))
                jj = jj + 1
        else:
            TC = copy.deepcopy(nparray(x2))
            PV = copy.deepcopy(nparray(x3))
            if E0_const:
                # fit Rref only, holding E0 fixed at the long-term value
                betafit = spfmin(Common.Reco2, beta0, args=(TC, PV, E0l),
                                 disp=False)
                A = betafit[0]
                Rref.append(A)
                for j in range(len(TC)):
                    yfit = A * exp(E0 * (1.0 / (10.0 + 46.02) -
                                         1.0 / (TC[j] + 46.02)))
                    yfit_array.append(yfit)
                    REs.append(PV[j] - yfit)
                sz = nparray(REs).shape
                upper = fabs(Common.tq(gap_limit, sz[0] - 1))
                RE_limit.append(upper * Common.stdn1(nparray(REs)) / sqrt(sz[0]))
                jj = jj + 1
            else:
                betafit = spfmin(Common.Reco2, beta0, args=(TC, PV, E0))
                A = betafit[0]
                B = betafit[1]
                Rref.append(A)
                E0v.append(B)
                if (B < 0) or (B > 450):
                    # reject unphysical E0 (the original appended a second
                    # element here, which desynchronizes the list; replacing
                    # the fitted value is presumably what was intended)
                    E0v[-1] = float('NaN')
                for j in range(len(TC)):
                    yfit = A * exp(E0v[jj] * (1.0 / (10.0 + 46.02) -
                                              1.0 / (TC[j] + 46.02)))
                    yfit_array.append(yfit)
                    REs.append(PV[j] - yfit)
                sz = nparray(REs).shape
                upper = abs(Common.tq(gap_limit, sz[0] - 1))
                stdev_E0.append(Common.stdn1(REs) / sqrt(sz[0]))
                RE_limit.append(upper * Common.stdn1(nparray(REs)) / sqrt(sz[0]))
                jj = jj + 1

            # Regression to Lloyd-Taylor equation with 28-day segmentation:
            # dump this window's fit to a per-window plot file
            date_extracted = re.search(r'^(\d{4}-\d{2}-\d{2})',
                                       str(Taylor_date[0]))
            if date_extracted is not None:
                fname = 'Plot_L2_1_' + str(date_extracted.group(0)) + '.csv'
                output_plot_1_file_path = os.path.join(output_path, fname)
                try:
                    output_plot_1_fp = open(output_plot_1_file_path, 'w+')
                except IOError:
                    print "IO error; check the output file: ", output_plot_1_file_path
                    return 'L2 failed'
                # note: yfit_array accumulates across windows, so these rows
                # appear to reuse the earliest fits rather than this window's
                for i2 in range(len(TC)):
                    file_plot_str = StringIO()
                    file_plot_str.write(Taylor_date[i2] + ',')         # 1
                    file_plot_str.write(str(A) + ',')                  # 2
                    file_plot_str.write(str(B) + ',')                  # 3
                    file_plot_str.write(str(TC[i2]) + ',')             # 4
                    file_plot_str.write(str(PV[i2]) + ',')             # 5
                    file_plot_str.write(str(yfit_array[i2]) + '\n')    # 6
                    output_plot_string = file_plot_str.getvalue()
                    output_plot_1_fp.write(output_plot_string)
                output_plot_1_fp.close()

        sday_temp = []
        sday_temp.append(i)
        sday_temp.append(i0)
        sday_temp.append(i1)
        sday.append(sday_temp)
        x2 = []
        x3 = []
        t1 = []
        t2 = []
        TC = []
        PV = []
        Taylor_date = []
        REs = []
        yfit = []
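    # Illustrative sketch (not part of the original pipeline): the Lloyd-Taylor
    # (1994) model fitted above is
    #     Reco(Ta) = Rref * exp(E0 * (1/(Tref - T0) - 1/(Ta - T0)))
    # with Tref = 10 degC and T0 = -46.02 degC, matching the exp(...) terms in
    # this section. Common.Reco/Reco2 are defined elsewhere in this package;
    # _lt_sse below is a hypothetical stand-in for that kind of objective,
    # minimised e.g. by spfmin (scipy.optimize.fmin).
    def _lloyd_taylor(ta_c, rref, e0, tref=10.0, t0=-46.02):
        """Ecosystem respiration at air temperature ta_c (degC)."""
        return rref * exp(e0 * (1.0 / (tref - t0) - 1.0 / (ta_c - t0)))

    def _lt_sse(params, ta_arr, reco_arr):
        """Sum of squared residuals of the Lloyd-Taylor fit; params = (Rref, E0)."""
        return sum((_lloyd_taylor(t, params[0], params[1]) - r) ** 2
                   for t, r in zip(ta_arr, reco_arr))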
File: ", output_plot_1_file_path return 'L2 failed' for i in range(len(TC)): file_plot_str = StringIO() file_plot_str.write(Taylor_date[i] + ',') #1 file_plot_str.write(str(A) + ',') #2 file_plot_str.write(str(B) + ',') #3 file_plot_str.write(str(TC[i]) + ',') #4 file_plot_str.write(str(PV[i]) + ',' ) #5 file_plot_str.write(str(yfit_array[i]) + '\n' ) #6 output_plot_string = file_plot_str.getvalue() output_plot_1_fp.write(output_plot_string) output_plot_1_fp.close() sday_temp = [] sday_temp.append(i) sday_temp.append(i0) sday_temp.append(i1) sday.append(sday_temp) x2 = [] x3 = [] t1 = [] t2 = [] TC = [] PV = [] Taylor_date = [] REs = [] yfit = [] sday = nparray(sday) if(E0_const == True): print 'Long-term E0 ' E0s = copy.deepcopy(E0l) else: E0v_s = [] stdev_E0_s = [] for k in range(len(E0v)): E0v_s.append(E0v[k]/stdev_E0[k]) stdev_E0_s.append(1/stdev_E0[k]) print 'Short-term E0 ' E0s = npnansum(E0v_s)/npnansum(stdev_E0_s) Rref = [] #REs = [] #RE_limit = [] jj = 0 for i in range(0, n_L1, dnoverlap): i0 = i - delta / 2 i1 = i + delta / 2 if(i0 < 1): i0 = 0 i1 = i0 + delta if(i1 >= n_L1): i0 = n_L1 - delta - 1 i1 = n_L1 ks = 1 for j in range(i0+ni-1, i1, num_point_per_day): t1 = npzeros(nd) for k in range(nd): t1[k] = Fsc[j + k] #Set to 'descend' t2, IX = Common.matlab_sort(t1) k2 = 1 for k in range(nd-1): if((isnan(t2[k]) == False) and \ (t2[k] < upper_Fc) and \ (t2[k+1] > Fc_limit)): k2 = k2 + 1 if(k2 >= n1): for k in range(nd-1): if((itime[j+1 + IX[k]] == 0) and \ (isnan(t2[k]) == False) and \ (isnan(Ta[j + IX[k]]) == False) and \ (t2[k] < upper_Fc) and \ (t2[k+1] > Fc_limit) and \ (iustar[j + IX[k]] == 0) and \ (iustar[j + IX[k+1]] == 0)): x3.append(t2[k]) x3.append(t2[k + 1]) x2.append(Ta[j + IX[k]]) x2.append(Ta[j + IX[k+1]]) ks = ks + n1 break ks = ks - 1 if(ks < 6): # Rref.append(Rref[jj]) # RE_limit.append(RE_limit[jj]) Rref.append(Rref[-1]) RE_limit.append(RE_limit[-1]) if(E0_const != True): stdev_E0.append(stdev_E0[jj]) jj = jj + 1 else: TC = nparray(x2) PV = nparray(x3) betafit = spfmin(Common.Reco2, beta0, args = (TC, PV, E0s), disp=False) A=betafit[0] Rref.append(A) for j in range(len(TC)): yfit = Rref[jj] * exp(E0s * (1 / (10 + 46.02) - 1 / (TC[j] + 46.02))) REs.append(PV[j]-yfit) sz = nparray(REs).shape upper = abs(Common.tq(gap_limit, sz[0]-1)) RE_limit.append(upper*Common.stdn1(REs)/sqrt(sz[0])) jj = jj + 1 x2 = [] x3 = [] t1 = [] t2 = [] TC = [] PV = [] #for k in REs: # print k REs = [] #for k in Rref: # print k ## ks = 0 nsp2 = npzeros((n_L1 / num_point_per_day)) RE = npzeros(n_L1) GPP = npzeros(n_L1) for i in range(n_L1): RE[i] = float('NaN') GPP[i] = float('NaN') for i in range(0, n_L1, num_point_per_day): i0 = i i1 = i + num_point_per_day if(i0 >=sday[ks][1]): ks = ks + 1 if(i0 >= sday[len(sday)-1][1]): ks = len(sday)-1 for j in range(i0, i1): if(E0_const == True): yfit=Rref[ks-1] * exp(E0l * (1.0 / (10 + 46.02) - 1.0 / (Ta[j] + 46.02))) else: yfit=Rref[ks-1] * exp(E0s * (1.0 / (10 + 46.02) - 1.0 / (Ta[j] + 46.02))) RE[j] = yfit if(itime[j]==0): # nighttime condition RE[j] = Fc_filled[j] if((isnan(Fsc[j]) == True) or \ ((Fsc[j]-yfit) < RE_limit[ks-1]) or \ ((Fsc[j]-yfit) > 1.0 * RE_limit[ks-1]) or \ (iustar[j] == 1)): nsp2[ks-1] = nsp2[ks-1] + 1 Fc_filled[j] = yfit RE[j] = Fc_filled[j] GPP[j] = RE[j]- Fc_filled[j] #figure(2) #plot(time][Fsc][time][Fc_filled[:],'or') #set(gca,'XTick',[year0:1/12:year0+0.9999999]) #set(gca,'xticklabel',xticks) #ylim = [-1.5][1.5] #set(gca,'xLim',xlim(:)) #ylabel('F_c (mgm^{-2}s^{-1})') 
    # --------------------------------------------------------------------------
    print 'Gap-filling of LE'
    # --------------------------------------------------------------------------
    x2 = []
    x3 = []
    index = []
    LE_filled = copy.deepcopy(LEsc)
    for main_j in range(num_avg):    # loop for gap-filling of H2O fluxes
        seg_start_i = main_j * num_segment
        seg_fin_i = seg_start_i + num_segment
        if (seg_start_i + 2 * num_segment) > n_L1:
            seg_fin_i = n_L1
        x2 = []
        x3 = []
        for i in range(seg_start_i, seg_fin_i):
            ii = 0
            if isnan(LEsc[i]):
                jj = 0
                while (ii < 1) and (jj <= 4):
                    ta_f = Ta[i]
                    rsdn_f = rsdn[i]
                    vpd_f = vpd[i]
                    i0 = i - jj * num_day_2 * num_point_per_day
                    i1 = i + jj * num_day_2 * num_point_per_day + 1
                    if i0 < 1:
                        i0 = 0
                        i1 = 2 * jj * num_day_2 * num_point_per_day + 1
                    if i1 >= n_L1:
                        i0 = n_L1 - 2 * jj * num_day_2 * num_point_per_day - 1
                        i1 = n_L1
                        if i0 < 1:
                            i0 = 0
                    ks = 0
                    for j in range(i0, i1):
                        if ((fabs(vpd_f - vpd[j]) < dvpd) and
                                (fabs(rsdn_f - rsdn[j]) < drsdn) and
                                (fabs(ta_f - Ta[j]) < dta) and
                                (not isnan(LEsc[j]))):
                            x2.append(LEsc[j])
                            x3.append([j, vpd[j], rsdn[j], Ta[j], ks])
                            ks = ks + 1
                    ii = ks
                    if ks >= 1:
                        LE_filled[i] = npmedian(nparray(x2))
                    jj = jj + 1
                    x2 = []
                    x3 = []
                if ii < 1:
                    # fall back to matching on radiation only
                    jj = 0
                    while ii < 1:
                        rsdn_f = rsdn[i]
                        i0 = i - jj * num_day_2 * num_point_per_day
                        i1 = i + jj * num_day_2 * num_point_per_day + 1
                        if i0 < 1:
                            i0 = 0
                            i1 = 2 * jj * num_day_2 * num_point_per_day + 1
                        if i1 >= n_L1:
                            i0 = n_L1 - 2 * jj * num_day_2 * num_point_per_day - 1
                            i1 = n_L1
                            if i0 < 1:
                                i0 = 0
                        ks = 0
                        for j in range(i0, i1):
                            if ((fabs(rsdn_f - rsdn[j]) < drsdn) and
                                    (not isnan(LEsc[j]))):
                                x2.append(LEsc[j])
                                x3.append([j, vpd[j], rsdn[j], Ta[j], ks])
                                ks = ks + 1
                        ii = ks
                        if ks >= 1:
                            LE_filled[i] = npmedian(nparray(x2))
                        jj = jj + 1
                        x2 = []
                        x3 = []
        x2 = []
        x3 = []
    ks = 0

    # (commented-out MATLAB plotting residue removed: figure(3), time series of
    #  LEsc and LE_filled, ylabel 'LE (Wm^{-2})')

    # --------------------------------------------------------------------------
    print 'Gap-filling of H (sensible heat flux)'
    # --------------------------------------------------------------------------
    x2 = []
    x3 = []
    index = []
    H_filled = copy.deepcopy(Hsc)
    for main_j in range(num_avg):    # loop for gap-filling of sensible heat flux
        seg_start_i = main_j * num_segment
        seg_fin_i = seg_start_i + num_segment
        if (seg_start_i + 2 * num_segment) >= n_L1:
            seg_fin_i = n_L1
        x2 = []
        x3 = []
        for i in range(seg_start_i, seg_fin_i):
            ii = 0
            if isnan(Hsc[i]):
                jj = 0
                while (ii < 1) and (jj <= 4):
                    ta_f = Ta[i]
                    rsdn_f = rsdn[i]
                    vpd_f = vpd[i]
                    i0 = i - jj * num_day_2 * num_point_per_day
                    i1 = i + jj * num_day_2 * num_point_per_day + 1
                    if i0 < 1:
                        i0 = 0
                        i1 = 2 * jj * num_day_2 * num_point_per_day + 1
                    if i1 >= n_L1:
                        i0 = n_L1 - 2 * jj * num_day_2 * num_point_per_day - 1
                        i1 = n_L1
                        if i0 < 1:
                            i0 = 0
                    ks = 0
                    for j in range(i0, i1):
                        if ((fabs(vpd_f - vpd[j]) < dvpd) and
                                (fabs(rsdn_f - rsdn[j]) < drsdn) and
                                (fabs(ta_f - Ta[j]) < dta) and
                                (not isnan(Hsc[j]))):
                            x2.append(Hsc[j])
                            x3.append([j, vpd[j], rsdn[j], Ta[j], ks])
                            ks = ks + 1
                    ii = ks
                    if ks >= 1:
                        H_filled[i] = npmedian(nparray(x2))
                    jj = jj + 1
                    x2 = []
                    x3 = []
                if ii < 1:
                    # fall back to matching on radiation only
                    jj = 0
                    while ii < 1:
                        rsdn_f = rsdn[i]
                        i0 = i - jj * num_day_2 * num_point_per_day
                        i1 = i + jj * num_day_2 * num_point_per_day + 1
                        if i0 < 1:
                            i0 = 0
                            i1 = 2 * jj * num_day_2 * num_point_per_day + 1
                        if i1 >= n_L1:
                            i0 = n_L1 - 2 * jj * num_day_2 * num_point_per_day - 1
                            i1 = n_L1
                            if i0 < 1:
                                i0 = 0
                        ks = 0
                        for j in range(i0, i1):
                            if ((fabs(rsdn_f - rsdn[j]) < drsdn) and
                                    (not isnan(Hsc[j]))):
                                ks = ks + 1
                                x2.append(Hsc[j])
                                x3.append([j, vpd[j], rsdn[j], Ta[j], ks])
                        ii = ks
                        if ks >= 1:
                            H_filled[i] = npmedian(nparray(x2))
                        jj = jj + 1
                        x2 = []
                        x3 = []
        x2 = []
        x3 = []
    ks = 0

    # (commented-out MATLAB plotting residue removed: figure(4), time series of
    #  Hsc and H_filled, ylabel 'H (Wm^{-2})')
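    # Design note: the Fsc, LEsc and Hsc blocks above repeat the same
    # expanding-window median fill. A hypothetical refactor (sketch only;
    # _expanding_median_fill is not called anywhere in this module) could
    # parameterise the flux series and the match predicate:
    def _expanding_median_fill(k, flux, match, base_width, max_steps=4):
        """Widen the window around k until match(j) yields donors; fill with their median."""
        for step in range(max_steps + 1):
            lo = max(0, k - step * base_width)
            hi = min(len(flux), k + step * base_width + 1)
            donors = [flux[j] for j in range(lo, hi)
                      if match(j) and not isnan(flux[j])]
            if donors:
                return npmedian(nparray(donors))
        return float('NaN')    # no donors even at the widest window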
    print '-------------------------------------------------------------------'
    print 'Calculating daily mean values'
    print '-------------------------------------------------------------------'
    print 'calculation of daily mean. Unit: [gC/m2/day].'
    print '-------------------------------------------------------------------'
    Fsc_daily = npzeros(n_L1 / num_point_per_day)
    GPP_daily = npzeros(n_L1 / num_point_per_day)
    RE_daily = npzeros(n_L1 / num_point_per_day)
    ET_daily = npzeros(n_L1 / num_point_per_day)
    LE_daily = npzeros(n_L1 / num_point_per_day)
    H_daily = npzeros(n_L1 / num_point_per_day)
    k = 0
    for i in range(0, n_L1, num_point_per_day):
        for j in range(i, i + num_point_per_day):
            Fsc_daily[k] = Fsc_daily[k] + Fc_filled[j]
            GPP_daily[k] = GPP_daily[k] + GPP[j]
            RE_daily[k] = RE_daily[k] + RE[j]
            ET_daily[k] = ET_daily[k] + LE_filled[j]
        # mgCO2/m2/s summed over the day -> gC/m2/day
        # (x 60*avgtime s per record, /1000 mg->g, x 12/44 CO2->C)
        Fsc_daily[k] = Fsc_daily[k] * (60 * float(avgtime) / 1000 * 12 / 44)
        GPP_daily[k] = GPP_daily[k] * (60 * float(avgtime) / 1000 * 12 / 44)
        RE_daily[k] = RE_daily[k] * (60 * float(avgtime) / 1000 * 12 / 44)
        # W/m2 summed over the day -> mm/day (/ 2440 J/g, /1000 g->kg, i.e. mm)
        ET_daily[k] = ET_daily[k] * (60 * float(avgtime) / (2440) / 1000)
        k = k + 1

    NEE_annual = npmean(Fc_filled) * float((1800 * 48 * (n_L1 / (60.0 / avgtime * 24)) * 12 / 44 / 1000.0))
    GPP_annual = npmean(GPP) * float((1800 * 48 * (n_L1 / (60.0 / avgtime * 24)) * 12 / 44 / 1000.0))
    RE_annual = npmean(RE) * float((1800 * 48 * (n_L1 / (60.0 / avgtime * 24)) * 12 / 44 / 1000.0))
    NEE_std_annual = Common.stdn1(Fsc_daily) / sqrt(n_L1 / (60.0 / avgtime * 24)) * (n_L1 / (60.0 / avgtime * 24))
    GPP_std_annual = Common.stdn1(GPP_daily) / sqrt(n_L1 / (60.0 / avgtime * 24)) * (n_L1 / (60.0 / avgtime * 24))
    RE_std_annual = Common.stdn1(RE_daily) / sqrt(n_L1 / (60.0 / avgtime * 24)) * (n_L1 / (60.0 / avgtime * 24))
    print 'NEE_annual', NEE_annual
    print 'GPP_annual', GPP_annual
    print 'RE_annual', RE_annual
    print 'NEE_std_annual', NEE_std_annual
    print 'GPP_std_annual', GPP_std_annual
    print 'RE_std_annual', RE_std_annual

    print '-------------------------------------------------------------------'
    print 'Calculating daily mean ETs'
    print '-------------------------------------------------------------------'
    print 'calculation of daily mean ET. Unit: [mm/day].'
    print '-------------------------------------------------------------------'
    k = 0
    for i in range(0, n_L1, num_point_per_day):
        for j in range(i, i + num_point_per_day):
            LE_daily[k] = LE_daily[k] \
                + LE_filled[j] * (60 * float(avgtime) / (2440 * 1000))
        k = k + 1
    LE_annual = npmean(LE_filled) * (1800.0 * 48.0 * float(n_L1 / (60.0 / avgtime * 24)) / 2440.0 / 1000.0)
    LE_std_annual = Common.stdn1(LE_daily) / sqrt(n_L1 / float(60.0 / avgtime * 24.0)) * float(n_L1 / (60.0 / avgtime * 24.0))
    print 'LE_annual', LE_annual
    print 'LE_std_annual', LE_std_annual
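    # Worked example of the unit conversions above, assuming avgtime = 30 (min):
    # each record spans 60*30 = 1800 s.
    #   CO2: mgCO2/m2/s x 1800 s / 1000 (mg->g) x 12/44 (CO2->C) per record,
    #        summed over the day's 48 records -> gC/m2/day.
    #   LE:  W/m2 (J/m2/s) x 1800 s / 2440 (J/g) / 1000 (g->kg) -> mm per
    #        record; a constant 300 W/m2 over a day integrates to ~10.6 mm.
    def _wm2_day_to_mm(le_wm2, lam_j_per_g=2440.0):
        """Daily ET (mm) of a constant latent heat flux (W/m2), 2440 J/g as above."""
        return le_wm2 * 86400.0 / lam_j_per_g / 1000.0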
    print '-------------------------------------------------------------------'
    print 'Calculating daily mean heating rate'
    print '-------------------------------------------------------------------'
    print 'calculation of daily mean heating rate. Unit: [MJ/m2/day].'
    print '-------------------------------------------------------------------'
    k = 0
    for i in range(0, n_L1, num_point_per_day):
        for j in range(i, i + num_point_per_day):
            # note: 10E6 equals 1.0e7; 1.0e6 (J -> MJ) may have been intended
            H_daily[k] = H_daily[k] \
                + H_filled[j] * (60 * float(avgtime) / (1004 * 1.0)) / (10E6)
        k = k + 1
    H_annual = sum(H_daily)
    H_std_annual = Common.stdn1(H_daily)
    print 'H_annual', H_annual
    print 'H_std_annual', H_std_annual

    # Half-hourly plot output: measured vs. gap-filled fluxes
    for i in range(len(Fsc)):
        file_plot_str = StringIO()
        file_plot_str.write(str(date[i]) + ',')          # 1
        file_plot_str.write(str(Fsc[i]) + ',')           # 2
        file_plot_str.write(str(Fc_filled[i]) + ',')     # 3
        file_plot_str.write(str(LEsc[i]) + ',')          # 4
        file_plot_str.write(str(LE_filled[i]) + ',')     # 5
        file_plot_str.write(str(Hsc[i]) + ',')           # 6
        file_plot_str.write(str(H_filled[i]) + '\n')     # 7
        output_plot_string = file_plot_str.getvalue()
        output_plot_2_fp.write(output_plot_string)
    output_plot_2_fp.close()

    # For output; assume data start from 0:00 and repeat each daily value
    # for every record of that day
    output_Fsc_daily = npzeros(n_L1)
    output_GPP_daily = npzeros(n_L1)
    output_RE_daily = npzeros(n_L1)
    output_ET_daily = npzeros(n_L1)
    output_LE_daily = npzeros(n_L1)
    output_H_daily = npzeros(n_L1)
    j = 0
    for i in range(n_L1):
        output_Fsc_daily[i] = Fsc_daily[j]
        output_GPP_daily[i] = GPP_daily[j]
        output_RE_daily[i] = RE_daily[j]
        output_ET_daily[i] = ET_daily[j]
        output_H_daily[i] = H_daily[j]
        output_LE_daily[i] = LE_daily[j]
        if (i + 1) % num_point_per_day == 0:
            j = j + 1

    for i in range(n_L1):
        file_str = StringIO()
        file_str.write(str(output_ET_daily[i]) + ',')    # 1
        file_str.write(str(Fc_filled[i]) + ',')          # 2
        file_str.write(str(output_Fsc_daily[i]) + ',')   # 3
        file_str.write(str(GPP[i]) + ',')                # 4
        file_str.write(str(output_GPP_daily[i]) + ',')   # 5
        file_str.write(str(GPP_annual) + ',')            # 6
        file_str.write(str(GPP_std_annual) + ',')        # 7
        file_str.write(str(H_filled[i]) + ',')           # 8
        file_str.write(str(output_H_daily[i]) + ',')     # 9
        file_str.write(str(H_annual) + ',')              # 10
        file_str.write(str(H_std_annual) + ',')          # 11
        file_str.write(str(LE_filled[i]) + ',')          # 12
        file_str.write(str(output_LE_daily[i]) + ',')    # 13
        file_str.write(str(LE_annual) + ',')             # 14
        file_str.write(str(LE_std_annual) + ',')         # 15
        file_str.write(str(NEE_annual) + ',')            # 16
        file_str.write(str(NEE_std_annual) + ',')        # 17
        file_str.write(str(output_RE_daily[i]) + ',')    # 18
        file_str.write(str(RE_annual) + ',')             # 19
        file_str.write(str(RE_std_annual) + ',')         # 20
        file_str.write(str(co2[i]) + ',')                # 21
        file_str.write(str(rsdn[i]) + ',')               # 22
        file_str.write(str(ea[i]) + ',')                 # 23
        file_str.write(str(h2o[i]) + ',')                # 24
        file_str.write(str(Ta[i]) + ',')                 # 25
        file_str.write(str(vpd[i]) + '\n')               # 26
        output_string = file_str.getvalue()
        output_fp.write(output_string)
    output_fp.close()

    return 'L2 Done'