def check_dependencies():
    """
    This is a general "dependency-catcher" function, which checks whether a
    few packages important for the pipeline can be imported. Version
    requirements for some dependencies can be hidden; this makes them
    explicit, especially if you are worried about version incompatibilities.
    """
    import numpy as np

    import cdips as cd

    # require astroquery >= 0.4.0
    # see: https://astroquery.readthedocs.io/en/latest/
    # `pip install --pre astroquery`
    import astroquery as aq
    updatemsg = ('Need to update astroquery. Preferred method '
                 '`pip install --pre astroquery`')
    assert int(aq.__version__.split('.')[1]) >= 4, updatemsg

    # require astropy >= 4.0
    # generally `conda update astropy` should be fine.
    import astropy as ap
    updatemsg = ('Need to update astropy. Preferred method '
                 '`conda update astropy`')
    assert int(ap.__version__.split('.')[0]) >= 4, updatemsg

    #
    # wotan bleeding edge install:
    # clone and setup.py install https://github.com/hippke/wotan
    #
    import pygam
    import wotan
    from wotan import version
    wotanversion = version.WOTAN_VERSIONING
    wotanversiontuple = tuple(wotanversion.split('.'))
    updatemsg = ('Need to update wotan. Please clone & setup.py '
                 'install https://github.com/hippke/wotan')
    assert int(wotanversiontuple[0]) >= 1, updatemsg
    assert int(wotanversiontuple[1]) >= 10, updatemsg

    #
    # TLS bleeding edge install for verbose kwarg, among others
    #
    from transitleastsquares.tls_constants import TLS_VERSION
    tlsversion = TLS_VERSION.split(' ')[4].split('.')
    updatemsg = ('Need to update transitleastsquares. Please clone & setup.py '
                 'install https://github.com/hippke/tls')
    assert int(tlsversion[0]) >= 1, updatemsg
    assert int(tlsversion[1]) >= 0, updatemsg
    assert int(tlsversion[2]) >= 28, updatemsg

    # check if pspline works with the expected number of args as of wotan
    # v1.10. nb. this also requires the bugfix with the stdev cut.
    rng = np.random.default_rng(42)
    time = np.linspace(0, 10, 100)
    flux = (np.ones(len(time))
            + 1e-2 * np.linspace(-1, 1, len(time))
            + rng.random(len(time)) * 1e-3)
    edge_cutoff = 0
    max_splines = 4
    stdev_cut = 1.5
    return_nsplines = False
    verbose = False
    from wotan.pspline import pspline
    import warnings
    warnings.filterwarnings("ignore", category=RuntimeWarning)
    trend_flux, n_splines = pspline(
        time, flux, edge_cutoff, max_splines, stdev_cut, return_nsplines,
        verbose
    )

    #
    # notch and locor:
    # clone and setup.py install https://github.com/lgbouma/Notch_and_LOCoR,
    # which was forked from Aaron Rizzuto's implementation.
    # also requires:
    # clone and setup.py install https://github.com/evertrol/mpyfit
    #
    from notch_and_locor.core import sliding_window
    from notch_and_locor.core import rcomb

    #
    # photutils: used in vetting report creation
    # $ conda install -c conda-forge photutils
    #
    import photutils

    print('testing.check_dependencies passed!')
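
# NOTE: the split-and-compare checks above are fragile once a package crosses
# a major-version boundary (e.g. the astroquery check inspects only the minor
# version, so a future astroquery 1.0 would fail it). A more robust sketch,
# assuming the `packaging` package is importable (it ships with pip), is to
# compare full version objects:

def _require_version(module, minimum):
    """
    Hypothetical helper (not part of the original pipeline): assert that
    `module.__version__` is at least `minimum`.
    """
    from packaging.version import Version
    if Version(module.__version__) < Version(minimum):
        raise ImportError('Need {} >= {}, found {}'.format(
            module.__name__, minimum, module.__version__))

# e.g., `_require_version(aq, '0.4.0')` could replace the astroquery assert.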
def flatten(time, flux, window_length=None, edge_cutoff=0,
            break_tolerance=None, cval=None, return_trend=False,
            method='biweight', kernel=None, kernel_size=None,
            kernel_period=None, proportiontocut=constants.PROPORTIONTOCUT,
            robust=False, mask=None):
    """
    ``flatten`` removes low frequency trends in time-series data.

    Parameters
    ----------
    time : array-like
        Time values
    flux : array-like
        Flux values for every time point
    window_length : float
        The length of the filter window in units of ``time`` (usually days),
        or in cadences (for cadence-based sliders ``savgol`` and ``medfilt``).
    method : string, default: ``biweight``
        Detrending method. Time-windowed sliders: ``median``, ``biweight``,
        ``hodges``, ``tau``, ``welsch``, ``huber``, ``huber_psi``,
        ``andrewsinewave``, ``mean``, ``hampel``, ``ramsay``, ``trim_mean``,
        ``hampelfilt``, ``winsorize``. Cadence-based slider: ``medfilt``.
        Splines: ``hspline``, ``rspline``, ``pspline``. Locally weighted
        scatterplot smoothing: ``lowess``. Savitzky-Golay filter: ``savgol``.
        Gaussian processes: ``gp``. Cosine Filtering with Autocorrelation
        Minimization: ``cofiam``. Cosine fitting: ``cosine``. Friedman's
        Super-Smoother: ``supersmoother``. Regularized linear regressions:
        ``ridge``, ``lasso``, ``elasticnet``.
    break_tolerance : float, default: window_length/2
        If there are large gaps in time (larger than ``window_length``/2),
        flatten will split the flux into several sub-lightcurves and apply
        the filter to each individually. ``break_tolerance`` must be in the
        same unit as ``time`` (usually days). To disable this feature, set
        ``break_tolerance`` to 0. If the method is ``supersmoother`` and no
        ``break_tolerance`` is provided, it will be taken as `1` in units of
        ``time``.
    edge_cutoff : float, default: 0
        Trends near edges are less robust. Depending on the data, it may be
        beneficial to remove edges. The ``edge_cutoff`` defines the length
        (in units of time) to be cut off each edge. Default: zero. Cut off
        is maximally ``window_length``/2, as this fills the window
        completely. Applicable only for time-windowed sliders.
    cval : float or int
        Tuning parameter for the robust estimators. See documentation for
        defaults. Larger values make the estimate more efficient but less
        robust. For the super-smoother, ``cval`` determines the bass
        enhancement (smoothness) and can be `None` or in the range
        0 < ``cval`` < 10. For ``savgol``, ``cval`` determines the (integer)
        polynomial order (default: 2).
    proportiontocut : float, default: 0.1
        Fraction to cut off (or fill) from both tails of the distribution
        with methods ``trim_mean`` (or ``winsorize``).
    kernel : str, default: ``squared_exp``
        Choice of ``squared_exp`` (squared exponential), ``matern``,
        ``periodic``, ``periodic_auto``.
    kernel_size : float, default: 1
        The length scale of the Gaussian Process kernel.
    kernel_period : float
        The periodicity of the Gaussian Process kernel (in units of
        ``time``). Must be provided for the kernel ``periodic``. Cannot be
        specified for ``periodic_auto``, for which it is determined
        automatically using a Lomb-Scargle periodogram pre-search.
    robust : bool, default: False
        If `True`, the fitting process will be run iteratively. In each
        iteration, 2-sigma outliers from the fitted trend will be clipped
        until convergence. Supported by the Gaussian Process kernels
        ``squared_exp`` and ``matern``, as well as ``cosine`` fitting.
    return_trend : bool, default: False
        If `True`, the method will return a tuple of two elements
        (``flattened_flux``, ``trend_flux``) where ``trend_flux`` is the
        removed trend.
    Returns
    -------
    flatten_flux : array-like
        Flattened flux.
    trend_flux : array-like
        Trend in the flux. Only returned if ``return_trend`` is `True`.
    """
    if method not in constants.methods:
        raise ValueError('Unknown detrending method')

    # Numba can't handle strings, so we're passing the location estimator as an int:
    if method == 'biweight':
        method_code = 1
    elif method == 'andrewsinewave':
        method_code = 2
    elif method == 'welsch':
        method_code = 3
    elif method == 'hodges':
        method_code = 4
    elif method == 'median':
        method_code = 5
    elif method == 'mean':
        method_code = 6
    elif method == 'trim_mean':
        method_code = 7
    elif method == 'winsorize':
        method_code = 8
    elif method == 'hampelfilt':
        method_code = 9
    elif method == 'huber_psi':
        method_code = 10
    elif method == 'tau':
        method_code = 11

    error_text = 'proportiontocut must be >0 and <0.5'
    if not isinstance(proportiontocut, float):
        raise ValueError(error_text)
    if proportiontocut >= 0.5 or proportiontocut <= 0:
        raise ValueError(error_text)

    # Default cval values for robust location estimators
    if cval is None:
        if method == 'biweight':
            cval = 5
        elif method == 'andrewsinewave':
            cval = 1.339
        elif method == 'welsch':
            cval = 2.11
        elif method == 'huber':
            cval = 1.5
        elif method == 'huber_psi':
            cval = 1.28
        elif method in ['trim_mean', 'winsorize']:
            cval = proportiontocut
        elif method == 'hampelfilt':
            cval = 3
        elif method == 'tau':
            cval = 4.5
        elif method == 'hampel':
            cval = (1.7, 3.4, 8.5)
        elif method == 'ramsay':
            cval = 0.3
        elif method == 'savgol':  # polyorder
            cval = 2  # int
        elif method in ('ridge', 'lasso', 'elasticnet'):
            cval = 1
        else:
            # avoid numba type inference error: None type multiplied with float
            cval = 0

    if cval is not None and method == 'supersmoother':
        if cval > 0 and cval < 10:
            supersmoother_alpha = cval
        else:
            supersmoother_alpha = None

    # Maximum gap in time should be half a window size.
    # Any larger is nonsense, because then the array has a full window of data
    if window_length is None:
        window_length = 2  # so that break_tolerance = 1 in the supersmoother case
    if break_tolerance is None:
        break_tolerance = window_length / 2
    if break_tolerance == 0:
        break_tolerance = inf

    # Numba is very fast, but doesn't play nicely with NaN values
    # Therefore, we make new time-flux arrays with only the floating point values
    # All calculations are done within these arrays
    # Afterwards, the trend is transplanted into the original arrays (with the NaNs)
    if mask is None:
        mask = np.ones(len(time))
    else:
        mask = array(~mask, dtype=float64)  # Invert to stay consistent with TLS
    time = array(time, dtype=float64)
    flux = array(flux, dtype=float64)
    mask_nans = isnan(time * flux)

    time_compressed = np.ma.compressed(np.ma.masked_array(time, mask_nans))
    flux_compressed = np.ma.compressed(np.ma.masked_array(flux, mask_nans))
    mask_compressed = np.ma.compressed(np.ma.masked_array(mask, mask_nans))

    # Get the indexes of the gaps
    gaps_indexes = get_gaps_indexes(time_compressed, break_tolerance=break_tolerance)
    trend_flux = array([])
    trend_segment = array([])

    # Iterate over all segments
    for i in range(len(gaps_indexes) - 1):
        time_view = time_compressed[gaps_indexes[i]:gaps_indexes[i + 1]]
        flux_view = flux_compressed[gaps_indexes[i]:gaps_indexes[i + 1]]
        mask_view = mask_compressed[gaps_indexes[i]:gaps_indexes[i + 1]]
        methods = [
            "biweight", "andrewsinewave", "welsch", "hodges", "median",
            "mean", "trim_mean", "winsorize", "huber_psi", "hampelfilt",
            "tau"
        ]
        if method in methods:
            trend_segment = running_segment(time_view, flux_view, mask_view,
                                            window_length, edge_cutoff, cval,
                                            method_code)
        elif method in ["huber", "hampel", "ramsay"]:
            trend_segment = running_segment_slow(time_view, flux_view,
                                                 mask_view, window_length,
                                                 edge_cutoff, cval, method)
        elif method == 'lowess':
            trend_segment = lowess(time_view, flux_view, mask_view,
                                   window_length)
        elif method == 'hspline':
            trend_segment = detrend_huber_spline(time_view, flux_view,
                                                 mask_view,
                                                 knot_distance=window_length)
        elif method == 'supersmoother':
            try:
                from supersmoother import SuperSmoother as supersmoother
            except ImportError:
                raise ImportError('Could not import supersmoother')
            win = window_length / (max(time) - min(time))
            trend_segment = supersmoother(
                alpha=supersmoother_alpha,
                primary_spans=(constants.primary_span_lower * win, win,
                               constants.primary_span_upper * win),
                middle_span=constants.middle_span * win,
                final_span=constants.upper_span * win
                ).fit(time_view, flux_view).predict(time_view)
        elif method == 'cofiam':
            trend_segment = detrend_cofiam(time_view, flux_view,
                                           window_length)
        elif method == 'cosine':
            trend_segment = detrend_cosine(time_view, flux_view,
                                           window_length, robust, mask_view)
        elif method == 'savgol':
            if window_length % 2 == 0:
                window_length += 1  # savgol requires an odd window length
            trend_segment = savgol_filter(flux_view, window_length,
                                          polyorder=int(cval))
        elif method == 'medfilt':
            trend_segment = medfilt(flux_view, window_length)
        elif method == 'gp':
            #print('Segment', i + 1, 'of', len(gaps_indexes) - 1)
            trend_segment = make_gp(time_view, flux_view, mask_view, kernel,
                                    kernel_size, kernel_period, robust)
        elif method == 'rspline':
            trend_segment = iter_spline(time_view, flux_view, mask_view,
                                        window_length)
        elif method == 'pspline':
            print('Segment', i + 1, 'of', len(gaps_indexes) - 1)
            trend_segment = pspline(time_view, flux_view)
        elif method in ("ridge", "lasso", "elasticnet"):
            trend_segment = regression(time_view, flux_view, method,
                                       window_length, cval)
        trend_flux = append(trend_flux, trend_segment)
    # Insert results of non-NaNs into original data stream
    trend_lc = full(len(time), nan)
    mask_nans = where(~mask_nans)[0]
    for idx in range(len(mask_nans)):
        trend_lc[mask_nans[idx]] = trend_flux[idx]
    trend_lc[trend_lc == 0] = np.nan  # avoid division by zero
    flatten_lc = flux / trend_lc
    if return_trend:
        return flatten_lc, trend_lc
    return flatten_lc
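
# A minimal usage sketch of `flatten` (illustrative only: the synthetic trend
# period, noise level, and 0.5 day window below are arbitrary assumptions):

def _demo_flatten():
    import numpy as np
    rng = np.random.default_rng(0)
    time = np.linspace(0, 30, 3000)  # 30 days of synthetic cadences
    # slow sinusoidal "stellar variability" trend plus white noise
    flux = (1 + 1e-2 * np.sin(2 * np.pi * time / 7.5)
            + 1e-4 * rng.standard_normal(len(time)))
    # a 0.5 day biweight window removes the 7.5 day trend while preserving
    # short-duration signals such as transits
    flat_flux, trend_flux = flatten(time, flux, method='biweight',
                                    window_length=0.5, return_trend=True)
    return flat_flux, trend_flux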
def detrend_flux(time, flux, break_tolerance=0.5, method='pspline', cval=None,
                 window_length=None, edge_cutoff=None):
    """
    Apply the wotan flatten function. Implemented methods include pspline,
    biweight, and median.

    Args:
        time, flux (np.ndarray): arrays of times and fluxes.

        break_tolerance (float): maximum gap in time beyond which the light
        curve is split into timegroups, each of which is detrended
        individually.

        method (str): 'pspline', 'biweight', 'median', or 'none'.

        cval (float): the wotan 'biweight' tuning parameter.

        window_length (float): length of the window in days.

        edge_cutoff (float): how much of the edge to remove. Only applies to
        the 'median' method.

    Returns:
        flat_flux, trend_flux (np.ndarray): the flattened array, and the
        trend vector that was divided out.

    See also:
        https://wotan.readthedocs.io/en/latest/
    """

    # Initial pre-processing: verify that under break_tolerance, time and flux
    # do not have any sections with <=6 points. Spline detrending routines do
    # not fit such short segments well (they end up fitting lines).
    N_groups, group_inds = find_lc_timegroups(time, mingap=break_tolerance)
    SECTION_CUTOFF = 6
    for g in group_inds:
        if len(time[g]) <= SECTION_CUTOFF:
            time[g], flux[g] = np.nan, np.nan

    try:
        if method == 'pspline':
            # Detrend each time segment individually, to prevent overfitting
            # based on the `max_splines` parameter.
            flat_fluxs, trend_fluxs = [], []
            for g in group_inds:
                tgtime, tgflux = time[g], flux[g]
                t_min, t_max = np.min(tgtime), np.max(tgtime)
                t_baseline = t_max - t_min
                transit_timescale = 6 / 24.

                # e.g., for a 25 day segment, we want max_splines to be ~100,
                # i.e., 1 spline point every 6 hours. this helps prevent
                # overfitting.
                max_splines = int(t_baseline / transit_timescale)

                # a 2-sigma cutoff is standard, but there's no obvious reason
                # for that choice; generally, anything far from the median
                # shouldn't go into the fit, so a tighter 1.5-sigma cut is
                # used here.
                stdev_cut = 1.5

                edge_cutoff = 0

                # note: major API update in wotan v1.6
                # pspline(time, flux, edge_cutoff, max_splines, stdev_cut,
                #         return_nsplines, verbose)
                _trend_flux, _nsplines = pspline(
                    tgtime, tgflux, edge_cutoff, max_splines, stdev_cut,
                    False, False
                )

                _flat_flux = tgflux / _trend_flux

                flat_fluxs.append(_flat_flux)
                trend_fluxs.append(_trend_flux)

            flat_flux = np.hstack(flat_fluxs)
            trend_flux = np.hstack(trend_fluxs)

        elif method == 'biweight':
            flat_flux, trend_flux = flatten(time, flux, method='biweight',
                                            return_trend=True,
                                            break_tolerance=break_tolerance,
                                            window_length=window_length,
                                            cval=cval)

        elif method == 'median':
            flat_flux, trend_flux = flatten(time, flux, method='median',
                                            return_trend=True,
                                            break_tolerance=break_tolerance,
                                            window_length=window_length,
                                            edge_cutoff=edge_cutoff)

        elif method == 'none':
            flat_flux = flux
            trend_flux = None

        else:
            raise NotImplementedError

    except ValueError as e:
        msg = ('WRN! {}. Probably have a short segment. Trying to nan it out.'
               .format(repr(e)))
        print(msg)

        # nan out the shortest segment(s), which likely caused the error
        SECTION_CUTOFF = min([len(time[g]) for g in group_inds])
        for g in group_inds:
            if len(time[g]) <= SECTION_CUTOFF:
                time[g], flux[g] = np.nan, np.nan

        # NOTE: code duplication here
        if method == 'pspline':
            # matched detrending to do_initial_period_finding
            flat_flux, trend_flux = flatten(time, flux, method='pspline',
                                            return_trend=True,
                                            break_tolerance=break_tolerance,
                                            robust=True)
        elif method == 'biweight':
            # another option:
            flat_flux, trend_flux = flatten(time, flux, method='biweight',
                                            return_trend=True,
                                            break_tolerance=break_tolerance,
                                            window_length=window_length,
                                            cval=cval)
        else:
            raise NotImplementedError

    return flat_flux, trend_flux
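
# A minimal usage sketch of `detrend_flux` (illustrative: the quadratic trend
# and the ~1 day gap below are arbitrary; `break_tolerance=0.5` then splits
# the data into two timegroups that are detrended individually):

def _demo_detrend_flux():
    import numpy as np
    rng = np.random.default_rng(1)
    # two segments separated by a ~1 day gap
    time = np.concatenate([np.linspace(0, 12, 1200),
                           np.linspace(13, 25, 1200)])
    flux = (1 + 5e-3 * (time / 25.)**2
            + 1e-4 * rng.standard_normal(len(time)))
    flat_flux, trend_flux = detrend_flux(time, flux, break_tolerance=0.5,
                                         method='pspline')
    return flat_flux, trend_flux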