Exemplo n.º 1
0
def check_dependencies():
    """
    Verify that the required packages are importable and recent enough.

    Some dependencies are only imported deep inside other routines, so a
    missing or outdated install can stay hidden for a long time. This
    function makes them explicit by importing each one up front and checking
    the versions that matter.

    Checks performed, in order:

    * ``cdips`` is importable.
    * ``astroquery`` is at least version 0.4.
    * ``astropy`` is at least version 4.0.
    * ``pygam`` is importable and ``wotan`` is at least version 1.10.
    * ``transitleastsquares`` is at least version 1.0.28.
    * ``wotan.pspline.pspline`` accepts seven positional arguments and
      returns two values.
    * ``notch_and_locor.core`` provides ``sliding_window`` and ``rcomb``.
    * ``photutils`` is importable.

    A success message is printed only after every check has passed.

    Raises
    ------
    ImportError
        If any of the packages cannot be imported.
    AssertionError
        If an installed package is older than the required version.
    """

    import re
    import warnings

    import numpy as np

    def leading_ints(components, count):
        # Turn the first `count` version components into a tuple of ints so
        # versions can be compared as a whole (e.g. (1, 0, 28) <= (1, 1, 0)).
        # Only the leading digits of each component are used, so suffixes such
        # as "0rc1" or "dev123" do not break the comparison.
        parsed = []
        for component in list(components)[:count]:
            digits = re.match(r'\d+', component)
            parsed.append(int(digits.group()) if digits else 0)
        parsed.extend([0] * (count - len(parsed)))
        return tuple(parsed)

    import cdips as cd

    # require astroquery >= 0.4.0
    # see: https://astroquery.readthedocs.io/en/latest/
    # `pip install --pre astroquery`
    import astroquery as aq
    updatemsg = ('Need to update astroquery. Preferred method '
                 '`pip install --pre astroquery`')
    assert leading_ints(aq.__version__.split('.'), 2) >= (0, 4), updatemsg

    # require astropy >= 4.0
    # generally `conda update astropy` should be fine.
    import astropy as ap
    updatemsg = ('Need to update astropy. Preferred method '
                 '`conda update astropy`')
    assert leading_ints(ap.__version__.split('.'), 2) >= (4, 0), updatemsg

    #
    # wotan bleeding edge install
    # or better (bleeding edge): clone and setup.py install
    #
    import pygam
    import wotan
    from wotan import version
    updatemsg = ('Need to update wotan. Please clone & setup.py '
                 'install https://github.com/hippke/wotan')
    wotan_components = version.WOTAN_VERSIONING.split('.')
    assert leading_ints(wotan_components, 2) >= (1, 10), updatemsg

    #
    # TLS bleeding edge install for verbose kwarg, among others
    #
    from transitleastsquares.tls_constants import TLS_VERSION
    updatemsg = ('Need to update transitleastsquares. Please clone & setup.py '
                 'install https://github.com/hippke/tls')
    # The version number is the fifth space-separated token of TLS_VERSION.
    tls_components = TLS_VERSION.split(' ')[4].split('.')
    assert leading_ints(tls_components, 3) >= (1, 0, 28), updatemsg

    # check if pspline works with the expected number of args as of wotan v1.10
    # nb. also requires the bugfix with the stdev cut.
    rng = np.random.default_rng(42)
    time = np.linspace(0, 10, 100)
    flux = (np.ones(len(time)) + 1e-2 * np.linspace(-1, 1, len(time)) +
            rng.random(len(time)) * 1e-3)
    edge_cutoff = 0
    max_splines = 4
    stdev_cut = 1.5
    return_nsplines = False
    verbose = False
    from wotan.pspline import pspline
    # Silence RuntimeWarnings only for this call; the process-wide warning
    # filters are restored when the context manager exits.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        # Unpacking into two names doubles as a check on the return arity.
        _trend_flux, _n_splines = pspline(time, flux, edge_cutoff,
                                          max_splines, stdev_cut,
                                          return_nsplines, verbose)

    #
    # notch and locor:
    # clone and setup.py install https://github.com/lgbouma/Notch_and_LOCoR,
    # which was forked from Aaron Rizzuto's implementation.
    # also requires
    # clone and setup.py install https://github.com/evertrol/mpyfit
    #
    from notch_and_locor.core import sliding_window
    from notch_and_locor.core import rcomb

    #
    # photutils: used in vetting report creation
    # $ conda install -c conda-forge photutils
    #
    import photutils

    # Report success only once every dependency has been checked.
    print('testing.check_dependencies passed!')
Exemplo n.º 2
0
def flatten(time,
            flux,
            window_length=None,
            edge_cutoff=0,
            break_tolerance=None,
            cval=None,
            return_trend=False,
            method='biweight',
            kernel=None,
            kernel_size=None,
            kernel_period=None,
            proportiontocut=constants.PROPORTIONTOCUT,
            robust=False,
            mask=None):
    """
    ``flatten`` removes low frequency trends in time-series data.

    Parameters
    ----------
    time : array-like
        Time values
    flux : array-like
        Flux values for every time point
    window_length : float
        The length of the filter window in units of ``time`` (usually days), or in
        cadences (for cadence-based sliders ``savgol`` and ``medfilt``). If not
        given, a value of 2 is used.
    method : string, default: ``biweight``
        Detrending method. Time-windowed sliders: ``median``, ``biweight``, ``hodges``,
        ``tau``, ``welsch``, ``huber``, ``huber_psi``, ``andrewsinewave``, ``mean``,
        ``hampel``, ``ramsay``, ``trim_mean``, ``hampelfilt``, ``winsorize``. Cadence
        based slider: ``medfilt``. Splines: ``hspline``, ``rspline``, ``pspline``.
        Locally weighted scatterplot smoothing: ``lowess``. Savitzky-Golay filter:
        ``savgol``. Gaussian processes: ``gp``. Cosine Filtering with Autocorrelation
        Minimization: ``cofiam``. Cosine fitting: ``cosine``. Friedman's Super-Smoother:
        ``supersmoother``. Gaussian regressions: ``ridge``, ``lasso``, ``elasticnet``.
    break_tolerance : float, default: window_length/2
        If there are large gaps in time (larger than ``window_length``/2), flatten will
        split the flux into several sub-lightcurves and apply the filter to each
        individually. ``break_tolerance`` must be in the same unit as ``time`` (usually
        days). To disable this feature, set ``break_tolerance`` to 0. If neither
        ``window_length`` nor ``break_tolerance`` is provided (e.g. for the
        ``supersmoother``), it will be taken as `1` in units of ``time``.
    edge_cutoff : float, default: 0
        Trends near edges are less robust. Depending on the data, it may be beneficial
        to remove edges. The ``edge_cutoff`` defines the length (in units of time) to be
        cut off each edge. Cut off is maximally ``window_length``/2, as this fills the
        window completely. Applicable only for time-windowed sliders.
    cval : float or int
        Tuning parameter for the robust estimators. See documentation for defaults.
        Larger values make the estimate more efficient but less robust. For the
        super-smoother, cval determines the bass enhancement (smoothness) and can be
        `None` or in the range 0 < ``cval`` < 10. For the ``savgol``, ``cval``
        determines the (integer) polynomial order (default: 2).
    proportiontocut : float, default: 0.1
        Fraction to cut off (or filled) of both tails of the distribution using methods
        ``trim_mean`` (or ``winsorize``). Must be a float with
        0 < ``proportiontocut`` < 0.5.
    kernel : str, default: `squared_exp`
        Choice of `squared_exp` (squared exponential), `matern`, `periodic`,
        `periodic_auto`.
    kernel_size : float, default: 1
        The length scale of the Gaussian Process kernel.
    kernel_period : float
        The periodicity of the Gaussian Process kernel (in units of ``time``). Must be
        provided for the kernel `periodic`. Can not be specified for the
        `periodic_auto`, for which it is determined automatically using a Lomb-Scargle
        periodogram pre-search.
    robust : bool, default: False
        If `True`, the fitting process will be run iteratively. In each iteration,
        2-sigma outliers from the fitted trend will be clipped until convergence.
        Supported by the Gaussian Process kernels `squared_exp` and `matern`, as well as
        `cosine` fitting.
    mask : array-like of bool, optional
        Per-point mask with the same length as ``time``. It is logically inverted
        and cast to float before being handed to the detrenders that accept a mask.
        NOTE(review): presumably `True` marks points to exclude from the trend
        fit (e.g. in-transit points) -- confirm against the detrenders.
    return_trend : bool, default: False
        If `True`, the method will return a tuple of two elements
        (``flattened_flux``, ``trend_flux``) where ``trend_flux`` is the removed trend.

    Returns
    -------
    flatten_flux : array-like
        Flattened flux.
    trend_flux : array-like
        Trend in the flux. Only returned if ``return_trend`` is `True`.

    Raises
    ------
    ValueError
        If ``method`` is unknown, or ``proportiontocut`` is not a float in the
        open interval (0, 0.5).
    ImportError
        If ``method`` is ``supersmoother`` and the package cannot be imported.
    """
    if method not in constants.methods:
        raise ValueError('Unknown detrending method')

    # Numba can't handle strings, so we're passing the location estimator as an int.
    # The keys of this table are also exactly the methods served by
    # ``running_segment`` in the segment loop below.
    estimator_codes = {
        'biweight': 1,
        'andrewsinewave': 2,
        'welsch': 3,
        'hodges': 4,
        'median': 5,
        'mean': 6,
        'trim_mean': 7,
        'winsorize': 8,
        'hampelfilt': 9,
        'huber_psi': 10,
        'tau': 11,
    }
    method_code = estimator_codes.get(method)
    regression_methods = ('ridge', 'lasso', 'elasticnet')

    error_text = 'proportiontocut must be >0 and <0.5'
    if not isinstance(proportiontocut, float):
        raise ValueError(error_text)
    if proportiontocut >= 0.5 or proportiontocut <= 0:
        raise ValueError(error_text)

    # Default cval values for robust location estimators
    if cval is None:
        default_cvals = {
            'biweight': 5,
            'andrewsinewave': 1.339,
            'welsch': 2.11,
            'huber': 1.5,
            'huber_psi': 1.28,
            'trim_mean': proportiontocut,
            'winsorize': proportiontocut,
            'hampelfilt': 3,
            'tau': 4.5,
            'hampel': (1.7, 3.4, 8.5),
            'ramsay': 0.3,
            'savgol': 2,  # polyorder, int
            'ridge': 1,
            'lasso': 1,
            'elasticnet': 1,
        }
        # Fall back to 0 to avoid a numba type inference error
        # (None type multiplied with float).
        cval = default_cvals.get(method, 0)

    if method == 'supersmoother':
        # Only values strictly between 0 and 10 are passed on as alpha.
        supersmoother_alpha = cval if 0 < cval < 10 else None

    # Maximum gap in time should be half a window size.
    # Any larger is nonsense, because then the array has a full window of data
    if window_length is None:
        window_length = 2  # so that break_tolerance = 1 in the supersmoother case
    if break_tolerance is None:
        break_tolerance = window_length / 2
    if break_tolerance == 0:
        break_tolerance = inf

    # Numba is very fast, but doesn't play nicely with NaN values
    # Therefore, we make new time-flux arrays with only the floating point values
    # All calculations are done within these arrays
    # Afterwards, the trend is transplanted into the original arrays (with the NaNs)
    if mask is None:
        mask = np.ones(len(time))
    else:
        mask = array(~mask,
                     dtype=float64)  # Invert to stay consistent with TLS
    time = array(time, dtype=float64)
    flux = array(flux, dtype=float64)

    # A point is dropped when either its time or its flux is NaN.
    mask_nans = isnan(time * flux)
    time_compressed = np.ma.compressed(np.ma.masked_array(time, mask_nans))
    flux_compressed = np.ma.compressed(np.ma.masked_array(flux, mask_nans))
    mask_compressed = np.ma.compressed(np.ma.masked_array(mask, mask_nans))

    # Get the indexes of the gaps
    gaps_indexes = get_gaps_indexes(time_compressed,
                                    break_tolerance=break_tolerance)
    n_segments = len(gaps_indexes) - 1
    trend_flux = array([])
    trend_segment = array([])

    # Iterate over all segments
    for i in range(n_segments):
        segment = slice(gaps_indexes[i], gaps_indexes[i + 1])
        time_view = time_compressed[segment]
        flux_view = flux_compressed[segment]
        mask_view = mask_compressed[segment]

        if method in estimator_codes:
            trend_segment = running_segment(time_view, flux_view, mask_view,
                                            window_length, edge_cutoff, cval,
                                            method_code)
        elif method in ["huber", "hampel", "ramsay"]:
            trend_segment = running_segment_slow(time_view, flux_view,
                                                 mask_view, window_length,
                                                 edge_cutoff, cval, method)
        elif method == 'lowess':
            trend_segment = lowess(time_view, flux_view, mask_view,
                                   window_length)
        elif method == 'hspline':
            trend_segment = detrend_huber_spline(time_view,
                                                 flux_view,
                                                 mask_view,
                                                 knot_distance=window_length)
        elif method == 'supersmoother':
            try:
                from supersmoother import SuperSmoother as supersmoother
            except ImportError as err:
                raise ImportError('Could not import supersmoother') from err
            # The window is expressed as a fraction of the full time baseline.
            # NaN-aware extrema are used because ``time`` may still contain
            # NaNs, which make the builtin max()/min() order-dependent.
            win = window_length / (np.nanmax(time) - np.nanmin(time))
            trend_segment = supersmoother(
                alpha=supersmoother_alpha,
                primary_spans=(constants.primary_span_lower * win, win,
                               constants.primary_span_upper * win),
                middle_span=constants.middle_span * win,
                final_span=constants.upper_span * win).fit(
                    time_view,
                    flux_view,
                ).predict(time_view)
        elif method == 'cofiam':
            trend_segment = detrend_cofiam(time_view, flux_view, window_length)
        elif method == 'cosine':
            trend_segment = detrend_cosine(time_view, flux_view, window_length,
                                           robust, mask_view)
        elif method == 'savgol':
            # The window is bumped to the next odd number when it is even.
            if window_length % 2 == 0:
                window_length += 1
            trend_segment = savgol_filter(flux_view,
                                          window_length,
                                          polyorder=int(cval))
        elif method == 'medfilt':
            trend_segment = medfilt(flux_view, window_length)
        elif method == 'gp':
            trend_segment = make_gp(time_view, flux_view, mask_view, kernel,
                                    kernel_size, kernel_period, robust)
        elif method == 'rspline':
            trend_segment = iter_spline(time_view, flux_view, mask_view,
                                        window_length)
        elif method == 'pspline':
            print('Segment', i + 1, 'of', n_segments)
            trend_segment = pspline(time_view, flux_view)
        elif method in regression_methods:
            trend_segment = regression(time_view, flux_view, method,
                                       window_length, cval)

        trend_flux = append(trend_flux, trend_segment)

    # Insert results of non-NaNs into original data stream
    trend_lc = full(len(time), nan)
    valid_positions = where(~mask_nans)[0]
    trend_lc[valid_positions] = trend_flux[:len(valid_positions)]
    trend_lc[trend_lc == 0] = np.nan  # avoid division by zero
    flatten_lc = flux / trend_lc
    if return_trend:
        return flatten_lc, trend_lc
    return flatten_lc
Exemplo n.º 3
0
def detrend_flux(time,
                 flux,
                 break_tolerance=0.5,
                 method='pspline',
                 cval=None,
                 window_length=None,
                 edge_cutoff=None):
    """
    Apply the wotan flatten function. Implemented methods include pspline,
    biweight, and median.

    The light curve is first split into time groups separated by gaps larger
    than ``break_tolerance``. Groups with six or fewer points are set to NaN
    before detrending. If the detrending raises a ``ValueError``, the shortest
    remaining group(s) are also set to NaN and the detrending is retried once
    through ``flatten``.

    Note:
        ``time`` and ``flux`` are modified IN PLACE: the entries belonging to
        groups that are too short are overwritten with NaN.

    Args:
        time, flux (np.ndarray): array of times and fluxes.

        break_tolerance (float): maximum time past which light curve is split
        into timegroups, each of which is detrended individually.

        method (str): 'pspline', 'biweight', 'median', or 'none' (which
        returns ``flux`` unchanged and a trend of None).

        cval (float): the wotan 'biweight' tuning parameter.

        window_length (float): length of the window in days.

        edge_cutoff (float): how much of the edge to remove. Only works for
        'median' method. None is treated as 0, i.e. no edge removal.

    Returns:
        flat_flux, trend_flux (np.ndarray): flattened array, and the trend
        vector that was divided out.

    Raises:
        NotImplementedError: if ``method`` is not one of the supported names.

    See also:
        https://wotan.readthedocs.io/en/latest/
    """

    def nan_out_short_groups(cutoff):
        # Overwrite every time group with `cutoff` or fewer points by NaN.
        for g in group_inds:
            if len(time[g]) <= cutoff:
                time[g], flux[g] = np.nan, np.nan

    def flatten_windowed(slider):
        # Run wotan's flatten with one of the time-windowed sliders.
        kwargs = dict(method=slider,
                      return_trend=True,
                      break_tolerance=break_tolerance,
                      window_length=window_length)
        if slider == 'biweight':
            kwargs['cval'] = cval
        else:
            # flatten's own default for edge_cutoff is 0; do not hand it None.
            kwargs['edge_cutoff'] = 0 if edge_cutoff is None else edge_cutoff
        return flatten(time, flux, **kwargs)

    # Initial pre-processing: verify that under break_tolerance, time and flux
    # do not have any sections with <=6 points. Spline detrending routines do
    # not like fitting lines.
    _n_groups, group_inds = find_lc_timegroups(time, mingap=break_tolerance)
    SECTION_CUTOFF = 6
    nan_out_short_groups(SECTION_CUTOFF)

    try:
        if method == 'pspline':

            # Detrend each time segment individually, to prevent overfitting
            # based on the `max_splines` parameter.

            # e.g., for a 25 day segment, we want max_splines to be ~100,
            # i.e., 1 spline point every 6 hours.  this helps prevent
            # overfitting.
            transit_timescale = 6 / 24.

            # a 2-sigma cutoff is standard, but there's no obvious reason for
            # this being the case. generally, anything far from the median
            # shouldn't go into the fit.
            stdev_cut = 1.5

            # pspline is always run without edge removal here, regardless of
            # the `edge_cutoff` argument.
            pspline_edge_cutoff = 0

            flat_fluxs, trend_fluxs = [], []
            for g in group_inds:
                tgtime, tgflux = time[g], flux[g]

                t_baseline = np.max(tgtime) - np.min(tgtime)
                # int() raises ValueError for a NaN baseline (a group that
                # was NaN-ed out above); that is handled by the fallback below.
                max_splines = int(t_baseline / transit_timescale)

                # positional signature:
                # pspline(time, flux, edge_cutoff, max_splines, stdev_cut,
                #         return_nsplines, verbose)
                _trend_flux, _nsplines = pspline(tgtime, tgflux,
                                                 pspline_edge_cutoff,
                                                 max_splines, stdev_cut, False,
                                                 False)

                flat_fluxs.append(tgflux / _trend_flux)
                trend_fluxs.append(_trend_flux)

            flat_flux = np.hstack(flat_fluxs)
            trend_flux = np.hstack(trend_fluxs)

        elif method in ('biweight', 'median'):
            flat_flux, trend_flux = flatten_windowed(method)

        elif method == 'none':
            flat_flux = flux
            trend_flux = None

        else:
            raise NotImplementedError

    except ValueError as e:
        msg = (f'WRN! {e!r}. Probably have a short segment. '
               'Trying to nan it out.')
        print(msg)

        # NaN out the shortest group(s) and retry once.
        nan_out_short_groups(min([len(time[g]) for g in group_inds]))

        if method == 'pspline':
            # matched detrending to do_initial_period_finding
            flat_flux, trend_flux = flatten(time,
                                            flux,
                                            method='pspline',
                                            return_trend=True,
                                            break_tolerance=break_tolerance,
                                            robust=True)
        elif method in ('biweight', 'median'):
            flat_flux, trend_flux = flatten_windowed(method)
        else:
            raise NotImplementedError

    return flat_flux, trend_flux