Пример #1
0
def peak_shape_qc(window_values, quality_flags):
    """
    Apply a QC on the peak shape to determine if it is of an acceptable shape
    :param window_values:
    :param quality_flags:
    :return:
    """
    first_slopes = []
    second_slopes = []

    for x in window_values:
        median = npmedian(x)
        normalised = [y / median for y in x]

        fit = polyfit(range(len(x)), normalised, 2)
        first_slopes.append(fit[0])
        second_slopes.append(fit[1])

    for i, x in enumerate(first_slopes):
        if abs(x) > 0.005:
            quality_flags[i] = 5
        elif abs(x) > 0.005 and quality_flags[i] == 5:
            quality_flags[i] = 1
        elif abs(second_slopes[i]) > 0.009:
            quality_flags[i] = 5
        elif abs(second_slopes[i]) < 0.009 and quality_flags[i] == 5:
            quality_flags[i] = 1

    for i, x in enumerate(window_values):
        if npmedian(x) < 3800:
            quality_flags[i] = 1

    return quality_flags
Пример #2
0
def lightcurve_moments(ftimes, fmags, ferrs):
    '''This calculates the weighted mean, stdev, median, MAD, percentiles, skew,
    kurtosis, fraction of LC beyond 1-stdev, and IQR.

    Parameters
    ----------

    ftimes,fmags,ferrs : np.array
        The input mag/flux time-series with all non-finite elements removed.

    Returns
    -------

    dict
        A dict with all of the light curve moments calculated.

    '''

    ndet = len(fmags)

    if ndet > 9:

        # now calculate the various things we need
        series_median = npmedian(fmags)
        series_wmean = (npsum(fmags * (1.0 / (ferrs * ferrs))) /
                        npsum(1.0 / (ferrs * ferrs)))
        series_mad = npmedian(npabs(fmags - series_median))
        series_stdev = 1.483 * series_mad
        series_skew = spskew(fmags)
        series_kurtosis = spkurtosis(fmags)

        # get the beyond1std fraction
        series_above1std = len(fmags[fmags > (series_median + series_stdev)])
        series_below1std = len(fmags[fmags < (series_median - series_stdev)])

        # this is the fraction beyond 1 stdev
        series_beyond1std = (series_above1std + series_below1std) / float(ndet)

        # get the magnitude percentiles
        series_mag_percentiles = nppercentile(
            fmags, [5.0, 10, 17.5, 25, 32.5, 40, 60, 67.5, 75, 82.5, 90, 95])

        return {
            'median': series_median,
            'wmean': series_wmean,
            'mad': series_mad,
            'stdev': series_stdev,
            'skew': series_skew,
            'kurtosis': series_kurtosis,
            'beyond1std': series_beyond1std,
            'mag_percentiles': series_mag_percentiles,
            'mag_iqr': series_mag_percentiles[8] - series_mag_percentiles[3],
        }

    else:
        LOGERROR('not enough detections in this magseries '
                 'to calculate light curve moments')
        return None
Пример #3
0
def stetson_jindex(ftimes, fmags, ferrs, weightbytimediff=False):
    '''This calculates the Stetson index for the magseries, based on consecutive
    pairs of observations. Based on Nicole Loncke's work for her Planets and
    Life certificate at Princeton.

    This requires finite times, mags, and errs.

    If weightbytimediff is True, the Stetson index for any pair of mags will be
    reweighted by the difference in times between them using the scheme in
    Fruth+ 2012 and Zhange+ 2003 (as seen in Sokolovsky+ 2017).

    w_i = exp(- (t_i+1 - t_i)/ delta_t )

    '''

    ndet = len(fmags)

    if ndet > 9:

        # get the median and ndet
        medmag = npmedian(fmags)

        # get the stetson index elements
        delta_prefactor = (ndet / (ndet - 1))
        sigma_i = delta_prefactor * (fmags - medmag) / ferrs
        sigma_j = nproll(sigma_i,
                         1)  # Nicole's clever trick to advance indices
        # by 1 and do x_i*x_(i+1)

        if weightbytimediff:

            time_i = ftimes
            time_j = nproll(ftimes, 1)
            difft = npdiff(ftimes)
            deltat = npmedian(difft)

            weights_i = npexp(-difft / deltat)
            products = (weights_i * sigma_i[1:] * sigma_j[1:])
        else:
            # ignore first elem since it's actually x_0*x_n
            products = (sigma_i * sigma_j)[1:]

        stetsonj = (npsum(npsign(products) * npsqrt(npabs(products)))) / ndet

        return stetsonj

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson J index')
        return npnan
Пример #4
0
def stetson_kindex(fmags, ferrs):
    '''
    This calculates the Stetson K index (robust measure of the kurtosis).

    Requires finite mags and errs.

    '''

    # use a fill in value for the errors if they're none
    if ferrs is None:
        ferrs = npfull_like(mags, 0.005)

    ndet = len(fmags)

    if ndet > 9:

        # get the median and ndet
        medmag = npmedian(fmags)

        # get the stetson index elements
        delta_prefactor = (ndet / (ndet - 1))
        sigma_i = delta_prefactor * (fmags - medmag) / ferrs

        stetsonk = (npsum(npabs(sigma_i)) /
                    (npsqrt(npsum(sigma_i * sigma_i))) * (ndet**(-0.5)))

        return stetsonk

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson K index')
        return npnan
Пример #5
0
def median(numbers):
    if numpy:
        return npmedian(numbers)
    elif py3statistics:
        return p3median(numbers)
    quotient, remainder = divmod(len(numbers), 2)
    if remainder:
        return sorted(numbers)[quotient]
    return sum(sorted(numbers)[quotient - 1:quotient + 1]) / 2.
Пример #6
0
def median_along_line(critical_points, line):
    v = list()
    for p in critical_points:
        v.append(line.direction_value_on_point(p))
    med_value = npmedian(array(v))
    codirect = line.direc
    perp_line = Line2D(None, None,
                         coefs=[codirect.get_x(), codirect.get_y(), med_value])
    result = perp_line.intersection(line)
    assert (result is not None)
    assert (line.is_point_on_line(result))
    return result
Пример #7
0
def pwd_phasebin(phases, mags, binsize=0.002, minbin=9):
    '''
    This bins the phased mag series using the given binsize.

    '''

    bins = np.arange(0.0, 1.0, binsize)
    binnedphaseinds = npdigitize(phases, bins)

    binnedphases, binnedmags = [], []

    for x in npunique(binnedphaseinds):

        thisbin_inds = binnedphaseinds == x
        thisbin_phases = phases[thisbin_inds]
        thisbin_mags = mags[thisbin_inds]
        if thisbin_inds.size > minbin:
            binnedphases.append(npmedian(thisbin_phases))
            binnedmags.append(npmedian(thisbin_mags))

    return np.array(binnedphases), np.array(binnedmags)
Пример #8
0
def window_medians(window_values):
    """
    Calculates the window medians for each peak window
    :param window_values:
    :return: window_medians
    """
    window_medians_temp_array = nparray(window_values)

    window_medians_temp_array = npmedian(window_medians_temp_array, axis=1)

    window_medians = list(window_medians_temp_array)

    return window_medians
Пример #9
0
def _fourier_func(fourierparams, phase, mags):
    '''This returns a summed Fourier cosine series.

    Parameters
    ----------

    fourierparams : list
        This MUST be a list of the following form like so::

            [period,
             epoch,
             [amplitude_1, amplitude_2, amplitude_3, ..., amplitude_X],
             [phase_1, phase_2, phase_3, ..., phase_X]]

        where X is the Fourier order.

    phase,mags : np.array
        The input phase and magnitude areas to use as the basis for the cosine
        series. The phases are used directly to generate the values of the
        function, while the mags array is used to generate the zeroth order
        amplitude coefficient.

    Returns
    -------

    np.array
        The Fourier cosine series function evaluated over `phase`.

    '''

    # figure out the order from the length of the Fourier param list
    order = int(len(fourierparams) / 2)

    # get the amplitude and phase coefficients
    f_amp = fourierparams[:order]
    f_pha = fourierparams[order:]

    # calculate all the individual terms of the series
    f_orders = [
        f_amp[x] * npcos(2.0 * pi_value * x * phase + f_pha[x])
        for x in range(order)
    ]

    # this is the zeroth order coefficient - a constant equal to median mag
    total_f = npmedian(mags)

    # sum the series
    for fo in f_orders:
        total_f += fo

    return total_f
Пример #10
0
def stetson_kindex(fmags, ferrs):
    '''This calculates the Stetson K index (a robust measure of the kurtosis).

    Parameters
    ----------

    fmags,ferrs : np.array
        The input mag/flux time-series to process. Must have no non-finite
        elems.

    Returns
    -------

    float
        The Stetson K variability index.

    '''

    # use a fill in value for the errors if they're none
    if ferrs is None:
        ferrs = npfull_like(fmags, 0.005)

    ndet = len(fmags)

    if ndet > 9:

        # get the median and ndet
        medmag = npmedian(fmags)

        # get the stetson index elements
        delta_prefactor = (ndet/(ndet - 1))
        sigma_i = delta_prefactor*(fmags - medmag)/ferrs

        stetsonk = (
            npsum(npabs(sigma_i))/(npsqrt(npsum(sigma_i*sigma_i))) *
            (ndet**(-0.5))
        )

        return stetsonk

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson K index')
        return npnan
Пример #11
0
def sine_series_sum(fourierparams, times, mags, errs):
    '''This generates a sinusoidal light curve using a sine series.

    The series is generated using the coefficients provided in
    fourierparams. This is a sequence like so:

    [period,
     epoch,
     [ampl_1, ampl_2, ampl_3, ..., ampl_X],
     [pha_1, pha_2, pha_3, ..., pha_X]]

    where X is the Fourier order.

    '''

    period, epoch, famps, fphases = fourierparams

    # figure out the order from the length of the Fourier param list
    forder = len(famps)

    # phase the times with this period
    iphase = (times - epoch) / period
    iphase = iphase - npfloor(iphase)

    phasesortind = npargsort(iphase)
    phase = iphase[phasesortind]
    ptimes = times[phasesortind]
    pmags = mags[phasesortind]
    perrs = errs[phasesortind]

    # calculate all the individual terms of the series
    fseries = [
        famps[x] * npsin(2.0 * MPI * x * phase + fphases[x])
        for x in range(forder)
    ]

    # this is the zeroth order coefficient - a constant equal to median mag
    modelmags = npmedian(mags)

    # sum the series
    for fo in fseries:
        modelmags += fo

    return modelmags, phase, ptimes, pmags, perrs
Пример #12
0
def get_median_bisector(angle_to_bisector_map):
    """
    :type angle_to_bisector_map: dict
    :rtype : Line2D
    """
    med_angle = npmedian(array(angle_to_bisector_map.keys()))
    if len(angle_to_bisector_map) % 2 == 1:
        bisector = angle_to_bisector_map[med_angle]
        return bisector
    else:
        angle_below = max([a for a in angle_to_bisector_map.keys() if a < med_angle])
        angle_above = min([a for a in angle_to_bisector_map.keys() if a > med_angle])
        line_below = angle_to_bisector_map[angle_below]
        line_above = angle_to_bisector_map[angle_above]

        def invert_if_x_less_zero(vector):
            return vector.inverted() if vector.get_x() < 0.0 else vector
        direction_below = invert_if_x_less_zero(line_below.direc.normalized())
        direction_above = invert_if_x_less_zero(line_above.direc.normalized())
        med_line_direction = direction_above.sum(direction_below)
        return Line2D(point1=Vector2D(0.0, 0.0), point2=med_line_direction)
Пример #13
0
def _fourier_func(fourierparams, phase, mags):
    '''
    This returns a summed Fourier series generated using fourierparams.

    fourierparams is a sequence like so:

    [ampl_1, ampl_2, ampl_3, ..., ampl_X,
     pha_1, pha_2, pha_3, ..., pha_X]

    where X is the Fourier order.

    mags and phase MUST NOT have any nans.

    '''

    # figure out the order from the length of the Fourier param list
    order = int(len(fourierparams) / 2)

    # get the amplitude and phase coefficients
    f_amp = fourierparams[:order]
    f_pha = fourierparams[order:]

    # calculate all the individual terms of the series
    f_orders = [
        f_amp[x] * npcos(2.0 * MPI * x * phase + f_pha[x])
        for x in range(order)
    ]

    # this is the zeroth order coefficient - a constant equal to median mag
    total_f = npmedian(mags)

    # sum the series
    for fo in f_orders:
        total_f += fo

    return total_f
Пример #14
0
def epd_magseries(times, mags, errs,
                  fsv, fdv, fkv, xcc, ycc, bgv, bge, iha, izd,
                  magsarefluxes=False,
                  epdsmooth_sigclip=3.0,
                  epdsmooth_windowsize=21,
                  epdsmooth_func=smooth_magseries_savgol,
                  epdsmooth_extraparams=None):
    '''Detrends a magnitude series using External Parameter Decorrelation.

    The HAT light-curve-specific external parameters are:

    S: the 'fsv' column,
    D: the 'fdv' column,
    K: the 'fkv' column,
    x coords: the 'xcc' column,
    y coords: the 'ycc' column,
    background: the 'bgv' column,
    background error: the 'bge' column
    hour angle: the 'iha' column
    zenith distance: the 'izd' column

    epdsmooth_windowsize is the number of points to smooth over to generate a
    smoothed light curve to train the regressor against.

    epdsmooth_func sets the smoothing filter function to use. A Savitsky-Golay
    filter is used to smooth the light curve by default.

    epdsmooth_extraparams is a dict of any extra filter params to supply to the
    smoothing function.

    NOTE: The errs are completely ignored and returned unchanged (except for
    sigclip and finite filtering).

    '''

    finind = np.isfinite(times) & np.isfinite(mags) & np.isfinite(errs)
    ftimes, fmags, ferrs = times[::][finind], mags[::][finind], errs[::][finind]
    ffsv, ffdv, ffkv, fxcc, fycc, fbgv, fbge, fiha, fizd = (
        fsv[::][finind],
        fdv[::][finind],
        fkv[::][finind],
        xcc[::][finind],
        ycc[::][finind],
        bgv[::][finind],
        bge[::][finind],
        iha[::][finind],
        izd[::][finind],
    )

    stimes, smags, serrs, separams = sigclip_magseries_with_extparams(
        times, mags, errs,
        [fsv, fdv, fkv, xcc, ycc, bgv, bge, iha, izd],
        sigclip=epdsmooth_sigclip,
        magsarefluxes=magsarefluxes
    )
    sfsv, sfdv, sfkv, sxcc, sycc, sbgv, sbge, siha, sizd = separams

    # smooth the signal
    if isinstance(epdsmooth_extraparams, dict):
        smoothedmags = epdsmooth_func(smags,
                                      epdsmooth_windowsize,
                                      **epdsmooth_extraparams)
    else:
        smoothedmags = epdsmooth_func(smags, epdsmooth_windowsize)

    # initial fit coeffs
    initcoeffs = np.zeros(22)

    # fit the smoothed mags and find the EPD function coefficients
    leastsqfit = leastsq(_epd_residual,
                         initcoeffs,
                         args=(smoothedmags,
                               sfsv, sfdv, sfkv, sxcc,
                               sycc, sbgv, sbge, siha, sizd),
                         full_output=True)

    # if the fit succeeds, then get the EPD mags
    if leastsqfit[-1] in (1,2,3,4):

        fitcoeffs = leastsqfit[0]
        epdfit = _epd_function(fitcoeffs,
                               ffsv, ffdv, ffkv, fxcc, fycc,
                               fbgv, fbge, fiha, fizd)

        epdmags = npmedian(fmags) + fmags - epdfit

        retdict = {'times':ftimes,
                   'mags':epdmags,
                   'errs':ferrs,
                   'fitcoeffs':fitcoeffs,
                   'fitinfo':leastsqfit,
                   'fitmags':epdfit}

        return retdict

    # if the solution fails, return nothing
    else:

        LOGERROR('EPD fit did not converge')
        return None
Пример #15
0
def plot_phased_mag_series(times,
                           mags,
                           period,
                           errs=None,
                           epoch='min',
                           outfile=None,
                           sigclip=30.0,
                           phasewrap=True,
                           phasesort=True,
                           phasebin=None,
                           plotphaselim=[-0.8,0.8],
                           yrange=None):
    '''This plots a phased magnitude time series using the period provided.

    If epoch is None, uses the min(times) as the epoch.

    If epoch is a string 'min', then fits a cubic spline to the phased light
    curve using min(times), finds the magnitude minimum from the fitted light
    curve, then uses the corresponding time value as the epoch.

    If epoch is a float, then uses that directly to phase the light curve and as
    the epoch of the phased mag series plot.

    If outfile is none, then plots to matplotlib interactive window. If outfile
    is a string denoting a filename, uses that to write a png/eps/pdf figure.

    '''

    if errs is not None:

        # remove nans
        find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
        ftimes, fmags, ferrs = times[find], mags[find], errs[find]

        # get the median and stdev = 1.483 x MAD
        median_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

        # sigclip next
        if sigclip:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

            LOGINFO('sigclip = %s: before = %s observations, '
                    'after = %s observations' %
                    (sigclip, len(times), len(stimes)))

        else:

            stimes = ftimes
            smags = fmags
            serrs = ferrs

    else:

        # remove nans
        find = npisfinite(times) & npisfinite(mags)
        ftimes, fmags, ferrs = times[find], mags[find], None

        # get the median and stdev = 1.483 x MAD
        median_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

        # sigclip next
        if sigclip:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = None

            LOGINFO('sigclip = %s: before = %s observations, '
                    'after = %s observations' %
                    (sigclip, len(times), len(stimes)))

        else:

            stimes = ftimes
            smags = fmags
            serrs = None


    # figure out the epoch, if it's None, use the min of the time
    if epoch is None:

        epoch = npmin(stimes)

    # if the epoch is 'min', then fit a spline to the light curve phased
    # using the min of the time, find the fit mag minimum and use the time for
    # that as the epoch
    elif isinstance(epoch,str) and epoch == 'min':

        spfit = spline_fit_magseries(stimes, smags, serrs, period)
        epoch = spfit['fitepoch']


    # now phase (and optionally, phase bin the light curve)
    if errs is not None:

        # phase the magseries
        phasedlc = phase_magseries_with_errs(stimes,
                                             smags,
                                             serrs,
                                             period,
                                             epoch,
                                             wrap=phasewrap,
                                             sort=phasesort)
        plotphase = phasedlc['phase']
        plotmags = phasedlc['mags']
        ploterrs = phasedlc['errs']

        # if we're supposed to bin the phases, do so
        if phasebin:

            binphasedlc = phase_bin_magseries_with_errs(plotphase,
                                                        plotmags,
                                                        ploterrs,
                                                        binsize=phasebin)
            plotphase = binphasedlc['binnedphases']
            plotmags = binphasedlc['binnedmags']
            ploterrs = binphasedlc['binnederrs']

    else:

        # phase the magseries
        phasedlc = phase_magseries(stimes,
                                   smags,
                                   period,
                                   epoch,
                                   wrap=phasewrap,
                                   sort=phasesort)
        plotphase = phasedlc['phase']
        plotmags = phasedlc['mags']
        ploterrs = None

        # if we're supposed to bin the phases, do so
        if phasebin:

            binphasedlc = phase_bin_magseries(plotphase,
                                              plotmags,
                                              binsize=phasebin)
            plotphase = binphasedlc['binnedphases']
            plotmags = binphasedlc['binnedmags']
            ploterrs = None


    # finally, make the plots

    # initialize the plot
    fig = plt.figure()
    fig.set_size_inches(7.5,4.8)

    plt.errorbar(plotphase, plotmags, fmt='bo', yerr=ploterrs,
                 markersize=2.0, markeredgewidth=0.0, ecolor='#B2BEB5',
                 capsize=0)

    # make a grid
    plt.grid(color='#a9a9a9',
             alpha=0.9,
             zorder=0,
             linewidth=1.0,
             linestyle=':')

    # make lines for phase 0.0, 0.5, and -0.5
    plt.axvline(0.0,alpha=0.9,linestyle='dashed',color='g')
    plt.axvline(-0.5,alpha=0.9,linestyle='dashed',color='g')
    plt.axvline(0.5,alpha=0.9,linestyle='dashed',color='g')

    # fix the ticks to use no offsets
    plt.gca().get_yaxis().get_major_formatter().set_useOffset(False)
    plt.gca().get_xaxis().get_major_formatter().set_useOffset(False)

    # get the yrange
    if yrange and isinstance(yrange,list) and len(yrange) == 2:
        ymin, ymax = yrange
    else:
        ymin, ymax = plt.ylim()
    plt.ylim(ymax,ymin)

    # set the x axis limit
    if not plotphaselim:
        plot_xlim = plt.xlim()
        plt.xlim((npmin(plotphase)-0.1,
                  npmax(plotphase)+0.1))
    else:
        plt.xlim((plotphaselim[0],plotphaselim[1]))

    # set up the labels
    plt.xlabel('phase')
    plt.ylabel('magnitude')
    plt.title('using period: %.6f d and epoch: %.6f' % (period, epoch))

    # make the figure
    if outfile and isinstance(outfile, str):

        plt.savefig(outfile,bbox_inches='tight')
        plt.close()
        return os.path.abspath(outfile)

    else:

        plt.show()
        plt.close()
        return
Пример #16
0
def aov_theta(times, mags, errs, frequency,
              binsize=0.05, minbin=9):
    '''Calculates the Schwarzenberg-Czerny AoV statistic at a test frequency.

    Parameters
    ----------

    times,mags,errs : np.array
        The input time-series and associated errors.

    frequency : float
        The test frequency to calculate the theta statistic at.

    binsize : float
        The phase bin size to use.

    minbin : int
        The minimum number of items in a phase bin to consider in the
        calculation of the statistic.

    Returns
    -------

    theta_aov : float
        The value of the AoV statistic at the specified `frequency`.

    '''

    period = 1.0/frequency
    fold_time = times[0]

    phased = phase_magseries(times,
                             mags,
                             period,
                             fold_time,
                             wrap=False,
                             sort=True)

    phases = phased['phase']
    pmags = phased['mags']
    bins = nparange(0.0, 1.0, binsize)
    ndets = phases.size

    binnedphaseinds = npdigitize(phases, bins)

    bin_s1_tops = []
    bin_s2_tops = []
    binndets = []
    goodbins = 0

    all_xbar = npmedian(pmags)

    for x in npunique(binnedphaseinds):

        thisbin_inds = binnedphaseinds == x
        thisbin_mags = pmags[thisbin_inds]

        if thisbin_mags.size > minbin:

            thisbin_ndet = thisbin_mags.size
            thisbin_xbar = npmedian(thisbin_mags)

            # get s1
            thisbin_s1_top = (
                thisbin_ndet *
                (thisbin_xbar - all_xbar) *
                (thisbin_xbar - all_xbar)
            )

            # get s2
            thisbin_s2_top = npsum((thisbin_mags - all_xbar) *
                                   (thisbin_mags - all_xbar))

            bin_s1_tops.append(thisbin_s1_top)
            bin_s2_tops.append(thisbin_s2_top)
            binndets.append(thisbin_ndet)
            goodbins = goodbins + 1


    # turn the quantities into arrays
    bin_s1_tops = nparray(bin_s1_tops)
    bin_s2_tops = nparray(bin_s2_tops)
    binndets = nparray(binndets)

    # calculate s1 first
    s1 = npsum(bin_s1_tops)/(goodbins - 1.0)

    # then calculate s2
    s2 = npsum(bin_s2_tops)/(ndets - goodbins)

    theta_aov = s1/s2

    return theta_aov
Пример #17
0
def gaussianeb_fit_magseries(
    times,
    mags,
    errs,
    ebparams,
    param_bounds=None,
    scale_errs_redchisq_unity=True,
    sigclip=10.0,
    plotfit=False,
    magsarefluxes=False,
    verbose=True,
    curve_fit_kwargs=None,
):
    '''This fits a double inverted gaussian EB model to a magnitude time series.

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to fit the EB model to.

    period : float
        The period to use for EB fit.

    ebparams : list of float
        This is a list containing the eclipsing binary parameters::

            ebparams = [period (time),
                        epoch (time),
                        pdepth (mags),
                        pduration (phase),
                        psdepthratio,
                        secondaryphase]

        `period` is the period in days.

        `epoch` is the time of primary minimum in JD.

        `pdepth` is the depth of the primary eclipse:

        - for magnitudes -> `pdepth` should be < 0
        - for fluxes     -> `pdepth` should be > 0

        `pduration` is the length of the primary eclipse in phase.

        `psdepthratio` is the ratio of the secondary eclipse depth to that of
        the primary eclipse.

        `secondaryphase` is the phase at which the minimum of the secondary
        eclipse is located. This effectively parameterizes eccentricity.

        If `epoch` is None, this function will do an initial spline fit to find
        an approximate minimum of the phased light curve using the given period.

        The `pdepth` provided is checked against the value of
        `magsarefluxes`. if `magsarefluxes = True`, the `ebdepth` is forced to
        be > 0; if `magsarefluxes = False`, the `ebdepth` is forced to be < 0.

    param_bounds : dict or None
        This is a dict of the upper and lower bounds on each fit
        parameter. Should be of the form::

            {'period':         (lower_bound_period, upper_bound_period),
             'epoch':          (lower_bound_epoch, upper_bound_epoch),
             'pdepth':         (lower_bound_pdepth, upper_bound_pdepth),
             'pduration':      (lower_bound_pduration, upper_bound_pduration),
             'psdepthratio':   (lower_bound_psdepthratio,
                                upper_bound_psdepthratio),
             'secondaryphase': (lower_bound_secondaryphase,
                                upper_bound_secondaryphase)}

        - To indicate that a parameter is fixed, use 'fixed' instead of a tuple
          providing its lower and upper bounds as tuple.

        - To indicate that a parameter has no bounds, don't include it in the
          param_bounds dict.

        If this is None, the default value of this kwarg will be::

            {'period':(0.0,np.inf),      # period is between 0 and inf
             'epoch':(0.0, np.inf),      # epoch is between 0 and inf
             'pdepth':(-np.inf,np.inf),  # pdepth is between -np.inf and np.inf
             'pduration':(0.0,1.0),      # pduration is between 0.0 and 1.0
             'psdepthratio':(0.0,1.0),   # psdepthratio is between 0.0 and 1.0
             'secondaryphase':(0.0,1.0), # secondaryphase is between 0.0 and 1.0

    scale_errs_redchisq_unity : bool
        If True, the standard errors on the fit parameters will be scaled to
        make the reduced chi-sq = 1.0. This sets the ``absolute_sigma`` kwarg
        for the ``scipy.optimize.curve_fit`` function to False.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    magsarefluxes : bool
        If True, will treat the input values of `mags` as fluxes for purposes of
        plotting the fit and sig-clipping.

    plotfit : str or False
        If this is a string, this function will make a plot for the fit to the
        mag/flux time-series and writes the plot to the path specified here.

    ignoreinitfail : bool
        If this is True, ignores the initial failure to find a set of optimized
        Fourier parameters using the global optimization function and proceeds
        to do a least-squares fit anyway.

    verbose : bool
        If True, will indicate progress and warn of any problems.

    curve_fit_kwargs : dict or None
        If not None, this should be a dict containing extra kwargs to pass to
        the scipy.optimize.curve_fit function.

    Returns
    -------

    dict
        This function returns a dict containing the model fit parameters, the
        minimized chi-sq value and the reduced chi-sq value. The form of this
        dict is mostly standardized across all functions in this module::

            {
                'fittype':'gaussianeb',
                'fitinfo':{
                    'initialparams':the initial EB params provided,
                    'finalparams':the final model fit EB params,
                    'finalparamerrs':formal errors in the params,
                    'fitmags': the model fit mags,
                    'fitepoch': the epoch of minimum light for the fit,
                },
                'fitchisq': the minimized value of the fit's chi-sq,
                'fitredchisq':the reduced chi-sq value,
                'fitplotfile': the output fit plot if fitplot is not None,
                'magseries':{
                    'times':input times in phase order of the model,
                    'phase':the phases of the model mags,
                    'mags':input mags/fluxes in the phase order of the model,
                    'errs':errs in the phase order of the model,
                    'magsarefluxes':input value of magsarefluxes kwarg
                }
            }


    '''

    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    # get rid of zero errs
    nzind = npnonzero(serrs)
    stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]

    # check the ebparams
    ebperiod, ebepoch, ebdepth = ebparams[0:3]

    # check if we have a ebepoch to use
    if ebepoch is None:

        if verbose:
            LOGWARNING('no ebepoch given in ebparams, '
                       'trying to figure it out automatically...')
        # do a spline fit to figure out the approximate min of the LC
        try:
            spfit = spline_fit_magseries(times,
                                         mags,
                                         errs,
                                         ebperiod,
                                         sigclip=sigclip,
                                         magsarefluxes=magsarefluxes,
                                         verbose=verbose)
            ebepoch = spfit['fitinfo']['fitepoch']

        # if the spline-fit fails, try a savgol fit instead
        except Exception:
            sgfit = savgol_fit_magseries(times,
                                         mags,
                                         errs,
                                         ebperiod,
                                         sigclip=sigclip,
                                         magsarefluxes=magsarefluxes,
                                         verbose=verbose)
            ebepoch = sgfit['fitinfo']['fitepoch']

        # if everything failed, then bail out and ask for the ebepoch
        finally:

            if ebepoch is None:
                LOGERROR("couldn't automatically figure out the eb epoch, "
                         "can't continue. please provide it in ebparams.")

                # assemble the returndict
                returndict = {
                    'fittype': 'gaussianeb',
                    'fitinfo': {
                        'initialparams': ebparams,
                        'finalparams': None,
                        'finalparamerrs': None,
                        'fitmags': None,
                        'fitepoch': None,
                    },
                    'fitchisq': npnan,
                    'fitredchisq': npnan,
                    'fitplotfile': None,
                    'magseries': {
                        'phase': None,
                        'times': None,
                        'mags': None,
                        'errs': None,
                        'magsarefluxes': magsarefluxes,
                    },
                }

                return returndict

            else:

                if ebepoch.size > 1:
                    if verbose:
                        LOGWARNING('could not auto-find a single minimum '
                                   'for ebepoch, using the first one returned')
                    ebparams[1] = ebepoch[0]

                else:

                    if verbose:
                        LOGWARNING(
                            'using automatically determined ebepoch = %.5f' %
                            ebepoch)
                    ebparams[1] = ebepoch.item()

    # next, check the ebdepth and fix it to the form required
    if magsarefluxes:
        if ebdepth < 0.0:
            ebparams[2] = -ebdepth[2]

    else:
        if ebdepth > 0.0:
            ebparams[2] = -ebdepth[2]

    # finally, do the fit
    try:

        # set up the fit parameter bounds
        if param_bounds is None:

            curvefit_bounds = (nparray([0.0, 0.0, -npinf, 0.0, 0.0, 0.0]),
                               nparray([npinf, npinf, npinf, 1.0, 1.0, 1.0]))
            fitfunc_fixed = {}

        else:

            # figure out the bounds
            lower_bounds = []
            upper_bounds = []
            fitfunc_fixed = {}

            for ind, key in enumerate(
                ('period', 'epoch', 'pdepth', 'pduration', 'psdepthratio',
                 'secondaryphase')):

                # handle fixed parameters
                if (key in param_bounds and isinstance(param_bounds[key], str)
                        and param_bounds[key] == 'fixed'):

                    lower_bounds.append(ebparams[ind] - 1.0e-7)
                    upper_bounds.append(ebparams[ind] + 1.0e-7)
                    fitfunc_fixed[key] = ebparams[ind]

                # handle parameters with lower and upper bounds
                elif key in param_bounds and isinstance(
                        param_bounds[key], (tuple, list)):

                    lower_bounds.append(param_bounds[key][0])
                    upper_bounds.append(param_bounds[key][1])

                # handle no parameter bounds
                else:

                    lower_bounds.append(-npinf)
                    upper_bounds.append(npinf)

            # generate the bounds sequence in the required format
            curvefit_bounds = (nparray(lower_bounds), nparray(upper_bounds))

        #
        # set up the curve fit function
        #
        curvefit_func = partial(eclipses.invgauss_eclipses_curvefit_func,
                                zerolevel=npmedian(smags),
                                fixed_params=fitfunc_fixed)

        #
        # run the fit
        #
        if curve_fit_kwargs is not None:

            finalparams, covmatrix = curve_fit(
                curvefit_func,
                stimes,
                smags,
                p0=ebparams,
                sigma=serrs,
                bounds=curvefit_bounds,
                absolute_sigma=(not scale_errs_redchisq_unity),
                **curve_fit_kwargs)

        else:

            finalparams, covmatrix = curve_fit(
                curvefit_func,
                stimes,
                smags,
                p0=ebparams,
                sigma=serrs,
                bounds=curvefit_bounds,
                absolute_sigma=(not scale_errs_redchisq_unity),
            )

    except Exception:
        LOGEXCEPTION("curve_fit returned an exception")
        finalparams, covmatrix = None, None

    # if the fit succeeded, then we can return the final parameters
    if finalparams is not None and covmatrix is not None:

        # calculate the chisq and reduced chisq
        fitmags, phase, ptimes, pmags, perrs = eclipses.invgauss_eclipses_func(
            finalparams, stimes, smags, serrs)
        fitchisq = npsum(
            ((fitmags - pmags) * (fitmags - pmags)) / (perrs * perrs))
        fitredchisq = fitchisq / (len(pmags) - len(finalparams) -
                                  len(fitfunc_fixed))

        stderrs = npsqrt(npdiag(covmatrix))

        if verbose:
            LOGINFO('final fit done. chisq = %.5f, reduced chisq = %.5f' %
                    (fitchisq, fitredchisq))

        # get the fit epoch
        fperiod, fepoch = finalparams[:2]

        # assemble the returndict
        returndict = {
            'fittype': 'gaussianeb',
            'fitinfo': {
                'initialparams': ebparams,
                'finalparams': finalparams,
                'finalparamerrs': stderrs,
                'fitmags': fitmags,
                'fitepoch': fepoch,
            },
            'fitchisq': fitchisq,
            'fitredchisq': fitredchisq,
            'fitplotfile': None,
            'magseries': {
                'phase': phase,
                'times': ptimes,
                'mags': pmags,
                'errs': perrs,
                'magsarefluxes': magsarefluxes,
            },
        }

        # make the fit plot if required
        if plotfit and isinstance(plotfit, str):

            make_fit_plot(phase,
                          pmags,
                          perrs,
                          fitmags,
                          fperiod,
                          ptimes.min(),
                          fepoch,
                          plotfit,
                          magsarefluxes=magsarefluxes)

            returndict['fitplotfile'] = plotfit

        return returndict

    # if the leastsq fit failed, return nothing
    else:

        LOGERROR('eb-fit: least-squared fit to the light curve failed!')

        # assemble the returndict
        returndict = {
            'fittype': 'gaussianeb',
            'fitinfo': {
                'initialparams': ebparams,
                'finalparams': None,
                'finalparamerrs': None,
                'fitmags': None,
                'fitepoch': None,
            },
            'fitchisq': npnan,
            'fitredchisq': npnan,
            'fitplotfile': None,
            'magseries': {
                'phase': None,
                'times': None,
                'mags': None,
                'errs': None,
                'magsarefluxes': magsarefluxes,
            },
        }

        return returndict
Пример #18
0
def rfepd_magseries(times, mags, errs,
                    externalparam_arrs,
                    magsarefluxes=False,
                    epdsmooth=True,
                    epdsmooth_sigclip=3.0,
                    epdsmooth_windowsize=201,
                    epdsmooth_func=smooth_magseries_savgol,
                    epdsmooth_extraparams=None,
                    rf_subsample=1.0,
                    rf_ntrees=300,
                    rf_extraparams={'criterion':'mse',
                                    'oob_score':False,
                                    'n_jobs':-1}):
    '''This uses a RandomForestRegressor to de-correlate the given magseries.

    times, mags, errs are ndarrays of time and magnitude values to filter.

    externalparam_arrs is a list of ndarrays of external parameters to
    decorrelate against. These should all be the same size as times, mags, errs.

    epdsmooth = True sets the training light curve to be a smoothed version of
    the sigma-clipped light curve.

    epdsmooth_windowsize is the number of points to smooth over to generate a
    smoothed light curve to train the regressor against.

    epdsmooth_func sets the smoothing filter function to use. A Savitsky-Golay
    filter is used to smooth the light curve by default.

    epdsmooth_extraparams is a dict of any extra filter params to supply to the
    smoothing function.

    rf_subsample is the fraction of the size of the mags array to use for
    training the random forest regressor.

    rf_ntrees is the number of trees to use for the RandomForestRegressor.

    rf_extraparams is any extra params to provide to the RandomForestRegressor
    instance as a dict.

    Returns a dict with decorrelated mags and the usual info from the
    RandomForestRegressor: variable importances, etc.

    '''
    # get finite times, mags, errs
    finind = np.isfinite(times) & np.isfinite(mags) & np.isfinite(errs)
    ftimes, fmags, ferrs = times[::][finind], mags[::][finind], errs[::][finind]
    finalparam_arrs = []
    for ep in externalparam_arrs:
        finalparam_arrs.append(ep[::][finind])

    stimes, smags, serrs, eparams = sigclip_magseries_with_extparams(
        times, mags, errs,
        externalparam_arrs,
        sigclip=epdsmooth_sigclip,
        magsarefluxes=magsarefluxes
    )

    # smoothing is optional for RFR because we train on a fraction of the mag
    # series and so should not require a smoothed input to fit a function to
    if epdsmooth:

        # smooth the signal
        if isinstance(epdsmooth_extraparams, dict):
            smoothedmags = epdsmooth_func(smags,
                                          epdsmooth_windowsize,
                                          **epdsmooth_extraparams)
        else:
            smoothedmags = epdsmooth_func(smags,
                                          epdsmooth_windowsize)

    else:

         smoothedmags = smags


    # set up the regressor
    if isinstance(rf_extraparams, dict):
        RFR = RandomForestRegressor(n_estimators=rf_ntrees,
                                    **rf_extraparams)
    else:
        RFR = RandomForestRegressor(n_estimators=rf_ntrees)

    # collect the features
    features = np.column_stack(eparams)

    # fit, then generate the predicted values, then get corrected values

    # we fit on a randomly selected subsample of all the mags
    if rf_subsample < 1.0:
        featureindices = np.arange(smoothedmags.size)

        # these are sorted because time-order should be important
        training_indices = np.sort(
            npr.choice(featureindices,
                       size=int(rf_subsample*smoothedmags.size),
                       replace=False)
        )
    else:
        training_indices = np.arange(smoothedmags.size)

    RFR.fit(features[training_indices,:], smoothedmags[training_indices])

    # predict on the full feature set
    flux_corrections = RFR.predict(np.column_stack(finalparam_arrs))
    corrected_fmags = npmedian(fmags) + fmags - flux_corrections

    retdict = {'times':ftimes,
               'mags':corrected_fmags,
               'errs':ferrs,
               'feature_importances':RFR.feature_importances_,
               'regressor':RFR}

    return retdict
Пример #19
0
def aov_periodfind(times,
                   mags,
                   errs,
                   magsarefluxes=False,
                   startp=None,
                   endp=None,
                   stepsize=1.0e-4,
                   autofreq=True,
                   normalize=True,
                   phasebinsize=0.05,
                   mindetperbin=9,
                   nbestpeaks=5,
                   periodepsilon=0.1,
                   sigclip=10.0,
                   nworkers=None,
                   verbose=True):
    '''This runs a parallelized Analysis-of-Variance (AoV) period search.

    NOTE: `normalize = True` here as recommended by Schwarzenberg-Czerny 1996,
    i.e. mags will be normalized to zero and rescaled so their variance = 1.0.

    Parameters
    ----------

    times,mags,errs : np.array
        The mag/flux time-series with associated measurement errors to run the
        period-finding on.

    magsarefluxes : bool
        If the input measurement values in `mags` and `errs` are in fluxes, set
        this to True.

    startp,endp : float or None
        The minimum and maximum periods to consider for the transit search.

    stepsize : float
        The step-size in frequency to use when constructing a frequency grid for
        the period search.

    autofreq : bool
        If this is True, the value of `stepsize` will be ignored and the
        :py:func:`astrobase.periodbase.get_frequency_grid` function will be used
        to generate a frequency grid based on `startp`, and `endp`. If these are
        None as well, `startp` will be set to 0.1 and `endp` will be set to
        `times.max() - times.min()`.

    normalize : bool
        This sets if the input time-series is normalized to 0.0 and rescaled
        such that its variance = 1.0. This is the recommended procedure by
        Schwarzenberg-Czerny 1996.

    phasebinsize : float
        The bin size in phase to use when calculating the AoV theta statistic at
        a test frequency.

    mindetperbin : int
        The minimum number of elements in a phase bin to consider it valid when
        calculating the AoV theta statistic at a test frequency.

    nbestpeaks : int
        The number of 'best' peaks to return from the periodogram results,
        starting from the global maximum of the periodogram peak values.

    periodepsilon : float
        The fractional difference between successive values of 'best' periods
        when sorting by periodogram power to consider them as separate periods
        (as opposed to part of the same periodogram peak). This is used to avoid
        broad peaks in the periodogram and make sure the 'best' periods returned
        are all actually independent.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    nworkers : int
        The number of parallel workers to use when calculating the periodogram.

    verbose : bool
        If this is True, will indicate progress and details about the frequency
        grid used for the period search.

    Returns
    -------

    dict
        This function returns a dict, referred to as an `lspinfo` dict in other
        astrobase functions that operate on periodogram results. This is a
        standardized format across all astrobase period-finders, and is of the
        form below::

            {'bestperiod': the best period value in the periodogram,
             'bestlspval': the periodogram peak associated with the best period,
             'nbestpeaks': the input value of nbestpeaks,
             'nbestlspvals': nbestpeaks-size list of best period peak values,
             'nbestperiods': nbestpeaks-size list of best periods,
             'lspvals': the full array of periodogram powers,
             'periods': the full array of periods considered,
             'method':'aov' -> the name of the period-finder method,
             'kwargs':{ dict of all of the input kwargs for record-keeping}}

    '''

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

        # get the frequencies to use
        if startp:
            endf = 1.0/startp
        else:
            # default start period is 0.1 day
            endf = 1.0/0.1

        if endp:
            startf = 1.0/endp
        else:
            # default end period is length of time series
            startf = 1.0/(stimes.max() - stimes.min())

        # if we're not using autofreq, then use the provided frequencies
        if not autofreq:
            frequencies = nparange(startf, endf, stepsize)
            if verbose:
                LOGINFO(
                    'using %s frequency points, start P = %.3f, end P = %.3f' %
                    (frequencies.size, 1.0/endf, 1.0/startf)
                )
        else:
            # this gets an automatic grid of frequencies to use
            frequencies = get_frequency_grid(stimes,
                                             minfreq=startf,
                                             maxfreq=endf)
            if verbose:
                LOGINFO(
                    'using autofreq with %s frequency points, '
                    'start P = %.3f, end P = %.3f' %
                    (frequencies.size,
                     1.0/frequencies.max(),
                     1.0/frequencies.min())
                )

        # map to parallel workers
        if (not nworkers) or (nworkers > NCPUS):
            nworkers = NCPUS
            if verbose:
                LOGINFO('using %s workers...' % nworkers)

        pool = Pool(nworkers)

        # renormalize the working mags to zero and scale them so that the
        # variance = 1 for use with our LSP functions
        if normalize:
            nmags = (smags - npmedian(smags))/npstd(smags)
        else:
            nmags = smags

        tasks = [(stimes, nmags, serrs, x, phasebinsize, mindetperbin)
                 for x in frequencies]

        lsp = pool.map(_aov_worker, tasks)

        pool.close()
        pool.join()
        del pool

        lsp = nparray(lsp)
        periods = 1.0/frequencies

        # find the nbestpeaks for the periodogram: 1. sort the lsp array by
        # highest value first 2. go down the values until we find five
        # values that are separated by at least periodepsilon in period

        # make sure to filter out non-finite values
        finitepeakind = npisfinite(lsp)
        finlsp = lsp[finitepeakind]
        finperiods = periods[finitepeakind]

        # make sure that finlsp has finite values before we work on it
        try:

            bestperiodind = npargmax(finlsp)

        except ValueError:

            LOGERROR('no finite periodogram values '
                     'for this mag series, skipping...')
            return {'bestperiod':npnan,
                    'bestlspval':npnan,
                    'nbestpeaks':nbestpeaks,
                    'nbestlspvals':None,
                    'nbestperiods':None,
                    'lspvals':None,
                    'periods':None,
                    'method':'aov',
                    'kwargs':{'startp':startp,
                              'endp':endp,
                              'stepsize':stepsize,
                              'normalize':normalize,
                              'phasebinsize':phasebinsize,
                              'mindetperbin':mindetperbin,
                              'autofreq':autofreq,
                              'periodepsilon':periodepsilon,
                              'nbestpeaks':nbestpeaks,
                              'sigclip':sigclip}}

        sortedlspind = npargsort(finlsp)[::-1]
        sortedlspperiods = finperiods[sortedlspind]
        sortedlspvals = finlsp[sortedlspind]

        # now get the nbestpeaks
        nbestperiods, nbestlspvals, peakcount = (
            [finperiods[bestperiodind]],
            [finlsp[bestperiodind]],
            1
        )
        prevperiod = sortedlspperiods[0]

        # find the best nbestpeaks in the lsp and their periods
        for period, lspval in zip(sortedlspperiods, sortedlspvals):

            if peakcount == nbestpeaks:
                break
            perioddiff = abs(period - prevperiod)
            bestperiodsdiff = [abs(period - x) for x in nbestperiods]

            # print('prevperiod = %s, thisperiod = %s, '
            #       'perioddiff = %s, peakcount = %s' %
            #       (prevperiod, period, perioddiff, peakcount))

            # this ensures that this period is different from the last
            # period and from all the other existing best periods by
            # periodepsilon to make sure we jump to an entire different peak
            # in the periodogram
            if (perioddiff > (periodepsilon*prevperiod) and
                all(x > (periodepsilon*period) for x in bestperiodsdiff)):
                nbestperiods.append(period)
                nbestlspvals.append(lspval)
                peakcount = peakcount + 1

            prevperiod = period


        return {'bestperiod':finperiods[bestperiodind],
                'bestlspval':finlsp[bestperiodind],
                'nbestpeaks':nbestpeaks,
                'nbestlspvals':nbestlspvals,
                'nbestperiods':nbestperiods,
                'lspvals':lsp,
                'periods':periods,
                'method':'aov',
                'kwargs':{'startp':startp,
                          'endp':endp,
                          'stepsize':stepsize,
                          'normalize':normalize,
                          'phasebinsize':phasebinsize,
                          'mindetperbin':mindetperbin,
                          'autofreq':autofreq,
                          'periodepsilon':periodepsilon,
                          'nbestpeaks':nbestpeaks,
                          'sigclip':sigclip}}

    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        return {'bestperiod':npnan,
                'bestlspval':npnan,
                'nbestpeaks':nbestpeaks,
                'nbestlspvals':None,
                'nbestperiods':None,
                'lspvals':None,
                'periods':None,
                'method':'aov',
                'kwargs':{'startp':startp,
                          'endp':endp,
                          'stepsize':stepsize,
                          'normalize':normalize,
                          'phasebinsize':phasebinsize,
                          'mindetperbin':mindetperbin,
                          'autofreq':autofreq,
                          'periodepsilon':periodepsilon,
                          'nbestpeaks':nbestpeaks,
                          'sigclip':sigclip}}
Пример #20
0
def invgauss_eclipses_func(ebparams, times, mags, errs):
    '''This returns a double eclipse shaped function.

    Suitable for first order modeling of eclipsing binaries.

    ebparams = [period (time),
                epoch (time),
                pdepth (mags),
                pduration (phase),
                psdepthratio,
                secondaryphase]

    period is the period in days

    epoch is the time of minimum in JD

    pdepth is the depth of the primary eclipse
    - for magnitudes -> transitdepth should be < 0
    - for fluxes     -> transitdepth should be > 0

    pduration is the length of the primary eclipse in phase

    psdepthratio is the ratio in the eclipse depths:
    depth_secondary/depth_primary. This is generally the same as the ratio of
    the Teffs of the two stars.

    secondaryphase is the phase at which the minimum of the secondary eclipse is
    located. This effectively parameterizes eccentricity.

    All of these will then have fitted values after the fit is done.

    '''

    (period, epoch, pdepth, pduration, depthratio, secondaryphase) = ebparams

    # generate the phases
    iphase = (times - epoch) / period
    iphase = iphase - npfloor(iphase)

    phasesortind = npargsort(iphase)
    phase = iphase[phasesortind]
    ptimes = times[phasesortind]
    pmags = mags[phasesortind]
    perrs = errs[phasesortind]

    zerolevel = npmedian(pmags)
    modelmags = npfull_like(phase, zerolevel)

    primaryecl_amp = -pdepth
    secondaryecl_amp = -pdepth * depthratio

    primaryecl_std = pduration / 5.0  # we use 5-sigma as full-width -> duration
    secondaryecl_std = pduration / 5.0  # secondary eclipse has the same duration

    halfduration = pduration / 2.0

    # phase indices
    primary_eclipse_ingress = ((phase >=
                                (1.0 - halfduration)) & (phase <= 1.0))
    primary_eclipse_egress = ((phase >= 0.0) & (phase <= halfduration))

    secondary_eclipse_phase = ((phase >= (secondaryphase - halfduration)) &
                               (phase <= (secondaryphase + halfduration)))

    # put in the eclipses
    modelmags[primary_eclipse_ingress] = (zerolevel + _gaussian(
        phase[primary_eclipse_ingress], primaryecl_amp, 1.0, primaryecl_std))
    modelmags[primary_eclipse_egress] = (zerolevel + _gaussian(
        phase[primary_eclipse_egress], primaryecl_amp, 0.0, primaryecl_std))
    modelmags[secondary_eclipse_phase] = (
        zerolevel + _gaussian(phase[secondary_eclipse_phase], secondaryecl_amp,
                              secondaryphase, secondaryecl_std))

    return modelmags, phase, ptimes, pmags, perrs
Пример #21
0
def aov_theta(times, mags, errs, frequency, binsize=0.05, minbin=9):
    '''Calculates the Schwarzenberg-Czerny AoV statistic at a test frequency.

    '''

    period = 1.0 / frequency
    fold_time = times[0]

    phased = phase_magseries(times,
                             mags,
                             period,
                             fold_time,
                             wrap=False,
                             sort=True)

    phases = phased['phase']
    pmags = phased['mags']
    bins = np.arange(0.0, 1.0, binsize)
    nbins = bins.size
    ndets = phases.size

    binnedphaseinds = npdigitize(phases, bins)

    bin_s1_tops = []
    bin_s2_tops = []
    binndets = []
    goodbins = 0

    all_xbar = npmedian(pmags)

    for x in npunique(binnedphaseinds):

        thisbin_inds = binnedphaseinds == x
        thisbin_phases = phases[thisbin_inds]
        thisbin_mags = pmags[thisbin_inds]

        if thisbin_mags.size > minbin:

            thisbin_ndet = thisbin_mags.size
            thisbin_xbar = npmedian(thisbin_mags)

            # get s1
            thisbin_s1_top = (thisbin_ndet * (thisbin_xbar - all_xbar) *
                              (thisbin_xbar - all_xbar))

            # get s2
            thisbin_s2_top = npsum(
                (thisbin_mags - all_xbar) * (thisbin_mags - all_xbar))

            bin_s1_tops.append(thisbin_s1_top)
            bin_s2_tops.append(thisbin_s2_top)
            binndets.append(thisbin_ndet)
            goodbins = goodbins + 1

    # turn the quantities into arrays
    bin_s1_tops = nparray(bin_s1_tops)
    bin_s2_tops = nparray(bin_s2_tops)
    binndets = nparray(binndets)

    # calculate s1 first
    s1 = npsum(bin_s1_tops) / (goodbins - 1.0)

    # then calculate s2
    s2 = npsum(bin_s2_tops) / (ndets - goodbins)

    theta_aov = s1 / s2

    return theta_aov
Пример #22
0
def plot_mag_series(times,
                    mags,
                    errs=None,
                    outfile=None,
                    sigclip=30.0,
                    timebin=None,
                    yrange=None):
    '''This plots a magnitude time series.

    If outfile is none, then plots to matplotlib interactive window. If outfile
    is a string denoting a filename, uses that to write a png/eps/pdf figure.

    timebin is either a float indicating binsize in seconds, or None indicating
    no time-binning is required.

    '''

    if errs is not None:

        # remove nans
        find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
        ftimes, fmags, ferrs = times[find], mags[find], errs[find]

        # get the median and stdev = 1.483 x MAD
        median_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

        # sigclip next
        if sigclip:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

            LOGINFO('sigclip = %s: before = %s observations, '
                    'after = %s observations' %
                    (sigclip, len(times), len(stimes)))

        else:

            stimes = ftimes
            smags = fmags
            serrs = ferrs

    else:

        # remove nans
        find = npisfinite(times) & npisfinite(mags)
        ftimes, fmags, ferrs = times[find], mags[find], None

        # get the median and stdev = 1.483 x MAD
        median_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

        # sigclip next
        if sigclip:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = None

            LOGINFO('sigclip = %s: before = %s observations, '
                    'after = %s observations' %
                    (sigclip, len(times), len(stimes)))

        else:

            stimes = ftimes
            smags = fmags
            serrs = None

    # now we proceed to binning
    if timebin and errs is not None:

        binned = time_bin_magseries_with_errs(stimes, smags, serrs,
                                              binsize=timebin)
        btimes, bmags, berrs = (binned['binnedtimes'],
                                binned['binnedmags'],
                                binned['binnederrs'])

    elif timebin and errs is None:

        binned = time_bin_magseries(stimes, smags,
                                    binsize=timebin)
        btimes, bmags, berrs = binned['binnedtimes'], binned['binnedmags'], None

    else:

        btimes, bmags, berrs = stimes, smags, serrs


    # finally, proceed with plotting
    fig = plt.figure()
    fig.set_size_inches(7.5,4.8)

    plt.errorbar(btimes, bmags, fmt='go', yerr=berrs,
                 markersize=2.0, markeredgewidth=0.0, ecolor='grey',
                 capsize=0)

    # make a grid
    plt.grid(color='#a9a9a9',
             alpha=0.9,
             zorder=0,
             linewidth=1.0,
             linestyle=':')

    # fix the ticks to use no offsets
    plt.gca().get_yaxis().get_major_formatter().set_useOffset(False)
    plt.gca().get_xaxis().get_major_formatter().set_useOffset(False)

    # get the yrange
    if yrange and isinstance(yrange,list) and len(yrange) == 2:
        ymin, ymax = yrange
    else:
        ymin, ymax = plt.ylim()
    plt.ylim(ymax,ymin)

    plt.xlim(npmin(btimes) - 0.001*npmin(btimes),
             npmax(btimes) + 0.001*npmin(btimes))

    plt.xlabel('time [JD]')
    plt.ylabel('magnitude')

    if outfile and isinstance(outfile, str):

        plt.savefig(outfile,bbox_inches='tight')
        plt.close()
        return os.path.abspath(outfile)

    else:

        plt.show()
        plt.close()
        return
Пример #23
0
def lightcurve_ptp_measures(ftimes, fmags, ferrs):
    '''This calculates various point-to-point measures (`eta` in Kim+ 2014).

    Parameters
    ----------

    ftimes,fmags,ferrs : np.array
        The input mag/flux time-series with all non-finite elements removed.

    Returns
    -------

    dict
        A dict with values of the point-to-point measures, including the `eta`
        variability index (often used as its inverse `inveta` to have the same
        sense as increasing variability index -> more likely a variable star).

    '''

    ndet = len(fmags)

    if ndet > 9:

        timediffs = npdiff(ftimes)

        # get rid of stuff with time diff = 0.0
        nzind = npnonzero(timediffs)
        ftimes, fmags, ferrs = ftimes[nzind], fmags[nzind], ferrs[nzind]

        # recalculate ndet and diffs
        ndet = ftimes.size
        timediffs = npdiff(ftimes)

        # calculate the point to point measures
        p2p_abs_magdiffs = npabs(npdiff(fmags))
        p2p_squared_magdiffs = npdiff(fmags) * npdiff(fmags)

        robstd = npmedian(npabs(fmags - npmedian(fmags))) * 1.483
        robvar = robstd * robstd

        # these are eta from the Kim+ 2014 paper - ratio of point-to-point
        # difference to the variance of the entire series

        # this is the robust version
        eta_robust = npmedian(p2p_abs_magdiffs) / robvar
        eta_robust = eta_robust / (ndet - 1.0)

        # this is the usual version
        eta_normal = npsum(p2p_squared_magdiffs) / npvar(fmags)
        eta_normal = eta_normal / (ndet - 1.0)

        timeweights = 1.0 / (timediffs * timediffs)

        # this is eta_e modified for uneven sampling from the Kim+ 2014 paper
        eta_uneven_normal = ((npsum(timeweights * p2p_squared_magdiffs) /
                              (npvar(fmags) * npsum(timeweights))) *
                             npmean(timeweights) *
                             (ftimes.max() - ftimes.min()) *
                             (ftimes.max() - ftimes.min()))

        # this is robust eta_e modified for uneven sampling from the Kim+ 2014
        # paper
        eta_uneven_robust = ((npsum(timeweights * p2p_abs_magdiffs) /
                              (robvar * npsum(timeweights))) *
                             npmedian(timeweights) * (ftimes[-1] - ftimes[0]) *
                             (ftimes[-1] - ftimes[0]))

        return {
            'eta_normal': eta_normal,
            'eta_robust': eta_robust,
            'eta_uneven_normal': eta_uneven_normal,
            'eta_uneven_robust': eta_uneven_robust
        }

    else:

        return None
Пример #24
0
def sigclip_magseries(times,
                      mags,
                      errs,
                      sigclip=None,
                      iterative=False,
                      niterations=None,
                      meanormedian='median',
                      magsarefluxes=False):
    '''
    Select the finite times, magnitudes (or fluxes), and errors from the
    passed values, and apply symmetric or asymmetric sigma clipping to them.
    Returns sigma-clipped times, mags, and errs.

    Args:
        times (np.array): ...

        mags (np.array): numpy array to sigma-clip. Does not assume all values
        are finite. Does not assume anything about whether they're
        positive/negative.

        errs (np.array): ...

        iterative (bool): True if you want iterative sigma-clipping. If
        niterations is not set and this is True, sigma-clipping is iterated
        until no more points are removed.

        niterations (int): maximum number of iterations to perform for
        sigma-clipping. If None, the iterative arg takes precedence, and
        iterative=True will sigma-clip until no more points are removed.
        If niterations is not None and iterative is False, niterations takes
        precedence and iteration will occur.

        meanormedian (string): either 'mean' for sigma-clipping based on the
        mean value, or 'median' for sigma-clipping based on the median value.
        Default is 'median'.

        magsarefluxes (bool): True if your "mags" are in fact fluxes, i.e. if
        "dimming" corresponds to your "mags" getting smaller.

        sigclip (float or list): If float, apply symmetric sigma clipping. If
        list, e.g., [10., 3.], will sigclip out greater than 10-sigma dimmings
        and greater than 3-sigma brightenings. Here the meaning of "dimming"
        and "brightening" is set by *physics* (not the magnitude system), which
        is why the `magsarefluxes` kwarg must be correctly set.

    Returns:
        stimes, smags, serrs: (sigmaclipped values of each).
    '''

    returnerrs = True

    # fake the errors if they don't exist
    # this is inconsequential to sigma-clipping
    # we don't return these dummy values if the input errs are None
    if errs is None:
        # assume 0.1% errors if not given
        # this should work for mags and fluxes
        errs = 0.001 * mags
        returnerrs = False

    # filter the input times, mags, errs; do sigclipping and normalization
    find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes, fmags, ferrs = times[find], mags[find], errs[find]

    # get the center value and stdev
    if meanormedian == 'median':  # stddev = 1.483 x MAD

        center_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - center_mag))) * 1.483

    elif meanormedian == 'mean':

        center_mag = npmean(fmags)
        stddev_mag = npstddev(fmags)

    else:
        LOGWARNING("unrecognized meanormedian value given to "
                   "sigclip_magseries: %s, defaulting to 'median'" %
                   meanormedian)
        meanormedian = 'median'
        center_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - center_mag))) * 1.483

    # sigclip next for a single sigclip value
    if sigclip and isinstance(sigclip, (float, int)):

        if not iterative and niterations is None:

            sigind = (npabs(fmags - center_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

        else:

            #
            # iterative version adapted from scipy.stats.sigmaclip
            #

            # First, if niterations is not set, iterate until covergence
            if niterations is None:

                delta = 1

                this_times = ftimes
                this_mags = fmags
                this_errs = ferrs

                while delta:

                    if meanormedian == 'mean':
                        this_center = npmean(this_mags)
                        this_stdev = npstddev(this_mags)
                    elif meanormedian == 'median':
                        this_center = npmedian(this_mags)
                        this_stdev = (npmedian(
                            npabs(this_mags - this_center))) * 1.483
                    this_size = this_mags.size

                    # apply the sigclip
                    tsi = ((npabs(this_mags - this_center)) <
                           (sigclip * this_stdev))

                    # update the arrays
                    this_times = this_times[tsi]
                    this_mags = this_mags[tsi]
                    this_errs = this_errs[tsi]

                    # update delta and go to the top of the loop
                    delta = this_size - this_mags.size

            else:  # If iterating only a certain number of times

                this_times = ftimes
                this_mags = fmags
                this_errs = ferrs

                iter_num = 0
                delta = 1
                while iter_num < niterations and delta:

                    if meanormedian == 'mean':

                        this_center = npmean(this_mags)
                        this_stdev = npstddev(this_mags)

                    elif meanormedian == 'median':

                        this_center = npmedian(this_mags)
                        this_stdev = (npmedian(
                            npabs(this_mags - this_center))) * 1.483
                    this_size = this_mags.size

                    # apply the sigclip
                    tsi = ((npabs(this_mags - this_center)) <
                           (sigclip * this_stdev))

                    # update the arrays
                    this_times = this_times[tsi]
                    this_mags = this_mags[tsi]
                    this_errs = this_errs[tsi]

                    # update the number of iterations and delta and
                    # go to the top of the loop
                    delta = this_size - this_mags.size
                    iter_num += 1

            # final sigclipped versions
            stimes, smags, serrs = this_times, this_mags, this_errs

    # this handles sigclipping for asymmetric +ve and -ve clip values
    elif sigclip and isinstance(sigclip, list) and len(sigclip) == 2:

        # sigclip is passed as [dimmingclip, brighteningclip]
        dimmingclip = sigclip[0]
        brighteningclip = sigclip[1]

        if not iterative and niterations is None:

            if magsarefluxes:
                nottoodimind = ((fmags - center_mag) >
                                (-dimmingclip * stddev_mag))
                nottoobrightind = ((fmags - center_mag) <
                                   (brighteningclip * stddev_mag))
            else:
                nottoodimind = ((fmags - center_mag) <
                                (dimmingclip * stddev_mag))
                nottoobrightind = ((fmags - center_mag) >
                                   (-brighteningclip * stddev_mag))

            sigind = nottoodimind & nottoobrightind

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

        else:

            #
            # iterative version adapted from scipy.stats.sigmaclip
            #
            if niterations is None:

                delta = 1

                this_times = ftimes
                this_mags = fmags
                this_errs = ferrs

                while delta:

                    if meanormedian == 'mean':

                        this_center = npmean(this_mags)
                        this_stdev = npstddev(this_mags)

                    elif meanormedian == 'median':
                        this_center = npmedian(this_mags)
                        this_stdev = (npmedian(
                            npabs(this_mags - this_center))) * 1.483
                    this_size = this_mags.size

                    if magsarefluxes:
                        nottoodimind = ((this_mags - this_center) >
                                        (-dimmingclip * this_stdev))
                        nottoobrightind = ((this_mags - this_center) <
                                           (brighteningclip * this_stdev))
                    else:
                        nottoodimind = ((this_mags - this_center) <
                                        (dimmingclip * this_stdev))
                        nottoobrightind = ((this_mags - this_center) >
                                           (-brighteningclip * this_stdev))

                    # apply the sigclip
                    tsi = nottoodimind & nottoobrightind

                    # update the arrays
                    this_times = this_times[tsi]
                    this_mags = this_mags[tsi]
                    this_errs = this_errs[tsi]

                    # update delta and go to top of the loop
                    delta = this_size - this_mags.size

            else:  # If iterating only a certain number of times
                this_times = ftimes
                this_mags = fmags
                this_errs = ferrs

                iter_num = 0
                delta = 1

                while iter_num < niterations and delta:

                    if meanormedian == 'mean':
                        this_center = npmean(this_mags)
                        this_stdev = npstddev(this_mags)
                    elif meanormedian == 'median':
                        this_center = npmedian(this_mags)
                        this_stdev = (npmedian(
                            npabs(this_mags - this_center))) * 1.483
                    this_size = this_mags.size

                    if magsarefluxes:
                        nottoodimind = ((this_mags - this_center) >
                                        (-dimmingclip * this_stdev))
                        nottoobrightind = ((this_mags - this_center) <
                                           (brighteningclip * this_stdev))
                    else:
                        nottoodimind = ((this_mags - this_center) <
                                        (dimmingclip * this_stdev))
                        nottoobrightind = ((this_mags - this_center) >
                                           (-brighteningclip * this_stdev))

                    # apply the sigclip
                    tsi = nottoodimind & nottoobrightind

                    # update the arrays
                    this_times = this_times[tsi]
                    this_mags = this_mags[tsi]
                    this_errs = this_errs[tsi]

                    # update the number of iterations and delta
                    # and go to top of the loop
                    delta = this_size - this_mags.size
                    iter_num += 1

            # final sigclipped versions
            stimes, smags, serrs = this_times, this_mags, this_errs

    else:

        stimes = ftimes
        smags = fmags
        serrs = ferrs

    if returnerrs:
        return stimes, smags, serrs
    else:
        return stimes, smags, None
def extract_features(filename, is_url=False):
    '''Extracts features to be used in text image classifier.
    :param filename: input image
    :param is_url: is input image a url or a file path on disk
    :return: tuple of features:
    (average_slope, median_slope, average_tilt, median_tilt, median_differences, average_differences, nr_straight_lines)
    Most relevant ones are average_slope, average_differences and nr_straight_lines.
    '''

    if is_url:
        filedata = urllib2.urlopen(filename).read()
        imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1)
        cv.SetData(imagefiledata, filedata, len(filedata))
        src = cv.DecodeImageM(imagefiledata, cv.CV_LOAD_IMAGE_GRAYSCALE)
    else:
        src = cv.LoadImage(filename, cv.CV_LOAD_IMAGE_GRAYSCALE)

    # normalize size
    normalized_size = 400

    # smaller dimension will be 400, longer dimension will be proportional
    orig_size = cv.GetSize(src)

    max_dim_idx = max(enumerate(orig_size), key=lambda l: l[1])[0]
    min_dim_idx = [idx for idx in [0, 1] if idx != max_dim_idx][0]
    new_size = [0, 0]
    new_size[min_dim_idx] = normalized_size
    new_size[max_dim_idx] = int(
        float(orig_size[max_dim_idx]) / orig_size[min_dim_idx] *
        normalized_size)
    dst = cv.CreateImage(new_size, 8, 1)
    cv.Resize(src, dst)
    # cv.SaveImage("/tmp/resized.jpg",dst)
    src = dst

    dst = cv.CreateImage(cv.GetSize(src), 8, 1)
    color_dst = cv.CreateImage(cv.GetSize(src), 8, 3)
    storage = cv.CreateMemStorage(0)

    cv.Canny(src, dst, 50, 200, 3)
    cv.CvtColor(dst, color_dst, cv.CV_GRAY2BGR)

    slopes = []
    # difference between xs or ys - variant of slope
    tilts = []
    # x coordinates of horizontal lines
    horizontals = []
    # y coordinates of vertical lines
    verticals = []

    if USE_STANDARD:
        coords = cv.HoughLines2(dst, storage, cv.CV_HOUGH_STANDARD, 1,
                                pi / 180, 50, 50, 10)
        lines = []
        for coord in coords:
            (rho, theta) = coord
            a = cos(theta)
            b = sin(theta)
            x0 = a * rho
            y0 = b * rho
            pt1 = (cv.Round(x0 + 1000 * (-b)), cv.Round(y0 + 1000 * (a)))
            pt2 = (cv.Round(x0 - 1000 * (-b)), cv.Round(y0 - 1000 * (a)))
            lines += [(pt1, pt2)]

    else:
        lines = cv.HoughLines2(dst, storage, cv.CV_HOUGH_PROBABILISTIC, 1,
                               pi / 180, 50, 50, 10)

    # eliminate duplicates - there are many especially with the standard version
    # first round the coordinates to integers divisible with 5 (to eliminate different but really close ones)
    # TODO
    # lines = list(set(map(lambda l: tuple([int(p) - int(p)%5 for p in l]), lines)))

    nr_straight_lines = 0
    for line in lines:
        (pt1, pt2) = line

        # compute slope, rotate the line so that the slope is smallest
        # (slope is either delta x/ delta y or the reverse)
        # add smoothing term in denominator in case of 0
        slope = min(
            abs(pt1[1] - pt2[1]),
            (abs(pt1[0] - pt2[0]))) / (max(abs(pt1[1] - pt2[1]),
                                           (abs(pt1[0] - pt2[0]))) + 0.01)
        # if slope < 0.1:
        # if slope < 5:
        if slope < 0.05:
            if abs(pt1[0] - pt2[0]) < abs(pt1[1] - pt2[1]):
                # means it's a horizontal line
                horizontals.append(pt1[0])
            else:
                verticals.append(pt1[1])
        if slope < 0.05:
            # if slope < 5:
            # if slope < 0.1:
            nr_straight_lines += 1
        slopes.append(slope)
        tilts.append(min(abs(pt1[1] - pt2[1]), (abs(pt1[0] - pt2[0]))))
        # print slope
    average_slope = sum(slopes) / float(len(slopes))
    median_slope = npmedian(nparray(slopes))
    average_tilt = sum(tilts) / float(len(tilts))
    median_tilt = npmedian(nparray(tilts))
    differences = []
    horizontals = sorted(horizontals)
    verticals = sorted(verticals)
    print "x_differences:"
    for (i, x) in enumerate(horizontals):
        if i > 0:
            # print abs(horizontals[i] - horizontals[i-1])
            differences.append(abs(horizontals[i] - horizontals[i - 1]))
    print "y_differences:"
    for (i, y) in enumerate(verticals):
        if i > 0:
            # print abs(verticals[i] - verticals[i-1])
            differences.append(abs(verticals[i] - verticals[i - 1]))

    print filename
    print "average_slope:", average_slope
    print "median_slope:", median_slope
    print "average_tilt:", average_tilt
    print "median_tilt:", median_tilt
    median_differences = npmedian(nparray(differences))
    print "median_differences:", median_differences
    if not differences:
        # big random number for average difference
        average_differences = 50
    else:
        average_differences = sum(differences) / float(len(differences))
    print "average_differences:", average_differences
    print "nr_lines:", nr_straight_lines

    # print "sorted xs:", sorted(lines)

    return (average_slope, median_slope, average_tilt, median_tilt,
            median_differences, average_differences, nr_straight_lines)
Пример #26
0
def stellingwerf_pdm(times,
                     mags,
                     errs,
                     magsarefluxes=False,
                     autofreq=True,
                     startp=None,
                     endp=None,
                     normalize=False,
                     stepsize=1.0e-4,
                     phasebinsize=0.05,
                     mindetperbin=9,
                     nbestpeaks=5,
                     periodepsilon=0.1, # 0.1
                     sigclip=10.0,
                     nworkers=None,
                     verbose=True):
    '''This runs a parallel Stellingwerf PDM period search.

    '''

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

        # get the frequencies to use
        if startp:
            endf = 1.0/startp
        else:
            # default start period is 0.1 day
            endf = 1.0/0.1

        if endp:
            startf = 1.0/endp
        else:
            # default end period is length of time series
            startf = 1.0/(stimes.max() - stimes.min())

        # if we're not using autofreq, then use the provided frequencies
        if not autofreq:
            frequencies = np.arange(startf, endf, stepsize)
            if verbose:
                LOGINFO(
                    'using %s frequency points, start P = %.3f, end P = %.3f' %
                    (frequencies.size, 1.0/endf, 1.0/startf)
                )
        else:
            # this gets an automatic grid of frequencies to use
            frequencies = get_frequency_grid(stimes,
                                             minfreq=startf,
                                             maxfreq=endf)
            if verbose:
                LOGINFO(
                    'using autofreq with %s frequency points, '
                    'start P = %.3f, end P = %.3f' %
                    (frequencies.size,
                     1.0/frequencies.max(),
                     1.0/frequencies.min())
                )

        # map to parallel workers
        if (not nworkers) or (nworkers > NCPUS):
            nworkers = NCPUS
            if verbose:
                LOGINFO('using %s workers...' % nworkers)

        pool = Pool(nworkers)

        # renormalize the working mags to zero and scale them so that the
        # variance = 1 for use with our LSP functions
        if normalize:
            nmags = (smags - npmedian(smags))/npstd(smags)
        else:
            nmags = smags

        tasks = [(stimes, nmags, serrs, x, phasebinsize, mindetperbin)
                 for x in frequencies]

        lsp = pool.map(stellingwerf_pdm_worker, tasks)

        pool.close()
        pool.join()
        del pool

        lsp = nparray(lsp)
        periods = 1.0/frequencies

        # find the nbestpeaks for the periodogram: 1. sort the lsp array by
        # lowest value first 2. go down the values until we find five values
        # that are separated by at least periodepsilon in period

        # make sure to filter out the non-finite values of lsp
        finitepeakind = npisfinite(lsp)
        finlsp = lsp[finitepeakind]
        finperiods = periods[finitepeakind]

        # finlsp might not have any finite values if the period finding
        # failed. if so, argmin will return a ValueError.
        try:

            bestperiodind = npargmin(finlsp)

        except ValueError:

            LOGERROR('no finite periodogram values for '
                     'this mag series, skipping...')
            return {'bestperiod':npnan,
                    'bestlspval':npnan,
                    'nbestpeaks':nbestpeaks,
                    'nbestlspvals':None,
                    'nbestperiods':None,
                    'lspvals':None,
                    'periods':None,
                    'method':'pdm',
                    'kwargs':{'startp':startp,
                              'endp':endp,
                              'stepsize':stepsize,
                              'normalize':normalize,
                              'phasebinsize':phasebinsize,
                              'mindetperbin':mindetperbin,
                              'autofreq':autofreq,
                              'periodepsilon':periodepsilon,
                              'nbestpeaks':nbestpeaks,
                              'sigclip':sigclip}}

        sortedlspind = np.argsort(finlsp)
        sortedlspperiods = finperiods[sortedlspind]
        sortedlspvals = finlsp[sortedlspind]

        prevbestlspval = sortedlspvals[0]

        # now get the nbestpeaks
        nbestperiods, nbestlspvals, peakcount = (
            [finperiods[bestperiodind]],
            [finlsp[bestperiodind]],
            1
        )
        prevperiod = sortedlspperiods[0]

        # find the best nbestpeaks in the lsp and their periods
        for period, lspval in zip(sortedlspperiods, sortedlspvals):

            if peakcount == nbestpeaks:
                break
            perioddiff = abs(period - prevperiod)
            bestperiodsdiff = [abs(period - x) for x in nbestperiods]

            # print('prevperiod = %s, thisperiod = %s, '
            #       'perioddiff = %s, peakcount = %s' %
            #       (prevperiod, period, perioddiff, peakcount))

            # this ensures that this period is different from the last
            # period and from all the other existing best periods by
            # periodepsilon to make sure we jump to an entire different peak
            # in the periodogram
            if (perioddiff > (periodepsilon*prevperiod) and
                all(x > (periodepsilon*prevperiod) for x in bestperiodsdiff)):
                nbestperiods.append(period)
                nbestlspvals.append(lspval)
                peakcount = peakcount + 1

            prevperiod = period


        return {'bestperiod':finperiods[bestperiodind],
                'bestlspval':finlsp[bestperiodind],
                'nbestpeaks':nbestpeaks,
                'nbestlspvals':nbestlspvals,
                'nbestperiods':nbestperiods,
                'lspvals':lsp,
                'periods':periods,
                'method':'pdm',
                'kwargs':{'startp':startp,
                          'endp':endp,
                          'stepsize':stepsize,
                          'normalize':normalize,
                          'phasebinsize':phasebinsize,
                          'mindetperbin':mindetperbin,
                          'autofreq':autofreq,
                          'periodepsilon':periodepsilon,
                          'nbestpeaks':nbestpeaks,
                          'sigclip':sigclip}}

    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        return {'bestperiod':npnan,
                'bestlspval':npnan,
                'nbestpeaks':nbestpeaks,
                'nbestlspvals':None,
                'nbestperiods':None,
                'lspvals':None,
                'periods':None,
                'method':'pdm',
                'kwargs':{'startp':startp,
                          'endp':endp,
                          'stepsize':stepsize,
                          'normalize':normalize,
                          'phasebinsize':phasebinsize,
                          'mindetperbin':mindetperbin,
                          'autofreq':autofreq,
                          'periodepsilon':periodepsilon,
                          'nbestpeaks':nbestpeaks,
                          'sigclip':sigclip}}
Пример #27
0
def autocorr_magseries(times,
                       mags,
                       errs,
                       maxlags=1000,
                       func=_autocorr_func3,
                       fillgaps=0.0,
                       filterwindow=11,
                       forcetimebin=None,
                       sigclip=3.0,
                       magsarefluxes=False,
                       verbose=True):
    '''This calculates the ACF of a light curve.

    This will pre-process the light curve to fill in all the gaps and normalize
    everything to zero. If `fillgaps = 'noiselevel'`, fills the gaps with the
    noise level obtained via the procedure above. If `fillgaps = 'nan'`, fills
    the gaps with `np.nan`.

    Parameters
    ----------

    times,mags,errs : np.array
        The measurement time-series and associated errors.

    maxlags : int
        The maximum number of lags to calculate.

    func : Python function
        This is a function to calculate the lags.

    fillgaps : 'noiselevel' or float
        This sets what to use to fill in gaps in the time series. If this is
        'noiselevel', will smooth the light curve using a point window size of
        `filterwindow` (this should be an odd integer), subtract the smoothed LC
        from the actual LC and estimate the RMS. This RMS will be used to fill
        in the gaps. Other useful values here are 0.0, and npnan.

    filterwindow : int
        The light curve's smoothing filter window size to use if
        `fillgaps='noiselevel`'.

    forcetimebin : None or float
        This is used to force a particular cadence in the light curve other than
        the automatically determined cadence. This effectively rebins the light
        curve to this cadence. This should be in the same time units as `times`.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    magsarefluxes : bool
        If your input measurements in `mags` are actually fluxes instead of
        mags, set this is True.

    verbose : bool
        If True, will indicate progress and report errors.

    Returns
    -------

    dict
        A dict of the following form is returned::

            {'itimes': the interpolated time values after gap-filling,
             'imags': the interpolated mag/flux values after gap-filling,
             'ierrs': the interpolated mag/flux values after gap-filling,
             'cadence': the cadence of the output mag/flux time-series,
             'minitime': the minimum value of the interpolated times array,
             'lags': the lags used to calculate the auto-correlation function,
             'acf': the value of the ACF at each lag used}

    '''

    # get the gap-filled timeseries
    interpolated = fill_magseries_gaps(times,
                                       mags,
                                       errs,
                                       fillgaps=fillgaps,
                                       forcetimebin=forcetimebin,
                                       sigclip=sigclip,
                                       magsarefluxes=magsarefluxes,
                                       filterwindow=filterwindow,
                                       verbose=verbose)

    if not interpolated:
        print('failed to interpolate light curve to minimum cadence!')
        return None

    itimes, imags = interpolated['itimes'], interpolated['imags'],

    # calculate the lags up to maxlags
    if maxlags:
        lags = nparange(0, maxlags)
    else:
        lags = nparange(itimes.size)

    series_stdev = 1.483 * npmedian(npabs(imags))

    if func != _autocorr_func3:

        # get the autocorrelation as a function of the lag of the mag series
        autocorr = nparray(
            [func(imags, x, imags.size, 0.0, series_stdev) for x in lags])

    # this doesn't need a lags array
    else:

        autocorr = _autocorr_func3(imags, lags[0], imags.size, 0.0,
                                   series_stdev)
        # return only the maximum number of lags
        if maxlags is not None:
            autocorr = autocorr[:maxlags]

    interpolated.update({
        'minitime': itimes.min(),
        'lags': lags,
        'acf': autocorr
    })

    return interpolated
Пример #28
0
def prewhiten_magseries(times,
                        mags,
                        errs,
                        whitenperiod,
                        whitenparams,
                        sigclip=3.0,
                        magsarefluxes=False,
                        plotfit=None,
                        plotfitphasedlconly=True,
                        rescaletomedian=True):
    '''Removes a periodic sinusoidal signal generated using whitenparams from
    the input magnitude time series.

    whitenparams are the Fourier amplitude and phase coefficients:

    [ampl_1, ampl_2, ampl_3, ..., ampl_X,
     pha_1, pha_2, pha_3, ..., pha_X]

    where X is the Fourier order. These are usually the output of a previous
    Fourier fit to the light curve (from varbase.lcfit.fourier_fit_magseries for
    example).

    if rescaletomedian is True, then we add back the constant median term of the
    magnitudes to the final pre-whitened mag series.

    '''

    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    median_mag = npmedian(smags)

    # phase the mag series using the given period and epoch = min(stimes)
    mintime = npmin(stimes)

    # calculate the unsorted phase, then sort it
    iphase = ((stimes - mintime) / whitenperiod - npfloor(
        (stimes - mintime) / whitenperiod))
    phasesortind = npargsort(iphase)

    # these are the final quantities to use for the Fourier fits
    phase = iphase[phasesortind]
    pmags = smags[phasesortind]
    perrs = serrs[phasesortind]

    # get the times sorted in phase order (useful to get the fit mag minimum
    # with respect to phase -- the light curve minimum)
    ptimes = stimes[phasesortind]

    # get the Fourier order
    fourierorder = int(len(whitenparams) / 2)

    # now subtract the harmonic series from the phased LC
    # these are still in phase order
    wmags = pmags - _fourier_func(whitenparams, phase, pmags)

    # resort everything by time order
    wtimeorder = npargsort(ptimes)
    wtimes = ptimes[wtimeorder]
    wphase = phase[wtimeorder]
    wmags = wmags[wtimeorder]
    werrs = perrs[wtimeorder]

    if rescaletomedian:
        wmags = wmags + median_mag

    # prepare the returndict
    returndict = {
        'wtimes': wtimes,  # these are in the new time order
        'wphase': wphase,
        'wmags': wmags,
        'werrs': werrs,
        'whitenparams': whitenparams,
        'whitenperiod': whitenperiod
    }

    # make the fit plot if required
    if plotfit and (isinstance(plotfit, str) or isinstance(plotfit, strio)):

        if plotfitphasedlconly:
            plt.figure(figsize=(10, 4.8))
        else:
            plt.figure(figsize=(16, 9.6))

        if plotfitphasedlconly:

            # phased series before whitening
            plt.subplot(121)
            plt.plot(phase,
                     pmags,
                     marker='.',
                     color='k',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC before pre-whitening')

            # phased series after whitening
            plt.subplot(122)
            plt.plot(wphase,
                     wmags,
                     marker='.',
                     color='g',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC after pre-whitening')

        else:

            # time series before whitening
            plt.subplot(221)
            plt.plot(stimes,
                     smags,
                     marker='.',
                     color='k',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('JD')
            plt.title('LC before pre-whitening')

            # time series after whitening
            plt.subplot(222)
            plt.plot(wtimes,
                     wmags,
                     marker='.',
                     color='g',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('JD')
            plt.title('LC after pre-whitening with period: %.6f' %
                      whitenperiod)

            # phased series before whitening
            plt.subplot(223)
            plt.plot(phase,
                     pmags,
                     marker='.',
                     color='k',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC before pre-whitening')

            # phased series after whitening
            plt.subplot(224)
            plt.plot(wphase,
                     wmags,
                     marker='.',
                     color='g',
                     linestyle='None',
                     markersize=2.0,
                     markeredgewidth=0)

            if not magsarefluxes:
                plt.gca().invert_yaxis()
                plt.ylabel('magnitude')
            else:
                plt.ylabel('fluxes')

            plt.xlabel('phase')
            plt.title('phased LC after pre-whitening')

        plt.tight_layout()
        plt.savefig(plotfit, format='png', pad_inches=0.0)
        plt.close()

        if isinstance(plotfit, str) or isinstance(plotfit, strio):
            returndict['fitplotfile'] = plotfit

    return returndict
Пример #29
0
def lightcurve_flux_measures(ftimes, fmags, ferrs, magsarefluxes=False):
    '''This calculates percentiles and percentile ratios of the flux.

    Parameters
    ----------

    ftimes,fmags,ferrs : np.array
        The input mag/flux time-series with all non-finite elements removed.

    magsarefluxes : bool
        If the `fmags` array actually contains fluxes, will not convert `mags`
        to fluxes before calculating the percentiles.

    Returns
    -------

    dict
        A dict with all of the light curve flux percentiles and percentile
        ratios calculated.

    '''

    ndet = len(fmags)

    if ndet > 9:

        # get the fluxes
        if magsarefluxes:
            series_fluxes = fmags
        else:
            series_fluxes = 10.0**(-0.4 * fmags)

        series_flux_median = npmedian(series_fluxes)

        # get the percent_amplitude for the fluxes
        series_flux_percent_amplitude = (npmax(npabs(series_fluxes)) /
                                         series_flux_median)

        # get the flux percentiles
        series_flux_percentiles = nppercentile(
            series_fluxes,
            [5.0, 10, 17.5, 25, 32.5, 40, 60, 67.5, 75, 82.5, 90, 95])
        series_frat_595 = (series_flux_percentiles[-1] -
                           series_flux_percentiles[0])
        series_frat_1090 = (series_flux_percentiles[-2] -
                            series_flux_percentiles[1])
        series_frat_175825 = (series_flux_percentiles[-3] -
                              series_flux_percentiles[2])
        series_frat_2575 = (series_flux_percentiles[-4] -
                            series_flux_percentiles[3])
        series_frat_325675 = (series_flux_percentiles[-5] -
                              series_flux_percentiles[4])
        series_frat_4060 = (series_flux_percentiles[-6] -
                            series_flux_percentiles[5])

        # calculate the flux percentile ratios
        series_flux_percentile_ratio_mid20 = series_frat_4060 / series_frat_595
        series_flux_percentile_ratio_mid35 = series_frat_325675 / series_frat_595
        series_flux_percentile_ratio_mid50 = series_frat_2575 / series_frat_595
        series_flux_percentile_ratio_mid65 = series_frat_175825 / series_frat_595
        series_flux_percentile_ratio_mid80 = series_frat_1090 / series_frat_595

        # calculate the ratio of F595/median flux
        series_percent_difference_flux_percentile = (series_frat_595 /
                                                     series_flux_median)
        series_percentile_magdiff = -2.5 * nplog10(
            series_percent_difference_flux_percentile)

        return {
            'flux_median': series_flux_median,
            'flux_percent_amplitude': series_flux_percent_amplitude,
            'flux_percentiles': series_flux_percentiles,
            'flux_percentile_ratio_mid20': series_flux_percentile_ratio_mid20,
            'flux_percentile_ratio_mid35': series_flux_percentile_ratio_mid35,
            'flux_percentile_ratio_mid50': series_flux_percentile_ratio_mid50,
            'flux_percentile_ratio_mid65': series_flux_percentile_ratio_mid65,
            'flux_percentile_ratio_mid80': series_flux_percentile_ratio_mid80,
            'percent_difference_flux_percentile': series_percentile_magdiff,
        }

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate flux measures')
        return None
Пример #30
0
def find_redundant_points(points):
    bisector_to_pair = make_bisector_to_pair_map(points)
    angle_to_bisector = make_angle_to_bisector_map(bisector_to_pair)
    median_bisector = get_median_bisector(angle_to_bisector)
    lines_pairs = form_lines_pairs(angle_to_bisector, median_bisector)
    intersection_to_lines_pair = make_intersection_to_pair_map(lines_pairs)

    ys_critical = [p.get_both()['y'] for p in intersection_to_lines_pair.keys()] # form_pairs_intersections_values(lines_pairs)
    y_med = npmedian(array(ys_critical))
    y_separation_line = get_y_separation_line(median_bisector, y_med)
    aim_centre_y_side = determine_enclosure_centre_side(points, y_separation_line)
    crucial_points = find_crucial_points(intersection_to_lines_pair.keys(), y_separation_line, aim_centre_y_side)

    if not crucial_points:
        return []

    xs_critucal = [p.get_both()['x'] for p in crucial_points]
    x_med = npmedian(array(xs_critucal))
    x_separation_line = Line2D(point1=Vector2D(x_med, 0.0), point2=Vector2D(x_med, 1.0))
    aim_centre_x_side = determine_enclosure_centre_side(points, x_separation_line)
    final_points = find_crucial_points(crucial_points, x_separation_line, aim_centre_x_side)

    south_vector, west_vector = Vector2D(0.0, -1.0), Vector2D(-1.0, 0.0)
    south_vector = define_side_direction_vector(y_separation_line, south_vector)
    south_point = y_separation_line.first.sum(south_vector)
    south = y_separation_line.define_point_side(south_point) * aim_centre_y_side > 0

    west_vector = define_side_direction_vector(x_separation_line, west_vector)
    west_point = x_separation_line.first.sum(west_vector)
    west = x_separation_line.define_point_side(west_point) * aim_centre_x_side > 0

    final_lines = list()
    for intersection in final_points:
        line_pair = intersection_to_lines_pair[intersection]
        angle0 = abs(y_axis.angle(line_pair[0]))
        angle1 = abs(y_axis.angle(line_pair[1]))
        print angle0, angle1
        if (south and west) or (not south and not west):
            if angle0 > angle1:
                final_lines.append(line_pair[0])
            else:
                final_lines.append(line_pair[1])
        else:
            # south-east or north-west
            if angle0 < angle1:
                final_lines.append(line_pair[0])
            else:
                final_lines.append(line_pair[1])

    assert final_lines

    rejected_points = set()
    comparison_point = point_in_centre_containing_area(x_separation_line,
                                                       y_separation_line,
                                                       Vector2D(x_med, y_med),
                                                       south, west)
    for line in final_lines:
        comparison_side = line.define_point_side(comparison_point)
        points = bisector_to_pair[line]
        side0 = line.define_point_side(points[0])
        side1 = line.define_point_side(points[1])
        if side0 * comparison_side > 0:
            rejected_points.add(points[0])
        elif side1 * comparison_side > 0:
            rejected_points.add(points[1])
        else:
            print "x-med, y-med point side: ", comparison_side
            assert False

    return rejected_points
Пример #31
0
def stetson_jindex(ftimes, fmags, ferrs, weightbytimediff=False):
    '''This calculates the Stetson index for the magseries, based on consecutive
    pairs of observations.

    Based on Nicole Loncke's work for her Planets and Life certificate at
    Princeton in 2014.

    Parameters
    ----------

    ftimes,fmags,ferrs : np.array
        The input mag/flux time-series with all non-finite elements removed.

    weightbytimediff : bool
        If this is True, the Stetson index for any pair of mags will be
        reweighted by the difference in times between them using the scheme in
        Fruth+ 2012 and Zhange+ 2003 (as seen in Sokolovsky+ 2017)::

            w_i = exp(- (t_i+1 - t_i)/ delta_t )

    Returns
    -------

    float
        The calculated Stetson J variability index.

    '''

    ndet = len(fmags)

    if ndet > 9:

        # get the median and ndet
        medmag = npmedian(fmags)

        # get the stetson index elements
        delta_prefactor = (ndet / (ndet - 1))
        sigma_i = delta_prefactor * (fmags - medmag) / ferrs

        # Nicole's clever trick to advance indices by 1 and do x_i*x_(i+1)
        sigma_j = nproll(sigma_i, 1)

        if weightbytimediff:

            difft = npdiff(ftimes)
            deltat = npmedian(difft)

            weights_i = npexp(-difft / deltat)
            products = (weights_i * sigma_i[1:] * sigma_j[1:])
        else:
            # ignore first elem since it's actually x_0*x_n
            products = (sigma_i * sigma_j)[1:]

        stetsonj = (npsum(npsign(products) * npsqrt(npabs(products)))) / ndet

        return stetsonj

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson J index')
        return npnan
Пример #32
0
def bls_snr(blsdict,
            times,
            mags,
            errs,
            magsarefluxes=False,
            sigclip=10.0,
            perioddeltapercent=10,
            npeaks=None,
            assumeserialbls=False,
            verbose=True):
    '''Calculates the signal to noise ratio for each best peak in the BLS
    periodogram.

    This calculates two values of SNR:

    SNR = transit model depth / RMS of light curve with transit model subtracted
    altSNR = transit model depth / RMS of light curve inside transit

    blsdict is the output of either bls_parallel_pfind or bls_serial_pfind.

    times, mags, errs are ndarrays containing the magnitude series.

    perioddeltapercent controls the period interval used by a bls_serial_pfind
    run around each peak period to figure out the transit depth, duration, and
    ingress/egress bins for eventual calculation of the SNR of the peak.

    npeaks controls how many of the periods in blsdict['nbestperiods'] to find
    the SNR for. If it's None, then this will calculate the SNR for all of
    them. If it's an integer between 1 and len(blsdict['nbestperiods']), will
    calculate for only the specified number of peak periods, starting from the
    best period.

    If assumeserialbls is True, will not rerun bls_serial_pfind to figure out
    the transit depth, duration, and ingress/egress bins for eventual
    calculation of the SNR of the peak. This is normally False because we assume
    that the user will be using bls_parallel_pfind, which works on chunks of
    frequency space so returns multiple values of transit depth, duration,
    ingress/egress bin specific to those chunks. These may not be valid for the
    global best peaks in the periodogram, so we need to rerun bls_serial_pfind
    around each peak in blsdict['nbestperiods'] to get correct values for these.

    FIXME: for now, we're only doing simple RMS. Need to calculate red and
    white-noise RMS as outlined below:

      - calculate the white noise rms and the red noise rms of the residual.

        - the white noise rms is just the rms of the residual
        - the red noise rms = sqrt(binnedrms^2 - expectedbinnedrms^2)

      - calculate the SNR using:

        sqrt(delta^2 / ((sigma_w ^2 / nt) + (sigma_r ^2 / Nt))))

        where:

        delta = transit depth
        sigma_w = white noise rms
        sigma_r = red noise rms
        nt = number of in-transit points
        Nt = number of distinct transits sampled

    '''

    # figure out how many periods to work on
    if (npeaks and (0 < npeaks < len(blsdict['nbestperiods']))):
        nperiods = npeaks
    else:
        if verbose:
            LOGWARNING('npeaks not specified or invalid, '
                       'getting SNR for all %s BLS peaks' %
                       len(blsdict['nbestperiods']))
        nperiods = len(blsdict['nbestperiods'])

    nbestperiods = blsdict['nbestperiods'][:nperiods]

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

        nbestsnrs = []
        nbestasnrs = []
        transitdepth, transitduration = [], []

        # get these later
        whitenoise, rednoise = [], []
        nphasebins, transingressbin, transegressbin = [], [], []

        # keep these around for diagnostics
        allsubtractedmags = []
        allphasedmags = []
        allphases = []
        allblsmodels = []

        for ind, period in enumerate(nbestperiods):

            # get the period interval
            startp = period - perioddeltapercent * period / 100.0
            endp = period + perioddeltapercent * period / 100.0

            # see if we need to rerun bls_serial_pfind
            if not assumeserialbls:

                # run bls_serial_pfind with the kwargs copied over from the
                # initial run. replace only the startp, endp, and verbose kwarg
                # values
                prevkwargs = blsdict['kwargs'].copy()
                prevkwargs['verbose'] = verbose
                prevkwargs['startp'] = startp
                prevkwargs['endp'] = endp

                blsres = bls_serial_pfind(times, mags, errs, **prevkwargs)

            else:
                blsres = blsdict

            thistransdepth = blsres['blsresult']['transdepth']
            thistransduration = blsres['blsresult']['transduration']
            thisbestperiod = blsres['bestperiod']
            thistransingressbin = blsres['blsresult']['transingressbin']
            thistransegressbin = blsres['blsresult']['transegressbin']
            thisnphasebins = blsdict['kwargs']['nphasebins']

            # get the minimum light epoch using a spline fit
            try:

                spfit = spline_fit_magseries(times,
                                             mags,
                                             errs,
                                             thisbestperiod,
                                             magsarefluxes=magsarefluxes,
                                             verbose=verbose)
                thisminepoch = spfit['fitinfo']['fitepoch']

            except ValueError:

                LOGEXCEPTION('could not fit a spline to find a minimum of '
                             'the phased LC, trying SavGol fit instead...')
                # fit a Savitsky-Golay instead and get its minimum
                savfit = savgol_fit_magseries(times,
                                              mags,
                                              errs,
                                              thisbestperiod,
                                              magsarefluxes=magsarefluxes,
                                              verbose=verbose)
                thisminepoch = savfit['fitinfo']['fitepoch']

            if isinstance(thisminepoch, np.ndarray):
                if verbose:
                    LOGWARNING('minimum epoch is actually an array:\n'
                               '%s\n'
                               'instead of a float, '
                               'are there duplicate time values '
                               'in the original input? '
                               'will use the first value in this array.' %
                               repr(thisminepoch))
                thisminepoch = thisminepoch[0]

            # phase using this epoch
            phased_magseries = phase_magseries_with_errs(stimes,
                                                         smags,
                                                         serrs,
                                                         thisbestperiod,
                                                         thisminepoch,
                                                         wrap=False,
                                                         sort=True)

            tphase = phased_magseries['phase']
            tmags = phased_magseries['mags']
            terrs = phased_magseries['errs']

            # use the transit depth and duration to subtract the BLS transit
            # model from the phased mag series. we're centered about 0.0 as the
            # phase of the transit minimum so we need to look at stuff from
            # [0.0, transitphase] and [1.0-transitphase, 1.0]
            transitphase = thistransduration / 2.0

            transitindices = ((tphase < transitphase) | (tphase >
                                                         (1.0 - transitphase)))

            # this is the BLS model
            # constant = median(tmags) outside transit
            # constant = thistransitdepth inside transit
            blsmodel = npfull_like(tmags, npmedian(tmags))

            if magsarefluxes:
                blsmodel[transitindices] = (blsmodel[transitindices] +
                                            thistransdepth)
            else:
                blsmodel[transitindices] = (blsmodel[transitindices] -
                                            thistransdepth)

            # this is the residual of mags - model
            subtractedmags = tmags - blsmodel

            # calculate the rms of this residual
            subtractedrms = npstd(subtractedmags)

            # the SNR is the transit depth divided by the rms of the residual
            thissnr = npabs(thistransdepth / subtractedrms)

            # alt SNR = expected transit depth / rms of timeseries in transit
            altsnr = npabs(thistransdepth / npstd(tmags[transitindices]))

            # tell user about stuff if verbose = True
            if verbose:

                LOGINFO('peak %s: new best period: %.6f, '
                        'fit center of transit: %.5f' %
                        (ind + 1, thisbestperiod, thisminepoch))

                LOGINFO('transit ingress phase = %.3f to %.3f' %
                        (1.0 - transitphase, 1.0))
                LOGINFO('transit egress phase = %.3f to %.3f' %
                        (0.0, transitphase))
                LOGINFO('npoints in transit: %s' % tmags[transitindices].size)

                LOGINFO('transit depth (delta): %.5f, '
                        'frac transit length (q): %.3f, '
                        ' SNR: %.3f, altSNR: %.3f' %
                        (thistransdepth, thistransduration, thissnr, altsnr))

            # update the lists with results from this peak
            nbestsnrs.append(thissnr)
            nbestasnrs.append(altsnr)
            transitdepth.append(thistransdepth)
            transitduration.append(thistransduration)
            transingressbin.append(thistransingressbin)
            transegressbin.append(thistransegressbin)
            nphasebins.append(thisnphasebins)

            # update the diagnostics
            allsubtractedmags.append(subtractedmags)
            allphasedmags.append(tmags)
            allphases.append(tphase)
            allblsmodels.append(blsmodel)

            # update these when we figure out how to do it
            # nphasebins.append(thisnphasebins)
            # transingressbin.append(thisingressbin)
            # transegressbin.append(thisegressbin)

        # done with working on each peak

    # if there aren't enough points in the mag series, bail out
    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        nbestsnrs, whitenoise, rednoise = None, None, None
        transitdepth, transitduration = None, None
        nphasebins, transingressbin, transegressbin = None, None, None
        allsubtractedmags, allphases, allphasedmags = None, None, None

    return {
        'npeaks': npeaks,
        'period': nbestperiods,
        'snr': nbestsnrs,
        'altsnr': nbestasnrs,
        'whitenoise': whitenoise,
        'rednoise': rednoise,
        'transitdepth': transitdepth,
        'transitduration': transitduration,
        'nphasebins': nphasebins,
        'transingressbin': transingressbin,
        'transegressbin': transegressbin,
        'allblsmodels': allblsmodels,
        'allsubtractedmags': allsubtractedmags,
        'allphasedmags': allphasedmags,
        'allphases': allphases
    }
Пример #33
0
def fourier_fit_magseries(
    times,
    mags,
    errs,
    period,
    fourierorder=None,
    fourierparams=None,
    fix_period=True,
    scale_errs_redchisq_unity=True,
    sigclip=3.0,
    magsarefluxes=False,
    plotfit=False,
    ignoreinitfail=True,
    verbose=True,
    curve_fit_kwargs=None,
):
    '''This fits a Fourier series to a mag/flux time series.

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to fit a Fourier cosine series to.

    period : float
        The period to use for the Fourier fit.

    fourierorder : None or int
        If this is an int, will be interpreted as the Fourier order of the
        series to fit to the input mag/flux times-series. If this is None and
        `fourierparams` is specified, `fourierparams` will be used directly to
        generate the fit Fourier series. If `fourierparams` is also None, this
        function will try to fit a Fourier cosine series of order 3 to the
        mag/flux time-series.

    fourierparams : list of floats or None
        If this is specified as a list of floats, it must be of the form below::

            [fourier_amp1, fourier_amp2, fourier_amp3,...,fourier_ampN,
             fourier_phase1, fourier_phase2, fourier_phase3,...,fourier_phaseN]

        to specify a Fourier cosine series of order N. If this is None and
        `fourierorder` is specified, the Fourier order specified there will be
        used to construct the Fourier cosine series used to fit the input
        mag/flux time-series. If both are None, this function will try to fit a
        Fourier cosine series of order 3 to the input mag/flux time-series.

    fix_period : bool
        If True, will fix the period with fitting the sinusoidal function to the
        phased light curve.

    scale_errs_redchisq_unity : bool
        If True, the standard errors on the fit parameters will be scaled to
        make the reduced chi-sq = 1.0. This sets the ``absolute_sigma`` kwarg
        for the ``scipy.optimize.curve_fit`` function to False.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    magsarefluxes : bool
        If True, will treat the input values of `mags` as fluxes for purposes of
        plotting the fit and sig-clipping.

    plotfit : str or False
        If this is a string, this function will make a plot for the fit to the
        mag/flux time-series and writes the plot to the path specified here.

    ignoreinitfail : bool
        If this is True, ignores the initial failure to find a set of optimized
        Fourier parameters using the global optimization function and proceeds
        to do a least-squares fit anyway.

    verbose : bool
        If True, will indicate progress and warn of any problems.

    curve_fit_kwargs : dict or None
        If not None, this should be a dict containing extra kwargs to pass to
        the scipy.optimize.curve_fit function.

    Returns
    -------

    dict
        This function returns a dict containing the model fit parameters, the
        minimized chi-sq value and the reduced chi-sq value. The form of this
        dict is mostly standardized across all functions in this module::

            {
                'fittype':'fourier',
                'fitinfo':{
                    'finalparams': the list of final model fit params,
                    'finalparamerrs': list of errs for each model fit param,
                    'fitmags': the model fit mags,
                    'fitperiod': the fit period if this wasn't set to fixed,
                    'fitepoch': this is times.min() for this fit type,
                    'actual_fitepoch': time of minimum light from fit model
                    ... other fit function specific keys ...
                },
                'fitchisq': the minimized value of the fit's chi-sq,
                'fitredchisq':the reduced chi-sq value,
                'fitplotfile': the output fit plot if fitplot is not None,
                'magseries':{
                    'times':input times in phase order of the model,
                    'phase':the phases of the model mags,
                    'mags':input mags/fluxes in the phase order of the model,
                    'errs':errs in the phase order of the model,
                    'magsarefluxes':input value of magsarefluxes kwarg
                }
            }

        NOTE: the returned value of 'fitepoch' in the 'fitinfo' dict returned by
        this function is the time value of the first observation since this is
        where the LC is folded for the fit procedure. To get the actual time of
        minimum epoch as calculated by a spline fit to the phased LC, use the
        key 'actual_fitepoch' in the 'fitinfo' dict.

    '''

    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    # get rid of zero errs
    nzind = npnonzero(serrs)
    stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]

    phase, pmags, perrs, ptimes, mintime = (get_phased_quantities(
        stimes, smags, serrs, period))

    # get the fourier order either from the scalar order kwarg...
    if fourierorder and fourierorder > 0 and not fourierparams:

        fourieramps = [0.6] + [0.2] * (fourierorder - 1)
        fourierphas = [0.1] + [0.1] * (fourierorder - 1)
        fourierparams = fourieramps + fourierphas

    # or from the fully specified coeffs vector
    elif not fourierorder and fourierparams:

        fourierorder = int(len(fourierparams) / 2)

    else:
        LOGWARNING('specified both/neither Fourier order AND Fourier coeffs, '
                   'using default Fourier order of 3')
        fourierorder = 3
        fourieramps = [0.6] + [0.2] * (fourierorder - 1)
        fourierphas = [0.1] + [0.1] * (fourierorder - 1)
        fourierparams = fourieramps + fourierphas

    if verbose:
        LOGINFO('fitting Fourier series of order %s to '
                'mag series with %s observations, '
                'using period %.6f, folded at %.6f' %
                (fourierorder, len(phase), period, mintime))

    # initial minimize call to find global minimum in chi-sq
    initialfit = spminimize(_fourier_chisq,
                            fourierparams,
                            args=(phase, pmags, perrs))

    # make sure this initial fit succeeds before proceeding
    if initialfit.success or ignoreinitfail:

        if verbose:
            LOGINFO('initial fit done, refining...')

        leastsqparams = initialfit.x

        try:

            curvefit_params = npconcatenate((nparray([period]), leastsqparams))

            # set up the bounds for the fit parameters
            if fix_period:
                curvefit_bounds = ([period - 1.0e-7] +
                                   [-npinf] * fourierorder +
                                   [-npinf] * fourierorder,
                                   [period + 1.0e-7] + [npinf] * fourierorder +
                                   [npinf] * fourierorder)
            else:
                curvefit_bounds = ([0.0] + [-npinf] * fourierorder +
                                   [-npinf] * fourierorder,
                                   [npinf] + [npinf] * fourierorder +
                                   [npinf] * fourierorder)

            curvefit_func = partial(
                sinusoidal.fourier_curvefit_func,
                zerolevel=npmedian(smags),
                epoch=mintime,
                fixed_period=period if fix_period else None,
            )

            if curve_fit_kwargs is not None:

                finalparams, covmatrix = curve_fit(
                    curvefit_func,
                    stimes,
                    smags,
                    p0=curvefit_params,
                    sigma=serrs,
                    bounds=curvefit_bounds,
                    absolute_sigma=(not scale_errs_redchisq_unity),
                    **curve_fit_kwargs)

            else:

                finalparams, covmatrix = curve_fit(
                    curvefit_func,
                    stimes,
                    smags,
                    p0=curvefit_params,
                    sigma=serrs,
                    bounds=curvefit_bounds,
                    absolute_sigma=(not scale_errs_redchisq_unity),
                )

        except Exception:
            LOGEXCEPTION("curve_fit returned an exception")
            finalparams, covmatrix = None, None

        # if the fit succeeded, then we can return the final parameters
        if finalparams is not None and covmatrix is not None:

            # this is the fit period
            fperiod = finalparams[0]

            phase, pmags, perrs, ptimes, mintime = (get_phased_quantities(
                stimes, smags, serrs, fperiod))

            # calculate the chisq and reduced chisq
            fitmags = _fourier_func(finalparams[1:], phase, pmags)

            fitchisq = npsum(
                ((fitmags - pmags) * (fitmags - pmags)) / (perrs * perrs))

            n_free_params = len(pmags) - len(finalparams)
            if fix_period:
                n_free_params -= 1

            fitredchisq = fitchisq / n_free_params
            stderrs = npsqrt(npdiag(covmatrix))

            if verbose:
                LOGINFO('final fit done. chisq = %.5f, reduced chisq = %.5f' %
                        (fitchisq, fitredchisq))

            # figure out the time of light curve minimum (i.e. the fit epoch)
            # this is when the fit mag is maximum (i.e. the faintest)
            # or if magsarefluxes = True, then this is when fit flux is minimum
            if not magsarefluxes:
                fitmagminind = npwhere(fitmags == npmax(fitmags))
            else:
                fitmagminind = npwhere(fitmags == npmin(fitmags))
            if len(fitmagminind[0]) > 1:
                fitmagminind = (fitmagminind[0][0], )

            # assemble the returndict
            returndict = {
                'fittype': 'fourier',
                'fitinfo': {
                    'fourierorder': fourierorder,
                    # return coeffs only for backwards compatibility with
                    # existing functions that use the returned value of
                    # fourier_fit_magseries
                    'finalparams': finalparams[1:],
                    'finalparamerrs': stderrs,
                    'initialfit': initialfit,
                    'fitmags': fitmags,
                    'fitperiod': finalparams[0],
                    # the 'fitepoch' is just the minimum time here
                    'fitepoch': mintime,
                    # the actual fit epoch is calculated as the time of minimum
                    # light OF the fit model light curve
                    'actual_fitepoch': ptimes[fitmagminind]
                },
                'fitchisq': fitchisq,
                'fitredchisq': fitredchisq,
                'fitplotfile': None,
                'magseries': {
                    'times': ptimes,
                    'phase': phase,
                    'mags': pmags,
                    'errs': perrs,
                    'magsarefluxes': magsarefluxes
                },
            }

            # make the fit plot if required
            if plotfit and isinstance(plotfit, str):

                make_fit_plot(phase,
                              pmags,
                              perrs,
                              fitmags,
                              fperiod,
                              mintime,
                              mintime,
                              plotfit,
                              magsarefluxes=magsarefluxes)

                returndict['fitplotfile'] = plotfit

            return returndict

        # if the leastsq fit did not succeed, return Nothing
        else:
            LOGERROR(
                'fourier-fit: least-squared fit to the light curve failed')
            return {
                'fittype': 'fourier',
                'fitinfo': {
                    'fourierorder': fourierorder,
                    'finalparams': None,
                    'finalparamerrs': None,
                    'initialfit': initialfit,
                    'fitmags': None,
                    'fitperiod': None,
                    'fitepoch': None,
                    'actual_fitepoch': None,
                },
                'fitchisq': npnan,
                'fitredchisq': npnan,
                'fitplotfile': None,
                'magseries': {
                    'times': ptimes,
                    'phase': phase,
                    'mags': pmags,
                    'errs': perrs,
                    'magsarefluxes': magsarefluxes
                }
            }

    # if the fit didn't succeed, we can't proceed
    else:

        LOGERROR('initial Fourier fit did not succeed, '
                 'reason: %s, returning scipy OptimizeResult' %
                 initialfit.message)

        return {
            'fittype': 'fourier',
            'fitinfo': {
                'fourierorder': fourierorder,
                'finalparams': None,
                'finalparamerrs': None,
                'initialfit': initialfit,
                'fitmags': None,
                'fitperiod': None,
                'fitepoch': None,
                'actual_fitepoch': None,
            },
            'fitchisq': npnan,
            'fitredchisq': npnan,
            'fitplotfile': None,
            'magseries': {
                'times': ptimes,
                'phase': phase,
                'mags': pmags,
                'errs': perrs,
                'magsarefluxes': magsarefluxes
            }
        }
Пример #34
0
    def run(dataset,
            k,
            it_max=1000,
            min_variation=1.0e-4,
            labels=None,
            full_output=True):

        # Init centers_coord
        centers_id = random.randint(dataset.shape[0], size=k)
        centers_coord = dataset[centers_id, :]

        # Each instance will be put on a initial random cluster
        inst_cluster_id = random.randint(k, size=dataset.shape[0])

        # Auxiliary vectors to keep code cleaner
        auxvec_cur_distances = array([0.0] * k)

        prev_variation = 1.0 + min_variation
        it = 0
        while it < it_max and prev_variation >= min_variation:
            it += 1

            for inst_id in range(dataset.shape[0]):
                for center_id in range(k):
                    # For each instance, calculate the distance between
                    # each center
                    auxvec_cur_distances[center_id] = \
                     Kmedians.__euclideandist__(
                      dataset[inst_id, :],
                      centers_coord[center_id, :])

                # For each instance, let it be part of the nearest
                # cluster
                inst_cluster_id[inst_id] = argmin(auxvec_cur_distances)

            # For each cluster, calculate the new center coordinates
            # Using Median
            for center_id in range(k):
                new_cur_cluster_coords = npmedian(
                    dataset[inst_cluster_id == center_id, :], axis=0)

                # Calculate variation between previous centers_coord and
                # new ones (using infinite norm)
                prev_variation = max(
                    prev_variation,
                    max(abs(centers_coord[center_id] -
                            new_cur_cluster_coords)))

                centers_coord[center_id] = new_cur_cluster_coords

        # Build up answer
        ans = {
            "inst_cluster_id": inst_cluster_id,
            "centers_coord": centers_coord,
        }

        if full_output:
            ans = {
                "clustering_method":
                "K-Medians",
                "k":
                k,
                **ans,
                **ClusterMetrics.runall(dataset=dataset,
                                        centers_coord=centers_coord,
                                        inst_cluster_id=inst_cluster_id,
                                        labels=labels),
            }

        return ans
Пример #35
0
def sigclip_magseries_with_extparams(times,
                                     mags,
                                     errs,
                                     extparams,
                                     sigclip=None,
                                     iterative=False,
                                     magsarefluxes=False):
    '''Select the finite times, magnitudes (or fluxes), and errors from the passed
    values, and apply symmetric or asymmetric sigma clipping to them.  Returns
    sigma-clipped times, mags, and errs. Uses the same indices to filter out the
    values of all arrays in the extparams list.

    Args:
        times (np.array): ...

        mags (np.array): numpy array to sigma-clip. Does not assume all values
        are finite. Does not assume anything about whether they're
        positive/negative.

        errs (np.array): ...

        extparams (list of np.arrays): external parameters to also filter using
        the same indices as used for sigma-clipping.

        iterative (bool): True if you want iterative sigma-clipping.

        magsarefluxes (bool): True if your "mags" are in fact fluxes, i.e. if
        "dimming" corresponds to your "mags" getting smaller.

        sigclip (float or list): If float, apply symmetric sigma clipping. If
        list, e.g., [10., 3.], will sigclip out greater than 10-sigma dimmings
        and greater than 3-sigma brightenings. Here the meaning of "dimming"
        and "brightening" is set by *physics* (not the magnitude system), which
        is why the `magsarefluxes` kwarg must be correctly set.

    Returns:
        stimes, smags, serrs: (sigmaclipped values of each).

    '''

    returnerrs = True

    # fake the errors if they don't exist
    # this is inconsequential to sigma-clipping
    # we don't return these dummy values if the input errs are None
    if errs is None:
        # assume 0.1% errors if not given
        # this should work for mags and fluxes
        errs = 0.001 * mags
        returnerrs = False

    # filter the input times, mags, errs; do sigclipping and normalization
    find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes, fmags, ferrs = times[find], mags[find], errs[find]

    # apply the same indices to the external parameters
    for epi, eparr in enumerate(extparams):
        extparams[epi] = eparr[find]

    # get the median and stdev = 1.483 x MAD
    median_mag = npmedian(fmags)
    stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

    # sigclip next for a single sigclip value
    if sigclip and isinstance(sigclip, (float, int)):

        if not iterative:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

            # apply the same indices to the external parameters
            for epi, eparr in enumerate(extparams):
                extparams[epi] = eparr[sigind]

        else:

            #
            # iterative version adapted from scipy.stats.sigmaclip
            #
            delta = 1

            this_times = ftimes
            this_mags = fmags
            this_errs = ferrs

            while delta:

                this_median = npmedian(this_mags)
                this_stdev = (npmedian(npabs(this_mags - this_median))) * 1.483
                this_size = this_mags.size

                # apply the sigclip
                tsi = (npabs(this_mags - this_median)) < (sigclip * this_stdev)

                # update the arrays
                this_times = this_times[tsi]
                this_mags = this_mags[tsi]
                this_errs = this_errs[tsi]

                # apply the same indices to the external parameters
                for epi, eparr in enumerate(extparams):
                    extparams[epi] = eparr[tsi]

                # update delta and go to the top of the loop
                delta = this_size - this_mags.size

            # final sigclipped versions
            stimes, smags, serrs = this_times, this_mags, this_errs

    # this handles sigclipping for asymmetric +ve and -ve clip values
    elif sigclip and isinstance(sigclip, list) and len(sigclip) == 2:

        # sigclip is passed as [dimmingclip, brighteningclip]
        dimmingclip = sigclip[0]
        brighteningclip = sigclip[1]

        if not iterative:

            if magsarefluxes:
                nottoodimind = ((fmags - median_mag) >
                                (-dimmingclip * stddev_mag))
                nottoobrightind = ((fmags - median_mag) <
                                   (brighteningclip * stddev_mag))
            else:
                nottoodimind = ((fmags - median_mag) <
                                (dimmingclip * stddev_mag))
                nottoobrightind = ((fmags - median_mag) >
                                   (-brighteningclip * stddev_mag))

            sigind = nottoodimind & nottoobrightind

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

            # apply the same indices to the external parameters
            for epi, eparr in enumerate(extparams):
                extparams[epi] = eparr[sigind]

        else:

            #
            # iterative version adapted from scipy.stats.sigmaclip
            #
            delta = 1

            this_times = ftimes
            this_mags = fmags
            this_errs = ferrs

            while delta:

                this_median = npmedian(this_mags)
                this_stdev = (npmedian(npabs(this_mags - this_median))) * 1.483
                this_size = this_mags.size

                if magsarefluxes:
                    nottoodimind = ((this_mags - this_median) >
                                    (-dimmingclip * this_stdev))
                    nottoobrightind = ((this_mags - this_median) <
                                       (brighteningclip * this_stdev))
                else:
                    nottoodimind = ((this_mags - this_median) <
                                    (dimmingclip * this_stdev))
                    nottoobrightind = ((this_mags - this_median) >
                                       (-brighteningclip * this_stdev))

                # apply the sigclip
                tsi = nottoodimind & nottoobrightind

                # update the arrays
                this_times = this_times[tsi]
                this_mags = this_mags[tsi]
                this_errs = this_errs[tsi]

                # apply the same indices to the external parameters
                for epi, eparr in enumerate(extparams):
                    extparams[epi] = eparr[tsi]

                # update delta and go to top of the loop
                delta = this_size - this_mags.size

            # final sigclipped versions
            stimes, smags, serrs = this_times, this_mags, this_errs

    else:

        stimes = ftimes
        smags = fmags
        serrs = ferrs

    if returnerrs:
        return stimes, smags, serrs, extparams
    else:
        return stimes, smags, None, extparams
Пример #36
0
def L2(input_path_L1, input_path_Compensate, output_path, E0_const):

	try:
		input_fp_Compensate = open(input_path_Compensate, 'r')
	except IOError:
		print "IO error;Check the input File: ", input_path_Compensate
	except:
		print "Unexpected Open Error: ", input_path_Compensate
		input_fp_Compensate.close()
		
	
	try:
		input_fp_L1 = open(input_path_L1, 'r')
	except IOError:
		print "IO error;Check the input File: ", input_path_L1
	except Error:
		print "Unexpected Open Error: ", input_path_L1
		input_fp_L1.close()
	
	#output file path
	output_file_path = os.path.join(output_path, 'ResultL2.csv')
	try:
		output_fp = open(output_file_path, 'w+')
	except IOError:
		print "IO error;Check the output File: ", output_file_path
		return 'L2 failed'

	#output_plot_1�� Reynold-Taylor Equation����
	
	output_plot_2_file_path = os.path.join(output_path, 'Plot_L2_2.csv')
	try:
		output_plot_2_fp = open(output_plot_2_file_path, 'w+')
	except IOError:
		print "IO error;Check the output File: ", output_plot_2_file_path
		return 'L2 failed'	
	
	
	try:
		Compensate_csv = csv.reader(input_fp_Compensate, delimiter = ',')
	except csv.Error:    
		print "Parse ErrorCheck the input File: ", input_path_Compensate
	except StandardError:
		print "Unexpected Read Error: ", input_path_Compensate
	
	try:
		L1_csv = csv.reader(input_fp_L1, delimiter = ',')
	except csv.Error:    
		print "Parse ErrorCheck the input File: ", input_path_L1
	except StandardError:
		print "Unexpected Read Error: ", input_path_L1
	n_Compensate = 0
	n_L1 = 0
	
	data_Compensate = []
	data_L1 = []
	
	for row in Compensate_csv:
		data_Compensate.append(row)
		n_Compensate = n_Compensate + 1
	for row in L1_csv:
		data_L1.append(row)
		n_L1 = n_L1 + 1
		
	#Data count check
	if(n_Compensate != n_L1):
		print 'Count Error;Process count dismatch between Compensate and L1'
		return 'L2 failed'

	#initialize
	date = []
	rsdn = npzeros(n_Compensate)
	Ta = npzeros(n_Compensate)
	h2o = npzeros(n_Compensate)
	#press = npzeros(n_Compensate)
	
	#Read Input Data
	i = 0
	for row in data_Compensate:
		rsdn[i] = float(row[0])
		Ta[i] = float(row[1])
		h2o[i] = float(row[2])
		
		i = i + 1

	press = 998.0	
	#initialize    
	Fs = npzeros(n_L1)
	Fc = npzeros(n_L1)
	Fsc = npzeros(n_L1)
	
	Hs = npzeros(n_L1)
	Hc = npzeros(n_L1)
	Hsc = npzeros(n_L1)
	
	LEs = npzeros(n_L1)
	LEc = npzeros(n_L1)
	LEsc = npzeros(n_L1)
	
	co2 = npzeros(n_L1)
	ustar = npzeros(n_L1)
	itime = npzeros(n_L1)
	iustar = npzeros(n_L1)
	date = []
	
	i = 0
	for row in data_L1:
		date.append(row[0])
		Fs[i] = float(row[1])
		Fc[i] = float(row[2])
		Fsc[i] = float(row[3])
		
		Hs[i] = float(row[4])
		Hc[i] = float(row[5])
		Hsc[i] = float(row[6])
		
		LEs[i] = float(row[7])
		LEc[i] = float(row[8])
		LEsc[i] = float(row[9])
		
		co2[i] = float(row[10])
		ustar[i] = float(row[14])
		itime[i] = float(row[15])
		iustar[i] = float(row[16])
		i = i + 1
	# Define constants and parameters for gap filling
	#--------------------------------------------------------------------------
	num_day = 28
	ni = 36
	nd = 10
	n1 = 2         	# how many the largest points are considered for respiration
					# DO NOT Modify!
	#num_point_per_day = 24          # number of data points per day (48 -> 30 min avg time)
	#avgtime = 30
	#determine num_point_per_day automatically . using datetime module
	date_1st = datetime.datetime.strptime(date[0], "%Y-%m-%d %H:%M")
	date_2nd = datetime.datetime.strptime(date[1], "%Y-%m-%d %H:%M")
	date_diff = date_2nd - date_1st
	avgtime = int(date_diff.seconds / 60) # averaging time (minutes)
	
	num_point_per_day = 1440 / avgtime # number of data points per day (1440 : minutes of a day)
	num_segment = num_point_per_day * num_day
	num_avg = int(n_L1 / num_segment)
	num_day_2 = 7
	# nday_re = 20
	# noverlap = 5
	num_day_re = 20
	noverlap = 5
	
	ni = int(num_point_per_day * 3 / 4) # the data point that night starts
	nd = 300 / avgtime 					# how many the largest points are considered for respiration (300 : minitues of 5 hours)
	
	#--------------------------------------------------------------------------
	#E0_const = True # Do you want to use constant E0 for one year? Y/N

	
	beta0 = nparray([2, 200])
	Tref = 10.0
	T0 = -46.02
	gap_limit = 0.025
	ustar_limit = 0.5
	upper_Fc = 0.35   # upper limit of nighttime CO2 flux (mg/m2/s)
	Fc_limit = 0.005
	
	## Information for MLT
	drsdn = 50.0   # W/m2
	dta = 2.5      # oC
	dvpd = 5.0     # 5 hPa
	rv = 461.51
	#--------------------------------------------------------------------------
	
	upper_co2 = 1000.0 # upper limit of CO2 concent.(mg/m3)
	upper_h2o = 60.0  # upper limit of H2O concent. (g/m3)
	upper_Ta = 60.0   # upper limit of air temperature (oC)
	lower_Fc = -3.0   # lower limit of daytime CO2 flux (mg/m2/s)
	lower_LE = -200    # lower limit of LE (W/m2)
	lower_H = -300     # lower limit of H (W/m2)
	upper_Fc = 3   # upper limit of nighttime CO2 flux (mg/m2/s)
	upper_LE = 800    # upper limit of LE (W/m2)
	upper_H = 800     # upper limit of H (W/m2)
	upper_agc = 95.0  # upper limit of AGC value
	ustar_limit = 0.03 # minimum ustar for filtering out nighttime fluxes
	Fc_limit = 0.005  # lower limit of Re (ecosystem respiration) (mg/m2/s)
	gap_limit = 0.025 # 0.025 --> 95# confidence interval

	Tak = npzeros(len(Ta))
	tr = npzeros(len(Ta))
	ea = npzeros(len(Ta))
	es = npzeros(len(Ta))
	vpd = npzeros(len(Ta))
	#--------------------------------------------------------------------------
	# calculation of vapor pressure deficit
	a = [13.3185, 1.9760, 0.6445, 0.1299]
	
	for i in range(n_Compensate):
		Tak[i] = Ta[i] + 273.15
		tr[i] = 1.0-(373.15/Tak[i])
		es[i] = 1013.25*exp(a[0]*tr[i]-a[1]*(tr[i]**2)-(a[2]*(tr[i]**3))-a[3]*(tr[i]**4)) # hPa
	
	for i in range(n_L1):
		ea[i] = h2o[i]
		vpd[i]= float(es[i]) - float(ea[i])  #unit is hPa
	
		
	Fc_filled = copy.deepcopy(Fsc)
	
	print 'Gap Filling Process'
	print 'Before running this program, '
	print '   please make sure that you correctly set all parameters'
	#print 'E0_const'. E0_const
	#print 'nn', nn
	#print 'num_point_per_day', num_point_per_day
	#print 'num_day_2', num_day_2
	#print 'num_day_re', num_day_re
	#print 'noverlap', noverlap
	#print 'drsdn', drsdn
	#print 'dta', dta
	#print 'dvpd', dvpd
	#print '-------------------------------------------------------------------'
	index = []

	for main_j in range(num_avg):       # loop for gap-filling of CO2 fluxes           
		
		seg_start_i = main_j * num_segment
		seg_fin_i = seg_start_i + num_segment
		
		if((seg_start_i + 2 * num_segment) > n_L1):
			seg_fin_i = n_L1
		x2 = []
		x3 = []
	#--------------------------------------------------------------------------
		if(main_j == 0):
			print 'Application of modified lookup table method'
	#--------------------------------------------------------------------------
		for i in range(seg_start_i, seg_fin_i):
			if(itime[i] == 1):
				ii = 0                
				if(isnan(Fsc[i]) == True):
					jj = 0
					while ((ii < 1) and (jj <= 4)):
						ta_f = Ta[i]
						rsdn_f = rsdn[i]
						vpd_f = vpd[i]

						i0 = i - jj * num_day_2 * num_point_per_day
						i1 = i + jj * num_day_2 * num_point_per_day+1
						
						if(i0 < 1): 
							i0 = 0
							i1 = 2 * jj * num_day_2 * num_point_per_day+1
						if(i1 >= n_L1):
							i0 = n_L1 - 2 * jj * num_day_2 * num_point_per_day
							i1 = n_L1
							if(i0 < 1):
								i0 = 0	
						ks = 0
						for j in range(i0, i1):
							if((fabs(vpd_f - vpd[j]) 	< dvpd) and \
								(fabs(rsdn_f - rsdn[j]) < drsdn) and \
								(fabs(ta_f - Ta[j]) 	< dta) and \
								(isnan(Fsc[j]) 			== False)):
								ks = ks + 1
								x3_temp = []
								
								x2.append(Fsc[j])
								x3_temp.append(j)
								x3_temp.append(vpd[j])
								x3_temp.append(rsdn[j])
								x3_temp.append(Ta[j])
								x3_temp.append(ks)
								x3.append(x3_temp)
								
						ii = ks
						
						#index_temp = []
						#index_temp.append(i)
						#index_temp.append(i0)
						#index_temp.append(i1)
						#index_temp.append(ks)
						#index_temp.append(main_j)
						#index.append(index_temp)
						
						if(ks >= 1):
							Fc_filled[i] = npmedian(nparray(x2))

						jj = jj + 1
						x2 = []
						x3 = []

					if(ii < 1):
						jj = 0
						while(ii < 1):
							rsdn_f = rsdn[i]

							i0 = i - jj * num_day_2 * num_point_per_day
							i1 = i + jj * num_day_2 * num_point_per_day+1
							if(i0 < 1): 
								i0 = 0
								i1 = 2 * jj * num_day_2 * num_point_per_day+1
							
							if(i1 >= n_L1):
								i0 = n_L1 - 2 * jj * num_day_2 * num_point_per_day
								i1 = n_L1
								if(i0 < 0):
									i0 = 0

							ks = 0
							for j in range(i0, i1):
								if((fabs(rsdn_f - rsdn[j]) 	< drsdn) and \
								(isnan(Fsc[j]) 				== False)):
									ks = ks + 1                      
									x3_temp = []
								
									x2.append(Fsc[j])
									x3_temp.append(j)
									x3_temp.append(vpd[j])
									x3_temp.append(rsdn[j])
									x3_temp.append(Ta[j])
									x3_temp.append(ks)
									x3.append(x3_temp)

							ii = ks
							
							#index_temp = []
							#index_temp.append(i)
							#index_temp.append(i0)
							#index_temp.append(i1)
							#index_temp.append(ks)
							#index_temp.append(main_j)
							#index.append(index_temp)
							
							if(ks >= 1):
								Fc_filled[i] = npmedian(nparray(x2))
								
							jj = jj + 1
							x2 = []
							x3 = []
							
		x2 = []
		x3 = []

		ks = 0
		
	d = npzeros((n_L1, 5))
	d2 = npzeros((n_L1, 5))
	dd = npzeros((n_L1, 5))
	x4 = []
	#Regression to Lloyd-Taylor equation
	print 'Regression to Lloyd-Taylor equation'
	if(E0_const == True):
		for i in range(ni-1, n_Compensate, num_point_per_day):
			t1 = npzeros(nd)
			
			for j in range(nd):
				t1[j] = Fsc[i + j]
			
			#Set to 'descend'
			t2, IX = Common.matlab_sort(t1)

			k2 = 0
			for k in range(nd-1):
				if((isnan(t2[k]) 	== False) and \
						(t2[k] 		< upper_Fc) and \
						(t2[k+1] 	> Fc_limit)):
					k2 = k2 + 1
			
			if(k2 >= 2):
				for j in range(nd-1):
					
					if((itime[i+1 + IX[j]]      == 0) and \
					(isnan(t2[j])           	== False) and \
					(isnan(Ta[i+1 + IX[j]])  	== False) and \
					(t2[j]                  	< upper_Fc) and \
					(t2[j + 1]					> Fc_limit) and \
					(iustar[i + IX[j]]      	== 0) and \
					(iustar[i + IX[j+1]]    	== 0)):
						x3.append(t2[j])
						x3.append(t2[j+1])
						x2.append(Ta[i + IX[j]])
						x2.append(Ta[i + IX[j+1]])
						x4.append(date[i + IX[j]])
						x4.append(date[i + IX[j+1]])
			
						ks = ks + n1
						break	
		TC = copy.deepcopy(nparray(x2))
		PV = copy.deepcopy(nparray(x3))
	
		
		betafit = spfmin(Common.Reco, beta0, args = (TC, PV), disp=False)
		A = betafit[0]
		B = betafit[1]
		yfit = npzeros(len(TC))
		for i in range(len(TC)):
			yfit[i] = A * exp(B * (1 / (10 + 46.02) - 1 / (TC[i] + 46.02)))
		E0 = betafit[1]
		E0l = copy.deepcopy(E0)
		

		
		#    figure(1)
		#   plot(TC][PV,'ko',TC][yfit,'or')
		#    grid
		#    xlabel('air temperature (^oC)')
		#    ylabel('Ecosystem respiration(mgm^{-2}s^{-1})')

		#     TC = x2'
		#     PV = x3'
		#     
		#     [beta][resnorm] = lsqcurvefit(@myfun][beta0][TC][PV)
		#     
		#     A=beta(1)
		#     B=beta(2)
		#     yfit=A.*exp(B.*(1./(10.+46.02)-1./(TC+46.02)))
		#     E0 = betafit(2)
		#     E0l = E0
		# 
		# 
		#     figure(5)
		#     plot(TC][PV,'ko',TC][yfit,'or')
		#     grid
		#     xlabel('air temperature (^oC)')
		#     ylabel('Ecosystem respiration(mgm^{-2}s^{-1})')
		x2 = []
		x3 = []
		t1 = []
		t2 = []
		TC = []
		PV = []
		yfit = npzeros(len(TC))

	#num_day_re = 20
	#noverlap = 5
	#avgtime = 30
	delta = (60 / avgtime) * 24 * num_day_re
	dnoverlap = (60 / avgtime) * 24 * noverlap
	jj = 0
	sday = []
	Rref = []
	RE_limit = []
	stdev_E0 = []
	E0v = []
	REs = []
	Taylor_date = []
	yfit_array = []
	for i in range(0, n_L1, dnoverlap):
		i0 = int(i - delta / 2)
		i1 = int(i + delta / 2)
		
		if(i0 < 1):
			i0 = 0
			i1 = int(i0 + delta)
		if(i1 >= n_L1):
			i0 = int(n_L1 - delta) - 1
			i1 = n_L1
		
		ks = 1
		for j in range(i0+ni-1, i1, num_point_per_day):
			t1 = npzeros(nd)
			for k in range(nd):
				t1[k] = Fsc[j + k]
			#Set to 'descend'
			t2, IX = Common.matlab_sort(t1)
			
			k2 = 1
			for k in range(nd-1):
				if((isnan(t2[k])    ==  False) and \
					(t2[k]          <   upper_Fc) and \
					(t2[k+1]        >   Fc_limit)):
					k2 = k2 + 1
			
			if(k2 >= n1):
				for k in range(nd-1):
					if((itime[j+1 +IX[k]]  	== 0) and \
					(isnan(t2[k])           == False) and \
					(isnan(Ta[j + IX[k]])	== False) and \
					(t2[k]                  < upper_Fc) and \
					(t2[k+1]                > Fc_limit) and \
					(iustar[j +IX[k]]      == 0) and \
					(iustar[j +IX[k+1]]    == 0)):
						x3.append(t2[k])
						x3.append(t2[k+1])
						x2.append(Ta[j + IX[k]])
						x2.append(Ta[j + IX[k+1]])
						Taylor_date.append(str(date[j + IX[k]]))
						Taylor_date.append(str(date[j + IX[k+1]]))
					
						ks = ks + n1
						break
			
		ks = ks - 1
		
		if(ks < 6):
			if(E0_const == True):
				Rref.append(float('NaN'))
				RE_limit.append(float('NaN'))
				jj = jj + 1
			else:
				Rref.append(float('NaN'))
				E0v.append(float('NaN'))
				stdev_E0.append(float('NaN'))
				RE_limit.append(float('NaN'))
				jj = jj + 1
		else:
			TC = copy.deepcopy(nparray(x2))
			PV = copy.deepcopy(nparray(x3))
			
			if(E0_const == True):
				betafit = spfmin(Common.Reco2, beta0, args = (TC, PV, E0l), disp=False)
				A = betafit[0]
				Rref.append(A)
				
				for j in range(len(TC)):
					yfit = A * exp(E0 * (1.0/(10.0+46.02) - 1.0/(TC[j] + 46.02)))
					yfit_array.append(yfit)
					REs.append(PV[j] - yfit)
				
				sz = nparray(REs).shape
				upper = fabs(Common.tq(gap_limit, sz[0]-1 ) )
				RE_limit.append(upper*Common.stdn1(nparray(REs))/sqrt(sz[0]))
				
				jj = jj + 1
			else:
				betafit=spfmin(Common.Reco2, beta0, args = (TC, PV, E0))
				
				A=betafit[0]
				B=betafit[1]
				Rref.append(A)
				E0v.append(B)
				
				if((B < 0) or (B > 450)):
					E0v.append(float('NaN'))
				
				for j in range(len(TC)):
					yfit = A * exp(E0v[jj] * (1.0 / (10.0 + 46.02) - 1.0 / (TC[j] + 46.02)))
					yfit_array.append(yfit)
					REs.append(PV[j] - yfit)
				
				sz = nparray(REs).shape
				upper = abs(Common.tq(gap_limit, sz[0]-1))
				stdev_E0.append(Common.stdn1(REs) / sqrt(sz[0]))
				RE_limit.append(upper * Common.stdn1(nparray(REs)) / sqrt(sz[0]))
				
				jj = jj + 1
			#Regression to Lloyd-Taylor equation with 28-day segmentation
			date_extracted = re.search('^(\d{4}[-]\d{2}[-]\d{2})',str(Taylor_date[0]))
			#print date_extracted.group(0)
			if(date_extracted != None):
				fname = 'Plot_L2_1_'+str(date_extracted.group(0))+'.csv'
				output_plot_1_file_path = os.path.join(output_path, fname)	
				
				try:
					output_plot_1_fp = open(output_plot_1_file_path, 'w+')
				except IOError:
					print "IO error;Check the output File: ", output_plot_1_file_path
					return 'L2 failed'	
				
				for i in range(len(TC)):
					file_plot_str = StringIO()
					file_plot_str.write(Taylor_date[i]		+ ',')		#1	
					file_plot_str.write(str(A)	 			+ ',') 		#2
					file_plot_str.write(str(B)	 			+ ',') 		#3
					file_plot_str.write(str(TC[i]) 			+ ',') 		#4
					file_plot_str.write(str(PV[i]) 			+ ',' )		#5
					file_plot_str.write(str(yfit_array[i])	+ '\n' )	#6
					
					output_plot_string = file_plot_str.getvalue()
					
					output_plot_1_fp.write(output_plot_string)
				output_plot_1_fp.close()
			
				
		sday_temp = []
		sday_temp.append(i)
		sday_temp.append(i0)
		sday_temp.append(i1)
		sday.append(sday_temp)
		
		
		
		x2 = []
		x3 = []
		t1 = []
		t2 = []
		TC = []
		PV = []
		Taylor_date = []
		REs = []
		yfit = []

	sday = nparray(sday)
	if(E0_const == True):
		print 'Long-term E0 '
		E0s = copy.deepcopy(E0l)
	else:
		E0v_s = []
		stdev_E0_s = []
		for k in range(len(E0v)):
			E0v_s.append(E0v[k]/stdev_E0[k])
			stdev_E0_s.append(1/stdev_E0[k])
		print 'Short-term E0 '
		E0s = npnansum(E0v_s)/npnansum(stdev_E0_s)
	Rref = []
	#REs = []
	#RE_limit = []
	
	
	jj = 0
	for i in range(0, n_L1, dnoverlap):

		i0 = i - delta / 2
		i1 = i + delta / 2
	
		if(i0 < 1):
			i0 = 0
			i1 = i0 + delta
		if(i1 >= n_L1):
			i0 = n_L1 - delta - 1
			i1 = n_L1
		ks = 1
		for j in range(i0+ni-1,  i1,  num_point_per_day):
			t1 = npzeros(nd)
			for k in range(nd):
				t1[k] = Fsc[j + k]
			#Set to 'descend'
			t2, IX = Common.matlab_sort(t1)

			k2 = 1
			
			for k in range(nd-1):
				if((isnan(t2[k])	== 	False) and \
					(t2[k]			< 	upper_Fc) and \
					(t2[k+1]		> 	Fc_limit)):
					
					k2 = k2 + 1
				
			if(k2 >= n1):
				for k in range(nd-1):
					if((itime[j+1 + IX[k]]		== 0) and \
					(isnan(t2[k])               == False) and \
					(isnan(Ta[j + IX[k]])    	== False) and \
					(t2[k]                    	< upper_Fc) and \
					(t2[k+1]                   	> Fc_limit) and \
					(iustar[j + IX[k]]        	==  0) and \
					(iustar[j + IX[k+1]]        ==  0)):
						x3.append(t2[k])
						x3.append(t2[k + 1])
						x2.append(Ta[j + IX[k]])
						x2.append(Ta[j + IX[k+1]])
						
						ks = ks + n1
						break
					
		ks = ks - 1

		if(ks < 6):
			
#			Rref.append(Rref[jj])
#			RE_limit.append(RE_limit[jj])
			
			Rref.append(Rref[-1])
			RE_limit.append(RE_limit[-1])
			
			if(E0_const != True):
				stdev_E0.append(stdev_E0[jj])
				
			jj = jj + 1
            
			
		else:

			TC = nparray(x2)
			PV = nparray(x3)
			
			betafit = spfmin(Common.Reco2, beta0, args = (TC, PV, E0s), disp=False)
			
			A=betafit[0]
			Rref.append(A)
			
			for j in range(len(TC)):
				yfit = Rref[jj] * exp(E0s * (1 / (10 + 46.02) - 1 / (TC[j] + 46.02)))
				REs.append(PV[j]-yfit)
				
			sz = nparray(REs).shape
			upper = abs(Common.tq(gap_limit, sz[0]-1))
			RE_limit.append(upper*Common.stdn1(REs)/sqrt(sz[0]))
            
			jj = jj + 1
	
		x2 = []
		x3 = []
		t1 = []
		t2 = []
		TC = []
		PV = []
		
		#for k in REs:
		#	print k
		REs = []
	#for k in Rref:
	#	print k
	##    
	
	ks = 0
	nsp2 = npzeros((n_L1 / num_point_per_day))
	RE = npzeros(n_L1) 
	GPP = npzeros(n_L1)
	for i in range(n_L1):
		RE[i] = float('NaN')
		GPP[i] = float('NaN')
	
	for i in range(0, n_L1, num_point_per_day):
		i0 = i
		i1 = i + num_point_per_day
		if(i0 >=sday[ks][1]):
			ks = ks + 1
			if(i0 >= sday[len(sday)-1][1]):
				ks = len(sday)-1
		
		for j in range(i0, i1):
			if(E0_const == True):
				yfit=Rref[ks-1] * exp(E0l * (1.0 / (10 + 46.02) - 1.0 / (Ta[j] + 46.02)))
			else:
				yfit=Rref[ks-1] * exp(E0s * (1.0 / (10 + 46.02) - 1.0 / (Ta[j] + 46.02)))
			
			RE[j] = yfit
			if(itime[j]==0):                 # nighttime condition
				RE[j] = Fc_filled[j]
				
				if((isnan(Fsc[j])    	==  True) or \
					((Fsc[j]-yfit)		<   RE_limit[ks-1]) or \
					((Fsc[j]-yfit)  	>   1.0 * RE_limit[ks-1]) or \
					(iustar[j]      	==  1)):
					nsp2[ks-1] = nsp2[ks-1] + 1
					Fc_filled[j] = yfit
					RE[j] = Fc_filled[j]
			
			GPP[j] = RE[j]- Fc_filled[j]
			
	
	#figure(2)
	#plot(time][Fsc][time][Fc_filled[:],'or')
	#set(gca,'XTick',[year0:1/12:year0+0.9999999])
	#set(gca,'xticklabel',xticks)            
	#ylim = [-1.5][1.5]
	#set(gca,'xLim',xlim(:))
	#ylabel('F_c (mgm^{-2}s^{-1})')
	#--------------------------------------------------------------------------


	#--------------------------------------------------------------------------
	print 'Gap-filling of LE'
	#--------------------------------------------------------------------------
	x2 = []
	x3 = []
	index = []
	LE_filled = copy.deepcopy(LEsc)
	for main_j in range(num_avg):       # loop for gap-filling of H2O fluxes           
		
		
		seg_start_i = main_j * num_segment
		seg_fin_i = seg_start_i + num_segment
		
		if((seg_start_i + 2 * num_segment) > n_L1):
			seg_fin_i = n_L1
			
		x2 = []
		x3 = []

		for i in range(seg_start_i, seg_fin_i):
			
			ii = 0               
			
			if(isnan(LEsc[i]) == True):
				jj = 0
				while((ii < 1) and (jj <= 4)):
					ta_f = Ta[i]
					rsdn_f = rsdn[i]
					vpd_f = vpd[i]

					i0 = i - jj * num_day_2 * num_point_per_day
					i1 = i + jj * num_day_2 * num_point_per_day+1
					if(i0 < 1):
						i0 = 0
						i1 = 2 * jj * num_day_2 * num_point_per_day+1
					if(i1 >= n_L1):
						i0 = n_L1 - 2 * jj * num_day_2 * num_point_per_day - 1
						i1 = n_L1
						if(i0 < 1):
							i0 = 0
					ks = 0
					for j in range(i0, i1):
						if((fabs(vpd_f-vpd[j])	< dvpd) and \
						(fabs(rsdn_f-rsdn[j])	< drsdn)  and \
						(fabs(ta_f-Ta[j])   	< dta) and \
						(isnan(LEsc[j])    		== False)):
							x3_temp = []
							x2.append(LEsc[j])
							x3_temp.append(j)
							x3_temp.append(vpd[j])
							x3_temp.append(rsdn[j])
							x3_temp.append(Ta[j])
							x3_temp.append(ks)
							x3.append(x3_temp)                                
							ks = ks + 1
							
					ii = ks
					#index_temp = []
					#index_temp.append(i)
					#index_temp.append(i0)
					#index_temp.append(i1)
					#index_temp.append(ks)
					#index_temp.append(main_j)
					#index.append(index_temp)

					if(ks >= 1):
						LE_filled[i] = npmedian(nparray(x2))
					jj = jj + 1
					x2 = [] 
					x3 = [] 
					
				if(ii < 1):
					jj = 0
					while(ii < 1):
						rsdn_f = rsdn[i]

						i0 = i - jj * num_day_2 * num_point_per_day 
						i1 = i + jj * num_day_2 * num_point_per_day + 1
						if(i0 < 1):
							i0 = 0
							i1 = 2 * jj * num_day_2 * num_point_per_day + 1
						if(i1 >= n_L1):
							i0 = n_L1 - 2 * jj * num_day_2 * num_point_per_day - 1
							i1 = n_L1
							if(i0 < 1):
								i0 = 0
								
						ks = 0
						for j in range(i0, i1):
							if((fabs(rsdn_f-rsdn[j]) 	< drsdn)  and \
								(isnan(LEsc[j])       		== False)):
								
								x3_temp = []
								x2.append(LEsc[j])
								x3_temp.append(j)
								x3_temp.append(vpd[j])
								x3_temp.append(rsdn[j])
								x3_temp.append(Ta[j])
								x3_temp.append(ks)
								x3.append(x3_temp)
								ks = ks + 1
							
						ii = ks
						#index_temp = []
						#index_temp.append(i)
						#index_temp.append(i0)
						#index_temp.append(i1)
						#index_temp.append(ks)
						#index_temp.append(main_j)
						#index.append(index_temp)

						if(ks >= 1):
							LE_filled[i] = npmedian(nparray(x2))
						jj = jj + 1
						x2 = []
						x3 = []
		
		x2 = []
		x3 = []
		ks = 0

	#figure(3)
	#plot(time][LEsc][time][LE_filled[:],'or')
	#set(gca,'XTick',[year0:1/12:year0+0.9999999])
	#set(gca,'xticklabel',xticks)            
	#ylim = [-100][600]
	#set(gca,'xLim',xlim(:))
	#ylabel('LE (Wm^{-2})')

	##
	#--------------------------------------------------------------------------
	
	print 'Gap-filling of H (sensible heat flux)'
	
	#--------------------------------------------------------------------------
	x2 = []
	x3 = []
	index = []
	H_filled = copy.deepcopy(Hsc)

	for main_j in range(num_avg):       # loop for gap-filling of H2O fluxes           
		seg_start_i = main_j * num_segment
		seg_fin_i = seg_start_i + num_segment
		
		if((seg_start_i + 2 * num_segment) >= n_L1):
			seg_fin_i = n_L1
		
		x2 = []
		x3 = []

		for i in range(seg_start_i, seg_fin_i):
			ii = 0                
			if(isnan(Hsc[i])	== True):
				jj = 0
				while ((ii < 1) and (jj <= 4)):
					ta_f = Ta[i]
					rsdn_f = rsdn[i]
					vpd_f = vpd[i]

					i0 = i - jj * num_day_2 * num_point_per_day
					i1 = i + jj * num_day_2 * num_point_per_day+1
					if(i0 < 1):
						i0 = 0
						i1 = 2 * jj * num_day_2 * num_point_per_day+1
					if(i1 >= n_L1):
						i0 = n_L1 - 2 * jj * num_day_2 * num_point_per_day - 1
						i1 = n_L1
						if(i0 < 1):
							i0 = 0
						
					ks = 0
					for j in range(i0, i1):
						if((fabs(vpd_f-vpd[j])	< dvpd) and \
						(fabs(rsdn_f-rsdn[j])  	< drsdn)  and \
						(fabs(ta_f-Ta[j])      	< dta) and \
						(isnan(Hsc[j])        	== False)):
							                   
							x3_temp = []
							x2.append(Hsc[j])
							x3_temp.append(j)
							x3_temp.append(vpd[j])
							x3_temp.append(rsdn[j])
							x3_temp.append(Ta[j])
							x3_temp.append(ks)
							ks = ks + 1   
							x3.append(x3_temp)    
					ii = ks
					#index_temp = []
					#index_temp.append(i)
					#index_temp.append(i0)
					#index_temp.append(i1)
					#index_temp.append(ks)
					#index_temp.append(main_j)
					#index.append(index_temp)

					if(ks >= 1):
						H_filled[i] = npmedian(nparray(x2))
					jj = jj + 1
					x2 = [] 
					x3 = []

				if(ii < 1):
					jj = 0
					while(ii < 1):
						rsdn_f = rsdn[i]

						i0 = i - jj * num_day_2 * num_point_per_day
						i1 = i + jj * num_day_2 * num_point_per_day+1
						if(i0 < 1):
							i0 = 0
							i1 = 2 * jj * num_day_2 * num_point_per_day+1
						if(i1 >= n_L1):
							i0 = n_L1 - 2 * jj * num_day_2 * num_point_per_day - 1
							i1 = n_L1
							if(i0 < 1):
								i0 = 0
							
						ks = 0
						for j in range(i0, i1): 
							if((fabs(rsdn_f-rsdn[j])    <  drsdn)  and \
							(isnan(Hsc[j])            	== False)):
								ks = ks + 1                      
								x3_temp = []
								x2.append(Hsc[j])
								x3_temp.append(j)
								x3_temp.append(vpd[j])
								x3_temp.append(rsdn[j])
								x3_temp.append(Ta[j])
								x3_temp.append(ks)
								x3.append(x3_temp)    
						
						ii = ks
						#index_temp = []
						#index_temp.append(i)
						#index_temp.append(i0)
						#index_temp.append(i1)
						#index_temp.append(ks)
						#index_temp.append(main_j)
						#index.append(index_temp)
						

						if(ks >= 1):
							H_filled[i] = npmedian(nparray(x2))

						jj = jj + 1
						x2 = []
						x3 = []
					
		x2 = []
		x3 = []
		ks = 0

	#figure(4)
	#plot(time,Hsc,time,H_filled[:],'or')
	#set(gca,'XTick',[year0:1/12:year0+0.9999999])
	#set(gca,'xticklabel',xticks)            
	#ylim = [-100,600]
	#set(gca,'xLim',xlim(:))
	#ylabel('H (Wm^{-2})')
	
	##
	print '-------------------------------------------------------------------'
	print 'Calculating daily mean values'
	print '-------------------------------------------------------------------'
	# disp('Press any key to calculate daily mean values')
	# pause
	print '-------------------------------------------------------------------'
	print 'calculation of daily mean. Unit seg_start_i [C g/m2/day].'
	print '-------------------------------------------------------------------'

	Fsc_daily = npzeros(n_L1/num_point_per_day)
	GPP_daily = npzeros(n_L1/num_point_per_day)
	RE_daily = npzeros(n_L1/num_point_per_day)
	ET_daily = npzeros(n_L1/num_point_per_day)
	H_daily = npzeros(n_L1/num_point_per_day)
	LE_daily = npzeros(n_L1/num_point_per_day)
	H_daily = npzeros(n_L1/num_point_per_day)
	
	k = 0
	for i in range(0, n_L1, num_point_per_day):
		for j in range(i,i + num_point_per_day):
			Fsc_daily[k] = Fsc_daily[k] + Fc_filled[j]
			GPP_daily[k] = GPP_daily[k] + GPP[j]
			RE_daily[k] = RE_daily[k] + RE[j]
			ET_daily[k] = ET_daily[k] + LE_filled[j]
		Fsc_daily[k] = Fsc_daily[k]	*	(60*float(avgtime)/1000*12/44)
		GPP_daily[k] = GPP_daily[k]	*	(60*float(avgtime)/1000*12/44)    
		RE_daily[k] = RE_daily[k]	*	(60*float(avgtime)/1000*12/44)        
		ET_daily[k] = ET_daily[k]	*	(60*float(avgtime)/(2440)/1000)
		
		k = k + 1
	NEE_annual= npmean(Fc_filled)*float((1800*48*(n_L1/(60.0/avgtime*24))*12/44/1000.0))
	GPP_annual = npmean(GPP)*float((1800*48*(n_L1/(60.0/avgtime*24))*12/44/1000.0))
	RE_annual = npmean(RE)*float((1800*48*(n_L1/(60.0/avgtime*24))*12/44/1000.0))
	NEE_std_annual = Common.stdn1(Fsc_daily)/sqrt(n_L1/(60.0/avgtime*24))*(n_L1/(60.0/avgtime*24))
	GPP_std_annual = Common.stdn1(GPP_daily)/sqrt(n_L1/(60.0/avgtime*24))*(n_L1/(60.0/avgtime*24))
	RE_std_annual = Common.stdn1(RE_daily)/sqrt(n_L1/(60.0/avgtime*24))*(n_L1/(60.0/avgtime*24))
	
	print 'NEE_annual', NEE_annual
	print 'GPP_annual', GPP_annual
	print 'RE_annual', RE_annual
	print 'NEE_std_annual', NEE_std_annual
	print 'GPP_std_annual', GPP_std_annual
	print 'RE_std_annual', RE_std_annual
	
	print '-------------------------------------------------------------------'
	print 'Calculating daily mean ETs'
	print '-------------------------------------------------------------------'
	print 'calculation of daily mean. Unit seg_start_i [C g/m2/day].'
	print '-------------------------------------------------------------------'

	
	k = 0
	for i in range(0, n_L1, num_point_per_day):
		for j in range(i,i + num_point_per_day):
			LE_daily[k] = LE_daily[k] \
				+ LE_filled[j]*(60*float(avgtime)/(2440*1000))
		k = k + 1    


	# npmean(Fc_filled)
	LE_annual = npmean(LE_filled)*(1800.0*48.0*float(n_L1/(60.0/avgtime*24))/2440.0/1000.0)
	LE_std_annual = Common.stdn1(LE_daily)/sqrt(n_L1/float(60.0/avgtime*24.0))*float(n_L1/(60.0/avgtime*24.0))

	print 'LE_annaul', LE_annual
	print 'LE_std_annaul', LE_std_annual
	
	print '-------------------------------------------------------------------'
	print 'Calculating daily npmean heating rate'
	print '-------------------------------------------------------------------'
	print 'calculation of daily npmean heating rate. Unit seg_start_i [MJ/m2/day].'
	print '-------------------------------------------------------------------'

	k = 0
	for i in range(0, n_L1, num_point_per_day):
		for j in range(i,i + num_point_per_day):
			H_daily[k] = H_daily[k] \
				+ H_filled[j]*(60*float(avgtime)/(1004*1.0))/(10E6)
		k = k + 1

	H_annual = sum(H_daily)
	H_std_annual = Common.stdn1(H_daily)
	
	print 'H_annaul', H_annual
	print 'H_std_annaul', H_std_annual

	
	for i in range(len(Fsc)):
		file_plot_str = StringIO()
		file_plot_str.write(str(date[i])			+ ',') 		#1
		file_plot_str.write(str(Fsc[i]) 			+ ',') 		#2
		file_plot_str.write(str(Fc_filled[i]) 		+ ',') 		#3
		file_plot_str.write(str(LEsc[i]) 			+ ',') 		#4
		file_plot_str.write(str(LE_filled[i]) 		+ ',') 		#5
		file_plot_str.write(str(Hsc[i]) 			+ ',') 		#6
		file_plot_str.write(str(H_filled[i]) 		+ '\n') 	#7
		

		output_plot_string = file_plot_str.getvalue()
		
		output_plot_2_fp.write(output_plot_string)
	output_plot_2_fp.close()
	
	
	#For output
	#Assume data start from 0:00
	output_Fsc_daily = npzeros(n_L1)
	output_GPP_daily = npzeros(n_L1)
	output_RE_daily = npzeros(n_L1)
	output_ET_daily = npzeros(n_L1)
	output_LE_daily = npzeros(n_L1)
	output_H_daily = npzeros(n_L1)
	
	j = 0
	for i in range(n_L1):
		output_Fsc_daily[i] = Fsc_daily[j]
		output_GPP_daily[i] = GPP_daily[j]
		output_RE_daily[i] = RE_daily[j]
		output_ET_daily[i] = ET_daily[j]
		output_H_daily[i] = H_daily[j]
		output_LE_daily[i] = LE_daily[j]
		if((i+1) % num_point_per_day == 0):
			j = j + 1
			

	for i in range(n_L1):
		file_str = StringIO()

		file_str.write(str(output_ET_daily[i]) + ',') 		#1
		
		file_str.write(str(Fc_filled[i]) + ',' )			#2
		file_str.write(str(output_Fsc_daily[i])   + ',' )	#3
		
		file_str.write(str(GPP[i]) + ',' )					#4
		file_str.write(str(output_GPP_daily[i]) + ',')		#5
		file_str.write(str(GPP_annual) + ',')				#6
		file_str.write(str(GPP_std_annual) + ',')			#7
		
		file_str.write(str(H_filled[i]) + ',')				#8
		file_str.write(str(output_H_daily[i]) + ',')		#9
		file_str.write(str(H_annual) + ',')					#10
		file_str.write(str(H_std_annual) + ',')				#11
		
		file_str.write(str(LE_filled[i]) + ',')				#12
		file_str.write(str(output_LE_daily[i]) + ',')		#13
		file_str.write(str(LE_annual) + ',')				#14
		file_str.write(str(LE_std_annual) + ',')			#15
		
		file_str.write(str(NEE_annual) + ',')				#16
		file_str.write(str(NEE_std_annual) + ',')			#17
		
		file_str.write(str(output_RE_daily[i]) + ',')		#18
		file_str.write(str(RE_annual) + ',')				#19
		file_str.write(str(RE_std_annual) + ',')			#20
		
		file_str.write(str(co2[i]) + ',')					#21
		file_str.write(str(rsdn[i]) + ',')					#22
		file_str.write(str(ea[i]) + ',')					#23
		file_str.write(str(h2o[i]) + ',')					#24
		file_str.write(str(Ta[i]) + ',')					#25
		file_str.write(str(vpd[i]) + '\n' )					#26
	
		output_string = file_str.getvalue()
		
		output_fp.write(output_string)

	output_fp.close()
	
	return 'L2 Done'