def density_maxima(feature, weighted_values):
    """Locate the local maxima of a weighted KDE sampled on a feature's grid.

    ``weighted_values`` is iterated twice as ``(value, weight)`` pairs: once
    for the sample values and once for their weights.  Both are passed to
    ``kde.KDE1D`` together with the module-level ``kde_bandwidth``.

    The estimate is sampled with ``arange`` from the first to the last entry
    of ``all_features[feature]['range']`` in steps of ``kde_resolution``.  A
    grid point counts as a maximum when its density is strictly greater than
    both neighbours; the ends of the grid are compared against 0.

    Returns a list of ``(x, density)`` tuples in grid order.
    """
    values = [value for value, _ in weighted_values]
    weights = [weight for _, weight in weighted_values]
    estimator = kde.KDE1D(values, weights=weights, bandwidth=kde_bandwidth)

    bounds = all_features[feature]['range']
    grid = arange(bounds[0], bounds[-1], kde_resolution).tolist()
    density = estimator(grid).tolist()

    # Pad with a zero on each side so every sample has a left and a right
    # neighbour to be compared with.
    padded = [0] + density + [0]
    peaks = []
    for x, before, here, after in zip(grid, padded, padded[1:], padded[2:]):
        if here > before and here > after:
            peaks.append((x, here))
    return peaks
示例#2
0
def get_density(Xs, support):
    """Evaluate a lower-bounded KDE of each sample set on ``support``.

    For every ``X`` in ``Xs`` whose first dimension has at least two entries,
    a ``pyqt_fit`` ``KDE1D`` with ``lower=0`` and the ``linear_combination``
    method is fitted and evaluated at ``support``.  Sample sets with fewer
    than two entries yield ``1. * support`` instead.  The results are not
    clipped at zero.

    Returns the list of results, one per element of ``Xs``, in input order.
    """
    # KDE tutorial: http://pythonhosted.org/PyQt-Fit/KDE_tut.html
    # Install hint: easy_install distribute;
    #   sudo pip install git+https://github.com/Multiplicom/pyqt-fit.git
    from scipy import stats
    from pyqt_fit import kde, kde_methods

    def _density_for(X):
        # Too few samples to fit an estimator: fall back to scaled support.
        if X.shape[0] >= 2:
            estimator = kde.KDE1D(X,
                                  lower=0,
                                  method=kde_methods.linear_combination)
            return estimator(support)
        return 1. * support

    return [_density_for(X) for X in Xs]
示例#3
0
文件: kd_utils.py 项目: tvwenger/kd
def calc_hpd(samples, kdetype, alpha=0.683, pdf_bins=1000):
    """
    Fit a kernel density estimator (KDE) to the posterior given
    by a collection of samples. Return the mode (posterior peak)
    and the highest posterior density (HPD) determined by the minimum
    width Bayesian credible interval (BCI) containing a fraction of
    the posterior samples. The posterior should be well described by a
    single-modal distribution.

    Parameters:
      samples :: 1-D array of scalars
        The samples being fit with a KDE. NaN entries are ignored.
        Any sequence convertible to a float array is accepted.

      kdetype :: string
        Which KDE method to use
          'pyqt' uses pyqt_fit with boundary at 0
          'scipy' uses gaussian_kde with no boundary

      alpha :: scalar (optional)
        The fraction of samples included in the BCI. Must lie
        strictly between 0 and 1.

      pdf_bins :: integer (optional)
        Number of bins used in calculating the PDF

    Returns: kde, mode, lower, upper
      kde :: scipy.gaussian_kde or pyqt_fit KDE1D object
        The fitted KDE

      mode :: scalar
        The mode of the posterior

      lower :: scalar
        The lower bound of the BCI

      upper :: scalar
        The upper bound of the BCI

      (None, nan, nan, nan) is returned instead when fewer than two
      non-NaN samples are given, when fitting the KDE raises a
      LinAlgError, or when the evaluated PDF contains NaN.

    Raises:
      ValueError if alpha is outside (0, 1) or kdetype is not
      'scipy' or 'pyqt'.
    """
    # check inputs
    if (alpha <= 0.0) or (alpha >= 1.0):
        raise ValueError("alpha should be between 0 and 1.")
    failure = (None, np.nan, np.nan, np.nan)
    #
    # Fit KDE
    #
    samples = np.asarray(samples, dtype=float)
    good = samples[~np.isnan(samples)]
    if good.size < 2:
        # skip if fewer than two non-nans
        return failure
    try:
        if kdetype == "scipy":
            kde = gaussian_kde(good)
        elif kdetype == "pyqt":
            kde = pyqt_kde.KDE1D(good,
                                 lower=0,
                                 method=kde_methods.linear_combination)
        else:
            raise ValueError("Invalid KDE method: {0}".format(kdetype))
    except np.linalg.LinAlgError:
        # catch singular matrices (i.e. all values are the same)
        return failure
    #
    # Compute PDF
    #
    xdata = np.linspace(good.min(), good.max(), pdf_bins)
    pdf = np.asarray(kde(xdata))
    #
    # Get the location of the mode
    #
    peak = np.argmax(pdf)
    mode = xdata[peak]
    # argmax lands on a NaN whenever the PDF contains one, so testing the
    # peak density (not only the grid coordinate) detects a failed fit.
    if np.isnan(mode) or np.isnan(pdf[peak]):
        return failure
    #
    # Visit the grid points from most to least probable and keep them
    # until their normalised probabilities sum to at least alpha. The
    # stable sort keeps tied points in grid order.
    #
    probs = pdf / np.sum(pdf)
    order = np.argsort(-probs, kind="stable")
    cum_prob = np.cumsum(probs[order])
    reached = np.flatnonzero(cum_prob >= alpha)
    # If alpha is never reached, every grid point belongs to the BCI.
    n_keep = reached[0] + 1 if reached.size else order.size
    in_bci = xdata[order[:n_keep]]
    lower = in_bci.min()
    upper = in_bci.max()
    return kde, mode, lower, upper
示例#4
0
文件: ensemble.py 项目: jimc101/PyLag
def get_probability_density_1D(file_names, dates, depths, depth_bnds,
                               pylag_time_rounding):
    """Compute the ensemble mean concentration in 1D

    Particle concentrations are computed on the dates and at the depth levels
    given in the arrays `dates` and `depths`. Each member of the ensemble is
    a separate realisation, with particles starting at the same locations
    and at the same time in each run. A different method should be used to
    compute probability densities for ensembles in which particles are released
    at different times.

    To compute particle concentrations a gaussian kernel density estimator
    is used. Boundaries are treated as being reflecting, thus there is no loss
    of density.

    Parameters
    ----------
    file_names : list[str]
        List of sorted PyLag output files. Each output file corresponds to one member
        of the ensemble.

    dates : 1D NumPy array ([t], datetime)
        Dates on which to compute the ensemble mean concentration. Every date
        must be present in each file's (rounded) time axis.

    depths : 2D Numpy array ([t, z], float)
        Depths at which to compute the ensemble mean concentration. The array is
        2D, since it may be desirable to have the depths at which concentrations
        are calculated vary in time (e.g. if the model has a moving free
        surface). NB dates.shape[0] must equal depths.shape[0].

    depth_bnds : 2D Numpy array ([t, 2], float)
        These are the lower and upper depth bounds which are required by the
        kernel method.

    pylag_time_rounding : int
        The number of seconds PyLag outputs should be rounded to.

    Returns
    -------
    conc : 2D Numpy array (float)
        The concentration at the specified times and depths, averaged over
        the ensemble members.

    Raises
    ------
    RuntimeError
        If PyQt-fit is not available.
    ValueError
        If `dates` and `depths` differ in length along the first axis, or if
        a requested date is missing from a file.
    """
    # Function requires pyqt_fit. First check that it is installed
    if not have_pyqt_fit:
        raise RuntimeError(
            "PyQt-fit was not found within this python distribution. Please see PyLag's documentation "
            "for more information.")

    if dates.shape[0] != depths.shape[0]:
        raise ValueError('Array lengths do not match')

    # Array sizes
    n_trials = len(file_names)
    n_times = dates.shape[0]
    n_zlevs = depths.shape[1]

    # Use kernel method to estimate density
    dens = np.empty((n_trials, n_times, n_zlevs), dtype=float)
    for i, file_name in enumerate(file_names):
        viewer = Viewer(file_name, time_rounding=pylag_time_rounding)

        # Establish the indices of the time points we want to work with.
        # The date list is built once per file rather than once per
        # requested date.
        viewer_dates = viewer.date.tolist()
        time_indices = [viewer_dates.index(date) for date in dates]

        for j, t_idx in enumerate(time_indices):
            zmin = depth_bnds[j, 0]
            zmax = depth_bnds[j, 1]
            est = kde.KDE1D(viewer('z')[t_idx, :].squeeze(),
                            lower=zmin,
                            upper=zmax,
                            method=kde_methods.reflection,
                            kernel=kernels.normal_kernel1d())
            dens[i, j, :] = est(depths[j, :])

    # Average over the ensemble (first) axis
    return np.mean(dens, axis=0)
示例#5
0
def pdf_parallax_results_worker(plx_samples, kdetype, pdf_bins=100,
                                alpha=0.683):
    """
    Finds the parallax distance and distance uncertainty from the
    output of many samples from parallax. See pdf_parallax for more
    details.

    Parameters:
      plx_samples : 1-D array
                    Distance samples produced by parallax (kpc
                    according to the original documentation). NaN
                    entries are ignored. Any sequence convertible to
                    a float array is accepted.
      kdetype : string
                which KDE method to use
                'pyqt' uses pyqt_fit with linear combination
                   and boundary at 0
                'scipy' uses gaussian_kde with no boundary
      pdf_bins : integer (optional)
                 number of bins used in calculating PDF
      alpha : scalar (optional)
              integrated probability the uncertainty interval must
              exceed; the default 0.683 is the level that was
              previously hard-coded

    Returns: kde, peak_dist, peak_dist_err_neg, peak_dist_err_pos
      kde : scipy.gaussian_kde or pyqt_fit KDE1D object
            The fitted KDE
      peak_dist : scalar
                  The distance associated with the peak of the PDF
      peak_dist_err_neg : scalar
                      The negative uncertainty of peak_dist
      peak_dist_err_pos : scalar
                      The positive uncertainty of peak_dist

      (None, nan, nan, nan) is returned instead when fewer than two
      non-NaN samples are given, when kdetype is not recognised (a
      message is printed), when fitting the KDE raises a LinAlgError,
      when the PDF peak is NaN or not positive, or when no interval
      exceeding alpha is found.
    """
    failure = (None, np.nan, np.nan, np.nan)
    #
    # Compute kernel density estimator and PDF
    #
    plx_samples = np.asarray(plx_samples, dtype=float)
    good = plx_samples[~np.isnan(plx_samples)]
    if good.size < 2:
        # skip if fewer than two non-nans
        return failure
    try:
        if kdetype == 'scipy':
            kde = gaussian_kde(good)
        elif kdetype == 'pyqt':
            kde = pyqt_kde.KDE1D(good,
                                 lower=0,
                                 method=kde_methods.linear_combination)
        else:
            print("INVALIDE KDE METHOD: {0}".format(kdetype))
            return failure
    except np.linalg.LinAlgError:
        # catch singular matrices (i.e. all values are the same)
        return failure
    dists = np.linspace(good.min(), good.max(), pdf_bins)
    pdf = np.asarray(kde(dists))
    #
    # Find index, value, and distance of peak of PDF
    #
    peak_ind = np.argmax(pdf)
    peak_value = pdf[peak_ind]
    peak_dist = dists[peak_ind]
    if np.isnan(peak_value) or peak_value <= 0.0:
        # too few good samples? A non-positive peak would also make the
        # walk-down step below zero or wrongly signed.
        return failure

    def _pdf_at(x):
        # quad needs a plain float; the KDE may hand back a length-1 array
        # for a scalar argument.
        return float(np.ravel(kde(x))[0])

    #
    # Walk down from peak of PDF until integral between two
    # bounds exceeds alpha of the total integral (=1 because it's
    # normalized). Step size is 1% of peak value.
    #
    for target in np.arange(peak_value, 0., -0.01 * peak_value):
        # left bound: grid point left of the peak closest to the target
        # level (nothing lies to the left when the peak is the first bin)
        if peak_ind == 0:
            lower = 0
        else:
            lower = np.argmin(np.abs(target - pdf[:peak_ind]))
        # right bound: the slice always contains the peak itself, so no
        # special case is needed when the peak is the last bin
        upper = np.argmin(np.abs(target - pdf[peak_ind:])) + peak_ind
        # integrate
        integral = integrate.quad(_pdf_at, dists[lower], dists[upper])[0]
        if integral > alpha:
            peak_dist_err_neg = peak_dist - dists[lower]
            peak_dist_err_pos = dists[upper] - peak_dist
            return (kde, peak_dist, peak_dist_err_neg, peak_dist_err_pos)
    # never enclosed the requested probability
    return failure
示例#6
0
    weights = np.ones_like(x)/float(len(x))
    hy, _, _ = ax[1].hist(x, weights=weights, bins=int(np.sqrt(len(x))), color='k', histtype='step')
    ax[1].fill_between(rcx,0.,hy.max(),color='red',alpha=0.8,zorder=1001,label='RC Confidence Interval')
    ax[1].scatter(x,fg_m/fg_m.max()*hy.max(),c='cornflowerblue',alpha=.5,label='FG',s=5)
    ax[1].scatter(x,bg_m/fg_m.max()*hy.max(),c='orange',alpha=.5,label='BG',s=5)
    ax[1].legend(loc='best',fancybox='True')
    ax[1].set_title('Histogram in Absolute magnitude')
    fig.tight_layout()
    plt.savefig('Output/Ben_K2/TRILEGAL_result-comp.png')
    plt.close('all')

    '''Getting KDEs'''
    kdes = []
    fig, ax = plt.subplots(chain.shape[1],chain.shape[1])
    for n in range(chain.shape[1]):
        est_large = kde.KDE1D(chain[:,n])
        xs, ys = est_large.grid()
        kdes.append(np.array([xs,ys]))
        a = ax[n,n].hist(chain[:,n],bins=int(np.sqrt(len(chain[:,n]))),histtype='step',color='k', normed=True)
        ax[n,n].plot(xs,ys,c='cornflowerblue')
        ax[n,n].set_title(labels_mc[n])
    fig.tight_layout()
    fig.savefig('Output/Ben_K2/KDE_fits.png')
    plt.close(fig)


####---SETTING UP AND RUNNING MCMC
####-----K2 RUN
    x, y, xerr, df, dfT= get_values('K2')

    start_params = res