示例#1
0
def _get_kdes(train_ats, train_pred, class_matrix, args):
    """Fit Gaussian kernel density estimators on training activation traces.

    Columns of the activation traces whose variance is below
    ``args.var_threshold`` are dropped before any estimator is fitted.

    Args:
        train_ats (ndarray): Activation traces of the training set. Rows are
            selected with ``class_matrix[label]`` and the array is transposed
            before fitting, so it is presumably laid out as
            (samples, features) -- TODO confirm against the caller.
        train_pred (list): Predictions for the training set. Not used by this
            function; kept for interface compatibility.
        class_matrix (list): ``class_matrix[label]`` holds the row indices of
            ``train_ats`` that belong to ``label``.
        args: Options object. Reads ``is_classification``, ``num_classes``
            and ``var_threshold``.

    Returns:
        kdes: For classification, a dict mapping label -> ``gaussian_kde``;
            it is left incomplete (fitting stops) as soon as no column
            survives the variance filter. Otherwise a list holding a single
            ``gaussian_kde``, or an empty list when no column survives.
        removed_cols (list): Indices of the columns removed by the variance
            threshold, in the order they were discovered.
    """

    # NOTE(review): the returned session is not used here; the call is kept in
    # case callers rely on the backend session existing -- confirm and drop.
    K.get_session()
    K.set_learning_phase(False)

    removed_cols = []
    if args.is_classification:
        # A column is removed for every class as soon as it is low-variance in
        # any one class. The set gives O(1) membership checks; the list keeps
        # the discovery order that is returned to the caller.
        already_removed = set()
        for label in range(args.num_classes):
            col_vectors = np.transpose(train_ats[class_matrix[label]])
            for i in range(col_vectors.shape[0]):
                if (i not in already_removed
                        and np.var(col_vectors[i]) < args.var_threshold):
                    already_removed.add(i)
                    removed_cols.append(i)

        kdes = {}
        for label in tqdm(range(args.num_classes), desc="kde"):
            refined_ats = np.transpose(train_ats[class_matrix[label]])
            refined_ats = np.delete(refined_ats, removed_cols, axis=0)

            if refined_ats.shape[0] == 0:
                print(
                    warn("ats were removed by threshold {}".format(
                        args.var_threshold)))
                break
            kdes[label] = gaussian_kde(refined_ats)
            print(refined_ats.shape)

    else:
        col_vectors = np.transpose(train_ats)
        for i in range(col_vectors.shape[0]):
            if np.var(col_vectors[i]) < args.var_threshold:
                removed_cols.append(i)

        refined_ats = np.delete(col_vectors, removed_cols, axis=0)
        if refined_ats.shape[0] == 0:
            # Nothing left to fit: gaussian_kde would raise on an empty
            # dataset, so warn and return no estimator, mirroring the
            # classification branch which also stops fitting here.
            print(
                warn("ats were removed by threshold {}".format(
                    args.var_threshold)))
            kdes = []
        else:
            kdes = [gaussian_kde(refined_ats)]

    print(infog("The number of removed columns: {}".format(len(removed_cols))))

    return kdes, removed_cols
    def __init__(self, ehefile="../Resources/EHE/EHE_effective.csv"):
        """Build weighted Gaussian KDEs from an EHE CSV file.

        The file is read with pandas; the ``cos(ImpLF_zen)`` column supplies
        the samples and the ``wE3`` column supplies the weights.

        Sets the following attributes:

        * ``density_nocos`` -- KDE of ``arccos`` of the cosine column,
          converted to degrees, with bandwidth factor 0.04.
        * ``x_nocos`` -- 180 bin edges evenly spaced over [-1, 1].
        * ``density`` -- KDE of the cosine column itself, with bandwidth
          factor 0.05.
        * ``x`` -- 180 bin edges evenly spaced over [0, 180].

        NOTE(review): ``x_nocos`` spans the cosine range while
        ``density_nocos`` is defined over degrees (and vice versa for
        ``x`` / ``density``); this pairing looks swapped -- confirm against
        the code that consumes these attributes.

        :param ehefile: Path of the CSV file to load.
        """
        pass_sr = pd.read_csv(ehefile)

        # Convert the two columns once and reuse them below.
        cos_values = np.array(pass_sr["cos(ImpLF_zen)"])
        weights = np.array(pass_sr["wE3"])
        angles_deg = np.rad2deg(np.arccos(cos_values))
        hist_weights = weights / 0.457845099495

        # ``density=True`` replaces the ``normed`` keyword, which NumPy
        # removed in 1.24. Only the bin edges (m[1]) are used from the
        # histogram results.
        m = np.histogram(cos_values,
                         weights=hist_weights,
                         bins=np.linspace(-1, 1, 180),
                         density=True)

        self.density_nocos = gaussian_kde(angles_deg, weights=weights)
        self.density_nocos.set_bandwidth(0.04)
        self.x_nocos = m[1]

        m = np.histogram(angles_deg,
                         weights=hist_weights,
                         bins=np.linspace(0, 180, 180),
                         density=True)

        self.density = gaussian_kde(cos_values, weights=weights)
        self.density.set_bandwidth(0.05)
        self.x = m[1]
示例#3
0
文件: score.py 项目: greatlse/barnaba
def score(args):
    """Score structures against a reference with a Gaussian KDE (ESCORE).

    A KDE is built from the score matrix of the reference structure
    ``args.ff``. Each structure in ``args.files`` (and every further frame
    returned by its reader) is then evaluated with that KDE, and the summed
    value is written as one line to the output file ``args.name``.

    :param args: Options object. Reads ``files``, ``name``, ``ff``,
        ``res_mode``, ``cutoff`` and ``xtc``; all attributes are also dumped
        as ``# key value`` header lines in the output file.
    :returns: 0 on completion.
    :raises AssertionError: if ``args.xtc`` is given together with more than
        one file.
    """
    files = args.files
    print("# Calculating ESCORE...")

    # The context manager closes the output file even if reading or scoring
    # a structure raises.
    with open(args.name, 'w') as fh:
        fh.write("# This is a baRNAba run.\n")
        options = vars(args)
        for k in sorted(options):
            fh.write("# " + str(k) + " " + str(options[k]) + "\n")

        # calculate interaction matrix of the reference structure
        ref_pdb = reader.Pdb(args.ff, res_mode=args.res_mode)
        ref_mat = ref_pdb.model.get_mat_score(args.cutoff)

        kernel = kde.gaussian_kde(ref_mat)
        kernel.set_bandwidth(0.25)

        # Single-argument print calls give the same output on Python 2 and 3.
        print("# KDE computed. Bandwidth= %s" % (kernel.factor,))
        print("# Calculating ESCORE...")

        if args.xtc is not None:
            assert len(files) == 1, "# Error: when providing XTC trajectories, specify a single reference PDB file with -f"

        for fname in files:
            cur_pdb = reader.Pdb(fname, res_mode=args.res_mode)
            cur_pdb.set_xtc(args.xtc)

            # The first frame is available right after construction; read()
            # presumably advances to the next frame and returns a negative
            # index when none is left -- verify against reader.Pdb.
            idx = 0
            while idx >= 0:
                # The evaluation cutoff is 0.2 larger than the reference one.
                cur_mat = cur_pdb.model.get_mat_score(args.cutoff + 0.2)
                val = kernel(cur_mat)
                fh.write('%8.5f %s.%i \n' % (sum(val), fname, idx))
                idx = cur_pdb.read()

    return 0
示例#4
0
def kde_posterior_pdf(paramx,
                      paramy,
                      posterior,
                      npoints=100,
                      bin_limits=None,
                      bw_method='scott',
                      fft=True):
    r"""
    Two-dimensional posterior pdf from a weighted Gaussian kernel density
    estimate (KDE), evaluated on a regular ``npoints`` x ``npoints`` grid.

    Background reading:
    `wiki <https://en.wikipedia.org/wiki/Kernel_density_estimation/>`_ and
    `scipy <http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.stats.gaussian_kde.html>`_.

    .. warning::
        The default band-width comes from Scott's rule of thumb, which may
        give biased/inaccurate estimates of the pdf when the parent
        distribution is not approximately Gaussian.

    .. warning::
        Boundaries receive no special treatment, which can be problematic.

    .. warning::
        The returned pdf is scaled so that its maximum value is one.

    :param paramx: Data column of parameter x
    :type paramx: numpy.ndarray
    :param paramy: Data column of parameter y
    :type paramy: numpy.ndarray
    :param posterior: Data column of posterior weight
    :type posterior: numpy.ndarray
    :param npoints: Number of grid points per dimension
    :type npoints: integer
    :param bin_limits: Grid limits; when omitted, the range of the data is
        used
    :type bin_limits: list [[xmin,xmax],[ymin,ymax]]
    :param bw_method: Band-width method or value, passed to the KDE
    :type bw_method: string or float
    :param fft: Whether to use Fast-Fourier transform, passed to the KDE
    :type fft: bool

    :returns: KDE of posterior pdf at x and y centers; ``pdf[i, j]`` belongs
        to ``(bin_centers_x[i], bin_centers_y[j])``
    :rtype: named tuple (pdf: numpy.ndarray, bin_centers_x: \
        numpy.ndarray, bin_centers_y: numpy.ndarray)

    :Example:

    >>> npoints = 100
    >>> pdf, x, y = kde_posterior_pdf(data[2], data[3], data[0], npoints=npoints)
    >>> assert len(pdf) == npoints
    >>> assert len(x) == npoints
    >>> assert len(y) == npoints
    """
    # Grid extent: explicit limits when supplied, otherwise the data range.
    if bin_limits:
        x_extent, y_extent = bin_limits[0], bin_limits[1]
    else:
        x_extent, y_extent = paramx, paramy
    x_hi, x_lo = max(x_extent), min(x_extent)
    y_hi, y_lo = max(y_extent), min(y_extent)

    samples = np.array((paramx, paramy))
    estimator = gaussian_kde(samples,
                             weights=posterior,
                             bw_method=bw_method,
                             fft=fft)

    centers_x = np.linspace(x_lo, x_hi, npoints)
    centers_y = np.linspace(y_lo, y_hi, npoints)

    # All (x, y) grid combinations as a 2 x npoints**2 array, with x varying
    # slowest so that the reshape below yields pdf[ix, iy].
    grid = np.column_stack((np.repeat(centers_x, npoints),
                            np.tile(centers_y, npoints))).T
    density = np.reshape(estimator(grid), (npoints, npoints))

    # Scale so that the peak equals one (other functions normalize the area
    # to one instead).
    peak = density.max()
    density = density / peak

    return _kde_posterior_pdf_2D(density, centers_x, centers_y)
示例#5
0
def kde_posterior_pdf(parameter,
                      posterior,
                      npoints=500,
                      bin_limits=None,
                      norm_area=False,
                      bw_method='scott',
                      fft=True
                      ):
    r"""
    One-dimensional posterior pdf from a weighted Gaussian kernel density
    estimate (KDE), evaluated at ``npoints`` evenly spaced points.

    Background reading:
    `wiki <https://en.wikipedia.org/wiki/Kernel_density_estimation/>`_ and
    `scipy <http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.stats.gaussian_kde.html>`_.

    .. warning::
        The default band-width comes from Scott's rule of thumb, which may
        give biased/inaccurate estimates of the pdf when the parent
        distribution is not approximately Gaussian.

    .. warning::
        Boundaries receive no special treatment, which can be problematic.

    .. warning::
        By default the returned pdf is scaled so that its maximum value is
        one.

    :param parameter: Data column of parameter of interest
    :type parameter: numpy.ndarray
    :param posterior: Data column of posterior weight
    :type posterior: numpy.ndarray
    :param npoints: Number of points to evaluate PDF at
    :type npoints: integer
    :param bin_limits: Limits of the evaluation range; its smallest and
        largest entries are used. When omitted, the range of the data is used
    :type bin_limits: list [xmin, xmax]
    :param norm_area: If True, the KDE values are returned exactly as
        evaluated (no rescaling is applied here). Otherwise they are divided
        by their maximum so that the peak is one.
    :type norm_area: bool
    :param bw_method: Band-width method or value, passed to the KDE
    :type bw_method: string or float
    :param fft: Whether to use Fast-Fourier transform, passed to the KDE
    :type fft: bool

    :returns: KDE of posterior pdf evaluated at centers
    :rtype: named tuple (pdf: numpy.ndarray, bin_centers: numpy.ndarray)

    :Example:

    >>> npoints = 1000
    >>> kde = kde_posterior_pdf(data[2], data[0], npoints=npoints)
    >>> assert len(kde.pdf) == npoints
    >>> assert len(kde.bin_centers) == npoints
    """
    # Evaluation range: explicit limits when supplied, otherwise the data.
    extent = bin_limits if bin_limits else parameter
    upper, lower = max(extent), min(extent)

    estimator = gaussian_kde(parameter,
                             weights=posterior,
                             bw_method=bw_method,
                             fft=fft)

    centers = np.linspace(lower, upper, npoints)
    density = estimator(centers)

    if norm_area:
        return _kde_posterior_pdf_1D(density, centers)

    # Peak normalization.
    return _kde_posterior_pdf_1D(density / density.max(), centers)
示例#6
0
def kde_posterior_pdf(paramx,
                      paramy,
                      posterior,
                      npoints=100,
                      bin_limits=None,
                      bw_method='scott'):
    r"""
    Two-dimensional posterior pdf from a weighted Gaussian kernel density
    estimate (KDE), evaluated on a regular ``npoints`` x ``npoints`` grid.

    Background reading:
    `wiki <https://en.wikipedia.org/wiki/Kernel_density_estimation/>`_ and
    `scipy <http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.stats.gaussian_kde.html>`_.

    .. warning::
        The default band-width comes from Scott's rule of thumb, which may
        give biased/inaccurate estimates of the pdf when the parent
        distribution is not approximately Gaussian.

    .. warning::
        Boundaries receive no special treatment, which can be problematic.

    .. warning::
        The returned pdf is scaled so that its maximum value is one.

    :param paramx: Data column of parameter x
    :type paramx: numpy.ndarray
    :param paramy: Data column of parameter y
    :type paramy: numpy.ndarray
    :param posterior: Data column of posterior weight
    :type posterior: numpy.ndarray
    :param npoints: Number of grid points per dimension
    :type npoints: integer
    :param bin_limits: Grid limits; when omitted, the range of the data is
        used
    :type bin_limits: list [[xmin,xmax],[ymin,ymax]]
    :param bw_method: Band-width method or value, passed to the KDE
    :type bw_method: string or float

    :returns: KDE of posterior pdf at x and y centers; ``pdf[i, j]`` belongs
        to ``(bin_centers_x[i], bin_centers_y[j])``
    :rtype: named tuple (pdf: numpy.ndarray, bin_centers_x: \
        numpy.ndarray, bin_centers_y: numpy.ndarray)

    :Example:

    >>> npoints = 100
    >>> pdf, x, y = kde_posterior_pdf(data[2], data[3], data[0], npoints=npoints)
    >>> assert len(pdf) == npoints
    >>> assert len(x) == npoints
    >>> assert len(y) == npoints
    """
    # Pick what defines the grid extent: the given limits or the samples.
    if bin_limits:
        span_x, span_y = bin_limits[0], bin_limits[1]
    else:
        span_x, span_y = paramx, paramy
    x_max, x_min = max(span_x), min(span_x)
    y_max, y_min = max(span_y), min(span_y)

    estimator = gaussian_kde(np.array((paramx, paramy)),
                             weights=posterior,
                             bw_method=bw_method)

    centers_x = np.linspace(x_min, x_max, npoints)
    centers_y = np.linspace(y_min, y_max, npoints)

    # Row 0 holds the x coordinates (each repeated npoints times), row 1 the
    # y coordinates (the whole axis cycled npoints times): x-major ordering.
    xs = np.repeat(centers_x, npoints)
    ys = np.tile(centers_y, npoints)
    grid = np.column_stack((xs, ys)).T

    density = np.reshape(estimator(grid), (npoints, npoints))

    # Peak normalization; other functions normalize the area to one instead.
    density = density / density.max()

    return _kde_posterior_pdf_2D(density, centers_x, centers_y)
示例#7
0
def kde_posterior_pdf(parameter,
                      posterior,
                      npoints=500,
                      bin_limits=None,
                      norm_area=False,
                      bw_method='scott',
                      fft=True):
    r"""
    One-dimensional posterior pdf from a weighted Gaussian kernel density
    estimate (KDE), evaluated at ``npoints`` evenly spaced points.

    Background reading:
    `wiki <https://en.wikipedia.org/wiki/Kernel_density_estimation/>`_ and
    `scipy <http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.stats.gaussian_kde.html>`_.

    .. warning::
        The default band-width comes from Scott's rule of thumb, which may
        give biased/inaccurate estimates of the pdf when the parent
        distribution is not approximately Gaussian.

    .. warning::
        Boundaries receive no special treatment, which can be problematic.

    .. warning::
        By default the returned pdf is scaled so that its maximum value is
        one.

    :param parameter: Data column of parameter of interest
    :type parameter: numpy.ndarray
    :param posterior: Data column of posterior weight
    :type posterior: numpy.ndarray
    :param npoints: Number of points to evaluate PDF at
    :type npoints: integer
    :param bin_limits: Limits of the evaluation range; its smallest and
        largest entries are used. When omitted, the range of the data is used
    :type bin_limits: list [xmin, xmax]
    :param norm_area: If True, the KDE values are returned exactly as
        evaluated (no rescaling is applied here). Otherwise they are divided
        by their maximum so that the peak is one.
    :type norm_area: bool
    :param bw_method: Band-width method or value, passed to the KDE
    :type bw_method: string or float
    :param fft: Whether to use Fast-Fourier transform, passed to the KDE
    :type fft: bool

    :returns: KDE of posterior pdf evaluated at centers
    :rtype: named tuple (pdf: numpy.ndarray, bin_centers: numpy.ndarray)

    :Example:

    >>> npoints = 1000
    >>> kde = kde_posterior_pdf(data[2], data[0], npoints=npoints)
    >>> assert len(kde.pdf) == npoints
    >>> assert len(kde.bin_centers) == npoints
    """
    # The range comes from the given limits, or from the samples themselves.
    span = bin_limits if bin_limits else parameter
    hi = max(span)
    lo = min(span)

    estimator = gaussian_kde(parameter,
                             weights=posterior,
                             bw_method=bw_method,
                             fft=fft)

    centers = np.linspace(lo, hi, npoints)
    values = estimator(centers)

    # Without norm_area the curve is rescaled so that its peak is one.
    pdf = values if norm_area else values / values.max()

    return _kde_posterior_pdf_1D(pdf, centers)
示例#8
0
def kde_posterior_pdf(paramx, 
                      paramy, 
                      posterior, 
                      npoints=100, 
                      bin_limits=None, 
                      bw_method='scott'):
    r"""
    Two-dimensional posterior pdf from a weighted kernel density estimate
    (KDE), evaluated on a regular ``npoints`` x ``npoints`` grid.

    .. warning::
        Outliers sometimes mess up bins. So you might want to \
        specify the bin limits.

    .. warning::
        The returned pdf is scaled so that its maximum value is one.

    :param paramx: Data column of parameter x
    :type paramx: numpy.ndarray
    :param paramy: Data column of parameter y
    :type paramy: numpy.ndarray
    :param posterior: Data column of posterior weight
    :type posterior: numpy.ndarray
    :param npoints: Number of grid points per dimension
    :type npoints: integer
    :param bin_limits: Grid limits; when omitted, the range of the data is
        used
    :type bin_limits: list [[xmin,xmax],[ymin,ymax]]
    :param bw_method: Band-width method, passed to the KDE
    :type bw_method: string

    :returns: KDE of posterior pdf at x and y centers; ``pdf[i, j]`` belongs
        to ``(bin_centers_x[i], bin_centers_y[j])``
    :rtype: named tuple (pdf: numpy.ndarray, bin_centers_x: \
        numpy.ndarray, bin_centers_y: numpy.ndarray)

    :Example:

    >>> npoints = 100
    >>> pdf, x, y = kde_posterior_pdf(data[2], data[3], data[0], npoints=npoints)
    >>> assert len(pdf) == npoints
    >>> assert len(x) == npoints
    >>> assert len(y) == npoints
    """
    # Grid extent: explicit limits when supplied, otherwise the data range.
    if bin_limits:
        extent_x, extent_y = bin_limits[0], bin_limits[1]
    else:
        extent_x, extent_y = paramx, paramy
    x_top, x_bottom = max(extent_x), min(extent_x)
    y_top, y_bottom = max(extent_y), min(extent_y)

    estimator = gaussian_kde(np.array((paramx, paramy)),
                             weights=posterior,
                             bw_method=bw_method)

    centers_x = np.linspace(x_bottom, x_top, npoints)
    centers_y = np.linspace(y_bottom, y_top, npoints)

    # Grid points in y-major order: the x axis is cycled for every y value.
    grid = np.column_stack((np.tile(centers_x, npoints),
                            np.repeat(centers_y, npoints))).T

    # The evaluation comes back as [iy, ix]; transpose to get pdf[ix, iy].
    density = np.reshape(estimator(grid), (npoints, npoints)).T

    # Peak normalization; other functions normalize the area to one instead.
    density = density / density.max()

    return _kde_posterior_pdf_2D(density, centers_x, centers_y)