Example #1
 def set_params(self, values):
     self.lengthscales = values[:-1]
     self.variance = values[-1]
     L = np.zeros((self.num_dim, self.num_dim))
     L[np.tril_indices_from(L)] = self.lengthscales
     self.L_inv = inv(L)
     self.projection = np.dot(self.L_inv.T, self.L_inv)
Example #2
    def test_map_diag_and_offdiag(self):

        vars = ["x", "y", "z"]
        g = ag.PairGrid(self.df)
        g.map_offdiag(plt.scatter)
        g.map_diag(plt.hist)

        for ax in g.diag_axes:
            nt.assert_equal(len(ax.patches), 10)

        for i, j in zip(*np.triu_indices_from(g.axes, 1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

        for i, j in zip(*np.tril_indices_from(g.axes, -1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

        for i, j in zip(*np.diag_indices_from(g.axes)):
            ax = g.axes[i, j]
            nt.assert_equal(len(ax.collections), 0)
Example #3
 def transform_covars_grad(self, internal_grad):
     grad = np.empty((self.num_latent, self.get_covar_size()), dtype=np.float32)
     for j in range(self.num_latent):
         tmp = self._theano_transform_covars_grad(internal_grad[0, j], self.covars_cholesky[j])
         tmp[np.diag_indices_from(tmp)] *= self.covars_cholesky[j][np.diag_indices_from(tmp)]
         grad[j] = tmp[np.tril_indices_from(self.covars_cholesky[j])]
     return grad.flatten()
Example #4
 def _get_raw_covars(self):
     flattened_covars = np.empty([self.num_latent, self.get_covar_size()], dtype=np.float32)
     for i in range(self.num_latent):
         raw_covars = self.covars_cholesky[i].copy()
         raw_covars[np.diag_indices_from(raw_covars)] = np.log(raw_covars[np.diag_indices_from(raw_covars)])
         flattened_covars[i] = raw_covars[np.tril_indices_from(raw_covars)]
     return flattened_covars.flatten()
Example #5
    def test_pairplot(self):

        vars = ["x", "y", "z"]
        g = pairplot(self.df)

        for ax in g.diag_axes:
            nt.assert_equal(len(ax.patches), 10)

        for i, j in zip(*np.triu_indices_from(g.axes, 1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

        for i, j in zip(*np.tril_indices_from(g.axes, -1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

        for i, j in zip(*np.diag_indices_from(g.axes)):
            ax = g.axes[i, j]
            nt.assert_equal(len(ax.collections), 0)

        plt.close("all")
Example #6
    def map_lower(self, func, **kwargs):
        """Plot with a bivariate function on the lower diagonal subplots.

        Parameters
        ----------
        func : callable plotting function
            Must take x, y arrays as positional arguments and draw onto the
            "currently active" matplotlib Axes.

        """
        kw_color = kwargs.pop("color", None)
        for i, j in zip(*np.tril_indices_from(self.axes, -1)):
            hue_grouped = self.data.groupby(self.hue_vals)
            for k, (label_k, data_k) in enumerate(hue_grouped):

                ax = self.axes[i, j]
                plt.sca(ax)

                x_var = self.x_vars[j]
                y_var = self.y_vars[i]

                color = self.palette[k] if kw_color is None else kw_color
                func(data_k[x_var], data_k[y_var], label=label_k,
                     color=color, **kwargs)

            self._clean_axis(ax)
            self._update_legend_data(ax)

        if kw_color is not None:
            kwargs["color"] = kw_color
        self._add_axis_labels()
Example #7
 def net_sample_multinomial(A, minEdges, edgesPerSample=1, *args, **kwargs):
     """ NETWORK SAMPLING ALGORITHM:
     sample network ties from a multinomial distribution
     defined as 1/AAT[i,j] normalized by sum(AAT[i>j])
     problem: doesn't sufficiently cluster the resulting network and
              doesn't return an exact number of ties, only at least as many
              as the specified minEdges
     """
     draws = int(np.ceil(minEdges * 1.2))
     # pairwise distances between observations
     dist = pdist(A)   # what matrix to use: pdist(A) or just tril(AAT) directly?
     invdist = dist
     invdist[invdist != 0] = 1 / invdist[invdist != 0]  # prevent division by 0
     thetavec = invdist / np.sum(invdist)
     theta = squareform(thetavec)
     
     # multinomial sample
     n = np.shape(theta)[0]
     Z = np.zeros((n,n))
     # samp = sampleLinks(q=thetavec, edgesToDraw=1, draws=draws)
     y = np.random.multinomial(edgesPerSample, thetavec, draws)
     samp = np.asarray([np.mean([y[draw][item] for draw in np.arange(draws)]) for item in np.arange(len(thetavec))])
     samp = np.ceil(samp)
     
     # repeat until reaching enough network ties
     while np.sum(samp) < minEdges:
         draws = int(np.ceil(draws * 1.1))  # increase number of draws and try again
         # samp = sampleLinks(q=thetavec, edgesToDraw=1, draws=draws)
         y = np.random.multinomial(edgesPerSample, thetavec, draws)
         samp = np.asarray([np.mean([y[draw][item] for draw in np.arange(draws)]) for item in np.arange(len(thetavec))])
         samp = np.ceil(samp)

     Z[np.tril_indices_from(Z, k=-1)] = samp
     
     return (theta, Z)
Example #8
    def test_pairplot_reg(self):

        vars = ["x", "y", "z"]
        g = ag.pairplot(self.df, diag_kind="hist", kind="reg")

        for ax in g.diag_axes:
            nt.assert_equal(len(ax.patches), 10)

        for i, j in zip(*np.triu_indices_from(g.axes, 1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

            nt.assert_equal(len(ax.lines), 1)
            nt.assert_equal(len(ax.collections), 2)

        for i, j in zip(*np.tril_indices_from(g.axes, -1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

            nt.assert_equal(len(ax.lines), 1)
            nt.assert_equal(len(ax.collections), 2)

        for i, j in zip(*np.diag_indices_from(g.axes)):
            ax = g.axes[i, j]
            nt.assert_equal(len(ax.collections), 0)
Example #9
    def test_pairplot(self):

        vars = ["x", "y", "z"]
        g = ag.pairplot(self.df)

        for ax in g.diag_axes:
            assert len(ax.patches) > 1

        for i, j in zip(*np.triu_indices_from(g.axes, 1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

        for i, j in zip(*np.tril_indices_from(g.axes, -1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

        for i, j in zip(*np.diag_indices_from(g.axes)):
            ax = g.axes[i, j]
            nt.assert_equal(len(ax.collections), 0)

        g = ag.pairplot(self.df, hue="a")
        n = len(self.df.a.unique())

        for ax in g.diag_axes:
            assert len(ax.lines) == n
            assert len(ax.collections) == n
Example #10
 def __init__(self, lengthscale_mat, variance=1.0):
     lengthscale_mat = np.asarray(lengthscale_mat)
     assert lengthscale_mat.shape[0] == lengthscale_mat.shape[1]
     self.num_dim = lengthscale_mat.shape[0]
     self.params = np.concatenate((
         lengthscale_mat[np.tril_indices_from(lengthscale_mat)],
         np.array([variance])))
Example #11
 def set_covars(self, raw_covars):
     raw_covars = raw_covars.reshape([self.num_latent, self.get_covar_size()])
     for j in range(self.num_latent):
         cholesky = np.zeros([self.num_dim, self.num_dim], dtype=np.float32)
         cholesky[np.tril_indices_from(cholesky)] = raw_covars[j]
         cholesky[np.diag_indices_from(cholesky)] = np.exp(cholesky[np.diag_indices_from(cholesky)])
         self.covars_cholesky[j] = cholesky
         self.covars[j] = mdot(self.covars_cholesky[j], self.covars_cholesky[j].T)
Example #12
def find_smallest_index(matrice):
    """Return the (i, j) index of the smallest entry in a matrix.
    Warning: the diagonal should hold the largest values so it is never
    chosen.
    """

    index = np.tril_indices_from(matrice, -1)
    return np.vstack(index)[:, matrice[index].argmin()]
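A quick usage sketch of the helper above (the matrix is made up for illustration; the oversized diagonal keeps the argmin strictly below it):

import numpy as np

matrix = np.array([[9.0, 0.5, 0.3],
                   [0.5, 9.0, 0.1],
                   [0.3, 0.1, 9.0]])
i, j = find_smallest_index(matrix)
print(i, j)  # 2 1 -- the smallest below-diagonal entry, matrix[2, 1] == 0.1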
Example #13
	def shepard(self, xax=1, yax=2):
		coords = self.U[:,[xax-1, yax-1]]
		reducedD = np.zeros((coords.shape[0], coords.shape[0]))
		for i in range(coords.shape[0]):
			for j in range(coords.shape[0]):
				d = coords[i,:] - coords[j,:]
				reducedD[i, j] = np.sqrt( d.dot(d) )
		reducedD = reducedD[np.tril_indices_from(reducedD, k=-1)]
		originalD = self.y2[np.tril_indices_from(self.y2, k=-1)]
		xmin = np.min(reducedD)
		xmax = np.max(reducedD)
		f, ax = py.subplots()
		ax.plot(reducedD, originalD, 'ko')
		ax.plot([xmin, xmax], [xmin, xmax], 'r--')
		ax.set_xlabel('Distances in Reduced Space')
		ax.set_ylabel('Distances in Original Matrix')
		py.show()
Example #14
    def _band_infinite():
        '''Suppress the diagonal +- width band of a distance matrix'''
        band = np.empty((t, t))
        band[:] = np.inf
        band[np.triu_indices_from(band, width)] = 0
        band[np.tril_indices_from(band, -width)] = 0

        return band
Example #15
def from_vector(x):
    # Solution to the equation len(x) = n * (n + 1) / 2
    n = int((math.sqrt(len(x) * 8 + 1) - 1) / 2)
    result = np.zeros((n, n))
    result[np.tril_indices_from(result, -1)] = x[n:]
    result += result.transpose()
    result[np.diag_indices_from(result)] = x[:n]
    return result
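A short round-trip sketch of the packing convention the indexing above implies: the first n entries of x hold the diagonal and the remainder hold the strict lower triangle (inferred from the code, not from any documented spec). The formula for n simply inverts len(x) = n * (n + 1) / 2.

import numpy as np

x = np.array([1.0, 2.0, 3.0,   # diagonal of a 3x3 matrix
              4.0, 5.0, 6.0])  # strict lower triangle, row by row
print(from_vector(x))
# [[1. 4. 5.]
#  [4. 2. 6.]
#  [5. 6. 3.]]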
Example #16
def sort_links_by_weight(corr_mat, ok_nodes, include_mst):
    """
    Sort the links by their link-weight

    Parameters
    ----------
    corr_mat : np.array
        2D numpy array, possibly containing bad nodes.
    ok_nodes : np.array
        the bool blacklist (whitelist)
    include_mst : bool
        If True, add the maximum spanning tree to the beginning of the
        sorted list.

    Returns
    -------
    edgelist : numpy structured array (node1, node2, weight)
        array([(0, 1, 1.0), (0, 3, 0.5), (2, 3, 0.5), (0, 4, 0.7), (1, 4, 0.4)],
              dtype=[('node1', '<i4'), ('node2', '<i4'), ('weight', '<f8')])
    """
    up_diag_matrix = _get_filtered_triu_adj_mat_copy(corr_mat, ok_nodes)
    n = len(up_diag_matrix)
    minVal = np.min(up_diag_matrix)
    minValMinusOne = np.min(up_diag_matrix) - 1
    # So that possible overflows don't go unnoticed
    assert minValMinusOne < minVal

    initEdges = np.array(np.triu_indices_from(up_diag_matrix, 1)).T
    weights = up_diag_matrix[np.triu_indices_from(up_diag_matrix, 1)]
    nLinksMax = (n * (n - 1)) // 2  # integer count of possible links
    nLinksMST = 0
    edgelist = np.zeros(nLinksMax,
                        dtype=[('node1', 'i4'), ('node2', 'i4'),
                               ('weight', 'f8')])

    # Get the maximum spanning tree (multiplying the weights by -1 does the
    # trick)
    if include_mst:
        g = igraph.Graph(n, list(initEdges), directed=False)
        mst = g.spanning_tree(-1 * weights, return_tree=False)
        for i, ei in enumerate(mst):
            edge = g.es[ei]
            edgelist[i] = edge.source, edge.target, weights[ei]
            # Take these links away from the orig. mat
            up_diag_matrix[edge.source, edge.target] = minValMinusOne
        nLinksMST = len(mst)

    # How many links we still need to take after (possible) MST:
    nLinksYetToTake = np.max([nLinksMax - nLinksMST, 0])  # mst already there

    # Get the next largest indices
    up_diag_matrix[np.tril_indices_from(up_diag_matrix, 0)] = minValMinusOne
    mflat = up_diag_matrix.flatten()
    flatindices = mflat.argsort()[::-1][:nLinksYetToTake]
    edgelist[nLinksMST:]['node1'], edgelist[nLinksMST:][
        'node2'] = np.unravel_index(flatindices, (n, n))
    edgelist[nLinksMST:]['weight'] = mflat[flatindices]

    return edgelist
Example #17
File: base.py Project: zeou1/graspy
    def score_samples(self, graph, clip=None):
        """
        Compute the weighted log probabilities for each potential edge.

        Note that this implicitly assumes the input graph is indexed like the
        fit model.

        Parameters
        ----------
        graph : np.ndarray
            Input graph. Must be same shape as model's :attr:`p_mat_` attribute

        clip : scalar or None, optional (default=None)
            Value at which to clip the probability matrix: entries less than
            ``clip`` or greater than ``1 - clip`` are set to ``clip`` or
            ``1 - clip``, respectively.
            If None, values will not be clipped in the likelihood calculation, which may
            result in poorly behaved likelihoods depending on the model.

        Returns
        -------
        sample_scores : np.ndarray (size of ``graph``)
            log-likelihood per potential edge in the graph
        """
        check_is_fitted(self, "p_mat_")
        # P.ravel() <dot> graph * (1 - P.ravel()) <dot> (1 - graph)
        graph = import_graph(graph)
        if not is_unweighted(graph):
            raise ValueError("Model only implemented for unweighted graphs")
        p_mat = self.p_mat_.copy()

        if np.shape(p_mat) != np.shape(graph):
            raise ValueError("Input graph size must be the same size as P matrix")

        inds = None
        if not self.directed and self.loops:
            inds = np.triu_indices_from(graph)  # ignore lower half of graph, symmetric
        elif not self.directed and not self.loops:
            inds = np.triu_indices_from(graph, k=1)  # ignore the diagonal
        elif self.directed and not self.loops:
            xu, yu = np.triu_indices_from(graph, k=1)
            xl, yl = np.tril_indices_from(graph, k=-1)
            x = np.concatenate((xl, xu))
            y = np.concatenate((yl, yu))
            inds = (x, y)
        if inds is not None:
            p_mat = p_mat[inds]
            graph = graph[inds]

        # clip the probabilities that are degenerate
        if clip is not None:
            p_mat[p_mat < clip] = clip
            p_mat[p_mat > 1 - clip] = 1 - clip

        # TODO: use nonzero inds here will be faster
        successes = np.multiply(p_mat, graph)
        failures = np.multiply((1 - p_mat), (1 - graph))
        likelihood = successes + failures
        return np.log(likelihood)
Example #18
def plot_cor_heatmap(cor,
                     value_range=[-1, 1],
                     title=None,
                     cmap='jet',
                     figsize=None,
                     full=True):
    """ TODO : This function runs too long for large arrays.
    Implement with regular matplotlib(??).
    https://matplotlib.org/gallery/images_contours_and_fields/image_annotated_heatmap.html
    """
    if len(value_range) == 2:
        vmin, vmax = value_range
    else:
        vmin, vmax = cor.min().min(), cor.max().max()

    fontsize = 8
    if figsize is None:
        sc_x, sc_y = 0.5, 0.5
        figsize = sc_x * cor.shape[1], sc_y * cor.shape[0]

    fig, ax = plt.subplots(figsize=figsize)
    if full:
        ax = sns.heatmap(cor,
                         vmin=vmin,
                         vmax=vmax,
                         cmap=cmap,
                         annot=True,
                         annot_kws={"size": fontsize},
                         fmt='.2f',
                         linewidths=0.99,
                         linecolor='white')
    else:
        mask = np.zeros_like(cor)
        # mask[np.triu_indices_from(mask)] = True
        mask[np.tril_indices_from(mask)] = True
        ax = sns.heatmap(cor,
                         vmin=vmin,
                         vmax=vmax,
                         cmap=cmap,
                         annot=True,
                         annot_kws={"size": fontsize},
                         fmt='.2f',
                         linewidths=0.99,
                         linecolor='white',
                         mask=mask)

    # ax.invert_yaxis()
    ax.xaxis.tick_top()
    if isinstance(cor, pd.DataFrame):
        ax.set_xticklabels(cor.columns, rotation=60)

    # plt.xticks(range(len(cor.columns)), cor.columns)
    # plt.yticks(range(len(cor.columns)), cor.columns)

    if title:
        plt.title(title)

    return fig
Example #19
    def test_tril_indices_from_kover(self):

        a = np.zeros((3, 3))
        ref1, ref2 = np.tril_indices_from(a, k=1)

        tref1, tref2 = tril_indices_from(a, k=1)
        with self.test_session():
            self.assertTrue(np.all(ref1 == tref1.eval()))
            self.assertTrue(np.all(ref2 == tref2.eval()))
Example #20
 def set_covars(self, raw_covars):
     raw_covars = raw_covars.reshape([self.num_latent, self.get_covar_size()])
     for j in range(self.num_latent):
         cholesky = np.zeros([self.num_dim, self.num_dim], dtype=util.PRECISION)
         cholesky[np.tril_indices_from(cholesky)] = raw_covars[j]
         cholesky[np.diag_indices_from(cholesky)] = np.exp(
             cholesky[np.diag_indices_from(cholesky)])
         self.covars_cholesky[j] = cholesky
         self.covars[j] = mdot(self.covars_cholesky[j], self.covars_cholesky[j].T)
Example #21
 def createLowerTriangularMatrixOfPairs(self):
     """
     Create lower-triangular index pairs (excluding the diagonal) for the similarity measure
     """
     matrix = np.zeros((self.__num_docs, self.__num_docs))
     indices = np.tril_indices_from(matrix)
     n_rows = indices[0].shape[0]
     pairs = [(indices[0][i], indices[1][i]) for i in range(n_rows) if not indices[0][i] == indices[1][i]]
     return pairs
Example #22
 def grad_logprior(self, prior, grad, parameters, **kwargs):
     scale_Qinv = prior.hyperparams[self._scale_name]
     df_Qinv = prior.hyperparams[self._df_name]
     LQinv = getattr(parameters, self._lt_prec_name)
     grad_LQinv = \
         (df_Qinv - LQinv.shape[0] - 1) * np.linalg.inv(LQinv.T) - \
         np.linalg.solve(scale_Qinv, LQinv)
     grad[self._lt_vec_name] = grad_LQinv[np.tril_indices_from(grad_LQinv)]
     return
Example #23
    def _band_infinite():
        '''Suppress the diagonal +- width band of a distance matrix'''

        band = np.empty((t, t))
        band.fill(np.inf)
        band[np.triu_indices_from(band, width)] = 0
        band[np.tril_indices_from(band, -width)] = 0

        return band
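Since t and width are free variables of the closure, here is a standalone sketch with assumed values (t=5, width=2) showing which band survives:

import numpy as np

t, width = 5, 2
band = np.full((t, t), np.inf)
band[np.triu_indices_from(band, width)] = 0   # zero wherever j - i >= width
band[np.tril_indices_from(band, -width)] = 0  # zero wherever i - j >= width
print(band)  # inf only on the band |i - j| < width, 0 elsewhere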
Example #24
 def grad_trace_a_inv_dot_covars(self, chol_a, component_index,
                                 latent_index):
     assert component_index == 0
     # TODO(karl): There is a bug here related to double counting.
     tmp = 2.0 * scipy.linalg.cho_solve(
         (chol_a, True), self.covars_cholesky[latent_index])
     tmp[np.diag_indices_from(tmp)] *= (
         self.covars_cholesky[latent_index][np.diag_indices_from(tmp)])
     return tmp[np.tril_indices_from(self.covars_cholesky[latent_index])]
Example #25
def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False):
    """
    Bartlett decomposition of the Wishart distribution. As the Wishart
    distribution requires the matrix to be symmetric positive semi-definite,
    it is impossible for MCMC to ever propose acceptable matrices.

    Instead, we can use the Bartlett decomposition, which samples a lower
    triangular matrix. Specifically:

    If A ~ [[sqrt(c_1), 0, ...],
            [z_21, sqrt(c_2), 0, ...],
            [z_31, z_32, sqrt(c_3), ...]]
    with c_i ~ Chi²(nu - i + 1) and z_ij ~ N(0, 1), then
    L * A * A.T * L.T ~ Wishart(L * L.T, nu)

    See http://en.wikipedia.org/wiki/Wishart_distribution#Bartlett_decomposition
    for more information.

    :Parameters:
      S : ndarray
        p x p positive definite matrix
        Or:
        p x p lower-triangular matrix that is the Cholesky factor
        of the covariance matrix.
      nu : int
        Degrees of freedom, > dim(S).
      is_cholesky : bool (default=False)
        Input matrix S is already Cholesky decomposed as S.T * S
      return_cholesky : bool (default=False)
        Only return the Cholesky decomposed matrix.

    :Note:
      This is not a standard Distribution class but follows a similar
      interface. Besides the Wishart distribution, it will add RVs
      c and z to your model which make up the matrix.
    """

    L = S if is_cholesky else scipy.linalg.cholesky(S)

    diag_idx = np.diag_indices_from(S)
    tril_idx = np.tril_indices_from(S, k=-1)
    n_diag = len(diag_idx[0])
    n_tril = len(tril_idx[0])
    c = tt.sqrt(ChiSquared('c', nu - np.arange(2, 2+n_diag), shape=n_diag))
    print('Added new variable c to model diagonal of Wishart.')
    z = Normal('z', 0, 1, shape=n_tril)
    print('Added new variable z to model off-diagonals of Wishart.')
    # Construct A matrix
    A = tt.zeros(S.shape, dtype=np.float32)
    A = tt.set_subtensor(A[diag_idx], c)
    A = tt.set_subtensor(A[tril_idx], z)

    # L * A * A.T * L.T ~ Wishart(L*L.T, nu)
    if return_cholesky:
        return Deterministic(name, tt.dot(L, A))
    else:
        return Deterministic(name, tt.dot(tt.dot(tt.dot(L, A), A.T), L.T))
Example #26
	def heat_map(self, Dplot_specs):
		data = Dplot_specs["data"]
		heat = np.array(data)
		heat[np.tril_indices_from(heat)] = False
		fig, ax = plt.subplots()
		fig.set_size_inches(Dplot_specs["figsize"])
		sns.set(font_scale=1.0)
		self.Dheatmap_plot = sns.heatmap(data, mask=heat, vmax=1.0, vmin=0.0, square=True, annot=True, cmap='Reds')
		return self.updatePlotCounter("heatMap", Dplot_specs["title"])  # return a cookie reference to heatMapplot
Example #27
    def plot_pairwise_scatter(self, i, threshold=0.95):
        '''plot pairwise scatter plot of data points, with contours as
        background


        Parameters
        ----------
        i : int
        threshold : float

        Returns
        -------
        Figure instance


        The lower triangle background is a binary contour based on the
        specified threshold. All axes not shown are set to a default value
        in the middle of their range.

        The upper triangle shows a contour map with the conditional
        probability, again setting all non-shown dimensions to a default
        value in the middle of their range.

        '''
        model = self.models[i]

        columns = model.params.index.values.tolist()
        columns.remove('Intercept')
        x = self._normalized[columns]
        data = x.copy()

        # TODO:: have option to change
        # diag to CDF, gives you effectively the
        # regional sensitivity analysis results

        data['y'] = self.y  # for testing
        grid = sns.PairGrid(data=data, hue='y', vars=columns)
        grid.map_lower(plt.scatter, s=5)
        grid.map_diag(sns.kdeplot, shade=True)
        grid.add_legend()

        contour_levels = np.arange(0, 1.05, 0.05)
        for i, j in zip(*np.triu_indices_from(grid.axes, 1)):
            ax = grid.axes[i, j]
            ylabel = columns[i]
            xlabel = columns[j]
            contours(ax, model, xlabel, ylabel, contour_levels)

        levels = [0, threshold, 1]
        for i, j in zip(*np.tril_indices_from(grid.axes, -1)):
            ax = grid.axes[i, j]
            ylabel = columns[i]
            xlabel = columns[j]
            contours(ax, model, xlabel, ylabel, levels)

        fig = plt.gcf()
        return fig
Example #28
    def plot_pairwise_scatter(self, i, threshold=0.95):
        '''plot pairwise scatter plot of data points, with contours as
        background


        Parameters
        ----------
        i : int
        threshold : float

        Returns
        -------
        Figure instance


        The lower triangle background is a binary contour based on the
        specified threshold. All axes not shown are set to a default value
        in the middle of their range.

        The upper triangle shows a contour map with the conditional
        probability, again setting all non-shown dimensions to a default
        value in the middle of their range.

        '''
        model = self.models[i]

        columns = model.params.index.values.tolist()
        columns.remove('Intercept')
        x = self._normalized[columns]
        data = x.copy()

        # TODO:: have option to change
        # diag to CDF, gives you effectively the
        # regional sensitivity analysis results

        data['y'] = self.y  # for testing
        grid = sns.PairGrid(data=data, hue='y', vars=columns)
        grid.map_lower(plt.scatter, s=5)
        grid.map_diag(sns.kdeplot, shade=True)
        grid.add_legend()

        contour_levels = np.arange(0, 1.05, 0.05)
        for i, j in zip(*np.triu_indices_from(grid.axes, 1)):
            ax = grid.axes[i, j]
            ylabel = columns[i]
            xlabel = columns[j]
            contours(ax, model, xlabel, ylabel, contour_levels)

        levels = [0, threshold, 1]
        for i, j in zip(*np.tril_indices_from(grid.axes, -1)):
            ax = grid.axes[i, j]
            ylabel = columns[i]
            xlabel = columns[j]
            contours(ax, model, xlabel, ylabel, levels)

        fig = plt.gcf()
        return fig
Example #29
def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False):
    """
    Bartlett decomposition of the Wishart distribution. As the Wishart
    distribution requires the matrix to be symmetric positive semi-definite,
    it is impossible for MCMC to ever propose acceptable matrices.

    Instead, we can use the Bartlett decomposition, which samples a lower
    triangular matrix. Specifically:

    If A ~ [[sqrt(c_1), 0, ...],
            [z_21, sqrt(c_2), 0, ...],
            [z_31, z_32, sqrt(c_3), ...]]
    with c_i ~ Chi²(nu - i + 1) and z_ij ~ N(0, 1), then
    L * A * A.T * L.T ~ Wishart(L * L.T, nu)

    See http://en.wikipedia.org/wiki/Wishart_distribution#Bartlett_decomposition
    for more information.

    :Parameters:
      S : ndarray
        p x p positive definite matrix
        Or:
        p x p lower-triangular matrix that is the Cholesky factor
        of the covariance matrix.
      nu : int
        Degrees of freedom, > dim(S).
      is_cholesky : bool (default=False)
        Input matrix S is already Cholesky decomposed as S.T * S
      return_cholesky : bool (default=False)
        Only return the Cholesky decomposed matrix.

    :Note:
      This is not a standard Distribution class but follows a similar
      interface. Besides the Wishart distribution, it will add RVs
      c and z to your model which make up the matrix.
    """

    L = S if is_cholesky else scipy.linalg.cholesky(S)

    diag_idx = np.diag_indices_from(S)
    tril_idx = np.tril_indices_from(S, k=-1)
    n_diag = len(diag_idx[0])
    n_tril = len(tril_idx[0])
    c = tt.sqrt(ChiSquared('c', nu - np.arange(2, 2 + n_diag), shape=n_diag))
    print('Added new variable c to model diagonal of Wishart.')
    z = Normal('z', 0, 1, shape=n_tril)
    print('Added new variable z to model off-diagonals of Wishart.')
    # Construct A matrix
    A = tt.zeros(S.shape, dtype=np.float32)
    A = tt.set_subtensor(A[diag_idx], c)
    A = tt.set_subtensor(A[tril_idx], z)

    # L * A * A.T * L.T ~ Wishart(L*L.T, nu)
    if return_cholesky:
        return Deterministic(name, tt.dot(L, A))
    else:
        return Deterministic(name, tt.dot(tt.dot(tt.dot(L, A), A.T), L.T))
Example #30
 def _get_raw_covars(self):
     flattened_covars = np.empty(
         [self.num_latent, self.get_covar_size()], dtype=np.float32)
     for i in range(self.num_latent):
         raw_covars = self.covars_cholesky[i].copy()
         raw_covars[np.diag_indices_from(raw_covars)] = np.log(
             raw_covars[np.diag_indices_from(raw_covars)])
         flattened_covars[i] = raw_covars[np.tril_indices_from(raw_covars)]
     return flattened_covars.flatten()
Example #31
def heat_map():
    correlation_map = df[df.columns].corr()
    obj = np.array(correlation_map)
    obj[np.tril_indices_from(obj)] = False
    fig, ax = plt.subplots()
    fig.set_size_inches(15, 10)
    sns.heatmap(correlation_map, mask=obj, vmax=.7, square=True, annot=True)
    fig.savefig("1.png")
    return send_file("1.png")
Example #32
    def setUp(self):

        # N2O inventory of agricultural source categories for 2012.
        self.n2o_inv = [
            958.4, 1092.7, 497.8, 42.2, 7.3, 135.9, 1.7, 0.8, 51.1, 9539.9,
            4693.5, 472.6, 6171.3, 4750.5, 1315, 2213.5, 11596.4, 162.8
        ]

        # Corresponding list of source category descriptions.
        self.n2o_index = [
            "Manure management, dairy cows", "Manure management, other cattle",
            "Manure management, pigs", "Manure management, sheep",
            "Manure management, goats", "Manure management, horses",
            "Manure management, mules, asses", "Manure management, buffalo",
            "Manure management, poultry", "Soils, mineral fertilizers",
            "Soils, application of manure", "Soils, N fixing crops",
            "Soils, crop residues", "Soils, organic soils", "Soils, grazing",
            "Soils, indirect emissions (deposition)",
            "Soils, indirect emissions (leaching, run-off)",
            "Soils, sewage sludge emissions"
        ]
        # Uncertainty of inventory in %, half the 95 % confidence interval.
        self.n2o_percent = [
            100.1, 100.1, 100.1, 300.2, 300.7, 300.2, 316.2, 100.5, 100.5, 80,
            100, 94.3, 94.3, 200, 201, 111.8, 416.3, 82.5
        ]

        # Convert to absolute values in Gg.
        self.n2o_uncert = [
            a * b / 100 for a, b in zip(self.n2o_inv, self.n2o_percent)
        ]
        self.n2o_inv_uncert = np.sqrt(np.sum(np.square(self.n2o_uncert)))

        # Hypothetic covariance matrix for N2O emissions.
        l = len(self.n2o_percent)
        self.n2o_covmat = np.zeros(shape=(l, l))
        np.fill_diagonal(self.n2o_covmat, np.square(self.n2o_uncert))
        indu = np.triu_indices_from(self.n2o_covmat, 1)
        indl = np.tril_indices_from(self.n2o_covmat, -1)
        # Calculate covariances for an assumed correlation coefficient of 0.5.
        self.n2o_covmat[indu] = 0.5 * np.sqrt(self.n2o_covmat[
            (indu[0], indu[0])] * self.n2o_covmat[(indu[1], indu[1])])
        self.n2o_covmat[indl] = 0.5 * np.sqrt(self.n2o_covmat[
            (indl[0], indl[0])] * self.n2o_covmat[(indl[1], indl[1])])
        self.n2ocovsum = np.sqrt(self.n2o_covmat.sum())
        self.n2odiagsum = np.sqrt(np.sum(np.diag(self.n2o_covmat)))
        # Setup test raster file names and location.
        self.invin = os.path.join(os.path.dirname(__file__),
                                  "data/model_peat_examp_1.tiff")
        self.uncertin = os.path.join(os.path.dirname(__file__),
                                     "data/uncert_peat_examp_1.tiff")

        # Setup test vector file names and location.
        self.invvector = os.path.join(
            os.path.dirname(__file__), "data/n2o_eu_2010_inventory/"
            "n2o_eu_2010_inventory.shp")
Example #33
def net_sample_multinomial(A, minEdges, edgesPerSample=1, *args, **kwargs):
    """ NETWORK SAMPLING ALGORITHM:
    sample networks ties from multinomial distribution
    defined as 1/AAT[i,j] normalized by  sum(AAT[i>j])
    PROBLEM: doesn't sufficiently cluster the resulting network and doesn't
    return an exact number of ties, only at least as many as the specified
    minEdges.

    Parameters
    ----------
    A : ndarray
        matrix of eigenvectors from RESCAL_ALS tensor decomposition with negative values replaced by zeros
    minEdges : int
        number of edges (social ties) to be assigned in the network
        
    Returns
    ----------
    tuple
        tie probabilities : ndarray
            pairwise distances normalized by largest distance
        sampled network : ndarray
            binary matrix of assigned ties above cutoff yielding at least minEdges    
    """
    draws = int(np.ceil(minEdges * 1.2))
    # pairwise distances between observations
    dist = pdist(
        A)  # what matrix to use:  pdist(A) or just tril(AAT) directly?
    invdist = dist
    invdist[invdist != 0] = 1 / invdist[invdist != 0]  # prevent division by 0
    thetavec = invdist / np.sum(invdist)
    theta = squareform(thetavec)

    # multinomial sample
    n = np.shape(theta)[0]
    Z = np.zeros((n, n))
    # samp = sampleLinks(q=thetavec, edgesToDraw=1, draws=draws)
    y = np.random.multinomial(edgesPerSample, thetavec, draws)
    samp = np.asarray([
        np.mean([y[draw][item] for draw in range(draws)])
        for item in range(len(thetavec))
    ])
    samp = np.ceil(samp)

    # repeat until reaching enough network ties
    while np.sum(samp) < minEdges:
        draws = int(np.ceil(draws *
                            1.1))  #increase number of draws and try again
        #samp = sampleLinks(q=thetavec,edgesToDraw=1,draws=draws)
        y = np.random.multinomial(edgesPerSample, thetavec, draws)
        samp = np.asarray([
            np.mean([y[draw][item] for draw in range(draws)])
            for item in range(len(thetavec))
        ])
        samp = np.ceil(samp)

    Z[np.tril_indices_from(Z, k=-1)] = samp

    return (theta, Z)
Example #34
def gen_k_factor2(nobs=10000,
                  k=2,
                  idiosyncratic_ar1=False,
                  idiosyncratic_var=0.4,
                  k_ar=6):
    # Simulate bivariate VAR(6) for the factor
    ix = pd.period_range(start='1950-01', periods=1, freq='M')
    faux = pd.DataFrame([[0, 0]], index=ix, columns=['f1', 'f2'])
    mod = varmax.VARMAX(faux, order=(k_ar, 0), trend='n')
    A = np.zeros((2, 2 * k_ar))
    A[:, -2:] = np.array([[0.5, -0.2], [0.1, 0.3]])
    Q = np.array([[1.5, 0.2], [0.2, 0.5]])
    L = np.linalg.cholesky(Q)
    params = np.r_[A.ravel(), L[np.tril_indices_from(L)]]

    # Simulate the factors
    factors = mod.simulate(params, nobs)

    # Add in the idiosyncratic part
    faux = pd.Series([0], index=ix)
    mod_idio = sarimax.SARIMAX(faux, order=(1, 0, 0))
    phi = [0.7, -0.2] if idiosyncratic_ar1 else [0, 0.]
    tmp = factors.iloc[:, 0] + factors.iloc[:, 1]

    # Monthly variables
    endog_M = pd.concat([tmp.copy() for i in range(k)], axis=1)
    columns = []
    for i in range(k):
        endog_M.iloc[:, i] = (
            endog_M.iloc[:, i] +
            mod_idio.simulate([phi[0], idiosyncratic_var], nobs))
        columns += [f'yM{i + 1}_f2']
    endog_M.columns = columns

    # Monthly versions of quarterly variables
    endog_Q_M = pd.concat([tmp.copy() for i in range(k)], axis=1)
    columns = []
    for i in range(k):
        endog_Q_M.iloc[:, i] = (
            endog_Q_M.iloc[:, i] +
            mod_idio.simulate([phi[0], idiosyncratic_var], nobs))
        columns += [f'yQ{i + 1}_f2']
    endog_Q_M.columns = columns

    # Create quarterly versions of quarterly variables
    levels_M = 1 + endog_Q_M / 100
    levels_M.iloc[0] = 100
    levels_M = levels_M.cumprod()
    # log_levels_M = np.log(levels_M) * 100
    log_levels_Q = (
        np.log(levels_M).resample('Q', convention='e').sum().iloc[:-1] * 100)

    # Compute the quarterly growth rate series
    endog_Q = log_levels_Q.diff()

    return endog_M, endog_Q, factors
Example #35
def variance():

    data = pd.read_csv("data.csv")
    df = data.pivot("Ref Tree", "Simulated Tree", "variance")
    fig, ax = plt.subplots()
    mask = np.zeros_like(df)
    mask[np.tril_indices_from(mask)] = True

    sns.heatmap(df, mask=mask, annot=True, fmt=".3f", cmap="YlGnBu")
    fig.savefig("variance.png", dpi=300)
Example #36
def full_corrs(data):
    """Same- and cross-team correlations.
    Same-team correlations are above the diagonal;
    cross-team correlations are on and below the diagonal.

    """
    corr = same_team_corrs(data)
    tril_ixs = np.tril_indices_from(corr)
    corr.values[tril_ixs] = cross_team_corrs(data).values[tril_ixs]
    return corr
Example #37
 def _update(self):
     self.parameters = self.get_parameters()
     for k in range(self.num_comp):
         for j in range(self.num_process):
             temp = np.zeros((self.num_dim, self.num_dim))
             temp[np.tril_indices_from(temp)] = self.L_flatten[k,j,:].copy()
             temp[np.diag_indices_from(temp)] = np.exp(temp[np.diag_indices_from(temp)])
             # temp[np.diag_indices_from(temp)] = temp[np.diag_indices_from(temp)] ** 2
             self.L[k,j,:,:] = temp
             self.s[k,j] = mdot(self.L[k,j,:,:], self.L[k,j,:,:].T)
Example #38
def get_lower_tri(x, with_diagonal=False):
    """
    Returns the lower triangle of a provided matrix

    Inputs
        x (np.ndarray): 2D matrix to get triangle from
        with_diagonal (bool): if True, keeps the diagonal as part of lower triangle
    """
    k = 0 if with_diagonal else -1
    return x[np.tril_indices_from(x, k=k)]
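Usage sketch on a small matrix:

import numpy as np

m = np.arange(9).reshape(3, 3)
print(get_lower_tri(m))                      # [3 6 7]
print(get_lower_tri(m, with_diagonal=True))  # [0 3 4 6 7 8]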
Example #39
 def transform_covars_grad(self, internal_grad):
     grad = np.empty((self.num_latent, self.get_covar_size()),
                     dtype=np.float32)
     for j in range(self.num_latent):
         tmp = self._theano_transform_covars_grad(internal_grad[0, j],
                                                  self.covars_cholesky[j])
         tmp[np.diag_indices_from(tmp)] *= self.covars_cholesky[j][
             np.diag_indices_from(tmp)]
         grad[j] = tmp[np.tril_indices_from(self.covars_cholesky[j])]
     return grad.flatten()
Example #40
def flattened_to_symmetric(x):
    '''Convert a vector containing the elements of a lower triangular matrix into a full symmetric
       matrix
    '''
    n = triangular_root(len(x))
    new = np.zeros((n, n))
    inds = np.tril_indices_from(new)
    new[inds] = x
    new[(inds[1], inds[0])] = x
    return new
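triangular_root is defined elsewhere in the project; it presumably inverts len(x) = n * (n + 1) / 2. A minimal stand-in plus a usage sketch (note that, unlike Example #15, this layout packs the lower triangle row by row with the diagonal included):

import math
import numpy as np

def triangular_root(m):
    # hypothetical stand-in: solve m = n * (n + 1) / 2 for n
    return int((math.sqrt(8 * m + 1) - 1) / 2)

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
print(flattened_to_symmetric(x))
# [[1. 2. 4.]
#  [2. 3. 5.]
#  [4. 5. 6.]]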
Example #41
def predict(mu, sigma):
    r = batched_kl(mu, sigma, mu, sigma)
    np.fill_diagonal(r, np.inf)
    var_norms = np.linalg.norm(sigma, axis=1)
    sorted_norms = np.argsort(var_norms)
    rs = r[sorted_norms, :][:, sorted_norms]
    rs[np.tril_indices_from(rs)] = np.inf
    p = np.argmin(rs, 1)
    p[rs[np.arange(p.shape[0]), p] == np.inf] = -1
    return p, sorted_norms
Example #42
def blank_unused_triangle(_map, used_triangle):
    check_upper_lower(used_triangle)
    indices = np.tril_indices_from(_map)
    _out_map = _map.copy()
    _out_map[indices] = np.nan

    if used_triangle == "lower":
        _out_map = _out_map.T

    return _out_map
Example #43
def write_ltm(matrix, f):
    with open(f, "w") as f:
        x, y = np.tril_indices_from(matrix, -1)
        a = x[0]
        for idx in range(len(x)):
            pair = x[idx], y[idx]
            if a != pair[0]:
                f.write("\n")
                a = pair[0]
            f.write("%f," % matrix[pair])
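A usage sketch; the output path is hypothetical. Each row of the strict lower triangle lands on its own comma-separated line:

import numpy as np

m = np.arange(16, dtype=float).reshape(4, 4)
write_ltm(m, "ltm.csv")  # hypothetical output path
# ltm.csv:
# 4.000000,
# 8.000000,9.000000,
# 12.000000,13.000000,14.000000,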
Example #44
def get_initial_w_elements(prior_mean, prior_cov, n_out):
    # NOTE: This is a numpy function.

    # Do a cholesky on the prior
    prior_cov_chol = np.linalg.cholesky(prior_cov)

    # Extract the elements
    elts = np.tril_indices_from(prior_cov_chol)

    return prior_cov_chol[elts]
Example #45
def watts_and_strogatz(conn, p_conn=[0.1], bin=False):

    # scale conn data
    conn_vec = conn[np.tril_indices_from(conn, -1)]
    data = pd.Series(conn_vec[np.nonzero(conn_vec)])

    # generate data given a distribution
    def get_pdf(data, dist, size):

        # fit dist to data
        params = dist.fit(data)

        # separate parts of parameters
        arg = params[:-2]
        loc = params[-2]
        scale = params[-1]

        # get same start and end points of distribution
        start = dist.ppf(0.01, *arg, loc=loc,
                         scale=scale) if arg else dist.ppf(
                             0.01, loc=loc, scale=scale)
        end = dist.ppf(0.99, *arg, loc=loc, scale=scale) if arg else dist.ppf(
            0.99, loc=loc, scale=scale)

        # build PDF and turn into pandas Series
        x = np.linspace(start, end, size)
        pdf = dist.pdf(x, loc=loc, scale=scale, *arg)

        return pdf

    # binarize conn data
    conn_bin = conn.astype(bool).astype(int)
    deg = int(np.mean(np.sum(conn_bin, axis=0)))
    N = len(conn_bin)

    # create networks
    networks = []
    for p in p_conn:

        # create watts_strogatz graph
        G = nx.watts_strogatz_graph(N, deg, p)
        network = nx.to_numpy_array(G)

        if not bin:
            # assign weights to conns
            mask = np.nonzero(network)
            actual_conns = conn[mask]
            new_conns = get_pdf(data, st.powerlognorm, len(mask[0]))
            network[mask] = new_conns[np.argsort(actual_conns)]

        # save weighted network
        networks.append(network)

    return np.dstack(networks)
Example #46
 def merge_layers(self, dest_layer, src1_layer, src2_layer):
     w1 = src1_layer.get_weights()
     w2 = src2_layer.get_weights()
     res = w1.copy()
     if type(w1) is list:
         half = round(len(w1) / 2)
         res[half:-1] = w2[half:-1]
     else:
         l_indices = np.tril_indices_from(w2)
         res[l_indices] = w2[l_indices]
     dest_layer.set_weights(res)
Example #47
def test_multiple_missing(rg):
    n_sample = 50
    n_cov = 2
    n_pheno = 31
    phenotype_df = pd.DataFrame(random_phenotypes((n_sample, n_pheno), rg))
    Y = phenotype_df.to_numpy()
    Y[np.tril_indices_from(Y, k=-20)] = np.nan
    assert phenotype_df.isna().sum().sum() > 0
    covariate_df = pd.DataFrame(rg.random((n_sample, n_cov)))
    genotype_df = pd.DataFrame(rg.random((n_sample, 1)))
    assert_glow_equals_golden(genotype_df, phenotype_df, covariate_df)
Example #48
def create_connect(xyz, min, max):
    """Create connectivity dataset."""
    # Create a random connection dataset :
    connect = 100. * np.random.rand(len(xyz), len(xyz))
    # Mask the connection array :
    connect = np.ma.masked_array(connect, False)
    # Hide lower triangle :
    connect.mask[np.tril_indices_from(connect.mask)] = True
    # Hide connections that are not between min and max :
    connect.mask[np.logical_or(connect.data < min, connect.data > max)] = True
    return connect
Example #49
 def correlationMatrix(self):
     cor_mat = self.dframe[:].corr()
     mask = np.array(cor_mat)
     mask[np.tril_indices_from(mask)] = False
     fig = plt.gcf()
     fig.set_size_inches(30, 12)
     sns.heatmap(data=cor_mat,
                 mask=mask,
                 square=True,
                 annot=True,
                 cbar=True)
Example #50
 def transform_eye_grad(self):
     """
     In the case of a posterior distribution with one component, the gradient
     of the entropy term wrt the posterior covariance is the identity. This
     function returns the flattened lower-triangular terms of the identity
     matrices for all processes.
     """
     grad = np.empty((self.num_comp, self.num_process, self.get_sjk_size()))
     meye = np.eye(self.num_dim)[np.tril_indices_from(self.L[0, 0])]
     for k in range(self.num_comp):
         for j in range(self.num_process):
             grad[k,j] = meye
     return grad.flatten()
Example #51
def create_connect(xyz, min, max):
    """Create connectivity dataset."""
    # Create a random connection dataset :
    connect = np.random.uniform(-100., 100., (len(xyz), len(xyz)))
    # Mask the connection array :
    connect = np.ma.masked_array(connect, False)
    # Hide lower triangle :
    connect.mask[np.tril_indices_from(connect.mask)] = True
    # Hide connections that are not between min and max :
    connect.mask[np.logical_or(connect.data < min,
                               connect.data > max)] = True
    return connect
Example #52
 def update_covariance(self, j, Sj):
     Sj = Sj.copy()
     mm = min(Sj[np.diag_indices_from(Sj)])
     if mm < 0:
         Sj[np.diag_indices_from(Sj)] = Sj[np.diag_indices_from(Sj)] - 1.1 * mm
     for k in range(self.num_comp):
         self.s[k,j] = Sj.copy()
         self.L[k,j] = jitchol(Sj,10)
         tmp = self.L[k,j].copy()
         tmp[np.diag_indices_from(tmp)] = np.log(tmp[np.diag_indices_from(tmp)])
         self.L_flatten[k,j] = tmp[np.tril_indices_from(tmp)]
     self._update()
Example #53
 def net_sample_deterministic(AATnn, minEdges, *args, **kwargs):
     """
     """
     theta = AATnn / AATnn.max()
     n = np.shape(AATnn)[0]
     sv = AATnn[np.tril_indices_from(AATnn, k=-1)]  # pull singular values from the lower triangle
     cutOff = ncFunctions.top_n_edges(data=sv, minEdges=minEdges,
                                      n=n)['cutOff']
     Z = np.zeros((n, n))
     Z[np.where(AATnn >= cutOff)] = 1
     
     return (theta, Z)
Example #54
 def to_matrix(self):
     vector = self.get_parameter_vector(include_frozen=True)
     if self.metric_type == 0:
         return np.exp(vector) * np.eye(len(self.axes))
     elif self.metric_type == 1:
         return np.diag(np.exp(vector))
     else:
         n = len(self.axes)
         L = np.zeros((n, n))
         L[np.tril_indices_from(L)] = vector
         i = np.diag_indices_from(L)
         L[i] = np.exp(L[i])
         return np.dot(L, L.T)
Example #55
def doubleMutant(data, refVariant, libSeq, 
                 startPos=1, refSignal=None, normToRefSignal=True, coop=False,
                 vmin=None, vmax=None, cmap=None, center=0, cbarLabel=None,
                 triangle=None, invertY=True, linewidth=3, **kwargs):
    """Plot double mutant heatmap given a reference and library sequence"""
    # Define reference signal as the signal of the reference variant if 
    # refSignal not provided
    if refSignal is None:
        refSignal = data[refVariant]
    
    # Normalize data to reference signal if normToRefSignal=True
    if normToRefSignal:
        data_norm = data / refSignal
    else:
        data_norm = data

    # Generate the double mutant matrix
    doubleMutantSignals, mutantLabels = doubleMutantMatrix(data_norm, refVariant, 
                                                           libSeq, startPos, coop)

    # Create mask for triangular matrix if requested
    mask = np.zeros_like(doubleMutantSignals, dtype=bool)
    if triangle == 'lower':
        mask[np.tril_indices_from(mask)] = True
        mask = np.invert(mask)
    elif triangle == 'upper':
        mask[np.triu_indices_from(mask)] = True
        mask = np.invert(mask)

    # Plot the double mutant heatmap
    if cmap is None:
        cmap = RdYlBu_r2()
    ax = sns.heatmap(doubleMutantSignals, 
                     mask=mask, square=True, robust=True,
                     vmin=vmin, vmax=vmax, center=center, cmap=cmap, 
                     xticklabels=mutantLabels, yticklabels=mutantLabels, 
                     cbar_kws={'label': cbarLabel}, **kwargs)
    cax = plt.gcf().axes[-1]
    if invertY:
        ax.invert_yaxis()

    # Draw white lines separating the triplets
    dim = len(mutantLabels)
    for x in range(3, dim, 3):
        ax.plot([x, x], [0, dim], color='white', linewidth=linewidth)
    for y in range(3, dim, 3):
        ax.plot([0, dim], [y, y], color='white', linewidth=linewidth)

    return ax, cax
Example #56
def JS_dismat(P, fill_tril=True):
    """
    Compute the distance matrix for a set of distributions P by computing
    pairwise Jensen-Shannon divergences.
    """
    # Need to replace it with a faster way
    dismat = np.zeros((P.shape[0], P.shape[0]))
    for i,j in zip(*np.triu_indices_from(dismat, k=1)):
        dismat[i,j] = JS_divergence(P[i,:], P[j,:])

    if fill_tril:
        indices = np.tril_indices_from(dismat, -1)
        dismat[indices] = dismat.T[indices]

    return dismat
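JS_divergence is assumed to be defined elsewhere in the project. A plausible stand-in uses scipy, whose jensenshannon returns the JS distance (the square root of the divergence), hence the squaring:

import numpy as np
from scipy.spatial.distance import jensenshannon

def JS_divergence(p, q):
    # assumed stand-in for the project's own implementation
    return jensenshannon(p, q) ** 2

P = np.array([[0.2, 0.8], [0.5, 0.5], [0.9, 0.1]])
print(JS_dismat(P))  # symmetric 3x3 matrix with a zero diagonal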
Example #57
    def correlation(self):
        """
        The correlation between all combinations of trials

        Returns
        -------
        (r,e) : tuple
           r is the mean correlation and e is the mean error of the correlation
           (with df = n_trials - 1)
        """

        c = np.corrcoef(self.input.data)
        c = c[np.tril_indices_from(c, -1)]

        return np.mean(c), stats.sem(c)
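A standalone sketch of the same computation for an (n_trials, n_samples) array, with random data standing in for self.input.data:

import numpy as np
from scipy import stats

trials = np.random.randn(5, 100)    # 5 trials, 100 samples each
c = np.corrcoef(trials)
c = c[np.tril_indices_from(c, -1)]  # the 10 unique trial pairs
print(np.mean(c), stats.sem(c))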
Example #58
def tril_indices_from(arr, k=0):
    """Return the indices for the lower-triangle of an (n, n) array.

    See tril_indices() for full details.

    Parameters
    ----------
    arr : ndarray
      Array whose shape determines the size for which the returned
      indices are valid.

    k : int, optional
      Diagonal offset (see tril() for details).

    """
    return np.tril_indices_from(arr, k)
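A quick check of the indices the wrapper forwards for a 3x3 array:

import numpy as np

a = np.zeros((3, 3))
rows, cols = tril_indices_from(a)
print(rows)  # [0 1 1 2 2 2]
print(cols)  # [0 0 1 0 1 2]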
Example #59
    def slotted_autocorrelation(self, data, time, T, K,
                                second_round=False, K1=100):

        slots = np.zeros((K, 1))
        i = 1

        # make time start from 0
        time = time - np.min(time)

        # subtract mean from mag values
        m = np.mean(data)
        data = data - m

        prod = np.zeros((K, 1))
        pairs = np.subtract.outer(time, time)
        pairs[np.tril_indices_from(pairs)] = 10000000

        ks = np.int64(np.floor(np.abs(pairs) / T + 0.5))

        # We calculate the slotted autocorrelation for k=0 separately
        idx = np.where(ks == 0)
        prod[0] = ((sum(data ** 2) + sum(data[idx[0]] *
                   data[idx[1]])) / (len(idx[0]) + len(data)))
        slots[0] = 0

        # We calculate it for the rest of the ks
        if second_round is False:
            for k in np.arange(1, K):
                idx = np.where(ks == k)
                if len(idx[0]) != 0:
                    prod[k] = sum(data[idx[0]] * data[idx[1]]) / (len(idx[0]))
                    slots[i] = k
                    i = i + 1
                else:
                    prod[k] = np.inf
        else:
            for k in np.arange(K1, K):
                idx = np.where(ks == k)
                if len(idx[0]) != 0:
                    prod[k] = sum(data[idx[0]] * data[idx[1]]) / (len(idx[0]))
                    slots[i - 1] = k
                    i = i + 1
                else:
                    prod[k] = np.inf
            np.trim_zeros(prod, trim='b')

        slots = np.trim_zeros(slots, trim='b')
        return prod / prod[0], np.int64(slots).flatten()
Example #60
    def get_matrix(self):
        """Return the current internal matrix.

        Returns
        -------
        M : ndarray, shape (n, n)
            Dense matrix containing either the Hessian or its inverse
            (depending on how `approx_type` was defined).
        """
        if self.approx_type == 'hess':
            M = np.copy(self.B)
        else:
            M = np.copy(self.H)
        li = np.tril_indices_from(M, k=-1)
        M[li] = M.T[li]
        return M