Example #1
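Most of these snippets come from MELD's test suite (test_meld.py in the kvshams/MELD project) and assume a shared preamble that the listing does not show. A minimal sketch of those imports, inferred from the names used in the code (exact import paths for the test helpers are not shown in the source):

import numpy as np
import pandas as pd
import graphtools as gt
import meld
from packaging import version

# make_batches and the assert_raise_message / assert_raises_message / assert_warns_message
# helpers are defined in MELD's own test utilities; the module they are imported from
# is not shown in these snippets, so no import line is given for them here.
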
def test_meld():
    # MELD operator
    # Numerical accuracy
    np.random.seed(42)

    def norm(x):
        x = x.copy()
        x = x - np.min(x)
        x = x / np.max(x)
        return x

    D = np.random.normal(0, 2, (1000, 2))
    RES = np.random.binomial(1, norm(D[:, 0]), 1000)
    G = gt.Graph(D, knn=20, decay=10, use_pygsp=True)

    meld_op = meld.MELD()
    B = meld_op.fit_transform(G, RES)

    if version.parse(np.__version__) < version.parse('1.17'):
        np.testing.assert_allclose(np.sum(B), 532.0001992193013)
    else:
        np.testing.assert_allclose(np.sum(B), 519.0001572740623)

    meld_op = meld.MELD()
    B = meld_op.fit_transform(gt.Graph(
        D, knn=20, decay=10, use_pygsp=False), RES)

    if version.parse(np.__version__) < version.parse('1.17'):
        np.testing.assert_allclose(np.sum(B), 532.0001992193013)
    else:
        np.testing.assert_allclose(np.sum(B), 519.0001572740623)

    # lap type TypeError
    lap_type = 'hello world'
    assert_raise_message(
        TypeError,
        "lap_type must be 'combinatorial'"
        " or 'normalized'. Got: '{}'".format(lap_type),
        meld.MELD(lap_type=lap_type).fit,
        G=G)

    # RES wrong shape
    RES = np.ones([2, G.N + 100])
    assert_raise_message(
        ValueError,
        "Input data ({}) and input graph ({}) "
        "are not of the same size".format(RES.shape, G.N),
        meld_op.fit_transform,
        RES=RES,
        G=G)

    # lap reconversion warning
    assert_warns_message(
        RuntimeWarning,
        "Changing lap_type may require recomputing the Laplacian",
        meld_op.fit,
        G=gt.Graph(D, knn=20, decay=10, use_pygsp=True, lap_type='normalized'))
Example #2
File: test_meld.py Project: kvshams/MELD
def test_meld_labels_non_numeric():
    data = np.random.normal(size=(100, 2))

    sample_labels = np.random.choice(["A", "B"], size=100)
    meld_op = meld.MELD()
    meld_op.fit_transform(data, sample_labels)

    sample_labels = np.random.choice(["A", "B", "C"], size=100)
    meld_op = meld.MELD()
    sample_densities = meld_op.fit_transform(data, sample_labels)
    assert np.all(sample_densities.columns == ["A", "B", "C"])
Example #3
def run_meld(X_red_dim, sample_labels, conditions, k=15):
    '''
    Run MELD
    - X_red_dim: cells x dims matrix of reduced-dimension coordinates used for graph construction
    - sample_labels: assignment of cells to samples
    - conditions: vector of condition names
    '''
    ## Make graph
    graph = gt.Graph(X_red_dim, knn=int(k))
    ## Make MELD object
    meld_op = meld.MELD()
    meld_op.graph = graph
    ## Compute density
    meld_fit = meld_op.transform(sample_labels=np.array(sample_labels))

    ## Mean density across replicates for each condition
    mean_density = pd.DataFrame(
        np.zeros(shape=(meld_fit.shape[0], len(conditions))),
        index=meld_fit.index,
        columns=conditions,
    )

    for c in conditions:
        c_mean = meld_fit.loc[:, [c in x for x in meld_fit.columns]].mean(1)
        mean_density[c] = c_mean

    ## From density to likelihood per condition
    likelihoods = meld.utils.normalize_densities(mean_density)
    likelihoods.columns = [col.split("_")[0] for col in likelihoods.columns]
    return likelihoods
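
A minimal usage sketch for run_meld; the variable names below are illustrative (not from the source), and it assumes sample labels of the form '<condition>_<replicate>' so that the substring match on meld_fit.columns above groups replicates by condition:

X_pca = np.random.normal(size=(500, 20))  # stand-in for a cells x dims embedding
sample_labels = np.random.choice(
    ["ctrl_rep1", "ctrl_rep2", "treat_rep1", "treat_rep2"], size=500)
likelihoods = run_meld(X_pca, sample_labels, conditions=["ctrl", "treat"], k=15)
# likelihoods: cells x conditions DataFrame of per-condition relative likelihoods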
Example #4
File: test_meld.py Project: kvshams/MELD
def test_sample_labels_2d():
    labels = np.ones((10, 2))
    with assert_raises_message(
            ValueError,
            "sample_labels must be a single column. Got"
            "shape={}".format(labels.shape),
    ):
        meld.MELD()._create_sample_indicators(labels)
Example #5
    def setUpClass(self):
        # VertexFrequencyCluster
        # Custom window sizes
        self.window_sizes = np.array([2, 4, 8, 24])
        data, self.labels = make_batches(n_pts_per_cluster=100)
        self.G = gt.Graph(data, sample_idx=self.labels, use_pygsp=True)
        meld_op = meld.MELD()
        self.EES = meld_op.fit_transform(G=self.G, RES=self.labels)
Example #6
def test_utils():
    data, labels = make_batches(n_pts_per_cluster=250)
    G = gt.Graph(data, sample_idx=labels, use_pygsp=True)
    EES = meld.MELD().fit_transform(G, labels)

    clusters = meld.VertexFrequencyCluster().fit_predict(G=G,
                                                         RES=labels,
                                                         EES=EES)
    meld.utils.sort_clusters_by_meld_score(clusters, EES)
Example #7
File: test_meld.py Project: kvshams/MELD
def test_sample_labels_one_sample():
    data = np.random.normal(size=(100, 2))
    labels = np.ones(100)
    with assert_raises_message(
            ValueError,
            "Found only one unqiue sample label. Cannot estimate density "
            "of a single sample.",
    ):
        meld.MELD().fit_transform(data, labels)
Example #8
    def test_2d(self):
        RES = np.array([self.labels, self.labels]).T
        vfc_op = meld.VertexFrequencyCluster(
            window_sizes=self.window_sizes)
        meld_op = meld.MELD()
        EES = meld_op.fit_transform(G=self.G, RES=RES)
        clusters = vfc_op.fit_predict(
            self.G, RES=RES,
            EES=EES)
        assert len(clusters) == len(self.labels)
Example #9
File: test_meld.py Project: kvshams/MELD
def test_mnn():
    data, labels = make_batches(n_pts_per_cluster=250)
    meld_op = meld.MELD(verbose=0)
    sample_densities = meld_op.fit_transform(data, labels, sample_idx=labels)
    sample_likelihoods = meld.utils.normalize_densities(sample_densities)
    meld.VertexFrequencyCluster().fit_transform(
        G=meld_op.graph,
        sample_indicator=meld_op.sample_indicators["expt"],
        likelihood=sample_likelihoods["expt"],
    )
Example #10
    def test_RES_EES_shape(self):
        RES = np.array([self.labels, self.labels]).T
        vfc_op = meld.VertexFrequencyCluster(
            window_sizes=self.window_sizes)
        meld_op = meld.MELD()
        EES = meld_op.fit_transform(G=self.G, RES=RES)
        assert_raise_message(
            ValueError,
            '`RES` and `EES` must have the same shape.'
            'Got RES: {} and EES: {}'.format(str(RES[:, 1].shape), str(EES.shape)),
            vfc_op.fit_predict, G=self.G, RES=RES[:, 1], EES=EES)
Example #11
File: test_meld.py Project: kvshams/MELD
def test_meld_invalid_lap_type():
    data = np.random.normal(0, 2, (1000, 2))
    # lap type TypeError
    lap_type = "hello world"
    with assert_raises_message(
            ValueError,
            "lap_type value {} not recognized. "
            "Choose from ['combinatorial', 'normalized']".format(lap_type),
    ):
        meld.MELD(verbose=0, lap_type=lap_type).fit(data)
Example #12
File: test_meld.py Project: kvshams/MELD
    def setUpClass(self):
        # VertexFrequencyCluster
        # Custom window sizes
        self.window_sizes = np.array([2, 4, 8, 24])
        self.data, self.sample_labels = make_batches(n_pts_per_cluster=100)
        meld_op = meld.MELD(verbose=0)
        self.densities = meld_op.fit_transform(
            self.data, sample_labels=self.sample_labels)
        self.sample_indicators = meld_op.sample_indicators
        self.likelihoods = meld.utils.normalize_densities(self.densities)
        self.G = meld_op.graph
Example #13
File: test_meld.py Project: kvshams/MELD
def test_meld(filter):
    # MELD operator
    # Numerical accuracy
    np.random.seed(42)

    def norm(x):
        x = x.copy()
        x = x - np.min(x)
        x = x / np.max(x)
        return x

    data = np.random.normal(0, 2, (1000, 2))
    sample_labels = np.random.binomial(1, norm(data[:, 0]), 1000)
    sample_labels = np.array(
        ["treat" if val else "ctrl" for val in sample_labels])

    meld_op = meld.MELD(
        verbose=0,
        knn=20,
        decay=10,
        thresh=0,
        anisotropy=0,
        filter=filter,
        solver="exact",
        sample_normalize=False,
    )
    densities = meld_op.fit_transform(data, sample_labels)
    expt_density = densities.iloc[:, 1]

    if version.parse("1.17") <= version.parse(
            np.__version__) < version.parse("1.18"):
        if meld_op.filter == "laplacian":
            np.testing.assert_allclose(np.sum(expt_density), 519)
        else:
            np.testing.assert_allclose(np.sum(expt_density), 519)
    else:
        if meld_op.filter == "laplacian":
            np.testing.assert_allclose(np.sum(expt_density), 532)
        else:
            np.testing.assert_allclose(np.sum(expt_density), 532)

    # check changing filter params resets filter
    meld_op.set_params(beta=meld_op.beta + 1)
    assert meld_op.sample_densities is None

    meld_op.fit_transform(data, sample_labels)
    assert meld_op.sample_densities is not None

    # check changing graph params resets filter
    meld_op.set_params(knn=meld_op.knn + 1)
    assert meld_op.graph is None
    assert meld_op.sample_densities is None
Example #14
File: test_meld.py Project: kvshams/MELD
def test_meld_labels_wrong_shape():
    data = np.random.normal(0, 2, (100, 2))
    # sample_indicator wrong shape
    sample_labels = np.ones([101, 2], dtype=str)
    with assert_raises_message(
            ValueError,
            "Input data ({}) and input graph ({}) "
            "are not of the same size".format(sample_labels.shape,
                                              data.shape[0]),
    ):
        meld.MELD(verbose=0).fit_transform(
            X=data,
            sample_labels=sample_labels,
        )
Example #15
    def calculate_EES(self, data=None, **kwargs):
        np.random.seed(self.seed)
        if not self.graph:
            try:
                self.fit_graph(data)
            except NameError:
                raise NameError(
                    "Must pass `data` unless graph has already been fit")

        self.meld_op = meld.MELD(**kwargs, verbose=False).fit(self.graph)
        self.EES = self.meld_op.transform(self.sample_labels)
        self.EES = self.EES["expt"].values  # Only keep the expt condition
        self.estimates["EES"] = self.EES
        return self.EES
Example #16
    def calculate_MELD_likelihood(self, data=None, **kwargs):
        np.random.seed(self.seed)
        if not self.graph:
            if data is not None:
                self.fit_graph(data)
            else:
                raise NameError(
                    "Must pass `data` unless graph has already been fit")

        self.meld_op = meld.MELD(**kwargs, verbose=False).fit(self.graph)
        self.sample_densities = self.meld_op.transform(self.sample_labels)
        self.sample_likelihoods = meld.utils.normalize_densities(
            self.sample_densities)
        self.expt_likelihood = self.sample_likelihoods[
            "expt"].values  # Only keep the expt condition
        return self.expt_likelihood
Example #17
File: test_meld.py Project: kvshams/MELD
def test_meld_label_2d():
    data = np.random.normal(0, 2, (100, 2))
    # Create a dataframe with an index
    index = pd.Index(["cell_{}".format(i) for i in range(100)])
    columns = pd.Index(["A"])
    sample_labels = pd.DataFrame(
        np.concatenate([np.zeros((50, 1)), np.ones((50, 1))]),
        index=index,
        columns=columns,
        dtype=str,
    )
    meld_op = meld.MELD(verbose=0)

    meld_op.fit_transform(
        X=data,
        sample_labels=sample_labels,
    )
Example #18
File: test_meld.py Project: kvshams/MELD
def test_meld_label_dataframe():
    data = np.random.normal(0, 2, (100, 2))
    # Create a dataframe with an index
    index = pd.Index(["cell_{}".format(i) for i in range(100)])
    sample_labels = pd.DataFrame(
        np.concatenate([np.zeros(50), np.ones(50)]),
        index=index,
        columns=["sample_labels"],
        dtype=str,
    )

    meld_op = meld.MELD(verbose=0)
    sample_densities = meld_op.fit_transform(
        X=data,
        sample_labels=sample_labels,
    )
    assert np.all(sample_densities.index == index)
    assert np.all(
        sample_densities.columns == pd.Index(np.unique(sample_labels)))
Example #19
def test_utils():
    data, labels = make_batches(n_pts_per_cluster=250)
    G = gt.Graph(data, sample_idx=labels, use_pygsp=True)
    meld_op = meld.MELD()
    sample_densities = meld_op.fit_transform(G, labels)
    sample_likelihoods = meld.utils.normalize_densities(sample_densities)

    meld.VertexFrequencyCluster().fit_predict(
        G=G,
        sample_indicator=meld_op.sample_indicators["expt"],
        likelihood=sample_likelihoods["expt"],
    )

    meld.utils.get_meld_cmap()

    # Test normalize_densities
    # Three samples
    densities = np.ones([100, 3])
    meld.utils.normalize_densities(sample_densities=densities)

    # Two samples
    densities = np.ones([100, 2])
    meld.utils.normalize_densities(sample_densities=densities)
Example #20
                           gamma=0,
                           n_jobs=-1,
                           random_state=rs)
    adata.obsm['X_phate'] = phate_op.fit_transform(G.K)

    if True:
        # save adata obj with batch correction
        adata.write(os.path.join(pdfp, 'mouse_MT_bbknn.h5ad'))
        print('\n... saved @' + datetime.datetime.now().strftime('%y%m%d.%H:%M:%S'))
    print('... full PHATE in {:.2f}-min'.format((time.time() - start) / 60))

    if True:
        # MELD
        adata.obs['res_sca1'] = [1 if i == 'SCA1' else -1 for i in adata.obs['genotype']]
        adata.obs['ees_sca1'] = meld.MELD().fit_transform(G=G, RES=adata.obs['res_sca1'])
        adata.obs['ees_sca1'] = adata.obs['ees_sca1'] - adata.obs['ees_sca1'].mean()  # mean center
        if True:
            # save adata obj with batch correction
            adata.write(os.path.join(pdfp, 'mouse_MT_bbknn.h5ad'))
            print('\n... saved @' + datetime.datetime.now().strftime('%y%m%d.%H:%M:%S'))

    if True:
        # MAGIC
        magic_op = magic.MAGIC().fit(X=adata.X, graph=G)  # running fit_transform produces wrong shape
        adata.layers['imputed_bbknn'] = magic_op.transform(adata.X, genes='all_genes')
        # adata.layers['imputed_bbknn'] = sparse.csr_matrix(magic_op.transform(adata.X, genes='all_genes'))  # causes memory spike

        if True:
            # save adata obj with batch correction & imputation
            adata.write(os.path.join(pdfp, 'mouse_MT_bbknn.h5ad'))
Example #21
def test_mnn():
    data, labels = make_batches(n_pts_per_cluster=250)
    G = gt.Graph(data, sample_idx=labels, use_pygsp=True)
    meld_op = meld.MELD()
    EES = meld_op.fit_transform(G, labels)
    meld.VertexFrequencyCluster().fit_transform(G=G, RES=labels, EES=EES)
Example #22
        if False:
            wt = utils.adata_phate(wt)
            mut = utils.adata_phate(mut)

        # MELD
        G = gt.Graph(data=wt.obsp['connectivities'] +
                     sparse.diags([1] * wt.shape[0], format='csr'),
                     precomputed='adjacency',
                     use_pygsp=True)
        G.knn_max = None
        wt.obs['res_t'] = -1
        wt.obs.loc[wt.obs['timepoint'] == '12wk', 'res_t'] = -0.5
        wt.obs.loc[wt.obs['timepoint'] == '18wk', 'res_t'] = 0
        wt.obs.loc[wt.obs['timepoint'] == '24wk', 'res_t'] = 0.5
        wt.obs.loc[wt.obs['timepoint'] == '30wk', 'res_t'] = 1
        wt.obs['ees_t'] = meld.MELD().fit_transform(G=G, RES=wt.obs['res_t'])
        wt.obs['ees_t'] = (wt.obs['ees_t'] - wt.obs['ees_t'].min()) / (
            wt.obs['ees_t'].max() - wt.obs['ees_t'].min())

        G = gt.Graph(data=mut.obsp['connectivities'] +
                     sparse.diags([1] * mut.shape[0], format='csr'),
                     precomputed='adjacency',
                     use_pygsp=True)
        G.knn_max = None
        mut.obs['res_t'] = -1
        mut.obs.loc[mut.obs['timepoint'] == '12wk', 'res_t'] = -0.5
        mut.obs.loc[mut.obs['timepoint'] == '18wk', 'res_t'] = 0
        mut.obs.loc[mut.obs['timepoint'] == '24wk', 'res_t'] = 0.5
        mut.obs.loc[mut.obs['timepoint'] == '30wk', 'res_t'] = 1
        mut.obs['ees_t'] = meld.MELD().fit_transform(G=G, RES=mut.obs['res_t'])
        mut.obs['ees_t'] = (mut.obs['ees_t'] - mut.obs['ees_t'].min()) / (
            mut.obs['ees_t'].max() - mut.obs['ees_t'].min())
Example #23
fname = 'scv2_200428.h5ad'
adata = loader(fname, pdfp)

# meld
# map conditions to an ordinal RES value per time point
adata.obs['res_t'] = adata.obs['Condition'].astype(str)
adata.obs.loc[adata.obs['Condition'] == 'Mock', 'res_t'] = 0
adata.obs.loc[adata.obs['Condition'] == '1dpi', 'res_t'] = 1
adata.obs.loc[adata.obs['Condition'] == '2dpi', 'res_t'] = 2
adata.obs.loc[adata.obs['Condition'] == '3dpi', 'res_t'] = 3

G = gt.Graph(data=adata.uns['neighbors']['connectivities'] +
             sparse.diags([1] * adata.shape[0], format='csr'),
             precomputed='adjacency',
             use_pygsp=True)
G.knn_max = None
adata.obs['ees_t'] = meld.MELD().fit_transform(
    G=G, RES=adata.obs['res_t'].to_numpy(dtype=float))
adata.obs['ees_t'] = adata.obs['ees_t'] - adata.obs['ees_t'].mean()  # mean center

del G

# cluster genes
random_genes = False

if random_genes:
    genes = adata.var_names.to_list()
    genes = random.sample(genes, 10)
else:
    genes = adata.var_names.to_list()
#     genes=[int(sys.argv[1]:int(sys.argv[2]))]
print('Aggregating data')