def test_unaligned_regression_genes(self):
    """
    Run AMuSR regression on two tasks whose response genes only partly overlap.

    The first task's response has columns gene1/gene2 and the second has
    gene1/gene3, while the regression is asked for all three genes. Each
    entry of the regression output is concatenated and compared against the
    matching expected frame, and the output is then handed to pileup_data.
    """
    tfs = ['tf1', 'tf2', 'tf3']
    targets = ['gene1', 'gene2', 'gene3']
    task_targets = (['gene1', 'gene2'], ['gene1', 'gene3'])
    result_columns = ['regulator', 'target', 'weights', 'resc_weights']

    # A fresh array is built for every task so no buffers are shared between tasks
    des = [
        InferelatorData(pd.DataFrame(np.array([[1, 1, 3], [0, 0, 2], [0, 0, 1]]).astype(float),
                                     columns=tfs))
        for _ in task_targets
    ]
    res = [
        InferelatorData(pd.DataFrame(np.array([[1, 1], [2, 2], [3, 3]]).astype(float),
                                     columns=task_genes))
        for task_genes in task_targets
    ]
    priors = pd.DataFrame([[0, 1, 1], [1, 0, 1], [1, 0, 1]], index=targets, columns=tfs)

    r = amusr_regression.AMuSR_regression(des, res, tfs=tfs, genes=targets, priors=priors,
                                          use_numba=self.use_numba)

    def expected_frame(rows, first_level_codes):
        # First index level uses the given codes; the second level code is always 0
        idx = pd.MultiIndex(levels=[[0, 1], [0]],
                            codes=[first_level_codes, [0] * len(first_level_codes)])
        return pd.DataFrame(rows, index=idx, columns=result_columns)

    out = [
        expected_frame([['tf3', 'gene1', -1, 1], ['tf3', 'gene1', -1, 1]], [0, 1]),
        expected_frame([['tf3', 'gene2', -1, 1]], [0]),
        expected_frame([['tf3', 'gene3', -1, 1]], [1]),
    ]

    regress_data = r.regress()
    for i, expected in enumerate(out):
        pdt.assert_frame_equal(pd.concat(regress_data[i]), expected, check_dtype=False)

    # Unpacking verifies that pileup_data returns exactly two objects
    weights, resc_weights = r.pileup_data(regress_data)
def test_two_genes_nonzero_clr_nonzero(self):
    """
    Run BBSR on two genes with a nonzero CLR matrix after set_all_zero_priors().

    Both the betas and the rescaled betas are expected to be 2x2 float
    frames filled with zeros.
    """
    genes = ['gene1', 'gene2']
    self.set_all_zero_priors()

    self.X = InferelatorData(pd.DataFrame([1, 2], index=genes, columns=['ss']),
                             transpose_expression=True)
    self.Y = InferelatorData(pd.DataFrame([1, 2], index=genes, columns=['ss']),
                             transpose_expression=True)
    self.clr = pd.DataFrame([[.1, .1], [.1, .2]], index=genes, columns=genes)

    (betas, resc) = self.run_bbsr()

    for result in (betas, resc):
        self.assert_matrix_is_square(2, result)

    expected_zeros = pd.DataFrame([[0, 0], [0, 0]], index=genes, columns=genes).astype(float)
    pdt.assert_frame_equal(betas, expected_zeros)
    pdt.assert_frame_equal(resc, expected_zeros)
def compute_common_data(self):
    """
    Compute common data structures like design and response matrices.

    Sets self.design, self.response and self.half_tau_response, then drops
    the reference held in self.data.
    """
    # Instantiate the design-response driver only if one has been configured
    if self.drd_driver is None:
        drd = None
    else:
        drd = self.drd_driver(metadata_handler=self.metadata_handler, return_half_tau=True)

    # The driver is only used when it exists and accepts the metadata
    if drd is not None and drd.validate_run(self.data.meta_data):
        # Calculate the design-response ODE
        # TODO: Rewrite DRD for InferelatorData
        # TODO: This is *horrifying* as is from a memory perspective
        # TODO: Really fix this soon
        Debug.vprint('Creating design and response matrix ... ')
        drd.delTmin, drd.delTmax, drd.tau = self.delTmin, self.delTmax, self.tau
        design, response, half_tau_response = drd.run(self.data.to_df().T, self.data.meta_data)
        self.design = InferelatorData(design.T)
        self.response = InferelatorData(response.T)
        self.half_tau_response = InferelatorData(half_tau_response.T)
    else:
        # If there is no design-response driver set, use the expression data for design and response
        # Also do this if there is no usable metadata
        self.design, self.response, self.half_tau_response = self.data, self.data, self.data

    shape_message = "Constructed design {d} and response {r} matrices".format(d=self.design.shape,
                                                                              r=self.response.shape)
    Debug.vprint(shape_message, level=1)

    self.data = None
def test_create_sparse(self):
    """
    Build InferelatorData from a CSR matrix plus explicit string labels.

    The wrapped AnnData object, converted back to a DataFrame, must equal
    self.expr after _make_idx_str has been applied to self.expr.
    """
    sparse_expr = sparse.csr_matrix(self.expr.values)
    gene_labels = self.expr.columns.astype(str)
    sample_labels = self.expr.index.astype(str)

    adata = InferelatorData(sparse_expr, gene_names=gene_labels, sample_names=sample_labels)

    # NOTE(review): presumably converts the labels of self.expr to str in place - confirm
    InferelatorData._make_idx_str(self.expr)

    round_tripped = adata._adata.to_df()
    pdt.assert_frame_equal(self.expr, round_tripped)
def test_12_34_identical(self):
    """Compute CLR/MI when x and y are both copies of [[1, 2, 1], [3, 4, 6]] (transposed on load)."""
    source = InferelatorData(expression_data=np.array([[1, 2, 1], [3, 4, 6]]),
                             transpose_expression=True)

    # Independent copies so x and y do not share an object
    self.x_dataframe = source.copy()
    self.y_dataframe = source.copy()

    clr_and_mi = mi.context_likelihood_mi(self.x_dataframe, self.y_dataframe)
    self.clr_matrix, self.mi_matrix = clr_and_mi

    expected = np.array([[0, 1], [1, 0]])
    np.testing.assert_almost_equal(self.clr_matrix.values, expected)
def test_dask_function_mi(self):
    """Check build_mi_array_dask on two identical inputs built from [[0, 0], [9, 3], [0, 9]]."""
    raw_values = [[0, 0], [9, 3], [0, 9]]

    x_dataframe = InferelatorData(pd.DataFrame(raw_values))
    y_dataframe = InferelatorData(pd.DataFrame(raw_values))

    # Positional arguments 10 and np.log are passed through unchanged
    mi_array = dask_functions.build_mi_array_dask(x_dataframe.values, y_dataframe.values, 10, np.log)

    expected = np.array([[0.63651417, 0.63651417],
                         [0.63651417, 1.09861229]])
    np.testing.assert_almost_equal(mi_array, expected)
def setUp(self):
    """
    Build dense and sparse fixtures from copies of the TestDataSingleCellLike data.

    self.expr is the transposed expression matrix, self.expr_sparse is the
    same values as an int32 CSR matrix, and self.adata / self.adata_sparse
    wrap them in InferelatorData.
    """
    source_expression = TestDataSingleCellLike.expression_matrix

    self.expr = source_expression.copy().T
    self.expr_sparse = sparse.csr_matrix(source_expression.values.T).astype(np.int32)
    self.meta = TestDataSingleCellLike.meta_data.copy()

    self.adata = InferelatorData(self.expr, transpose_expression=False)
    self.adata_sparse = InferelatorData(
        self.expr_sparse,
        gene_names=source_expression.index,
        transpose_expression=False,
        meta_data=TestDataSingleCellLike.meta_data.copy(),
    )
def test_12_34_and_zeros(self):
    """Compute CLR/MI for the fixture x data against a 2x2 all-zero y; every CLR entry must be NaN."""
    all_zero = np.zeros((2, 2))
    self.y_dataframe = InferelatorData(expression_data=all_zero)

    clr_and_mi = mi.context_likelihood_mi(self.x_dataframe, self.y_dataframe)
    self.clr_matrix, self.mi_matrix = clr_and_mi

    # the entire clr matrix is NAN
    clr_is_nan = np.isnan(self.clr_matrix.values)
    self.assertTrue(clr_is_nan.all())
def test_add_genedata(self):
    """
    Attach gene metadata to InferelatorData and check the resulting gene lists.

    The gene names must still equal the expression columns, and the
    "trim_gene_list" entry stored on the wrapped AnnData object must equal
    CORRECT_GENES_INTERSECT.
    """
    # Work on a copy: reassigning .index on the class-level fixture would leak
    # into every other test that reads TestDataSingleCellLike.gene_metadata
    gene_data = TestDataSingleCellLike.gene_metadata.copy()
    gene_data.index = gene_data.iloc[:, 0]

    adata = InferelatorData(self.expr, gene_data=gene_data)

    pdt.assert_index_equal(adata.gene_names, self.expr.columns)
    pdt.assert_index_equal(adata._adata.uns["trim_gene_list"], CORRECT_GENES_INTERSECT)
def test_non_finite_sparse(self):
    """
    Check the non_finite count for sparse-backed data as NaNs are written in.

    The count starts at 0 with no names, then is 1 and 2 after NaN is
    written at [0, 0] and [0, 1] respectively.
    """
    sparse_values = sparse.csr_matrix(self.expr.values.astype(float))
    adata = InferelatorData(sparse_values,
                            gene_names=self.expr.columns,
                            sample_names=self.expr.index)

    # Clean data: nothing is non-finite
    nnf, name_nf = adata.non_finite
    self.assertEqual(nnf, 0)
    self.assertIsNone(name_nf)

    # Each extra NaN in a new column raises the count by one
    for column, expected_count in ((0, 1), (1, 2)):
        adata.expression_data[0, column] = np.nan
        nnf, name_nf = adata.non_finite
        self.assertEqual(nnf, expected_count)
def setup_one_column(self):
    """
    Create a 2-sample by 3-gene expression fixture and a one-column prior.

    self.exp wraps the expression frame; self.priors has the single column
    'tf1' and is indexed by the gene names of self.exp.
    """
    expression_values = np.array([[1, 1, 0], [3, 2, 3]])
    exp = pd.DataFrame(expression_values, index=['s1', 's2'], columns=['g1', 'tf1', 'g3'])
    self.exp = InferelatorData(exp)

    prior_values = np.array([[1], [1], [0]])
    self.priors = pd.DataFrame(prior_values, columns=['tf1'], index=self.exp.gene_names)
def test_non_finite(self):
    """
    Check the non_finite count and names for dense data as NaNs are written in.

    The count starts at 0 with no names; after NaN is written at [0, 0] and
    then [0, 1], the reported names are ["gene1"] and ["gene1", "gene2"].
    """
    dense_values = self.expr.values.astype(float)
    adata = InferelatorData(dense_values,
                            gene_names=self.expr.columns,
                            sample_names=self.expr.index)

    # Clean data: nothing is non-finite
    nnf, name_nf = adata.non_finite
    self.assertEqual(nnf, 0)
    self.assertIsNone(name_nf)

    expected_after_write = (
        (0, 1, ["gene1"]),
        (1, 2, ["gene1", "gene2"]),
    )
    for column, expected_count, expected_names in expected_after_write:
        adata.expression_data[0, column] = np.nan
        nnf, name_nf = adata.non_finite
        self.assertEqual(nnf, expected_count)
        self.assertListEqual(name_nf.tolist(), expected_names)
def setUp(self):
    """
    Create an AMuSR workflow with two hand-built tasks and a small prior.

    Output directory creation is replaced with a no-op, the per-task design
    and response data are assigned directly, and the gold standard is a copy
    of the priors.
    """
    tfs = ['tf1', 'tf2', 'tf3']
    targets = ['gene1', 'gene2']

    self.workflow = workflow.inferelator_workflow(workflow="amusr", regression="amusr")
    # Disable output directory creation for the test
    self.workflow.create_output_dir = lambda *x: None

    # Both tasks use the same design values, each built from its own array
    self.workflow._task_design = [
        InferelatorData(pd.DataFrame(np.array([[1, 1, 3], [0, 0, 2], [0, 0, 1]]).astype(float),
                                     columns=tfs))
        for _ in range(2)
    ]

    response_values_by_task = (
        [[1, 1], [2, 0], [3, 0]],
        [[1, 3], [2, 3], [3, 3]],
    )
    self.workflow._task_response = [
        InferelatorData(pd.DataFrame(np.array(task_values).astype(float), columns=targets))
        for task_values in response_values_by_task
    ]

    self.priors_data = pd.DataFrame([[0, 1, 1], [1, 0, 1]], index=targets, columns=tfs)
    self.gold_standard = self.priors_data.copy()
def setup_mouse_th17(self):
    """
    Create a 5-sample by 5-gene expression fixture and a 5x4 signed prior.

    self.exp wraps the expression frame (samples s1-s5, genes g1/t2/g3/g4/g5);
    self.priors is indexed by the same gene labels with columns t1-t4.
    """
    sample_labels = ['s1', 's2', 's3', 's4', 's5']
    gene_labels = ['g1', 't2', 'g3', 'g4', 'g5']

    expression_values = np.array([[12.2844, 8.16, 10.4782, 5.46, 7.96367],
                                  [12.55, 8.5536, 11.0834, 5.4891, 7.86005],
                                  [11.8626, 7.765, 10.5227, 4.9039, 7.82641],
                                  [11.8623, 7.8903, 10.3418, 4.698, 7.94938],
                                  [11.881, 8.0871, 10.3878, 5.0788, 7.67066]])
    exp = pd.DataFrame(expression_values, index=sample_labels, columns=gene_labels)
    self.exp = InferelatorData(exp)

    prior_values = np.array([[1, 0, 0, 1],
                             [0, 0, 0, 0],
                             [0, 0, -1, 0],
                             [-1, 0, 0, -1],
                             [0, 0, 1, 0]])
    self.priors = pd.DataFrame(prior_values,
                               columns=['t1', 't2', 't3', 't4'],
                               index=['g1', 't2', 'g3', 'g4', 'g5'])
def test_trim_dense(self):
    """
    Trim dense data down to the genes in the gene metadata, in two stages.

    With remove_constant_genes=False the remaining columns must be
    CORRECT_GENES_INTERSECT; after a second call with
    remove_constant_genes=True they must be CORRECT_GENES_NZ_VAR.
    """
    # Work on a copy: reassigning .index on the class-level fixture would leak
    # into every other test that reads TestDataSingleCellLike.gene_metadata
    gene_data = TestDataSingleCellLike.gene_metadata.copy()
    gene_data.index = gene_data.iloc[:, 0]

    adata = InferelatorData(self.expr, gene_data=gene_data)

    adata.trim_genes(remove_constant_genes=False)
    pdt.assert_frame_equal(
        self.expr.reindex(CORRECT_GENES_INTERSECT, axis=1).astype(np.int32),
        adata._adata.to_df())

    adata.trim_genes(remove_constant_genes=True)
    pdt.assert_frame_equal(
        self.expr.reindex(CORRECT_GENES_NZ_VAR, axis=1).astype(np.int32),
        adata._adata.to_df())
def compute_transcription_factor_activity(self, prior, expression_data, expression_data_halftau=None,
                                          keep_self=False, tau=None):
    """
    Calculate transcription factor activity from a prior and expression data.

    :param prior: Prior matrix; passed through self._check_prior before use
    :param expression_data: Expression data whose sample names and metadata
        are carried over to the result
    :param expression_data_halftau: Accepted but not used by this implementation
    :param keep_self: Forwarded to self._check_prior
    :param tau: Accepted but not used by this implementation
    :return: InferelatorData holding the activity, with the activity TFs as gene names
    :raises ValueError: If _check_prior reports no TFs to calculate activity for
    """
    # The third value returned by _check_prior is not needed here
    prior, activity_tfs, _ = self._check_prior(prior, expression_data, keep_self=keep_self)

    if len(activity_tfs) == 0:
        raise ValueError("TFA cannot be calculated; prior matrix has no edges")

    prior_for_activity = prior.loc[:, activity_tfs].values
    activity = self._calculate_activity(prior_for_activity, expression_data)

    return InferelatorData(activity,
                           gene_names=activity_tfs,
                           sample_names=expression_data.sample_names,
                           meta_data=expression_data.meta_data)
def _combine_expression_velocity(self, expression, velocity):
    """
    Calculate dX/dt + lambda * X

    Decay constants (lambda) are taken from the first available source, in
    this order: self._decay_constants, 1 / self.tau repeated for every gene,
    the "decay_constants" column of the velocity gene data, then the same
    column of the expression gene data. The two gene-data sources are only
    used when self._use_precalculated_decay_constants is set. If none is
    available the velocity object is returned unchanged.

    :param expression: Expression data (X)
    :param velocity: Velocity data (dX/dt); gene and sample names must align with expression
    :return: InferelatorData with velocity + decay * expression, or velocity itself
    """
    assert check.indexes_align((expression.gene_names, velocity.gene_names))
    assert check.indexes_align((expression.sample_names, velocity.sample_names))

    decay_constants = None

    if self._decay_constants is not None:
        Debug.vprint("Using preloaded decay constants in _decay_constants")
        decay_constants = self._decay_constants

    elif self.tau is not None:
        Debug.vprint("Calculating decay constants for tau {t}".format(t=self.tau))
        decay_constants = np.repeat(1 / self.tau, expression.num_genes)

    elif "decay_constants" in velocity.gene_data.columns and self._use_precalculated_decay_constants:
        Debug.vprint("Extracting decay constants from {n}".format(n=velocity.name))
        decay_constants = velocity.gene_data["decay_constants"].values

    elif "decay_constants" in expression.gene_data.columns and self._use_precalculated_decay_constants:
        Debug.vprint("Extracting decay constants from {n}".format(n=expression.name))
        decay_constants = expression.gene_data["decay_constants"].values

    if decay_constants is None:
        Debug.vprint("No decay information found. Solving dX/dt = AB for Betas")
        return velocity

    # Broadcast the per-gene constants across the first axis of the expression values
    decay_term = np.multiply(expression.values, decay_constants[None, :])
    combined = np.add(velocity.values, decay_term)

    return InferelatorData(combined,
                           gene_names=expression.gene_names,
                           sample_names=expression.sample_names,
                           meta_data=expression.meta_data)
def test_trim_sparse(self):
    """
    Trim sparse-backed data down to the genes in the gene metadata, in two stages.

    With remove_constant_genes=False the remaining columns must be
    CORRECT_GENES_INTERSECT; after a second call with
    remove_constant_genes=True they must be CORRECT_GENES_NZ_VAR.
    """
    # Work on a copy: reassigning .index on the class-level fixture would leak
    # into every other test that reads TestDataSingleCellLike.gene_metadata
    gene_data = TestDataSingleCellLike.gene_metadata.copy()
    gene_data.index = gene_data.iloc[:, 0]

    adata_sparse = InferelatorData(
        sparse.csr_matrix(TestDataSingleCellLike.expression_matrix.values.T),
        gene_names=TestDataSingleCellLike.expression_matrix.index,
        meta_data=TestDataSingleCellLike.meta_data.copy(),
        gene_data=gene_data)

    adata_sparse.trim_genes(remove_constant_genes=False)
    pdt.assert_frame_equal(
        self.expr.reindex(CORRECT_GENES_INTERSECT, axis=1),
        adata_sparse._adata.to_df())

    adata_sparse.trim_genes(remove_constant_genes=True)
    pdt.assert_frame_equal(
        self.expr.reindex(CORRECT_GENES_NZ_VAR, axis=1),
        adata_sparse._adata.to_df())
def test_create_df(self):
    """InferelatorData built from a DataFrame exposes the frame's values as expression_data."""
    adata = InferelatorData(self.expr)
    expected_values = self.expr.values
    npt.assert_array_equal(adata.expression_data, expected_values)
import unittest import pandas as pd import numpy as np import scipy.sparse as sps from inferelator.regression import mi from inferelator.utils import InferelatorData L = InferelatorData(expression_data=np.array([[1, 2], [3, 4]]), transpose_expression=True) L_sparse = InferelatorData(expression_data=sps.csr_matrix([[1, 2], [3, 4]]), transpose_expression=True) L2 = InferelatorData(expression_data=np.array([[3, 4], [2, 1]]), transpose_expression=True) class Test2By2(unittest.TestCase): def setUp(self): self.x_dataframe = L.copy() self.y_dataframe = L.copy() def test_12_34_identical(self): """Compute mi for identical arrays [[1, 2], [2, 4]].""" self.clr_matrix, self.mi_matrix = mi.context_likelihood_mi( self.x_dataframe, self.y_dataframe) expected = np.array([[0, 1], [1, 0]]) np.testing.assert_almost_equal(self.clr_matrix.values, expected) def test_12_34_minus(self): """Compute mi for identical arrays [[1, 2], [2, 4]].""" self.y_dataframe.multiply(-1) self.clr_matrix, self.mi_matrix = mi.context_likelihood_mi(
}) priors_data = pd.DataFrame([[0, 1], [0, 1], [1, 0], [0, 0]], index=["gene1", "gene2", "gene4", "gene5"], columns=["gene3", "gene6"]) gene_metadata = pd.DataFrame({ "SystematicName": ["gene1", "gene2", "gene3", "gene4", "gene7", "gene6"] }) gene_list_index = "SystematicName" tf_names = ["gene3", "gene6"] TEST_DATA = InferelatorData( TestDataSingleCellLike.expression_matrix, transpose_expression=True, meta_data=TestDataSingleCellLike.meta_data, gene_data=TestDataSingleCellLike.gene_metadata, gene_data_idx_column=TestDataSingleCellLike.gene_list_index, sample_names=list(map(str, range(10)))) TEST_DATA_SPARSE = InferelatorData( sps.csr_matrix(TestDataSingleCellLike.expression_matrix.T.values), gene_names=TestDataSingleCellLike.expression_matrix.index, sample_names=list(map(str, range(10))), meta_data=TestDataSingleCellLike.meta_data, gene_data=TestDataSingleCellLike.gene_metadata, gene_data_idx_column=TestDataSingleCellLike.gene_list_index) CORRECT_GENES_INTERSECT = pd.Index( ["gene1", "gene2", "gene3", "gene4", "gene6"]) CORRECT_GENES_NZ_VAR = pd.Index(["gene1", "gene2", "gene4", "gene6"])
def test_create_df_transpose(self):
    """With transpose_expression=True, expression_data equals the transposed frame values."""
    adata = InferelatorData(self.expr, transpose_expression=True)
    expected_values = self.expr.values.T
    npt.assert_array_equal(adata.expression_data, expected_values)
def test_create_array(self):
    """
    Build InferelatorData from a plain array plus explicit string labels.

    The wrapped AnnData object, converted back to a DataFrame, must equal
    self.expr after _make_idx_str has been applied to self.expr.
    """
    gene_labels = self.expr.columns.astype(str)
    sample_labels = self.expr.index.astype(str)

    adata = InferelatorData(self.expr.values, gene_names=gene_labels, sample_names=sample_labels)

    # NOTE(review): presumably converts the labels of self.expr to str in place - confirm
    InferelatorData._make_idx_str(self.expr)

    round_tripped = adata._adata.to_df()
    pdt.assert_frame_equal(self.expr, round_tripped)
def test_add_metadata(self):
    """Metadata passed at construction is exposed as meta_data, compared after casting the fixture index to str."""
    adata = InferelatorData(self.expr, meta_data=self.meta)

    # The fixture index is cast to str only after the object has been built
    string_index = self.meta.index.astype(str)
    self.meta.index = string_index

    pdt.assert_frame_equal(self.meta, adata.meta_data)
def test_create_metadata(self):
    """Metadata passed at construction is returned unchanged by the meta_data attribute."""
    adata = InferelatorData(self.expr, meta_data=self.meta)
    stored_meta = adata.meta_data
    pdt.assert_frame_equal(stored_meta, self.meta)