def test_amusr_regression(self):
    """Run ``run_regression_EBIC`` for two genes over two tasks and compare frames.

    Both tasks share the same 3x3 design matrix and the same 3x1 response.
    For each gene the concatenated result is expected to be a two-row frame
    (one row per task) naming ``tf3`` as the regulator with ``weights`` of -1
    and ``resc_weights`` of 1.
    """
    regulators = ['tf1', 'tf2', 'tf3']
    genes = ['gene1', 'gene2']
    task_ids = (0, 1)

    # One independent copy of the design / response array per task.
    des = [
        np.array([[1, 1, 3], [0, 0, 2], [0, 0, 1]]).astype(float)
        for _ in task_ids
    ]
    res = [
        np.array([1, 2, 3]).reshape(-1, 1).astype(float)
        for _ in task_ids
    ]

    priors = [
        pd.DataFrame(values, index=genes, columns=regulators)
        for values in ([[0, 1, 1], [1, 0, 1]], [[0, 0, 1], [1, 0, 1]])
    ]

    # Format every prior before any regression is run.
    formatted = {
        gene: amusr_regression.format_prior(priors, gene, [0, 1], 1.)
        for gene in genes
    }

    output = [
        amusr_regression.run_regression_EBIC(
            des, res, list(regulators), [0, 1], gene, formatted[gene],
            scale_data=True, use_numba=self.use_numba)
        for gene in genes
    ]

    def expected_frame(gene):
        # Two identical rows, indexed by (task, 0).
        return pd.DataFrame(
            [['tf3', gene, -1, 1] for _ in task_ids],
            index=pd.MultiIndex(levels=[[0, 1], [0]], codes=[[0, 1], [0, 0]]),
            columns=['regulator', 'target', 'weights', 'resc_weights'])

    expected = [expected_frame(gene) for gene in genes]

    for observed, wanted in zip(output, expected):
        pdt.assert_frame_equal(pd.concat(observed), wanted, check_dtype=False)
def test_format_priors_pweight(self):
    """Check ``format_prior`` output for two genes with a prior weight of 1.2.

    Two task-specific prior frames (genes x TFs) are formatted for tasks
    ``[0, 1]`` and compared, to numpy's default ``assert_almost_equal``
    precision, against hard-coded arrays.
    """
    regulators = ['tf1', 'tf2']
    genes = ['gene1', 'gene2']
    priors = [
        pd.DataFrame(values, index=genes, columns=regulators)
        for values in ([[0, 1], [1, 0]], [[0, 0], [1, 0]])
    ]

    expected = {
        'gene1': np.array([[1.09090909, 1.], [0.90909091, 1.]]),
        'gene2': np.array([[0.90909091, 0.90909091], [1.09090909, 1.09090909]]),
    }

    # Format both genes first, then assert, so a failure reports after both ran.
    observed = {
        gene: amusr_regression.format_prior(priors, gene, [0, 1], 1.2)
        for gene in genes
    }

    for gene in genes:
        npt.assert_almost_equal(observed[gene], expected[gene])
def test_format_priors_noweight(self):
    """Check ``format_prior`` output for two genes with a prior weight of 1.

    Two task-specific prior frames (genes x TFs) are formatted for tasks
    ``[0, 1]``; with a weight of 1 every entry of the result is expected to
    be 1.0 for both genes.
    """
    tfs = ['tf1', 'tf2']
    priors = [
        pd.DataFrame([[0, 1], [1, 0]], index=['gene1', 'gene2'], columns=tfs),
        pd.DataFrame([[0, 0], [1, 0]], index=['gene1', 'gene2'], columns=tfs),
    ]

    # format_prior is a module-level function, so no regression object is needed.
    gene1_prior = amusr_regression.format_prior(priors, 'gene1', [0, 1], 1)
    gene2_prior = amusr_regression.format_prior(priors, 'gene2', [0, 1], 1)

    npt.assert_almost_equal(gene1_prior, np.array([[1., 1.], [1., 1.]]))
    npt.assert_almost_equal(gene2_prior, np.array([[1., 1.], [1., 1.]]))
def regression_maker(j, x_df, y_list, prior, tf):
    """Assemble the per-task inputs for gene ``j`` and run the regression.

    Parameters
    ----------
    j : int
        Index into the enclosing scope's ``genes`` sequence.
    x_df : indexable
        Indexed by task key; each entry must provide ``get_gene_data(tf)``.
    y_list : iterable
        Pairs of ``(task_key, y_data)`` for the tasks to include.
    prior : object
        Prior information handed to ``format_prior``.
    tf : list
        Regulator names; the target gene is filtered out when the enclosing
        ``remove_autoregulation`` flag is truthy.

    Returns
    -------
    tuple
        ``(j, result)`` where ``result`` is whatever ``regression_function``
        returns.
    """
    # Log at level 0 for every 100th gene, otherwise at level 2.
    level = 2 if j % 100 else 0
    utils.Debug.allprint(
        base_regression.PROGRESS_STR.format(gn=genes[j], i=j, total=G),
        level=level)

    target_gene = genes[j]
    regulators = tf
    if remove_autoregulation:
        regulators = [name for name in tf if name != target_gene]

    design, response, task_ids = [], [], []
    for task_key, task_response in y_list:
        design.append(x_df[task_key].get_gene_data(regulators))  # list([N, K])
        response.append(task_response)
        task_ids.append(task_key)  # [T,]

    gene_prior = format_prior(prior, target_gene, task_ids, prior_weight, tfs=regulators)

    result = regression_function(
        design, response, regulators, task_ids, target_gene, gene_prior,
        lambda_Bs=lambda_Bs, lambda_Ss=lambda_Ss, Cs=Cs, Ss=Ss,
        tol=tol, rel_tol=rel_tol, use_numba=use_numba)
    return j, result
def test_amusr_regression(self):
    """Run ``run_regression_EBIC`` for two genes over two tasks and compare frames.

    Both tasks share the same 3x3 design matrix and the same 3x1 response.
    For each gene the concatenated result is expected to be a two-row frame
    (one row per task) naming ``tf3`` as the regulator with ``weights`` of -1
    and ``resc_weights`` of 1.
    """
    des = [
        np.array([[1, 1, 3], [0, 0, 2], [0, 0, 1]]).astype(float),
        np.array([[1, 1, 3], [0, 0, 2], [0, 0, 1]]).astype(float),
    ]
    res = [
        np.array([1, 2, 3]).reshape(-1, 1).astype(float),
        np.array([1, 2, 3]).reshape(-1, 1).astype(float),
    ]
    tfs = ['tf1', 'tf2', 'tf3']
    targets = ['gene1', 'gene2']
    priors = [
        pd.DataFrame([[0, 1, 1], [1, 0, 1]], index=targets, columns=tfs),
        pd.DataFrame([[0, 0, 1], [1, 0, 1]], index=targets, columns=tfs),
    ]

    gene1_prior = amusr_regression.format_prior(priors, 'gene1', [0, 1], 1.)
    gene2_prior = amusr_regression.format_prior(priors, 'gene2', [0, 1], 1.)

    output = [
        amusr_regression.run_regression_EBIC(
            des, res, ['tf1', 'tf2', 'tf3'], [0, 1], 'gene1', gene1_prior),
        amusr_regression.run_regression_EBIC(
            des, res, ['tf1', 'tf2', 'tf3'], [0, 1], 'gene2', gene2_prior),
    ]

    # ``codes=`` is the current MultiIndex keyword; the older ``labels=``
    # spelling is rejected by pandas >= 1.0.
    out0 = pd.DataFrame(
        [['tf3', 'gene1', -1, 1], ['tf3', 'gene1', -1, 1]],
        index=pd.MultiIndex(levels=[[0, 1], [0]], codes=[[0, 1], [0, 0]]),
        columns=['regulator', 'target', 'weights', 'resc_weights'])
    out1 = pd.DataFrame(
        [['tf3', 'gene2', -1, 1], ['tf3', 'gene2', -1, 1]],
        index=pd.MultiIndex(levels=[[0, 1], [0]], codes=[[0, 1], [0, 0]]),
        columns=['regulator', 'target', 'weights', 'resc_weights'])

    pdt.assert_frame_equal(pd.concat(output[0]), out0, check_dtype=False)
    pdt.assert_frame_equal(pd.concat(output[1]), out1, check_dtype=False)
def regression_maker(j, x_df, y_list, prior, tf):
    """Assemble the per-task inputs for gene ``j`` and run the EBIC regression.

    Parameters
    ----------
    j : int
        Index into the enclosing scope's ``genes`` sequence.
    x_df : indexable
        Indexed by task key; each entry is sliced with ``.loc[:, tf].values``.
    y_list : iterable
        Pairs of ``(task_key, y_data)`` for the tasks to include.
    prior : object
        Prior information handed to ``format_prior``.
    tf : list
        Regulator names; the target gene is filtered out when the enclosing
        ``remove_autoregulation`` flag is truthy.

    Returns
    -------
    tuple
        ``(j, result)`` where ``result`` is whatever ``run_regression_EBIC``
        returns.
    """
    # Log at level 0 for every 100th gene, otherwise at level 2.
    level = 2 if j % 100 else 0
    utils.Debug.allprint(
        base_regression.PROGRESS_STR.format(gn=genes[j], i=j, total=G),
        level=level)

    target_gene = genes[j]
    regulators = tf
    if remove_autoregulation:
        regulators = [name for name in tf if name != target_gene]

    design, response, task_ids = [], [], []
    for task_key, task_response in y_list:
        design.append(x_df[task_key].loc[:, regulators].values)  # list([N, K])
        response.append(task_response)
        task_ids.append(task_key)  # [T,]

    # Drop the local reference to the input list once it has been consumed.
    del y_list

    gene_prior = format_prior(prior, target_gene, task_ids, prior_weight)
    result = run_regression_EBIC(
        design, response, regulators, task_ids, target_gene, gene_prior)
    return j, result