def test_setGeneName(self):
    """Verify the gene name derived from a distance-matrix file path.

    Two trees are set up from cophenetic-distance CSV files, and each tree's
    getGeneName() must equal the file name without its ``___CopD.csv``
    suffix. The input files are read from hard-coded local paths.
    """
    pop_info = 'C:/Users/norab/Master/Data/real_tree_data/phydist_population_classes.tsv'
    # Maps each input file to the gene name expected from getGeneName().
    expected_names = {
        'E:/Master/cophenetic_dists/ENSG00000188324___OR6C6___CopD.csv':
            "ENSG00000188324___OR6C6",
        'E:/Master/cophenetic_dists/ENSG00000000938___FGR___CopD.csv':
            "ENSG00000000938___FGR",
    }

    # Set up every tree first, then run the checks.
    trees = []
    for gene_file in expected_names:
        tree = treeInfo()
        tree.setup(gene_file, pop_info)
        trees.append(tree)

    for tree, gene_name in zip(trees, expected_names.values()):
        assert tree.getGeneName() == gene_name
def test_makePsuedoPops(self):
    """Check that makePsuedoPops assigns samples to near-equal sized groups.

    For each tested number of pseudo-populations, the last field of every
    sample-info entry is collected (presumably the group label written by
    makePsuedoPops -- confirm against treeInfo). The test asserts that there
    is one label per row of the distance matrix and that every group size
    is within one of ``round(len(dist_mat) / num_pops)``.

    The gene and population files are read from hard-coded local paths.
    """
    test_gene = 'C:\\Users\\norab\\Master\\Data\\real_tree_data\\dist_mat_subset\\ENSG00000001167___NFYA___CopD.csv'
    pop_info = 'C:/Users/norab/Master/Data/real_tree_data/phydist_population_classes.tsv'

    num_pops_tests = [6, 17]
    for num_pops in num_pops_tests:
        tree = treeInfo()
        tree.setup(test_gene, pop_info)
        info = tree.getSampleInfo()
        dist_mat = tree.getDistMat()
        tree.makePsuedoPops(num_pops)

        # Only the values are needed; the last field of each entry is the label.
        all_groups = pd.Series([entry[-1] for entry in info.values()])
        # Count once and reuse instead of recomputing for every check.
        group_sizes = all_groups.value_counts()

        assert len(all_groups) == len(dist_mat)
        assert sum(group_sizes) == len(dist_mat)

        # Groups may differ by one sample when the split is not exact.
        expected_size = int(round(len(dist_mat) / num_pops))
        for size in group_sizes:
            assert size in [expected_size, expected_size + 1, expected_size - 1]
def test_shuffleSampleInfo(self):
    """Check that shuffleSampleInfo changes the sample info.

    A deep copy of the sample info is taken before shuffling, so the
    comparison remains meaningful even if shuffleSampleInfo modifies the
    returned object in place. The input files are read from hard-coded
    local paths.
    """
    # Imported locally so the fix does not depend on the file's import block.
    import copy

    test_gene_small = 'C:\\Users\\norab\\Master\\Data\\real_tree_data\\dist_mat_test\\FGR_10x10.csv'
    pop_info = 'C:/Users/norab/Master/Data/real_tree_data/phydist_population_classes.tsv'

    tree = treeInfo()
    tree.setup(test_gene_small, pop_info)
    # tree.getDistMat() can be inspected here when debugging.

    # A dict has no .deepcopy() method (that call raises AttributeError);
    # copy.deepcopy is the standard-library way to snapshot nested data.
    info1 = copy.deepcopy(tree.getSampleInfo())

    tree.shuffleSampleInfo()
    info2 = tree.getSampleInfo()

    assert info1 != info2
def test_getSampleInfo(self):
    """Spot-check entries of the sample info for the small 10x10 matrix.

    The entry stored under key 2 must hold 'EUR' at index 1 and 'GBR' at
    index 2, and the last field of the final entry must be 'ESN'. The input
    files are read from hard-coded local paths.
    """
    pop_info = 'C:/Users/norab/Master/Data/real_tree_data/phydist_population_classes.tsv'
    test_gene_small = 'C:\\Users\\norab\\Master\\Data\\real_tree_data\\dist_mat_test\\FGR_10x10.csv'

    tree = treeInfo()
    tree.setup(test_gene_small, pop_info)
    # tree.getDistMat() can be inspected here when debugging.
    sample_info = tree.getSampleInfo()

    third_entry = sample_info[2]
    assert third_entry[1] == 'EUR'
    assert third_entry[2] == 'GBR'

    # Final (key, value) pair in iteration order; only the value is checked.
    _, last_entry = list(sample_info.items())[-1]
    assert last_entry[-1] == 'ESN'
def test_setSampleInfo(self):
    """Verify the structure and selected entries of the sample info.

    After setup, getSampleInfo() must return a dict whose values are lists,
    and the entries under keys 0 and 200 must match the expected
    [sample id, 'EUR'/'EAS' code, 'GBR'/'CHS' code] triples. The input files
    are read from hard-coded local paths.
    """
    pop_info = 'C:/Users/norab/Master/Data/real_tree_data/phydist_population_classes.tsv'
    test_gene = 'E:/Master/cophenetic_dists/ENSG00000188324___OR6C6___CopD.csv'

    tree = treeInfo()
    tree.setup(test_gene, pop_info)
    # tree.getDistMat() can be inspected here when debugging.
    info = tree.getSampleInfo()

    # Container types first, then concrete rows.
    assert isinstance(info, dict)
    assert isinstance(info[2], list)

    expected_rows = (
        (0, ['EUR___GBR___HG00261', 'EUR', 'GBR']),
        (200, ['EAS___CHS___HG00409', 'EAS', 'CHS']),
    )
    for key, row in expected_rows:
        assert info[key] == row
def test_setPopInfo(self):
    """Verify the population sets loaded by setPopInfo.

    After setPopInfo() reads the population file, getPopInfo() must return
    the expected set of five codes at index 0 and the expected set of 26
    codes at index 1 (presumably super- and sub-populations, going by the
    original local names -- confirm against treeInfo). The population file
    is read from a hard-coded local path.
    """
    expected_first = {'AFR', 'AMR', 'EAS', 'EUR', 'SAS'}
    expected_second = {
        'ACB', 'ASW', 'BEB', 'CDX', 'CEU', 'CHB', 'CHS', 'CLM', 'ESN',
        'FIN', 'GBR', 'GIH', 'GWD', 'IBS', 'ITU', 'JPT', 'KHV', 'LWK',
        'MSL', 'MXL', 'PEL', 'PJL', 'PUR', 'STU', 'TSI', 'YRI',
    }

    pop_info_file = 'C:/Users/norab/Master/Data/real_tree_data/phydist_population_classes.tsv'
    tree = treeInfo()
    tree.setPopInfo(pop_info_file)

    loaded = tree.getPopInfo()
    first_set = loaded[0]
    second_set = loaded[1]

    assert first_set == expected_first
    assert second_set == expected_second