Exemplo n.º 1
0
    def test_load_from_server(self):
        """Check that ``ham.Ham`` rejects unusable remote-data arguments.

        Both constructor calls below are expected to raise ``TypeError``.
        """
        # 'xxx' is not a valid value for use_data_from.
        self.assertRaises(TypeError, ham.Ham, use_data_from='xxx')

        # 'oma' is a valid use_data_from, but query_database is missing.
        self.assertRaises(TypeError, ham.Ham, use_data_from='oma')
Exemplo n.º 2
0
    def test_wrong_type_hog_file(self):
        """Expect ``TypeError`` for each rejected ``type_hog_file`` value."""
        # An arbitrary string, the empty string and None are tried in turn.
        for rejected_type in ('xs', '', None):
            with self.assertRaises(TypeError):
                ham.Ham(self.nwk_str,
                        self.orthoxml_path,
                        type_hog_file=rejected_type)
Exemplo n.º 3
0
    def test_wrong_newick_str(self):
        """Expect ``NewickError`` for the empty and the wrong newick fixtures."""
        # Tree input taken from the ``nwk_str_empty`` fixture.
        self.assertRaises(ete3.parser.newick.NewickError,
                          ham.Ham,
                          self.nwk_str_empty,
                          self.orthoxml_path,
                          type_hog_file='orthoxml')

        # Tree input taken from the ``nwk_str_wrong`` fixture.
        self.assertRaises(ete3.parser.newick.NewickError,
                          ham.Ham,
                          self.nwk_str_wrong,
                          self.orthoxml_path,
                          type_hog_file='orthoxml')
Exemplo n.º 4
0
    def test_orthoxml_as_string(self):
        """Pass ``self.orthoxml_string`` as ``hog_file`` with and without the flag.

        With ``orthoXML_as_string=True`` the construction is expected to
        succeed; without the flag the same arguments must raise ``IOError``.
        """
        shared_args = dict(tree_file=self.nwk_str,
                           hog_file=self.orthoxml_string,
                           type_hog_file='orthoxml')

        # Flag set: the resulting instance is stored on the test case.
        self.ham_analysis = ham.Ham(orthoXML_as_string=True, **shared_args)

        # Flag omitted: the constructor call has to fail.
        with self.assertRaises(IOError):
            self.ham_analysis = ham.Ham(**shared_args)
Exemplo n.º 5
0
    def test_phyloxml_tag(self):
        """Check ``phyloxml_leaf_name_tag`` values for a phyloxml tree.

        The tag ``'None'`` must raise ``TypeError``; the tag ``'clade_name'``
        must be accepted without raising.
        """
        phyloxml_args = {
            'type_hog_file': 'orthoxml',
            'tree_format': 'phyloxml',
        }

        # Rejected leaf-name tag.
        with self.assertRaises(TypeError):
            ham.Ham(self.phyloxml_file,
                    self.orthoxml_path,
                    phyloxml_leaf_name_tag='None',
                    **phyloxml_args)

        # Accepted leaf-name tag: the call must simply complete.
        ham.Ham(self.phyloxml_file,
                self.orthoxml_path,
                phyloxml_leaf_name_tag='clade_name',
                **phyloxml_args)
Exemplo n.º 6
0
        def setUp(self):
            """Load the simpleEx data and cache the genomes used by the tests."""
            here = os.path.dirname(__file__)
            newick = utils.get_newick_string(
                os.path.join(here, './data/simpleEx.nwk'), type="nwk")
            hog_path = os.path.join(here, './data/simpleEx.orthoxml')

            analysis = ham.Ham(
                tree_file=newick,
                hog_file=hog_path,
                type_hog_file='orthoxml',
                use_internal_name=True,
            )
            self.ham_analysis = analysis

            # Extant genomes, looked up by name and stored as attributes.
            extant_lookup = (
                ("human", "HUMAN"),
                ("frog", "XENTR"),
                ("mouse", "MOUSE"),
                ("rat", "RATNO"),
                ("chimp", "PANTR"),
            )
            for attribute, genome_name in extant_lookup:
                setattr(self, attribute,
                        analysis.get_extant_genome_by_name(name=genome_name))

            # Ancestral genomes, each the MRCA of a pair of extant genomes.
            mrca_of = analysis.get_ancestral_genome_by_mrca_of_genome_set
            self.vertebrates = mrca_of({self.human, self.frog})
            self.rodents = mrca_of({self.mouse, self.rat})
            self.primates = mrca_of({self.human, self.chimp})
            self.euarchontoglires = mrca_of({self.human, self.mouse})
Exemplo n.º 7
0
    def test_non_luca_root_hog_works_from_omabrowser(self):
        """Create a tree profile for the 'P53_RAT' OMA query and inspect it.

        The profile is written to 'tree_profile.html' under ``self.tmpdir``
        and its content must include the text ``treeData``.
        """
        oma_analysis = ham.Ham(query_database='P53_RAT', use_data_from='oma')

        profile_path = os.path.join(self.tmpdir, 'tree_profile.html')
        oma_analysis.create_tree_profile(outfile=profile_path)

        with open(profile_path, 'rt') as profile_file:
            profile_html = profile_file.read()
        self.assertIn('treeData', profile_html)
Exemplo n.º 8
0
    def setUp(self):
        """Create ``self.ham_analysis`` from the simpleEx newick and orthoXML."""
        here = os.path.dirname(__file__)
        newick = utils.get_newick_string(
            os.path.join(here, './data/simpleEx.nwk'), type="nwk")
        hog_path = os.path.join(here, './data/simpleEx.orthoxml')

        self.ham_analysis = ham.Ham(
            tree_file=newick,
            hog_file=hog_path,
            type_hog_file='orthoxml',
            use_internal_name=True,
        )
Exemplo n.º 9
0
 def setUp(self):
     """Load the p53 augmented data with a phyloxml species tree.

     The two file paths are kept on the test case next to the analysis.
     """
     here = os.path.dirname(__file__)
     self.phyloxml_path = os.path.join(
         here, "data", "p53_augmented_speciestree.phyloxml")
     self.orthoxml_path = os.path.join(
         here, "data", "p53_augmented.orthoxml")

     self.ham_analysis = ham.Ham(
         tree_file=self.phyloxml_path,
         tree_format="phyloxml",
         hog_file=self.orthoxml_path,
         type_hog_file="orthoxml",
         use_internal_name=True,
     )
Exemplo n.º 10
0
 def test_parser_accepts_multiple_consequative_dups(self):
     """Parse the fixture and check the children of its only top-level HOG.

     Expected: exactly one top-level HOG with five children; the first
     child did not arise by duplication, and the other four share a
     single ``arose_by_duplication`` value.
     """
     analysis = ham.Ham(
         tree_file=self.nwk_str,
         hog_file=self.orthoxml_path,
         type_hog_file='orthoxml',
         use_internal_name=True,
     )
     top_level = analysis.get_list_top_level_hogs()
     self.assertEqual(1, len(top_level))

     root_hog = top_level[0]
     self.assertEqual(5, len(root_hog.children))
     self.assertFalse(root_hog.children[0].arose_by_duplication)

     # Collect the distinct duplication markers of the remaining children.
     distinct_markers = set()
     for child in root_hog.children[1:]:
         distinct_markers.add(child.arose_by_duplication)
     self.assertEqual(1, len(distinct_markers))
Exemplo n.º 11
0
    def test_treeProfile_on_full_setup(self):
        """Configure logging and build a ``ham.Ham`` from the simpleEx files."""
        logging.basicConfig(
            format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
            level=logging.INFO)

        here = os.path.dirname(__file__)
        newick = utils.get_newick_string(
            os.path.join(here, './data/simpleEx.nwk'), type="nwk")
        hog_path = os.path.join(here, './data/simpleEx.orthoxml')

        # NOTE(review): the visible body makes no assertion on this instance;
        # confirm whether checks on the tree profile are missing here.
        analysis = ham.Ham(tree_file=newick,
                           hog_file=hog_path,
                           type_hog_file='orthoxml')
Exemplo n.º 12
0
    def setUp(self):
        """Load the 'conflict_duplication_children' parser fixture."""
        here = os.path.dirname(__file__)
        newick = utils.get_newick_string(
            os.path.join(here, './data/parser/tree.newick'), type="nwk")
        hog_path = os.path.join(
            here, './data/parser/conflict_duplication_children.orthoxml')

        self.ham_analysis = ham.Ham(
            tree_file=newick,
            hog_file=hog_path,
            type_hog_file='orthoxml',
            use_internal_name=True,
        )
Exemplo n.º 13
0
    def setUp(self):
        """Build unfiltered and HOG-2-filtered analyses of the simpleEx data.

        Also stores the genome, gene and HOG identifier sets that are
        expected inside (``filter_*``) and outside (``no_filter_*``) the
        filter.
        """
        here = os.path.dirname(__file__)

        # NOTE(review): both newick strings are read from the same file,
        # './data/simpleEx.nwk' -- confirm whether a leaf-only tree file was
        # intended for the second one.
        named_newick = utils.get_newick_string(
            os.path.join(here, './data/simpleEx.nwk'), type="nwk")
        leaf_only_newick = utils.get_newick_string(
            os.path.join(here, './data/simpleEx.nwk'), type="nwk")

        hog_path = os.path.join(here, './data/simpleEx.orthoxml')

        # Analysis meant for a newick named on internal nodes and leaves.
        self.h = ham.Ham(named_newick, hog_path)

        # Analysis meant for a newick named only at the leaves.
        self.hn = ham.Ham(leaf_only_newick, hog_path)

        # Identifiers expected to pass the HOG 2 filter.
        self.filter_genome = {"HUMAN", "MOUSE", "CANFA", "PANTR"}
        self.filter_genes = {'2', '32', '22', '12'}
        self.filter_genes_ext = {'HUMAN2', 'MOUSE2', 'CANFA2', 'PANTR2'}
        self.filter_hogs = {'2'}

        # Identifiers expected to be excluded by the HOG 2 filter.
        self.no_filter_genome = {"XENTR", "RATNO"}
        self.no_filter_genes = {
            '1', '11', '21', '31', '41', '51',
            '3', '13', '23', '33', '53',
            '34', '14',
        }
        self.no_filter_genes_ext = {
            'HUMAN1', 'PANTR1', 'CANFA1', 'MOUSE1', 'RATNO1', 'XENTR1',
            'HUMAN3', 'PANTR3', 'CANFA3', 'MOUSE3', 'XENTR3',
            'MOUSE4', 'PANTR4',
        }
        self.no_filter_hogs = {'1', '3'}

        # Filtered analysis restricted to HOG 2.
        hog_filter = ham.ParserFilter()
        hog_filter.add_hogs_via_hogId([2])
        self.hf = ham.Ham(named_newick, hog_path, filter_object=hog_filter)
Exemplo n.º 14
0
    def setUp(self):
        """Load simpleEx and keep its top-level HOG and extant gene dicts."""
        here = os.path.dirname(__file__)
        newick = utils.get_newick_string(
            os.path.join(here, './data/simpleEx.nwk'), type="nwk")
        hog_path = os.path.join(here, './data/simpleEx.orthoxml')

        analysis = ham.Ham(
            tree_file=newick,
            hog_file=hog_path,
            type_hog_file='orthoxml',
            use_internal_name=True,
        )
        self.ham_analysis = analysis
        self.hogs = analysis.get_dict_top_level_hogs()
        self.genes = analysis.get_dict_extant_genes()
Exemplo n.º 15
0
    def test_load_taxonomy_from_nwk_file_and_all_hogs_from_orthoxml_file_no_filter(
            self):
        """Load simpleEx without a filter and check taxonomy and HOG counts."""
        # Set up logging.
        logging.basicConfig(
            format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
            level=logging.INFO)

        here = os.path.dirname(__file__)

        # The newick file is the taxonomy reference; read it as a string.
        newick = utils.get_newick_string(
            os.path.join(here, './data/simpleEx.nwk'), type="nwk")

        # The orthoXML file is handed over as a path.
        hog_path = os.path.join(here, './data/simpleEx.orthoxml')

        # Build the Ham object on which all checks below are made.
        analysis = ham.Ham(tree_file=newick,
                           hog_file=hog_path,
                           use_internal_name=True,
                           type_hog_file='orthoxml')

        # The stored tree string must match the expected newick.
        expected_tree = (
            "(XENTR,(((HUMAN,PANTR)Primates,(MOUSE,RATNO)Rodents)Euarchontoglires,"
            "CANFA)Mammalia)Vertebrata;")
        self.assertEqual(analysis.taxonomy.tree_str, expected_tree)

        # Three top-level HOGs and nineteen extant genes are expected.
        self.assertEqual(len(analysis.top_level_hogs), 3)
        self.assertEqual(len(analysis.extant_gene_map), 19)

        # All six species must be present among the extant genomes.
        species_names = {
            genome.name for genome in analysis.get_list_extant_genomes()
        }
        self.assertEqual(
            species_names,
            {'RATNO', 'HUMAN', 'CANFA', 'PANTR', 'XENTR', 'MOUSE'})

        # Six leaves and five internal nodes are expected in the taxonomy.
        self.assertEqual(len(analysis.taxonomy.leaves), 6)
        self.assertEqual(len(analysis.taxonomy.internal_nodes), 5)
Exemplo n.º 16
0
    def test_lineage_comparative_analysis(self):
        """Compare MOUSE vertically with the MRCA of MOUSE and XENTR.

        Checks the retained, duplicated and lost entries of the resulting
        vertical map against the expected values for simpleEx.
        """
        # Initialise logging and the analysis object.
        logging.basicConfig(
            format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
            level=logging.WARNING)

        here = os.path.dirname(__file__)
        newick = utils.get_newick_string(
            os.path.join(here, './data/simpleEx.nwk'), type="nwk")
        hog_path = os.path.join(here, './data/simpleEx.orthoxml')

        analysis = ham.Ham(
            tree_file=newick,
            hog_file=hog_path,
            type_hog_file='orthoxml',
            use_internal_name=True,
        )

        # Select the genomes of interest: two extant genomes by name and
        # the ancestral genome at their most recent common ancestor.
        mouse_genome = analysis.get_extant_genome_by_name(name="MOUSE")
        frog_genome = analysis.get_extant_genome_by_name(name="XENTR")
        vertebrate_genome = analysis.get_ancestral_genome_by_mrca_of_genome_set(
            {mouse_genome, frog_genome})

        # Compare the mouse genome with the vertebrate ancestor.
        lineage_map = analysis.compare_genomes_vertically(
            mouse_genome, vertebrate_genome)

        # HOGs retained between the two levels.
        retained = _str_dict_one_value(lineage_map.get_retained())
        self.assertDictEqual({'<HOG(1)>': 'Gene(31)'}, retained)

        # HOGs that duplicated between the two levels.
        duplicated = _str_dict_array_value(lineage_map.get_duplicated())
        self.assertDictEqual({'<HOG(3)>': {'Gene(34)', 'Gene(33)'}},
                             duplicated)

        # Nothing is expected to have been lost.
        lost = _str_array(lineage_map.get_lost())
        self.assertSetEqual(set(), lost)
Exemplo n.º 17
0
    def test_load_taxonomy_from_nwk_file_and_from_orthoxml_file_with_filter_hog2(
            self):
        """Load simpleEx through a filter on HOG 2 and check what was parsed."""
        # Set up logging.
        logging.basicConfig(
            format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s",
            level=logging.INFO)

        here = os.path.dirname(__file__)

        # The newick file is the taxonomy reference; read it as a string.
        newick = utils.get_newick_string(
            os.path.join(here, './data/simpleEx.nwk'), type="nwk")

        # Prepare a filter that selects HOG 2 only.
        hog_filter = ham.ParserFilter()
        hog_filter.add_hogs_via_hogId([2])

        # The filter must hold the HOG id and no gene ids.
        self.assertEqual(set(hog_filter.HOGId_filter), {'2'})
        self.assertEqual(set(hog_filter.GeneExtId_filter), set())
        self.assertEqual(set(hog_filter.GeneIntId_filter), set())

        # The orthoXML file is handed over as a path.
        hog_path = os.path.join(here, './data/simpleEx.orthoxml')

        # Build the Ham object with the filter attached.
        analysis = ham.Ham(newick,
                           hog_path,
                           filter_object=hog_filter,
                           use_internal_name=True)
        self.assertEqual(hog_filter, analysis.filter_obj)

        # Gene and HOG ids held by the filter after construction.
        attached_filter = analysis.filter_obj
        self.assertSetEqual(set(attached_filter.geneUniqueId),
                            {'2', '32', '22', '12'})
        self.assertSetEqual(set(attached_filter.hogsId), {'2'})

        # Only one top-level HOG and four extant genes must be loaded.
        self.assertEqual(len(analysis.top_level_hogs), 1)
        self.assertEqual(len(analysis.extant_gene_map), 4)
Exemplo n.º 18
0
    def setUp(self):
        """Load simpleEx and cache its HOGs, genes and selected genomes."""
        here = os.path.dirname(__file__)
        newick = utils.get_newick_string(
            os.path.join(here, './data/simpleEx.nwk'), type="nwk")
        hog_path = os.path.join(here, './data/simpleEx.orthoxml')

        analysis = ham.Ham(tree_file=newick,
                           hog_file=hog_path,
                           type_hog_file='orthoxml')
        self.ham_analysis = analysis
        self.hogs = analysis.get_dict_top_level_hogs()
        self.genes = analysis.get_dict_extant_genes()

        # Extant genomes, looked up by name and stored as attributes.
        extant_lookup = (
            ("human", "HUMAN"),
            ("frog", "XENTR"),
            ("mouse", "MOUSE"),
            ("rat", "RATNO"),
            ("chimp", "PANTR"),
        )
        for attribute, genome_name in extant_lookup:
            setattr(self, attribute,
                    analysis.get_extant_genome_by_name(name=genome_name))

        # Ancestral genome at the MRCA of the human and frog genomes.
        self.vertebrates = analysis.get_ancestral_genome_by_mrca_of_genome_set(
            {self.human, self.frog})
Exemplo n.º 19
0
 def setUp(self):
     """Load the Conflict1 fixture and keep its first top-level HOG."""
     here = os.path.dirname(__file__)
     newick = utils.get_newick_string(
         os.path.join(here, './data/Conflict1/tree.nwk'), type="nwk")
     hog_path = os.path.join(here, './data/Conflict1/hog.orthoxml')

     analysis = ham.Ham(
         tree_file=newick,
         hog_file=hog_path,
         type_hog_file='orthoxml',
         use_internal_name=True,
     )
     self.ham_analysis = analysis
     self.hog = analysis.get_list_top_level_hogs()[0]
Exemplo n.º 20
0
    def setUp(self):
        """Create ``self.h`` from the simpleEx files, using internal names."""
        here = os.path.dirname(__file__)
        newick = utils.get_newick_string(
            os.path.join(here, './data/simpleEx.nwk'), type="nwk")
        hog_path = os.path.join(here, './data/simpleEx.orthoxml')

        self.h = ham.Ham(newick, hog_path, use_internal_name=True)
Exemplo n.º 21
0
 def test_wrong_filter(self):
     """Expect ``TypeError`` when ``filter_object`` is the string "x"."""
     self.assertRaises(TypeError,
                       ham.Ham,
                       self.nwk_str,
                       self.orthoxml_path,
                       filter_object="x")
Exemplo n.º 22
0
    def setUp(self):
        """Build two simpleEx analyses and the expected per-genome tables.

        One analysis uses internal node names and one does not. The
        ``exp_*`` / ``expected_*`` dictionaries map a genome name to a list
        of expected values; the column order is given above each table.
        """
        here = os.path.dirname(__file__)
        newick = utils.get_newick_string(
            os.path.join(here, './data/simpleEx.nwk'), type="nwk")
        hog_path = os.path.join(here, './data/simpleEx.orthoxml')

        shared_args = dict(tree_file=newick,
                           hog_file=hog_path,
                           type_hog_file='orthoxml')
        self.ham_analysis = ham.Ham(use_internal_name=True, **shared_args)
        self.ham_analysis_no_name = ham.Ham(use_internal_name=False,
                                            **shared_args)

        # Columns: gene, retained, duplicated, gain, lost.
        self.exp_full = {
            "Mammalia": [3, 2, 0, 1, 0],
            "Euarchontoglires": [4, 2, 2, 0, 0],
            "Primates": [4, 4, 0, 0, 0],
            "Rodents": [4, 4, 0, 0, 0],
            "Vertebrata": [2, None, None, None, None],
        }

        # Same columns; ancestral genomes are keyed by slash-joined leaf names.
        self.exp_full_nn = {
            "HUMAN/PANTR/MOUSE/RATNO/CANFA": [3, 2, 0, 1, 0],
            "HUMAN/PANTR/MOUSE/RATNO": [4, 2, 2, 0, 0],
            "HUMAN/PANTR": [4, 4, 0, 0, 0],
            "MOUSE/RATNO": [4, 4, 0, 0, 0],
            "XENTR/HUMAN/PANTR/MOUSE/RATNO/CANFA": [2, None, None, None, None],
            "HUMAN": [4, 3, 0, 1, 1],
            "PANTR": [4, 4, 0, 0, 0],
            "MOUSE": [4, 4, 0, 0, 0],
            "RATNO": [2, 1, 0, 1, 3],
            "CANFA": [3, 3, 0, 0, 0],
            "XENTR": [2, 2, 0, 0, 0],
        }

        # Columns: gene, retained, duplicated, lost.
        self.expected_level_1 = {
            "Mammalia": [1, 1, 0, 0],
            "Euarchontoglires": [1, 1, 0, 0],
            "Primates": [1, 1, 0, 0],
            "Rodents": [1, 1, 0, 0],
            "Vertebrata": [1, None, None, None],
            "HUMAN": [1, 1, 0, 0],
            "PANTR": [1, 1, 0, 0],
            "MOUSE": [1, 1, 0, 0],
            "RATNO": [1, 1, 0, 0],
            "CANFA": [1, 1, 0, 0],
            "XENTR": [1, 1, 0, 0],
        }

        self.expected_level_2 = {
            "Mammalia": [1, None, None, None],
            "Euarchontoglires": [1, 1, 0, 0],
            "Primates": [1, 1, 0, 0],
            "Rodents": [1, 1, 0, 0],
            "HUMAN": [1, 1, 0, 0],
            "PANTR": [1, 1, 0, 0],
            "MOUSE": [1, 1, 0, 0],
            "RATNO": [0, 0, 0, 1],
            "CANFA": [1, 1, 0, 0],
        }

        self.expected_level_3 = {
            "Mammalia": [1, 1, 0, 0],
            "Euarchontoglires": [2, 0, 2, 0],
            "Primates": [2, 2, 0, 0],
            "Rodents": [2, 2, 0, 0],
            "Vertebrata": [1, None, None, None],
            "HUMAN": [1, 1, 0, 1],
            "PANTR": [2, 2, 0, 0],
            "MOUSE": [2, 2, 0, 0],
            "RATNO": [0, 0, 0, 2],
            "CANFA": [1, 1, 0, 0],
            "XENTR": [1, 1, 0, 0],
        }
Exemplo n.º 23
0
    def setUp(self):
        """Build the simpleEx analyses used by the tests.

        Creates analyses from a newick string, from a phyloxml file and from
        the orthoXML content passed as a string, each with and/or without a
        filter on HOG 2, plus the identifier sets expected inside
        (``filter_*``) and outside (``no_filter_*``) that filter.
        """
        here = os.path.dirname(__file__)

        # NOTE(review): both newick strings are read from the same file,
        # './data/simpleEx.nwk' -- confirm whether a leaf-only tree file was
        # intended for the second one.
        named_newick = utils.get_newick_string(
            os.path.join(here, './data/simpleEx.nwk'), type="nwk")
        leaf_only_newick = utils.get_newick_string(
            os.path.join(here, './data/simpleEx.nwk'), type="nwk")

        phyloxml_path = os.path.join(here, './data/simpleEx.phyloxml')
        hog_path = os.path.join(here, './data/simpleEx.orthoxml')

        # Keep the raw orthoXML text for the string-based analyses below.
        with open(hog_path, 'r') as hog_handle:
            self.orthoxml_string = hog_handle.read()

        # Newick input, internal node names used.
        self.h = ham.Ham(named_newick, hog_path, use_internal_name=True)

        # Newick input, default handling of internal names.
        self.hn = ham.Ham(leaf_only_newick, hog_path)

        # Phyloxml input with explicit name tags.
        self.hpx = ham.Ham(
            phyloxml_path,
            hog_path,
            use_internal_name=True,
            tree_format='phyloxml',
            phyloxml_internal_name_tag='taxonomy_scientific_name',
            phyloxml_leaf_name_tag='taxonomy_code',
        )

        # Identifiers expected to pass the HOG 2 filter.
        self.filter_genome = {"HUMAN", "MOUSE", "CANFA", "PANTR"}
        self.filter_genes = {'2', '32', '22', '12'}
        self.filter_genes_ext = {'HUMAN2', 'MOUSE2', 'CANFA2', 'PANTR2'}
        self.filter_hogs = {'2'}

        # Identifiers expected to be excluded by the HOG 2 filter.
        self.no_filter_genome = {"XENTR", "RATNO"}
        self.no_filter_genes = {
            '1', '11', '21', '31', '41', '51',
            '3', '13', '23', '33', '53',
            '34', '14',
        }
        self.no_filter_genes_ext = {
            'HUMAN1', 'PANTR1', 'CANFA1', 'MOUSE1', 'RATNO1', 'XENTR1',
            'HUMAN3', 'PANTR3', 'CANFA3', 'MOUSE3', 'XENTR3',
            'MOUSE4', 'PANTR4',
        }
        self.no_filter_hogs = {'1', '3'}

        # Newick input restricted to HOG 2 by a filter.
        hog_filter = ham.ParserFilter()
        hog_filter.add_hogs_via_hogId([2])
        self.hf = ham.Ham(named_newick,
                          hog_path,
                          filter_object=hog_filter,
                          use_internal_name=True)

        # OrthoXML content passed as a string, no filter.
        self.hstring = ham.Ham(named_newick,
                               self.orthoxml_string,
                               use_internal_name=True,
                               orthoXML_as_string=True)

        # OrthoXML content passed as a string, with the HOG 2 filter.
        self.hfstring = ham.Ham(named_newick,
                                self.orthoxml_string,
                                filter_object=hog_filter,
                                use_internal_name=True,
                                orthoXML_as_string=True)