Exemplo n.º 1
0
 def test_name_tree(self):
     """Check the name generated for the first source tree of a source.

     Serialises ``data/input/single_source_no_names.phyml``, picks the first
     ``source_tree`` element found under ``/phylo_storage/sources/source``
     and expects ``create_tree_name`` to return ``'Hill_2011_1'`` for it.
     """
     XML = etree.tostring(etree.parse(
         'data/input/single_source_no_names.phyml', parser),
                          pretty_print=True)
     xml_root = _parse_xml(XML)
     source_tree_element = xml_root.xpath(
         '/phylo_storage/sources/source/source_tree')[0]
     tree_name = create_tree_name(XML, source_tree_element)
     # assertEqual reports both values on failure, unlike the deprecated
     # assert_(a == b) form which only reports "False is not true".
     self.assertEqual(tree_name, 'Hill_2011_1')
Exemplo n.º 2
0
 def testSingleYear(self):
     """Subset ``sub_taxa.phyml`` by the year 2011 and check every year kept.

     Every ``year`` element in the subset must carry an ``integer_value``
     of 2011, and at least one such element must be present.
     """
     XML = etree.tostring(etree.parse('data/input/sub_taxa.phyml', parser),
                          pretty_print=True)
     searchTerms = {'years': [2011]}
     new_XML = create_subset(XML, searchTerms)  # these data are all 2011
     root = _parse_xml(new_XML)
     find = etree.XPath("//year")
     yrs = find(root)
     # Guard against a vacuous pass: with no year elements the loop below
     # would never execute a single assertion.
     self.assertTrue(len(yrs) > 0)
     for y in yrs:
         self.assertEqual(int(y.xpath('integer_value')[0].text), 2011)
Exemplo n.º 3
0
 def testRealDataCharMorphOrMol(self):
     """Subset on two character types with ``andSearch=False``.

     The search asks for "morphological" and "molecular" character types
     and the sorted source names of the subset must match the expected list.
     """
     wanted = ["Aleixo_2002","Aliabadian_etal_2007","Allende_etal_2001","Andersson_1999a","Aragon_etal_1999","Baker_Strauch_1988","Baker_etal_2005","Baker_etal_2006","Baker_etal_2007a","Baker_etal_2007b","Baptista_Visser_1999","Barber_Peterson_2004","Barhoum_Burns_2002","Bertelli_etal_2006"]
     criteria = {'character_types': ["morphological", "molecular"]}
     original = etree.tostring(
         etree.parse('data/input/old_stk_input.phyml', parser),
         pretty_print=True)
     subset_root = _parse_xml(
         create_subset(original, criteria, andSearch=False))
     # Collect every source name in the subset, in sorted order.
     found = sorted(
         src.attrib['name'] for src in subset_root.findall(".//source"))
     self.assertListEqual(wanted, found)
Exemplo n.º 4
0
 def testRealDataAllFossil(self):
     """Subset ``old_stk_input.phyml`` with the ``all_fossil`` search term.

     Expects exactly one source ("Baker_etal_2005") and exactly one
     ``source_tree`` element in the result.
     """
     XML = etree.tostring(
         etree.parse('data/input/old_stk_input.phyml', parser),
         pretty_print=True)
     searchTerms = {'fossil': "all_fossil"}
     new_XML = create_subset(XML, searchTerms)
     root = _parse_xml(new_XML)
     names = sorted(s.attrib['name'] for s in root.findall(".//source"))
     expected_names = ["Baker_etal_2005"]
     self.assertListEqual(expected_names, names)
     src_trs = root.findall(".//source_tree")
     # assertEqual shows the actual count on failure.
     self.assertEqual(len(src_trs), 1)
Exemplo n.º 5
0
 def testRealDataCharTaxonYearEmpty(self):
     """Subset by taxon "Gallus gallus" and year '2009'; expect nothing.

     The resulting subset must contain no ``source`` elements and no
     ``source_tree`` elements.
     """
     XML = etree.tostring(
         etree.parse('data/input/old_stk_input.phyml', parser),
         pretty_print=True)
     searchTerms = {'taxa': ["Gallus gallus"], 'years': ['2009']}
     new_XML = create_subset(XML, searchTerms)
     root = _parse_xml(new_XML)
     names = sorted(s.attrib['name'] for s in root.findall(".//source"))
     expected_names = []
     self.assertListEqual(expected_names, names)
     src_trs = root.findall(".//source_tree")
     # assertEqual shows the actual count on failure.
     self.assertEqual(len(src_trs), 0)
Exemplo n.º 6
0
 def testRealDataCharMorphMol(self):
     """Subset on "morphological" and "molecular" with default search mode.

     Unlike ``create_subset(..., andSearch=False)``, this call leaves
     ``andSearch`` at its default. Expects a single source
     ("Bertelli_etal_2006") with a single ``source_tree``.
     """
     XML = etree.tostring(
         etree.parse('data/input/old_stk_input.phyml', parser),
         pretty_print=True)
     searchTerms = {'character_types': ["morphological", "molecular"]}
     new_XML = create_subset(XML, searchTerms)
     root = _parse_xml(new_XML)
     names = sorted(s.attrib['name'] for s in root.findall(".//source"))
     expected_names = ["Bertelli_etal_2006"]
     self.assertListEqual(expected_names, names)
     src_trs = root.findall(".//source_tree")
     # assertEqual shows the actual count on failure.
     self.assertEqual(len(src_trs), 1)
Exemplo n.º 7
0
 def testRealDataCharCytb(self):
     """Subset ``old_stk_input.phyml`` on the character "cytb".

     Expects eight named sources and nine ``source_tree`` elements in the
     resulting subset.
     """
     XML = etree.tostring(
         etree.parse('data/input/old_stk_input.phyml', parser),
         pretty_print=True)
     searchTerms = {'characters': ["cytb"]}
     new_XML = create_subset(XML, searchTerms)
     root = _parse_xml(new_XML)
     names = sorted(s.attrib['name'] for s in root.findall(".//source"))
     expected_names = [
         "Aleixo_2002", "Allende_etal_2001", "Aragon_etal_1999",
         "Baker_etal_2005", "Baker_etal_2006", "Baker_etal_2007a",
         "Baker_etal_2007b", "Barhoum_Burns_2002",
     ]
     self.assertListEqual(expected_names, names)
     # and nine source trees across those sources
     src_trs = root.findall(".//source_tree")
     self.assertEqual(len(src_trs), 9)
Exemplo n.º 8
0
 def testRealDataCharType(self):
     """Subset ``old_stk_input.phyml`` on the "morphological" character type.

     Expects four named sources and seven ``source_tree`` elements in the
     resulting subset.
     """
     XML = etree.tostring(
         etree.parse('data/input/old_stk_input.phyml', parser),
         pretty_print=True)
     searchTerms = {'character_types': ["morphological"]}
     new_XML = create_subset(XML, searchTerms)
     root = _parse_xml(new_XML)
     names = sorted(s.attrib['name'] for s in root.findall(".//source"))
     expected_names = [
         "Aliabadian_etal_2007", "Andersson_1999a",
         "Baptista_Visser_1999", "Bertelli_etal_2006",
     ]
     self.assertListEqual(expected_names, names)
     # and seven source trees across those sources
     src_trs = root.findall(".//source_tree")
     self.assertEqual(len(src_trs), 7)
Exemplo n.º 9
0
    def test_sort_data(self):
        """Check the order of source names after ``_sort_data``.

        The names are read in document order (no extra sorting here), so the
        comparison verifies the ordering produced by ``_sort_data`` itself.
        """
        expected_names = ['Davis_2011', 'Hill_2011', 'Hill_Davis_2011']
        serialised = etree.tostring(
            etree.parse('data/input/create_matrix.phyml', parser),
            pretty_print=True)
        sorted_root = _sort_data(_parse_xml(serialised))
        # Pull the name attribute off every source element, in document order.
        source_finder = etree.XPath("//source")
        names = [src.attrib['name'] for src in source_finder(sorted_root)]
        self.assertListEqual(names, expected_names)
Exemplo n.º 10
0
 def testRealDataYears(self):
     """Subset ``old_stk_input.phyml`` by the year 1999.

     Expects four ``year`` elements, each with an ``integer_value`` of 1999,
     and the four expected source names.
     """
     XML = etree.tostring(
         etree.parse('data/input/old_stk_input.phyml', parser),
         pretty_print=True)
     searchTerms = {'years': [1999]}
     new_XML = create_subset(XML, searchTerms)
     root = _parse_xml(new_XML)
     find = etree.XPath("//year")
     yrs = find(root)
     for y in yrs:
         self.assertEqual(int(y.xpath('integer_value')[0].text), 1999)
     # len() replaces the hand-maintained loop counter.
     self.assertEqual(len(yrs), 4)
     names = sorted(s.attrib['name'] for s in root.findall(".//source"))
     expected_names = [
         "Andersson_1999a", "Andersson_1999b",
         "Aragon_etal_1999", "Baptista_Visser_1999",
     ]
     self.assertListEqual(expected_names, names)
Exemplo n.º 11
0
 def testCharType(self):
     """Subset ``create_matrix.phyml`` on the "molecular" character type.

     Expects two ``character`` elements, both of type "molecular", two
     named sources and two ``source_tree`` elements.
     """
     XML = etree.tostring(
         etree.parse('data/input/create_matrix.phyml', parser),
         pretty_print=True)
     searchTerms = {'character_types': ["molecular"]}
     new_XML = create_subset(XML, searchTerms)
     root = _parse_xml(new_XML)
     find = etree.XPath("//character")
     chrs = find(root)
     for c in chrs:
         self.assertEqual(c.attrib['type'], "molecular")
     # len() replaces the hand-maintained loop counter.
     self.assertEqual(len(chrs), 2)
     names = sorted(s.attrib['name'] for s in root.findall(".//source"))
     expected_names = ["Davis_2011", "Hill_2011"]
     self.assertListEqual(expected_names, names)
     # and two source trees too
     src_trs = root.findall(".//source_tree")
     self.assertEqual(len(src_trs), 2)
Exemplo n.º 12
0
    def test_add_weights(self):
        """Add weights to a bunch of trees.

        Each group of tree identifiers is given the weight ``1 / len(group)``
        through ``add_weights``. The weights are then read back from
        ``tree/weight/real_value`` under each named ``source_tree`` and
        compared, as strings, with the expected values.
        """
        XML = etree.tostring(etree.parse('data/input/check_data_ind.phyml',
                                         parser),
                             pretty_print=True)
        # see above
        expected_idents = [[
            'Hill_Davis_2011_2', 'Hill_Davis_2011_1', 'Hill_Davis_2011_3'
        ], ['Hill_Davis_2013_1', 'Hill_Davis_2013_2']]
        # so the first should end up with a weight of 0.33333 and the second with 0.5
        for ei in expected_idents:
            weight = 1.0 / float(len(ei))
            XML = add_weights(XML, ei, weight)

        expected_weights = [
            str(1.0 / 3.0),
            str(1.0 / 3.0),
            str(1.0 / 3.0),
            str(0.5),
            str(0.5)
        ]
        weights_in_xml = []
        # now check weights have been added to the correct part of the tree
        xml_root = _parse_xml(XML)
        # The document is not modified below, so the source_tree lookup is
        # evaluated once instead of once per identifier.
        find = etree.XPath("//source_tree")
        trees = find(xml_root)
        for ei in expected_idents:
            for tree in ei:
                for t in trees:
                    if t.attrib['name'] == tree:
                        weights_in_xml.append(
                            t.xpath("tree/weight/real_value")[0].text)

        self.assertListEqual(expected_weights, weights_in_xml)
Exemplo n.º 13
0
    def test_import_data(self):
        """Import an old-style data directory and check the generated PHYML.

        The PHYML text returned by ``import_old_data`` is written to a
        temporary file and validated against ``phylo_storage.rng``. Two kinds
        of validator-reported additions are tolerated (missing ``taxon`` and
        ``topology`` elements) as long as the other error lists for that file
        are empty. The parsed document is then checked for project name,
        number of sources, source names, and a few per-source details.
        """
        # Our validator object
        validator = SchemaValidator(rootDir="data")

        phyml = import_old_data('data/input/old_stk_test/', verbose=False)

        # We expect there to be missing taxon elements in the XML
        # as the user must fill these in, so check missing attributes, etc OK
        # and skip the missing element
        tolerated_additions = (
            "/phylo_storage/sources/source/source_tree/taxa_data/mixed_fossil_and_extant/taxon",
            "/phylo_storage/sources/source/source_tree/tree/topology",
        )

        temp_file_handle, temp_file = tempfile.mkstemp(suffix=".phyml")
        try:
            # Wrap the descriptor returned by mkstemp so it is closed, rather
            # than opening the path a second time and leaking the descriptor.
            with os.fdopen(temp_file_handle, "w") as f:
                f.write(phyml)
            validator.ValidateOptionFile(
                os.path.join("../../../schema", "phylo_storage.rng"),
                temp_file)
            # NOTE(review): the result is unused by this test; the call is
            # kept in case Passes() is more than a plain getter - confirm.
            passes = validator.Passes()
            optionErrors = validator.OptionErrors()
            validator.Reset()
            failures = []
            for phyml_file in optionErrors:
                errors = optionErrors[phyml_file]
                added_eles = errors[1]
                for err in added_eles:
                    if err in tolerated_additions:
                        # Only tolerated when nothing else was reported.
                        other_errors = (len(errors[0]) + len(errors[2]) +
                                        len(errors[3]))
                        if other_errors == 0:
                            continue
                    failures.append(phyml_file)
            # On failure the assertion message lists the offending files.
            self.assertEqual(failures, [])
        finally:
            # Remove the temporary file even when validation or the
            # assertion above fails.
            os.remove(temp_file)

        # parse XML and check various things
        XML = _parse_xml(phyml)
        name = XML.xpath('/phylo_storage/project_name/string_value')[0].text
        self.assertEqual(name, "old_stk_test")

        # check number of sources
        find = etree.XPath('//source')
        sources = find(XML)
        self.assertEqual(len(sources), 15)

        # check names of sources:
        expected_names = [
            'Allende_etal_2001', 'Andersson_1999b', 'Baker_etal_2006',
            'Aleixo_2002', 'Bertelli_etal_2006', 'Baker_etal_2007b',
            'Aragon_etal_1999', 'Baker_etal_2007a', 'Aliabadian_etal_2007',
            'Baker_Strauch_1988', 'Barhoum_Burns_2002', 'Barber_etal_2004',
            'Baker_etal_2005', 'Andersson_1999a', 'Baptista_etal_1999'
        ]
        find_trees = etree.XPath('source_tree')
        for s in sources:
            name = s.attrib['name']
            self.assertIn(name, expected_names)
            if name == "Bertelli_etal_2006":
                # this source publication has three trees, let's check that is the case!
                trees = find_trees(s)
                self.assertEqual(len(trees), 3)
            if name == "Baptista_etal_1999":
                volume = s.xpath(
                    './/bibliographic_information/article/volume/string_value'
                )[0].text
                self.assertEqual(volume, "140")