def test_singlular(self):
    """Clade, Phylogeny: Singular properties for plural attributes.

    Checks the ``confidence``/``taxonomy`` shortcuts of ``PX.Clade`` and the
    ``confidence`` shortcut of ``PX.Phylogeny`` against lists holding one,
    two and zero items.
    """
    bootstrap = PX.Confidence(0.9, "bootstrap")
    genus = PX.Taxonomy(rank="genus")

    # Clade: a single item is reachable through the singular property.
    node = PX.Clade(confidences=[bootstrap], taxonomies=[genus])
    self.assertEqual(node.confidence.type, "bootstrap")
    self.assertEqual(node.taxonomy.rank, "genus")

    # Clade: more than one item makes the singular lookup raise.
    node.confidences.append(bootstrap)
    with self.assertRaises(AttributeError):
        getattr(node, "confidence")
    node.taxonomies.append(genus)
    with self.assertRaises(AttributeError):
        getattr(node, "taxonomy")

    # Clade: an empty list yields None.
    node.confidences = []
    self.assertEqual(node.confidence, None)
    node.taxonomies = []
    self.assertEqual(node.taxonomy, None)

    # Phylogeny: the same three cases for its confidence property.
    phylogeny = PX.Phylogeny(True, confidences=[bootstrap])
    self.assertEqual(phylogeny.confidence.type, "bootstrap")
    phylogeny.confidences.append(bootstrap)
    with self.assertRaises(AttributeError):
        getattr(phylogeny, "confidence")
    phylogeny.confidences = []
    self.assertEqual(phylogeny.confidence, None)
def _parse_taxonomy(self, parent):
    """Build a ``PX.Taxonomy`` from the events following a taxonomy start tag.

    The taxonomy is created from ``parent.attrib``; (event, element) pairs
    are then pulled from ``self.context`` until the closing ``taxonomy``
    tag is seen, at which point ``parent`` is cleared and the result
    returned.  Only 'end' events contribute data.
    """
    taxon = PX.Taxonomy(**parent.attrib)
    for event, elem in self.context:
        namespace, tag = _split_namespace(elem.tag)
        if event != 'end':
            continue
        if tag == 'taxonomy':
            # Closing tag of the element being parsed: release it and stop.
            parent.clear()
            break
        if tag in ('id', 'uri'):
            # These tags are handled by the same-named method on self.
            convert = getattr(self, tag)
            setattr(taxon, tag, convert(elem))
        elif tag == 'common_name':
            taxon.common_names.append(_collapse_wspace(elem.text))
        elif tag == 'synonym':
            taxon.synonyms.append(elem.text)
        elif tag in ('code', 'scientific_name', 'authority', 'rank'):
            # ENH: check_str on rank
            setattr(taxon, tag, elem.text)
        elif namespace != NAMESPACES['phy']:
            # Element from a namespace other than NAMESPACES['phy'].
            taxon.other.append(self.other(elem, namespace, tag))
            parent.clear()
    return taxon
def _parse_taxonomy(self, parent):
    """Parse taxonomic information for a clade (PRIVATE).

    Starts from ``PX.Taxonomy(**parent.attrib)`` and fills it in from the
    'end' events read off ``self.context``.  Reading stops at the closing
    ``taxonomy`` tag, where ``parent`` is cleared.  Returns the populated
    taxonomy object.
    """
    result = PX.Taxonomy(**parent.attrib)
    for event, elem in self.context:
        namespace, tag = _split_namespace(elem.tag)
        if event != "end":
            continue
        if tag == "taxonomy":
            # End of the taxonomy element itself.
            parent.clear()
            break
        if tag in ("id", "uri"):
            # Delegate to self.id / self.uri to build the value.
            build = getattr(self, tag)
            setattr(result, tag, build(elem))
        elif tag == "common_name":
            name = _collapse_wspace(elem.text)
            result.common_names.append(name)
        elif tag == "synonym":
            result.synonyms.append(elem.text)
        elif tag in ("code", "scientific_name", "authority", "rank"):
            # ENH: check_str on rank
            setattr(result, tag, elem.text)
        elif namespace != NAMESPACES["phy"]:
            # Not in the NAMESPACES["phy"] namespace: keep it under .other.
            result.other.append(self.other(elem, namespace, tag))
            parent.clear()
    return result
def prepare_species_tree(FILE_TREE_IN, FILE_TREE_OUT):
    """Rewrite a phyloXML species tree so named clades carry taxonomy records.

    Reads the phyloXML document at FILE_TREE_IN, marks its first tree as
    rooted, and for every clade with a name replaces that name with a
    ``PhyloXML.Taxonomy`` whose id uses the 'ncbi_taxonomy' provider.  A
    leading 'INT' on the name is stripped before it is used as the id.  The
    scientific name comes from ``find_tax_name``; '(NA)' is used when that
    lookup raises KeyError.  The result is written to FILE_TREE_OUT.

    Args:
        FILE_TREE_IN: path of the phyloXML file to read.
        FILE_TREE_OUT: destination handed to ``PhyloXMLIO.write``.
    """
    # Close the input handle as soon as parsing finishes instead of leaking it.
    with open(FILE_TREE_IN, 'r') as tree_handle:
        treexml = PhyloXMLIO.read(tree_handle)
    tree = treexml[0]
    treexml.attributes.pop('schemaLocation', None)  # not supported by Forester
    tree.rooted = True
    for node in tree.clade.find_clades():
        if not node.name:
            continue
        tax_id = node.name
        if tax_id.startswith('INT'):
            tax_id = tax_id[3:]
        taxon = PhyloXML.Taxonomy(
            id=PhyloXML.Id(tax_id, provider='ncbi_taxonomy'))
        try:
            taxon.scientific_name = find_tax_name(tax_id)
        except KeyError:
            taxon.scientific_name = '(NA)'
        node._set_taxonomy(taxon)
        # The identifier now lives in the taxonomy record, not the clade name.
        node.name = None
    PhyloXMLIO.write(treexml, FILE_TREE_OUT)
def reconcile_tree(gene_tree_file, reconciled_file, rec_tag, pfam_id, db):
    """Tag a family's gene tree with taxonomy ids and reconcile it with GSDI.

    Steps, when reconciliation is needed:
      1. gunzip ``<tree_folder>/<pfam_id>.nw.gz`` to a temporary file,
         resolve polytomies with ete2 and convert the result to phyloXML;
      2. attach an 'ncbi_taxonomy' id and scientific name to every terminal
         clade and write the tree to ``rec_tag + 'txid.xml'``;
      3. pickle the taxonomy ids / genes that are present in
         ``all_species_txids`` to ``rec_tag + 'ids.pickle'``;
      4. run Forester's gsdi application and gzip ``reconciled_file``;
      5. remove the intermediate files.

    The work is skipped entirely when ``pplacer_flag == 1`` and every gene
    in ``pplacer_queries[pfam_id]`` is already listed in the existing pickle.

    Args:
        gene_tree_file: path prefix for the temporary uncompressed tree; a
            random numeric suffix is appended.
        reconciled_file: output path given to gsdi; gzipped on success.
        rec_tag: prefix for the pickle, the txid file and gsdi by-products.
        pfam_id: family identifier used in paths and messages.
        db: passed through to the taxonomy lookup helpers.

    Reads the module-level names pplacer_flag, pplacer_queries, tree_folder,
    all_species_txids, merged_taxid, lib_path and species_tree_data_path,
    and updates best_taxid_map in place.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed listing -- confirm against the original layout, in
    particular the ``tax_id < 0`` check and the Forester section.
    """
    ids_pickle = rec_tag + 'ids.pickle'
    if os.path.isfile(ids_pickle) and (pplacer_flag == 1):
        with open(ids_pickle, 'rb') as pickle_in:
            id_information = pickle.load(pickle_in)
        existing_genes = id_information['existing_genes']
        new_genes = set([w['id'] for w in pplacer_queries[pfam_id]])
        if not (new_genes - set(existing_genes)):
            print("All %s Genes for family %s have already been placed in the reconciled tree." % (
                len(new_genes), pfam_id))
            print("Skip Reconciliation for %s" % pfam_id)
            return
    txid_file = rec_tag + 'txid.xml'
    if not (os.path.isfile(ids_pickle)) or not (
            os.path.isfile(reconciled_file + '.gz')) or (pplacer_flag == 1):
        print("Running Reconciliation for: %s" % pfam_id)
        # A random suffix keeps the scratch file name unlikely to collide.
        rand_id = random.randint(1000000, 9999999)
        subprocess.check_call("gunzip -c %s/%s.nw.gz > %s.%d" %
                              (tree_folder, pfam_id, gene_tree_file, rand_id),
                              shell=True)
        tree = ete2.PhyloTree('%s.%d' % (gene_tree_file, rand_id), format=0)
        tree.resolve_polytomy()
        tree.write(format=0, outfile=txid_file + '.tmp.nw')
        if os.path.exists('%s.%d' % (gene_tree_file, rand_id)):
            subprocess.check_call("rm %s.%d" % (gene_tree_file, rand_id),
                                  shell=True)
        Phylo.convert(txid_file + '.tmp.nw', 'newick',
                      txid_file + '.tmp.xml', 'phyloxml')
        with open(txid_file + '.tmp.xml', 'r') as xml_in:
            treexml = PhyloXMLIO.read(xml_in)
        tree = treexml[0]
        treexml.attributes.pop('schemaLocation', None)  # not supported by Forester
        tree.rooted = True
        my_ids = set()
        my_query_by_taxid = {}
        for leaf in tree.clade.find_clades(terminal=True):
            # Leaf names look like "<name>/<rest>"; only the first part is used.
            up_name = leaf.name.split('/')[0]
            tax_id, tax_name = find_tax_id_unip(up_name, db)
            if tax_id not in all_species_txids:
                if tax_id in merged_taxid:
                    tax_id = merged_taxid[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                if tax_id in best_taxid_map:
                    tax_id = best_taxid_map[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                else:
                    tax_id0 = tax_id
                    tax_id, tax_name = find_best_taxid(tax_id, db)
                    if tax_id > 0:
                        # Remember the mapping so later leaves skip the search.
                        best_taxid_map[tax_id0] = tax_id
            if tax_id < 0:
                if (-tax_id) in merged_taxid:
                    tax_id = merged_taxid[-tax_id]
                    tax_name = find_tax_name(tax_id, db)
            my_query_by_taxid.setdefault(tax_id, []).append(up_name)
            my_ids.add(tax_id)
            my_tax_id = PhyloXML.Id(tax_id, provider='ncbi_taxonomy')
            taxon = PhyloXML.Taxonomy(id=my_tax_id)
            taxon.scientific_name = tax_name
            leaf._set_taxonomy(taxon)
        # Close (and therefore flush) the file before the external Java
        # process reads it.
        with open(txid_file, 'w') as txid_out:
            PhyloXMLIO.write(treexml, txid_out)
        os.system('rm ' + txid_file + '.tmp.nw')
        os.system('rm ' + txid_file + '.tmp.xml')
        print("Taxid file done for: %s" % pfam_id)
        existing_ids = list(set(my_ids) & set(all_species_txids))
        # Set copy for O(1) membership tests; the pickled value stays a list.
        existing_id_set = set(existing_ids)
        existing_genes = [
            g for txid in my_query_by_taxid
            for g in my_query_by_taxid[txid] if txid in existing_id_set
        ]
        with open(ids_pickle, 'wb') as pickle_out:
            pickle.dump(
                {
                    'pfam_id': pfam_id,
                    'existing_ids': existing_ids,
                    'existing_genes': existing_genes
                }, pickle_out)
        print("Pickle file done for: %s" % pfam_id)
        # NOTE(review): kept inside this branch because txid_file is only
        # produced here -- confirm against the original indentation.
        if os.path.exists(reconciled_file):
            os.system('rm ' + reconciled_file)
        os.system(
            "java -Xmx4g -cp %s/forester_1038.jar org.forester.application.gsdi -g %s %s/ncbi_2_fixed.xml %s"
            % (lib_path, txid_file, species_tree_data_path, reconciled_file))
        if os.path.exists(reconciled_file):
            if os.path.exists(reconciled_file + '.gz'):
                subprocess.check_call("rm %s.gz" % (reconciled_file),
                                      shell=True)
            subprocess.check_call("gzip %s" % (reconciled_file), shell=True)
        os.system('rm ' + rec_tag + 'reconciled_species_tree_used.xml')
        os.system('rm ' + rec_tag + 'reconciled_gsdi_log.txt')
        os.system('rm ' + txid_file)
        print("Reconciliation file done for: %s" % pfam_id)