def testNewickExport(self):
    """Check newick export of 'tree3' under three labelling configurations.

    1. ``ot:ottTaxonName`` labels with ``bracket_ingroup=True``: both ingroup
       marker comments must appear, and tips #1 and #2 (but not #3) must be
       flagged as unmapped.
    2. ``ot:ottTaxonName`` labels without bracketing: same unmapped-tip
       flags, but no ingroup markers.
    3. ``ot:originallabel`` labels: no ingroup markers and no unmapped-tip
       flags at all.

    Membership checks use ``assertIn``/``assertNotIn`` so that a failure
    reports the offending newick string instead of "False is not true".
    """
    study = pathmap.nexson_obj('10/pg_10.json')

    # Case 1: taxon-name labels with the ingroup bracketed.
    newick = extract_tree(
        study, 'tree3',
        PhyloSchema('newick', tip_label='ot:ottTaxonName', bracket_ingroup=True))
    self.assertIn('[pre-ingroup-marker', newick)
    self.assertIn('[post-ingroup-marker', newick)
    self.assertTrue(newick.startswith('('))
    self.assertIn('*tip #1 not mapped', newick)
    self.assertIn('*tip #2 not mapped', newick)
    self.assertNotIn('*tip #3 not mapped', newick)

    # Case 2: taxon-name labels, no ingroup bracketing requested.
    newick = extract_tree(
        study, 'tree3',
        PhyloSchema('newick', tip_label='ot:ottTaxonName'))
    self.assertNotIn('[pre-ingroup-marker', newick)
    self.assertNotIn('[post-ingroup-marker', newick)
    self.assertIn('*tip #1 not mapped', newick)
    self.assertIn('*tip #2 not mapped', newick)
    self.assertNotIn('*tip #3 not mapped', newick)
    self.assertTrue(newick.startswith('('))

    # Case 3: original labels never produce "not mapped" placeholders.
    newick = extract_tree(
        study, 'tree3',
        PhyloSchema('newick', tip_label='ot:originallabel'))
    self.assertNotIn('[pre-ingroup-marker', newick)
    self.assertNotIn('[post-ingroup-marker', newick)
    self.assertNotIn('*tip #', newick)
def testNexusConvByExtViaPS(self):
    """Serialize 'tree3' via a schema selected by the '.nex' extension.

    The schema name is passed as None, so only ``type_ext`` identifies the
    format; the serialized text is expected to begin with '#'.
    """
    study_blob = pathmap.nexson_obj('10/pg_10.json')
    schema = PhyloSchema(None,
                         type_ext='.nex',
                         content='tree',
                         content_id='tree3')
    serialized = schema.serialize(study_blob)
    self.assertTrue(serialized.startswith('#'))  # pylint: disable=E1103
def testOtuConvViaPS(self):
    """Serialize the single OTU 'otu190' as NexSON 1.2.1.

    The serialized text is expected to begin with '{'.
    """
    study_blob = pathmap.nexson_obj('10/pg_10.json')
    schema = PhyloSchema('nexson',
                         content='otu',
                         content_id='otu190',
                         version='1.2.1')
    serialized = schema.serialize(study_blob)
    self.assertTrue(serialized.startswith('{'))  # pylint: disable=E1103
def testTreesCulledNonmatcingConvViaPS(self):
    """Serialize 'tree2' as NexSON 1.2.1 with ``cull_nonmatching='true'``.

    The fixture starts with two trees. After serialization the assertions
    expect only 'tree2' to remain both in the source blob that was passed
    to ``serialize`` and in the parsed serialized output.
    """
    def assert_only_tree2(nexson):
        # Exactly one tree may remain, and it must be the requested one.
        remaining = extract_tree_nexson(nexson, tree_id=None)
        self.assertEqual(len(remaining), 1)
        self.assertEqual(remaining[0][0], 'tree2')

    blob = pathmap.nexson_obj('9/v1.2.json')
    self.assertEqual(len(extract_tree_nexson(blob, tree_id=None)), 2)
    schema = PhyloSchema('nexson',
                         content='tree',
                         content_id='tree2',
                         version='1.2.1',
                         cull_nonmatching='true')
    serialized = schema.serialize(blob)
    assert_only_tree2(blob)
    self.assertTrue(serialized.startswith('{'))  # pylint: disable=E1103
    assert_only_tree2(json.loads(serialized))
def testTreesCulledNonmatcingConvViaPS(self):
    """Check that culling non-matching trees leaves only 'tree2'.

    The two-tree fixture is serialized as NexSON 1.2.1 for 'tree2' with
    ``cull_nonmatching='true'``. Both the blob handed to ``serialize`` and
    the re-parsed output are then expected to hold exactly that one tree.
    """
    source = pathmap.nexson_obj('9/v1.2.json')
    initial_trees = extract_tree_nexson(source, tree_id=None)
    self.assertEqual(len(initial_trees), 2)

    culling_schema = PhyloSchema('nexson',
                                 content='tree',
                                 content_id='tree2',
                                 version='1.2.1',
                                 cull_nonmatching='true')
    text = culling_schema.serialize(source)

    # The source blob itself is inspected after serialization.
    kept = extract_tree_nexson(source, tree_id=None)
    self.assertEqual(len(kept), 1)
    self.assertEqual(kept[0][0], 'tree2')

    self.assertTrue(text.startswith('{'))  # pylint: disable=E1103

    # The serialized form must round-trip to the same single tree.
    round_tripped = json.loads(text)
    kept = extract_tree_nexson(round_tripped, tree_id=None)
    self.assertEqual(len(kept), 1)
    self.assertEqual(kept[0][0], 'tree2')
def testTreesCulledNonmatcingConvViaPSV0(self):
    """Verify that culling does not break conversion to other NexSON forms.

    'tree2' is serialized as NexSON version 0.0.0 with
    ``cull_nonmatching='true'``. The source blob and the parsed output must
    each contain only 'tree2', and the output must be detected as 0.0.0.
    """
    def assert_only_tree2(nexson):
        # Exactly one tree may remain, and it must be the requested one.
        remaining = extract_tree_nexson(nexson, tree_id=None)
        self.assertEqual(len(remaining), 1)
        self.assertEqual(remaining[0][0], 'tree2')

    blob = pathmap.nexson_obj('9/v1.2.json')
    self.assertEqual(len(extract_tree_nexson(blob, tree_id=None)), 2)
    schema = PhyloSchema('nexson',
                         content='tree',
                         content_id='tree2',
                         version='0.0.0',
                         cull_nonmatching='true')
    serialized = schema.serialize(blob)
    assert_only_tree2(blob)
    self.assertTrue(serialized.startswith('{'))  # pylint: disable=E1103
    parsed = json.loads(serialized)
    self.assertEqual(detect_nexson_version(parsed), '0.0.0')
    assert_only_tree2(parsed)
def testTreesCulledNonmatcingConvViaPSV0(self):
    """Verify that the culling does not break the conversion to other forms of NexSON.

    The two-tree fixture is serialized for 'tree2' as NexSON 0.0.0 with
    ``cull_nonmatching='true'``; the source blob and the re-parsed output
    are each expected to keep only 'tree2'.
    """
    source = pathmap.nexson_obj('9/v1.2.json')
    initial_trees = extract_tree_nexson(source, tree_id=None)
    self.assertEqual(len(initial_trees), 2)

    culling_schema = PhyloSchema('nexson',
                                 content='tree',
                                 content_id='tree2',
                                 version='0.0.0',
                                 cull_nonmatching='true')
    text = culling_schema.serialize(source)

    # The source blob itself is inspected after serialization.
    kept = extract_tree_nexson(source, tree_id=None)
    self.assertEqual(len(kept), 1)
    self.assertEqual(kept[0][0], 'tree2')

    self.assertTrue(text.startswith('{'))  # pylint: disable=E1103

    # The output must be in the requested version and hold the same tree.
    round_tripped = json.loads(text)
    self.assertEqual(detect_nexson_version(round_tripped), '0.0.0')
    kept = extract_tree_nexson(round_tripped, tree_id=None)
    self.assertEqual(len(kept), 1)
    self.assertEqual(kept[0][0], 'tree2')
def testSubTreesConvViaPS(self):
    """Serialize subtrees of 'tree3' as newick.

    Two subtree selectors are exercised: an explicit node id ('node508')
    and the symbolic 'ingroup'. Each result is expected to begin with '('.
    """
    for subtree_root in ('node508', 'ingroup'):
        # The fixture is loaded afresh for every selector.
        study_blob = pathmap.nexson_obj('10/pg_10.json')
        schema = PhyloSchema('newick',
                             content='subtree',
                             content_id=('tree3', subtree_root),
                             version='1.2.1')
        serialized = schema.serialize(study_blob)
        self.assertTrue(serialized.startswith('('))  # pylint: disable=E1103
def testPS(self):
    """Check which PhyloSchema constructor arguments raise ValueError.

    Rejected: an unknown schema, unknown content, no arguments at all,
    'nexson' without a version, 'nexson' with version '1.3', and a bogus
    ``tip_label`` for newick, nexus or nexml. Accepted: 'nexson' together
    with ``output_nexml2json='1.2'``.
    """
    rejected_first = (
        {'schema': 'bogus'},
        {'content': 'bogus'},
        {},
    )
    for bad_kwargs in rejected_first:
        self.assertRaises(ValueError, PhyloSchema, **bad_kwargs)

    # A version supplied via output_nexml2json must be accepted.
    PhyloSchema('nexson', output_nexml2json='1.2')

    rejected_second = (
        {'schema': 'nexson'},
        {'schema': 'nexson', 'version': '1.3'},
        {'schema': 'newick', 'tip_label': 'bogus'},
        {'schema': 'nexus', 'tip_label': 'bogus'},
        {'schema': 'nexml', 'tip_label': 'bogus'},
    )
    for bad_kwargs in rejected_second:
        self.assertRaises(ValueError, PhyloSchema, **bad_kwargs)
def testMimicPhylesystemExport(self):
    """Converting with a non-existent tree id must yield a falsy result.

    Exercised for both the 'newick' and 'nexus' output schemas, with the
    source declared as NexSON 1.2.1.
    """
    study_nexson = pathmap.nexson_obj('10/pg_10.json')
    src_schema = PhyloSchema('nexson', version='1.2.1')
    for target_format in ('newick', 'nexus'):
        out_schema = PhyloSchema(schema=target_format,
                                 content='tree',
                                 content_id='bogusID here')
        result_data = out_schema.convert(study_nexson,
                                         serialize=True,
                                         src_schema=src_schema)
        self.assertFalse(bool(result_data))
def __validate_output_nexml2json(kwargs, resource, type_ext, content_id=None):
    """Normalize request kwargs and build a PhyloSchema for the requested output.

    ``kwargs`` is modified in place:

    * ``output_nexml2json`` defaults to ``'0.0.0'`` when absent;
    * a non-empty string ``bracket_ingroup`` is coerced to a bool
      (``'f'``, ``'false'`` and ``'0'``, case-insensitively, mean False;
      any other non-empty string means True).

    :param kwargs: dict of request arguments; also forwarded to PhyloSchema
    :param resource: content type requested, passed as ``content``
    :param type_ext: file extension used to infer the output format
    :param content_id: optional id of the requested content

    A ValueError raised while building the schema is logged and its text is
    stored in ``msg``; ``msg`` is also set when the schema reports that it
    cannot convert from ``resource``.

    NOTE(review): ``repo_nexml2json`` and ``request`` are free names here;
    presumably module- or framework-level globals -- confirm.
    NOTE(review): as written, neither ``schema`` nor ``msg`` is returned or
    raised by this function -- confirm the caller-facing contract.
    """
    msg = None
    if 'output_nexml2json' not in kwargs:
        kwargs['output_nexml2json'] = '0.0.0'
    biv = kwargs.get('bracket_ingroup')
    # `unicode` only exists on Python 2; fall back to `str` alone elsewhere.
    try:
        text_types = (str, unicode)  # pylint: disable=E0602
    except NameError:
        text_types = (str,)
    if biv and isinstance(biv, text_types):
        kwargs['bracket_ingroup'] = biv.lower() not in ['f', 'false', '0']
    try:
        schema = PhyloSchema(schema=kwargs.get('format'),
                             type_ext=type_ext,
                             content=resource,
                             content_id=content_id,
                             repo_nexml2json=repo_nexml2json,
                             **kwargs)
        if not schema.can_convert_from(resource):
            msg = 'Cannot convert from {s} to {d}'.format(s=repo_nexml2json,
                                                          d=schema.description)
    except ValueError as x:  # `as` form is valid on Python 2.6+ and Python 3
        _LOG = api_utils.get_logger(request, 'ot_api.default.v1')
        msg = str(x)
        _LOG.exception('GET failing: {m}'.format(m=msg))
def testNexmlConvByExtViaPS(self):
    """Serialize a study via a schema selected by the '.nexml' extension.

    The source is declared explicitly as NexSON 1.2.1 and OTUs are labelled
    with 'otttaxonname'; the output is expected to begin with '<'.
    """
    study_blob = pathmap.nexson_obj('10/pg_10.json')
    target = PhyloSchema(type_ext='.nexml', otu_label='otttaxonname')
    source_schema = PhyloSchema('nexson', version='1.2.1')
    serialized = target.serialize(study_blob, src_schema=source_schema)
    self.assertTrue(serialized.startswith('<'))  # pylint: disable=E1103
def testOtuMapConvViaPS(self):
    """Serialize the 'otumap' content as NexSON 1.2.1.

    The serialized text is expected to begin with '{'.
    """
    study_blob = pathmap.nexson_obj('10/pg_10.json')
    schema = PhyloSchema('nexson', content='otumap', version='1.2.1')
    serialized = schema.serialize(study_blob)
    self.assertTrue(serialized.startswith('{'))  # pylint: disable=E1103
def _main():
    """Command-line entry point: convert a NexSON (or NeXML) file to newick.

    Parses the command line, reads the input file as UTF-8 and then either
    lists the tree ids it contains (``--list``) or writes the tree selected
    with ``--id`` as newick to ``--output`` (standard output if omitted).
    Tips are labelled with the field chosen by ``--tip-label``.

    Exits through ``sys.exit`` with a message when the tip label is not a
    legal choice or when the input/output file cannot be opened.
    """
    import sys
    import codecs
    import json
    import argparse
    _HELP_MESSAGE = '''NexSON (or NeXML) to newick converter'''
    _EPILOG = '''UTF-8 encoding is used (for input and output). Environmental variables used: NEXSON_LOGGING_LEVEL logging setting: NotSet, Debug, Warn, Info, Error NEXSON_LOGGING_FORMAT format string for logging messages. '''
    tip_label_list = PhyloSchema._otu_label_list
    for tl in tip_label_list:
        # Internal invariant: the prefix is stripped below to build the choices.
        assert(tl.startswith('ot:'))
    tip_labels_choices = [i[3:] for i in tip_label_list]
    parser = argparse.ArgumentParser(description=_HELP_MESSAGE,
                                     formatter_class=argparse.RawDescriptionHelpFormatter,
                                     epilog=_EPILOG)
    parser.add_argument("input", help="filepath to input")
    parser.add_argument("-i", "--id", metavar="TREE-ID", required=False,
                        help="The ID tree to emit")
    parser.add_argument("-o", "--output", metavar="FILE", required=False,
                        help="output filepath. Standard output is used if omitted.")
    parser.add_argument("-l", "--list", action="store_true", default=False,
                        help="Just list the tree IDs in the nexSON.")
    parser.add_argument("-x", "--xml", action="store_true", default=False,
                        help="Parse input as NeXML rather than NexSON.")
    tl_help = 'The field to use to label tips. Should be one of: "{}"'
    tl_help = tl_help.format('", "'.join(tip_labels_choices))
    parser.add_argument("-t", "--tip-label", metavar="STRING", required=False,
                        default='originallabel', help=tl_help)
    args = parser.parse_args()

    # Accept the label with or without its 'ot:' prefix, case-insensitively.
    otu_label = args.tip_label.lower()
    if not otu_label.startswith('ot:'):
        otu_label = 'ot:' + otu_label
    if otu_label not in tip_label_list:
        sys.exit('Illegal tip label choice "{}"\n'.format(args.tip_label))

    inpfn = args.input
    outfn = args.output
    # codecs.open always opens in binary mode, so the former 'U' flag had no
    # effect; it is dropped because it is rejected as an invalid mode on
    # Python 3.11+.
    try:
        inp = codecs.open(inpfn, mode='r', encoding='utf-8')
    except (IOError, OSError):
        # Only I/O failures are reported as "could not open"; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        sys.exit('nexson_newick: Could not open file "{fn}"\n'.format(fn=inpfn))
    if outfn is not None:
        try:
            out = codecs.open(outfn, mode='w', encoding='utf-8')
        except (IOError, OSError):
            sys.exit('nexson_newick: Could not open output filepath "{fn}"\n'.format(fn=outfn))
    else:
        out = codecs.getwriter('utf-8')(sys.stdout)

    try:
        # Read the whole input up front so the handle can be released.
        try:
            if args.xml:
                src_schema = PhyloSchema('nexml')
                blob = get_ot_study_info_from_nexml(inp)
            else:
                src_schema = None
                blob = json.load(inp)
        finally:
            inp.close()

        if args.list:
            schema = PhyloSchema(content='treelist', output_nexml2json='1.2.1')
            tree_ids = schema.convert(src=blob, src_schema=src_schema)
            out.write('{t}\n'.format(t='\n'.join(tree_ids)))
        else:
            schema = create_content_spec(content='tree', content_id=args.id,
                                         format='newick', otu_label=otu_label)
            try:
                schema.convert(src=blob, serialize=True, output_dest=out,
                               src_schema=src_schema)
            except KeyError:
                # Retry on the 'data' member when the blob has no top-level
                # 'nexml' / 'nex:nexml' key; otherwise the error is genuine.
                if 'nexml' not in blob and 'nex:nexml' not in blob:
                    blob = blob['data']
                    schema.convert(src=blob, serialize=True, output_dest=out,
                                   src_schema=src_schema)
                else:
                    raise
    finally:
        # Only close a file we opened ourselves; never close standard output.
        if outfn is not None:
            out.close()
import dendropy

# Inputs for this run: configuration file, Open Tree study/tree ids, and the
# working directory name.
configfi = "aws.config"
study_id = "ot_350"
tree_id = "Tr53297"
workdir = "scrape_ot_350"

# Read in the configuration information
conf = physcraper.ConfigObj(configfi)

# Get an existing tree from the Open Tree of Life, and convert it to newick
# format (tips labelled with their original labels).
nexson = physcraper.opentree_helpers.get_nexson(study_id, 'api')
newick = extract_tree(
    nexson, tree_id,
    PhyloSchema('newick',
                output_nexml2json='1.2.1',
                content="tree",
                tip_label="ot:originalLabel"))
tre = dendropy.Tree.get(data=newick,
                        schema="newick",
                        preserve_underscores=True)

# Pull down an alignment from TreeBASE.
dataset = physcraper.opentree_helpers.get_dataset_from_treebase(
    study_id, phylesystem_loc='api')
aln = None
# The order of the data matrices is arbitrary, so we choose one whose length
# matches the size of the tree's taxon namespace. There is no `break`, so the
# last matching matrix wins; `aln` stays None when nothing matches.
for mat in dataset.char_matrices:
    if len(mat) == len(tre.taxon_namespace):
        aln = mat
def testNewickConvViaPS(self):
    """Serialize 'tree3' as newick; the output is expected to begin with '('."""
    study_blob = pathmap.nexson_obj('10/pg_10.json')
    schema = PhyloSchema('newick', content='tree', content_id='tree3')
    serialized = schema.serialize(study_blob)
    self.assertTrue(serialized.startswith('('))  # pylint: disable=E1103
def generate_ATT_from_phylesystem(aln,
                                  workdir,
                                  config_obj,
                                  study_id,
                                  tree_id,
                                  phylesystem_loc='api',
                                  ingroup_mrca=None):
    """Gather tree, alignment and study info into an AlignTreeTax object.

    Alignment taxon labels are rewritten to otu ids where a match on the
    original label is found. Study and tree ids can be obtained by using
    ``python ./scripts/find_trees.py LINEAGE_NAME``.

    Spaces vs underscores kept being an issue, so all spaces are coerced to
    underscores when data are read in (in the alignment labels, in the
    newick string, and in the original labels used for matching).

    :param aln: dendropy :class:`DnaCharacterMatrix
        <dendropy.datamodel.charmatrixmodel.DnaCharacterMatrix>` alignment object
    :param workdir: path to working directory (made absolute before use)
    :param config_obj: config class containing the settings
    :param study_id: OToL study id of the corresponding phylogeny which shall be updated
    :param tree_id: OToL corresponding tree ID as some studies have several phylogenies
    :param phylesystem_loc: access the github version of the OpenTree data store,
        or a local clone
    :param ingroup_mrca: optional. OToL identifier of the mrca of the clade that
        shall be updated (can be subset of the phylogeny). A list is treated as
        a collection of OTT ids whose MRCA is looked up; any other truthy
        value is converted with ``int``.
    :return: object of class ATT
    """
    assert isinstance(aln, datamodel.charmatrixmodel.DnaCharacterMatrix), \
        "your alignment `%s` ist not of type DnaCharacterMatrix" % aln
    for tax in aln.taxon_namespace:
        tax.label = tax.label.replace(" ", "_")  # Forcing all spaces to underscore

    nexson = get_nexson(study_id, phylesystem_loc)
    newick = extract_tree(
        nexson, tree_id,
        PhyloSchema('newick',
                    output_nexml2json='1.2.1',
                    content="tree",
                    tip_label="ot:originalLabel"))
    # UGH Very heavy handed, need to make sure happens on alignment side as well.
    newick = newick.replace(" ", "_")
    # The tree is built on the alignment's taxon namespace.
    tre = Tree.get(data=newick,
                   schema="newick",
                   preserve_underscores=True,
                   taxon_namespace=aln.taxon_namespace)

    # This gets the taxa that are in the subtree with all of their info -
    # ott_id, original name, ...
    otus = get_subtree_otus(nexson, tree_id=tree_id)
    otu_dict = {}
    orig_lab_to_otu = {}
    for otu_id in otus:
        otu_dict[otu_id] = extract_otu_nexson(nexson, otu_id)[otu_id]
        otu_dict[otu_id]["^physcraper:status"] = "original"
        otu_dict[otu_id]["^physcraper:last_blasted"] = None
        orig = otu_dict[otu_id].get(u"^ot:originalLabel").replace(" ", "_")
        orig_lab_to_otu[orig] = otu_id

    # Relabel alignment taxa from original labels to otu ids.
    for tax in aln.taxon_namespace:
        if tax.label in otu_dict:
            sys.stdout.write("{} aligned\n".format(tax.label))
        else:
            try:
                tax.label = orig_lab_to_otu[tax.label].encode("ascii")
            except KeyError:
                # NOTE(review): the message says the taxon is being removed,
                # but nothing is removed in this function -- presumably the
                # pruning happens downstream; confirm.
                sys.stderr.write(
                    "{} doesn't have an otu id. It is being removed from the alignment. "
                    "This may indicate a mismatch between tree and alignment\n"
                    .format(tax.label))

    # need to prune tree to seqs and seqs to tree...
    otu_newick = tre.as_string(schema="newick")

    ott_ids = get_subtree_otus(nexson,
                               tree_id=tree_id,
                               subtree_id="ingroup",
                               return_format="ottid")
    if ingroup_mrca:
        # isinstance (rather than an exact type comparison) also accepts
        # list subclasses.
        if isinstance(ingroup_mrca, list):
            ott_ids = set(ingroup_mrca)
            ott_mrca = get_mrca_ott(ott_ids)
        else:
            ott_mrca = int(ingroup_mrca)
    elif ott_ids:  # if no ingroup is specified, ott_ids will be none
        ott_mrca = get_mrca_ott(ott_ids)
    else:  # just get the mrca for the whole tree
        ott_mrca = get_mrca_ott(
            [otu_dict[otu_id].get(u"^ot:ottId") for otu_id in otu_dict])

    workdir = os.path.abspath(workdir)
    return physcraper.aligntreetax.AlignTreeTax(otu_newick,
                                                otu_dict,
                                                aln,
                                                ingroup_mrca=ott_mrca,
                                                workdir=workdir,
                                                config_obj=config_obj)
def testTreeExport(self):
    """Export 'tree3' with a 'nexus' schema labelled by OTT taxon name.

    The exported text is expected to begin with '#'.
    """
    study = pathmap.nexson_obj('10/pg_10.json')
    nexus_schema = PhyloSchema('nexus', tip_label='ot:ottTaxonName')
    exported = extract_tree(study, 'tree3', nexus_schema)
    self.assertTrue(exported.startswith('#'))
def testNexusConvStudyViaPS(self):
    """Convert a study via a schema selected only by the '.nex' extension.

    The serialized result is expected to begin with '#'.
    """
    study_blob = pathmap.nexson_obj('10/pg_10.json')
    schema = PhyloSchema(type_ext='.nex')
    converted = schema.convert(study_blob, serialize=True)
    self.assertTrue(converted.startswith('#'))  # pylint: disable=E1103
def testUrlGen(self):
    """Check the (url, params) pair that ``phylesystem_api_url`` generates.

    Each case constructs a PhyloSchema, asks it for the API URL of study
    'pg_719' under a fixed prefix, and compares both the URL and the query
    parameters with the expected values. Covered: NexSON versions, the
    '.nexml' / '.nex' / '.tre' extensions with and without an OTU label,
    and newick tree / subtree / culled-tree requests.

    Every case checks the URL as well as the params. The '.nex' case with
    ``otu_label='ottid'`` previously checked only the params.
    """
    _prefix = 'https://devapi.opentreeoflife.org/v2'
    study_url = '{}/study/pg_719'.format(_prefix)
    nexml_url = '{}/study/pg_719.nexml'.format(_prefix)
    nex_url = '{}/study/pg_719.nex'.format(_prefix)
    tre_url = '{}/study/pg_719.tre'.format(_prefix)
    tree_url = '{}/study/pg_719/tree/tree1294.tre'.format(_prefix)
    subtree_url = '{}/study/pg_719/subtree/tree1294.tre'.format(_prefix)

    # (positional args, keyword args, expected url, expected params)
    cases = [
        (('nexson',), {'version': '1.0.0'},
         study_url, {'output_nexml2json': '1.0.0'}),
        (('nexson',), {'version': '1.2.1'},
         study_url, {'output_nexml2json': '1.2.1'}),
        (('nexson',), {'version': '0.0.0'},
         study_url, {'output_nexml2json': '0.0.0'}),
        ((), {'type_ext': '.nexml', 'otu_label': 'otttaxonname'},
         nexml_url, {'otu_label': 'ot:otttaxonname'}),
        ((), {'type_ext': '.nexml'},
         nexml_url, {}),
        ((), {'type_ext': '.nexml', 'otu_label': 'ottid'},
         nexml_url, {'otu_label': 'ot:ottid'}),
        ((), {'type_ext': '.nex', 'otu_label': 'otttaxonname'},
         nex_url, {'otu_label': 'ot:otttaxonname'}),
        ((), {'type_ext': '.nex'},
         nex_url, {}),
        ((), {'type_ext': '.nex', 'otu_label': 'ottid'},
         nex_url, {'otu_label': 'ot:ottid'}),
        ((), {'type_ext': '.tre', 'otu_label': 'otttaxonname'},
         tre_url, {'otu_label': 'ot:otttaxonname'}),
        ((), {'type_ext': '.tre'},
         tre_url, {}),
        ((), {'type_ext': '.tre', 'otu_label': 'ottid'},
         tre_url, {'otu_label': 'ot:ottid'}),
        (('newick',),
         {'content': 'tree', 'content_id': 'tree1294', 'otu_label': 'ottid'},
         tree_url, {'otu_label': 'ot:ottid'}),
        (('newick',),
         {'content': 'subtree', 'content_id': ('tree1294', 'node436709'),
          'otu_label': 'ottid'},
         subtree_url, {'otu_label': 'ot:ottid', 'subtree_id': 'node436709'}),
        (('newick',),
         {'content': 'tree', 'content_id': 'tree1294', 'otu_label': 'ottid',
          'cull_nonmatching': 'true'},
         tree_url, {'otu_label': 'ot:ottid', 'cull_nonmatching': 'true'}),
    ]
    for schema_args, schema_kwargs, expected_url, expected_params in cases:
        schema = PhyloSchema(*schema_args, **schema_kwargs)
        url, params = schema.phylesystem_api_url(_prefix, 'pg_719')
        self.assertEqual(expected_url, url)
        self.assertEqual(expected_params, params)
def testNexmlConvViaPS(self):
    """Serialize a study with the 'nexml' schema; output is expected to begin with '<'."""
    study_blob = pathmap.nexson_obj('10/pg_10.json')
    schema = PhyloSchema('nexml')
    serialized = schema.serialize(study_blob)
    self.assertTrue(serialized.startswith('<'))  # pylint: disable=E1103
def testNexusConvViaPS(self):
    """Convert 'tree3' with the 'nexus' schema; output is expected to begin with '#'."""
    study_blob = pathmap.nexson_obj('10/pg_10.json')
    schema = PhyloSchema('nexus', content='tree', content_id='tree3')
    converted = schema.convert(study_blob, serialize=True)
    self.assertTrue(converted.startswith('#'))  # pylint: disable=E1103
def testNewickConvStudyViaPS(self):
    """Convert a study via a schema selected only by the '.tre' extension.

    The serialized result is expected to begin with '('.
    """
    study_blob = pathmap.nexson_obj('9/v1.2.json')
    schema = PhyloSchema(type_ext='.tre')
    converted = schema.convert(study_blob, serialize=True)
    self.assertTrue(converted.startswith('('))  # pylint: disable=E1103