def write_taxon_json(obj, filepath):
    out_dir = os.path.split(filepath)[0]
    if out_dir:
        assure_dir_exists(out_dir)
    dtw = {}
    for k, v in obj.items():
        if isinstance(v, Taxon):
            dtw[k] = v.to_serializable_dict()
        else:
            dtw[k] = v
    write_as_json(dtw, filepath, separators=(',', ': '), indent=1)
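A minimal usage sketch of the helper above, assuming a hypothetical output path and plain (non-Taxon) values; real Taxon values would be converted via to_serializable_dict() before writing:

# hypothetical call; the parent directory 'scratch' is created if it does not already exist,
# and non-Taxon values are written through unchanged
write_taxon_json({'note': 'example', 'count': 2}, 'scratch/taxa.json')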
def build_partition_maps(taxalotl_config):
    rw = taxalotl_config.get_terminalized_res_by_id("ott", 'partition')
    if not rw.has_been_partitioned():
        partition_resources(taxalotl_config, ["ott"], PREORDER_PART_LIST)
    nsd = rw.build_paritition_maps()
    if not nsd:
        return
    pd = rw.partitioned_filepath
    mfp = os.path.join(pd, GEN_MAPPING_FILENAME)
    write_as_json(nsd, mfp, indent=2)
    _LOG.info("Partition maps written to {}".format(mfp))
def report_results(tag, duration, expected_fn, result):
    expected = json.load(codecs.open(expected_fn, 'rU', encoding='utf-8'))
    succeeded = True
    if expected != result:
        obtained_fn = expected_fn + '-obtained.json'
        write_as_json(result, obtained_fn)
        succeeded = False
    return {'tag': tag,
            'duration': duration,
            'expected-output': succeeded,
            'returned': True,
            'status': 200}
def cache_separator_names(taxalotl_config):
    rw = taxalotl_config.get_terminalized_res_by_id("ott", '')
    n2p = {}
    accumulate_taxon_dir_names(rw.partitioned_filepath, n2p)
    xl = list(n2p.keys())
    xl.sort()
    outfn = os.path.join(rw.partitioned_filepath, SEP_NAMES)
    write_as_json(xl, outfn)
    _LOG.info("Separator dir names written to {}".format(outfn))
    outfn = os.path.join(rw.partitioned_filepath, SEP_MAPPING)
    for k, v in n2p.items():
        if len(v) > 1:
            _LOG.info("separator {} has multiple dirs: {}".format(k, v))
    write_as_json(n2p, outfn)
    _LOG.info("Separator name to dir mapping written to {}".format(outfn))
def testCachedValidation(self):
    try:
        # noinspection PyPackageRequirements
        import dogpile.cache
    except ImportError:
        pass
    else:
        r = pathmap.get_test_repos()
        p = Phylesystem(r)
        nexson, sha = p.return_study('xy_10')
        r = p.add_validation_annotation(nexson, sha)
        cache_hits = p._cache_hits
        r1 = p.add_validation_annotation(nexson, sha)
        self.assertEqual(1 + cache_hits, p._cache_hits)
        self.assertEqual(r, r1)
        write_as_json(nexson, sys.stdout)
def testCachedValidation(self):
    try:
        import dogpile.cache
    except ImportError:
        pass
    else:
        r = pathmap.get_test_repos()
        p = Phylesystem(r)
        nexson, sha = p.return_study('xy_10')
        r = p.add_validation_annotation(nexson, sha)
        cache_hits = p._cache_hits
        r1 = p.add_validation_annotation(nexson, sha)
        self.assertEqual(1 + cache_hits, p._cache_hits)
        self.assertEqual(r, r1)
        import sys
        from peyotl import write_as_json
        write_as_json(nexson, sys.stdout)
def normalize_silva_taxonomy(source, destination, res_wrapper):
    assure_dir_exists(destination)
    depends_on = res_wrapper.depends_on
    taxalotl_config = res_wrapper.config
    expect_id_fp, ncbi_mapping_res = None, None
    for dep_id in depends_on:
        dep_res = taxalotl_config.get_terminalized_res_by_id(dep_id, 'normalize silva')
        if not dep_res.has_been_unpacked():
            unpack_resources(taxalotl_config, [dep_id])
        if dep_res.schema.lower() == 'id list':
            dep_fp = os.path.join(dep_res.unpacked_filepath, dep_res.local_filename)
            expect_id_fp = dep_fp
        elif dep_res.schema.lower() in {'silva taxmap', "fasta silva taxmap"}:
            dep_fp = dep_res.normalized_filepath
            ncbi_mapping_res = dep_res
        else:
            raise ValueError('unrecognized dependency schema {}'.format(dep_res.schema))
        if not os.path.isfile(dep_fp):
            raise ValueError("Silva processing dependency not found at: {}".format(dep_fp))
    if expect_id_fp is None:
        raise ValueError('ID list dependency not found.')
    if ncbi_mapping_res is None:
        raise ValueError('NCBI mapping dependency not found.')
    expect_tax_fp = os.path.join(res_wrapper.unpacked_filepath, res_wrapper.local_filename)
    if not os.path.isfile(expect_tax_fp):
        raise ValueError("Silva taxon file not found at: {}".format(expect_tax_fp))
    acc_to_trim = ncbi_mapping_res.parse_acc_to_trim_from_ncbi()
    preferred = parse_silva_ids(expect_id_fp)
    itd = InterimTaxonomyData()
    part_name_to_silva_id = parse_silva_taxon_file(expect_tax_fp, preferred, acc_to_trim, itd)
    _LOG.info('{} taxonomy IDs read'.format(len(itd.to_par)))
    res_wrapper.post_process_interim_tax_data(itd)
    itd.write_to_dir(destination)
    mapping_file = os.path.join(destination, GEN_MAPPING_FILENAME)
    write_as_json(part_name_to_silva_id, mapping_file, indent=2, separators=(',', ': '))
def pull_otifacts(taxalotl_config):
    dest_dir = taxalotl_config.resources_dir
    taxalotl_dir = os.path.split(os.path.abspath(dest_dir))[0]
    repo_dir = os.path.split(taxalotl_dir)[0]
    otifacts_dir = os.path.join(repo_dir, 'OTifacts')
    if not os.path.isdir(otifacts_dir):
        clone_otifacts(otifacts_dir)
    else:
        git_pull_otifacts(otifacts_dir)
    all_res = read_all_otifacts(otifacts_dir)
    for res_type in ['external taxonomy', 'open tree taxonomy', 'id list',
                     'open tree taxonomy idlist', "id to ncbi mapping"]:
        ext_tax = filter_otifacts_by_type(all_res, res_type)
        by_root_id = partition_otifacts_by_root_element(ext_tax)
        for root_key, res_dict in by_root_id.items():
            fp = os.path.join(dest_dir, root_key + '.json')
            write_as_json(res_dict, fp, indent=2, separators=(',', ': '))
def diagnose_new_separators(taxalotl_config, level_list):
    rw = taxalotl_config.get_terminalized_res_by_id("ott", 'diagnose-new-separators')
    if not rw.has_been_partitioned():
        partition_resources(taxalotl_config, ["ott"], PREORDER_PART_LIST)
    pd = rw.partitioned_filepath
    if level_list == [None]:
        level_list = PART_NAMES
    for part_name in level_list:
        nsd = rw.diagnose_new_separators(current_partition_key=part_name)
        if not nsd:
            _LOG.info("no new separators in {}.".format(part_name))
        else:
            for k, sd in nsd.items():
                _LOG.info('{} new separators in {}'.format(sd.num_separators(), part_name))
                fp = os.path.join(pd, k, NEW_SEP_FILENAME)
                write_as_json(sd.as_dict(), fp, sort_keys=True, indent=2)
                _LOG.info("new separators written to {}".format(fp))
def write_to_dir(self, destination):
    # Write out in OTT form
    d = tempfile.mkdtemp()
    fn = ['taxonomy.tsv', 'synonyms.tsv', 'forwards.tsv', 'about.json', 'details.json']
    try:
        syn_order = self.write_ott_taxonomy_tsv(os.path.join(d, 'taxonomy.tsv'))
        write_ott_synonyms_tsv(os.path.join(d, 'synonyms.tsv'),
                               self.synonyms, syn_order, self.details_log)
        if self.forwards:
            write_ott_forwards(os.path.join(d, 'forwards.tsv'), self.forwards)
        about_fp = os.path.join(d, 'about.json')
        write_as_json(self.about, about_fp, indent=2)
        self.finalize()
        write_ncbi_details_json(os.path.join(d, 'details.json'), self.details_log)
    except:
        for f in fn:
            tf = os.path.join(d, f)
            if os.path.exists(tf):
                try:
                    os.remove(tf)
                except:
                    pass
        try:
            os.rmdir(d)
        except:
            pass
        raise
    assure_dir_exists(destination)
    for f in fn:
        sfp = os.path.join(d, f)
        if os.path.exists(sfp):
            dfp = os.path.join(destination, f)
            os.rename(sfp, dfp)
    os.rmdir(d)
    with open(fn) as inp:
        for row in csv.reader(inp, delimiter=','):
            ott_id = 'ott{}'.format(row[1])
            if ott_id in nodes_annotations:
                n_passes += 1
            elif ott_id in bt_dict:
                n_failures += 1
                err('Taxon {} from monophyly is not monophyletic in the tree'.format(ott_id))
                mp.append(ott_id)
            else:
                skip_msg = 'Monophyly test for {} treated as a skipped test because the taxon is not in the lost taxa or in the tree. (it could be the case that the synthesis was run on a subset of the full taxonomy)\n'
                sys.stderr.write(skip_msg.format(ott_id))
                n_skipped += 1
    if 'MONOPHYLY_TEST_SOURCE_NAME' in os.environ:
        src = os.environ['MONOPHYLY_TEST_SOURCE_NAME']
    else:
        src = fn
    if mp:
        mtb = {'result': 'ERROR', 'data': [n_passes, n_skipped, n_failures, mp]}
    else:
        mtb = {'result': 'OK', 'data': [n_passes, n_skipped, n_failures, mp]}
    mtb['description'] = 'Check that the taxa from the monophyly tests listed in {} are monophyletic in the tree.'.format(src)
    summary['monophyly'] = mtb
else:
    sys.stderr.write('MONOPHYLY_TEST_CSV_FILE is not in the env, so no monophyly tests are being run\n')
# serialize the summary
# write_as_json(summary, os.path.join(assessments_dir, 'summary.json'), indent=2)
sys.exit(num_errors)
                    required=False,
                    help='Optional comma-separated list of flags to prune. '
                         'If omitted, the treemachine flags are used.')
parser.add_argument('--root', default=None, type=int, required=False,
                    help='Optional taxonomy root argument.')
args = parser.parse_args(sys.argv[1:])
ott_dir, output, log_filename, root = args.ott_dir, args.output, args.log, args.root
flags_str = args.flags
try:
    assert os.path.isdir(args.ott_dir)
except:
    sys.exit('Expecting ott-dir argument to be a directory. Got "{}"'.format(args.ott_dir))
ott = OTT(ott_dir=args.ott_dir)
if flags_str is None:
    flags = ott.TREEMACHINE_SUPPRESS_FLAGS
else:
    flags = flags_str.split(',')
create_log = log_filename is not None
with codecs.open(args.output, 'w', encoding='utf-8') as outp:
    log = ott.write_newick(outp,
                           label_style=OTULabelStyleEnum.CURRENT_LABEL_OTT_ID,
                           root_ott_id=root,
                           prune_flags=flags,
                           create_log_dict=create_log)
    outp.write('\n')
if create_log:
    write_as_json(log, log_filename)
            selected_study_found = True
        else:
            continue
    ga = ps.create_git_action(study_id)
    with ga.lock():
        ga.checkout(sha)
        if copy_phylesystem_file_if_differing(ga, sha, inc, out_dir, generic2concrete):
            num_moved += 1
        ga.checkout_master()
debug('{} total trees'.format(len(included)))
debug('{} JSON files copied'.format(num_moved))
if selected_study is not None:
    if selected_study_found:
        sys.exit(0)
    error('The selected tree {}_{}.json was not found in the collection.\n'.format(selected_study, selected_tree))
    sys.exit(1)
# now we write a "concrete" version of this snapshot
coll_name = os.path.split(args.collection)[-1]
concrete_collection = get_empty_collection()
concrete_collection['description'] = 'Concrete form of collection "{}"'.format(coll_name)
cd_list = concrete_collection['decisions']
for inc in included:
    concrete = generic2concrete[id(inc)]
    cd_list.append(concrete)
concrete_fn = os.path.join(out_dir, 'concrete_' + coll_name)
write_as_json(concrete_collection, concrete_fn)
parser.add_argument('flag_pruned_json', nargs=1, metavar='F', type=str)
parser.add_argument('higher_taxon_pruned_json', metavar='H', nargs=1, type=str)
parser.add_argument('combined_json', nargs=1, metavar='O', type=str)
args = parser.parse_args()
fj_fn = args.flag_pruned_json[0]
htj_fn = args.higher_taxon_pruned_json[0]
out_fn = args.combined_json[0]
blob = read_as_json(fj_fn)
higher_taxon_blob = read_as_json(htj_fn)
if higher_taxon_blob:
    p = blob['pruned']
    httk = 'higher-taxon-tip'
    intk = 'empty-after-higher-taxon-tip-prune'
    high_tax_tip_pruned = higher_taxon_blob.get(httk, {})
    internal_high_tax_tip_pruned = higher_taxon_blob.get(intk, {})
    p[httk] = high_tax_tip_pruned
    p[intk] = internal_high_tax_tip_pruned
    n_ht_in_pruned = len(internal_high_tax_tip_pruned)
    n_ht_pruned = len(high_tax_tip_pruned)
    blob['num_non_leaf_nodes'] -= n_ht_in_pruned
    blob['num_pruned_anc_nodes'] += n_ht_in_pruned
    blob['num_tips'] -= n_ht_pruned
    blob['num_nodes'] -= (n_ht_pruned + n_ht_in_pruned)
    del blob['num_monotypic_nodes']
    del blob['num_non_leaf_nodes_with_multiple_children']
    kl = [httk, intk]
else:
    kl = []
blob['pruning_keys_not_from_flags'] = kl
write_as_json(blob, out_fn)
def write_ncbi_details_json(fp, details_log):
    write_as_json(details_log, fp, indent=2)
nexson_blob = read_as_json(inp)
ntw = NexsonTreeWrapper(nexson_blob, tree_id, log_obj=log_obj)
assert ntw.root_node_id
taxonomy_treefile = os.path.join(args.out_dir, study_tree + '-taxonomy.tre')
try:
    ntw.prune_tree_for_supertree(ott=ott,
                                 to_prune_fsi_set=to_prune_fsi_set,
                                 root_ott_id=root,
                                 taxonomy_treefile=taxonomy_treefile,
                                 id_to_other_prune_reason=to_prune_for_reasons)
except EmptyTreeError:
    log_obj['EMPTY_TREE'] = True
out_log = os.path.join(args.out_dir, study_tree + '.json')
write_as_json(log_obj, out_log)
newick_fp = os.path.join(args.out_dir, study_tree + '.tre')


def compose_label(nodeid, node, otu):
    try:
        return '_'.join([otu['^ot:ottTaxonName'], str(node['@id']), 'ott' + str(otu['^ot:ottId'])])
    except:
        # internal nodes may lack otu's but we still want the node Ids
        return '_{}_'.format(str(node['@id']))


with codecs.open(newick_fp, 'w', encoding='utf-8') as outp:
    if not ntw.is_empty:
        nexson_frag_write_newick(outp,
def _main():
    import argparse
    _HELP_MESSAGE = '''Takes a filepath to Newick tree file with propinquity-style leaf labels -
unique numeric suffixes which identify the taxon. Writes a NexSON representation of the tree to '''
    parser = argparse.ArgumentParser(description=_HELP_MESSAGE,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("-i", "--ids",
                        required=True,
                        help="comma separated list of tree IDs to be assigned to the trees in the newick file.")
    parser.add_argument('newick', help='filepath of the newick tree')
    args = parser.parse_args()
    if not os.path.exists(args.newick):
        sys.exit('The file "{}" does not exist'.format(args.newick))
    tree_id_list = args.ids.split(',')
    if not tree_id_list:
        sys.exit('At least one tree ID must be provided')
    tree_id_it = iter(tree_id_list)
    out = codecs.getwriter('utf-8')(sys.stdout)
    pyid2int = {}
    curr_nd_counter = 1
    with codecs.open(args.newick, 'r', encoding='utf8') as inp:
        tree = parse_newick(stream=inp)
    tree_id = next(tree_id_it)
    nexson = get_empty_nexson()
    body = nexson['nexml']
    all_otus_groups = list(body['otusById'].values())
    assert len(all_otus_groups) == 1
    first_otus_group = all_otus_groups[0]
    all_trees_groups = list(body['treesById'].values())
    assert len(all_trees_groups) == 1
    first_trees_group = all_trees_groups[0]
    first_trees_group['^ot:treeElementOrder'].append(tree_id)
    otus = first_otus_group['otuById']
    all_trees_dict = first_trees_group['treeById']
    ntree = all_trees_dict.setdefault(tree_id, {})
    ebsi, nbi = {}, {}
    ntree['edgeBySourceId'] = ebsi
    ntree['nodeById'] = nbi
    root_node_id = None
    for node in tree._root.preorder_iter():
        nid = id(node)
        i = pyid2int.get(nid)
        if i is None:
            i = curr_nd_counter
            curr_nd_counter += 1
            pyid2int[nid] = i
        node_id_s = 'node{}'.format(i)
        otu_id_s = 'otu{}'.format(i)
        n_obj = nbi.setdefault(node_id_s, {})
        if node is tree._root:
            n_obj['@root'] = True
            root_node_id = node_id_s
        else:
            edge_id_s = 'edge{}'.format(i)
            pid = id(node.parent)
            pni = 'node{}'.format(pyid2int[pid])
            ed = ebsi.setdefault(pni, {})
            ed[edge_id_s] = {'@source': pni, '@target': node_id_s}
        if not node.children:
            n_obj['@otu'] = otu_id_s
            orig = node._id
            ott_id = ott_id_from_label(orig)
            otus[otu_id_s] = {"^ot:originalLabel": orig,
                              "^ot:ottId": ott_id,
                              "^ot:ottTaxonName": orig}
    assert root_node_id is not None
    ntree['^ot:rootNodeId'] = root_node_id
    write_as_json(nexson, out)
summary = _ot_call('treemachine/getSynthesisSourceList',
                   'curl-versions/getSynthesisSourceList.json',
                   lambda: otwrap.treemachine.synthetic_source_list)
summary_list.append(summary)
summary = _ot_call('taxomachine/autocompleteBoxQuery',
                   'curl-versions/autocompleteBoxQuery.json',
                   otwrap.taxomachine.autocomplete,
                   'Endoxyla',
                   'All life')
summary_list.append(summary)
summary = _ot_call('phylesystem/study_list',
                   'curl-versions/study_list.json',
                   lambda: otwrap.phylesystem_api.study_list)
summary_list.append(summary)
summary = _ot_call('phylesystem/pg_719',
                   'curl-versions/pg_719.json',
                   otwrap.phylesystem_api.get_study,
                   'pg_719')
summary_list.append(summary)
blob = {'time': timestamp.isoformat(),
        'time_string': timestamp.strftime('%A %H:%M:%S.%f (UTC) %d %B, %Y'),
        'summary': summary_list}
out = codecs.getwriter('utf-8')(sys.stdout)
write_as_json(blob, out, indent=1)
#!/usr/bin/env python
from peyotl import concatenate_collections, read_as_json, write_as_json

if __name__ == '__main__':
    import argparse
    import sys
    import os
    description = 'Takes a list of collections and writes a collection that is a concatenation of their decisions'
    parser = argparse.ArgumentParser(prog='collection_export.py', description=description)
    parser.add_argument('--output',
                        type=str,
                        required=True,
                        help='output filepath for collection json')
    parser.add_argument('collection',
                        default=None,
                        type=str,
                        nargs="*",
                        help='filepath for the collections JSON')
    args = parser.parse_args(sys.argv[1:])
    inp = [read_as_json(i) for i in args.collection]
    out = concatenate_collections(inp)
    write_as_json(out, args.output)
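The same merge can be sketched programmatically with the helpers the script imports; the input and output paths below are hypothetical:

# hypothetical paths; read each collection, concatenate their decision lists, write the result
merged = concatenate_collections([read_as_json(p) for p in ['colls/a.json', 'colls/b.json']])
write_as_json(merged, 'merged_collection.json')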
def flush(self, tax_dir):
    self.curr_tree.add_best_guess_rank_sort_number()
    self.prev_tree.add_best_guess_rank_sort_number()
    edit_list = []
    for nd in self.curr_tree.preorder():
        stat_flag, other = _get_nonsyn_flag_and_other(nd)
        if stat_flag == UpdateStatus.UNDIAGNOSED_CHANGE:
            ranks_differ = nd.best_rank_sort_number != other.best_rank_sort_number
            if ranks_differ:
                if nd.best_rank_sort_number == SPECIES_SORTING_NUMBER:
                    if other.best_rank_sort_number <= MAX_INFRASPECIFIC_NUMBER:
                        genus_nd = self.curr_tree.find_genus_for_alpha(nd)
                        if genus_nd:
                            other_genus = _get_nonsyn_flag_and_other(genus_nd)[1]
                            if self.prev_tree.does_first_contain_second(other_genus, other):
                                _alter_update_flag(nd, UpdateStatus.ELEVATED_TO_SP)
                elif other.best_rank_sort_number == SPECIES_SORTING_NUMBER:
                    if nd.best_rank_sort_number <= MAX_INFRASPECIFIC_NUMBER:
                        genus_nd = self.curr_tree.find_genus_for_alpha(nd)
                        if genus_nd:
                            other_genus = _get_nonsyn_flag_and_other(genus_nd)[1]
                            if self.prev_tree.does_first_contain_second(other_genus, other):
                                _alter_update_flag(nd, UpdateStatus.DEMOTED_TO_INFRA_SP)
            if _get_nonsyn_flag_and_other(nd)[0] == UpdateStatus.UNDIAGNOSED_CHANGE:
                _LOG.warn('persistent UNDIAGNOSED_CHANGE for {} and {}'.format(nd, other))
        if (not nd.children_refs) and nd.best_rank_sort_number >= MINIMUM_HIGHER_TAXON_NUMBER:
            if other \
                    and (not other.children_refs) \
                    and other.best_rank_sort_number >= MINIMUM_HIGHER_TAXON_NUMBER:
                _add_update_flag_bit(nd, UpdateStatus.OLDLY_BARREN)
            else:
                _add_update_flag_bit(nd, UpdateStatus.NEWLY_BARREN)
        if hasattr(nd, 'new_children'):
            for c in nd.new_children:
                if _get_nonsyn_flag_and_other(c)[0] & UpdateStatus.NAME_CHANGED:
                    self._detect_cascading_name_change(nd, c)
    for nd in self.curr_tree.preorder():
        ne = self._gen_edit_if_new(nd, {})
        if ne:
            edit_list.append(ne)
    for nd in self.prev_tree.preorder():
        ne = self._gen_prev_tree_nd_edit(nd, {})
        if ne:
            edit_list.append(ne)
    edit_ids = set()
    for edit in edit_list:
        ft = edit.get('focal_taxon')
        if ft is None:
            pt = edit['focal_taxon_prev']
            key = '{}_|edit|_prev_{}'.format(self.tag, pt['id'])
        else:
            key = '{}_|edit|_{}'.format(self.tag, ft['id'])
        assert key not in edit_ids
        edit_ids.add(key)
        edit['edit_id'] = key
    fp = os.path.join(tax_dir, UPDATE_ANALYSIS_FILENAME)
    with open(fp, 'w', encoding='utf-8') as outf:
        for opts in [outf, out_stream]:
            write_as_json(edit_list, opts, indent=' ', sort_keys=True)
    # curr_tree_par_ids = set()
    # prev_tree_par_ids = set()
    # for status_code, node_list in self.by_status_code.items():
    #     if status_code == UpdateStatus.UNCHANGED:
    #         continue
    #     if status_code in [UpdateStatus.DELETED_TERMINAL, UpdateStatus.DELETED_INTERNAL]:
    #         target = prev_tree_par_ids
    #     else:
    #         target = curr_tree_par_ids
    #     for nd in node_list:
    #         target.add(nd.par_id)
    #
    # curr_deepest_mod_id = _old_modified_subtree_ids(curr_tree_par_ids, self.curr_tree)
    # prev_deepest_mod_id = _old_modified_subtree_ids(prev_tree_par_ids, self.prev_tree)
    # emitted = set()
    # for par_id in curr_deepest_mod_id:
    #     par_nd = self.curr_tree.id_to_taxon[par_id]
    #     self.report_on_altered_contiguous_des(par_nd, True)
    # status_keys = [(i.value, i) for i in self.by_status_code.keys()]
    # status_keys.sort()
    # status_keys = [i[1] for i in status_keys]
    # status_keys.remove(UpdateStatus.TERMINAL_SUNK_TO_SYNONYM)
    # status_keys.remove(UpdateStatus.INTERNAL_SUNK_TO_SYNONYM)
    # for k in status_keys:
    #     for nd in self.by_status_code[k]:
    #         self._write_nd(nd)
    # Reinitialize...
    self.__init__(None, None)
from peyotl.collections_store.validation import validate_collection
from peyotl import write_as_json
# get_empty_collection is used below but was not imported in the original fragment;
# this import path is an assumption.
from peyotl.collections_store import get_empty_collection
import sys

# Expecting a lot of lines like pg_2359_4962 for 'pg_2359', 'tree4962'
inp_fn = sys.argv[1]
with open(inp_fn, 'r') as inp:
    lines = []
    for line in inp:
        line = line.strip()
        if (not line) or (line == 'taxonomy'):
            continue
        assert line.endswith('.tre')
        frag = line[:-4]
        s = frag.split('_')
        study_id, tree_frag = '_'.join(s[:-1]), s[-1]
        tree_id = 'tree' + tree_frag
        lines.append((study_id, tree_id))
c = get_empty_collection()
d = c['decisions']
for pair in lines:
    d.append({'SHA': '',
              'decision': 'INCLUDED',
              'name': '',
              'studyID': pair[0],
              'treeID': pair[1]})
assert not (validate_collection(c)[0])
write_as_json(c, sys.stdout)
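A worked example of the filename parsing above, using a hypothetical input line:

# 'pg_2359_4962.tre' -> strip '.tre' to get the fragment
frag = 'pg_2359_4962'
s = frag.split('_')                               # ['pg', '2359', '4962']
study_id, tree_frag = '_'.join(s[:-1]), s[-1]     # 'pg_2359', '4962'
print(study_id, 'tree' + tree_frag)               # pg_2359 tree4962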
study_tree = '.'.join(inp_fn.split('.')[:-1])  # strip extension
study_id, tree_id = propinquity_fn_to_study_tree(inp_fn)
nexson_blob = read_as_json(inp)
ntw = NexsonTreeWrapper(nexson_blob, tree_id, log_obj=log_obj)
assert ntw.root_node_id
taxonomy_treefile = os.path.join(args.out_dir, study_tree + '-taxonomy.tre')
try:
    ntw.prune_tree_for_supertree(ott=ott,
                                 to_prune_fsi_set=to_prune_fsi_set,
                                 root_ott_id=root,
                                 taxonomy_treefile=taxonomy_treefile,
                                 id_to_other_prune_reason=to_prune_for_reasons)
except EmptyTreeError:
    log_obj['EMPTY_TREE'] = True
out_log = os.path.join(args.out_dir, study_tree + '.json')
write_as_json(log_obj, out_log)
newick_fp = os.path.join(args.out_dir, study_tree + '.tre')


def compose_label(node, otu):
    try:
        return '_'.join([otu['^ot:ottTaxonName'], str(node['@id']), 'ott' + str(otu['^ot:ottId'])])
    except:
        # internal nodes may lack otu's but we still want the node Ids
        return '_{}_'.format(str(node['@id']))


with codecs.open(newick_fp, 'w', encoding='utf-8') as outp:
    if not ntw.is_empty:
        nexson_frag_write_newick(outp,
                                 ntw._edge_by_source,