 def read_labelled_supertree(self):
     d = os.path.join(self.top_output_dir, 'labelled_supertree')
     p = 'labelled_supertree_out_degree_distribution.txt'
     lsodd = os.path.join(d, p)
     subprocess.call(['make', lsodd])
     subprocess.call(['make', os.path.join(d, 'labelled_supertree_ottnames.tre')])
     assert(os.path.exists(lsodd))
     blob = Extensible()
     blob.unprune_stats = read_as_json(os.path.join(d, 'input_output_stats.json'))
     blob.non_monophyletic_taxa = read_as_json(os.path.join(d, 'broken_taxa.json'))
     if blob.non_monophyletic_taxa['non_monophyletic_taxa'] is None:
         blob.non_monophyletic_taxa['non_monophyletic_taxa'] = {}
     return blob
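These snippets share a few helpers: read_as_json from peyotl, plus the Extensible and stripped_nonempty_lines utilities from propinquity. A minimal sketch of the behavior the examples assume (simplified stand-ins, not the real implementations):

import codecs
import json

def read_as_json(fp):
    # Parse a UTF-8 JSON file and return the deserialized object.
    with codecs.open(fp, 'r', encoding='utf-8') as inp:
        return json.load(inp)

def stripped_nonempty_lines(fp):
    # Return the stripped, nonempty lines of a text file as a list.
    with codecs.open(fp, 'r', encoding='utf-8') as inp:
        return [line.strip() for line in inp if line.strip()]

class Extensible(object):
    # Bare attribute bag; the readers attach their result fields to it.
    pass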
Example #2
 def read_exemplified_phylo(self):
     d = os.path.join(self.top_output_dir, 'exemplified_phylo')
     x = read_as_json(os.path.join(d, 'exemplified_log.json'))
     tx = x['taxa_exemplified']
     if not tx:
         tx = {}
     by_source_tree = {}
     for ott_id, exdict in tx.items():
         tm = exdict['trees_modified']
         for tree in tm:
             key = '.'.join(tree.split('.')[:-1])
             by_source_tree.setdefault(key, []).append(ott_id)
     for v in by_source_tree.values():
         v.sort()
     ptdd = os.path.join(d, 'pruned_taxonomy_degree_distribution.txt')
     subprocess.call(['make', ptdd])
     assert(os.path.exists(ptdd))
     ddlines = [i.split() for i in stripped_nonempty_lines(ptdd) if i.split()[0] == '0']
     assert(len(ddlines) == 1)
     leaf_line = ddlines[0]  # should be the out-degree-0 (leaf-count) row
     assert(len(leaf_line) == 2)
     blob = Extensible()
     blob.num_leaves_in_exemplified_taxonomy = int(leaf_line[1])
     blob.taxa_exemplified = tx
     blob.source_tree_to_ott_id_exemplified_list = by_source_tree
     f = os.path.join(d, 'nonempty_trees.txt')
     blob.nonempty_tree_filenames = stripped_nonempty_lines(f)
     blob.nonempty_trees = [propinquity_fn_to_study_tree(i) for i in blob.nonempty_tree_filenames]
     return blob
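propinquity_fn_to_study_tree maps a pruned-tree filename back to its (study_id, tree_id) pair. A hypothetical sketch, assuming filenames of the form '<study_id>@<tree_id>.tre' (the real naming convention may differ):

def propinquity_fn_to_study_tree(fn):
    # Hypothetical: assumes '<study_id>@<tree_id>.tre' filenames.
    if fn.endswith('.tre'):
        fn = fn[:-4]
    study_id, tree_id = fn.split('@')
    return study_id, tree_id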
Example #3
 def read_exemplified_phylo(self):
     d = os.path.join(self.top_output_dir, 'exemplified_phylo')
     x = read_as_json(os.path.join(d, 'exemplified_log.json'))
     tx = x['taxa_exemplified']
     if not tx:
         tx = {}
     by_source_tree = {}
     for ott_id, exdict in tx.items():
         tm = exdict['trees_modified']
         for tree in tm:
             key = '.'.join(tree.split('.')[:-1])
             by_source_tree.setdefault(key, []).append(ott_id)
     for v in by_source_tree.values():
         v.sort()
     ptdd = os.path.join(d, 'pruned_taxonomy_degree_distribution.txt')
     subprocess.call(['make', ptdd])
     assert (os.path.exists(ptdd))
     ddlines = [
         i.split() for i in stripped_nonempty_lines(ptdd)
         if i.split()[0] == '0'
     ]
     assert (len(ddlines) == 1)
     leaf_line = ddlines[0]  # should be the out-degree-0 (leaf-count) row
     assert (len(leaf_line) == 2)
     blob = Extensible()
     blob.num_leaves_in_exemplified_taxonomy = int(leaf_line[1])
     blob.taxa_exemplified = tx
     blob.source_tree_to_ott_id_exemplified_list = by_source_tree
     f = os.path.join(d, 'nonempty_trees.txt')
     blob.nonempty_tree_filenames = stripped_nonempty_lines(f)
     blob.nonempty_trees = [
         propinquity_fn_to_study_tree(i)
         for i in blob.nonempty_tree_filenames
     ]
     return blob
Example #4
def perform_separation(taxalotl_config, part_name, id_list, sep_fn):
    ott_res = taxalotl_config.get_terminalized_res_by_id(
        "ott", 'enforce-new-separators')
    if not ott_res.has_been_partitioned():
        partition_resources(taxalotl_config, ["ott"], PREORDER_PART_LIST)
    sep_mapping_fp = os.path.join(ott_res.partitioned_filepath, SEP_MAPPING)
    if not os.path.isfile(sep_mapping_fp):
        cache_separator_names(taxalotl_config)
    top_dir = get_part_dir_from_part_name(ott_res, part_name)
    active_sep_fn = os.path.join(top_dir, sep_fn)
    try:
        active_seps = return_sep_obj_copy_with_ott_fields(
            read_as_json(active_sep_fn))
        print(active_seps)
    except Exception:
        raise ValueError('{} does not exist for partition {}'.format(
            active_sep_fn, part_name))
    if id_list:
        resource_ids = id_list
    else:
        resource_ids = get_taxonomies_for_dir(top_dir)
    for rid in resource_ids:
        rw = taxalotl_config.get_resource_by_id(rid)
        print(rid, rw)
        perform_dynamic_separation(ott_res,
                                   res=rw,
                                   part_key=part_name,
                                   separation_by_ott=active_seps)
Example #5
 def get_separator_dict(self):
     from taxalotl.commands import SEP_MAPPING, cache_separator_names
     from peyotl import read_as_json
     fn = os.path.join(self.partitioned_dir, SEP_MAPPING)
     if not os.path.exists(fn):
         cache_separator_names(self)
     return read_as_json(fn)
Example #6
 def read_subproblems(self):
     d = os.path.join(self.top_output_dir, 'subproblems')
     blob = Extensible()
     conf_tax_json_fp = os.path.join(d, 'contesting-trees.json')
     conf_tax_info = read_as_json(conf_tax_json_fp)
     if not conf_tax_info:
         conf_tax_info = {}
     externalized_conf_tax_info = {}
     for ott_id, tree2node_info_list in conf_tax_info.items():
         tr_ob_li = []
         if ott_id.startswith('ott'):
             ott_id = ott_id[3:]
         externalized_conf_tax_info[ott_id] = tr_ob_li
         for study_tree_fn, node_info_list in tree2node_info_list.items():
             study_id, tree_id = propinquity_fn_to_study_tree(study_tree_fn)
             cf_nl = []
             tre_obj = {'study_id': study_id,
                        'tree_id': tree_id,
                        'tree_filename': study_tree_fn,
                        'conflicting_nodes': cf_nl}
             tr_ob_li.append(tre_obj)
             if len(node_info_list) < 2:
                 raise RuntimeError('read_subproblems < 2 node info elements for taxon ID = {}'.format(ott_id))
             for node_info in node_info_list:
                 rcfn = node_info['children_from_taxon']
                 cfn = [node_label2obj(i) for i in rcfn]
                 el = {'parent': node_label2obj(node_info['parent']),
                       'children_from_taxon': cfn
                      }
                 cf_nl.append(el)
     blob.contested_taxa = externalized_conf_tax_info
     blob.tree_files = stripped_nonempty_lines(os.path.join(d, 'subproblem-ids.txt'))
     id2num_leaves = {}
     for el in self.subproblem_solutions.subproblem_num_leaves_num_internal_nodes:
         id2num_leaves[el[0]] = el[1]
     by_num_phylo = []
     by_input = {}
     for s in blob.tree_files:
         assert s.endswith('.tre')
         pref = s[:-4]
         assert pref.startswith('ott')
         tree_name_file = os.path.join(d, pref + '-tree-names.txt')
         phylo_inputs = []
         for i in stripped_nonempty_lines(tree_name_file):
             x = i[:-4] if i.endswith('.tre') else i
             phylo_inputs.append(i)
             if x != 'TAXONOMY':
                 by_input.setdefault(x, []).append(pref)
         npi = len(phylo_inputs)
         by_num_phylo.append((npi, int(pref[3:]), s, phylo_inputs))
     by_num_phylo.sort(reverse=True)
     blob.sorted_by_num_phylo_inputs = [[i[2], i[3], id2num_leaves[i[2]]] for i in by_num_phylo]
     by_input = [(len(v), k, v) for k, v in by_input.items()]
     by_input.sort(reverse=True)
     blob.input_and_subproblems_sorted = [[i[1], i[2]] for i in by_input]
     return blob
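For orientation, the parsing loop above implies a contesting-trees.json payload shaped roughly like the following (a hypothetical minimal instance; node labels elided):

conf_tax_info = {
    'ott123': {  # contested taxon; any 'ott' prefix is stripped on output
        'pg_99@tree1.tre': [  # at least two node-info entries are required
            {'parent': '...', 'children_from_taxon': ['...', '...']},
            {'parent': '...', 'children_from_taxon': ['...']},
        ],
    },
}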
Example #7
 def read_cleaned_ott(self):
     blob = Extensible()
     d = os.path.join(self.top_output_dir, 'cleaned_ott')
     o = read_as_json(os.path.join(d, 'cleaned_ott.json'))
     for k, v in o.items():
         setattr(blob, k, v)
         if k == 'flags_to_prune':
             v.sort()
     blob.root_ott_id = self.config.root_ott_id
     return blob
Example #8
 def read_labelled_supertree(self):
     d = os.path.join(self.top_output_dir, 'labelled_supertree')
     p = 'labelled_supertree_out_degree_distribution.txt'
     lsodd = os.path.join(d, p)
     subprocess.call(['make', lsodd])
     subprocess.call(
         ['make',
          os.path.join(d, 'labelled_supertree_ottnames.tre')])
     assert (os.path.exists(lsodd))
     blob = Extensible()
     blob.unprune_stats = read_as_json(
         os.path.join(d, 'input_output_stats.json'))
     blob.non_monophyletic_taxa = read_as_json(
         os.path.join(d, 'broken_taxa.json'))
     if blob.non_monophyletic_taxa['non_monophyletic_taxa'] is None:
         blob.non_monophyletic_taxa['non_monophyletic_taxa'] = {}
     blob.non_monophyletic_taxa = add_taxonomy_metadata(
         blob.non_monophyletic_taxa)
     return blob
Example #10
 def read_assessments(self):
     d = os.path.join(self.top_output_dir, 'assessments')
     blob = Extensible()
     blob.assessments = read_as_json(os.path.join(d, 'summary.json'))
     blob.categories_of_checks = list(blob.assessments.keys())
     blob.categories_of_checks.sort()
     blob.categories_of_checks_with_errors = []
     for k, v in blob.assessments.items():
         if v['result'] != 'OK':
             blob.categories_of_checks_with_errors.append(k)
     blob.categories_of_checks_with_errors.sort()
     return blob
Example #12
def _read_json_and_coerce_to_otttaxon(tax_dir, misc_tax_dir, fn):
    r = {}
    for td in [tax_dir, misc_tax_dir]:
        rf = os.path.join(td, fn)
        if os.path.exists(rf):
            rd = read_as_json(rf)
            for k, v in rd.items():
                try:
                    k = int(k)
                except:
                    pass
                r[k] = Taxon(d=v)
    return r
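Taxon is taxalotl's taxon record type; for reading this snippet it only needs to wrap the stored dict. A sketch, assuming attribute-style access over the JSON fields:

class Taxon(object):
    # Sketch only; the real taxalotl Taxon normalizes and validates fields.
    def __init__(self, d):
        self.__dict__.update(d)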
Example #13
def get_auto_gen_part_mapper(res):
    fp = os.path.join(res.partitioned_filepath, GEN_MAPPING_FILENAME)
    if not os.path.isfile(fp):
        m = 'Mapping file not found at "{}"\nRun the build-partitions-maps command.'
        raise RuntimeError(m.format(fp))
    master_mapping = read_as_json(fp)
    a_list = list(res.alias_list)
    base_res = res.base_resource
    if base_res:
        a_list.extend(base_res.alias_list)
    poss_ids = [res.id] + a_list + [res.base_id]
    for k in poss_ids:
        if k in master_mapping:
            return master_mapping[k]
    m = 'No entry for ids {} found in "{}".'
    raise RuntimeError(m.format(', '.join(poss_ids), fp))
Example #14
 parser = argparse.ArgumentParser(
     prog=SCRIPT_NAME,
     description=
     'Simple tool to combine the logs from pruning via flags and pruning via higher-level taxa that have become tips'
 )
 parser.add_argument('flag_pruned_json', nargs=1, metavar='F', type=str)
 parser.add_argument('higher_taxon_pruned_json',
                     metavar='H',
                     nargs=1,
                     type=str)
 parser.add_argument('combined_json', nargs=1, metavar='O', type=str)
 args = parser.parse_args()
 fj_fn = args.flag_pruned_json[0]
 htj_fn = args.higher_taxon_pruned_json[0]
 out_fn = args.combined_json[0]
 blob = read_as_json(fj_fn)
 higher_taxon_blob = read_as_json(htj_fn)
 if higher_taxon_blob:
     p = blob['pruned']
     httk = 'higher-taxon-tip'
     intk = 'empty-after-higher-taxon-tip-prune'
     high_tax_tip_pruned = higher_taxon_blob.get(httk, {})
     internal_high_tax_tip_pruned = higher_taxon_blob.get(intk, {})
     p[httk] = high_tax_tip_pruned
     p[intk] = internal_high_tax_tip_pruned
     n_ht_in_pruned = len(internal_high_tax_tip_pruned)
     n_ht_pruned = len(high_tax_tip_pruned)
     blob['num_non_leaf_nodes'] -= n_ht_in_pruned
     blob['num_pruned_anc_nodes'] += n_ht_in_pruned
     blob['num_tips'] -= n_ht_pruned
     blob['num_nodes'] -= (n_ht_pruned + n_ht_in_pruned)
Example #15
 final_tree = os.path.join(top_dir, 'labelled_supertree', 'labelled_supertree.tre')
 # Check that we have the same # of leaves in the cleaned_ott and the final tree
 #
 tax_dd_file = os.path.join(assessments_dir, 'taxonomy_degree_distribution.txt')
 supertree_dd_file = os.path.join(assessments_dir, 'supertree_degree_distribution.txt')
 tdd = parse_degree_dist(tax_dd_file)
 sdd = parse_degree_dist(supertree_dd_file)
 if tdd[0] != sdd[0]:
     err('The number of leaves differed between the taxonomy and supertree')
     nt = {'result':'ERROR', 'data':[tdd[0][1], sdd[0][1]]}
 else:
     nt = {'result':'OK', 'data': tdd[0][1]}
 nt['description'] = 'Check that the cleaned version of the taxonomy and the supertree have the same number of leaves'
 summary['num_tips'] = nt
 annot_file = os.path.join(top_dir, 'annotated_supertree', 'annotations.json')
 annotations = read_as_json(annot_file)
 nodes_annotations = annotations['nodes']
 # Check that otc-taxonomy-parser and otc-unprune-solution-and-name-unnamed-nodes
 #   agree on the number of taxa that were lost
 #
 if False:
     ltb = {'result': 'Skipped test - have not updated tests to deal with 2 layers of taxon filtering', 'data': []}
     btb = dict(ltb)
     ub = dict(ltb)
 else:
     lt_file = os.path.join(assessments_dir, 'lost_taxa.txt')
     lt_name = 'otc-taxonomy-parser lost-taxon'
     lt_pair  = [lt_file, lt_name]
     lt_set = parse_otc_taxonomy_parser_lost_taxa(lt_file)
     bt_file = os.path.join(top_dir, 'labelled_supertree', 'broken_taxa.json')
     bt_name = 'otc-unprune-solution-and-name-unnamed-nodes broken_taxa.json'
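parse_degree_dist is defined elsewhere in propinquity. The checks above only rely on it returning rows of (out-degree, node count) pairs with the out-degree-0 leaf row sorting first, so a minimal sketch under that assumption is:

def parse_degree_dist(fp):
    # Assumed format: whitespace-separated 'out_degree count' lines, as in
    # the *_degree_distribution.txt files built in the examples above.
    rows = []
    for line in stripped_nonempty_lines(fp):
        parts = line.split()
        if not parts[0].isdigit():
            continue  # tolerate a header line
        rows.append([int(parts[0]), int(parts[1])])
    rows.sort()
    return rows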
Example #16
from peyotl import read_as_json
import sys

if __name__ == '__main__':
    import argparse
    import os
    bin_dir, SCRIPT_NAME = os.path.split(__file__)
    propinquity_dir = os.path.dirname(bin_dir)
    parser = argparse.ArgumentParser(prog=SCRIPT_NAME, description='Simple tool to combine the logs from pruning via flags and pruning via higher-level taxa that have become tips')
    parser.add_argument('flag_pruned_json', nargs=1, metavar='F', type=str)
    parser.add_argument('higher_taxon_pruned_json', metavar='H', nargs=1, type=str)
    parser.add_argument('combined_json', nargs=1, metavar='O', type=str)
    args = parser.parse_args()
    fj_fn = args.flag_pruned_json[0]
    htj_fn = args.higher_taxon_pruned_json[0]
    out_fn = args.combined_json[0]
    blob = read_as_json(fj_fn)
    higher_taxon_blob = read_as_json(htj_fn)
    if higher_taxon_blob:
        p = blob['pruned']
        httk = 'higher-taxon-tip'
        intk = 'empty-after-higher-taxon-tip-prune'
        high_tax_tip_pruned = higher_taxon_blob.get(httk, {})
        internal_high_tax_tip_pruned = higher_taxon_blob.get(intk, {})
        p[httk] = high_tax_tip_pruned
        p[intk] = internal_high_tax_tip_pruned
        n_ht_in_pruned = len(internal_high_tax_tip_pruned)
        n_ht_pruned = len(high_tax_tip_pruned)
        blob['num_non_leaf_nodes'] -= n_ht_in_pruned
        blob['num_pruned_anc_nodes'] += n_ht_in_pruned
        blob['num_tips'] -= n_ht_pruned
        blob['num_nodes'] -= (n_ht_pruned + n_ht_in_pruned)
Example #17
                     default=None,
                     type=int,
                     required=False,
                     help='Optional taxonomy root argument.')
 parser.add_argument('--input-files-list',
                     default=None,
                     type=str,
                     required=False,
                     help='A list of input NexSON filenames.')
 args = parser.parse_args(sys.argv[1:])
 ott_dir, out_dir, root = args.ott_dir, args.out_dir, args.root
 to_prune_for_reasons = {}
 nonflagged_json_fn = args.ott_prune_nonflagged_json
 if nonflagged_json_fn is not None:
     try:
         nonflagged_blob = read_as_json(nonflagged_json_fn)
     except:
         nonflagged_blob = None
     if nonflagged_blob:
         for reason, id_list in nonflagged_blob.items():
             for ott_id in id_list:
                 to_prune_for_reasons[ott_id] = reason
 flags_str = args.ott_prune_flags
 try:
     assert os.path.isdir(args.ott_dir)
 except:
     error('Expecting ott-dir argument to be a directory. Got "{}"'.format(args.ott_dir))
     sys.exit(1)
 if args.nexson:
     inp_files = list(args.nexson)
 else:
Example #18
def main():
    import argparse

    description = "The main CLI for taxalotl"
    p = argparse.ArgumentParser(description=description)
    p.add_argument("--config",
                   type=str,
                   help="the taxalotl.conf filepath (optional)")
    p.add_argument(
        "--show-completions",
        action="store_true",
        default=False,
        help="print the list of options for the next word in the command line")

    p.set_defaults(which="all")
    subp = p.add_subparsers(help="command help")
    # ANALYZE UPDATE
    analyze_update_p = subp.add_parser(
        'analyze-update',
        help="calculates a diff between the last version of a "
        "taxonomy used and the latest version downloaded.")
    analyze_update_p.add_argument('resources',
                                  nargs=2,
                                  help="IDs of the resources to analyzed.")
    _add_level_arg(analyze_update_p)
    analyze_update_p.set_defaults(which="analyze-update")

    # PULL OTifacts
    pull_otifacts_p = subp.add_parser(
        'pull-otifacts',
        help="refresh list of taxonomic artifacts from OTifacts repo")
    pull_otifacts_p.set_defaults(which="pull-otifacts")
    # STATUS
    status_p = subp.add_parser(
        'status', help="report the status of a resource (or all resources)")
    status_p.add_argument('resources',
                          nargs="*",
                          help="IDs of the resources to report status on")
    status_p.add_argument("-i",
                          "--ids-only",
                          action='store_true',
                          default=False,
                          help="just list the IDs")
    status_p.add_argument("--by-status",
                          action='store_true',
                          default=False,
                          help="group the report by status")
    status_p.add_argument(
        "--terminal",
        action='store_true',
        default=False,
        help="Report only on the terminalized resource of each type.")
    status_p.set_defaults(which="status")
    # COMPARE-TAXONOMIES
    compare_tax_p = subp.add_parser(
        'compare-taxonomies', help="compare taxonomies for a separated dir")
    _add_level_arg(compare_tax_p)
    compare_tax_p.set_defaults(which="compare-taxonomies")

    # CACHE-separator-names
    cache_p = subp.add_parser(
        'cache-separator-names',
        help="Accumulate a list of separator names for tab-completion")
    cache_p.set_defaults(which="cache-separator-names")
    # DOWNLOAD
    download_p = subp.add_parser(
        'download', help="download an artifact to your local filesystem")
    download_p.add_argument('resources',
                            nargs="+",
                            help="IDs of the resources to download")
    download_p.set_defaults(which="download")
    # UNPACK
    unpack_p = subp.add_parser(
        'unpack', help="unpack a resource (downloads if necessary)")
    unpack_p.add_argument('resources',
                          nargs="+",
                          help="IDs of the resources to unpack")
    unpack_p.set_defaults(which="unpack")
    # NORMALIZE
    normalize_p = subp.add_parser(
        'normalize', help="converts to the OTT format (unpacks if necessary)")
    normalize_p.add_argument('resources',
                             nargs="+",
                             help="IDs of the resources to normalize")
    normalize_p.set_defaults(which="normalize")
    # PARTITION
    partition_p = subp.add_parser('partition',
                                  help="Breaks the resource taxonomy")
    partition_p.add_argument('resources',
                             nargs="+",
                             help="IDs of the resources to partitition")
    _add_level_arg(partition_p)
    partition_p.set_defaults(which="partition")

    # INFO
    info_p = subp.add_parser('info', help="Report statistics about a resource")
    info_p.add_argument('resources', nargs="+", help="IDs of the resources")
    _add_level_arg(info_p)
    info_p.set_defaults(which="info")

    # DIAGNOSE-NEW-SEPARATORS
    diag_sep_p = subp.add_parser(
        'diagnose-new-separators',
        help="Uses the last OTT build to find taxa IDs that "
        "feature are common to the relevant inputs")
    _add_level_arg(diag_sep_p)
    diag_sep_p.set_defaults(which="diagnose-new-separators")
    # ENFORCE-NEW-SEPARATORS
    enf_sep_p = subp.add_parser(
        'enforce-new-separators',
        help="Uses the __sep__.json files created by "
        "diagnose-new-separators to partition by unproblematic "
        "taxa")
    enf_sep_p.add_argument('resources',
                           nargs="*",
                           help="IDs of the resources to separate")
    _add_level_arg(enf_sep_p)
    enf_sep_p.set_defaults(which="enforce-new-separators")
    # Align
    align_p = subp.add_parser(
        'align',
        help=
        "Attempts to align a new (parititioned) resource to the latest OTT for a level"
    )
    align_p.add_argument('resources',
                         nargs="*",
                         help="IDs of the resources to separate")
    _add_level_arg(align_p)
    align_p.set_defaults(which="align")

    # ACCUMULATE-SEPARATED-DESCENDANTS
    accum_sep_des_p = subp.add_parser(
        'accumulate-separated-descendants',
        help=
        "Should be run after enforce-separators and before compare-taxonomies")
    accum_sep_des_p.add_argument('resources',
                                 nargs="*",
                                 help="IDs of the resources")
    accum_sep_des_p.set_defaults(which="accumulate-separated-descendants")

    # BUILD-PARTITION-MAPS
    build_partition_maps_p = subp.add_parser(
        'build-partition-maps',
        help="Uses the last OTT build to find the "
        "ID mappings needed to "
        "partition the inputs taxonomies.")
    build_partition_maps_p.set_defaults(which="build-partition-maps")
    # CLEAN-PARTITION
    clean_p = subp.add_parser(
        'clean-partition',
        help=
        "remove the results of partition+enforce-new-separators for a resource."
    )
    clean_p.add_argument('resources',
                         nargs="*",
                         help="IDs of the resources to clean")
    clean_p.set_defaults(which='clean-partition')
    # CLEAN-SEPARATION
    clean_s_p = subp.add_parser(
        'clean-separation',
        help="remove the results the diagnose-new-separator for a resource.")
    _add_level_arg(clean_s_p)
    clean_s_p.set_defaults(which='clean-separation')

    # Handle --show-completions differently from the others, because
    #   argparse does not help us out here... at all
    if "--show-completions" in sys.argv:
        a = sys.argv[1:]
        univ = frozenset([
            '--config',
        ])
        sel_cmd = None
        num_cmds = 0
        for c in all_cmds:
            if c in a:
                if sel_cmd is None:
                    sel_cmd = c
                num_cmds += 1
        comp_list = []
        if sel_cmd is None:
            comp_list = []
            for u in univ:
                found = False
                for arg in a:
                    if arg.startswith(u):
                        found = True
                        break
                if not found:
                    comp_list.append(u)
            comp_list.extend(all_cmds)
        else:
            if sel_cmd in res_dep_cmds \
              or sel_cmd in ['compare-taxonomies'] \
              or sel_cmd in ver_inp_res_dep_cmds:
                # From Ned Batchelder's answer on http://stackoverflow.com/a/14728477
                class ArgumentParserError(Exception):
                    pass

                # noinspection PyClassHasNoInit
                class ThrowingArgumentParser(argparse.ArgumentParser):
                    def error(self, message):
                        raise ArgumentParserError(message)

                fake_parser = ThrowingArgumentParser()
                fake_parser.add_argument("--config", type=str)
                fake_parser.add_argument('blah', nargs="*")
                comp_list = []
                taxalotl_config = None
                try:
                    fa = fake_parser.parse_known_args()[0]
                    config = fa.config
                    taxalotl_config = TaxalotlConfig(filepath=config)
                    if sel_cmd in res_dep_cmds:
                        comp_list = list(
                            taxalotl_config.resources_mgr.resources.keys())
                    elif sel_cmd in ver_inp_res_dep_cmds:
                        comp_list = list(taxalotl_config.resources_mgr.
                                         abstract_input_resource_types())
                except Exception as _excep:
                    _LOG.warn('Exception: {}'.format(_excep))
                    pass

                if sel_cmd == 'status':
                    if '-i' not in a and '--ids-only' not in a:
                        comp_list.extend(["-i", "--ids-only"])
                    for x in ['--by-status', '--terminal']:
                        if x not in a:
                            comp_list.extend([x])
                elif sel_cmd == 'partition':
                    # sys.stderr.write(str(a))
                    if '--level' == a[-1] or (len(a) > 1
                                              and '--level' == a[-2]):
                        comp_list = list(NONTERMINAL_PART_NAMES)
                    elif '--level' not in a:
                        comp_list.extend(['--level'])
                elif sel_cmd in ('diagnose-new-separators',
                                 'enforce-new-separators'):
                    # sys.stderr.write(str(a))
                    if '--level' == a[-1] or (len(a) > 1
                                              and '--level' == a[-2]):
                        comp_list = list(TERMINAL_PART_NAMES)
                    elif '--level' not in a:
                        comp_list.extend(['--level'])
                elif sel_cmd in ['compare-taxonomies']:
                    rw = taxalotl_config.get_terminalized_res_by_id("ott", '')
                    outfn = os.path.join(rw.partitioned_filepath, SEP_NAMES)
                    if os.path.exists(outfn):
                        comp_list.extend(read_as_json(outfn))

        sys.stdout.write('{}\n'.format(' '.join(comp_list)))
    else:
        rc = main_post_parse(p.parse_args())
        sys.exit(rc)
Example #19
                     default=None,
                     type=int,
                     required=False,
                     help='Optional taxonomy root argument.')
 parser.add_argument('--input-files-list',
                     default=None,
                     type=str,
                     required=False,
                     help='A list of input NexSON filenames.')
 args = parser.parse_args(sys.argv[1:])
 ott_dir, out_dir, root = args.ott_dir, args.out_dir, args.root
 to_prune_for_reasons = {}
 nonflagged_json_fn = args.ott_prune_nonflagged_json
 if nonflagged_json_fn is not None:
     try:
         nonflagged_blob = read_as_json(nonflagged_json_fn)
     except:
         nonflagged_blob = None
     if nonflagged_blob:
         for reason, id_list in nonflagged_blob.items():
             for ott_id in id_list:
                 to_prune_for_reasons[ott_id] = reason
 flags_str = args.ott_prune_flags
 try:
     assert os.path.isdir(args.ott_dir)
 except:
     error('Expecting ott-dir argument to be a directory. Got "{}"'.format(
         args.ott_dir))
     sys.exit(1)
 if args.nexson:
     inp_files = list(args.nexson)
Example #20
 def get_primary_partition_map(self):
     return read_as_json(
         os.path.join(self.normalized_filedir, GEN_MAPPING_FILENAME))
Example #21
 def read_subproblems(self):
     d = os.path.join(self.top_output_dir, 'subproblems')
     blob = Extensible()
     conf_tax_json_fp = os.path.join(d, 'contesting-trees.json')
     conf_tax_info = read_as_json(conf_tax_json_fp)
     if not conf_tax_info:
         conf_tax_info = {}
     externalized_conf_tax_info = {}
     for ott_id, tree2node_info_list in conf_tax_info.items():
         tr_ob_li = []
         if ott_id.startswith('ott'):
             ott_id = ott_id[3:]
         externalized_conf_tax_info[ott_id] = tr_ob_li
         for study_tree_fn, node_info_list in tree2node_info_list.items():
             study_id, tree_id = propinquity_fn_to_study_tree(study_tree_fn)
             cf_nl = []
             tre_obj = {
                 'study_id': study_id,
                 'tree_id': tree_id,
                 'tree_filename': study_tree_fn,
                 'conflicting_nodes': cf_nl
             }
             tr_ob_li.append(tre_obj)
             if len(node_info_list) < 2:
                 raise RuntimeError(
                     'read_subproblems < 2 node info elements for taxon ID = {}'
                     .format(ott_id))
             for node_info in node_info_list:
                 rcfn = node_info['children_from_taxon']
                 cfn = [node_label2obj(i) for i in rcfn]
                 el = {
                     'parent': node_label2obj(node_info['parent']),
                     'children_from_taxon': cfn
                 }
                 cf_nl.append(el)
     blob.contested_taxa = externalized_conf_tax_info
     blob.tree_files = stripped_nonempty_lines(
         os.path.join(d, 'subproblem-ids.txt'))
     id2num_leaves = {}
     for el in self.subproblem_solutions.subproblem_num_leaves_num_internal_nodes:
         id2num_leaves[el[0]] = el[1]
     by_num_phylo = []
     by_input = {}
     for s in blob.tree_files:
         assert s.endswith('.tre')
         pref = s[:-4]
         assert pref.startswith('ott')
         tree_name_file = os.path.join(d, pref + '-tree-names.txt')
         phylo_inputs = []
         for i in stripped_nonempty_lines(tree_name_file):
             x = i[:-4] if i.endswith('.tre') else i
             phylo_inputs.append(i)
             if x != 'TAXONOMY':
                 by_input.setdefault(x, []).append(pref)
         npi = len(phylo_inputs)
         by_num_phylo.append((npi, int(pref[3:]), s, phylo_inputs))
     by_num_phylo.sort(reverse=True)
     blob.sorted_by_num_phylo_inputs = [[i[2], i[3], id2num_leaves[i[2]]]
                                        for i in by_num_phylo]
     by_input = [(len(v), k, v) for k, v in by_input.items()]
     by_input.sort(reverse=True)
     blob.input_and_subproblems_sorted = [[i[1], i[2]] for i in by_input]
     return blob
Example #22
#!/usr/bin/env python
from peyotl import read_as_json
import codecs
import json
import sys
try:
    subproblem_ids_file, in_annotations_file, out_annotations_file = sys.argv[1:]
except:
    sys.exit('Expecting 3 arguments:\n   subproblem_ids_file, in_annotations_file, out_annotations_file')
import os
bin_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
sys.path.append(os.path.join(bin_dir))
from document_outputs import stripped_nonempty_lines
subproblems = []
for s in stripped_nonempty_lines(subproblem_ids_file):
    assert s.endswith('.tre')
    subproblems.append(s[:-4])
jsonblob = read_as_json(in_annotations_file)
nodes_dict = jsonblob['nodes']
for ott_id in subproblems:
    d = nodes_dict.setdefault(ott_id, {})
    d['was_constrained'] = True
    d['was_uncontested'] = True
with codecs.open(out_annotations_file, 'w', encoding='utf-8') as out_stream:
    json.dump(jsonblob, out_stream, indent=2, sort_keys=True, separators=(',', ': '))
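The script takes three positional arguments, e.g. python mark_uncontested.py subproblem-ids.txt annotations.json annotations_out.json (script and file names here are illustrative): the root taxon of every uncontested subproblem gets was_constrained and was_uncontested set in the annotations' nodes map.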
Example #23
#!/usr/bin/env python
from peyotl import concatenate_collections, read_as_json, write_as_json

if __name__ == '__main__':
    import argparse
    import sys
    import os
    description = 'Takes a list of collections and writes a collection that is a concatenation of their decisions'
    parser = argparse.ArgumentParser(prog='collection_export.py', description=description)
    parser.add_argument('--output',
                        type=str,
                        required=True,
                        help='output filepath for collection json')
    parser.add_argument('collection',
                        default=None,
                        type=str,
                        nargs="*",
                        help='filepath for the collections JSON')
    args = parser.parse_args(sys.argv[1:])
    inp = [read_as_json(i) for i in args.collection]
    out = concatenate_collections(inp)
    write_as_json(out, args.output)
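Invocation follows from the parser above, e.g. python collection_export.py --output combined.json first_collection.json second_collection.json (input file names illustrative); the result is a single collection whose decisions are the concatenation of the inputs' decisions.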
Example #24
        nt = {'result':'ERROR', 'data':[tdd[0][1], sdd[0][1]]}
    else:
        nt = {'result':'OK', 'data': tdd[0][1]}
    nt['description'] = 'Check that the cleaned version of the taxonomy and the supertree have the same number of leaves'
    summary['num_tips'] = nt
    # Check that otc-taxonomy-parser and otc-unprune-solution-and-name-unnamed-nodes
    #   agree on the number of taxa that were lost
    #
    lt_file = os.path.join(assessments_dir, 'lost_taxa.txt')
    lt_name = 'otc-taxonomy-parser lost-taxon'
    lt_pair  = [lt_file, lt_name]
    lt_set = parse_otc_taxonomy_parser_lost_taxa(lt_file)
    bt_file = os.path.join(top_dir, 'labelled_supertree', 'broken_taxa.json')
    bt_name = 'otc-unprune-solution-and-name-unnamed-nodes broken_taxa.json'
    bt_pair  = [bt_file, bt_name]
    bt_dict = read_as_json(bt_file)['non_monophyletic_taxa']
    if not bt_dict:
        bt_dict = {}
    cleaned_ott_json_pruned = read_as_json(cleaned_taxonomy_json).get('pruned', {})
    # pruned because they became empty
    httip_key = 'higher-taxon-tip'
    int_key = 'empty-after-higher-taxon-tip-prune'
    htpruned_ids = set()
    for key in [httip_key, int_key]:
        pl = set(cleaned_ott_json_pruned.get(key, []))
        htpruned_ids.update(pl)

    lte = {}
    for ott_id in lt_set:
        ott_id_str = 'ott{}'.format(ott_id)
        if (ott_id_str not in bt_dict) and (ott_id not in htpruned_ids):
Example #25
from peyotl import read_as_json
import codecs
import json
import sys
try:
    subproblem_ids_file, in_annotations_file, out_annotations_file = sys.argv[
        1:]
except:
    sys.exit(
        'Expecting 3 arguments:\n   subproblem_ids_file, in_annotations_file, out_annotations_file'
    )
import os
bin_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
sys.path.append(os.path.join(bin_dir))
from document_outputs import stripped_nonempty_lines
subproblems = []
for s in stripped_nonempty_lines(subproblem_ids_file):
    assert s.endswith('.tre')
    subproblems.append(s[:-4])
jsonblob = read_as_json(in_annotations_file)
nodes_dict = jsonblob['nodes']
for ott_id in subproblems:
    d = nodes_dict.setdefault(ott_id, {})
    d['was_constrained'] = True
    d['was_uncontested'] = True
with codecs.open(out_annotations_file, 'w', encoding='utf-8') as out_stream:
    json.dump(jsonblob,
              out_stream,
              indent=2,
              sort_keys=True,
              separators=(',', ': '))