Example #1
def write_taxon_json(obj, filepath):
    out_dir = os.path.split(filepath)[0]
    if out_dir:
        assure_dir_exists(out_dir)
    dtw = {}
    for k, v in obj.items():
        if isinstance(v, Taxon):
            dtw[k] = v.to_serializable_dict()
        else:
            dtw[k] = v
    write_as_json(dtw, filepath, separators=(',', ": "), indent=1)
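Example #1 shows the pattern that recurs throughout this page: convert anything non-serializable into plain dicts, then pass the result to write_as_json together with a destination and json.dump-style keyword arguments. A minimal, self-contained sketch of that call follows; the payload, path, and directory handling are hypothetical, and the keyword arguments mirror those used in the example above.

import os
from peyotl import write_as_json

# Hypothetical JSON-serializable payload and output location.
payload = {'Felis': {'rank': 'genus'}}
out_dir = 'scratch'
os.makedirs(out_dir, exist_ok=True)   # stand-in for assure_dir_exists above
write_as_json(payload, os.path.join(out_dir, 'taxa.json'),
              indent=1, separators=(',', ': '))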
Example #2
def build_partition_maps(taxalotl_config):
    rw = taxalotl_config.get_terminalized_res_by_id("ott", 'partition')
    if not rw.has_been_partitioned():
        partition_resources(taxalotl_config, ["ott"], PREORDER_PART_LIST)
    nsd = rw.build_paritition_maps()
    if not nsd:
        return
    pd = rw.partitioned_filepath
    mfp = os.path.join(pd, GEN_MAPPING_FILENAME)
    write_as_json(nsd, mfp, indent=2)
    _LOG.info("Partitions maps written to {}".format(mfp))
Example #3
def report_results(tag, duration, expected_fn, result):
    expected = json.load(codecs.open(expected_fn, 'rU', encoding='utf-8'))
    succeeded = True
    if expected != result:
        obtained_fn = expected_fn + '-obtained.json'
        write_as_json(result, obtained_fn)
        succeeded = False
    return {'tag': tag,
            'duration': duration,
            'expected-output': succeeded,
            'returned': True,
            'status': 200
           }
Example #4
def cache_separator_names(taxalotl_config):
    rw = taxalotl_config.get_terminalized_res_by_id("ott", '')
    n2p = {}
    accumulate_taxon_dir_names(rw.partitioned_filepath, n2p)
    xl = list(n2p.keys())
    xl.sort()
    outfn = os.path.join(rw.partitioned_filepath, SEP_NAMES)
    write_as_json(xl, outfn)
    _LOG.info("Separator dir names written to {}".format(outfn))
    outfn = os.path.join(rw.partitioned_filepath, SEP_MAPPING)
    for k, v in n2p.items():
        if len(v) > 1:
            _LOG.info("separator {} has multiple dirs: {}".format(k, v))
    write_as_json(n2p, outfn)
    _LOG.info("Separator name to dir mapping written to {}".format(outfn))
Example #5
 def testCachedValidation(self):
     try:
         # noinspection PyPackageRequirements
         import dogpile.cache
     except:
         pass
     else:
         r = pathmap.get_test_repos()
         p = Phylesystem(r)
         nexson, sha = p.return_study('xy_10')
         r = p.add_validation_annotation(nexson, sha)
         cache_hits = p._cache_hits
         r1 = p.add_validation_annotation(nexson, sha)
         self.assertEqual(1 + cache_hits, p._cache_hits)
         self.assertEqual(r, r1)
         write_as_json(nexson, sys.stdout)
Example #6
 def testCachedValidation(self):
     try:
         import dogpile.cache
     except:
         pass
     else:
         r = pathmap.get_test_repos()
         p = Phylesystem(r)
         nexson, sha = p.return_study('xy_10')
         r = p.add_validation_annotation(nexson, sha)
         cache_hits = p._cache_hits
         r1 = p.add_validation_annotation(nexson, sha)
         self.assertEqual(1 + cache_hits, p._cache_hits)
         self.assertEqual(r, r1)
         import sys; from peyotl import write_as_json; 
         write_as_json(nexson, sys.stdout)
Example #7
 def testCachedValidation(self):
     try:
         import dogpile.cache
     except:
         pass
     else:
         r = pathmap.get_test_repos()
         p = Phylesystem(r)
         nexson, sha = p.return_study('xy_10')
         r = p.add_validation_annotation(nexson, sha)
         cache_hits = p._cache_hits
         r1 = p.add_validation_annotation(nexson, sha)
         self.assertEqual(1 + cache_hits, p._cache_hits)
         self.assertEqual(r, r1)
         import sys
         from peyotl import write_as_json
         write_as_json(nexson, sys.stdout)
Example #8
def normalize_silva_taxonomy(source, destination, res_wrapper):
    assure_dir_exists(destination)
    depends_on = res_wrapper.depends_on
    taxalotl_config = res_wrapper.config
    expect_id_fp, ncbi_mapping_res = None, None
    for dep_id in depends_on:
        dep_res = taxalotl_config.get_terminalized_res_by_id(
            dep_id, 'normalize silva')
        if not dep_res.has_been_unpacked():
            unpack_resources(taxalotl_config, [dep_id])
        if dep_res.schema.lower() == 'id list':
            dep_fp = os.path.join(dep_res.unpacked_filepath,
                                  dep_res.local_filename)
            expect_id_fp = dep_fp
        elif dep_res.schema.lower() in {'silva taxmap', "fasta silva taxmap"}:
            dep_fp = dep_res.normalized_filepath
            ncbi_mapping_res = dep_res
        else:
            raise ValueError('unrecognized dependency schema {}'.format(
                dep_res.schema))
        if not os.path.isfile(dep_fp):
            raise ValueError(
                "Silva processing dependency not found at: {}".format(dep_fp))
    if expect_id_fp is None:
        raise ValueError('ID list dependency not found.')
    if ncbi_mapping_res is None:
        raise ValueError('NCBI mapping dependency not found.')
    expect_tax_fp = os.path.join(res_wrapper.unpacked_filepath,
                                 res_wrapper.local_filename)
    if not os.path.isfile(expect_tax_fp):
        raise ValueError(
            "Silva taxon file not found at: {}".format(expect_tax_fp))
    acc_to_trim = ncbi_mapping_res.parse_acc_to_trim_from_ncbi()
    preferred = parse_silva_ids(expect_id_fp)
    itd = InterimTaxonomyData()
    part_name_to_silva_id = parse_silva_taxon_file(expect_tax_fp, preferred,
                                                   acc_to_trim, itd)
    _LOG.info('{} taxonomy IDs read'.format(len(itd.to_par)))
    res_wrapper.post_process_interim_tax_data(itd)
    itd.write_to_dir(destination)
    mapping_file = os.path.join(destination, GEN_MAPPING_FILENAME)
    write_as_json(part_name_to_silva_id,
                  mapping_file,
                  indent=2,
                  separators=(',', ': '))
Example #9
def pull_otifacts(taxalotl_config):
    dest_dir = taxalotl_config.resources_dir
    taxalotl_dir = os.path.split(os.path.abspath(dest_dir))[0]
    repo_dir = os.path.split(taxalotl_dir)[0]
    otifacts_dir = os.path.join(repo_dir, 'OTifacts')
    if not os.path.isdir(otifacts_dir):
        clone_otifacts(otifacts_dir)
    else:
        git_pull_otifacts(otifacts_dir)
    all_res = read_all_otifacts(otifacts_dir)
    for res_type in [
            'external taxonomy', 'open tree taxonomy', 'id list',
            'open tree taxonomy idlist', "id to ncbi mapping"
    ]:
        ext_tax = filter_otifacts_by_type(all_res, res_type)
        by_root_id = partition_otifacts_by_root_element(ext_tax)
        for root_key, res_dict in by_root_id.items():
            fp = os.path.join(dest_dir, root_key + '.json')
            write_as_json(res_dict, fp, indent=2, separators=(',', ': '))
Example #10
def diagnose_new_separators(taxalotl_config, level_list):
    rw = taxalotl_config.get_terminalized_res_by_id("ott",
                                                    'diagnose-new-separators')
    if not rw.has_been_partitioned():
        partition_resources(taxalotl_config, ["ott"], PREORDER_PART_LIST)
    pd = rw.partitioned_filepath
    if level_list == [None]:
        level_list = PART_NAMES
    for part_name in level_list:
        nsd = rw.diagnose_new_separators(current_partition_key=part_name)
        if not nsd:
            _LOG.info("no new separtors in {}.".format(part_name))
        else:
            for k, sd in nsd.items():
                _LOG.info('{} new separators in {}'.format(
                    sd.num_separators(), part_name))
                fp = os.path.join(pd, k, NEW_SEP_FILENAME)
                write_as_json(sd.as_dict(), fp, sort_keys=True, indent=2)
                _LOG.info("new separators written to {}".format(fp))
Example #11
    def write_to_dir(self, destination):
        # Write out in OTT form
        d = tempfile.mkdtemp()
        fn = [
            'taxonomy.tsv', 'synonyms.tsv', 'forwards.tsv', 'about.json',
            'details.json'
        ]
        try:
            syn_order = self.write_ott_taxonomy_tsv(
                os.path.join(d, 'taxonomy.tsv'))
            write_ott_synonyms_tsv(os.path.join(d, 'synonyms.tsv'),
                                   self.synonyms, syn_order, self.details_log)
            if self.forwards:
                write_ott_forwards(os.path.join(d, 'forwards.tsv'),
                                   self.forwards)

            about_fp = os.path.join(d, 'about.json')
            write_as_json(self.about, about_fp, indent=2)
            self.finalize()
            write_ncbi_details_json(os.path.join(d, 'details.json'),
                                    self.details_log)
        except:
            for f in fn:
                tf = os.path.join(d, f)
                if os.path.exists(tf):
                    try:
                        os.remove(tf)
                    except:
                        pass
            try:
                os.rmdir(d)
            except:
                pass
            raise
        assure_dir_exists(destination)
        for f in fn:
            sfp = os.path.join(d, f)
            if os.path.exists(sfp):
                dfp = os.path.join(destination, f)
                os.rename(sfp, dfp)
        os.rmdir(d)
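Example #11 writes every file into a temporary directory first and only renames the results into the destination once all of them have been produced, so a failure part-way through does not leave a partially written taxonomy dump behind. A stripped-down sketch of that write-then-rename idea follows; the helper name is hypothetical, and it assumes, as the example above does, that the temporary directory and the destination are on the same filesystem.

import os
import tempfile
from peyotl import write_as_json

def write_json_bundle(destination, named_blobs):
    """Hypothetical helper: write each JSON-serializable value in named_blobs
    to a temp dir, then move the files into destination only on success."""
    d = tempfile.mkdtemp()
    try:
        for fn, blob in named_blobs.items():
            write_as_json(blob, os.path.join(d, fn), indent=2)
    except BaseException:
        # On any failure, clean up the partial temp output and re-raise.
        for fn in named_blobs:
            tf = os.path.join(d, fn)
            if os.path.exists(tf):
                os.remove(tf)
        os.rmdir(d)
        raise
    os.makedirs(destination, exist_ok=True)   # stand-in for assure_dir_exists
    for fn in named_blobs:
        os.rename(os.path.join(d, fn), os.path.join(destination, fn))
    os.rmdir(d)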
Example #12
        with open(fn) as inp:
            for row in csv.reader(inp, delimiter=','):
                ott_id = 'ott{}'.format(row[1])
                if ott_id in nodes_annotations:
                    n_passes += 1
                elif ott_id in bt_dict:
                    n_failures += 1
                    err('Taxon {} from monophyly is not monophyletic in the tree'.format(ott_id))
                    mp.append(ott_id)
                else:
                    skip_msg = 'Monophyly test for {} treated as a skipped test because the taxon is not in the lost taxa or in the tree. (it could be the case that the synthesis was run on a subset of the full taxonomy)\n'
                    sys.stderr.write(skip_msg.format(ott_id))
                    n_skipped += 1
        if 'MONOPHYLY_TEST_SOURCE_NAME' in os.environ:
            src = os.environ['MONOPHYLY_TEST_SOURCE_NAME']
        else:
            src = fn
        if mp:
            mtb = {'result': 'ERROR', 'data': [n_passes, n_skipped, n_failures, mp]}
        else:
            mtb = {'result': 'OK', 'data': [n_passes, n_skipped, n_failures, mp]}
        mtb['description'] = 'Check that the taxa from the monophyly tests listed in {} are monophyletic in the tree.'.format(src)
        summary['monophyly'] = mtb
    else:
        sys.stderr.write('MONOPHYLY_TEST_CSV_FILE is not in the env, so no monophyly tests are being run\n')
    # serialize the summary
    #
    write_as_json(summary, os.path.join(assessments_dir, 'summary.json'), indent=2)
    sys.exit(num_errors)

Example #13
                        required=False,
                        help='Optional comma-separated list of flags to prune. '
                             'If omitted, the treemachine flags are used.')
    parser.add_argument('--root',
                        default=None,
                        type=int,
                        required=False,
                        help='Optional taxonomy root argument.')
    args = parser.parse_args(sys.argv[1:])
    ott_dir, output, log_filename, root = args.ott_dir, args.output, args.log, args.root
    flags_str = args.flags
    try:
        assert os.path.isdir(args.ott_dir)
    except:
        sys.exit('Expecting ott-dir argument to be a directory. Got "{}"'.format(args.ott_dir))
    ott = OTT(ott_dir=args.ott_dir)
    if flags_str is None:
        flags = ott.TREEMACHINE_SUPPRESS_FLAGS
    else:
        flags = flags_str.split(',')
    create_log = log_filename is not None
    with codecs.open(args.output, 'w', encoding='utf-8') as outp:
        log = ott.write_newick(outp,
                               label_style=OTULabelStyleEnum.CURRENT_LABEL_OTT_ID,
                               root_ott_id=root,
                               prune_flags=flags,
                               create_log_dict=create_log)
        outp.write('\n')
    if create_log:
        write_as_json(log, log_filename)
Example #14
                    selected_study_found = True
                else:
                    continue
            ga = ps.create_git_action(study_id)
            with ga.lock():
                ga.checkout(sha)
                if copy_phylesystem_file_if_differing(ga, sha, inc, out_dir,
                                                      generic2concrete):
                    num_moved += 1
                ga.checkout_master()
    debug('{} total trees'.format(len(included)))
    debug('{} JSON files copied'.format(num_moved))
    if selected_study is not None:
        if selected_study_found:
            sys.exit(0)
        error(
            'The selected tree {}_{}.json was not found in the collection\n.'.
            format(selected_study, selected_tree))
        sys.exit(1)
    # now we write a "concrete" version of this snapshot
    coll_name = os.path.split(args.collection)[-1]
    concrete_collection = get_empty_collection()
    concrete_collection[
        'description'] = 'Concrete form of collection "{}"'.format(coll_name)
    cd_list = concrete_collection['decisions']
    for inc in included:
        concrete = generic2concrete[id(inc)]
        cd_list.append(concrete)
    concrete_fn = os.path.join(out_dir, 'concrete_' + coll_name)
    write_as_json(concrete_collection, concrete_fn)
Example #15
    parser.add_argument('flag_pruned_json', nargs=1, metavar='F', type=str)
    parser.add_argument('higher_taxon_pruned_json', metavar='H', nargs=1, type=str)
    parser.add_argument('combined_json', nargs=1, metavar='O', type=str)
    args = parser.parse_args()
    fj_fn = args.flag_pruned_json[0]
    htj_fn = args.higher_taxon_pruned_json[0]
    out_fn = args.combined_json[0]
    blob = read_as_json(fj_fn)
    higher_taxon_blob = read_as_json(htj_fn)
    if higher_taxon_blob:
        p = blob['pruned']
        httk = 'higher-taxon-tip'
        intk = 'empty-after-higher-taxon-tip-prune'
        high_tax_tip_pruned = higher_taxon_blob.get(httk, {})
        internal_high_tax_tip_pruned = higher_taxon_blob.get(intk, {})
        p[httk] = high_tax_tip_pruned
        p[intk] = internal_high_tax_tip_pruned
        n_ht_in_pruned = len(internal_high_tax_tip_pruned)
        n_ht_pruned = len(high_tax_tip_pruned)
        blob['num_non_leaf_nodes'] -= n_ht_in_pruned
        blob['num_pruned_anc_nodes'] += n_ht_in_pruned
        blob['num_tips'] -= n_ht_pruned
        blob['num_nodes'] -= (n_ht_pruned + n_ht_in_pruned)
        del blob['num_monotypic_nodes']
        del blob['num_non_leaf_nodes_with_multiple_children']
        kl = [httk, intk]
    else:
        kl = []
    blob['pruning_keys_not_from_flags'] = kl
    write_as_json(blob, out_fn)
Example #16
def write_ncbi_details_json(fp, details_log):
    write_as_json(details_log, fp, indent=2)
Example #17
                    selected_study_found = True
                else:
                    continue
            ga = ps.create_git_action(study_id)
            with ga.lock():
                ga.checkout(sha)
                if copy_phylesystem_file_if_differing(ga,
                                                      sha,
                                                      inc,
                                                      out_dir,
                                                      generic2concrete):
                    num_moved += 1
                ga.checkout_master()
    debug('{} total trees'.format(len(included)))
    debug('{} JSON files copied'.format(num_moved))
    if selected_study is not None:
        if selected_study_found:
            sys.exit(0)
        error('The selected tree {}_{}.json was not found in the collection\n.'.format(selected_study, selected_tree))
        sys.exit(1)
    # now we write a "concrete" version of this snapshot
    coll_name = os.path.split(args.collection)[-1]
    concrete_collection = get_empty_collection()
    concrete_collection['description'] = 'Concrete form of collection "{}"'.format(coll_name)
    cd_list = concrete_collection['decisions']
    for inc in included:
        concrete = generic2concrete[id(inc)]
        cd_list.append(concrete)
    concrete_fn = os.path.join(out_dir, 'concrete_' + coll_name)
    write_as_json(concrete_collection, concrete_fn)
Example #18
        with open(fn) as inp:
            for row in csv.reader(inp, delimiter=','):
                ott_id = 'ott{}'.format(row[1])
                if ott_id in nodes_annotations:
                    n_passes += 1
                elif ott_id in bt_dict:
                    n_failures += 1
                    err('Taxon {} from monophyly is not monophyletic in the tree'.format(ott_id))
                    mp.append(ott_id)
                else:
                    skip_msg = 'Monophyly test for {} treated as a skipped test because the taxon is not in the lost taxa or in the tree. (it could be the case that the synthesis was run on a subset of the full taxonomy)\n'
                    sys.stderr.write(skip_msg.format(ott_id))
                    n_skipped += 1
        if 'MONOPHYLY_TEST_SOURCE_NAME' in os.environ:
            src = os.environ['MONOPHYLY_TEST_SOURCE_NAME']
        else:
            src = fn
        if mp:
            mtb = {'result': 'ERROR', 'data': [n_passes, n_skipped, n_failures, mp]}
        else:
            mtb = {'result': 'OK', 'data': [n_passes, n_skipped, n_failures, mp]}
        mtb['description'] = 'Check that the taxa from the monophyly tests listed in {} are monophyletic in the tree.'.format(src)
        summary['monophyly'] = mtb
    else:
        sys.stderr.write('MONOPHYLY_TEST_CSV_FILE is not in the env, so no monophyly tests are being run\n')
    # serialize the summary
    #
    write_as_json(summary, os.path.join(assessments_dir, 'summary.json'), indent=2)
    sys.exit(num_errors)

Example #19
        nexson_blob = read_as_json(inp)
        ntw = NexsonTreeWrapper(nexson_blob, tree_id, log_obj=log_obj)
        assert ntw.root_node_id
        taxonomy_treefile = os.path.join(args.out_dir,
                                         study_tree + '-taxonomy.tre')
        try:
            ntw.prune_tree_for_supertree(
                ott=ott,
                to_prune_fsi_set=to_prune_fsi_set,
                root_ott_id=root,
                taxonomy_treefile=taxonomy_treefile,
                id_to_other_prune_reason=to_prune_for_reasons)
        except EmptyTreeError:
            log_obj['EMPTY_TREE'] = True
        out_log = os.path.join(args.out_dir, study_tree + '.json')
        write_as_json(log_obj, out_log)
        newick_fp = os.path.join(args.out_dir, study_tree + '.tre')

        def compose_label(nodeid, node, otu):
            try:
                return '_'.join([
                    otu['^ot:ottTaxonName'],
                    str(node['@id']), 'ott' + str(otu['^ot:ottId'])
                ])
            except:
                # internal nodes may lack otu's but we still want the node Ids
                return '_{}_'.format(str(node['@id']))

        with codecs.open(newick_fp, 'w', encoding='utf-8') as outp:
            if not ntw.is_empty:
                nexson_frag_write_newick(outp,
Example #20
def _main():
    import argparse
    _HELP_MESSAGE = '''Takes a filepath to Newick tree file with propinquity-style
leaf labels - unique numeric suffixes which identify the taxon.
Writes a NexSON representation of the tree to
'''

    parser = argparse.ArgumentParser(description=_HELP_MESSAGE,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("-i", "--ids",
                        required=True,
                        help="comma separated list of tree IDs to be assigned to the trees in the newick file.")
    parser.add_argument('newick', help='filepath of the newick tree')
    args = parser.parse_args()
    if not os.path.exists(args.newick):
        sys.exit('The file "{}" does not exist'.format(args.newick))
    tree_id_list = args.ids.split(',')
    if not tree_id_list:
        sys.exit('At least one tree ID must be provided')
    tree_id_it = iter(tree_id_list)
    out = codecs.getwriter('utf-8')(sys.stdout)
    pyid2int = {}
    curr_nd_counter = 1
    with codecs.open(args.newick, 'r', encoding='utf8') as inp:
        tree = parse_newick(stream=inp)
        tree_id = tree_id_it.next()
        nexson = get_empty_nexson()
        body = nexson['nexml']
        all_otus_groups = body['otusById'].values()
        assert len(all_otus_groups) == 1
        first_otus_group = all_otus_groups[0]
        all_trees_groups = body['treesById'].values()
        assert len(all_trees_groups) == 1
        first_trees_group = all_trees_groups[0]
        first_trees_group['^ot:treeElementOrder'].append(tree_id)
        otus = first_otus_group['otuById']
        all_trees_dict = first_trees_group['treeById']
        ntree = all_trees_dict.setdefault(tree_id, {})
        ebsi, nbi = {}, {}
        ntree['edgeBySourceId'] = ebsi
        ntree['nodeById'] = nbi
        root_node_id = None
        for node in tree._root.preorder_iter():
            nid = id(node)
            i = pyid2int.get(nid)
            if i is None:
                i = curr_nd_counter
                curr_nd_counter += 1
                pyid2int[nid] = i
            node_id_s = 'node{}'.format(i)
            otu_id_s = 'otu{}'.format(i)
            n_obj = nbi.setdefault(node_id_s, {})
            if node is tree._root:
                n_obj['@root'] = True
                root_node_id = node_id_s
            else:
                edge_id_s = 'edge{}'.format(i)
                pid = id(node.parent)
                pni = 'node{}'.format(pyid2int[pid])
                ed = ebsi.setdefault(pni, {})
                ed[edge_id_s] = {'@source': pni, '@target': node_id_s}
            if not node.children:
                n_obj['@otu'] = otu_id_s
                orig = node._id
                ott_id = ott_id_from_label(orig)
                otus[otu_id_s] = {"^ot:originalLabel": orig, "^ot:ottId": ott_id, "^ot:ottTaxonName": orig}
        assert root_node_id is not None
        ntree['^ot:rootNodeId'] = root_node_id
        write_as_json(nexson, out)
Example #21
                        nargs=1,
                        type=str)
    parser.add_argument('combined_json', nargs=1, metavar='O', type=str)
    args = parser.parse_args()
    fj_fn = args.flag_pruned_json[0]
    htj_fn = args.higher_taxon_pruned_json[0]
    out_fn = args.combined_json[0]
    blob = read_as_json(fj_fn)
    higher_taxon_blob = read_as_json(htj_fn)
    if higher_taxon_blob:
        p = blob['pruned']
        httk = 'higher-taxon-tip'
        intk = 'empty-after-higher-taxon-tip-prune'
        high_tax_tip_pruned = higher_taxon_blob.get(httk, {})
        internal_high_tax_tip_pruned = higher_taxon_blob.get(intk, {})
        p[httk] = high_tax_tip_pruned
        p[intk] = internal_high_tax_tip_pruned
        n_ht_in_pruned = len(internal_high_tax_tip_pruned)
        n_ht_pruned = len(high_tax_tip_pruned)
        blob['num_non_leaf_nodes'] -= n_ht_in_pruned
        blob['num_pruned_anc_nodes'] += n_ht_in_pruned
        blob['num_tips'] -= n_ht_pruned
        blob['num_nodes'] -= (n_ht_pruned + n_ht_in_pruned)
        del blob['num_monotypic_nodes']
        del blob['num_non_leaf_nodes_with_multiple_children']
        kl = [httk, intk]
    else:
        kl = []
    blob['pruning_keys_not_from_flags'] = kl
    write_as_json(blob, out_fn)
Example #22
    summary = _ot_call(
        'treemachine/getSynthesisSourceList',
        'curl-versions/getSynthesisSourceList.json',
        lambda: otwrap.treemachine.synthetic_source_list,
    )
    summary_list.append(summary)

    summary = _ot_call('taxomachine/autocompleteBoxQuery',
                       'curl-versions/autocompleteBoxQuery.json',
                       otwrap.taxomachine.autocomplete, 'Endoxyla', 'All life')
    summary_list.append(summary)

    summary = _ot_call(
        'phylesystem/study_list',
        'curl-versions/study_list.json',
        lambda: otwrap.phylesystem_api.study_list,
    )
    summary_list.append(summary)

    summary = _ot_call('phylesystem/pg_719', 'curl-versions/pg_719.json',
                       otwrap.phylesystem_api.get_study, 'pg_719')
    summary_list.append(summary)

    blob = {
        'time': timestamp.isoformat(),
        'time_string': timestamp.strftime('%A %H:%M:%S.%f (UTC) %d %B, %Y'),
        'summary': summary_list
    }
    out = codecs.getwriter('utf-8')(sys.stdout)
    write_as_json(blob, out, indent=1)
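Examples #20, #22, #23, and #27 pass an open stream rather than a filepath as the destination; #22 and #23 first wrap sys.stdout in codecs.getwriter('utf-8'), the Python 2-era way of forcing UTF-8 output, while Examples #5-#7 pass sys.stdout directly. A minimal sketch of the stream form, with a hypothetical payload:

import sys
from peyotl import write_as_json

blob = {'summary': [], 'status': 'ok'}   # hypothetical payload
# write_as_json also accepts a writable file-like object, as the examples show.
write_as_json(blob, sys.stdout, indent=1)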
Example #23
                       lambda: otwrap.treemachine.synthetic_source_list,
                       )
    summary_list.append(summary)

    summary = _ot_call('taxomachine/autocompleteBoxQuery',
                       'curl-versions/autocompleteBoxQuery.json',
                       otwrap.taxomachine.autocomplete,
                       'Endoxyla',
                       'All life'
                       )
    summary_list.append(summary)

    summary = _ot_call('phylesystem/study_list',
                       'curl-versions/study_list.json',
                       lambda: otwrap.phylesystem_api.study_list,
                       )
    summary_list.append(summary)

    summary = _ot_call('phylesystem/pg_719',
                       'curl-versions/pg_719.json',
                       otwrap.phylesystem_api.get_study,
                       'pg_719')
    summary_list.append(summary)

    blob = {'time': timestamp.isoformat(),
            'time_string': timestamp.strftime('%A %H:%M:%S.%f (UTC) %d %B, %Y'),
            'summary': summary_list
            }
    out = codecs.getwriter('utf-8')(sys.stdout)
    write_as_json(blob, out, indent=1)
Example #24
#!/usr/bin/env python
from peyotl import concatenate_collections, read_as_json, write_as_json

if __name__ == '__main__':
    import argparse
    import sys
    import os
    description = 'Takes a list of collections and writes a collection that is a concatenation of their decisions'
    parser = argparse.ArgumentParser(prog='collection_export.py', description=description)
    parser.add_argument('--output',
                        type=str,
                        required=True,
                        help='output filepath for collection json')
    parser.add_argument('collection',
                        default=None,
                        type=str,
                        nargs="*",
                        help='filepath for the collections JSON')
    args = parser.parse_args(sys.argv[1:])
    inp = [read_as_json(i) for i in args.collection]
    out = concatenate_collections(inp)
    write_as_json(out, args.output)
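Example #24 is the simplest read-modify-write round trip on this page: read_as_json loads each input collection, the blobs are combined, and write_as_json serializes the result. The same round trip in miniature, with hypothetical file names and edit:

from peyotl import read_as_json, write_as_json

blob = read_as_json('collection-in.json')       # hypothetical input file
blob['description'] = 'concatenated snapshot'   # any in-place edit
write_as_json(blob, 'collection-out.json', indent=2)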
Example #25
    def flush(self, tax_dir):
        self.curr_tree.add_best_guess_rank_sort_number()
        self.prev_tree.add_best_guess_rank_sort_number()

        edit_list = []
        for nd in self.curr_tree.preorder():
            stat_flag, other = _get_nonsyn_flag_and_other(nd)
            if stat_flag == UpdateStatus.UNDIAGNOSED_CHANGE:
                ranks_differ = nd.best_rank_sort_number != other.best_rank_sort_number
                if ranks_differ:
                    if nd.best_rank_sort_number == SPECIES_SORTING_NUMBER:
                        if other.best_rank_sort_number <= MAX_INFRASPECIFIC_NUMBER:
                            genus_nd = self.curr_tree.find_genus_for_alpha(nd)
                            if genus_nd:
                                other_genus = _get_nonsyn_flag_and_other(genus_nd)[1]
                                if self.prev_tree.does_first_contain_second(other_genus, other):
                                    _alter_update_flag(nd, UpdateStatus.ELEVATED_TO_SP)
                    elif other.best_rank_sort_number == SPECIES_SORTING_NUMBER:
                        if nd.best_rank_sort_number <= MAX_INFRASPECIFIC_NUMBER:
                            genus_nd = self.curr_tree.find_genus_for_alpha(nd)
                            if genus_nd:
                                other_genus = _get_nonsyn_flag_and_other(genus_nd)[1]
                                if self.prev_tree.does_first_contain_second(other_genus, other):
                                    _alter_update_flag(nd, UpdateStatus.DEMOTED_TO_INFRA_SP)
                if _get_nonsyn_flag_and_other(nd)[0] == UpdateStatus.UNDIAGNOSED_CHANGE:
                    _LOG.warn('persistent UNDIAGNOSED_CHANGE for {} and {}'.format(nd, other))
            if (not nd.children_refs) and nd.best_rank_sort_number >= MINIMUM_HIGHER_TAXON_NUMBER:
                if other \
                   and (not other.children_refs) \
                   and other.best_rank_sort_number >= MINIMUM_HIGHER_TAXON_NUMBER:
                    _add_update_flag_bit(nd, UpdateStatus.OLDLY_BARREN)
                else:
                    _add_update_flag_bit(nd, UpdateStatus.NEWLY_BARREN)
            if hasattr(nd, 'new_children'):
                for c in nd.new_children:
                    if _get_nonsyn_flag_and_other(c)[0] & UpdateStatus.NAME_CHANGED:
                        self._detect_cascading_name_change(nd, c)

        for nd in self.curr_tree.preorder():
            ne = self._gen_edit_if_new(nd, {})
            if ne:
                edit_list.append(ne)
        for nd in self.prev_tree.preorder():
            ne = self._gen_prev_tree_nd_edit(nd, {})
            if ne:
                edit_list.append(ne)
        edit_ids = set()
        for edit in edit_list:
            ft = edit.get('focal_taxon')
            if ft is None:
                pt = edit['focal_taxon_prev']
                key = '{}_|edit|_prev_{}'.format(self.tag, pt['id'])
            else:
                key = '{}_|edit|_{}'.format(self.tag, ft['id'])
            assert key not in edit_ids
            edit_ids.add(key)
            edit['edit_id'] = key

        fp = os.path.join(tax_dir, UPDATE_ANALYSIS_FILENAME)
        with open(fp, 'w', encoding='utf-8') as outf:
            for opts in [outf, out_stream]:
                write_as_json(edit_list, opts, indent='  ', sort_keys=True)

        # curr_tree_par_ids = set()
        # prev_tree_par_ids = set()
        # for status_code, node_list in self.by_status_code.items():
        #     if status_code == UpdateStatus.UNCHANGED:
        #         continue
        #     if status_code in [UpdateStatus.DELETED_TERMINAL, UpdateStatus.DELETED_INTERNAL]:
        #         target = prev_tree_par_ids
        #     else:
        #         target = curr_tree_par_ids
        #     for nd in node_list:
        #         target.add(nd.par_id)
        #
        # curr_deepest_mod_id = _old_modified_subtree_ids(curr_tree_par_ids, self.curr_tree)
        # prev_deepest_mod_id = _old_modified_subtree_ids(prev_tree_par_ids, self.prev_tree)

        # emitted = set()
        # for par_id in curr_deepest_mod_id:
        #     par_nd = self.curr_tree.id_to_taxon[par_id]
        #     self.report_on_altered_contiguous_des(par_nd, True)

        # status_keys = [(i.value, i) for i in self.by_status_code.keys()]
        # status_keys.sort()
        # status_keys = [i[1] for i in status_keys]
        # status_keys.remove(UpdateStatus.TERMINAL_SUNK_TO_SYNONYM)
        # status_keys.remove(UpdateStatus.INTERNAL_SUNK_TO_SYNONYM)
        # for k in status_keys:
        #     for nd in self.by_status_code[k]:
        #         self._write_nd(nd)

        # Reinitialize...
        self.__init__(None, None)
Example #26
    parser.add_argument('--root',
                        default=None,
                        type=int,
                        required=False,
                        help='Optional taxonomy root argument.')
    args = parser.parse_args(sys.argv[1:])
    ott_dir, output, log_filename, root = args.ott_dir, args.output, args.log, args.root
    flags_str = args.flags
    try:
        assert os.path.isdir(args.ott_dir)
    except:
        sys.exit(
            'Expecting ott-dir argument to be a directory. Got "{}"'.format(
                args.ott_dir))
    ott = OTT(ott_dir=args.ott_dir)
    if flags_str is None:
        flags = ott.TREEMACHINE_SUPPRESS_FLAGS
    else:
        flags = flags_str.split(',')
    create_log = log_filename is not None
    with codecs.open(args.output, 'w', encoding='utf-8') as outp:
        log = ott.write_newick(
            outp,
            label_style=OTULabelStyleEnum.CURRENT_LABEL_OTT_ID,
            root_ott_id=root,
            prune_flags=flags,
            create_log_dict=create_log)
        outp.write('\n')
    if create_log:
        write_as_json(log, log_filename)
Example #27
from peyotl.collections_store.validation import validate_collection
# get_empty_collection is used below; it is assumed here to be importable
# from peyotl.collections_store (the original snippet omitted this import).
from peyotl.collections_store import get_empty_collection
from peyotl import write_as_json
import sys

# Expecting a lot of lines like pg_2359_4962 for 'pg_2359', 'tree4962'
inp_fn = sys.argv[1]
with open(inp_fn, 'rU') as inp:
    lines = []
    for line in inp:
        line = line.strip()
        if (not line) or (line == 'taxonomy'):
            continue
        assert line.endswith('.tre')
        frag = line[:-4]
        s = frag.split('_')
        study_id, tree_frag = '_'.join(s[:-1]), s[-1]
        tree_id = 'tree' + tree_frag
        lines.append((study_id, tree_id))
c = get_empty_collection()
d = c['decisions']
for pair in lines:
    d.append({'SHA': '',
              'decision': 'INCLUDED',
              'name': '',
              'studyID': pair[0],
              'treeID': pair[1]
              })

assert not (validate_collection(c)[0])
write_as_json(c, sys.stdout)
Example #28
        study_tree = '.'.join(inp_fn.split('.')[:-1])  # strip extension
        study_id, tree_id = propinquity_fn_to_study_tree(inp_fn)
        nexson_blob = read_as_json(inp)
        ntw = NexsonTreeWrapper(nexson_blob, tree_id, log_obj=log_obj)
        assert ntw.root_node_id
        taxonomy_treefile = os.path.join(args.out_dir, study_tree + '-taxonomy.tre')
        try:
            ntw.prune_tree_for_supertree(ott=ott,
                                         to_prune_fsi_set=to_prune_fsi_set,
                                         root_ott_id=root,
                                         taxonomy_treefile=taxonomy_treefile,
                                         id_to_other_prune_reason=to_prune_for_reasons)
        except EmptyTreeError:
            log_obj['EMPTY_TREE'] = True
        out_log = os.path.join(args.out_dir, study_tree + '.json')
        write_as_json(log_obj, out_log)
        newick_fp = os.path.join(args.out_dir, study_tree + '.tre')


        def compose_label(node, otu):
            try:
                return '_'.join([otu['^ot:ottTaxonName'], str(node['@id']), 'ott' + str(otu['^ot:ottId'])])
            except:
                # internal nodes may lack otu's but we still want the node Ids
                return '_{}_'.format(str(node['@id']))


        with codecs.open(newick_fp, 'w', encoding='utf-8') as outp:
            if not ntw.is_empty:
                nexson_frag_write_newick(outp,
                                         ntw._edge_by_source,