Exemplo n.º 1
0
 def testTreesCulledNonmatcingConvViaPS(self):
     """Serializing with cull_nonmatching should leave only 'tree2'.

     The check is made twice: on the object that was passed to
     ``serialize`` and on the JSON text that ``serialize`` returned.
     """
     nexson = pathmap.nexson_obj('9/v1.2.json')
     trees_before = extract_tree_nexson(nexson, tree_id=None)
     self.assertEqual(len(trees_before), 2)
     schema = PhyloSchema(
         'nexson',
         content='tree',
         content_id='tree2',
         version='1.2.1',
         cull_nonmatching='true',
     )
     serialized = schema.serialize(nexson)
     # The object handed to serialize() is expected to hold one tree now.
     trees_after = extract_tree_nexson(nexson, tree_id=None)
     self.assertEqual(len(trees_after), 1)
     self.assertEqual(trees_after[0][0], 'tree2')
     self.assertTrue(serialized.startswith('{'))  # pylint: disable=E1103
     # The serialized text must parse as JSON and hold the same single tree.
     reparsed = json.loads(serialized)
     trees_reparsed = extract_tree_nexson(reparsed, tree_id=None)
     self.assertEqual(len(trees_reparsed), 1)
     self.assertEqual(trees_reparsed[0][0], 'tree2')
Exemplo n.º 2
0
 def testTreesCulledNonmatcingConvViaPS(self):
     """Serializing with cull_nonmatching should leave only 'tree2'.

     The check is made twice: on the object that was passed to
     ``serialize`` and on the JSON text that ``serialize`` returned.
     """
     nexson = pathmap.nexson_obj('9/v1.2.json')
     trees_before = extract_tree_nexson(nexson, tree_id=None)
     self.assertEqual(len(trees_before), 2)
     schema = PhyloSchema(
         'nexson',
         content='tree',
         content_id='tree2',
         version='1.2.1',
         cull_nonmatching='true',
     )
     serialized = schema.serialize(nexson)
     # The object handed to serialize() is expected to hold one tree now.
     trees_after = extract_tree_nexson(nexson, tree_id=None)
     self.assertEqual(len(trees_after), 1)
     self.assertEqual(trees_after[0][0], 'tree2')
     self.assertTrue(serialized.startswith('{'))  # pylint: disable=E1103
     # The serialized text must parse as JSON and hold the same single tree.
     reparsed = json.loads(serialized)
     trees_reparsed = extract_tree_nexson(reparsed, tree_id=None)
     self.assertEqual(len(trees_reparsed), 1)
     self.assertEqual(trees_reparsed[0][0], 'tree2')
Exemplo n.º 3
0
 def testTreesCulledNonmatcingConvViaPSV0(self):
     """Culling must still work when the output is requested as NexSON 0.0.0.

     After serializing, only 'tree2' may remain in the source object, and
     the returned JSON must be detected as version '0.0.0' and contain that
     same single tree.
     """
     nexson = pathmap.nexson_obj('9/v1.2.json')
     trees_before = extract_tree_nexson(nexson, tree_id=None)
     self.assertEqual(len(trees_before), 2)
     schema = PhyloSchema(
         'nexson',
         content='tree',
         content_id='tree2',
         version='0.0.0',
         cull_nonmatching='true',
     )
     serialized = schema.serialize(nexson)
     # The object handed to serialize() is expected to hold one tree now.
     trees_after = extract_tree_nexson(nexson, tree_id=None)
     self.assertEqual(len(trees_after), 1)
     self.assertEqual(trees_after[0][0], 'tree2')
     self.assertTrue(serialized.startswith('{'))  # pylint: disable=E1103
     reparsed = json.loads(serialized)
     # The output has to be in the requested format version.
     self.assertEqual(detect_nexson_version(reparsed), '0.0.0')
     trees_reparsed = extract_tree_nexson(reparsed, tree_id=None)
     self.assertEqual(len(trees_reparsed), 1)
     self.assertEqual(trees_reparsed[0][0], 'tree2')
Exemplo n.º 4
0
 def testTreesCulledNonmatcingConvViaPSV0(self):
     """Culling must still work when the output is requested as NexSON 0.0.0.

     After serializing, only 'tree2' may remain in the source object, and
     the returned JSON must be detected as version '0.0.0' and contain that
     same single tree.
     """
     nexson = pathmap.nexson_obj('9/v1.2.json')
     trees_before = extract_tree_nexson(nexson, tree_id=None)
     self.assertEqual(len(trees_before), 2)
     schema = PhyloSchema(
         'nexson',
         content='tree',
         content_id='tree2',
         version='0.0.0',
         cull_nonmatching='true',
     )
     serialized = schema.serialize(nexson)
     # The object handed to serialize() is expected to hold one tree now.
     trees_after = extract_tree_nexson(nexson, tree_id=None)
     self.assertEqual(len(trees_after), 1)
     self.assertEqual(trees_after[0][0], 'tree2')
     self.assertTrue(serialized.startswith('{'))  # pylint: disable=E1103
     reparsed = json.loads(serialized)
     # The output has to be in the requested format version.
     self.assertEqual(detect_nexson_version(reparsed), '0.0.0')
     trees_reparsed = extract_tree_nexson(reparsed, tree_id=None)
     self.assertEqual(len(trees_reparsed), 1)
     self.assertEqual(trees_reparsed[0][0], 'tree2')
Exemplo n.º 5
0
def find_tree_and_otus_in_nexson(nexson, tree_id):
    """Return ``(tree, otus)`` for the one tree that `tree_id` selects.

    The lookup is delegated to ``extract_tree_nexson(nexson, tree_id)``.
    Unless that call yields exactly one entry, ``(None, None)`` is returned.
    """
    matches = extract_tree_nexson(nexson, tree_id)
    if len(matches) == 1:
        # Each entry is an (id, tree, otus) triple; the id is not needed here.
        _, tree, otus = matches[0]
        return tree, otus
    return None, None
Exemplo n.º 6
0
def find_tree_and_otus_in_nexson(nexson, tree_id):
    """Return ``(tree, otus)`` for the one tree that `tree_id` selects.

    The lookup is delegated to ``extract_tree_nexson(nexson, tree_id)``.
    Unless that call yields exactly one entry, ``(None, None)`` is returned.
    """
    matches = extract_tree_nexson(nexson, tree_id)
    if len(matches) == 1:
        # Each entry is an (id, tree, otus) triple; the id is not needed here.
        _, tree, otus = matches[0]
        return tree, otus
    return None, None
Exemplo n.º 7
0
    sys.stderr.write('count_trees.py: Exception: {}\n'.format(e.message))
    sys.exit('count_trees.py: There was a problem creating a wrapper around your phylesystem ' \
             'instance. Double check your configuration (see ' \
             'http://opentreeoflife.github.io/peyotl/configuration/ for info).')
try:
    print_freq = 500
    num_trees = 0
    num_studies = 0
    max_trees_per_study = 0
    biggest_study = None
    studies_without_trees = []
    sys.stderr.write('count_trees.py: beginning loop over studies...\n')
    for study_id, nexson in phylsys.iter_study_objs():
        num_studies += 1
        try:
            nt = len(extract_tree_nexson(nexson, tree_id=None))
        except:
            sys.stderr.write(
                'Problem extracting trees from study {}'.format(study_id))
            raise
        if nt == 0:
            studies_without_trees.append(study_id)
        else:
            num_trees += nt
            if nt > max_trees_per_study:
                biggest_study = study_id
                max_trees_per_study = nt
        if num_studies % print_freq == 0:
            sys.stderr.write('   ...{d} studies read. Still going...\n'.format(
                d=num_studies))
Exemplo n.º 8
0
    sys.stderr.write('count_trees.py: Exception: {}\n'.format(e.message))
    sys.exit('count_trees.py: There was a problem creating a wrapper around your phylesystem '
             'instance. Double check your configuration (see '
             'http://opentreeoflife.github.io/peyotl/configuration/ for info).')
# Walk every study in the phylesystem, tallying studies and trees and
# remembering which study has the most trees and which have none.
try:
    print_freq = 500  # write a progress line after every `print_freq` studies
    num_trees = 0
    num_studies = 0
    max_trees_per_study = 0
    biggest_study = None
    studies_without_trees = []
    sys.stderr.write('count_trees.py: beginning loop over studies...\n')
    for study_id, nexson in phylsys.iter_study_objs():
        num_studies += 1
        try:
            nt = len(extract_tree_nexson(nexson, tree_id=None))
        except Exception:
            # Name the offending study, then let the outer handler abort.
            sys.stderr.write('Problem extracting trees from study {}\n'.format(study_id))
            raise
        if nt == 0:
            studies_without_trees.append(study_id)
        else:
            num_trees += nt
            if nt > max_trees_per_study:
                biggest_study = study_id
                max_trees_per_study = nt
        if num_studies % print_freq == 0:
            sys.stderr.write('   ...{d} studies read. Still going...\n'.format(d=num_studies))

except Exception:
    # Catch `Exception`, not everything: a bare except would also trap
    # KeyboardInterrupt/SystemExit and report a user's Ctrl-C as a bug.
    sys.exit('Unexpected error in iteration, please report this bug.')
Exemplo n.º 9
0
def write_tree_list(outpath):
    """Rank every tree in the phylesystem and write the ranking as CSV.

    One ``Row`` is built for each tree of each study yielded by
    ``Phylesystem().iter_study_objs()``.  A row carries study-level and
    tree-level flags, the three counts returned by ``examine_tree``, the
    conflict-analysis figures for the tree (when present) and a combined
    ``score``.  Rows are sorted with the highest score first, the other
    fields acting as tie-breakers, and written to `outpath` as UTF-8 CSV
    below a header row.

    A progress line is printed after every 500th tree, and the study, tree
    and preferred-tree totals are printed at the end.

    :param outpath: path of the CSV file to create or overwrite.
    """
    conflict_analyses = read_conflict_analyses()
    trees_in_synthesis = read_synthesis_list()
    taxa_in_synthesis = read_synthesis_taxa()
    phylesystem = Phylesystem()
    study_count = 0
    tree_count = 0
    preferred_count = 0
    table = []
    for study_id, nexson in phylesystem.iter_study_objs():
        study_count += 1
        nexml_el = nexson[u'nexml']

        # Study-level flag: 0 when the study is marked as not intended for
        # synthesis, 2 otherwise.  This stays an equality test against True
        # so that other truthy values (e.g. a non-empty string) do not count
        # as an opt-out, exactly as before.
        not_intended = nexml_el.get(u'^ot:notIntendedForSynthesis')
        n_intended = 0 if not_intended == True else 2

        candidates = nexml_el.get(u'^ot:candidateTreeForSynthesis')
        if candidates is None:
            candidates = []
        tid_tree_otug = extract_tree_nexson(nexson, tree_id=None)

        # Preference used when the study names no candidate trees.  It is
        # the same for every tree of the study, so compute it once:
        # 2 = only one tree, use it; 1 = several trees, decision required.
        unselected_preference = 2 if len(tid_tree_otug) == 1 else 1

        for (tree_id, tree, otu_group) in tid_tree_otug:
            tree_count += 1
            row = Row()

            long_id = '%s@%s' % (study_id, tree_id)
            row.id = long_id

            row.n_intended = n_intended  # per study

            if not candidates:  # No selection(s) made
                n_preferred = unselected_preference
            elif tree_id in candidates:
                preferred_count += 1
                n_preferred = 2  # This is a preferred tree; use it
            else:
                n_preferred = 0  # Not preferred, another is; do not use
            row.n_preferred = n_preferred

            # whether a non-empty curated type is recorded
            ctype = tree.get('^ot:curatedType')
            row.n_ctype = 1 if (ctype is not None and ctype != '') else 0

            # whether a curator has confirmed the root
            root = tree.get('^ot:specifiedRoot')
            row.root_confirmed = 1 if (root is not None and root != '') else 0

            row.n_synth = 1 if long_id in trees_in_synthesis else 0

            ingroup_node_id = tree.get('^ot:inGroupClade')
            row.n_ingroup = 1 if ingroup_node_id is not None else 0

            row.tip_count, row.ott_count, row.new_count = examine_tree(
                tree, otu_group, ingroup_node_id, taxa_in_synthesis)

            # Conflict figures default to zero when the tree has no analysis.
            analysis = conflict_analyses.get(long_id)
            if analysis is None:
                row.conflict_count = 0
                row.resolve_count = 0
            else:
                row.conflict_count = int(analysis[1])
                row.resolve_count = int(analysis[2])

            row.score = ((row.new_count + row.resolve_count) -
                         (row.conflict_count * 20) +
                         (row.n_ingroup * 10) +
                         (row.n_preferred * 50) +
                         (row.n_intended * 100))

            table.append(row)
            if tree_count % 500 == 0:
                # Single pre-formatted argument: prints the same text under
                # the Python 2 print statement and the Python 3 function.
                print('%s %s %s' % (tree_count, long_id, ctype))

    # Highest score first; the remaining fields only break ties.
    table.sort(key=lambda r: (-r.score,
                              r.n_intended == 0,  # intended-for-synthesis first
                              -r.n_preferred,     # more preferred first
                              -r.n_ingroup,       # ingroup designated first
                              r.conflict_count,   # fewer conflicts first
                              -r.new_count,       # larger new_count first
                              -r.n_ctype,         # has a curated type first
                              -r.tip_count,       # more tips first
                              ))
    with codecs.open(outpath, 'w', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['tree', 'intended', 'preferred', 'has ingroup',
                         'has method', 'root confirmed', 'in synth', '#tips',
                         '#mapped', '#new', '#resolved', '#conflicts',
                         'score'])
        for entry in table:
            writer.writerow([entry.id, entry.n_intended, entry.n_preferred,
                             entry.n_ingroup, entry.n_ctype,
                             entry.root_confirmed, entry.n_synth,
                             entry.tip_count, entry.ott_count,
                             entry.new_count,
                             entry.resolve_count,
                             entry.conflict_count,
                             entry.score])
    print('studies: %s' % study_count)
    print('trees: %s' % tree_count)
    print('preferred: %s' % preferred_count)
Exemplo n.º 10
0
def write_tree_list(outpath):
    """Rank every tree in the phylesystem and write the ranking as CSV.

    One ``Row`` is built for each tree of each study yielded by
    ``Phylesystem().iter_study_objs()``.  A row carries study-level and
    tree-level flags, the three counts returned by ``examine_tree``, the
    conflict-analysis figures for the tree (when present) and a combined
    ``score``.  Rows are sorted with the highest score first, the other
    fields acting as tie-breakers, and written to `outpath` as UTF-8 CSV
    below a header row.

    A progress line is printed after every 500th tree, and the study, tree
    and preferred-tree totals are printed at the end.

    :param outpath: path of the CSV file to create or overwrite.
    """
    conflict_analyses = read_conflict_analyses()
    trees_in_synthesis = read_synthesis_list()
    taxa_in_synthesis = read_synthesis_taxa()
    phylesystem = Phylesystem()
    study_count = 0
    tree_count = 0
    preferred_count = 0
    table = []
    for study_id, nexson in phylesystem.iter_study_objs():
        study_count += 1
        nexml_el = nexson[u'nexml']

        # Study-level flag: 0 when the study is marked as not intended for
        # synthesis, 2 otherwise.  This stays an equality test against True
        # so that other truthy values (e.g. a non-empty string) do not count
        # as an opt-out, exactly as before.
        not_intended = nexml_el.get(u'^ot:notIntendedForSynthesis')
        n_intended = 0 if not_intended == True else 2

        candidates = nexml_el.get(u'^ot:candidateTreeForSynthesis')
        if candidates is None:
            candidates = []
        tid_tree_otug = extract_tree_nexson(nexson, tree_id=None)

        # Preference used when the study names no candidate trees.  It is
        # the same for every tree of the study, so compute it once:
        # 2 = only one tree, use it; 1 = several trees, decision required.
        unselected_preference = 2 if len(tid_tree_otug) == 1 else 1

        for (tree_id, tree, otu_group) in tid_tree_otug:
            tree_count += 1
            row = Row()

            long_id = '%s@%s' % (study_id, tree_id)
            row.id = long_id

            row.n_intended = n_intended  # per study

            if not candidates:  # No selection(s) made
                n_preferred = unselected_preference
            elif tree_id in candidates:
                preferred_count += 1
                n_preferred = 2  # This is a preferred tree; use it
            else:
                n_preferred = 0  # Not preferred, another is; do not use
            row.n_preferred = n_preferred

            # whether a non-empty curated type is recorded
            ctype = tree.get('^ot:curatedType')
            row.n_ctype = 1 if (ctype is not None and ctype != '') else 0

            # whether a curator has confirmed the root
            root = tree.get('^ot:specifiedRoot')
            row.root_confirmed = 1 if (root is not None and root != '') else 0

            row.n_synth = 1 if long_id in trees_in_synthesis else 0

            ingroup_node_id = tree.get('^ot:inGroupClade')
            row.n_ingroup = 1 if ingroup_node_id is not None else 0

            row.tip_count, row.ott_count, row.new_count = examine_tree(
                tree, otu_group, ingroup_node_id, taxa_in_synthesis)

            # Conflict figures default to zero when the tree has no analysis.
            analysis = conflict_analyses.get(long_id)
            if analysis is None:
                row.conflict_count = 0
                row.resolve_count = 0
            else:
                row.conflict_count = int(analysis[1])
                row.resolve_count = int(analysis[2])

            row.score = ((row.new_count + row.resolve_count) -
                         (row.conflict_count * 20) + (row.n_ingroup * 10) +
                         (row.n_preferred * 50) + (row.n_intended * 100))

            table.append(row)
            if tree_count % 500 == 0:
                # Single pre-formatted argument: prints the same text under
                # the Python 2 print statement and the Python 3 function.
                print('%s %s %s' % (tree_count, long_id, ctype))

    # Highest score first; the remaining fields only break ties.
    table.sort(key=lambda r: (
        -r.score,
        r.n_intended == 0,  # intended-for-synthesis first
        -r.n_preferred,  # more preferred first
        -r.n_ingroup,  # ingroup designated first
        r.conflict_count,  # fewer conflicts first
        -r.new_count,  # larger new_count first
        -r.n_ctype,  # has a curated type first
        -r.tip_count,  # more tips first
    ))
    with codecs.open(outpath, 'w', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow([
            'tree', 'intended', 'preferred', 'has ingroup', 'has method',
            'root confirmed', 'in synth', '#tips', '#mapped', '#new',
            '#resolved', '#conflicts', 'score'
        ])
        for entry in table:
            writer.writerow([
                entry.id, entry.n_intended, entry.n_preferred, entry.n_ingroup,
                entry.n_ctype, entry.root_confirmed, entry.n_synth,
                entry.tip_count, entry.ott_count, entry.new_count,
                entry.resolve_count, entry.conflict_count, entry.score
            ])
    print('studies: %s' % study_count)
    print('trees: %s' % tree_count)
    print('preferred: %s' % preferred_count)
Exemplo n.º 11
0
            out = codecs.open(outfn, mode='w', encoding='utf-8')
        except:
            sys.exit('validate_ot_nexson: Could not open output filepath "{fn}"\n'.format(fn=outfn))
    else:
        out = codecs.getwriter('utf-8')(sys.stdout)
    try:
        nexson = read_as_json(inp_filepath)
    except ValueError as vx:
        _LOG.error('Not valid JSON.')
        if args.verbose:
            raise vx
        else:
            sys.exit(1)
    except Exception as nx:
        _LOG.error(nx.value)
        sys.exit(1)
    convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH)
    trees = extract_tree_nexson(nexson, tree_id=args.tree_id)
    if len(trees) == 0:
        trees = extract_tree_nexson(nexson, tree_id=None)
        if trees:
            v = '", "'.join([i[0] for i in trees])
            sys.exit('Tree ID {i} not found. Valid IDs for this file are "{l}"\n'.format(i=args.tree_id, l=v))
        else:
            sys.exit('This NexSON has not trees.\n')
    ott = OTT()
    for tree_id, tree, otus in trees:
        tree_proxy = NexsonTreeProxy(tree=tree, tree_id=tree_id, otus=otus)
        evaluate_tree_rooting(nexson, ott, tree_proxy)

    if len(study) == 1:
        study = '0' + study
    study2tree.setdefault('pg_' + study, []).append('tree' + tree)


# For every study listed in `study2tree`, add its trees to the study's
# '^ot:candidateTreeForSynthesis' list, flag each of those trees as rooted
# at its current root node, and write the study file back unless `dry_run`.
pa = PhylesystemAPI(get_from='local')
raw_phylsys = pa.phylesystem_obj
nexson_version = raw_phylsys.repo_nexml2json
for study_id, tree_list in study2tree.items():
    if verbose:
        sys.stderr.write('treelist={t} for study {s}.\n'.format(t=str(tree_list), s=study_id))
    try:
        fp = raw_phylsys.get_filepath_for_study(study_id)
        blob = read_as_json(fp)

        # presumably the study's nexml element; it is used as a dict below
        nex = get_nexml_el(blob)
        # `prev` is the list stored in the study (created empty if absent),
        # so appending to it modifies `blob` directly.
        prev = nex.setdefault('^ot:candidateTreeForSynthesis', [])
        for tree_id in tree_list:
            # NOTE(review): the ID is added to the candidate list before the
            # lookup below confirms the tree exists -- confirm this is intended.
            if tree_id not in prev:
                prev.append(tree_id)
            i_t_o_list = extract_tree_nexson(blob, tree_id, nexson_version)
            if not i_t_o_list:
                sys.stderr.write('tree {t} of study {s} not found !!!\n'.format(t=tree_id, s=study_id))
            for tid, tree, otus_group in i_t_o_list:
                # Mark the tree as rooted and record its current root node
                # as the specified root.
                tree['^ot:unrootedTree'] = False
                tree['^ot:specifiedRoot'] = tree['^ot:rootNodeId']
        if not dry_run:
            write_as_json(blob, fp)
        
    except KeyError:
        # NOTE(review): any KeyError raised inside the try body lands here,
        # e.g. a tree without '^ot:rootNodeId' (assuming `tree` is a dict),
        # and is then reported as a missing study.
        sys.stderr.write('study {} not found !!!\n'.format(study_id))