Exemplo n.º 1
0
def get_btree(treestr):
    """Build a tree object from the tree string *treestr* and traverse it.

    A string containing exactly one ':' is handled as a one-leaf tree: it is
    split into a name and a branch length and wrapped in ``OneLeafTree``.
    Any other string is handed to ``baltic.make_tree`` to populate a fresh
    ``baltic.tree``.  In both cases ``traverse_tree()`` is called on the
    result before it is returned.
    """
    single_leaf = treestr.count(':') == 1
    if not single_leaf:
        tree = baltic.tree()
        baltic.make_tree(treestr, tree, verbose=False)
    else:
        # Drop surrounding whitespace and any trailing ';' before splitting
        # "name:length" into its two fields.
        trimmed = treestr.strip().rstrip(';')
        leaf_name, branch_length = trimmed.split(':')
        tree = OneLeafTree(leaf_name, float(branch_length))
    tree.traverse_tree()
    return tree
                ), 'Expected number of tips: %s\nNumber of tips found: %s' % (
                    tipNum, len(tips)
                )  ## check that correct numbers of tips have been parsed
    ################################################################################### start analysing trees
    ## Match the non-tree prefix at the start of the line
    ## ("tree STATE_<digits> ... [&R] "); group 1 captures the digits after
    ## "STATE_". A raw string is used so that \s, \_, \[ and \& reach the
    ## regex engine untouched instead of being invalid string escapes.
    cerberus = re.match(r'tree\sSTATE\_([0-9]+).+\[\&R\]\s', line)

    ## A successful match means this line holds a tree. On the first tree
    ## (treecount == 0) the tree is parsed and, unless a chunk is being
    ## processed, the tab-separated header of the output log is written.
    if cerberus is not None:  ## tree identified
        ################################################################# at state 0 - create the header for the output file and read the tree (in case the output log file requires information encoded in the tree)
        if treecount == 0:  ## At tree state 0 insert header into output file
            ll = bt.tree()  ## empty tree object
            ## cerberus.group() is the whole matched prefix, so its length is
            ## the offset at which the tree string begins
            start = len(cerberus.group()
                        )  ## index of where tree string starts in the line
            treestring = str(line[start:])  ## grab tree string
            bt.make_tree(treestring, ll)  ## read tree string
            if lower == 0 and upper == np.inf:  ## only add a header if not doing a chunk
                outfile.write('state')  ## begin the output log file
                ########################################### add header to output log file
                ## each requested analysis appends its own tab-prefixed column name(s)
                if 'treeLength' in analyses:
                    outfile.write('\ttreeLength')
                ###########################################
                if 'RC' in analyses:
                    outfile.write('\tN\tS\tuN\tuS\tdNdS')
                ###########################################
                if 'tmrcas' in analyses:
                    ## one empty list per clade key ('A', 'B', 'C')
                    tmrcas = {
                        'A': [],
                        'B': [],
                        'C': []
                    }  ## dict of clade names
Exemplo n.º 3
0
    cerberus = re.search(
        'dimensions ntax=([0-9]+);',
        l.lower())  ## check how many tips there are supposed to be
    if cerberus is not None:
        tipNum = int(cerberus.group(1))

    #####################
    cerberus = re.search(
        'tree TREE([0-9]+) = \[&R\]',
        l)  ## search for beginning of tree string in BEAST format
    if cerberus is not None:
        treeString_start = l.index(
            '(')  ## tree string starts where the first '(' is in the line
        ll = bt.tree()  ## new instance of tree
        bt.make_tree(
            l[treeString_start:], ll
        )  ## send tree string to make_tree function, provide an empty tree object
    #####################

    if tipFlag == True:
        cerberus = re.search(
            '([0-9]+) ([A-Za-z\-\_\/\.\'0-9 \|?]+)', l
        )  ## look for tip name map, where each tip is given an integer to represent it in tree
        if cerberus is not None:
            tips[cerberus.group(1)] = cerberus.group(2).strip(
                "'"
            )  ## if you give tips an integer (in the form of a string), it will return the full name of the tip
        elif ';' not in l:  ## something's wrong - nothing that matches the tip regex is being captured where it should be in the file
            print 'tip not captured by regex:', l.replace('\t', '')

    if 'translate' in l.lower():  ## start looking for tips
Exemplo n.º 4
0
            ## Parse one tip-map entry "<integer> <tip name>" and store the name
            ## (with surrounding "'" stripped) under the integer, kept as a string.
            ## The pattern is unchanged; a raw triple-quoted literal lets both
            ## quote characters sit in the character class without string escapes.
            ## NOTE(review): the match is used without a None check, so a line that
            ## does not fit the pattern raises AttributeError here - confirm the
            ## enclosing condition (not visible in this excerpt) rules that out.
            cerberus=re.search(r'''([0-9]+) (['"A-Za-z0-9\?\|\-\_\.\/]+)''',line)
            tips[cerberus.group(1)]=cerberus.group(2).strip("'")

    ## The first line containing 'tree STATE_' ends the tip-map section:
    ## clear the plate flag and verify the tip map is complete.
    if 'tree STATE_' in line and plate==True: ## starting actual analysis
        plate=False
        ## check that correct numbers of tips have been parsed
        assert (tipNum == len(tips)),(
            'Expected number of tips: %s\nNumber of tips found: %s'%(tipNum,len(tips)))
    ################################################################################### start analysing trees
    ## Search for crud at the beginning of the line that's not a tree string;
    ## group 1 captures the digits after "STATE_". Raw string so that \s, \_,
    ## \[ and \& are passed to the regex engine rather than being invalid
    ## Python string escapes.
    cerberus=re.match(r'tree\sSTATE\_([0-9]+).+\[\&R\]\s',line)

    ## A successful match means this line holds a tree. On the first tree
    ## (treecount==0) the tree is parsed and, unless a chunk is being
    ## processed, the tab-separated header of the output log is written.
    if cerberus is not None: ## tree identified
        ################################################################# at state 0 - create the header for the output file and read the tree (in case the output log file requires information encoded in the tree)
        if treecount==0: ## At tree state 0 insert header into output file
            ll=bt.tree() ## empty tree object
            ## cerberus.group() is the whole matched prefix, so its length is
            ## the offset at which the tree string begins
            start=len(cerberus.group()) ## index of where tree string starts in the line
            treestring=str(line[start:]) ## grab tree string
            bt.make_tree(treestring,ll) ## read tree string
            if lower==0 and upper==np.inf: ## only add a header if not doing a chunk
                outfile.write('state') ## begin the output log file
                ########################################### add header to output log file
                ## each requested analysis appends its own tab-prefixed column name(s)
                if 'treeLength' in analyses:
                    outfile.write('\ttreeLength')
                ###########################################
                if 'RC' in analyses:
                    outfile.write('\tN\tS\tuN\tuS\tdNdS')
                ###########################################
                if 'tmrcas' in analyses:
                    tmrcas={'A':[],'B':[],'C':[]} ## dict of clade names
                    ## apply the parsed tip map to the tree before inspecting tip names
                    ll.renameTips(tips)
                    for k in ll.Objects: ## iterate over branches
                        if isinstance(k,bt.leaf): ## only interested in tips
                            if 'A' in k.name: ## if name of tip satisfies condition
Exemplo n.º 5
0
try:
    treefiles = sorted(
        [t for t in glob('*bestTree*') if t.split('_')[-1] in protein_list],
        key=lambda t: protein_list.index(t.split('_')[-1]))
    assert len(treefiles) == len(protein_list)
except ValueError as e:  # For now, require that we can match all tree files to a protein
    print 'Oops! Plotting without building trees? Make sure your trees are named like `whatever_protein` and you passed a list of matching `protein` names to `-p` to set the order of trees.\n\n'
    raise e
except AssertionError as e:
    print 'ERROR: Missing tree files. Looked for trees for these proteins:\n', protein_list, '\n\n'
    raise e

trees = {}
for i, t in enumerate(treefiles):
    treestring, treeobject = open(t, 'r').readline().strip(), bt.tree()
    bt.make_tree(treestring, treeobject)
    treeobject.treeStats()  ## initial traversal, checks for stats
    treeobject.sortBranches(
    )  ## traverses tree, sorts branches, draws tree (sets plotting coordinates)
    trees[i] = treeobject

for i in range(1, len(treefiles)):
    print 'Untangling tree number %d' % i
    untangle(trees[i - 1], trees[i])

################
## Plot Genome Map
################
## If we didn't parse proteins earlier, but have the reference sequence, do so
## now. Identity tests (`is None`) are used so the check cannot be affected by
## a custom __eq__ on either object.
if proteins is None and reference is not None:
    proteins, reference_seq = load_reference(reference)
    ## keep only the sequence text of the loaded reference record
    reference_seq = str(reference_seq.seq)