示例#1
0
def pick_otu(spe_out, alignment):
    """Write one FASTA record per "Species" entry found in *spe_out*.

    Every line of *spe_out* that starts with ``"Species"`` is treated as a
    header; the line right after it (stripped) is taken as a sequence name,
    looked up in the alignment, and written to ``alignment + ".otu"`` as a
    ``>name`` / sequence pair.

    Args:
        spe_out: Path of the text file to scan for "Species" headers.
        alignment: Passed to ``SeqGroup(sequences=...)`` and also used as the
            prefix of the output path, so it must be a string.

    A "Species" header on the very last line has no following name line and
    is skipped. Errors raised by the sequence lookup propagate to the caller.
    """
    with open(spe_out) as fin:
        lines = fin.readlines()

    # Parse the alignment before creating the output file so that a parse
    # failure does not leave an empty .otu file behind.
    aln = SeqGroup(sequences=alignment)

    with open(alignment + ".otu", "w") as fout:
        # Pair each line with its successor; the final line has no successor
        # and therefore never appears as a header here.
        for header, following in zip(lines, lines[1:]):
            if header.startswith("Species"):
                nline = following.strip()
                seq = aln.get_seq(nline)
                fout.write(">" + nline + "\n")
                fout.write(seq + "\n")
示例#2
0
def _create_tree (tree,fasta,out,color):
    """Render *tree* to *out* with each leaf's sequence drawn beside it.

    The sequences come from the FASTA input *fasta* and are looked up by leaf
    name. Leaves whose name appears in the mapping returned by
    ``_parse_color_file(color)`` additionally get that value as their
    background colour.
    """
    alignment = SeqGroup(fasta, format="fasta")
    phylo = Tree(tree)
    palette = _parse_color_file(color)

    for leaf_name in phylo.get_leaf_names():
        # One motif face per leaf name, attached to every leaf carrying it.
        motif_face = SeqMotifFace(alignment.get_seq(leaf_name), seq_format="()")
        for leaf in phylo.get_leaves_by_name(leaf_name):
            if leaf_name in palette:
                style = NodeStyle()
                style['bgcolor'] = palette[leaf_name]
                leaf.set_style(style)
            leaf.add_face(motif_face, 0, 'aligned')

    phylo.render(out)
示例#3
0
文件: phylotree.py 项目: Ward9250/ete
 def link_to_alignment(self, alignment, alg_format="fasta", **kwargs):
     """Attach the matching alignment sequence to every node of the tree.

     Each node visited by ``self.traverse()`` whose ``name`` is found in the
     alignment receives a ``sequence`` feature holding that sequence.

     Args:
         alignment: A ``SeqGroup`` instance (used as is), or anything
             accepted by the ``SeqGroup`` constructor.
         alg_format: Format forwarded to ``SeqGroup`` when *alignment* is
             not already a ``SeqGroup``.
         **kwargs: Extra keyword arguments forwarded to ``SeqGroup``.

     Nodes whose name raises ``KeyError`` on lookup are left without a
     ``sequence`` feature. If any of them are leaves, a single summary
     line with their count is printed to stderr.
     """
     # isinstance (rather than an exact type comparison) lets SeqGroup
     # subclasses be used directly instead of being re-parsed.
     if isinstance(alignment, SeqGroup):
         alg = alignment
     else:
         alg = SeqGroup(alignment, format=alg_format, **kwargs)

     missing_leaves = []
     for n in self.traverse():
         try:
             n.add_feature("sequence",alg.get_seq(n.name))
         except KeyError:
             # Only leaves are reported; internal nodes that are absent
             # from the alignment are skipped silently.
             if n.is_leaf():
                 missing_leaves.append(n.name)
     if missing_leaves:
         print("Warnning: [%d] terminal nodes could not be found in the alignment." %\
             len(missing_leaves), file=sys.stderr)
示例#4
0
 def link_to_alignment(self, alignment, alg_format="fasta", **kwargs):
     """Store each node's aligned sequence as its ``sequence`` feature.

     Walks the tree with ``self.traverse()`` and, for every node whose
     ``name`` can be looked up in the alignment, calls
     ``add_feature("sequence", ...)`` with the sequence found.

     Args:
         alignment: Either a ``SeqGroup`` (used directly) or a value the
             ``SeqGroup`` constructor can load.
         alg_format: Alignment format handed to ``SeqGroup`` when a new
             one has to be built.
         **kwargs: Additional keyword arguments for ``SeqGroup``.

     A ``KeyError`` during lookup leaves the node unchanged. The number of
     leaves affected, if any, is reported once on stderr.
     """
     # Accept SeqGroup subclasses too; an exact type comparison would send
     # them back through the SeqGroup constructor.
     if isinstance(alignment, SeqGroup):
         alg = alignment
     else:
         alg = SeqGroup(alignment, format=alg_format, **kwargs)

     missing_leaves = []
     for n in self.traverse():
         try:
             n.add_feature("sequence", alg.get_seq(n.name))
         except KeyError:
             # Internal nodes missing from the alignment are not reported.
             if n.is_leaf():
                 missing_leaves.append(n.name)
     if missing_leaves:
         print("Warnning: [%d] terminal nodes could not be found in the alignment." %\
             len(missing_leaves), file=sys.stderr)
# Draw the tree with a coloured amino-acid alignment next to the leaves and
# export it as PNG and SVG. `tree_input` and `alg` are expected to be defined
# before this point.
t = PhyloTree( tree_input , format=1, quoted_node_names=True )
seqs = SeqGroup(alg, format="fasta")

# Shared node style: no node marker, 2px branch lines.
nodestyle1 = NodeStyle()
nodestyle1["size"] = 0
nodestyle1["vt_line_width"] = 2
nodestyle1["hz_line_width"] = 2

# Background colour per residue; built once because it is the same for
# every leaf.
AA_BG_COLORS = {
    'G': 'Khaki', 'A': 'Khaki', 'S': 'Khaki', 'T': 'Khaki',
    'C': 'LightGreen', 'V': 'LightGreen', 'I': 'LightGreen',
    'L': 'LightGreen', 'P': 'LightGreen', 'F': 'LightGreen',
    'Y': 'LightGreen', 'M': 'YellowGreen', 'W': 'LightGreen',
    'N': 'Thistle', 'Q': 'Thistle', 'H': 'Thistle',
    'D': 'DarkSalmon', 'E': 'DarkSalmon',
    'K': 'SkyBlue', 'R': 'SkyBlue',
    'X': 'Black', '-': 'White',
}

for node in t.traverse():
    node.set_style(nodestyle1)

# Attach each leaf's sequence as an aligned face in column 2.
for leaf in t.iter_leaves():
    item = seqs.get_seq(leaf.name)
    Bars = SequenceFace(item, seqtype='aa', fsize=24, bg_colors=AA_BG_COLORS,
                        fg_colors=None, codon=None, col_w=1.5, alt_col_w=3,
                        special_col=None, interactive=False)
    leaf.add_face(Bars, 2, "aligned")

t.render("tree_and_alignment.png", h=100, units="mm")
t.render("tree_and_alignment.svg", h=100, units="mm")


# Second copy of the same tree, same node style, converted to ultrametric.
t2 = PhyloTree( tree_input , format=1, quoted_node_names=True )
for node in t2.traverse():
    node.set_style(nodestyle1)
t2.convert_to_ultrametric(tree_length=None, strategy='balanced')
示例#6
0
                    cf = CircleFace(10, "black")
                elif ies == '0':
                    cf = CircleFace(10, "LightGrey")
                else:
                    sys.exit(1)
                column = hiesL[(geneFamily, homIES)] + 1
                leaf.add_face(cf, column, "aligned")
        drawTree(outputFile)
    elif plotStyle == '3': # plot with MSA
        # load nucleotide sequences for all genes!
        nuclAlnFile = os.path.join(basePath, 'analysis', 'msas', 'filtered', 'cluster.' + geneFamily + '.nucl.fa')
        seqs = SeqGroup(sequences = nuclAlnFile, format = "fasta")

        for leaf in t:
            geneId = leaf.name
            seq = seqs.get_seq(geneId)
            seq = seq.translate(None, string.ascii_lowercase) # keep only CDS
            iesmotif = [[1, len(seq), "line", 2, 5, None, None, None]]
            for homIES in gfhomIES[geneFamily]:
                (begin, end, ies, iesId, beginMSA, endMSA) = charMat[(geneFamily, homIES, geneId)]
                if ies == '?':
                    if beginMSA == 'NA':
                        iesmotif.append([int(begin), int(end),"()", 10, 10, "red", "black", "arial|8|black|?"])
                    else:
                        iesmotif.append([int(begin), int(end),"()", 10, 10, "red", "black", "arial|8|black|?"])
                elif ies == '1':
                    iesmotif.append([int(beginMSA), int(endMSA),"[]", 10, 10, "black", "red", "arial|8|black|" + iesId])
                elif ies == '0':
                    iesmotif.append([int(begin), int(end), "[]", 10, 10, "silver", "silver", None])
                else:
                    quit(1)
示例#7
0
import re
from ete3 import SeqGroup, Tree
import sys

# Print, in FASTA format, the alignment sequence of every leaf of a tree.
alg_file = sys.argv[1]  # in fasta format
tree_file = sys.argv[2]  # in newick format

alg = SeqGroup(alg_file)
# Register a newick-safe alias for every sequence name: the characters
# : , ( ) ; are replaced by "_". Remove this loop if not necessary.
# Iterate over a snapshot, because adding keys to a dict while iterating
# its live view raises RuntimeError on Python 3.
for name, seq_id in list(alg.name2id.items()):
    safe_name = re.sub(r'[:,();]', '_', name)
    alg.name2id[safe_name] = seq_id

tree = Tree(tree_file)
for leaf in tree:
    print(">%s\n%s" % (leaf.name, alg.get_seq(leaf.name)))
from ete3 import SeqGroup

# Split one FASTA file into one FASTA file per species, where the species is
# the part of each sequence name before the first ".".
sp_mem = {}
in_fasta = SeqGroup('/home/plaza/research/dom_walk/raw/COG0484.faa')

# Each entry unpacks into three fields; only the first (the name) is needed.
for name, _seq, _ in in_fasta:
    sp = name.split('.')[0]
    sp_mem.setdefault(sp, []).append(name)

print ('writing fastas per sp')
for k, val in sp_mem.items():
    # `with` closes the file even if a lookup or write fails part-way.
    with open('/home/plaza/research/dom_walk/analysis/fasta_per_sp/'+k+'.faa', 'w') as out_fasta:
        for seq_name in val:
            print (">%s" %(seq_name), file = out_fasta)
            print (in_fasta.get_seq(seq_name), file =out_fasta)
示例#9
0
from ete3 import SeqGroup, Tree
import sys

# Write a FASTA file containing every leaf of the tree EXCEPT those lying
# between two target leaves (inclusive) in leaf-iteration order, after
# rooting the tree at its midpoint outgroup.
tree_file = sys.argv[1]  # in newick format
original_fasta = SeqGroup(sys.argv[2])
pruned_fasta_path = sys.argv[3]
star_target = str(sys.argv[4])
end_target = str(sys.argv[5])

tree = Tree(tree_file)
R = tree.get_midpoint_outgroup()
tree.set_outgroup(R)

# Record the position of both targets; if a name occurs more than once the
# last occurrence wins.
star_pos = None
end_pos = None
name_list = []
for num, leaf in enumerate(tree):
    name_list.append(leaf.name)
    if star_target == leaf.name:
        star_pos = num
    if end_target == leaf.name:
        end_pos = num

# Fail with a clear message instead of a NameError further down.
if star_pos is None or end_pos is None:
    sys.exit("target leaf not found in tree: start=%r end=%r"
             % (star_target, end_target))

pruned_list = name_list[star_pos:(end_pos + 1)]
print(pruned_list)

# Set membership keeps the filter below linear in the number of leaves.
pruned_names = set(pruned_list)
with open(pruned_fasta_path, 'w') as pruned_fasta:
    for ele in name_list:
        if ele not in pruned_names:
            pruned_fasta.write(">%s\n%s\n" % (ele, original_fasta.get_seq(ele)))