Пример #1
0
 def __write_algn(self, fullpath):
     """
     Write the nucleotide alignment of the nodes in this object to
     `fullpath`, using the 'paml' output format of SeqGroup.
     """
     alignment = SeqGroup()
     for node in self:
         # Register the node in the three lookup tables of the SeqGroup:
         # id -> sequence, id -> name and name -> id.
         alignment.id2seq[node.node_id] = node.nt_sequence
         alignment.id2name[node.node_id] = node.name
         alignment.name2id[node.name] = node.node_id
     alignment.write(outfile=fullpath, format='paml')
Пример #2
0
 def __write_algn(self, fullpath):
     """
     Dump the sequences attached to every node yielded by iterating over
     this object into a PAML-formatted alignment file at `fullpath`.
     """
     paml_group = SeqGroup()
     by_id_seq = paml_group.id2seq
     by_id_name = paml_group.id2name
     by_name_id = paml_group.name2id
     for member in self:
         # Each node contributes one entry keyed by its node_id.
         by_id_seq[member.node_id] = member.nt_sequence
         by_id_name[member.node_id] = member.name
         by_name_id[member.name] = member.node_id
     paml_group.write(outfile=fullpath, format='paml')
Пример #3
0
def extract_ss(input_path, suffix, tree_file):
    """Create a new dataset keeping only the sequences that are leaves of a tree.

    The source alignment is read as FASTA, filtered down to the entries whose
    label is a leaf name of the tree, and written to the alignment path of a
    freshly set-up dataset. The duplicates JSON and outgroups file are copied
    over unchanged.

    Args:
        input_path: Paths object of the source dataset. Its ``alignment``,
            ``duplicates_json``, ``outgroups_file``, ``_version`` and
            ``_dataset`` attributes are read.
        suffix: String appended to the source dataset name to name the new
            dataset.
        tree_file: Tree source passed to ``Tree(tree_file, format=1)``; its
            leaf names select the sequences to keep.
    """
    tree = Tree(tree_file, format=1)
    leaves_set = set(tree.get_leaf_names())
    msa = SeqGroup(input_path.alignment, "fasta")

    # The new dataset shares the version of the source and gets a suffixed name.
    path_argv = [input_path._version, input_path._dataset + suffix]
    output_path = common.Paths(path_argv, 0)
    data_versioning.setup_new_dataset(output_path)

    new_msa = SeqGroup()
    for entry in msa.iter_entries():
        label, sequence = entry[0], entry[1]
        if label in leaves_set:
            new_msa.set_seq(label, sequence)

    # Use a context manager so the file is flushed and closed deterministically
    # instead of relying on garbage collection of an anonymous handle.
    with open(output_path.alignment, "w") as writer:
        writer.write(new_msa.write(format="fasta"))

    shutil.copy(input_path.duplicates_json, output_path.duplicates_json)
    shutil.copy(input_path.outgroups_file, output_path.outgroups_file)
Пример #4
0
def extract_ss(input_path, suffix, tree_file):
    """Create a new dataset keeping only the sequences that are leaves of a tree.

    The source alignment is read as FASTA, filtered down to the entries whose
    label is a leaf name of the tree, and written to the alignment path of a
    freshly set-up dataset. The duplicates JSON and outgroups file are copied
    over unchanged. Progress messages are printed at the start and at the end.

    Args:
        input_path: Paths object of the source dataset. Its ``alignment``,
            ``duplicates_json``, ``outgroups_file``, ``_version`` and
            ``_dataset`` attributes are read.
        suffix: String appended to the source dataset name to name the new
            dataset.
        tree_file: Tree source passed to ``Tree(tree_file, format=1)``; its
            leaf names select the sequences to keep.
    """
    print(
        "Extracting alignment generated with the support selection tree thinning technique..."
    )
    tree = Tree(tree_file, format=1)
    leaves_set = set(tree.get_leaf_names())
    msa = SeqGroup(input_path.alignment, "fasta")

    # The new dataset shares the version of the source and gets a suffixed name.
    path_argv = [input_path._version, input_path._dataset + suffix]
    output_path = common.Paths(path_argv, 0)
    data_versioning.setup_new_dataset(output_path)

    new_msa = SeqGroup()
    for entry in msa.iter_entries():
        label, sequence = entry[0], entry[1]
        if label in leaves_set:
            new_msa.set_seq(label, sequence)

    # Use a context manager so the file is flushed and closed deterministically
    # instead of relying on garbage collection of an anonymous handle.
    with open(output_path.alignment, "w") as writer:
        writer.write(new_msa.write(format="fasta"))

    shutil.copy(input_path.duplicates_json, output_path.duplicates_json)
    shutil.copy(input_path.outgroups_file, output_path.outgroups_file)
    print("New version of the snapshot: " + output_path.path)
Пример #5
0
import sys
import random, string
from ete3 import SeqGroup
from tempfile import NamedTemporaryFile

# Usage: script.py <in_alignment> <renamed_fasta> <out_phylip> <translation_table>
#
# Rewrites every sequence of the input alignment under a random 5-character
# code, records the original-name -> code mapping in a tab-separated table,
# and converts the renamed FASTA to PHYLIP format.
# NOTE(review): the short codes are presumably meant to fit PHYLIP's limited
# name length -- confirm.
in_file = sys.argv[1]
transform_fasta = sys.argv[2]
out_file = sys.argv[3]
translate_table_path = sys.argv[4]

alg = SeqGroup(in_file)

code_alphabet = string.ascii_letters + string.digits
# Codes already assigned; random draws can collide, and two sequences sharing
# a code would become indistinguishable in the output alignment.
used_codes = set()

with open(transform_fasta, 'w') as translate, \
        open(translate_table_path, 'w') as translate_table:
    for name, seq, _ in alg:
        code = ''.join(random.choices(code_alphabet, k=5))
        while code in used_codes:
            code = ''.join(random.choices(code_alphabet, k=5))
        used_codes.add(code)
        # file.write replaces the Python 2 only `print >> file` form, which
        # fails on the Python 3 interpreter that random.choices requires.
        translate.write('>%s\n%s\n' % (code, seq))
        translate_table.write('%s\t%s\n' % (name, code))

# Both files are closed at this point, so the renamed FASTA is complete on
# disk before it is read back.
translate_alg = SeqGroup(transform_fasta)

translate_alg.write(format="phylip", outfile=out_file)
Пример #6
0
    # Thread each cDNA onto its aligned protein sequence: every residue takes
    # the next codon (3 nt) and every gap becomes a codon-sized gap.
    for name, seq, _ in alg_aa:
        try:
            cdna = F.id2seq[F.name2id[name]]
        except KeyError:
            print("cdna for %s not found" % name)
            continue
        # Collect the pieces and join once; walking an offset avoids
        # re-slicing the remaining cDNA on every residue.
        codons = []
        offset = 0
        for pos in seq:
            if pos != "-":
                codons.append(cdna[offset:offset + 3])
                offset += 3
            else:
                codons.append("---")
        # Last the stop codon: whatever triplet follows the final residue.
        codons.append(cdna[offset:offset + 3])
        alg_dna.set_seq(name, "".join(codons))
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("Input protein alignment contains %s aa sequences" % len(alg_aa))
    print("Output cdna alignment contains %s cdna sequences" % len(alg_dna))
    print("")
    alg_dna.write(outfile=infile.replace(".clustalo", ".clustalo.cdna.aln"))









                
Пример #7
0
# cDNA sequences, looked up by sequence name through F.name2id / F.id2seq.
F = parser.fasta.read_fasta(sys.argv[2])

# For every protein alignment file, build the matching codon alignment and
# write it next to the input (".clustalo" -> ".clustalo.cdna.aln").
for infile in infiles:
    print(infile)
    # Skip empty alignment files.
    if os.stat(infile).st_size == 0:
        continue
    alg_aa = SeqGroup(infile)
    alg_dna = SeqGroup()

    for name, seq, _ in alg_aa:
        try:
            cdna = F.id2seq[F.name2id[name]]
        except KeyError:
            print("cdna for %s not found" % name)
            continue
        # Every residue takes the next codon (3 nt); every gap becomes a
        # codon-sized gap. Collect the pieces and join once; walking an
        # offset avoids re-slicing the remaining cDNA on every residue.
        codons = []
        offset = 0
        for pos in seq:
            if pos != "-":
                codons.append(cdna[offset:offset + 3])
                offset += 3
            else:
                codons.append("---")
        # Last the stop codon: whatever triplet follows the final residue.
        codons.append(cdna[offset:offset + 3])
        alg_dna.set_seq(name, "".join(codons))
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("Input protein alignment contains %s aa sequences" % len(alg_aa))
    print("Output cdna alignment contains %s cdna sequences" % len(alg_dna))
    print("")
    alg_dna.write(outfile=infile.replace(".clustalo", ".clustalo.cdna.aln"))