Пример #1
0
 def test_with_multiple_output_formats(self):
     """Simple muscle call with multiple output formats.

     Runs MUSCLE once with Clustal output on stdout plus an HTML file and a
     strict-Clustal file, then checks each result against the input records.
     The two output files are removed at the end of the test.
     """
     input_file = "Fasta/f002"
     output_html = "temp_f002.html"
     output_clwstrict = "temp_f002.clw"
     self.assertTrue(os.path.isfile(input_file))
     records = list(SeqIO.parse(input_file, "fasta"))
     records.sort(key=lambda rec: rec.id)
     # Prepare the command... use Clustal output (with a MUSCLE header)
     cmdline = MuscleCommandline(muscle_exe,
                                 input=input_file,
                                 clw=True,
                                 htmlout=output_html,
                                 clwstrictout=output_clwstrict)
     self.assertEqual(
         str(cmdline).rstrip(),
         muscle_exe + " -in Fasta/f002 -clw -htmlout temp_f002.html" +
         " -clwstrictout temp_f002.clw")
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     child = subprocess.Popen(str(cmdline),
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              universal_newlines=True,
                              shell=(sys.platform != "win32"))
     # Clustalw on stdout:
     align = AlignIO.read(child.stdout, "clustal")
     align.sort()
     # Didn't use -quiet so there should be progress reports on stderr,
     self.assertTrue(child.stderr.read().strip().startswith("MUSCLE"))
     return_code = child.wait()
     self.assertEqual(return_code, 0)
     self.assertEqual(len(records), len(align))
     for old, new in zip(records, align):
         self.assertEqual(old.id, new.id)
     child.stdout.close()
     child.stderr.close()
     del child
     # Plain text mode is used instead of the legacy "rU" mode, which raises
     # ValueError on Python 3.11+; the checks below ignore newline style.
     with open(output_html) as handle:
         html = handle.read().strip().upper()
     self.assertTrue(html.startswith("<HTML"))
     self.assertTrue(html.endswith("</HTML>"))
     # ClustalW strict:
     align = AlignIO.read(output_clwstrict, "clustal")
     align.sort()
     self.assertEqual(len(records), len(align))
     for old, new in zip(records, align):
         self.assertEqual(old.id, new.id)
     os.remove(output_html)
     os.remove(output_clwstrict)
Пример #2
0
def muscle_msa(config, input_sequence_file, output_alignment_file):
    """Align a FASTA file with MUSCLE and return the parsed alignment.

    Args:
        config: Mapping where ``config['ALIGN']['MUSCLE_BIN']`` holds the
            path of the MUSCLE executable.
        input_sequence_file: Path passed to MUSCLE as its input.
        output_alignment_file: Path MUSCLE writes the alignment to.

    Returns:
        The alignment read back from ``output_alignment_file`` as FASTA.

    Raises:
        SystemExit: If the configured MUSCLE path does not exist.
    """
    binary_path = config['ALIGN']['MUSCLE_BIN']
    run_muscle = MuscleCommandline(binary_path,
                                   input=input_sequence_file,
                                   out=output_alignment_file)
    if os.path.exists(binary_path):
        run_muscle()
        return AlignIO.read(output_alignment_file, "fasta")

    # The command is never started when the binary is missing.
    logger.critical(
        "The path defined for the MUSCLE binary is not correct. Check the configuration file!"
    )
    raise SystemExit
Пример #3
0
def multialign_genomic_templates(fastafile):
    """Run MUSCLE on ``fastafile`` and return the alignment parsed from stdout.

    MUSCLE is invoked through Biopython's wrapper with only an input file, so
    the alignment arrives on stdout and is parsed as FASTA.
    """
    from Bio.Align.Applications import MuscleCommandline
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO
    from Bio import AlignIO

    run_muscle = MuscleCommandline(input=fastafile)
    aligned_text = run_muscle()[0]
    return AlignIO.read(StringIO(aligned_text), "fasta")
Пример #4
0
 def align(self):
     """Align ``self.pair_pep_file`` with the tool named in ``self.align_software``.

     With 'mafft' the alignment is taken from stdout and written to
     ``self.prot_align_file`` as FASTA. With 'muscle' the tool writes
     ``self.prot_align_file`` itself (called with ``clwstrict=True``).
     Any other value does nothing.
     """
     tool = self.align_software
     if tool == 'mafft':
         run_mafft = MafftCommandline(cmd=self.mafft_path,
                                      input=self.pair_pep_file,
                                      auto=True)
         mafft_stdout = run_mafft()[0]
         parsed = AlignIO.read(StringIO(mafft_stdout), "fasta")
         AlignIO.write(parsed, self.prot_align_file, "fasta")
     elif tool == 'muscle':
         run_muscle = MuscleCommandline(cmd=self.muscle_path,
                                        input=self.pair_pep_file,
                                        out=self.prot_align_file,
                                        seqtype="protein",
                                        clwstrict=True)
         run_muscle()
Пример #5
0
def GenerateTree(matrix_name, multifasta_filename, show_pdf):
    """Align sequences with MUSCLE, build a tree with megacc and report it.

    Args:
        matrix_name: Passed to MUSCLE as ``matrix`` and used as the stem of
            the alignment file (``<matrix_name>.fasta``) and of the megacc
            output under ``output/``.
        multifasta_filename: Input file handed to MUSCLE.
        show_pdf: Forwarded to ``Phylo.draw`` as ``do_show``.

    Returns:
        True once the tree has been appended to ``results/tress.txt``.
    """
    aligned_fasta = matrix_name + ".fasta"
    # The context manager closes the results file even when MUSCLE, megacc or
    # the tree parser raises.
    with open("results/tress.txt", "a") as results:
        cline = MuscleCommandline(input=multifasta_filename,
                                  out=aligned_fasta,
                                  matrix=matrix_name)
        cline()
        os.system(MEGA_location + "megacc True -d " + aligned_fasta +
                  " -o output/" + matrix_name + " -a " + method)
        tree = Phylo.read("output/" + matrix_name + "_consensus.nwk",
                          "newick")
        results.write("KMAT Tree:\n")
        Phylo.draw_ascii(tree, file=results)
    if GeneratePDF:
        Phylo.draw(tree, do_show=show_pdf)
    return True
Пример #6
0
 def test_Muscle_profile_simple(self):
     """Round-trip a simple profile alignment through the MUSCLE wrapper."""
     cmdline = MuscleCommandline(muscle_exe)
     settings = (
         ("out", self.outfile3),
         ("profile", True),
         ("in1", self.infile2),
         ("in2", self.infile3),
     )
     for option, value in settings:
         cmdline.set_parameter(option, value)
     expected = (muscle_exe + " -out Fasta/temp_align_out3.fa" +
                 " -profile -in1 Fasta/fa01 -in2 Fasta/f001")
     self.assertEqual(str(cmdline), expected)
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     # The alignment goes to the output file, so stdout must stay empty.
     stdout_text, stderr_text = cmdline()
     self.assertEqual(stdout_text, "")
     self.assertTrue("ERROR" not in stderr_text)
     self.assertTrue(stderr_text.strip().startswith("MUSCLE"), stdout_text)
Пример #7
0
def run_align(seqs: dict,
              in_path: str,
              out_path: str,
              muscle_path=muscle_path):
    """Write ``seqs`` to ``in_path``, align with MUSCLE and load the result.

    Args:
        seqs: Records handed to ``SeqIO.write`` in FASTA format.
        in_path: File the unaligned sequences are written to.
        out_path: File MUSCLE writes the alignment to.
        muscle_path: MUSCLE executable; defaults to the module-level setting.

    Returns:
        The aligned records from ``out_path`` as built by ``SeqIO.to_dict``.
    """
    with open(in_path, 'w') as fasta_handle:
        SeqIO.write(seqs, fasta_handle, 'fasta')

    run_muscle = MuscleCommandline(muscle_path, input=in_path, out=out_path)
    run_muscle()

    return SeqIO.to_dict(SeqIO.parse(out_path, 'fasta'))
Пример #8
0
 def test_long(self):
     """Run MUSCLE with fast options on a longer input file."""
     # Build a larger input by converting part of another example file.
     fasta_path = "temp_cw_prot.fasta"
     records = list(SeqIO.parse("NBRF/Cw_prot.pir", "pir"))[:40]
     SeqIO.write(records, fasta_path, "fasta")
     cmdline = MuscleCommandline(muscle_exe)
     # Switches default to None (treated as False), so they are set explicitly.
     settings = (
         ("in", fasta_path),
         ("maxiters", 1),  # fast options
         ("diags", True),
         ("clwstrict", True),  # Clustal output
         ("maxhours", 0.1),  # shouldn't be needed; checks it is accepted
         ("quiet", True),  # no progress reports to stderr
     )
     for option, value in settings:
         cmdline.set_parameter(option, value)
     expected = (_escape_filename(muscle_exe) +
                 " -in temp_cw_prot.fasta -diags -maxhours 0.1" +
                 " -maxiters 1 -clwstrict -quiet")
     self.assertEqual(str(cmdline).rstrip(), expected)
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     child = subprocess.Popen(
         str(cmdline),
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
         shell=(sys.platform != "win32"),
     )
     aligned = AlignIO.read(child.stdout, "clustal")
     aligned.sort()
     records.sort(key=lambda rec: rec.id)  # noqa: E731
     self.assertEqual(len(records), len(aligned))
     for original, row in zip(records, aligned):
         self.assertEqual(original.id, row.id)
         # Removing the gaps must give back the input sequence.
         self.assertEqual(str(row.seq).replace("-", ""), str(original.seq))
     # See if quiet worked:
     self.assertEqual("", child.stderr.read().strip())
     self.assertEqual(child.wait(), 0)
     child.stdout.close()
     child.stderr.close()
     del child
     os.remove(fasta_path)
Пример #9
0
def align_sequences(muscle_exe,
                    seq1,
                    seq2,
                    seq1_id=None,
                    seq2_id=None,
                    temp_input_file=None,
                    temp_output_file=None):
    """
    Align two protein sequences using Muscle.

    Args:
        muscle_exe (str): path to muscle tool
        seq1 (str): protein sequence 1
        seq2 (str): protein sequence 2
        seq1_id (str): if set, the id for sequence 1
        seq2_id (str): if set, the id for sequence 2; otherwise derived from
            seq1_id by replacing its last two characters with "_2"
        temp_input_file (str): if set, the path to the alignment input file
        temp_output_file (str): if set, the path to the alignment output file

    Returns:
        tuple: (temp_input_file, temp_output_file), the paths actually used
    """
    # working directory for the alignment files
    temp_dir = "temp_alignment"
    gen.create_output_directories(temp_dir)

    # one random token is shared by the default input and output file names
    token = random.random()
    temp_input_file = temp_input_file or (
        "{0}/protein_alignment_input_{1}.fasta".format(temp_dir, token))
    temp_output_file = temp_output_file or (
        "{0}/protein_alignment_output_{1}.fasta".format(temp_dir, token))

    # fall back to generated ids when none are given
    seq1_id = seq1_id or "seq_id_{0}_1".format(random.random())
    seq2_id = seq2_id or "{0}_2".format(seq1_id[:-2])

    # write both sequences as a two-record fasta file
    fasta_text = ">{0}\n{1}\n>{2}\n{3}\n".format(seq1_id, seq1, seq2_id, seq2)
    with open(temp_input_file, "w") as fasta_handle:
        fasta_handle.write(fasta_text)

    # run muscle; the alignment lands in temp_output_file
    run_muscle = MuscleCommandline(muscle_exe,
                                   input=temp_input_file,
                                   out=temp_output_file)
    run_muscle()

    return temp_input_file, temp_output_file
Пример #10
0
def muscle_alignment(seqs):
    """Align sequences with muscle.

    The records are written to ``temp.fa`` in the working directory, aligned
    into ``temp.txt`` and read back.

    Returns:
        The parsed alignment, or None (after printing a hint) when running
        muscle fails.
    """
    from Bio import SeqIO, AlignIO
    from Bio.Align.Applications import MuscleCommandline

    filename = 'temp.fa'
    SeqIO.write(seqs, filename, "fasta")
    aligned_name = os.path.splitext(filename)[0] + '.txt'
    cline = MuscleCommandline(input=filename, out=aligned_name)
    try:
        cline()
    except Exception:
        # Kept as a best-effort fallback, but no longer swallows
        # KeyboardInterrupt/SystemExit the way a bare ``except:`` did.
        print('muscle not installed?')
        return None
    return AlignIO.read(aligned_name, 'fasta')
Пример #11
0
    def raw_sequence(self):
        """Align the submitted sequences and record their gap consensus.

        The sequences in ``self.data['sequences']`` are written to a temporary
        file, aligned with MUSCLE (alignment read from stdout) and reduced to
        a gap consensus with threshold 0.55 and ambiguous character 'N'.

        Returns:
            ``self.results`` after the consensus and its length were added.
        """
        filename = create_seq_file(self.data['sequences'])
        try:
            muscle = MuscleCommandline(input=filename)
            stdout, _ = muscle()
            align = AlignIO.read(StringIO(stdout), "fasta")

            summary_align = AlignInfo.SummaryInfo(align)
            consensus = summary_align.gap_consensus(threshold=0.55,
                                                    ambiguous='N')

            add_result(self, "Consenus sequence", str(consensus))
            add_result(self, "Sequence length", len(consensus))
        finally:
            # Remove the temporary file even when the alignment fails.
            remove_temp_file(filename)

        return self.results
Пример #12
0
def alignEm(refSeq, record):
    """Align two records with MUSCLE and score the aligned pair.

    Both records are piped to ``../muscle`` (``maxiters=1``, ``diags=True``)
    as FASTA and the alignment is read back from its stdout.

    Returns:
        ``Levenshtein.ratio`` of the two aligned sequences, or 0 when MUSCLE
        cannot be started or its output cannot be processed.
    """
    try:
        muscle_cline = MuscleCommandline("../muscle", maxiters=1, diags=True)
        # Text-mode pipes: SeqIO/AlignIO exchange str with the child, which
        # binary pipes reject on Python 3.
        child = subprocess.Popen(str(muscle_cline),
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True,
                                 shell=True)
    except Exception:
        return 0

    try:
        SeqIO.write([refSeq, record], child.stdin, "fasta")
        child.stdin.close()
        align = AlignIO.read(child.stdout, "fasta")
        return Levenshtein.ratio(str(align[0].seq), str(align[1].seq))
    except Exception:
        # Best-effort scoring: any failure counts as "no similarity".
        return 0
    finally:
        # Always release the pipes and reap the child process.
        for pipe in (child.stdin, child.stdout, child.stderr):
            try:
                pipe.close()
            except (IOError, OSError):
                pass
        child.wait()
    def file_seq(self):
        """Return the gap consensus of the submitted sequences as FASTA text.

        The sequences in ``self.data['sequences']`` are written to a temporary
        file and aligned with MUSCLE; the consensus uses the threshold given
        in ``self.data['threshold']`` and 'N' as the ambiguous character.

        Returns:
            A string with a ``>consensus sequence <length> bp`` header line
            followed by the consensus sequence.
        """
        filename = create_seq_file(self.data['sequences'])
        try:
            threshold = float(self.data.get('threshold'))

            muscle = MuscleCommandline(input=filename)
            stdout, _ = muscle()
            align = AlignIO.read(StringIO(stdout), "fasta")

            summary_align = AlignInfo.SummaryInfo(align)
            consensus = summary_align.gap_consensus(threshold=threshold,
                                                    ambiguous='N')
        finally:
            # Remove the temporary file even when parsing or alignment fails.
            remove_temp_file(filename)

        return f'>consensus sequence {len(consensus)} bp\n' + str(consensus)
Пример #14
0
def muscle_profile_align(fa1, fa2):
    ''' Profile-align two fasta files with muscle

        fa1, fa2: filenames of the fastas to profile-align. Both must exist
                  on disk when the command is run

        Returns the alignment parsed from muscle's stdout (fasta format)

    '''
    profile_cmd = MuscleCommandline(in1=fa1, in2=fa2, profile=True)
    aligned_text = profile_cmd()[0]
    return AlignIO.read(StringIO(aligned_text), format="fasta")
Пример #15
0
def muscle_alignment(path):
    """Performs MUSCLE alignment using the command line tool and writes to output file.

    Every entry of ``path`` whose name contains "fasta" is aligned; the
    result is written to a file of the same name in the current working
    directory and parsed once as FASTA.

    CAVE: filenames can not include special characters such as (*, /, &)
    because the command is run through the shell.
    """
    for files in os.listdir(path):
        if "fasta" not in files:
            continue
        file_path = path + "/{0}".format(files)
        output_file = str(files)
        muscle_cline = MuscleCommandline(input=file_path, out=output_file)
        child = subprocess.Popen(str(muscle_cline),
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True,
                                 shell=True)
        # communicate() drains both pipes while waiting; wait() alone can
        # deadlock once the child fills an unread pipe.
        child.communicate()
        with open(output_file) as align_handle:
            # Parsing checks that the output is a readable alignment.
            AlignIO.read(align_handle, "fasta")
Пример #16
0
 def test_Muscle_profile_with_options(self):
     """Profile alignment built from keyword switches and valued options"""
     # Keyword arguments are used here; note -stable isn't supported in v3.8
     options = {
         "out": self.outfile4,
         "in1": self.infile2,
         "in2": self.infile3,
         "profile": True,
         "stable": True,
         "cluster1": "neighborjoining",
     }
     cmdline = MuscleCommandline(muscle_exe, **options)
     expected = (muscle_exe + " -out Fasta/temp_align_out4.fa" +
                 " -profile -in1 Fasta/fa01 -in2 Fasta/f001" +
                 " -cluster1 neighborjoining -stable")
     self.assertEqual(str(cmdline), expected)
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     """
    def codon_align(self,
                    alignment_tool="mafft",
                    prune=True,
                    discard_premature_stops=False):
        '''
        takes a nucleotide alignment, translates it, aligns the amino acids, pads the gaps
        note that this suppresses any compensated frameshift mutations

        The work happens inside self.run_dir; the previous working directory
        is restored and self.run_dir removed afterwards, also on errors.
        On success the padded nucleotide alignment is stored in self.aln.

        Parameters:
        - alignment_tool: ['mafft', 'muscle'] the commandline tool to use;
          for any other value a message is printed and self.aln is not set
        - prune: when False, sequences with premature stops are kept even if
          discard_premature_stops is set
        - discard_premature_stops: drop sequences whose translation has a
          '*' before the final position
        '''
        cwd = os.getcwd()
        make_dir(self.run_dir)
        os.chdir(self.run_dir)
        try:
            # translate
            aa_seqs = {}
            for seq in self.seqs.values():
                tempseq = seq.seq.translate(table="Bacterial")
                # use only sequences that translate without trouble
                if not discard_premature_stops or '*' not in str(
                        tempseq)[:-1] or prune == False:
                    aa_seqs[seq.id] = SeqRecord(tempseq, id=seq.id)
                else:
                    print(seq.id, "has premature stops, discarding")

            tmpfname = 'temp_in.fasta'
            SeqIO.write(aa_seqs.values(), tmpfname, 'fasta')

            if alignment_tool == 'mafft':
                os.system(
                    'mafft --reorder --amino temp_in.fasta 1> temp_out.fasta')
                aln_aa = AlignIO.read('temp_out.fasta', "fasta")
            elif alignment_tool == 'muscle':
                from Bio.Align.Applications import MuscleCommandline
                aligned_fname = tmpfname[:-5] + 'aligned.fasta'
                cline = MuscleCommandline(input=tmpfname, out=aligned_fname)
                cline()
                aln_aa = AlignIO.read(aligned_fname, "fasta")
            else:
                # Stop here; previously execution continued and failed with
                # a NameError because no alignment had been produced.
                print('Alignment tool not supported:' + alignment_tool)
                return

            # generate nucleotide alignment
            self.aln = pad_nucleotide_sequences(aln_aa, self.seqs)
        finally:
            os.chdir(cwd)
            remove_dir(self.run_dir)
Пример #18
0
def get_multiple_alignment(seq_file, alignment_file=None, verbose=9):
    """Align ``seq_file`` with muscle and return the path of the alignment.

    Args:
        seq_file: Input file handed to muscle.
        alignment_file: Where to write the alignment. When None, a temporary
            file is created; it is not deleted here, so the caller is
            responsible for removing it.
        verbose: The muscle command line is printed when this is above 6.

    Returns:
        The path of the file muscle wrote the alignment to.
    """
    muscle_exe = 'muscle'
    if alignment_file is None:
        # Only the name is needed; closing the handle right away avoids
        # leaking an open file descriptor.
        with tempfile.NamedTemporaryFile(delete=False) as tmp_handle:
            out_file = tmp_handle.name
    else:
        out_file = alignment_file

    muscle_cline = MuscleCommandline(muscle_exe, input=seq_file, out=out_file)
    if verbose > 6:
        print("      muscle command line:")
        print(muscle_cline)
    muscle_cline()
    return out_file
Пример #19
0
 def test_Muscle_with_options(self):
     """Round-trip through the wrapper using one switch and one valued option"""
     cmdline = MuscleCommandline(muscle_exe)
     # "input" is accepted as an alias for "in"
     cmdline.set_parameter("input", self.infile1)
     cmdline.set_parameter("out", self.outfile2)
     # Options can also be assigned through properties:
     cmdline.objscore = "sp"
     cmdline.noanchors = True
     expected = (muscle_exe + " -in Fasta/f002" +
                 " -out Fasta/temp_align_out2.fa" + " -objscore sp -noanchors")
     self.assertEqual(str(cmdline), expected)
     self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
     # The alignment goes to the output file, so stdout must stay empty.
     stdout_text, stderr_text = cmdline()
     self.assertEqual(stdout_text, "")
     self.assertTrue("ERROR" not in stderr_text)
     self.assertTrue(stderr_text.strip().startswith("MUSCLE"), stdout_text)
Пример #20
0
def algo_msa(msa_type: str, seq_id: List[int], consensus: bool = None):
    """Run a multiple sequence alignment over stored virus sequences.

    Args:
        msa_type: "muscle" or "clustalo"; any other value selects the mview
            pipeline (Clustal Omega followed by mview HTML rendering).
        seq_id: Ids of the ``Virus`` rows to align, at most 10. The FASTA
            entries are written in this order.
        consensus: Only used by the mview pipeline; switches its consensus
            output "on" or "off".

    Returns:
        An error message string when more than 10 ids are given; otherwise
        whatever the selected command returns.
    """
    if len(seq_id) > 10:
        return "Cannot process more than 10 sequences for MSA. Operation aborted."

    rows = Virus.query.with_entities("id",
                                     "fasta").filter(Virus.id.in_(seq_id))
    fasta_by_id = {row[0]: row[1] for row in rows}

    fasta_file = "tmp/%s" % str(uuid.uuid4())
    try:
        with open(fasta_file, "w") as fasta:
            # Ensure ordering of sequences based on input
            for i in seq_id:
                fasta.write(fasta_by_id[i] + "\n\n")

        if msa_type == "muscle":
            msa_command = MuscleCommandline("muscle",
                                            input=fasta_file,
                                            html=True,
                                            quiet=True)
            return msa_command()
        if msa_type == "clustalo":
            msa_command = ClustalOmegaCommandline(infile=fasta_file)
            return msa_command()

        # Anything else is handled as "mview".
        clustal_file = "tmp/%s" % str(uuid.uuid4())
        try:
            msa_command = ClustalOmegaCommandline(infile=fasta_file,
                                                  outfile=clustal_file)
            msa_command()

            con = "on" if consensus else "off"
            return runCommand([
                "mview", "--css", "on", "--pcid", "aligned", "--ruler", "on",
                "--width", "80", "-coloring", "mismatch", "-colormap", "pink",
                "-consensus", con, "-con_threshold", "100", "-html", "head",
                "-in", "fasta", clustal_file
            ])
        finally:
            # Clean up the intermediate alignment even if a command failed.
            if os.path.exists(clustal_file):
                os.remove(clustal_file)
    finally:
        # Clean up the input file even if writing or aligning failed.
        if os.path.exists(fasta_file):
            os.remove(fasta_file)
Пример #21
0
def align_fasta(infname, outfname, debug=False):
    """
    Align a fasta file with MUSCLE, then trim the alignment with an R script.

    Args:
        infname (str): Path to fasta to be aligned.
        outfname (str): Path the alignment is written to. The trim script
            receives this path as both of its arguments.
        debug (bool): When True, both subprocesses are started with
            subprocess.CREATE_NEW_CONSOLE.

    Raises:
        ChildProcessError: If the output file is empty after both steps.
        RScriptFailedError: If the R script exits with a non-zero code.
    """
    binaries = {
        "Windows": "niclassify/bin/muscle3.8.31_i86win32.exe",
        "Linux": "niclassify/bin/muscle3.8.31_i86linux64",
        "Darwin": "niclassify/bin/muscle3.8.31_i86darwin64"
    }
    muscle_binary = os.path.realpath(
        os.path.join(MAIN_PATH, binaries[PLATFORM]))

    alignment_call = MuscleCommandline(muscle_binary,
                                       input=os.path.realpath(infname),
                                       out=os.path.realpath(outfname))
    command_text = str(alignment_call)
    print(command_text)

    # Extra keyword arguments shared by both subprocess calls in debug mode.
    console_opts = ({"creationflags": subprocess.CREATE_NEW_CONSOLE}
                    if debug else {})

    subprocess.run(command_text, shell=True, **console_opts)

    r_script = os.path.realpath(
        os.path.join(MAIN_PATH, "niclassify/core/scripts/trim_alignment.R"))
    proc = subprocess.run([R_LOC, r_script, outfname, outfname],
                          env=os.environ.copy(),
                          **console_opts)

    # An empty output file is reported before the R exit status is checked.
    if os.stat(outfname).st_size == 0:
        raise ChildProcessError("Sequence Alignment Failed")

    if proc.returncode != 0:
        raise RScriptFailedError("R TrimAlignment failed")
Пример #22
0
def _align_muscle(input_file,output_file,**kwargs):
    """
    Run muscle.

    Args:
        input_file: file with the sequences to align
        output_file: file muscle writes the alignment to
        **kwargs: further options passed on to MuscleCommandline

    Raises:
        RuntimeError: if the muscle executable cannot be found or if it
            exits with a non-zero return code
    """
    import os
    import shlex

    cmd = MuscleCommandline(input=input_file, out=output_file,**kwargs)

    # A plain str.split() would cut quoted paths containing spaces into
    # pieces. On POSIX, split with shell quoting rules; on Windows, hand the
    # command string over unchanged.
    if os.name == "nt":
        cmd_args = str(cmd)
    else:
        cmd_args = shlex.split(str(cmd))

    # Run muscle
    try:
        output = subprocess.run(args=cmd_args)
    except FileNotFoundError as exc:
        err = "muscle does not appear to be in your path\n"
        raise RuntimeError(err) from exc

    # Make sure it returned successfully
    if output.returncode != 0:
        err = "muscle failed\n"
        raise RuntimeError(err)
Пример #23
0
def muscle_aln():
    mergedaln = {}
    # Reuse alignment
    if len(glob.glob("coregenes/*.fasta")) > 0:
        print "Running muscle on extracted sequences..."
        coregenes = glob.glob("coregenes/*.fasta")
        for gene in coregenes:
            print '\r{0:.1%} completed'.format(
                float(coregenes.index(gene)) / len(coregenes)),
            muscle_cline = MuscleCommandline(muscle_exe,
                                             input=gene,
                                             maxiters=1)
            stdout, stderr = muscle_cline()
            alignment = AlignIO.read(StringIO(stdout), 'fasta')
            AlignIO.write(alignment, open(gene + ".aln", "w"), "fasta")
            # ref_gen_len = len([x for x in list(alignment) if x.id == ref_gen][0].seq)
            for seq in alignment:
                if seq.id not in mergedaln:
                    mergedaln[seq.id] = seq
                else:
                    mergedaln[seq.id] += seq
            missing_genome = list(
                set([
                    os.path.split(genome)[1].split('.')[0]
                    for genome in genomes
                ]) - set([seq.id for seq in alignment]))
            for genome in missing_genome:
                if genome not in mergedaln:
                    mergedaln[genome] = SeqRecord(Seq(
                        "-" * alignment.get_alignment_length(),
                        Bio.Alphabet.SingleLetterAlphabet()),
                                                  id=genome,
                                                  name=genome,
                                                  description=genome)
                else:
                    mergedaln[genome] += "-" * alignment.get_alignment_length()
        pickle.dump(mergedaln, open("mergedaln", "wb"))
        SeqIO.write(mergedaln.values(), "muscleout.aln", "fasta")
        trimal_cline = "%s -in muscleout.aln -out trimmed_muscleout.aln -gappyout" % (
            trimal_exe)
        os.system(trimal_cline)
    else:
        print "No sequences to align"
        exit(0)
Пример #24
0
def _quickalign_ungap(seq):
    """Return ``seq`` as a plain string with all '-' characters removed.

    Accepts a string or an object with a ``.seq`` attribute (such as a
    SeqRecord); exits the program for anything else.
    """
    try:
        return re.sub("-", "", seq)
    except TypeError:
        # not a string, probably a SeqRecord
        try:
            return re.sub("-", "", str(seq.seq))
        except AttributeError:
            # give up
            sys.exit(
                "quickAlign() requires inputs to be either strings or SeqRecord objects"
            )


def quickAlign(refseq, testseq, maxiters=None, diags=None, gapopen=None):
    """Align two sequences with muscle via stdin/stdout.

    Args:
        refseq: Reference sequence, a string or SeqRecord; existing gaps are
            stripped before aligning.
        testseq: Sequence to align against the reference, same types.
        maxiters: Optional value for muscle's ``maxiters`` option.
        diags: Optional value for muscle's ``diags`` option.
        gapopen: Optional value for muscle's ``gapopen`` option.

    Returns:
        dict: aligned sequence strings keyed by record id ("ref", "test").
    """
    # sanity check
    refseq = _quickalign_ungap(refseq)
    testseq = _quickalign_ungap(testseq)

    data = ">ref\n%s\n>test\n%s\n" % (refseq, testseq)

    muscle_cline = MuscleCommandline(cmd=muscle, quiet=True)
    if maxiters is not None:
        muscle_cline.maxiters = maxiters
    if diags is not None:
        # This used to assign the undefined name ``diag`` (NameError).
        muscle_cline.diags = diags
    if gapopen is not None:
        muscle_cline.gapopen = gapopen

    stdout, _ = muscle_cline(stdin=data)

    return {
        rec.id: str(rec.seq)
        for rec in SeqIO.parse(StringIO(stdout), "fasta")
    }
Пример #25
0
def align_prot_objs(prots, grpname):
    """Align protein records with muscle and return the aligned sequences.

    The records are written to ``<workdir>/<grpname>.prot.fasta`` and aligned
    into ``<workdir>/<grpname>.prot.align.fasta``, where ``<workdir>`` is
    ``filehash['ALL']['workdir']['NA']``. The file names and the muscle
    command line are reported on stderr.

    Returns:
        list: the ``seq`` of every record in the alignment file, in file order.
    """
    workdir = filehash['ALL']['workdir']['NA']
    alignmentInputFile = workdir + '/' + grpname + '.prot.fasta'
    alignmentOutputFile = workdir + '/' + grpname + '.prot.align.fasta'
    # sys.stderr.write replaces the Python-2-only ``print >>`` form and
    # produces the same text.
    sys.stderr.write("%s \t %s\n" % (alignmentInputFile, alignmentOutputFile))
    with open(alignmentInputFile, "w") as output_handle:
        SeqIO.write(prots, output_handle, "fasta")
    cline = MuscleCommandline(input=alignmentInputFile,
                              out=alignmentOutputFile)
    sys.stderr.write("%s\n" % cline)
    cline()
    # Plain text mode is used: the "rU" mode raises ValueError on
    # Python 3.11+.
    with open(alignmentOutputFile) as handleM:
        return [record.seq for record in SeqIO.parse(handleM, "fasta")]
Пример #26
0
 def align(self, seq_iter):
     """Align the given sequences with MUSCLE and return the aligned records.

     Each item of ``seq_iter`` is written in FASTA format to a temporary
     input file, MUSCLE is run with ``self.kwargs`` (whose 'input' and 'out'
     entries are overwritten here), and the alignment is loaded from a
     temporary output file. When ``self.out_path`` is set, the output file is
     afterwards moved to a path obtained from ``functions.get_new_path``.

     Returns whatever ``dataio.get_buffered_seq_iter`` yields for the output.
     """
     with TemporaryFilePath() as in_path:
         # Write all sequences into the temporary input file.
         with OpenFile(in_path, 'w') as tmp:
             for seq in seq_iter:
                 tmp.write(seq.format('fasta'))
         self.kwargs['input'] = in_path
         with TemporaryFilePath() as tmp_out_path:
             self.kwargs['out'] = tmp_out_path
             muscle_command = MuscleCommandline(self.exe, **self.kwargs)
             # Keep the exact command line on the instance for later use.
             self.cmd = str(muscle_command)
             _LOG.debug('{0}: Executing command {1!r}'.format(
                 self.name, self.cmd))
             stdout, stderr = muscle_command()
             # Results are loaded before the output file is moved below.
             # NOTE(review): presumably the sequences are buffered in memory
             # so they survive the temp file going away; confirm in dataio.
             results = dataio.get_buffered_seq_iter([tmp_out_path],
                                                    format='fasta')
             if self.out_path:
                 # get_new_path may return a different path; store what it gives.
                 self.out_path = functions.get_new_path(self.out_path)
                 shutil.move(tmp_out_path, self.out_path)
     return results
Пример #27
0
def alignSEQ(SEQs, s, n):
    """Align every entry of ``SEQs`` together with ``s`` using muscle.

    Args:
        SEQs: Mapping of sequence name to sequence string.
        s: Text written verbatim after each sequence in the input file
            (presumably FASTA-formatted records; confirm with callers).
        n: Integer used to number the temporary file names.

    Returns:
        list: one output file name per entry of ``SEQs``; the files are
        written below ``work_dir``.
    """
    outName = []
    for name, sequence in SEQs.items():
        # NOTE(review): n is never advanced, so every iteration reuses the
        # same input and output file names. Confirm whether a counter per
        # sequence was intended.
        inputName = "TMP_{0:03d}.fa".format(n)
        # The context manager closes the file even if a write fails.
        with open(inputName, "w") as inFaHandle:
            inFaHandle.write('>' + name + '\n' + sequence + "\n")
            inFaHandle.write(s)

        outfile = inputName.replace("TMP_", "aligned_")
        m_cline = MuscleCommandline(muscle,
                                    input=inputName,
                                    out=work_dir + '/' + outfile,
                                    clw=False)
        m_cline()

        outName.append(outfile)
    return outName
Пример #28
0
    def _align_fasta(self):
        """Run MUSCLE on ``self.fasta`` and return the parsed multi-alignment.

        Returns
        -------
            The alignment parsed from MUSCLE's stdout in FASTA format.

        Raises
        ------
        OSError
            When the MUSCLE commandline program returns an error; the message
            is the stderr captured from the failed run.

        Notes
        -----
            The FASTA file and the extra MUSCLE options are taken from
            ``self.fasta`` and ``self.kwargs``.

        """
        run_muscle = MuscleCommandline(input=self.fasta, **self.kwargs)
        try:
            aligned_text = run_muscle()[0]
        except ApplicationError as err:
            raise OSError(err.stderr)
        return AlignIO.read(StringIO(aligned_text), "fasta")
Пример #29
0
def muscle_aln(seqreclist, **kwargs):
    """Align with muscle.

    The records are written to a uniquely named FASTA file in ``TEMP_DIR``,
    aligned by muscle (alignment read from its stdout) and the temporary
    file is removed again.

    Args:
        seqreclist: Records to align.
        **kwargs: Extra options passed on to ``MuscleCommandline``.

    Returns:
        The alignment parsed from muscle's output in FASTA format.
    """
    fasta_path = TEMP_DIR + "/%s.fasta" % str(uuid.uuid4())
    try:
        with open(fasta_path, "w") as output_handle:
            SeqIO.write(seqreclist, output_handle, "fasta")

        muscle_cline = MuscleCommandline(MUSCLE_BIN,
                                         input=fasta_path,
                                         **kwargs)
        stdout, _ = muscle_cline()
        return AlignIO.read(StringIO(stdout), "fasta")
    finally:
        # os.remove is portable (no shell "rm") and runs even when muscle or
        # the parser fails.
        if os.path.exists(fasta_path):
            os.remove(fasta_path)
Пример #30
0
def align_muscle(*seqs, **kwargs):
    '''Global alignment of sequences via MUSCLE.

    Args:
        *seqs: Sequences to align, either all plain strings (converted to
            records named seq1, seq2, ...) or all SeqRecord objects.
        **kwargs: ``sort=True`` re-orders the aligned rows to follow the
            order of the input sequences.

    Returns:
        The alignment read from MUSCLE's stdout, or None when no sequences
        are given.
    '''
    if not seqs:
        return None

    import subprocess as sp
    from Bio import AlignIO, SeqIO
    from Bio.Align.Applications import MuscleCommandline

    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str

    # Convert to SeqRecord if required
    if isinstance(seqs[0], string_types):
        from Bio.Seq import Seq
        from Bio.SeqRecord import SeqRecord
        try:
            from Bio.Alphabet import single_letter_alphabet
        except ImportError:
            # Newer Biopython releases no longer ship Bio.Alphabet.
            single_letter_alphabet = None
        records = []
        for i, s in enumerate(seqs):
            label = 'seq' + str(i + 1)
            if single_letter_alphabet is None:
                sequence = Seq(s)
            else:
                sequence = Seq(s, single_letter_alphabet)
            records.append(SeqRecord(sequence,
                                     id=label,
                                     name=label,
                                     description=label))
        seqs = records

    muscle_cline = MuscleCommandline(diags=True, quiet=True)
    # Text-mode pipes: SeqIO/AlignIO exchange str with the child process.
    child = sp.Popen(str(muscle_cline),
                     stdin=sp.PIPE,
                     stdout=sp.PIPE,
                     stderr=sp.PIPE,
                     universal_newlines=True,
                     shell=True)
    try:
        SeqIO.write(seqs, child.stdin, "fasta")
        child.stdin.close()
        align = AlignIO.read(child.stdout, "fasta")
    finally:
        # Release the pipes and reap the child, also on errors.
        for pipe in (child.stdin, child.stdout, child.stderr):
            try:
                pipe.close()
            except (IOError, OSError):
                pass
        child.wait()

    if kwargs.get('sort'):
        from Bio.Align import MultipleSeqAlignment as MSA
        # The first aligned row per id wins, as in a linear search.
        rows_by_id = {}
        for row in align:
            rows_by_id.setdefault(row.id, row)
        align = MSA(
            [rows_by_id[seq.id] for seq in seqs if seq.id in rows_by_id])

    return align