def muscle_wrapper(work_msa):
    """Run MUSCLE for the job described by ``work_msa``.

    The input path is read from ``work_msa.input_fasta`` and the output
    path from ``work_msa.output_aln``; the ``clw`` option is switched on.
    Whatever the command object returns is discarded, so this function
    returns ``None``.
    """
    muscle_options = {
        "input": work_msa.input_fasta,
        "out": work_msa.output_aln,
        "clw": True,
    }
    run_muscle = Applications.MuscleCommandline(**muscle_options)
    run_muscle()
Exemplo n.º 2
0
def __align_sequences__(args=None, seqs=None):
    if args.input_file is not None:
        assert args.file_input_format is not None, "Missed the file input format at __retrieve_data__(args=None)"

        if args.verbose:
            print("\nStarting sequences alignment process...\n\n")

        from Bio.Align import Applications
        # import subprocess
        # global nproc

        if args.tool == "clustalo":
            started = datetime.now()
            print("Starting at: %s" % started.strftime("%Y-%m-%d %H:%M:%S"))
            # from Bio.Align.Applications import ClustalwCommandline
            binpath = r"/usr/local/bin/clustalo"
            cmd=Applications.ClustalOmegaCommandline(\
             binpath,\
             infile=args.input_file,\
             outfile="%s.aln.clustalo" % args.input_file,\
             verbose=args.verbose,\
             force=True,\
             threads=nproc,\
             guidetree_out="%s.dnd.clustalo" % args.input_file)
            # cmd="%s -i %s -o %s --threads=%i --force --guidetree-out=%s" % (binpath, args.input_file, ("%s.aln.clustalo" % args.input_file), nproc, ("%s.dnd.clustalo" % args.input_file))
            # if args.verbose:
            # 	cmd="%s -i %s -o %s --threads=%i --force --guidetree-out=%s -v" % (binpath, args.input_file, ("%s.aln.clustalo" % args.input_file), nproc, ("%s.dnd.clustalo" % args.input_file))
            # stdout,stderr=cmd()
            child=subprocess.Popen(\
             str(cmd),\
             stdout=subprocess.PIPE,\
             stderr=subprocess.PIPE,\
             universal_newlines=True,\
             shell=(sys.platform!="win32"))
            child.wait()
            finished = datetime.now()
            print("Finished at: %s" % finished.strftime("%Y-%m-%d %H:%M:%S"))
            print("Total elapsed time: %s" % str(finished - started))
            if args.verbose:
                stdout = child.stdout.read()
                if (len(stdout) > 0):
                    print("\nStandard out is: %s\n" % stdout)
                else:
                    print("\nStandard out is empty!\n")

                stderr = child.stderr.read()
                if (len(stderr) > 0):
                    print("Standard error is: %s" % stderr)
                else:
                    print("Standard error is empty")

            from Bio import AlignIO
            # align=AlignIO.read("tmp.aln","fasta")
            align = AlignIO.read("%s.aln.clustalo" % args.input_file, "fasta")
            print(align)
        elif args.tool == "muscle":
            started = datetime.now()
            print("Starting at: %s" % started.strftime("%Y-%m-%d %H:%M:%S"))
            # from Bio.Align.Applications import ClustalwCommandline
            cmd = None
            if not args.file_output_format or args.file_output_format is None:
                cmd = Applications.MuscleCommandline(input=args.input_file,
                                                     out="%s.aln.muscle" %
                                                     args.input_file)
            else:
                if args.file_output_format == "clustal":
                    cmd = Applications.MuscleCommandline(
                        input=args.input_file,
                        clw=True,
                        out="%s.aln.muscle.clustalwfmt" % args.input_file)
                elif args.file_output_format == "clustal-strict":
                    cmd = Applications.MuscleCommandline(
                        input=args.input_file,
                        clwstrict=True,
                        out="%s.aln.muscle.clustalwstrictfmt" %
                        args.input_file)
            # cmd()
            child=subprocess.Popen(\
             str(cmd),\
             stdout=subprocess.PIPE,\
             stderr=subprocess.PIPE,\
             universal_newlines=True,\
             shell=(sys.platform!="win32"))
            child.wait()
            finished = datetime.now()
            print("Finished at: %s" % finished.strftime("%Y-%m-%d %H:%M:%S"))
            print("Total elapsed time: %s" % str(finished - started))
            if args.verbose:
                stdout = child.stdout.read()
                if (len(stdout) > 0):
                    print("\nStandard out is: %s\n" % stdout)
                else:
                    print("\nStandard out is empty!\n")

                stderr = child.stderr.read()
                if (len(stderr) > 0):
                    print("Standard error is: %s" % stderr)
                else:
                    print("Standard error is empty")

            from Bio import AlignIO
            align = None
            if args.file_output_format is None:
                align = AlignIO.read("%s.aln.muscle" % args.input_file,
                                     "fasta")
            elif args.file_output_format == "clustal":
                align = AlignIO.read(
                    "%s.aln.muscle.clustalwfmt" % args.input_file, "clustal")
            elif args.file_output_format == "clustal-strict":
                align = AlignIO.read(
                    "%s.aln.muscle.clustalwstrictfmt" % args.input_file,
                    "clustal")
            print(align)
        elif args.tool == "emboss":
            raise NotImplementedError(
                "Not implemented yet! Fix the a and b sequence files")
            outfile = ''
            binpath = ''
            if args.emboss_algorithm == "needle":
                from Bio.Emboss.Applications import NeedleCommandline as EmbossCommandline
                outfile = "%s.needle.txt" % args.input_file
                binpath = r"/usr/local/bin/needle"
            elif args.emboss_algorithm == "water":
                from Bio.Emboss.Applications import WaterCommandline as EmbossCommandline
                outfile = "%s.water.txt" % args.input_file
                binpath = r"/usr/local/bin/water"
            started = datetime.now()
            print("Starting at: %s" % started.strftime("%Y-%m-%d %H:%M:%S"))
            cmd=EmbossCommandline(\
             binpath,\
             asequence="/home/edario/mines/bio/alpha.faa",\
             bsequence="/home/edario/mines/bio/beta.faa",\
             gapopen=10,\
             gapextend=0.5,\
             outfile=outfile)
            # stdout,stderr=cmd()
            child=subprocess.Popen(\
             str(cmd),\
             stdout=subprocess.PIPE,\
             stderr=subprocess.PIPE,\
             universal_newlines=True,\
             shell=(sys.platform!="win32"))
            child.wait()
            finished = datetime.now()
            print("Finished at: %s" % finished.strftime("%Y-%m-%d %H:%M:%S"))
            print("Total elapsed time: %s" % str(finished - started))
            if args.verbose:
                stdout = child.stdout.read()
                if (len(stdout) > 0):
                    print("\nStandard out is: %s\n" % stdout)
                else:
                    print("\nStandard out is empty!\n")

                stderr = child.stderr.read()
                if (len(stderr) > 0):
                    print("Standard error is: %s" % stderr)
                else:
                    print("Standard error is empty")

            from Bio import AlignIO
            # align=AlignIO.read("tmp.aln","fasta")
            # align=AlignIO.read("%s.needle.txt" % args.input_file,"emboss")
            align = AlignIO.read(outfile, "emboss")
            print(align)
        elif args.tool == "blast":
            assert args.blast_app is not None, "Missed the -bap|--blast-app arg"
            assert args.blast_database is not None, "Missed the -bdb|--blast-database arg"
            # assert args.blast_query_sequence is not None, "Missed the -bqs|--blast-query-sequence arg"

            started = datetime.now()
            print("Starting at: %s" % started.strftime("%Y-%m-%d %H:%M:%S"))

            from Bio.Blast import NCBIWWW
            result_handle = None

            # args.blast_query_sequence=''
            # for seq in SeqIO.parse(args.input_file,args.file_input_format):
            # 	args.blast_query_sequence+=seq.id+'\n'

            # if args.blast_query_sequence is not None:
            # if args.verbose:
            # 	print("Searching in BLAST with app %s, in database %s and query %s" % (args.blast_app, args.blast_database, args.blast_query_sequence))
            # 	print("(cmd is %s -db %s)" % (args.blast_app, args.blast_database))
            # result_handle=NCBIWWW.qblast(args.blast_app, args.blast_database, args.blast_query_sequence)
            if args.file_input_format.lower() != "xml":
                try:
                    record = SeqIO.read(args.input_file,
                                        args.file_input_format)
                    result_handle = NCBIWWW.qblast(args.blast_app,
                                                   args.blast_database,
                                                   record.seq)
                except ValueError as e:
                    if "more than one record found in handle" in e.args[
                            0].lower():
                        records = SeqIO.parse(args.input_file,
                                              args.file_input_format)
                        query = ''
                        for rec in records:
                            query += rec.id + '\n'
                        print("************query***********")
                        print(type(query))
                        result_handle = NCBIWWW.qblast(args.blast_app,
                                                       args.blast_database,
                                                       query)
                        quit()

                with open("blast.xml", 'w') as out_handle:
                    # out_handle.write(result_handle.read())
                    out_handle.write(result_handle.getvalue())
                    # result_handle.close()
            else:
                result_handle = open(args.input_file)

            # else:
            # 	query=''
            # 	for seq in seqs:
            # 		query+="%s\n" % eq

            # result_handle=NCBIWWW.qblast(args.blast_app, args.blast_database, seq)

            from Bio.Blast import NCBIXML
            blast_records = NCBIXML.parse(result_handle)

            for blast_record in blast_records:
                for alignment in blast_record.alignments:
                    for hsp in alignment.hsps:
                        print("\nALIGNMENT\n")
                        print("Sequence: ", alignment.title)
                        print("Length: ", alignment.length)
                        print("e value: ", hsp.expect)
                        print(hsp.query[0:75] + "...")
                        print(hsp.match[0:75] + "...")
                        print(hsp.sbjct[0:75] + "...")

                # print(blast_record)

            result_handle.close()
            finished = datetime.now()
            print("Finished at: %s" % finished.strftime("%Y-%m-%d %H:%M:%S"))
            print("Total elapsed time: %s" % str(finished - started))
    else:
        alignments = pairwise2.align.globalxx(seq1, seq2)
        for alignment in alignments:
            print(pairwise2.format_alignment(*alignment))