def mafft_wrapper(work_msa):
    """Align ``work_msa.input_fasta`` with MAFFT and store the result.

    MAFFT is invoked with ``clustalout=True``; whatever the command returns
    on stdout is written to ``work_msa.output_aln`` via ``write_text``.
    """
    build_command = Applications.MafftCommandline
    mafft = build_command(input=work_msa.input_fasta, clustalout=True)
    # Calling the command object yields (stdout, stderr); stderr is discarded.
    alignment_text, _ = mafft()
    work_msa.output_aln.write_text(alignment_text)
def probcons_wrapper(work_msa):
    """Align ``work_msa.input_fasta`` with ProbCons and store the result.

    ProbCons is invoked with ``clustalw=True``; whatever the command returns
    on stdout is written to ``work_msa.output_aln`` via ``write_text``.
    """
    build_command = Applications.ProbconsCommandline
    probcons = build_command(input=work_msa.input_fasta, clustalw=True)
    # Calling the command object yields (stdout, stderr); stderr is discarded.
    alignment_text, _ = probcons()
    work_msa.output_aln.write_text(alignment_text)
def tcoffee_wrapper(work_msa):
    """Run T-Coffee on ``work_msa.input_fasta``.

    The command is given ``work_msa.output_aln`` as its ``outfile`` and
    ``'clustalw'`` as its ``output`` option, so the tool itself is
    responsible for producing the file; nothing is returned.
    """
    build_command = Applications.TCoffeeCommandline
    tcoffee = build_command(infile=work_msa.input_fasta,
                            outfile=work_msa.output_aln,
                            output='clustalw')
    tcoffee()
def msaprobs_wrapper(work_msa):
    """Run MSAProbs on ``work_msa.input_fasta``.

    The command is given ``work_msa.output_aln`` as its ``outfile`` and
    ``clustalw=True``, so the tool itself is responsible for producing the
    file; nothing is returned.
    """
    build_command = Applications.MSAProbsCommandline
    msaprobs = build_command(infile=work_msa.input_fasta,
                             outfile=work_msa.output_aln,
                             clustalw=True)
    msaprobs()
def muscle_wrapper(work_msa):
    """Run MUSCLE on ``work_msa.input_fasta``.

    The command is given ``work_msa.output_aln`` as its ``out`` target and
    ``clw=True``, so the tool itself is responsible for producing the file;
    nothing is returned.
    """
    build_command = Applications.MuscleCommandline
    muscle = build_command(input=work_msa.input_fasta,
                           out=work_msa.output_aln,
                           clw=True)
    muscle()
def clustalo_wrapper(work_msa):
    """Run Clustal Omega on ``work_msa.input_fasta``.

    The command is given ``work_msa.output_aln`` as its ``outfile`` with
    ``outfmt='clustal'``, and has the ``verbose`` and ``auto`` options
    switched on; nothing is returned.
    """
    build_command = Applications.ClustalOmegaCommandline
    clustalo = build_command(infile=work_msa.input_fasta,
                             outfile=work_msa.output_aln,
                             outfmt='clustal',
                             verbose=True,
                             auto=True)
    clustalo()
def dialign2_wrapper(work_msa):
    """Placeholder for a DIALIGN2 wrapper; always raises.

    The wrapper is not implemented because it is unclear how to make
    DIALIGN emit the computed alignment. The abandoned draft built
    ``Applications.DialignCommandline('dialign', input=work_msa.input_fasta,
    fn=work_msa.output_aln.stem)`` and called it; that code was unreachable
    behind the ``raise`` and has been removed.

    Raises:
        NotImplementedError: unconditionally.
    """
    raise NotImplementedError(
        "I can't figure out how to get dialign to output the MSA it calculates..."
    )
Exemplo n.º 8
0
    def __call__(self):
        """Calls the underlying alignment method.

        Dispatches on ``self.method``:

        * ``'Mafft'`` builds a BioPython ``MafftCommandline`` from
          ``self.cmd``, ``self.inpath`` and ``self.kwargs``, runs it and
          writes the captured stdout to ``self.outpath``.
        * ``'Generic'`` is a placeholder and currently does nothing.

        If MAFFT fails with ``ApplicationError`` a message is printed and
        ``self.outpath`` is left untouched.
        """
        # Either delegate call to BioPython or run internal method
        if self.method == 'Mafft':
            cmdline = Applications.MafftCommandline(self.cmd,
                                                    input=self.inpath,
                                                    **self.kwargs)
            try:
                stdout, stderr = cmdline()  # Need to log stderr eventually
            except ApplicationError:  # Raised if subprocess return code != 0
                print(
                    "Failed to run MAFFT")  # Should process better eventually
            else:
                # Write only after a successful run: on failure `stdout` is
                # never bound, so writing here unconditionally would raise
                # UnboundLocalError after truncating the output file.
                with open(self.outpath, 'w') as o:
                    o.write(stdout)
        elif self.method == 'Generic':
            pass  # To be implemented
Exemplo n.º 9
0
def renumber_pdb(config, path, pdb_name, sequences, dummy_dir):
    '''
     Renumber PDB file located in path folder with the real sequences

     For every chain in `sequences`, the sequence read from the structure is
     aligned with clustalw2 against the given reference sequence; the
     alignment is passed to create_mapping and the resulting mapping is used
     to rewrite each residue's number and version in a new chain.

     config     configuration object; config.get('Paths', 'clustal_path') is
                the folder that contains the clustalw2 executable
     path       Folder where PDB file is located
     pdb_name   PDB file
     sequences  dictionary of sequences (of ProteinSequence Class from SeqIO) that define the Aa number
                chain identifier is the key of the dictionary
     dummy_dir  Dummy directory to create files

     Returns the renumbered PDB object. If running or parsing the alignment
     fails, the error is written to stderr and the caught exception instance
     is returned (not raised).

    '''

    #Initialize
    from SBI.structure.chain import Chain
    from SBI.structure import PDB
    from Bio import AlignIO
    from Bio.Align import Applications

    clustal_exe = os.path.join(config.get('Paths', 'clustal_path'),
                               'clustalw2')
    name_pdb = ".".join(pdb_name.split('/')[-1].split('.')[:-1])
    new_pdb = PDB()
    pdb_file = os.path.join(path, pdb_name)
    pdb = PDB(pdb_file)
    pdb.clean()
    # items() exists on both Python 2 and Python 3 dictionaries, whereas
    # iteritems() is Python 2 only.
    for chain_id, chain_seq in sequences.items():
        name_chain = name_pdb + "_" + chain_id
        name_seq = chain_seq.get_identifier()
        pdb_chain = pdb.get_chain_by_id(chain_id)
        new_chain = Chain(name_pdb, chain_id)
        #define/create files
        tmp_prefix = dummy_dir + "/tmp_" + name_chain + "_" + name_seq
        infile = tmp_prefix + ".fa"
        outfile = tmp_prefix + ".aln"
        dndfile = tmp_prefix + ".dnd"
        # The context manager closes the FASTA file even if a write fails.
        with open(infile, "w") as fd:
            fd.write(">{0:s}\n{1:s}\n".format(name_chain,
                                              pdb_chain.protein_sequence))
            fd.write(">{0:s}\n{1:s}\n".format(name_seq,
                                              chain_seq.get_sequence()))
        try:
            # run clustalw2
            msa_cline = Applications.ClustalwCommandline(clustal_exe,
                                                         infile=infile,
                                                         outfile=outfile)
            # `shell` is only a truthy flag: the command string is handed to
            # the platform's default shell, whatever value is passed.
            child = subprocess.Popen(str(msa_cline),
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     shell=True)
            child.communicate()
            #store alignment in compare
            alignment = AlignIO.read(outfile, 'clustal')
            structure = alignment[0].seq
            reference = alignment[1].seq
        except Exception as e:
            sys.stderr.write("ERROR: %s\n" % e)
            # Do not leave temporary files behind on failure; only pass the
            # ones that were actually created.
            remove_files([
                tmp for tmp in (infile, outfile, dndfile)
                if os.path.exists(tmp)
            ])
            return e
        #remove temporary fasta and alignment files
        remove_files([infile, outfile, dndfile])
        #mapping of residues to the original sequence
        mapping = create_mapping(pdb_chain.protein_idx.split(";"), structure,
                                 reference)
        #fill the new chain with the correct numbering of residues
        for residue in pdb_chain.aminoacids:
            pair = (str(residue.number), residue.version)
            # NOTE(review): if `pair` is missing from the mapping, get()
            # returns None and the unpacking below raises TypeError --
            # confirm create_mapping covers every residue.
            number, version = mapping.get(pair)
            residue.number = number
            residue.version = version
            new_chain.add_residue(residue)
        #fill the new pdb
        new_pdb.add_chain(new_chain)

    return new_pdb
Exemplo n.º 10
0
def _run_alignment_command(cmd, started, verbose):
    """Run ``str(cmd)`` in a subprocess and report timing and output.

    cmd      command-line object (or string) whose ``str()`` is executed
    started  ``datetime`` taken by the caller before launching; used to
             print the total elapsed time
    verbose  when true, the captured stdout and stderr are printed
    """
    child = subprocess.Popen(
        str(cmd),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=(sys.platform != "win32"))
    # communicate() drains both pipes while waiting for the child to exit.
    # wait() with PIPE can deadlock once the child fills an OS pipe buffer.
    stdout, stderr = child.communicate()
    finished = datetime.now()
    print("Finished at: %s" % finished.strftime("%Y-%m-%d %H:%M:%S"))
    print("Total elapsed time: %s" % str(finished - started))
    if verbose:
        if stdout:
            print("\nStandard out is: %s\n" % stdout)
        else:
            print("\nStandard out is empty!\n")

        if stderr:
            print("Standard error is: %s" % stderr)
        else:
            print("Standard error is empty")


def __align_sequences__(args=None, seqs=None):
    """Align sequences with the tool selected in ``args.tool`` and print the result.

    When ``args.input_file`` is set, one of the following runs:

    * ``"clustalo"`` -- Clustal Omega from /usr/local/bin/clustalo, writing
      ``<input>.aln.clustalo`` and ``<input>.dnd.clustalo``; the alignment
      is read back as FASTA and printed.
    * ``"muscle"``   -- MUSCLE; ``args.file_output_format`` may be unset
      (FASTA output), ``"clustal"`` or ``"clustal-strict"``.
    * ``"emboss"``   -- not implemented.
    * ``"blast"``    -- NCBI qblast on the input (or parsing of an existing
      BLAST XML file when ``args.file_input_format`` is ``"xml"``); every
      HSP is printed and the raw result is saved to ``blast.xml``.

    Any other tool value does nothing. Without an input file a pairwise
    global alignment is printed.

    Raises:
        AssertionError: a required argument is missing.
        ValueError: unsupported MUSCLE output format, or the input file
            cannot be read as sequence data.
        NotImplementedError: ``args.tool == "emboss"``.
    """
    if args.input_file is not None:
        assert args.file_input_format is not None, "Missed the file input format at __retrieve_data__(args=None)"

        if args.verbose:
            print("\nStarting sequences alignment process...\n\n")

        from Bio.Align import Applications
        from Bio import AlignIO

        if args.tool == "clustalo":
            started = datetime.now()
            print("Starting at: %s" % started.strftime("%Y-%m-%d %H:%M:%S"))
            binpath = r"/usr/local/bin/clustalo"
            aln_path = "%s.aln.clustalo" % args.input_file
            # `nproc` is not defined in this function; it is expected to be
            # a module-level value.
            cmd = Applications.ClustalOmegaCommandline(
                binpath,
                infile=args.input_file,
                outfile=aln_path,
                verbose=args.verbose,
                force=True,
                threads=nproc,
                guidetree_out="%s.dnd.clustalo" % args.input_file)
            _run_alignment_command(cmd, started, args.verbose)

            align = AlignIO.read(aln_path, "fasta")
            print(align)
        elif args.tool == "muscle":
            started = datetime.now()
            print("Starting at: %s" % started.strftime("%Y-%m-%d %H:%M:%S"))
            out_format = args.file_output_format
            if not out_format:
                # No (or empty) format requested: plain FASTA output.
                aln_path = "%s.aln.muscle" % args.input_file
                read_format = "fasta"
                cmd = Applications.MuscleCommandline(input=args.input_file,
                                                     out=aln_path)
            elif out_format == "clustal":
                aln_path = "%s.aln.muscle.clustalwfmt" % args.input_file
                read_format = "clustal"
                cmd = Applications.MuscleCommandline(input=args.input_file,
                                                     clw=True,
                                                     out=aln_path)
            elif out_format == "clustal-strict":
                aln_path = "%s.aln.muscle.clustalwstrictfmt" % args.input_file
                read_format = "clustal"
                cmd = Applications.MuscleCommandline(input=args.input_file,
                                                     clwstrict=True,
                                                     out=aln_path)
            else:
                # Without this guard no command is built and the literal
                # string "None" would be handed to the shell.
                raise ValueError(
                    "Unsupported MUSCLE output format: %r" % (out_format,))
            _run_alignment_command(cmd, started, args.verbose)

            align = AlignIO.read(aln_path, read_format)
            print(align)
        elif args.tool == "emboss":
            # TODO: implement with Bio.Emboss.Applications NeedleCommandline
            # or WaterCommandline (selected by args.emboss_algorithm) once
            # the a/b sequence files are taken from the arguments, then read
            # the output file with AlignIO in "emboss" format.
            raise NotImplementedError(
                "Not implemented yet! Fix the a and b sequence files")
        elif args.tool == "blast":
            assert args.blast_app is not None, "Missed the -bap|--blast-app arg"
            assert args.blast_database is not None, "Missed the -bdb|--blast-database arg"

            started = datetime.now()
            print("Starting at: %s" % started.strftime("%Y-%m-%d %H:%M:%S"))

            from Bio.Blast import NCBIWWW
            from Bio.Blast import NCBIXML

            if args.file_input_format.lower() != "xml":
                try:
                    record = SeqIO.read(args.input_file,
                                        args.file_input_format)
                except ValueError as e:
                    # Only the "several records" case has a fallback; any
                    # other parsing problem is re-raised instead of being
                    # swallowed.
                    if "more than one record found in handle" not in str(
                            e).lower():
                        raise
                    records = SeqIO.parse(args.input_file,
                                          args.file_input_format)
                    # Query by identifier, one per line.
                    query = "".join(rec.id + '\n' for rec in records)
                else:
                    query = record.seq
                result_handle = NCBIWWW.qblast(args.blast_app,
                                               args.blast_database, query)

                with open("blast.xml", 'w') as out_handle:
                    out_handle.write(result_handle.getvalue())
            else:
                result_handle = open(args.input_file)

            try:
                for blast_record in NCBIXML.parse(result_handle):
                    for alignment in blast_record.alignments:
                        for hsp in alignment.hsps:
                            print("\nALIGNMENT\n")
                            print("Sequence: ", alignment.title)
                            print("Length: ", alignment.length)
                            print("e value: ", hsp.expect)
                            print(hsp.query[0:75] + "...")
                            print(hsp.match[0:75] + "...")
                            print(hsp.sbjct[0:75] + "...")
            finally:
                # Release the handle even if parsing fails midway.
                result_handle.close()
            finished = datetime.now()
            print("Finished at: %s" % finished.strftime("%Y-%m-%d %H:%M:%S"))
            print("Total elapsed time: %s" % str(finished - started))
    else:
        # NOTE(review): `seq1`, `seq2` and `pairwise2` are not defined in this
        # function and the `seqs` parameter is unused -- presumably these are
        # module-level names; confirm, or switch to `seqs`.
        alignments = pairwise2.align.globalxx(seq1, seq2)
        for alignment in alignments:
            print(pairwise2.format_alignment(*alignment))