示例#1
0
 def test_water_file(self):
     """Run water with the asis trick, writing the alignment to a file.

     The command line is assembled from constructor keywords,
     ``set_parameter`` calls and a property assignment, executed through
     ``generic_run`` and the resulting file is parsed back with AlignIO.
     """
     # Setup, try a mixture of keyword arguments and later additions:
     cline = WaterCommandline(cmd=exes["water"],
                              gapopen="10", gapextend="0.5")
     # Try using both human readable names, and the literal ones:
     cline.set_parameter("asequence", "asis:ACCCGGGCGCGGT")
     cline.set_parameter("-bsequence", "asis:ACCCGAGCGCGGT")
     # Try using a property set here:
     cline.outfile = "Emboss/temp with space.water"
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cline))), str(cline))
     # Run the tool,
     result, out, err = generic_run(cline)
     # Check it worked (the banner is expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"), errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         # Show the failing command; write() works on Python 2 and 3 alike.
         sys.stderr.write("\n%s\n" % cline)
     self.assertEqual(result.return_code, 0)
     filename = result.get_result("outfile")
     self.assertEqual(filename, "Emboss/temp with space.water")
     # Use a unittest assertion so the check survives "python -O".
     self.assertTrue(os.path.isfile(filename))
     # Check we can parse the output...
     handle = open(filename)
     try:
         align = AlignIO.read(handle, "emboss")
     finally:
         # Close before removing: an open file cannot be deleted on Windows.
         handle.close()
     self.assertEqual(len(align), 2)
     self.assertEqual(str(align[0].seq), "ACCCGGGCGCGGT")
     self.assertEqual(str(align[1].seq), "ACCCGAGCGCGGT")
     # Clean up,
     os.remove(filename)
示例#2
0
文件: skrypt.py 项目: KTrzcinska/WBO
def generate_water_cmd(macierz, pliki_fasta_rodzina):
    """
    Build EMBOSS water command lines for every pair of sequences found in the given FASTA files.

    :param macierz: path/name of the substitution matrix file (PAM/BLOSUM), passed to water as ``datafile``
    :param pliki_fasta_rodzina: list of paths/names of FASTA files holding the protein sequences of one family
    :return: list of water command-line strings, one per unordered pair of records
    """
    from itertools import combinations

    records = []
    for fasta_path in pliki_fasta_rodzina:
        # Default text mode replaces the "rU" flag, which modern Python rejects;
        # the context manager closes the file even if parsing fails.
        with open(fasta_path) as handle:
            records.extend(SeqIO.parse(handle, "fasta"))

    from Bio.Emboss.Applications import WaterCommandline

    all_water_cmd = []
    # combinations() yields each unordered pair once, in the same order as the
    # former "i < j" double loop.
    for first, second in combinations(records, 2):
        # Maximum gap penalties, to obtain gap-free alignments.
        water_cmd = WaterCommandline(gapopen=100, gapextend=10)
        water_cmd.asequence = "asis:" + str(first.seq)
        water_cmd.bsequence = "asis:" + str(second.seq)
        water_cmd.stdout = True
        water_cmd.sprotein = True
        water_cmd.datafile = macierz
        all_water_cmd.append(str(water_cmd))

    return all_water_cmd
示例#3
0
 def test_water_file(self):
     """Align two asis sequences with water and parse the output file."""
     # Supply options three ways: constructor keywords, set_parameter()
     # (with and without the leading dash) and a property assignment.
     water = WaterCommandline(cmd=exes["water"], gapopen="10", gapextend="0.5")
     water.set_parameter("asequence", "asis:ACCCGGGCGCGGT")
     water.set_parameter("-bsequence", "asis:ACCCGAGCGCGGT")
     water.outfile = "Emboss/temp with space.water"
     # The repr must evaluate back to an equivalent command line.
     rebuilt = eval(repr(water))
     self.assertEqual(str(rebuilt), str(water))
     # Execute water,
     self.run_water(water)
     # then make sure the alignment file parses and holds both sequences.
     alignment = AlignIO.read(water.outfile, "emboss")
     self.assertEqual(len(alignment), 2)
     for row, expected in enumerate(("ACCCGGGCGCGGT", "ACCCGAGCGCGGT")):
         self.assertEqual(str(alignment[row].seq), expected)
     # Remove the temporary output.
     os.remove(water.outfile)
示例#4
0
 def test_water_piped(self):
     """Align two asis sequences with water, reading the result from stdout."""
     water = WaterCommandline(
         cmd=exes["water"],
         asequence="asis:ACCCGGGCGCGGT",
         bsequence="asis:ACCCGAGCGCGGT",
         gapopen=10,
         gapextend=0.5,
         auto=True,
         filter=True,
     )
     # The rendered command line lists the switches first, then the options.
     expected_command = " ".join(
         [
             exes["water"],
             "-auto",
             "-filter",
             "-asequence=asis:ACCCGGGCGCGGT",
             "-bsequence=asis:ACCCGAGCGCGGT",
             "-gapopen=10",
             "-gapextend=0.5",
         ]
     )
     self.assertEqual(str(water), expected_command)
     # Launch the process with all three standard streams piped.
     use_shell = sys.platform != "win32"
     proc = subprocess.Popen(
         str(water),
         stdin=subprocess.PIPE,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         universal_newlines=True,
         shell=use_shell,
     )
     proc.stdin.close()
     # The alignment is parsed straight from the child's stdout.
     alignment = AlignIO.read(proc.stdout, "emboss")
     self.assertEqual(len(alignment), 2)
     for row, expected in enumerate(("ACCCGGGCGCGGT", "ACCCGAGCGCGGT")):
         self.assertEqual(str(alignment[row].seq), expected)
     # Nothing may be reported on stderr and the exit status must be zero.
     self.assertEqual(proc.stderr.read(), "")
     self.assertEqual(0, proc.wait())
     proc.stdout.close()
     proc.stderr.close()
def doWater(contig, seq):
    """Align *contig* against *seq* with water and return ``getStartEnd()``.

    Both inputs are written as single-record FASTA files (``contig.faa`` and
    ``seq.faa``) in the working directory; water writes to ``water.txt``.
    The captured stdout and stderr of the run are printed.
    """
    with open("contig.faa", "w") as contig_handle:
        contig_handle.writelines([">contig\n", contig])
    with open("seq.faa", "w") as seq_handle:
        seq_handle.writelines([">seq\n", str(seq)])
    aligner = WaterCommandline(
        asequence="contig.faa",
        bsequence="seq.faa",
        gapopen=10,
        gapextend=0.5,
        outfile="water.txt",
    )
    captured_out, captured_err = aligner()
    print(captured_out + captured_err)
    # NOTE(review): getStartEnd() is defined elsewhere; presumably it reads
    # water.txt - confirm.
    return getStartEnd()
示例#6
0
def getCellBarcodeAlignment(read, fil):
    """
    Align one read against a barcode FASTA file with water via stdin/stdout.

        read: sequence of one SMRT read; fed to water on stdin as FASTA
        fil: path of the barcode list ({index}_CBC-list.fasta), used as bsequence
        return: (best matched CBC for this read, its score)

    Raises ValueError when water reports no score at all.
    """
    water_cline = WaterCommandline(asequence='stdin',
                                   filter=True,
                                   bsequence=fil,
                                   gapopen=10.0,
                                   gapextend=.5,
                                   stdout=True)
    child = subprocess.Popen(str(water_cline),
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True,
                             shell=(sys.platform != "win32"))
    seqs, scores = [], []
    try:
        rec = SeqRecord(Seq(read), id="temp")
        SeqIO.write(rec, child.stdin, "fasta")
        # Closing stdin signals end of input to water.
        child.stdin.close()
        for line in child.stdout:
            if '2:' in line[:8]:
                # Header line naming the second (barcode) sequence.
                seqs.append(line.strip().split(':')[1])
            elif 'Score' in line:
                scores.append(float(line.split(':')[1]))
    finally:
        # Release the pipes and reap the child so no zombie process or open
        # file descriptor is left behind (closing twice is harmless).
        for pipe in (child.stdin, child.stdout, child.stderr):
            pipe.close()
        child.wait()
    assert len(seqs) == len(
        scores), "ERROR: incorrect alignment file line counting."
    if not scores:
        # Same exception type max() would raise, with a clearer message.
        raise ValueError("water reported no alignment score for this read")
    best_score = max(scores)
    return seqs[scores.index(best_score)], best_score
示例#7
0
def main():
    """Align every external fragment against this MPI rank's data chunk.

    The rank (1-based, zero-padded to three digits) selects
    ``00-data/chunk-NNN.fasta``. For each of the four fragment files in
    ``00-external`` the water output goes to
    ``01-fix_orientation/<fragment>/chunk-NNN.txt``. The output directories
    are created even when the rank has no data chunk.
    """
    def ensure_dir(directory):
        # Several ranks run this at once, so "check then mkdir" can race;
        # attempt the mkdir and accept that another rank may have won.
        try:
            os.mkdir(directory)
        except OSError:
            if not os.path.isdir(directory):
                raise

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    loc = '01-fix_orientation'
    chunk = 'chunk-' + '{:03}'.format(rank + 1)
    # The chunk path does not depend on the fragment, so build it once.
    data = os.path.join('00-data', chunk + '.fasta')
    ensure_dir(loc)
    for fragment in ('inside', 'inside_rc', 'outside', 'outside_rc'):
        subloc = loc + '/' + fragment
        ensure_dir(subloc)
        if not os.path.isfile(data):
            continue
        infile = os.path.join('00-external', fragment + '.txt')
        outfile = os.path.join(subloc, chunk + '.txt')
        water_cline = WaterCommandline(asequence=infile,
                                       bsequence=data,
                                       gapopen=10.0,
                                       gapextend=.5,
                                       outfile=outfile)
        # The captured stdout/stderr are not needed; results are in outfile.
        water_cline()
示例#8
0
def water(*id, gop=10, gex=0.5, out='emb.aln'):
    """Local alignment with EMBOSS water (Smith-Waterman).

    The first identifier is handed to ``mkfasx`` for ``seqa.fas`` and the
    remaining ones for ``seqb.fas`` (presumably mkfasx writes the matching
    sequences to those files - confirm). Both temporary files are removed
    afterwards, even when water fails.

    gop: gap opening penalty
    gex: gap extension penalty
    out: path of the alignment file written by water

    Returns the parsed alignment when fewer than three identifiers were
    given, otherwise None (the result is only available in *out*).
    """
    mkfasx('seqa.fas', id[0])
    mkfasx('seqb.fas', *id[1:])

    water_cline = WaterCommandline(asequence='seqa.fas',
                                   bsequence='seqb.fas',
                                   gapopen=gop,
                                   gapextend=gex,
                                   outfile=out)

    try:
        water_cline()
    finally:
        # Do not leave the temporary FASTA files behind on failure.
        os.remove('seqa.fas')
        os.remove('seqb.fas')

    if len(id) < 3:
        align = AlignIO.read(out, "emboss")
        return align
示例#9
0
def emboss_local_pairwise_alignment(query_dir, seq_type):
    if seq_type == 'fg':
        print '\n   ...pairwise comparison of functional gene sequences...\n' 
    elif seq_type == 'ssu':
        print '\n   ...pairwise comparison of SSU rRNA sequences...\n'
    water_cline = WaterCommandline()
    water_cline.gapopen=10
    water_cline.gapextend=0.5
    query_list = [query for query in sorted(glob.glob(query_dir+"/*.fa"))]
    for i, a_seq in enumerate(query_list): 
        water_cline.asequence=str(a_seq)
        for j, b_seq in enumerate(query_list[i:]):
            water_cline.bsequence=str(b_seq)
            align_out = query_dir+"/pairwise_"+str(i+1)+"_"+str(i+j+1)+".aln"
            water_cline.outfile=str(align_out)
            water_cline()
    print 'Done\n'
    return query_dir+"/*.aln"
示例#10
0
def emboss_water(seq_a_file: str, seq_b_file: str, out_file: str):
    """ Do a local (Smith-Waterman) pairwise alignment using EMBOSS water

        Args: 
            seq_a_file: First sequence
            seq_b_file: second sequence
            out_file: Output file (written in MSF format)

        Returns: 
            r [subprocess.CompletedProcess]: result of running the water command

        Raises:
            subprocess.CalledProcessError: water exited with a non-zero status
        
    """
    import os
    import shlex

    water_cline = WaterCommandline(asequence=seq_a_file,
                                        bsequence=seq_b_file,
                                        outfile=out_file,
                                        verbose=True,
                                        gapextend=1,
                                        gapopen=10)

    cmd_string = str(water_cline)
    if os.name == "posix":
        # The rendered command line quotes arguments that contain spaces;
        # shlex keeps such an argument together and drops the quotes, which
        # a plain split on " " would not.
        cmd = shlex.split(cmd_string)
    else:
        # POSIX-style parsing would eat backslashes in Windows paths, so the
        # original whitespace split is kept there.
        cmd = cmd_string.split(" ")
    cmd.append("-aformat=msf")

    return subprocess.run(cmd, check=True)
示例#11
0
 def test_water_file(self):
     """Run water with the asis trick, output to a file."""
     # Setup, try a mixture of keyword arguments and later additions:
     cline = WaterCommandline(cmd=exes["water"], gapopen="10", gapextend="0.5")
     # Try using both human readable names, and the literal ones:
     cline.set_parameter("asequence", "asis:ACCCGGGCGCGGT")
     cline.set_parameter("-bsequence", "asis:ACCCGAGCGCGGT")
     # Try using a property set here:
     cline.outfile = "Emboss/temp with space.water"
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cline))), str(cline))
     # Run the tool,
     self.run_water(cline)
     # Check we can parse the output...
     # The handle is closed before the file is removed below; an open file
     # cannot be deleted on Windows and would otherwise leak.
     with open(cline.outfile) as handle:
         align = AlignIO.read(handle, "emboss")
     self.assertEqual(len(align), 2)
     self.assertEqual(str(align[0].seq), "ACCCGGGCGCGGT")
     self.assertEqual(str(align[1].seq), "ACCCGAGCGCGGT")
     # Clean up,
     os.remove(cline.outfile)
示例#12
0
 def test_water_file3(self):
     """Run water with the asis trick and a GenBank file, output to a file."""
     # Setup,
     query = "TGTTGTAATGTTTTAATGTTTCTTCTCCCTTTAGATGTACTACGTTTGGA"
     out_file = "Emboss/temp_test3.water"
     in_file = "GenBank/cor6_6.gb"
     self.assertTrue(os.path.isfile(in_file))
     # Start from a clean slate so a stale output cannot hide a failure.
     if os.path.isfile(out_file):
         os.remove(out_file)
     cline = WaterCommandline(cmd=exes["water"])
     cline.set_parameter("asequence", "asis:%s" % query)
     cline.set_parameter("bsequence", in_file)
     # TODO - Tell water this is a GenBank file!
     cline.set_parameter("gapopen", "1")
     cline.set_parameter("gapextend", "0.5")
     cline.set_parameter("outfile", out_file)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cline))), str(cline))
     # Run the tool,
     result, out, err = generic_run(cline)
     # Check it worked (the banner is expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         # Show the failing command; write() works on Python 2 and 3 alike.
         sys.stderr.write("\n%s\n" % cline)
     self.assertEqual(result.return_code, 0)
     self.assertEqual(result.get_result("outfile"), out_file)
     # Use a unittest assertion so the check survives "python -O".
     self.assertTrue(os.path.isfile(out_file))
     # Check we can parse the output and it is sensible...
     in_handle = open(in_file)
     try:
         out_handle = open(out_file)
         try:
             self.pairwise_alignment_check(query,
                                           SeqIO.parse(in_handle, "genbank"),
                                           AlignIO.parse(out_handle, "emboss"),
                                           local=True)
         finally:
             # Must be closed before the file is removed below.
             out_handle.close()
     finally:
         in_handle.close()
     # Clean up,
     os.remove(out_file)
示例#13
0
 def test_water_file2(self):
     """Run water with the asis trick and a nucleotide FASTA file, output to a file."""
     # Setup,
     query = "ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG"
     out_file = "Emboss/temp_test2.water"
     in_file = "Fasta/f002"
     self.assertTrue(os.path.isfile(in_file))
     # Start from a clean slate so a stale output cannot hide a failure.
     if os.path.isfile(out_file):
         os.remove(out_file)
     cline = WaterCommandline(cmd=exes["water"])
     cline.set_parameter("-asequence", "asis:%s" % query)
     cline.set_parameter("-bsequence", in_file)
     cline.set_parameter("-gapopen", "10")
     cline.set_parameter("-gapextend", "0.5")
     cline.set_parameter("-outfile", out_file)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cline))), str(cline))
     # Run the tool,
     result, out, err = generic_run(cline)
     # Check it worked (the banner is expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         # Show the failing command; write() works on Python 2 and 3 alike.
         sys.stderr.write("\n%s\n" % cline)
     self.assertEqual(result.return_code, 0)
     self.assertEqual(result.get_result("outfile"), out_file)
     # Use a unittest assertion so the check survives "python -O".
     self.assertTrue(os.path.isfile(out_file))
     # Check we can parse the output and it is sensible...
     in_handle = open(in_file)
     try:
         out_handle = open(out_file)
         try:
             self.pairwise_alignment_check(query,
                                           SeqIO.parse(in_handle, "fasta"),
                                           AlignIO.parse(out_handle, "emboss"),
                                           local=True)
         finally:
             # Must be closed before the file is removed below.
             out_handle.close()
     finally:
         in_handle.close()
     # Clean up,
     os.remove(out_file)
示例#14
0
# http://rosalind.info/problems/swat/

from Bio.Emboss.Applications import WaterCommandline
from Bio import ExPASy, SeqIO


if __name__ == "__main__":
    # Solve the Rosalind SWAT problem: fetch two UniProt entries, align them
    # locally with water and print the integer alignment score.

    # split() without an argument splits on any whitespace, so the newline
    # ending the input file does not stay attached to the last id.
    with open('rosalind_swat.txt') as dataset:
        ids = dataset.read().split()

    for i in ids:
        handle = ExPASy.get_sprot_raw(i)
        r = SeqIO.read(handle, "swiss")
        handle.close()

        # Save each record as FASTA in a file named after its id.
        with open(i, 'w') as f:
            SeqIO.write(r, f, 'fasta')

    water_cline = WaterCommandline()
    water_cline.asequence = ids[0]
    water_cline.bsequence = ids[1]
    water_cline.outfile = "rosalind_swat_output.txt"
    water_cline.gapopen = 10
    water_cline.gapextend = 1
    water_cline()

    # Report every "Score:" value found in the water output as an integer.
    with open('rosalind_swat_output.txt') as result:
        for line in result:
            if 'Score:' in line:
                print(int(float(line[:-1].split(':')[-1].strip())))
示例#15
0
from Bio.Emboss.Applications import WaterCommandline
from Bio import ExPASy
from Bio import SeqIO

if __name__ == "__main__":
    # Solve the Rosalind SWAT problem: fetch two UniProt entries, align them
    # locally with water and print the integer alignment score.

    # os.path is used below but os is not among the imports visible above.
    import os

    with open(os.path.join('data', 'rosalind_swat.txt')) as dataset:
        ids = dataset.read().split()

    for i in ids:
        handle = ExPASy.get_sprot_raw(i)
        r = SeqIO.read(handle, "swiss")
        handle.close()
        # Save each record as FASTA in a file named after its id.
        with open(i, 'w') as f:
            SeqIO.write(r, f, 'fasta')

    water_cline = WaterCommandline()
    water_cline.asequence = ids[0]
    water_cline.bsequence = ids[1]
    water_cline.outfile = "water.txt"
    water_cline.gapopen = 10
    water_cline.gapextend = 1

    water_cline()

    with open('water.txt') as f:
        output = f.readlines()

    # Report every "Score:" value found in the water output as an integer.
    for line in output:
        if 'Score:' in line:
            print(int(float(line[:-1].split(':')[-1].strip())))
示例#16
0
            # Get telomere ref sequence
            ref_length = int(math.ceil(float(size / float(6))))
            if strand == "+":
                telo_ref = "TTAGGC" * ref_length
            elif strand == "-":
                telo_ref = "GCCTAA" * ref_length
            else:
                print("ERROR: strand must be + or -")
                sys.exit(1)

            # Perform alignment with water
            with open("its_seq.temp", "w") as fi:
                fi.write(str(seq))
            with open("telo.temp", "w") as ft:
                ft.write(telo_ref)

            water_cmd = WaterCommandline(gapopen=10,
                                         gapextend=0.5,
                                         asequence="its_seq.temp",
                                         bsequence="telo.temp",
                                         stdout=True,
                                         auto=True)
            stdout, stderr = water_cmd()
            identity = re.findall("# Identity:.*\((.+)\%\)", stdout)[0]

            outfile.write(line.strip() + "\t" + str(identity) + "\n")

outfile.close()
os.remove("its_seq.temp")
os.remove("telo.temp")
示例#17
0
def water_aligner(TR_frame, seqrec_array, m, go, ge, args):
    """ Performs TR alignment using the provided EMBOSS-water aligner executable.

    For every TR three alignments against the query FASTA are run: the full
    TR region and each of its two flanks. The exit status of every run is
    recorded in ``./TR_aln.log``.

    TR_frame: A data frame containing TR instances
    seqrec_array: An array containing indexed seqrecord instances from the query feature array
    m: match score (only reported in the banner here)
    go: gap open penalty
    ge: gap extension penalty
    args: options object; this function reads ``args.w`` (water executable),
        ``args.o`` (output directory) and ``args.m`` (forwarded to
        ``tr.get_homologs_fasta``)
    """
    tr_count = len(TR_frame)

    missing_features = 0  ## counter for instances of missing features in the query lib

    def run_alignment(water_cline, a_prefix, tr_id, log):
        """ Runs one water command line and logs its exit status; a_prefix identifies which sequence is being aligned
        """
        p = Popen(str(water_cline),
                  stdin=PIPE,
                  stdout=PIPE,
                  stderr=PIPE,
                  shell=True)
        _, err = p.communicate()
        rc = p.returncode

        if rc == 0:
            print(f"\nAlignment of {a_prefix}:{tr_id} exited with 0",
                  file=log)
        else:
            print(
                f"\nAlignment of {a_prefix}:{tr_id} exited with {rc} and warning:\n{str(err)}",
                file=log)

    ## file to dump water subprocess output; the context manager closes it
    ## even when an alignment raises
    with open("./TR_aln.log", "w") as water_log:
        vprint(subprocessID, "Starting alignments...", "prYellow")
        print(
            f"\n\t\t\tEMBOSS-water Smith-Waterman Aligner.\n\t\t\tmatch={m}\n\t\t\tgap_open={go}\n\t\t\tgap_extend={ge}\n",
            flush=True)

        for i, tr in enumerate(TR_frame):
            timestamp = strftime("%H:%M:%S", localtime())
            print("\r{time} {subprocess} :: Aligning TR {i}/{tr_count}".format(
                time=timestamp,
                subprocess=prYellow(subprocessID),
                i=i + 1,
                tr_count=tr_count),
                  end="... ",
                  file=sys.stdout,
                  flush=True)

            ## generate a homolog dict from the TR and track the number of missing features from the query library
            rfa_out, qfa_out, missing = tr.get_homologs_fasta(seqrec_array, args.m)
            missing_features += missing

            ## (label, asequence, output file) for the full region and both flanks
            jobs = (
                ("FULL", rfa_out, path.join(args.o, f"{tr.id}.water")),
                ("flank1", f"asis:{tr.flank1}",
                 path.join(args.o, f"{tr.id}_F1.water")),
                ("flank2", f"asis:{tr.flank2}",
                 path.join(args.o, f"{tr.id}_F2.water")),
            )
            for label, a_sequence, out_path in jobs:
                water_cline = WaterCommandline(args.w,
                                               asequence=a_sequence,
                                               bsequence=qfa_out,
                                               gapopen=go,
                                               gapextend=ge,
                                               outfile=out_path)
                run_alignment(water_cline, label, tr.id, water_log)

            ## NOTE(review): this stops after the 12th TR; it looks like a
            ## leftover debugging limit - confirm before relying on full runs.
            if i > 10:
                break

        print(f"Done with {missing_features} missing seqeuences.\n", flush=True)
示例#18
0
 def test_water_file2(self):
     """Align an asis query against a nucleotide FASTA file with water."""
     query = "ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG"
     out_file = "Emboss/temp_test2.water"
     in_file = "Fasta/f002"
     self.assertTrue(os.path.isfile(in_file))
     # Drop any output left over from an earlier run.
     if os.path.isfile(out_file):
         os.remove(out_file)
     water = WaterCommandline(cmd=exes["water"])
     # All options are given by their literal (dashed) names here.
     for option, value in (
         ("-asequence", "asis:%s" % query),
         ("-bsequence", in_file),
         ("-gapopen", "10"),
         ("-gapextend", "0.5"),
         ("-outfile", out_file),
     ):
         water.set_parameter(option, value)
     # The repr must evaluate back to an equivalent command line.
     rebuilt = eval(repr(water))
     self.assertEqual(str(rebuilt), str(water))
     # Execute water,
     self.run_water(water)
     # then compare the parsed alignments with the input records.
     targets = SeqIO.parse(in_file, "fasta")
     alignments = AlignIO.parse(out_file, "emboss")
     self.pairwise_alignment_check(query, targets, alignments, local=True)
     # Remove the temporary output.
     os.remove(out_file)
示例#19
0
 def test_water_file4(self):
     """Run water with the asis trick and a SwissProt file, output to a file."""
     # Setup,
     query = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
     out_file = "Emboss/temp_test4.water"
     in_file = "SwissProt/sp004"
     self.assertTrue(os.path.isfile(in_file))
     # Start from a clean slate so a stale output cannot hide a failure.
     if os.path.isfile(out_file):
         os.remove(out_file)
     cline = WaterCommandline(cmd=exes["water"])
     cline.set_parameter("-asequence", "asis:%s" % query)
     cline.set_parameter("-bsequence", in_file)
     # EMBOSS should work this out, but let's be explicit:
     cline.set_parameter("-sprotein", True)
     # TODO - Tell water this is a SwissProt file!
     cline.set_parameter("-gapopen", "20")
     cline.set_parameter("-gapextend", "5")
     cline.set_parameter("-outfile", out_file)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cline))), str(cline))
     # Run the tool,
     result, out, err = generic_run(cline)
     # Check it worked (the banner is expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"), errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         # Show the failing command; write() works on Python 2 and 3 alike.
         sys.stderr.write("\n%s\n" % cline)
     self.assertEqual(result.return_code, 0)
     # Should be able to access this via any alias:
     self.assertEqual(result.get_result("-outfile"), out_file)
     # Use a unittest assertion so the check survives "python -O".
     self.assertTrue(os.path.isfile(out_file))
     # Check we can parse the output and it is sensible...
     in_handle = open(in_file)
     try:
         out_handle = open(out_file)
         try:
             self.pairwise_alignment_check(query,
                                           SeqIO.parse(in_handle, "swiss"),
                                           AlignIO.parse(out_handle, "emboss"),
                                           local=True)
         finally:
             # Must be closed before the file is removed below.
             out_handle.close()
     finally:
         in_handle.close()
     # Clean up,
     os.remove(out_file)
示例#20
0
文件: EMBOSSW23.py 项目: cwt1/BioGUI
    def GetExec(self, optList, frame):
        """Build the EMBOSS water command line from the current GUI settings.

        optList is accepted but not used in this method.
        frame is stored on ``self.frame``; the two sequences are read from
        ``frame.paramBoxes[1]`` and ``frame.paramBoxes[3]``, the alphabet
        from ``frame.abet`` and ``frame.options`` gates the optional
        scoring matrix (``datafile``).

        Returns the command line as a string. ``self.outfile`` and
        ``self.outtype`` are set as a side effect.
        """
        # Respond to the "embossn" type command.
        self.frame = frame
        plugin_exe = r"C:/mEMBOSS/water.exe"
        self.outfile = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\water.txt"
        self.outtype = "fasta"
        cline = WaterCommandline(
            plugin_exe,
            asequence=str(self.frame.paramBoxes[1].GetValue()),
            bsequence=str(self.frame.paramBoxes[3].GetValue()))
        cline.outfile = self.outfile
        cline.gapopen = self.param[7].GetValue()
        cline.gapextend = self.param[9].GetValue()
        cline.similarity = bool(self.param[10].GetValue())

        if self.frame.abet == "AA":
            # Amino-acid input is protein; this branch used to set the
            # nucleotide flags, apparently copied from the branch below.
            cline.snucleotide = False
            cline.sprotein = True
        elif self.frame.abet in ("DNA", "RNA"):
            cline.snucleotide = True
            cline.sprotein = False
        if self.frame.options:
            t = self.boxList[3].GetValue()
            if t != '':
                cline.datafile = str(t)
        return str(cline)
示例#21
0
 def test_water_file3(self):
     """Align an asis query against a GenBank file with water."""
     query = "TGTTGTAATGTTTTAATGTTTCTTCTCCCTTTAGATGTACTACGTTTGGA"
     out_file = "Emboss/temp_test3.water"
     in_file = "GenBank/cor6_6.gb"
     self.assertTrue(os.path.isfile(in_file))
     # Drop any output left over from an earlier run.
     if os.path.isfile(out_file):
         os.remove(out_file)
     water = WaterCommandline(cmd=exes["water"])
     # All options are given by their human readable names here.
     # TODO - Tell water this is a GenBank file!
     for option, value in (
         ("asequence", "asis:%s" % query),
         ("bsequence", in_file),
         ("gapopen", "1"),
         ("gapextend", "0.5"),
         ("outfile", out_file),
     ):
         water.set_parameter(option, value)
     # The repr must evaluate back to an equivalent command line.
     rebuilt = eval(repr(water))
     self.assertEqual(str(rebuilt), str(water))
     # Execute water,
     self.run_water(water)
     # then compare the parsed alignments with the input records.
     targets = SeqIO.parse(in_file, "genbank")
     alignments = AlignIO.parse(out_file, "emboss")
     self.pairwise_alignment_check(query, targets, alignments, local=True)
     # Remove the temporary output.
     os.remove(out_file)
示例#22
0
 def test_water_file4(self):
     """Align an asis protein query against a SwissProt file with water."""
     query = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
     out_file = "Emboss/temp_test4.water"
     in_file = "SwissProt/sp004"
     self.assertTrue(os.path.isfile(in_file))
     # Drop any output left over from an earlier run.
     if os.path.isfile(out_file):
         os.remove(out_file)
     water = WaterCommandline(cmd=exes["water"])
     # "-sprotein" is set explicitly although EMBOSS should work it out.
     # TODO - Tell water this is a SwissProt file!
     for option, value in (
         ("-asequence", "asis:%s" % query),
         ("-bsequence", in_file),
         ("-sprotein", True),
         ("-gapopen", "20"),
         ("-gapextend", "5"),
         ("-outfile", out_file),
     ):
         water.set_parameter(option, value)
     # The repr must evaluate back to an equivalent command line.
     rebuilt = eval(repr(water))
     self.assertEqual(str(rebuilt), str(water))
     # Execute water,
     self.run_water(water)
     # then compare the parsed alignments with the input records.
     targets = SeqIO.parse(in_file, "swiss")
     alignments = AlignIO.parse(out_file, "emboss")
     self.pairwise_alignment_check(query, targets, alignments, local=True)
     # Remove the temporary output.
     os.remove(out_file)
示例#23
0
文件: EMBOSSW23.py 项目: cwt1/BioGUI
def GetExec(inF, outF):
    """Run the EMBOSS water executable and wait for it to finish.

    inF: passed to WaterCommandline as ``infile``
    outF: passed to WaterCommandline as ``outfile``
    """
    plugin_exe = r"C:/mEMBOSS/water.exe"
    # NOTE(review): every other call site configures water through
    # asequence/bsequence; ``infile`` may not be an option WaterCommandline
    # accepts - confirm.
    cline = WaterCommandline(plugin_exe, infile=inF, outfile=outF)
    # Use the local command line: this is a plain function, so the former
    # ``self.cline`` raised NameError.
    p = subprocess.Popen(str(cline))
    p.wait()
示例#24
0
# http://rosalind.info/problems/swat/

from Bio.Emboss.Applications import WaterCommandline
from Bio import ExPASy, SeqIO

if __name__ == "__main__":
    # Solve the Rosalind SWAT problem: fetch two UniProt entries, align them
    # locally with water and print the integer alignment score.

    # split() without an argument splits on any whitespace, so the newline
    # ending the input file does not stay attached to the last id.
    with open('rosalind_swat.txt') as dataset:
        ids = dataset.read().split()

    for i in ids:
        handle = ExPASy.get_sprot_raw(i)
        r = SeqIO.read(handle, "swiss")
        handle.close()

        # Save each record as FASTA in a file named after its id.
        with open(i, 'w') as f:
            SeqIO.write(r, f, 'fasta')

    water_cline = WaterCommandline()
    water_cline.asequence = ids[0]
    water_cline.bsequence = ids[1]
    water_cline.outfile = "rosalind_swat_output.txt"
    water_cline.gapopen = 10
    water_cline.gapextend = 1
    water_cline()

    # Report every "Score:" value found in the water output as an integer.
    with open('rosalind_swat_output.txt') as result:
        for line in result:
            if 'Score:' in line:
                print(int(float(line[:-1].split(':')[-1].strip())))
示例#25
0
import Bio.Seq
import os
from Bio.Emboss.Applications import WaterCommandline
from Bio.Align.Applications import ClustalwCommandline

# Read tab-separated records and build a water command line for the text
# between the first and last tab (sequence 1) and after the last tab
# (sequence 2) of every line.
# Both files are closed by the context manager, even if a line fails.
with open('/home/nastia/fasta_end.txt', 'r') as fasta, \
        open('/home/nastia/Desktop/output.txt', 'w') as outfileput:
    for string in fasta:
        m = string.find('\t')
        n = string.rfind('\t')
        my_seq_1 = Bio.Seq.Seq(string[m + 1:n])
        # Strip only the line terminator; slicing off the last character
        # lost a residue when the final line had no trailing newline.
        my_seg_2 = Bio.Seq.Seq(string[n + 1:].rstrip('\n'))
        # NOTE(review): the command line is built but never executed or
        # written to the output file. The sequences are also passed without
        # the "asis:" prefix other callers use for literal sequences - confirm.
        cline = WaterCommandline(gapopen=10,
                                 gapextend=0.5,
                                 asequence=my_seq_1,
                                 bsequence=my_seg_2,
                                 outfile='/home/nastia/Desktop/Water.txt')
示例#26
0
 def test_water_file4(self):
     """Run water with the asis trick and a SwissProt file, output to a file."""
     # Setup,
     query = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
     out_file = "Emboss/temp_test4.water"
     in_file = "SwissProt/sp004"
     self.assertTrue(os.path.isfile(in_file))
     # Start from a clean slate so a stale output cannot hide a failure.
     if os.path.isfile(out_file):
         os.remove(out_file)
     cline = WaterCommandline(cmd=exes["water"])
     cline.set_parameter("-asequence", "asis:%s" % query)
     cline.set_parameter("-bsequence", in_file)
     # EMBOSS should work this out, but let's be explicit:
     cline.set_parameter("-sprotein", True)
     # TODO - Tell water this is a SwissProt file!
     cline.set_parameter("-gapopen", "20")
     cline.set_parameter("-gapextend", "5")
     cline.set_parameter("-outfile", out_file)
     # The repr must evaluate back to an equivalent command line.
     self.assertEqual(str(eval(repr(cline))), str(cline))
     # Run the tool,
     result, out, err = generic_run(cline)
     # Check it worked (the banner is expected on stderr, nothing on stdout),
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         # Show the failing command; write() works on Python 2 and 3 alike.
         sys.stderr.write("\n%s\n" % cline)
     self.assertEqual(result.return_code, 0)
     # Should be able to access this via any alias:
     self.assertEqual(result.get_result("-outfile"), out_file)
     # Use a unittest assertion so the check survives "python -O".
     self.assertTrue(os.path.isfile(out_file))
     # Check we can parse the output and it is sensible...
     in_handle = open(in_file)
     try:
         out_handle = open(out_file)
         try:
             self.pairwise_alignment_check(query,
                                           SeqIO.parse(in_handle, "swiss"),
                                           AlignIO.parse(out_handle, "emboss"),
                                           local=True)
         finally:
             # Must be closed before the file is removed below.
             out_handle.close()
     finally:
         in_handle.close()
     # Clean up,
     os.remove(out_file)
示例#27
0
"""
This is the first example of Python script.
"""
a = 10  # variable a
b = 33  # variable b
c = a / b  # variable c holds the ratio

# Let's print the result to screen.
print("a:", a, " b:", b, " a/b=", c)

from Bio.Seq import Seq

# Rebinds ``a`` from the integer above to a Biopython sequence object.
a = Seq("ATATATACG")

# Bare attribute access; the value is discarded.
# NOTE(review): newer Biopython releases no longer provide Seq.alphabet - confirm.
a.alphabet
# NOTE(review): Seq does not appear to offer a sequence() method, so this
# line likely raises AttributeError and stops the script here - confirm.
a.sequence()

from Bio.Emboss.Applications import WaterCommandline
# Build a water command line for two literal ("asis:") sequences; it is only
# printed below, not executed.
cline = WaterCommandline(gapopen=10, gapextend=0.5)
cline.asequence = "asis:ACCCGGGCGCGGT"
cline.bsequence = "asis:ACCCGAGCGCGGT"
cline.outfile = "temp_water.txt"
print(cline)
示例#28
0
文件: EMBOSSW23.py 项目: fxb22/BioGUI
    def GetExec(self, optList, frame):
        """Build the water command line from the current GUI settings.

        Parameters:
            optList: not used by this method.
            frame: GUI frame; stored on ``self.frame`` and read for the two
                sequence inputs (``paramBoxes[1]`` and ``paramBoxes[3]``),
                the alphabet (``abet``) and the ``options`` flag.

        Returns:
            The assembled command line as a string.

        Side effects:
            Sets ``self.frame``, ``self.outfile`` and ``self.outtype``.
        """
        # Respond to the "embossn" type command.
        self.frame = frame
        plugin_exe = r"C:/mEMBOSS/water.exe"
        self.outfile = r"C:\Users\francis\Documents\Monguis\BioGui\plugins\water.txt"
        self.outtype = "fasta"
        cline = WaterCommandline(
            plugin_exe,
            asequence=str(self.frame.paramBoxes[1].GetValue()),
            bsequence=str(self.frame.paramBoxes[3].GetValue()),
        )
        cline.outfile = self.outfile
        cline.gapopen = self.param[7].GetValue()
        cline.gapextend = self.param[9].GetValue()
        cline.similarity = bool(self.param[10].GetValue())

        # Tell EMBOSS which kind of sequence it is getting. Amino-acid input
        # must be flagged as protein; it was previously flagged as
        # nucleotide, exactly like the DNA/RNA branch.
        if self.frame.abet == "AA":
            cline.snucleotide = False
            cline.sprotein = True
        elif self.frame.abet in ("DNA", "RNA"):
            cline.snucleotide = True
            cline.sprotein = False
        if self.frame.options:
            # Optional data file; an empty box leaves the option unset.
            t = self.boxList[3].GetValue()
            if t != '':
                cline.datafile = str(t)
        return str(cline)
示例#29
0
 def test_water_file3(self):
     """Align an asis nucleotide query against a GenBank file with water.

     Options are set using their human readable names. The alignment is
     written to a file, parsed back, checked, and the file is removed.
     """
     genbank_path = "GenBank/cor6_6.gb"
     water_path = "Emboss/temp_test3.water"
     probe = "TGTTGTAATGTTTTAATGTTTCTTCTCCCTTTAGATGTACTACGTTTGGA"
     self.assertTrue(os.path.isfile(genbank_path))
     # Make sure no output from a previous run is lying around.
     if os.path.isfile(water_path):
         os.remove(water_path)
     water = WaterCommandline(cmd=exes["water"])
     # TODO - Tell water this is a GenBank file!
     settings = (
         ("asequence", "asis:%s" % probe),
         ("bsequence", genbank_path),
         ("gapopen", "1"),
         ("gapextend", "0.5"),
         ("outfile", water_path),
     )
     for option, value in settings:
         water.set_parameter(option, value)
     # The repr of the command line must round-trip through eval.
     self.assertEqual(str(eval(repr(water))), str(water))
     # Run the tool,
     self.run_water(water)
     # Check we can parse the output and it is sensible...
     targets = SeqIO.parse(genbank_path, "genbank")
     alignments = AlignIO.parse(water_path, "emboss")
     self.pairwise_alignment_check(probe, targets, alignments, local=True)
     # Clean up,
     os.remove(water_path)
示例#30
0
import os

from Bio import ExPASy
from Bio import SeqIO
from Bio.Emboss.Applications import WaterCommandline


# Reads identifiers from data/rosalind_swat.txt, saves each record fetched
# via ExPASy as a FASTA file named after its identifier, aligns the first two
# with water and prints the integer score(s) found in the water output.
if __name__ == "__main__":
    with open(os.path.join('data', 'rosalind_swat.txt')) as dataset:
        ids = dataset.read().split()

    for i in ids:
        handle = ExPASy.get_sprot_raw(i)
        try:
            r = SeqIO.read(handle, "swiss")
        finally:
            # Close the handle even when the record cannot be parsed.
            handle.close()
        with open(i, 'w') as f:
            SeqIO.write(r, f, 'fasta')

    water_cline = WaterCommandline()
    water_cline.asequence = ids[0]
    water_cline.bsequence = ids[1]
    water_cline.outfile = "water.txt"
    water_cline.gapopen = 10
    water_cline.gapextend = 1

    water_cline()

    with open('water.txt') as f:
        for line in f:
            if 'Score:' in line:
                # strip() already drops the newline; slicing off the last
                # character would lose a digit on an unterminated last line.
                print(int(float(line.split(':')[-1].strip())))
示例#31
0
 def test_water_file2(self):
     """Align an asis nucleotide query against a FASTA file with water.

     Options are set using their literal switch names. The alignment is
     written to a file, parsed back, checked, and the file is removed.
     """
     fasta_path = "Fasta/f002"
     water_path = "Emboss/temp_test2.water"
     probe = "ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG"
     self.assertTrue(os.path.isfile(fasta_path))
     # Make sure no output from a previous run is lying around.
     if os.path.isfile(water_path):
         os.remove(water_path)
     water = WaterCommandline(cmd=exes["water"])
     settings = (
         ("-asequence", "asis:%s" % probe),
         ("-bsequence", fasta_path),
         ("-gapopen", "10"),
         ("-gapextend", "0.5"),
         ("-outfile", water_path),
     )
     for switch, value in settings:
         water.set_parameter(switch, value)
     # The repr of the command line must round-trip through eval.
     self.assertEqual(str(eval(repr(water))), str(water))
     # Run the tool,
     self.run_water(water)
     # Check we can parse the output and it is sensible...
     targets = SeqIO.parse(fasta_path, "fasta")
     alignments = AlignIO.parse(water_path, "emboss")
     self.pairwise_alignment_check(probe, targets, alignments, local=True)
     # Clean up,
     os.remove(water_path)
示例#32
0
 def test_water_file2(self):
     """Run water with the asis trick and a nucleotide FASTA file, output to a file.

     The query is passed inline with the "asis:" prefix and aligned against
     the FASTA input file. The alignment file is parsed back, checked with
     ``pairwise_alignment_check`` and finally removed.
     """
     # Setup,
     query = "ACACACTCACACACACTTGGTCAGAGATGCTGTGCTTCTTGGAAGCAAGGNCTCAAAGGCAAGGTGCACGCAGAGGGACGTTTGAGTCTGGGATGAAGCATGTNCGTATTATTTATATGATGGAATTTCACGTTTTTATG"
     out_file = "Emboss/temp_test2.water"
     in_file = "Fasta/f002"
     self.assertTrue(os.path.isfile(in_file))
     # Remove any output file left over from an earlier run.
     if os.path.isfile(out_file):
         os.remove(out_file)
     cline = WaterCommandline(cmd=exes["water"])
     cline.set_parameter("-asequence", "asis:%s" % query)
     cline.set_parameter("-bsequence", in_file)
     cline.set_parameter("-gapopen", "10")
     cline.set_parameter("-gapextend", "0.5")
     cline.set_parameter("-outfile", out_file)
     # The repr of the command line must round-trip through eval.
     self.assertEqual(str(eval(repr(cline))), str(cline))
     # Run the tool,
     result, out, err = generic_run(cline)
     # Check it worked,
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         # Show the failing command. write() behaves the same on Python 2
         # and 3, unlike the "print >> stream" statement.
         sys.stderr.write("\n%s\n" % cline)
     self.assertEqual(result.return_code, 0)
     self.assertEqual(result.get_result("outfile"), out_file)
     # A unittest assertion (not a bare assert) so it survives "python -O".
     self.assertTrue(os.path.isfile(out_file))
     # Check we can parse the output and it is sensible...
     # Both files are opened in context managers so the handles are closed
     # before the output file is deleted, even if the check fails.
     with open(in_file) as in_handle:
         with open(out_file) as out_handle:
             self.pairwise_alignment_check(
                 query,
                 SeqIO.parse(in_handle, "fasta"),
                 AlignIO.parse(out_handle, "emboss"),
                 local=True)
     # Clean up,
     os.remove(out_file)
示例#33
0
 def test_water_file4(self):
     """Align an asis protein query against a SwissProt file with water.

     Options are set using their literal switch names. The alignment is
     written to a file, parsed back, checked, and the file is removed.
     """
     swiss_path = "SwissProt/P0A186.txt"
     water_path = "Emboss/temp_test4.water"
     probe = "DVCTGKALCDPVTQNIKTYPVKIENLRVMI"
     self.assertTrue(os.path.isfile(swiss_path))
     # Make sure no output from a previous run is lying around.
     if os.path.isfile(water_path):
         os.remove(water_path)
     water = WaterCommandline(cmd=exes["water"])
     # TODO - Tell water this is a SwissProt file!
     settings = (
         ("-asequence", "asis:%s" % probe),
         ("-bsequence", swiss_path),
         # EMBOSS should work this out, but let's be explicit:
         ("-sprotein", True),
         ("-gapopen", "20"),
         ("-gapextend", "5"),
         ("-outfile", water_path),
     )
     for switch, value in settings:
         water.set_parameter(switch, value)
     # The repr of the command line must round-trip through eval.
     self.assertEqual(str(eval(repr(water))), str(water))
     # Run the tool,
     self.run_water(water)
     # Check we can parse the output and it is sensible...
     targets = SeqIO.parse(swiss_path, "swiss")
     alignments = AlignIO.parse(water_path, "emboss")
     self.pairwise_alignment_check(probe, targets, alignments, local=True)
     # Clean up,
     os.remove(water_path)
示例#34
0
 def test_water_file3(self):
     """Run water with the asis trick and a GenBank file, output to a file.

     The query is passed inline with the "asis:" prefix and aligned against
     the GenBank input file. The alignment file is parsed back, checked with
     ``pairwise_alignment_check`` and finally removed.
     """
     # Setup,
     query = "TGTTGTAATGTTTTAATGTTTCTTCTCCCTTTAGATGTACTACGTTTGGA"
     out_file = "Emboss/temp_test3.water"
     in_file = "GenBank/cor6_6.gb"
     self.assertTrue(os.path.isfile(in_file))
     # Remove any output file left over from an earlier run.
     if os.path.isfile(out_file):
         os.remove(out_file)
     cline = WaterCommandline(cmd=exes["water"])
     cline.set_parameter("asequence", "asis:%s" % query)
     cline.set_parameter("bsequence", in_file)
     # TODO - Tell water this is a GenBank file!
     cline.set_parameter("gapopen", "1")
     cline.set_parameter("gapextend", "0.5")
     cline.set_parameter("outfile", out_file)
     # The repr of the command line must round-trip through eval.
     self.assertEqual(str(eval(repr(cline))), str(cline))
     # Run the tool,
     result, out, err = generic_run(cline)
     # Check it worked,
     errors = err.read().strip()
     self.assertTrue(errors.startswith("Smith-Waterman local alignment"),
                     errors)
     self.assertEqual(out.read().strip(), "")
     if result.return_code != 0:
         # Show the failing command. write() behaves the same on Python 2
         # and 3, unlike the "print >> stream" statement.
         sys.stderr.write("\n%s\n" % cline)
     self.assertEqual(result.return_code, 0)
     self.assertEqual(result.get_result("outfile"), out_file)
     # A unittest assertion (not a bare assert) so it survives "python -O".
     self.assertTrue(os.path.isfile(out_file))
     # Check we can parse the output and it is sensible...
     # Both files are opened in context managers so the handles are closed
     # before the output file is deleted, even if the check fails.
     with open(in_file) as in_handle:
         with open(out_file) as out_handle:
             self.pairwise_alignment_check(
                 query,
                 SeqIO.parse(in_handle, "genbank"),
                 AlignIO.parse(out_handle, "emboss"),
                 local=True)
     # Clean up,
     os.remove(out_file)