Exemplo n.º 1
0
 def test_blastn(self):
     """Run BLAST.blastn on the small query and check the first hit.

     The query ``query.fas`` is blasted against ``silkcds.fa``; the second
     comma-separated field of the first line of ``query_blastn_out.csv``
     must equal ``BGIBMGA000001-TA``.
     """
     blast_dir = self.cwd + "/BLAST/"
     BLAST.blastn(blast_dir + "query.fas", blast_dir + "silkcds.fa")

     # Default to None so that an empty output file produces a normal
     # assertion failure instead of a NameError on ``result``.
     result = None
     with open(blast_dir + "query_blastn_out.csv", "r") as handle:
         for line in handle:
             result = line.split(",")[1]
             break

     # Remove every file that shares the subject FASTA's name as a prefix
     # but is not the FASTA itself (presumably the BLAST database files
     # generated next to it -- confirm against BLAST.blastn).
     for name in os.listdir(blast_dir):
         if name.startswith("silkcds.fa") and name != "silkcds.fa":
             os.remove(blast_dir + name)

     self.assertEqual(result, "BGIBMGA000001-TA")
Exemplo n.º 2
0
    def test_blastParser(self):
        """Check that BLAST.blastParser yields two sequences per BLAST table.

        The same query/subject pair is blasted and then parsed twice, once
        with ``queries_db1_blastn_out.csv`` and once with
        ``queries_db2_blastn_out.csv`` (the table with a header row). Each
        time the FASTA written to ``output.txt`` must hold two records.
        """
        query = self.cwd + "/BLAST/queries_db1.fas"
        sbj = self.cwd + "/BLAST/silkcds.fa"
        out = self.cwd + "/BLAST/output.txt"
        blast_tables = (
            self.cwd + "/BLAST/queries_db1_blastn_out.csv",
            # with header row
            self.cwd + "/BLAST/queries_db2_blastn_out.csv",
        )

        for blast_table in blast_tables:
            BLAST.blastn(query, sbj)
            try:
                BLAST.blastParser(blast_table, sbj, out)
                seqs = [i.id for i in SeqIO.parse(out, "fasta")]
                self.assertEqual(2, len(seqs))
            finally:
                # Always drop the parser output, even when the check fails,
                # so a stale file cannot leak into the next run.
                if os.path.exists(out):
                    os.remove(out)
Exemplo n.º 3
0
    def test_blastn_big_query_file(self):
        """Run BLAST.blastn on the large, gzip-compressed query fixture.

        ``query_big.fas.gz`` is decompressed with ``gunzip``, blasted against
        ``silkcds.fa`` and the second comma-separated field of the first line
        of ``query_big_blastn_out.csv`` must equal ``BGIBMGA000001-TA``.
        The fixture is re-compressed and generated files are removed
        afterwards, whether or not the check passes.

        Raises:
            subprocess.CalledProcessError: if ``gunzip`` or ``gzip`` exits
                with a non-zero status.
        """
        blast_dir = os.path.join(self.cwd, "BLAST")
        query_file = os.path.join(blast_dir, "query_big.fas.gz")
        gunzipped_query_file = os.path.join(blast_dir, "query_big.fas")
        blast_out = os.path.join(blast_dir, "query_big_blastn_out.csv")

        # check_call raises on a non-zero exit status, so no return-code
        # test is needed. An argument list (no shell) keeps paths with
        # spaces or shell metacharacters intact.
        subprocess.check_call(["gunzip", query_file])
        try:
            BLAST.blastn(gunzipped_query_file, self.cwd + "/BLAST/silkcds.fa")

            # None makes an empty output file fail the assertion cleanly.
            result = None
            with open(blast_out, "r") as handle:
                for line in handle:
                    result = line.split(",")[1]
                    break

            self.assertEqual(result, "BGIBMGA000001-TA")
        finally:
            # Remove files sharing the subject FASTA's name as a prefix
            # (presumably BLAST database files -- confirm), but never the
            # FASTA itself.
            for name in os.listdir(blast_dir):
                if name.startswith("silkcds.fa") and name != "silkcds.fa":
                    os.remove(os.path.join(blast_dir, name))
            if os.path.exists(blast_out):
                os.remove(blast_out)
            # Restore the compressed fixture so the test can be re-run.
            subprocess.check_call(["gzip", gunzipped_query_file])
Exemplo n.º 4
0
#!/usr/bin/env python

from pyphylogenomics import BLAST

# BLAST the Bombyx exon sequences against the Dp genome FASTA, then hand the
# resulting table, the same genome file and an output FASTA path to
# BLAST.blastParser, labelling the result with sp_name "Danaus".
exons_query = "grefs/Bombyx_exons.fas"
target_genome = "grefs/Dp_genome_v2.fasta"

BLAST.blastn(exons_query, target_genome)
BLAST.blastParser(
    "grefs/Bombyx_exons_blastn_out.csv",
    target_genome,
    "grefs/Danaus_exons.fasta",
    sp_name="Danaus"
)
import sys
import glob
import os
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord



# Without an input file there is nothing to do: show the usage text and stop.
# print(...) with a single string argument behaves the same on Python 2 and 3.
if len(sys.argv) < 2:
    print("""This script takes as input a FASTA file and will put all sequences
            in same direction using blast results.""")
    sys.exit()

# BLAST the input FASTA against itself.
infile = sys.argv[1].strip()
BLAST.blastn(infile, infile)

# remove BLAST files
for blast_db_file in glob.glob(infile + ".*"):
    os.remove(blast_db_file)
if os.path.isfile(infile + "_dust.asnb"):
    os.remove(infile + "_dust.asnb")

# parse BLAST output file
# Build the table name from the input path minus its extension, so inputs
# such as "x.fas" or "x.fa" no longer resolve to the FASTA file itself.
# NOTE(review): assumes BLAST.blastn names its output
# "<input without extension>_blastn_out.csv" -- confirm against BLAST.blastn.
blast_file = os.path.splitext(infile)[0] + "_blastn_out.csv"
with open(blast_file, "r") as f:
    lines = f.readlines()

def reverse(id, infile):
    # reverse complement sequence of ID in FASTA file infile
import sys

from pyphylogenomics import BLAST

# Command-line wrapper around BLAST.blastn.
# argv[1]: file with the query sequences; argv[2]: genome file to search.
if len(sys.argv) < 3:
    # Exit with a readable message instead of an IndexError traceback.
    sys.exit("Usage: %s <query_seqs> <genome>" % sys.argv[0])

query_seqs = sys.argv[1].strip()
genome = sys.argv[2].strip()
BLAST.blastn(query_seqs, genome)
#!/usr/bin/env python

import os
from pyphylogenomics import BLAST


# Do a BLASTn of the sequences against the Bombyx mori genome. The input
# arguments are the file containing the sequences for single-copy genes
# (data/pulled_seqs.fasta) and the file with the genome of Bombyx mori in
# FASTA format (data/silkgenome.fa).
BLAST.blastn('data/pulled_seqs.fasta', 'data/silkgenome.fa')


from pyphylogenomics import OrthoDB
from pyphylogenomics import BLAST

# Input and output locations used by the pipeline below.
in_file = 'grefs/OrthoDB7_Arthropoda_tabtext'
cds_file = 'grefs/silkcds.fa'
genome_file = 'grefs/silkgenome.fa'
pulled_seqs_file = 'pulled_seqs.fasta'
blast_table = "pulled_seqs_blastn_out.csv"
exons_out_file = "grefs/Bombyx_exons.fas"

# 1. Select the single-copy genes listed for Bombyx mori in the OrthoDB table.
genes = OrthoDB.single_copy_genes(in_file, 'Bombyx mori')

# 2. Fetch their sequences from the CDS file (presumably written to
#    pulled_seqs.fasta -- confirm against BLAST.get_cds).
BLAST.get_cds(genes, cds_file)

# 3. BLAST the pulled sequences against the genome.
BLAST.blastn(pulled_seqs_file, genome_file)

# 4. Pick the largest exon per hit with the given thresholds, then filter
#    the candidates through BLAST.eraseFalsePosi.
exons = BLAST.eraseFalsePosi(
    BLAST.getLargestExon(blast_table, E_value=0.001, ident=98, exon_len=300)
)

# 5. Store the remaining exons in frame.
BLAST.storeExonsInFrame(exons, pulled_seqs_file, exons_out_file)