def test_blastn(self):
    """Run blastn on ``query.fas`` vs ``silkcds.fa`` and check the first hit.

    The second comma-separated field of the first line of
    ``query_blastn_out.csv`` must equal ``BGIBMGA000001-TA``.
    """
    blast_dir = os.path.join(self.cwd, "BLAST")
    first_line = ""
    try:
        BLAST.blastn(os.path.join(blast_dir, "query.fas"),
                     os.path.join(blast_dir, "silkcds.fa"))
        # Only the first line is needed; the context manager closes the
        # handle even when reading fails.
        with open(os.path.join(blast_dir, "query_blastn_out.csv"), "r") as handle:
            first_line = handle.readline()
    finally:
        # Remove every file named "silkcds.fa<something>" (presumably the
        # BLAST database files built next to the subject FASTA) while
        # keeping "silkcds.fa" itself. Done in ``finally`` so a failure
        # above does not leave them behind.
        for name in os.listdir(blast_dir):
            if name.startswith("silkcds.fa") and len(name) > 10:
                os.remove(os.path.join(blast_dir, name))

    self.assertTrue(first_line, "blastn output file is empty")
    result = first_line.split(",")[1]
    self.assertEqual(result, "BGIBMGA000001-TA")
def test_blastParser(self):
    """Check that blastParser writes two FASTA records for each BLAST table.

    The same query/subject pair is used twice: once with
    ``queries_db1_blastn_out.csv`` and once with
    ``queries_db2_blastn_out.csv`` (the "with header row" variant).
    """
    blast_dir = os.path.join(self.cwd, "BLAST")
    query = os.path.join(blast_dir, "queries_db1.fas")
    sbj = os.path.join(blast_dir, "silkcds.fa")
    out = os.path.join(blast_dir, "output.txt")

    table_names = (
        "queries_db1_blastn_out.csv",
        "queries_db2_blastn_out.csv",  # with header row
    )
    for table_name in table_names:
        blast_table = os.path.join(blast_dir, table_name)
        BLAST.blastn(query, sbj)
        try:
            BLAST.blastParser(blast_table, sbj, out)
            seqs = [record.id for record in SeqIO.parse(out, "fasta")]
        finally:
            # Always drop the parser output so a failure in one scenario
            # cannot leak a stale file into the next one (or the next run).
            if os.path.exists(out):
                os.remove(out)
        self.assertEqual(2, len(seqs))
def test_blastn_big_query_file(self):
    """Run blastn with the gunzipped ``query_big.fas`` and check the first hit.

    The compressed fixture ``query_big.fas.gz`` is gunzipped for the run and
    gzipped again afterwards; the generated ``query_big_blastn_out.csv`` is
    removed. The second comma-separated field of the first output line must
    equal ``BGIBMGA000001-TA``.
    """
    blast_dir = os.path.join(self.cwd, "BLAST")
    query_file = os.path.join(blast_dir, "query_big.fas.gz")
    gunzipped_query_file = os.path.join(blast_dir, "query_big.fas")
    blast_out = os.path.join(blast_dir, "query_big_blastn_out.csv")

    # check_call raises CalledProcessError on a non-zero exit status, so no
    # explicit return-code test is needed. Passing an argument list avoids
    # the shell and keeps paths containing spaces intact.
    subprocess.check_call(["gunzip", query_file])
    first_line = ""
    try:
        BLAST.blastn(gunzipped_query_file, os.path.join(blast_dir, "silkcds.fa"))
        with open(blast_out, "r") as handle:
            first_line = handle.readline()
    finally:
        # Restore the fixture directory even when blastn or the read fails;
        # otherwise the next run would not find query_big.fas.gz.
        for name in os.listdir(blast_dir):
            # Files named "silkcds.fa<something>" (presumably the BLAST
            # database files); "silkcds.fa" itself is kept.
            if name.startswith("silkcds.fa") and len(name) > 10:
                os.remove(os.path.join(blast_dir, name))
        if os.path.exists(blast_out):
            os.remove(blast_out)
        if os.path.exists(gunzipped_query_file):
            subprocess.check_call(["gzip", gunzipped_query_file])

    self.assertTrue(first_line, "blastn output file is empty")
    result = first_line.split(",")[1]
    self.assertEqual(result, "BGIBMGA000001-TA")
#!/usr/bin/env python
# BLAST the Bombyx exon sequences against the Dp_genome_v2 FASTA file, then
# hand the resulting table to blastParser, which is given the output path
# grefs/Danaus_exons.fasta and the species name "Danaus".
from pyphylogenomics import BLAST

exon_queries = "grefs/Bombyx_exons.fas"
target_genome = "grefs/Dp_genome_v2.fasta"

BLAST.blastn(exon_queries, target_genome)

BLAST.blastParser(
    "grefs/Bombyx_exons_blastn_out.csv",
    target_genome,
    "grefs/Danaus_exons.fasta",
    sp_name="Danaus"
)
# Script fragment: BLASTs a FASTA file against itself and loads the resulting
# table into memory.
# NOTE(review): `BLAST` is used below but not imported in the visible part of
# this script -- presumably `from pyphylogenomics import BLAST` precedes it;
# confirm.
import sys
import glob
import os
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

# Without a command-line argument, print the usage text and exit.
if len(sys.argv) < 2:
    print """This script takes as input a FASTA file and will put all sequences in same direction using blast results."""
    sys.exit()

# Path of the input FASTA file, with surrounding whitespace stripped.
infile = sys.argv[1].strip()

# The input file is passed as both arguments of blastn.
BLAST.blastn(infile, infile)

# remove BLAST files
# (everything matching "<infile>.*", plus "<infile>_dust.asnb" when present)
for file in glob.glob(infile + ".*"):
    os.remove(file)
if os.path.isfile(infile + "_dust.asnb"):
    os.remove(infile + "_dust.asnb")

# parse BLAST output file
# NOTE(review): the table path is derived by replacing ".fasta" in the input
# path; for an input without ".fasta" in its name, blast_file equals infile.
# Verify against the output naming used by BLAST.blastn.
blast_file = infile.replace(".fasta", "_blastn_out.csv")
f = open(blast_file, "r")
lines = f.readlines()
f.close()

# NOTE(review): the body of this function lies beyond the visible source.
def reverse(id, infile):
    # reverse complement sequence of ID in FASTA file infile
# Command-line wrapper around BLAST.blastn.
#
# Usage: <script> <query_seqs> <genome>
#   query_seqs  first positional argument, passed to blastn as the query
#   genome      second positional argument, passed to blastn as the subject
import sys

from pyphylogenomics import BLAST

# Fail with a readable usage message instead of an IndexError traceback when
# an argument is missing.
if len(sys.argv) < 3:
    sys.stderr.write("Usage: %s <query_seqs> <genome>\n" % sys.argv[0])
    sys.exit(1)

# Strip stray whitespace from the paths given on the command line.
query_seqs = sys.argv[1].strip()
genome = sys.argv[2].strip()

BLAST.blastn(query_seqs, genome)
#!/usr/bin/env python
"""Do a BLASTn of the sequences against the Bombyx mori genome.

The inputs are hard-coded below: the file containing the sequences for
single-copy genes (data/pulled_seqs.fasta) and the file with the genome of
Bombyx mori in FASTA format (data/silkgenome.fa).
"""
import os

from pyphylogenomics import BLAST

BLAST.blastn('data/pulled_seqs.fasta', 'data/silkgenome.fa')
# Pipeline script: select the single-copy genes of 'Bombyx mori' from an
# OrthoDB table, pass them with the CDS file to BLAST.get_cds, BLAST
# pulled_seqs.fasta against the silkworm genome, filter the exons found in the
# BLAST table, and store them in grefs/Bombyx_exons.fas.
# NOTE(review): pulled_seqs.fasta is presumably written by BLAST.get_cds --
# confirm.
from pyphylogenomics import OrthoDB
from pyphylogenomics import BLAST

orthodb_table = 'grefs/OrthoDB7_Arthropoda_tabtext'
silk_cds = 'grefs/silkcds.fa'
pulled_fasta = 'pulled_seqs.fasta'

genes = OrthoDB.single_copy_genes(orthodb_table, 'Bombyx mori')
BLAST.get_cds(genes, silk_cds)
BLAST.blastn(pulled_fasta, 'grefs/silkgenome.fa')

# Thresholds handed to getLargestExon as keyword arguments.
exon_filters = {"E_value": 0.001, "ident": 98, "exon_len": 300}
exons = BLAST.eraseFalsePosi(
    BLAST.getLargestExon("pulled_seqs_blastn_out.csv", **exon_filters)
)
BLAST.storeExonsInFrame(exons, pulled_fasta, "grefs/Bombyx_exons.fas")