def _blast_feature(self, f, c1, c2):
    """Map protein-level BLAST hits of the feature *f* onto *c1* and *c2*.

    The feature is extracted from *c1*, translated with table 11 and
    compared by blastp against every six-frame translation of *c2*.
    HSPs whose alignment is shorter than ``len(cds) * self.min_length`` or
    whose identity fraction is below ``self.min_identity`` are skipped.
    A summary line is printed for every HSP that is kept.

    Returns ``[(None, None, None)]`` when *c2* gives no translations or
    BLAST gives no HSPs; otherwise ``zip(f1, f2, col)`` where, per kept HSP,
    ``f1`` is a SeqFeature positioned on *c1*, ``f2`` a SeqFeature
    positioned on *c2* and ``col`` the colour of the hit.
    """
    trans = Translator(self._abort_event)
    cds = trans.translate(f.extract(c1), 11)
    sixframes = trans.translate_six_frames_single(c2, 11)
    if not sixframes:
        return [(None, None, None)]
    results = []
    for frame in sixframes:
        res = BlastCLI.s2s_blast(cds, frame, self.evalue,
                                 command='blastp', task='blastp')
        if res:
            results.extend(res)
    hsps = BlastCLI.all_hsps(results)
    if not hsps:
        return [(None, None, None)]
    f1 = []
    f2 = []
    col = []
    fname = self._feature_name(f, default='CDS')
    cds_len = len(cds)
    min_len = cds_len * self.min_length
    for hsp in hsps:
        if hsp.align_length < min_len:
            continue
        if hsp.identities / float(hsp.align_length) < self.min_identity:
            continue
        color_t = (float(hsp.identities) / hsp.align_length)
        print('%s %s: %5.1f%% (%5.1f%%)' %
              (c1.description, fname, color_t * 100,
               float(hsp.identities) / cds_len * 100))
        # The interpolation range is [0.2, 1]; a value below 0.2 (possible
        # whenever self.min_identity is lower than that) is rejected by
        # linearlyInterpolatedColor (presumably reportlab.lib.colors, which
        # raises ValueError), so clamp the value used for the colour only.
        col.append(colors.linearlyInterpolatedColor(
            colors.Color(0, 0, 1, 0.2), colors.Color(0, 1, 0, 0.2),
            0.2, 1, max(0.2, color_t)))
        # HSP coordinates are 1-based residue positions; convert them to
        # 0-based nucleotide offsets (three nucleotides per residue).
        qstart = (hsp.query_start - 1) * 3
        qend = qstart + hsp.align_length * 3
        sstart = (hsp.sbjct_start - 1) * 3
        send = sstart + hsp.align_length * 3
        f1.append(SeqFeature(FeatureLocation(f.location.start + qstart,
                                             f.location.start + qend,
                                             strand=hsp.strand[0])))
        f2.append(SeqFeature(FeatureLocation(sstart, send,
                                             strand=hsp.strand[1])))
    return zip(f1, f2, col)
def _blast_feature(self, f, c1, c2, features1, features2, evalue, max_rlen):
    """Map nucleotide BLAST hits of the feature *f* onto *c1* and *c2*.

    The feature is extracted from *c1* and compared by blastn against *c2*.
    *features1* and *features2* are accepted but not used by this
    implementation.

    Returns ``[(None, None, None)]`` when there are no HSPs; otherwise
    ``zip(f1, f2, col)`` where, per HSP, ``f1`` is a SeqFeature positioned
    on *c1*, ``f2`` a SeqFeature positioned on *c2* and ``col`` a colour
    interpolated by the identity fraction of the hit.
    """
    results = BlastCLI.s2s_blast(f.extract(c1), c2, evalue,
                                 command='blastn', task='blastn')
    hsps = BlastCLI.all_hsps(results, max_rlen)
    if not hsps:
        return [(None, None, None)]
    f1 = []
    f2 = []
    col = []
    for hsp in hsps:
        identity = float(hsp.identities) / hsp.align_length
        col.append(colors.linearlyInterpolatedColor(
            colors.Color(1, 1, 1, 0.2), colors.Color(0, 0, 0, 0.2),
            0, 1, identity))
        # BLAST reports 1-based positions while FeatureLocation is 0-based,
        # so shift both starts by one (the protein variant of this method
        # does the same); using them directly drops the first aligned base.
        qstart = f.location.start + hsp.query_start - 1
        sstart = hsp.sbjct_start - 1
        # NOTE(review): hits on the minus strand of the subject are not
        # treated specially here (strand is always 0) - confirm whether
        # all_hsps normalizes their coordinates.
        f1.append(SeqFeature(FeatureLocation(qstart,
                                             qstart + hsp.align_length,
                                             strand=0)))
        f2.append(SeqFeature(FeatureLocation(sstart,
                                             sstart + hsp.align_length,
                                             strand=0)))
    return zip(f1, f2, col)
def _blast_feature(self, f, c1, c2, features1, features2, evalue, max_rlen):
    """Map protein-level BLAST hits of the feature *f* onto *c1* and *c2*.

    The feature is extracted from *c1*, translated with table 11 and
    compared by blastp against every six-frame translation of *c2*.
    A summary line is printed for every HSP. *features1* and *features2*
    are accepted but not used by this implementation.

    Returns ``[(None, None, None)]`` when *c2* gives no translations or
    BLAST gives no HSPs; otherwise ``zip(f1, f2, col)`` where, per HSP,
    ``f1`` is a SeqFeature positioned on *c1*, ``f2`` a SeqFeature
    positioned on *c2* and ``col`` the colour of the hit.
    """
    trans = Translator(self._abort_event)
    cds = trans.translate(f.extract(c1), 11)
    sixframes = trans.translate_six_frames_single(c2, 11)
    if not sixframes:
        return [(None, None, None)]
    results = []
    for frame in sixframes:
        res = BlastCLI.s2s_blast(cds, frame, evalue,
                                 command='blastp', task='blastp')
        if res:
            results.extend(res)
    hsps = BlastCLI.all_hsps(results, max_rlen)
    if not hsps:
        return [(None, None, None)]
    f1 = []
    f2 = []
    col = []
    c1_name = pretty_rec_name(c1)
    if 'locus_tag' in f.qualifiers:
        fname = f.qualifiers['locus_tag'][0]
    else:
        fname = 'CDS'
    cds_len = len(cds)
    for hsp in hsps:
        color_t = (float(hsp.identities) / hsp.align_length)
        print('%s %s: %5.1f%% (%5.1f%%)' %
              (c1_name, fname, color_t * 100,
               float(hsp.identities) / cds_len * 100))
        # The interpolation range is [0.2, 1] and nothing filters out HSPs
        # with a lower identity; such a value is rejected by
        # linearlyInterpolatedColor (presumably reportlab.lib.colors, which
        # raises ValueError), so clamp the value used for the colour only.
        col.append(colors.linearlyInterpolatedColor(
            colors.Color(0, 0, 1, 0.2), colors.Color(0, 1, 0, 0.2),
            0.2, 1, max(0.2, color_t)))
        # HSP coordinates are 1-based residue positions; convert them to
        # 0-based nucleotide offsets (three nucleotides per residue).
        qstart = (hsp.query_start - 1) * 3
        qend = qstart + hsp.align_length * 3
        sstart = (hsp.sbjct_start - 1) * 3
        send = sstart + hsp.align_length * 3
        f1.append(SeqFeature(FeatureLocation(f.location.start + qstart,
                                             f.location.start + qend,
                                             strand=hsp.strand[0])))
        f2.append(SeqFeature(FeatureLocation(sstart, send,
                                             strand=hsp.strand[1])))
    return zip(f1, f2, col)
def blast_genes(self, db, evalue=10, **kwargs):
    """BLAST every gene of this object against *db* and print the hits.

    Each feature in ``self.genes`` is extracted from ``self.contig`` and
    passed to ``BlastCLI.blast_seq`` together with *evalue* and any extra
    keyword arguments. For each gene the raw result, the gene itself and
    every record, alignment and HSP are printed; nothing is returned.
    """
    # Allocated as in the original implementation; the visible code never
    # fills or returns it.
    gresults = [None] * len(self)
    for gene in self.genes:
        gene_seq = gene.extract(self.contig)
        results = BlastCLI.blast_seq(gene_seq, db, evalue, **kwargs)
        print('')
        print('Results: %s' % str(results))
        if results:
            print(gene)
            for rec in results:
                n_alignments = len(rec.alignments)
                print('Record: %s, %d alignments' % (str(rec), n_alignments))
                for alignment in rec.alignments:
                    print('Alignment: %s' % alignment)
                    for hsp in alignment.hsps:
                        print('HSP: %s' % hsp)
        else:
            print('No results')
if __name__ == '__main__':
    # Ad-hoc driver: installs the signal handlers and loads a fixed set of
    # GenBank genomes from a hard-coded local directory.
    from multiprocessing import Event
    from BioUtils.Tools.Output import user_message
    from BioUtils.SeqUtils import load_files
    _pid = os.getpid()
    # Route the interrupt, terminate and quit signals to sig_handler.
    signal.signal(signal.SIGINT, sig_handler)
    signal.signal(signal.SIGTERM, sig_handler)
    signal.signal(signal.SIGQUIT, sig_handler)
    if True:
        # from DegenPrimer import MultiprocessingBase
        # MultiprocessingBase.cpu_count = 1
        abort_event = Event()
        lb = BlastCLI(abort_event)
        with user_message('Loading genomes...', '\n'):
            genomes_dir = u'/home/allis/Dropbox/Science/Микра/Thermococcus/sequence/GenBank/Thermococcus'
            genome_names = [
                'Thermococcus_barophilus_Ch5-complete.gb',
                'Thermococcus_onnurineus_NA1-complete-genome.gb',
                'Thermococcus_sp._ES1.gb',
                'Thermococcus-DS1-preliminary.gb'
            ]
            genomes = load_files(
                abort_event,
                [os.path.join(genomes_dir, f) for f in genome_names], 'gb')
        # The first loaded genome is the reference, the rest are subjects.
        ref = genomes[0]
        subj = genomes[1:]
if __name__ == '__main__':
    # Ad-hoc driver: installs the signal handlers, loads a fixed set of
    # GenBank genomes from a hard-coded local directory and defines a
    # genome-to-genome blastp job on them.
    from multiprocessing import Event
    from BioUtils.Tools.Output import user_message
    from BioUtils.SeqUtils import load_files
    _pid = os.getpid()
    # Route the interrupt, terminate and quit signals to sig_handler.
    signal.signal(signal.SIGINT, sig_handler)
    signal.signal(signal.SIGTERM, sig_handler)
    signal.signal(signal.SIGQUIT, sig_handler)
    if True:
        # from DegenPrimer import MultiprocessingBase
        # MultiprocessingBase.cpu_count = 1
        abort_event = Event()
        lb = BlastCLI(abort_event)
        with user_message('Loading genomes...', '\n'):
            genomes_dir = u'/home/allis/Dropbox/Science/Микра/Thermococcus/sequence/GenBank/Thermococcus'
            genome_names = ['Thermococcus_barophilus_Ch5-complete.gb',
                            'Thermococcus_onnurineus_NA1-complete-genome.gb',
                            'Thermococcus_sp._ES1.gb',
                            'Thermococcus-DS1-preliminary.gb']
            genomes = load_files(abort_event, [os.path.join(genomes_dir, f) for f in genome_names], 'gb')
        # The first loaded genome is the reference, the rest are subjects.
        ref = genomes[0]
        subj = genomes[1:]

        # Runs g2g_blastp of the reference against the subjects for the two
        # named features; wrapped by shelf_result (defined elsewhere).
        @shelf_result
        def g2g2shelf():
            return lb.g2g_blastp(ref, subj, 11,
                                 features_of_interest=[{'ugene_name': 'FC-full'},
                                                       {'ugene_name': 'COC-full'}])
def _main(self): query = simple_rec( 'AAACTGGGGCTAATACCCGATGGGTGAGGAGGCCTGGAATGGTTCTTCACCGAAAAGACGTTGAGACCATGCTTTTCAACGTTGCCTAAGGATGGGGCCGCGTCCGATCAGGTTGTTGGTGGGGTAACGGCTCACCAAGCCTATAACCGGTACGGGCCGTGGGAGCGGAAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCAGCAGTCGCGAAAACTCCGCAATGCGCGAAAGCGTGACGGGGCTACCCCGAGTGCCGTCCGCTGAGGATGGCTTTTCCCCGGTGTAATGAGCCTGGGGAATAAGGAGAGGGCAAGCCTGGTGTCAGCCGCCGCGGTAATACCAGCTCTCCGAGTGGTAGGGATGATTATTGGGCTTAAAGCGTCCGTAGCCAGCCCGGCAAGTCTCCCGTTAAATCCAGCGACCTAATCGTTGGGCTGCGGAAGATACTGTTGGGCTAGGGGGCGGGAGAGGCCGACGGTATTCCCGGGGTAGGGGTGAAATCCTATAATCCTGGGAGGACCACCAGTGGCGAAGGCTGTCGGCTAGAACGCGCTCGACGGTGAGGGACGAAAGCTGGGGGAGCGAACTGGATTAGATACCCGGGTAGTCCCAGCTGTAAACGATGCGGGCTAGGTGTTGGGGTGGCTACGAGCCACCTCAGTGCCGCAGGGAAGCCATTAAGCCCGCCGCCTGGGAAGTACGGCCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCACCACAAGGCGTGAAGCTTGCGGTTTAATTGGAGTCAACGCCGGGAACCTTACCGGGGGCGACAGCAGGATGAGGGCCAGATTGAAGGTCTTGCTTGACAAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCTGTTAAGTCAGGCAACGATCGAGACCCGCACCCTTAGTTGCAACCCCTGCGGAACCCGCAGGGGGCACACTACGGGAACTGCCGCCGATAAGGCGGAGGAAGGAGCGGGCCACGGCAGGTCAGTATGCCCCGAATCCCCCGGGCCACACGCGAGCTGCAATGGCAGAGACAATGGGTTCCAACCTTGAAAGAGGGAGGTAATCCCTAAACCCTGCCTCAGTTGGGATCGAGGGCTGCAACCCGCCCTCGTGAACATGGAATGCCTAGTAATCGCGTGTCATCATCGCGCGGTGAATACGTCCCCGCTCCTTGCACACACCGCCCGTCGCTCCATCCGAGTGGGGTTTGGGTGAGGCGTGGTCTGTTGGCCGCGTCGAATCTAGGCTTCGCGAGGAGGGAGAAGTCGTAACAAGGTGGCCGTAGGGGAACCTGCGGCCGGATCACCTCCT', 'BA2-16S') suns_db = '/home/allis/Documents/INMI/SunS-metagenome/BlastDB-big/sunspring_meta' silva_db = '/home/allis/Documents/INMI/SILVA-DB/SILVA_123_SSURef_Nr99_tax_silva' additions = [ simple_rec( 
'AAACTGGGGCTAATCCCCCATAGGCCTGGGGTACTGGAAGGTCCCCAGGCCGAAAGGG------GACCGTA-----AGGTCCCGCCCGAGGATGGGCCGGCGGCCGATTAGGTAGTTGGTGGGGTAACGGCCCACCAAG--CCGAAGATCGGTACGGGCC-GTGAGAGCGGGAGCCCGGAGATGGACA---CTGAGACACGGGTCCAGGCCCTACGGGGCGCAGCAGGCGCGAAACC-TCCGCAATGCGGGAAACCGCGACGGGGGGACCCCCAGTGCCGTGCCTCTGGC-----ACGGCTTTTCCGGAGTG-TAAAAAGCTCCGGGAATAAGGGCTGGGCAAGGCCGGTGGC-AGCCGCCGCGGTAATACCGGCGGCCCGAGTGGTGGCCACTATTATTGGGCCTAAAGCGGCCGTAGCCGGGCCCGTAAGTCCCTGGCG-AAATCCCACGGCTCAACCGTGGGGCTCGCTGGGGATACTGCGG-GCCTTGGGACCGGGAGAGGCCGGGGGTACC-CCCGGGGTAGGGGTGAAATCCTATAATCCCGGGGGGACCGCCAGT-GGCGAAGGCGCCC--GGCTGGAACGGGTCCGACGGTGAGGGCCGAAGGCC-AGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCTGGCTGTAAAGGATGCGGGCTAGGTGTCGGGCGAG-CTTCGAGCTCGC-CCGGTGCCGTAGGGAAGCCGTTAAGCCCGCCGCC-TGGGGAGTACGGCCGCAAGGCT-GAAACTTAAAGGAATT-GGCGGGGGAGC-ACTACAAGGGGTGGAGCGTGCGGTTTAATTGGATTCAACGCCGGGAACCTCACCGGGGGCGACGGCAGGATGAA-GGCCAGGCTGAAGGTCTTGCCGGACGCGCCGAGAGGAG-----------------------------------GTGCATGGCCGCCGTCAGCTCGTACCGTGAGGCGTCCA-CTTAAGTGTGGTAACGAGCGAGACCCGC--GCCCCCAGTTGCCAGTCCCTCCCGCTGGGA---GGGAGGC-ACTCTGGGGGG-ACTGCCGGCGAT-AAGCCGGAGGAAGGGGCGGGCGACGGTAGGTCAGTATG-CCCCGAAACCC-CCGGGCT-ACACGCGCGCTACAATGGGCGGGACAATGGGA-CCCGACCCCGAAAGGGGAAGGGAATCCCCTAAACCCGCCCTCAGTTCGGATCGCGGGCTG-CAACTCGCCCGCGTGAAGC-TGGAAT-CCCTAGTACCCGCGCGTCATCATCGCGCGGCGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCACTCCACCCGAG-CGGGGCCC-GGGTGAGGCCCGATCTCCTTCGGGAGGTCGGGTCGAGCCTGGGCTC-CGTGAGGGGGG-AGAAGTCGTAACAAGGTAGCC------------------------------' .replace('-', ''), 'Thermococcus_chitonophagus'), simple_rec( 
'AAACTGGGATTAATACCCACTAAATGATAATACCTGGAATGGCTTATCATTGAAAGAC-TCTGGAAACATGCTTC-CAGCGTCGCCCAAGG-------------------------------------------------------------------------------GGAGCCCGGAGATGGAAA---CTGAGACAAGGTTCCAGGCCCTACGGGGCGCAGCAGGCGCGAAACC-TCCACAATGCGCGAAAGCGTGATGGGGTTATCCCGAGTGCCGTCCGATGAGG-----ATGGCTTTTCCTCGGTG-TAAGGATCCGAGGGAATAAAGGGGGGGCAAGACTGGTGTC-AGCCGCCGCGGTAATACCAGCTCCCTGAGTGGTAAGGACGATTATTTGGCCTAAAGCGTCCGTAGCCGGCTTATCAAGTCTCTTGTT-AAACCCAGTGATTCAATCATTGACCT-GCAAGAGATACTGTTA-TGCTAGAGGACGGGAGAGGTCGACGG---------GGGTAGGGGTGAAATCCTATAATCCTTGGAGGACCACCAGT-GGCGAAGGCGGTC--GACTAGAACGTGCCTGACGGTGAGGGACGAAAGCT-GGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCCAGCTGTAAACGATGCGGGCTAGGTGTTGGGGTAG-CTACGAGCTACT-CCAGTGCCGCAGAGAAGTTGTTAAGCCCGCCGCC-TGGGGAGTACGGCCGCAAGGCT-GAAACTTAAAGGAATT-GGCGGGGGAGC-ACCACAAGGGGTGAAGGCTGCGGTTTAATTGGAGTCAACGCCGGGAACCTTACCGGGGCTGACAGCAGAGTGAA-GGCCAGACTGAAGATCTTGCCAGACAAGCTGAGAGGAGGTGCATGAAGATCTTGCCAGACAAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCT-GTTAAGTCAGGCAACGAACGAGACCCCC--ACTGTTAGTTGCCAGCGAATTCCAACGGAAT--GTCGGGC-ACACTAACAGG-ACTGCCACCGAT-AAGGTGGAGGAAGGAGGGGGCAACGGCAGGTCAGTATG-CCCC--------------------------------------------------------------------------------------------------------------GAACTCGCCCTCATGAACA-TGGAAT-CCCTAGTAACCGCGTGTCATCATCGCGCGGTGAATACGTCCCCGCTCCTTGCACACACCGCCCGTCGCTCCATCCAAG-TCGGGTCT-AGATGAGGCGCAGTCTTCT-----TGGCTACGTCGAATCTGGGTTC-GGTGAGGGGGG-AGAAGTCGTAACAAGGTGGCCGTAGGGGAACCTGCGGCCGGATCACCTCCT' .replace('-', ''), 'SMTZ1-55'), simple_rec( 
'ACTCCGGTTGATCCTGCCGGACCCCACTGCTATCGGGGTAGGACTTAACCATGCGAGTTGTGCGTCCCCAAGCCATGGTGGGGGCGCGGCATACGGCTCAGTAACACGTGGCTAACCTAGCCTTTGGACGGGGACAACCCCGGGAAACTGGGGCTAATCCCCGATGGGTGGGAAGGCCTGGAATGGTTTCCCACCGAAAGGGCGTCTGAACCATGCTTCAGGCGTTGCCGAAGGATGGGGCCGCGGCCGATCAGGTTGTTGGTGAGGTAACGGCTCACCAAGCCTATAACCGGTACGGGCCGTGAGAGCGGGAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCAGCAGGTGCGAAAACTCCGCGATGCGCGAAAGCGTGACGGGGCTATCCCGAGTGCCGTCCGCTGAGGATGGCTTTTCCCCGGTGTAGGGAGCCGGGGGAATAAGGAGAGGGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCTCTCCGAGTGGTGGGGACAATTATTGGGCTTAAAGCGTCCGTAGCCGGCCCATCAAGTCTCTTGTTAAATCCAGCGATCCAATCGCTGGACTGCGGGAGATACTGCTGGGCTAGGGGGCGGGAGAAGCCGATGGTATTCTCGGGGTAGGGGTGAAATCCTATAATCCCGGGAGGACCACCAGTGGCGTAGGCGGTCGGCTAGAACGCGCCCGACGGTGAGGGACGAAAGCTGGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCCAGCCGTAAACGATGCGGGCTAGGTGTTGGGGTGGCTACGAGCCACCCCAGTGCCGCATGGAAGCAATTAAGCCCGCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCACCACAAGGGGTGAAGCTTGCGGTTTAATTGGAGTCAACGCCGGGAAAGGAACAGCGTTTTGTTGTTCCTCTGGATACCTTACCGGGGGCGACAGCAGGATGAAGGCCAGATTGAAGGTCTTGCTGGACGAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCTGTTAAGTCAGGTAACGATCGAGACCCACACCCCCAGTTGCTACCTCTTCGGAGGGCACTCTAGGGGTACTGCCGCCGATAAGGCGGAGGAAGGAGTGGGCCACGGCAGGTCAGTATGCCCCGAATCCCCCGGGCCACACGCGAGCTGCAATGGCAAGGACAATGGGTTCTGACCCCGAGAGGGGAAGGTAATCCCGAAACCCTGCCTCAGTTGGGATCGAGGGCTGAAACCCGCCCTCGTGAACATGGAATCCCTAGTAATCGCGGGTCACCAGCCCGCGGTGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCGCTCCATCCGAGTGGGGTTTAGGTGAGGCGTGGTCCTTGTGGCTGTGTCGAATCTAGGCTTCGCGAGGAGGGAGAAGTCGTAACAAGGTGGCCGTAGGGGAACCTGCGGCCGGATCACCTC', 'BA1-16S'), simple_rec( 
'CTGGTGGAAATATAGAAGAGGCCAAATCCGGGGTTCAGGCCGCCCGGGGTAATTACCCGTTGTCGGAGTGGGGGGGGGACGCTATTGGGGCTTAAGCCATCGTTAGCCCGTTTGACCAGGTCTCTTGTTAAATCAGGCGGATTTATTGGTCGATTGCAGGAGATTATGTTCGTCTTAGGGGCCGGAGGAGTCAACAGTATTCCCGGGGTAGGAGTGAATGCCTATATTCCCGGAGGTACCACCAGTGGGGACGCCGTTGGTATAGAACGCGCCGGCCGGTGATGGAATGAAAGTGAGGGAACCGACCCGAATTAGATACCGGGGTATTGCTACCGTTAACCGATGCAGCTTAGGTGTTCGGGTGGTTACTAGCCATTCGAGTGCGCCAGGGAAGCTGTCAGGCTTACCGCTTGGGAAGTGCGGCTGCAGGGCCAAAACTTAAGGAAATCGCCGGGGAAGCACCCCAGGGGGTGAAGCTTGCGCTTTAATGGAATTCACCGCGGTAATTCTCACCGGGGGAGCCACCAGGAGGAAAGCCAGATTAAAGTTCTTGTTGGCGGAGTGGAGAGGAGGTGCATGCCGTTCGCCAGTTCTTCCCGGGAGGTTCTTGTTAGTTCAGCCACCGATGAGGACCGCCATCCCCTGTTGTTATTGGCCTTGCGCCAGGCACACTGGGGAGACCGCCGCCGATAAGGCGGAGGAAGGAGCGGGCCACGGCAGGTCAGTATGCCCCGAATCCCCCGTCCACACGCGAGGGGCAATG', '155a'), simple_rec( 'CAAGTCCTATAACCGGTACGGGCCGTGGGAGCGGTAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCAGCAGTCGCGAAACCTCCGCAATACGCGAAAGCGTGACGGGGTCATCCCGAGTGCCGTCCGCTGAGGATGGCTTTTCCCCAGTGTAGACAGCTGGGGGAATAAGGAGAGGGCAAGTCGGGTGTCAGCCGCCGCGGTAATACCCGCTCTCCGAGTGGTGGGGACGCTTATTGGGCCTAAAGCATCCGTAGCCGGCTGGACAAGTCCCCTGTTAAATCCAGCGATTTAATCGTTGGACTGCGGGGGATACTGTCCGGCTAGGGGGCGGGAGAGGCCGACGGTATTTCCGGGGTAGGGGTGAAATCCTATAATCCCGGGAGGACCACCAGTGGCGAAGGCTGTCGGCTAGAACGCGCCCGACGGTGAGGGATGAAAGCTGGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCCAGCCGTAAACGATGCAGGCTAGGTGTTCGGGTGGCTACGTGCCACTCGAGTGCCGCAGGGAAGCTGTTAAGCCTGCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCACCACAAGGGGTGAAGCTTGCGGTTTAATTGGAGTCAACGCCGGAAATCTCACCGGGGGAGACAGCAGGATGAAAGCCAGATTAAAGGTCTTGCTAGACGAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCTGTTAAGTCAAGGCAACGATCGAGACTCGCATCCTCTGTTGCTACTACCCTTGCGCCAAGGCACACTGGGGGAGACCGCCGCTCGATAAGGCGGAAGGAAGGAGCGGCCCACGGCAGTCAGTATGCCCCGAATTCCCTCGGCCACACGCAAGCTGCAATG', '156a'), simple_rec( 
'GGGGATCGGGGCATACTGACCTGCCGTGGCCCGCTCCTTCCTCCGCCTTATCGGCGGCGGTCTCCCCAGTGTGCCTGGCGCAAGGGCAGTAACAACAGGGGATGGGGGTCTCGATCGGTGGCTGGCTTAACAGGAAACCTCACGGGACGAACTGGCGAACGGCATGGACCTTCTCTCAACTTGGCTAAGAAGAACTTTAATCTGGCTTTCATTCTGGTGGCTTCCCCGGTGAGAATTCCGGCGGTGACTCCCAATAAAACGCAAGCTTCACCCCTTGGGGTGGTTCCCCGGCCATTTCTTTAAGGTTCAAGCTTTGCGGCGGTATTCCCAAGCGGCAAGGTTAACAGCTTCCCTGCCGCACTCGAGTGGCACGTAACCACCCGAACAACTAACCTGCATCCGTTACCGGTTGGACTAACCCGGTATCTAATCCGGGTCGCTCCCCCAGCCTTCATTCCTTCACCGTCCGGCGCGGTTCTAAGCGACCGGCTTTCGCACTTGTGGTTCCTCCCGGGGATTATAAGAATTCACCCCTACCCCGGAAATTACGGTCCGGCTCCTCCGGCCCCTAACCCGACACGTAATCCCCCGCCAGTTCAACCGATTAAATCCGCTTGAATTTAACAAGGGGGACCTTGTCCAGCCGGCCTACGGATGCTTTAAGGCCCAATAAGCCGTCCCCACCACTCCGAGAGCGGGTAATAACCGCGGCCGGCCTGACAACCGACCTGGCCTCTCCTAAATCCCCCAGCTGTTCACACTTGGGAAAGGGCATTCCTCAGCGAACGGCACTTCGGGATGAACCCGTCACGCTTTCGCGTAATTGCGGGAAGGTTTCGCGAACTGCTGCGCCCCGTAAAGGCCTGGGTCCTTGTGTCTCAAATTGCCCCATCTCCGGGCTATACGCTCTCCACGGGCCCGTACC', '157a') ] #prepare filter filt = BlastFilter(lambda a, r: a.hsps[0].align_length > 1100) filt.AND = BlastFilter( lambda a, r: all(hsp.score > 500 for hsp in a.hsps)) filt.AND.AND = BlastFilter(lambda a, r: all( hsp.identities / float(hsp.align_length) > 0.8 for hsp in a.hsps)) #make ring-blast blast = BlastCLI(self.abort_event) orig_seqs = blast.ring_blast(query, suns_db, 100, filt, 3) if not orig_seqs: print 'No blast results.' 
return 1 nseqs = len(orig_seqs) print 'RingBlast to:\n%s\nreturned %d sequences.\n' % (suns_db, nseqs) #save an initial alignment self.fix_ids(orig_seqs) alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.fasta' with user_message('Aligning retrieved sequences...', '\n'): if not AlignmentUtils.align(orig_seqs + [query] + additions, outfile=alifile): return 3 #search for additional homologs add_seqs = blast.ring_blast(orig_seqs, silva_db, 100, filt, 0) if add_seqs: self.fix_ids(add_seqs) print 'RingBlast to:\n%s\nreturned %d additional sequences.\n' % ( silva_db, len(add_seqs)) #build an alignment seqs = orig_seqs + add_seqs + [query] + additions alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.big.aln.fasta' with user_message('Aligning retrieved sequences...', '\n'): if not AlignmentUtils.align(seqs, outfile=alifile): return 3 #build a tree treefile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.big.aln.tre' if not PhyloUtils.build_fast_tree(alifile, treefile): return 4 #annotate the tree if False: with open( '/home/allis/Documents/INMI/16S/SSBaF4-SSBaR4-1_243072232-iPCR-report.txt' ) as inp: # SSBaF4-SSBaR4_65397396-iPCR-report.txt sids = set() len_re = re.compile(r'(\s|^)(\d+)(\sbp|\\s*:)?', re.MULTILINE) entry = False cur_sid = None cur_len = -1 for l in inp: if l == '========= histograms and electrophorograms of PCR products of each hit =========': break if l.startswith('---'): entry = False if cur_sid and cur_len > 0 and abs(cur_len - 920) < 60: sids.add(cur_sid) cur_sid = None cur_len = -1 continue if entry or '#' in l: entry = True plen = len_re.search(l) if plen: cur_len = int(plen.group(2)) sid = BlastID.extract(l)[0] if sid: cur_sid = sid organisms = Organisms.from_records(seqs) if PhyloUtils.annotate_tree( treefile, organisms, reroot_at='Thermococcus_chitonophagus', # beautify_leafs=True, # collapse_taxa=['miscellaneous crenarchaeotic group', 'thaumarchaeota'], # collapse_last=True, # 
collapse_hard=True, # mark_leafs=sids, mark_leafs=[r.id for r in orig_seqs + [query] + additions], lineage_colors={ 'miscellaneous crenarchaeotic group': (0, 0, 255), 'thaumarchaeta': (255, 0, 0) }, top_lineage=Lineage('archaea')): return 0 return 2
def _main(self):
    """Collect homologs of the BA2 16S sequence, align them and build a tree.

    Steps: ring-blast the query against the SunS metagenome database,
    align the hits together with the query and the hard-coded additional
    sequences, ring-blast the hits against the SILVA database, align
    everything again, build a tree from that alignment, read the ids to
    mark from an iPCR report and annotate the tree.

    Returns 0 on success, 1 when the first ring-blast finds nothing,
    3 when an alignment fails, 4 when the tree cannot be built and
    2 when the tree annotation fails.
    """
    query = simple_rec('AAACTGGGGCTAATACCCGATGGGTGAGGAGGCCTGGAATGGTTCTTCACCGAAAAGACGTTGAGACCATGCTTTTCAACGTTGCCTAAGGATGGGGCCGCGTCCGATCAGGTTGTTGGTGGGGTAACGGCTCACCAAGCCTATAACCGGTACGGGCCGTGGGAGCGGAAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCAGCAGTCGCGAAAACTCCGCAATGCGCGAAAGCGTGACGGGGCTACCCCGAGTGCCGTCCGCTGAGGATGGCTTTTCCCCGGTGTAATGAGCCTGGGGAATAAGGAGAGGGCAAGCCTGGTGTCAGCCGCCGCGGTAATACCAGCTCTCCGAGTGGTAGGGATGATTATTGGGCTTAAAGCGTCCGTAGCCAGCCCGGCAAGTCTCCCGTTAAATCCAGCGACCTAATCGTTGGGCTGCGGAAGATACTGTTGGGCTAGGGGGCGGGAGAGGCCGACGGTATTCCCGGGGTAGGGGTGAAATCCTATAATCCTGGGAGGACCACCAGTGGCGAAGGCTGTCGGCTAGAACGCGCTCGACGGTGAGGGACGAAAGCTGGGGGAGCGAACTGGATTAGATACCCGGGTAGTCCCAGCTGTAAACGATGCGGGCTAGGTGTTGGGGTGGCTACGAGCCACCTCAGTGCCGCAGGGAAGCCATTAAGCCCGCCGCCTGGGAAGTACGGCCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCACCACAAGGCGTGAAGCTTGCGGTTTAATTGGAGTCAACGCCGGGAACCTTACCGGGGGCGACAGCAGGATGAGGGCCAGATTGAAGGTCTTGCTTGACAAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCTGTTAAGTCAGGCAACGATCGAGACCCGCACCCTTAGTTGCAACCCCTGCGGAACCCGCAGGGGGCACACTACGGGAACTGCCGCCGATAAGGCGGAGGAAGGAGCGGGCCACGGCAGGTCAGTATGCCCCGAATCCCCCGGGCCACACGCGAGCTGCAATGGCAGAGACAATGGGTTCCAACCTTGAAAGAGGGAGGTAATCCCTAAACCCTGCCTCAGTTGGGATCGAGGGCTGCAACCCGCCCTCGTGAACATGGAATGCCTAGTAATCGCGTGTCATCATCGCGCGGTGAATACGTCCCCGCTCCTTGCACACACCGCCCGTCGCTCCATCCGAGTGGGGTTTGGGTGAGGCGTGGTCTGTTGGCCGCGTCGAATCTAGGCTTCGCGAGGAGGGAGAAGTCGTAACAAGGTGGCCGTAGGGGAACCTGCGGCCGGATCACCTCCT', 'BA2-16S')
    suns_db = '/home/allis/Documents/INMI/SunS-metagenome/BlastDB-big/sunspring_meta'
    silva_db = '/home/allis/Documents/INMI/SILVA-DB/SILVA_123_SSURef_Nr99_tax_silva'
    # Extra sequences added to every alignment; the first two are stored
    # with alignment gaps ('-') that are stripped before use.
    additions = [
        simple_rec('AAACTGGGGCTAATCCCCCATAGGCCTGGGGTACTGGAAGGTCCCCAGGCCGAAAGGG------GACCGTA-----AGGTCCCGCCCGAGGATGGGCCGGCGGCCGATTAGGTAGTTGGTGGGGTAACGGCCCACCAAG--CCGAAGATCGGTACGGGCC-GTGAGAGCGGGAGCCCGGAGATGGACA---CTGAGACACGGGTCCAGGCCCTACGGGGCGCAGCAGGCGCGAAACC-TCCGCAATGCGGGAAACCGCGACGGGGGGACCCCCAGTGCCGTGCCTCTGGC-----ACGGCTTTTCCGGAGTG-TAAAAAGCTCCGGGAATAAGGGCTGGGCAAGGCCGGTGGC-AGCCGCCGCGGTAATACCGGCGGCCCGAGTGGTGGCCACTATTATTGGGCCTAAAGCGGCCGTAGCCGGGCCCGTAAGTCCCTGGCG-AAATCCCACGGCTCAACCGTGGGGCTCGCTGGGGATACTGCGG-GCCTTGGGACCGGGAGAGGCCGGGGGTACC-CCCGGGGTAGGGGTGAAATCCTATAATCCCGGGGGGACCGCCAGT-GGCGAAGGCGCCC--GGCTGGAACGGGTCCGACGGTGAGGGCCGAAGGCC-AGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCTGGCTGTAAAGGATGCGGGCTAGGTGTCGGGCGAG-CTTCGAGCTCGC-CCGGTGCCGTAGGGAAGCCGTTAAGCCCGCCGCC-TGGGGAGTACGGCCGCAAGGCT-GAAACTTAAAGGAATT-GGCGGGGGAGC-ACTACAAGGGGTGGAGCGTGCGGTTTAATTGGATTCAACGCCGGGAACCTCACCGGGGGCGACGGCAGGATGAA-GGCCAGGCTGAAGGTCTTGCCGGACGCGCCGAGAGGAG-----------------------------------GTGCATGGCCGCCGTCAGCTCGTACCGTGAGGCGTCCA-CTTAAGTGTGGTAACGAGCGAGACCCGC--GCCCCCAGTTGCCAGTCCCTCCCGCTGGGA---GGGAGGC-ACTCTGGGGGG-ACTGCCGGCGAT-AAGCCGGAGGAAGGGGCGGGCGACGGTAGGTCAGTATG-CCCCGAAACCC-CCGGGCT-ACACGCGCGCTACAATGGGCGGGACAATGGGA-CCCGACCCCGAAAGGGGAAGGGAATCCCCTAAACCCGCCCTCAGTTCGGATCGCGGGCTG-CAACTCGCCCGCGTGAAGC-TGGAAT-CCCTAGTACCCGCGCGTCATCATCGCGCGGCGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCACTCCACCCGAG-CGGGGCCC-GGGTGAGGCCCGATCTCCTTCGGGAGGTCGGGTCGAGCCTGGGCTC-CGTGAGGGGGG-AGAAGTCGTAACAAGGTAGCC------------------------------'.replace('-', ''), 'Thermococcus_chitonophagus'),
        simple_rec('AAACTGGGATTAATACCCACTAAATGATAATACCTGGAATGGCTTATCATTGAAAGAC-TCTGGAAACATGCTTC-CAGCGTCGCCCAAGG-------------------------------------------------------------------------------GGAGCCCGGAGATGGAAA---CTGAGACAAGGTTCCAGGCCCTACGGGGCGCAGCAGGCGCGAAACC-TCCACAATGCGCGAAAGCGTGATGGGGTTATCCCGAGTGCCGTCCGATGAGG-----ATGGCTTTTCCTCGGTG-TAAGGATCCGAGGGAATAAAGGGGGGGCAAGACTGGTGTC-AGCCGCCGCGGTAATACCAGCTCCCTGAGTGGTAAGGACGATTATTTGGCCTAAAGCGTCCGTAGCCGGCTTATCAAGTCTCTTGTT-AAACCCAGTGATTCAATCATTGACCT-GCAAGAGATACTGTTA-TGCTAGAGGACGGGAGAGGTCGACGG---------GGGTAGGGGTGAAATCCTATAATCCTTGGAGGACCACCAGT-GGCGAAGGCGGTC--GACTAGAACGTGCCTGACGGTGAGGGACGAAAGCT-GGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCCAGCTGTAAACGATGCGGGCTAGGTGTTGGGGTAG-CTACGAGCTACT-CCAGTGCCGCAGAGAAGTTGTTAAGCCCGCCGCC-TGGGGAGTACGGCCGCAAGGCT-GAAACTTAAAGGAATT-GGCGGGGGAGC-ACCACAAGGGGTGAAGGCTGCGGTTTAATTGGAGTCAACGCCGGGAACCTTACCGGGGCTGACAGCAGAGTGAA-GGCCAGACTGAAGATCTTGCCAGACAAGCTGAGAGGAGGTGCATGAAGATCTTGCCAGACAAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCT-GTTAAGTCAGGCAACGAACGAGACCCCC--ACTGTTAGTTGCCAGCGAATTCCAACGGAAT--GTCGGGC-ACACTAACAGG-ACTGCCACCGAT-AAGGTGGAGGAAGGAGGGGGCAACGGCAGGTCAGTATG-CCCC--------------------------------------------------------------------------------------------------------------GAACTCGCCCTCATGAACA-TGGAAT-CCCTAGTAACCGCGTGTCATCATCGCGCGGTGAATACGTCCCCGCTCCTTGCACACACCGCCCGTCGCTCCATCCAAG-TCGGGTCT-AGATGAGGCGCAGTCTTCT-----TGGCTACGTCGAATCTGGGTTC-GGTGAGGGGGG-AGAAGTCGTAACAAGGTGGCCGTAGGGGAACCTGCGGCCGGATCACCTCCT'.replace('-', ''), 'SMTZ1-55'),
        simple_rec('ACTCCGGTTGATCCTGCCGGACCCCACTGCTATCGGGGTAGGACTTAACCATGCGAGTTGTGCGTCCCCAAGCCATGGTGGGGGCGCGGCATACGGCTCAGTAACACGTGGCTAACCTAGCCTTTGGACGGGGACAACCCCGGGAAACTGGGGCTAATCCCCGATGGGTGGGAAGGCCTGGAATGGTTTCCCACCGAAAGGGCGTCTGAACCATGCTTCAGGCGTTGCCGAAGGATGGGGCCGCGGCCGATCAGGTTGTTGGTGAGGTAACGGCTCACCAAGCCTATAACCGGTACGGGCCGTGAGAGCGGGAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCAGCAGGTGCGAAAACTCCGCGATGCGCGAAAGCGTGACGGGGCTATCCCGAGTGCCGTCCGCTGAGGATGGCTTTTCCCCGGTGTAGGGAGCCGGGGGAATAAGGAGAGGGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCTCTCCGAGTGGTGGGGACAATTATTGGGCTTAAAGCGTCCGTAGCCGGCCCATCAAGTCTCTTGTTAAATCCAGCGATCCAATCGCTGGACTGCGGGAGATACTGCTGGGCTAGGGGGCGGGAGAAGCCGATGGTATTCTCGGGGTAGGGGTGAAATCCTATAATCCCGGGAGGACCACCAGTGGCGTAGGCGGTCGGCTAGAACGCGCCCGACGGTGAGGGACGAAAGCTGGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCCAGCCGTAAACGATGCGGGCTAGGTGTTGGGGTGGCTACGAGCCACCCCAGTGCCGCATGGAAGCAATTAAGCCCGCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCACCACAAGGGGTGAAGCTTGCGGTTTAATTGGAGTCAACGCCGGGAAAGGAACAGCGTTTTGTTGTTCCTCTGGATACCTTACCGGGGGCGACAGCAGGATGAAGGCCAGATTGAAGGTCTTGCTGGACGAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCTGTTAAGTCAGGTAACGATCGAGACCCACACCCCCAGTTGCTACCTCTTCGGAGGGCACTCTAGGGGTACTGCCGCCGATAAGGCGGAGGAAGGAGTGGGCCACGGCAGGTCAGTATGCCCCGAATCCCCCGGGCCACACGCGAGCTGCAATGGCAAGGACAATGGGTTCTGACCCCGAGAGGGGAAGGTAATCCCGAAACCCTGCCTCAGTTGGGATCGAGGGCTGAAACCCGCCCTCGTGAACATGGAATCCCTAGTAATCGCGGGTCACCAGCCCGCGGTGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCGCTCCATCCGAGTGGGGTTTAGGTGAGGCGTGGTCCTTGTGGCTGTGTCGAATCTAGGCTTCGCGAGGAGGGAGAAGTCGTAACAAGGTGGCCGTAGGGGAACCTGCGGCCGGATCACCTC', 'BA1-16S')
    ]
    # prepare filter: first HSP longer than 1100, every HSP scoring above
    # 500 and every HSP with an identity fraction above 0.8
    filt = BlastFilter(lambda a: a.hsps[0].align_length > 1100)
    filt.AND = BlastFilter(lambda a: all(hsp.score > 500 for hsp in a.hsps))
    filt.AND.AND = BlastFilter(lambda a: all(hsp.identities/float(hsp.align_length) > 0.8
                                             for hsp in a.hsps))
    # make ring-blast
    blast = BlastCLI(self.abort_event)
    orig_seqs = blast.ring_blast(query, suns_db, 100, filt, 3)
    if not orig_seqs:
        print('No blast results.')
        return 1
    nseqs = len(orig_seqs)
    print('RingBlast to:\n%s\nreturned %d sequences.\n' % (suns_db, nseqs))
    # save an initial alignment
    self.fix_ids(orig_seqs)
    alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.fasta'
    with user_message('Aligning retrieved sequences...', '\n'):
        if not AlignmentUtils.align(orig_seqs+[query]+additions, outfile=alifile):
            return 3
    # search for additional homologs
    add_seqs = blast.ring_blast(orig_seqs, silva_db, 100, filt, 0)
    if add_seqs:
        self.fix_ids(add_seqs)
        print('RingBlast to:\n%s\nreturned %d additional sequences.\n' % (silva_db, len(add_seqs)))
    # build an alignment
    seqs = orig_seqs+add_seqs+[query]+additions
    alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.big.aln.fasta'
    with user_message('Aligning retrieved sequences...', '\n'):
        if not AlignmentUtils.align(seqs, outfile=alifile):
            return 3
    # build a tree
    treefile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.big.aln.tre'
    if not PhyloUtils.build_fast_tree(alifile, treefile):
        return 4
    # annotate the tree
    # Collect the ids of report entries whose product length is within 60
    # of 920; these leaves are marked in the tree.
    with open('/home/allis/Documents/INMI/16S/SSBaF4-SSBaR4-1_243072232-iPCR-report.txt') as inp:  # SSBaF4-SSBaR4_65397396-iPCR-report.txt
        sids = set()
        # NOTE(review): '\\s*:' in this raw string matches a literal
        # backslash; only group 2 is used, so results are unaffected.
        len_re = re.compile(r'(\s|^)(\d+)(\sbp|\\s*:)?', re.MULTILINE)
        entry = False
        cur_sid = None
        cur_len = -1
        for l in inp:
            # Lines read from a file keep their newline, so compare the
            # stripped line; the raw line could never equal the marker and
            # parsing used to run on into the histogram section.
            if l.strip() == '========= histograms and electrophorograms of PCR products of each hit =========':
                break
            if l.startswith('---'):
                # A separator closes the current entry.
                entry = False
                if cur_sid and cur_len > 0 and abs(cur_len-920) < 60:
                    sids.add(cur_sid)
                cur_sid = None
                cur_len = -1
                continue
            if entry or '#' in l:
                entry = True
                plen = len_re.search(l)
                if plen:
                    cur_len = int(plen.group(2))
                sid = BlastID.extract(l)[0]
                if sid:
                    cur_sid = sid
    organisms = Organisms.from_records(seqs)
    if PhyloUtils.annotate_tree(treefile, organisms,
                                reroot_at='Thermococcus_chitonophagus',
                                # beautify_leafs=True,
                                # collapse_taxa=['miscellaneous crenarchaeotic group', 'thaumarchaeota'],
                                # collapse_last=True,
                                # collapse_hard=True,
                                mark_leafs=sids,  # [r.id for r in orig_seqs+[query]+additions],
                                lineage_colors={'miscellaneous crenarchaeotic group': (0, 0, 255),
                                                # was misspelled 'thaumarchaeta',
                                                # which no lineage can match
                                                'thaumarchaeota': (255, 0, 0)},
                                top_lineage=Lineage('archaea')):
        return 0
    return 2
def _main(self):
    """Fetch genomic segments around the cooS homologs and search for primers.

    When the segments file does not exist yet, the gene is extracted from
    the reference genome and BLASTed (results are read from the cached XML
    file when it is present), HSPs of 700 or less are filtered out, the
    surrounding segments are fetched and saved, and 0 is returned.
    Otherwise the saved segments and three local genomes are loaded and
    forward ('transF') and reverse ('cooS') primers are searched for;
    the primers are printed and added to the saved alignments.

    Returns 0 after fetching the segments, 1 when no forward or no reverse
    primers are found, and None after the alignments have been saved.
    Raises RuntimeError when the genome or the gene cannot be loaded or
    BLAST returns no results.
    """
    email = '*****@*****.**'
    genome_dir = '/home/allis/Dropbox/Science/Микра/Thermococcus/sequence/GenBank/Thermococcales/Thermococcus/'
    genome = 'Thermococcus_barophilus_Ch5.gb'
    gene = 'TBCH5v1_1369'  # cooS
    database = 'nr'
    segment = [3200, 12000]
    seq = SeqLoader.load_file(os.path.join(genome_dir, genome))
    if not seq:
        raise RuntimeError('No genome loaded')
    seq = seq[0]
    index = get_indexes_of_genes(seq, gene)
    if not index:
        raise RuntimeError('No gene found')
    feature = seq.features[index[0]]
    query = feature.extract(seq)
    segments_file = 'CO-clusters.gb'
    # get cluster variants if needed
    if not os.path.isfile(segments_file):
        blast_file = 'blast.results.xml'
        if os.path.isfile(blast_file):
            # Close the cached results file deterministically; list()
            # consumes the parser completely before the file is closed.
            with open(blast_file) as cached:
                blast = list(parse(cached))
        else:
            blast = BlastCLI.blast_seq(query, database, 100,
                                       remote=True, task='blastn',
                                       parse_results=True,
                                       save_results_to=blast_file)
        if not blast:
            raise RuntimeError('Blast returned no results')
        flt = BlastFilter(lambda hsp, r: hsp.align_length > 700,
                          filter_hsps=True)
        flt(blast)
        queries = []
        for ali in BlastCLI.iter_alignments(blast):
            q = BlastCLI.Query(ali, 'hsp',
                               start_offset=segment[0],
                               end_offset=segment[1])
            if q:
                queries.append(q)
                print(queries[-1])
        segments = BlastWWW.fetch_queries(email, queries)
        safe_write(segments, segments_file)
        for r in segments:
            print('[%s] %s: %dbp' % (r.id, pretty_rec_name(r), len(r)))
        # NOTE(review): the original indentation was lost; this return is
        # assumed to end the fetch branch only - confirm against the file.
        return 0
    # find primers in alignments of the selected features
    local_files = [
        os.path.join(genome_dir, f)
        for f in ('Thermococcus_barophilus_DT4-complete-genome.gb',
                  'Thermococcus_ST-423.gb',
                  'Thermococcus_CH1-complete.gb')
    ]
    loader = SeqLoader(self.abort_event)
    segments = loader.load_files([segments_file] + local_files)
    fprimers, transF_ali = find_primers(
        segments, 'transF',
        dict(plen=(20, 30),
             max_mismatches=5,
             min_first_matches=3,
             AT_first=True))
    rprimers, cooS_ali = find_primers(
        segments, 'cooS',
        dict(plen=(20, 30),
             max_mismatches=4,
             min_first_matches=3,
             AT_first=True),
        reverse=True)
    if not fprimers:
        print('\nNo forward primers found')
        return 1
    if not rprimers:
        print('\nNo reverse primers found')
        return 1
    print('\nForward primers:')
    for p in fprimers:
        print('%s: %s' % (p.id, p))
    print('\nReverse primers:')
    for p in rprimers:
        print('%s: %s' % (p.id, p))
    print()
    # add primers to alignments and save them
    transF_ali = PrimerFinder.add_primers_to_alignment(fprimers, transF_ali)
    cooS_ali = PrimerFinder.add_primers_to_alignment(rprimers, cooS_ali,
                                                     reverse=True)
    AlignmentUtils.save(transF_ali, 'transF.aln')
    AlignmentUtils.save(cooS_ali, 'cooS.aln')