def add_primers_to_alignment(primers, alignment, reverse=False):
    """Append primer master sequences to a copy of an alignment and realign it.

    The given alignment is cloned, so the caller's object is not modified.
    Each appended sequence is right-padded with '-' up to the width the
    alignment had before anything was appended.

    :param primers: iterable of primer objects providing `master_sequence`
        and supporting `len()`.
    :param alignment: alignment object providing `clone()`,
        `get_alignment_length()` and `append()`.
    :param reverse: when true, the reverse complement of each master
        sequence is appended (attributes copied over with `copy_attrs`).
    :return: whatever `AlignmentUtils.align` returns for the extended copy.
    """
    extended = alignment.clone()
    # the width is measured once, before any primer is appended
    width = extended.get_alignment_length()
    for primer in primers:
        if reverse:
            record = copy_attrs(primer.master_sequence,
                                primer.master_sequence.reverse_complement())
        else:
            record = primer.master_sequence
        padding = '-' * (width - len(primer))
        extended.append(record + padding)
    return AlignmentUtils.align(extended)
def _main(self):
    """Print alignment columns where the main group differs from the others.

    Loads the first alignment from a hard-coded FASTA file, sorts its records
    into the groups listed in `group_names` by substring match on the record
    description, and reports every column that is either one of the
    predefined reference positions or is well conserved in the main group
    while at least one other group has a different prevailing letter.

    :return: 1 if the alignment could not be loaded, 2 if no record matching
        `ref_name` was found; otherwise falls through and returns None.
    """
    alifile = '/home/allis/Documents/INMI/Fervidicoccales-signature/arb-silva.de_2016-04-06_id331139.fasta'
    group_names = ['Fervidicoccales', 'Acidilobales', 'Desulfurococcales', 'Thermoproteales:Sulfolobales', 'Other']
    predefined_positions = [34, 501, 544, 1244, 1293]
    ref_name = 'Escherichia'
    reference = None
    groups = ListDB()
    with user_message('Loadding initial alignment...', '\n'):
        ali = AlignmentUtils.load_first(alifile)
        if not ali:
            return 1
    with user_message('Sorting alignment into subgroups...', '\n'):
        for rec in ali:
            if ref_name in rec.description:
                reference = rec
                continue
            found = False
            for g in group_names:
                # a group name may list several ':'-separated keywords
                for k in g.split(':'):
                    if k in rec.description:
                        groups[g] = rec
                        found = True
                        break
            if not found:
                groups['Other'] = rec
        groups = dict((n, AlignmentExt(groups[n])) for n in groups)
    # Everything below dereferences the reference record; without this guard
    # a missing reference surfaced as an error on a None object.
    if reference is None:
        print('\nNo reference sequence matching "%s" found in the alignment' % ref_name)
        return 2
    ali_len = ali.get_alignment_length()
    # a set makes the per-column membership test below constant-time
    predefined_positions = set(self._col_index(i, reference)
                               for i in predefined_positions)
    print('\nReference sequence:\n>%s\n%s' %
          (reference.description,
           str(reference.seq).replace('.', '').replace('-', '')))
    print('\nAlignment: %d seqs, %d columns' % (len(ali), ali_len))
    print(print_table([(g, '%d sequences' % len(groups[g])) for g in group_names]))
    print('')
    main_group = group_names[0]
    main_ali = groups[main_group]
    others = group_names[1:]
    for ci in xrange(ali_len):
        main_letter = self.LetterStats(main_ali[:,ci])
        predef = ci in predefined_positions
        if predef or (main_letter.freq_no_gaps >= 0.95 and main_letter.freq > 0.5):
            other_letters = [self.LetterStats(groups[g][:,ci]) for g in others]
            if predef or any(l.letter != main_letter.letter for l in other_letters):
                print('------------------ E.coli position: %d ---------------------' %
                      (self._ref_index(ci, reference)+1))
                print(print_table([(main_group, str(main_letter))] +
                                  [(g, str(l)) for g, l in zip(others, other_letters)]))
                print('')
    print('Done')
def find_primers(segments, foi, pfinder_args, reverse=False, qualifier='ugene_name'):
    """Align one feature of interest across segments and search it for primers.

    For every segment the first feature whose `qualifier` matches `foi` is
    extracted; segments without such a feature are skipped. The extracted
    sequences are aligned and passed to `PrimerFinder.find_specific_primers`.

    :param segments: iterable of annotated sequence records.
    :param foi: qualifier value of the feature of interest; also used
        (with a trailing '_') as the prefix passed to `compile_primers`.
    :param pfinder_args: dict of keyword arguments for
        `PrimerFinder.find_specific_primers`.
    :param reverse: forwarded to both PrimerFinder calls.
    :param qualifier: name of the feature qualifier to match against `foi`.
    :return: always a 2-tuple `(primers, alignment)`; `(None, None)` when no
        feature was found or the alignment failed, so callers can unpack the
        result unconditionally.
    """
    genes = []
    for s in segments:
        i = get_feature_indexes_by_qualifier(s, qualifier, foi)
        if not i:
            continue
        genes.append(s.features[i[0]].extract(s))
    # nothing to align: do not call the aligner with an empty input
    if not genes:
        return None, None
    ali = AlignmentUtils.align(genes)
    if not ali:
        # was a bare `return None`, which broke tuple-unpacking callers
        return None, None
    primer_alis = PrimerFinder.find_specific_primers(ali, reverse=reverse,
                                                     **pfinder_args)
    return PrimerFinder.compile_primers(primer_alis, foi + '_', reverse), ali
def add_pairs_to_alignment(pairs, alignment):
    """Append the primers of each pair to a copy of an alignment and realign.

    Forward primers are appended as is, reverse primers as the reverse
    complement of their master sequence. A primer id is appended only once
    even when it occurs in several pairs. Appended sequences are right-padded
    with '-' up to the original alignment width.

    :param pairs: iterable of objects with `forward` and `reverse` primers.
    :param alignment: alignment object; it is cloned, not modified.
    :return: whatever `AlignmentUtils.align` returns for the extended copy.
    """
    seen_ids = set()
    extended = alignment.clone()
    width = extended.get_alignment_length()
    for pair in pairs:
        fwd = pair.forward
        if fwd.id not in seen_ids:
            extended.append(fwd.master_sequence + '-' * (width - len(fwd)))
            seen_ids.add(fwd.id)
        rev = pair.reverse
        if rev.id not in seen_ids:
            rev_record = copy_attrs(rev.master_sequence,
                                    rev.master_sequence.reverse_complement())
            extended.append(rev_record + '-' * (width - len(rev)))
            seen_ids.add(rev.id)
    return AlignmentUtils.align(extended)
def hmmbuild(alignment, outfile, name=None, **kwargs):
    """Build an HMM profile from an alignment by running HMMBuildCommandline.

    :param alignment: either a filename (str) of an alignment file or a
        MultipleSeqAlignment instance; the latter is first written to a
        temporary file with `AlignmentUtils.mktmp`.
    :param outfile: path of the HMM file to produce.
    :param name: name of the profile; defaults to the basename of `outfile`
        with its extension stripped.
    :param kwargs: extra options passed to HMMBuildCommandline.
    :return: False if the input is of an unsupported type or the temporary
        file could not be created; otherwise the result of `run_cline`.
    """
    unlink_file = False
    if isinstance(alignment, str):
        msafile = alignment
    elif isinstance(alignment, MultipleSeqAlignment):
        msafile = AlignmentUtils.mktmp(alignment)
        if not msafile:
            return False
        unlink_file = True
    else:
        print('Alignment must be either a filename or an instance of MultipleSeqAlignment')
        return False
    try:
        if not name:
            name = FilenameParser.strip_ext(os.path.basename(outfile))
        return run_cline(HMMBuildCommandline(input=msafile, out=outfile,
                                             n=name, cpu=cpu_count, seed=0,
                                             **kwargs),
                         _msg='Unable to build HMM profile')
    finally:
        # remove the temporary file even if building the command or running
        # it raises; a user-supplied file is never removed
        if unlink_file:
            os.unlink(msafile)
def hmmbuild(alignment, outfile, name=None, **kwargs):
    """Build an HMM profile from an alignment by running HMMBuildCommandline.

    The alignment is written to a temporary file with
    `AlignmentUtils.mktmp`, which is removed again once the command has run.

    :param alignment: alignment object accepted by `AlignmentUtils.mktmp`.
    :param outfile: path of the HMM file to produce.
    :param name: name of the profile; defaults to the basename of `outfile`
        with its extension stripped.
    :param kwargs: extra options passed to HMMBuildCommandline.
    :return: False if the temporary file could not be created; otherwise the
        result of `run_cline`.
    """
    import os
    msafile = AlignmentUtils.mktmp(alignment)
    if not msafile:
        return False
    try:
        if not name:
            # use only the file name: a directory part does not belong in
            # the profile name
            name = FilenameParser.strip_ext(os.path.basename(outfile))
        return run_cline(HMMBuildCommandline(input=msafile, out=outfile,
                                             n=name, cpu=cpu_count, seed=0,
                                             **kwargs),
                         _msg='Unable to build HMM profile')
    finally:
        # the temporary alignment is no longer needed, whatever the outcome
        if os.path.exists(msafile):
            os.unlink(msafile)
def _main(self):
    """Search a 16S alignment for discriminating primer pairs.

    Loads the alignment (or its cached '.with_additions' variant), optionally
    adds a few sequences from the SILVA file and realigns, then trims the
    alignment, checks that the outgroups are present, searches forward and
    reverse primers with `self._find_primers`, combines them into pairs whose
    product is longer than `min_prod`, and saves the original alignment with
    the primers appended.

    :return: 1 if the alignment cannot be loaded, 2 if the realigned file
        cannot be loaded, 3 if an outgroup is missing or no pairs are found;
        otherwise falls through and returns None.
    """
    min_prod = 400
    silva_db = '/home/allis/Documents/INMI/SILVA-DB/SILVA_123_SSURef_Nr99_tax_silva.fasta'
    alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.fasta'
    add_filename = FilenameParser.strip_ext(alifile)+'.with_additions.fasta'
    outgroups = ['Thermococcus_chitonophagus', 'SMTZ1-55', 'contig72135_1581_sunspring_meta']
    add = ['KF836721.1.1270','EU635905.1.1323']
    exclude = []
    # load the alignment; prefer the cached file with additions if present
    if os.path.isfile(add_filename):
        alifile = add_filename
        add_filename = ''
    with user_message('Loadding initial alignment...', '\n'):
        orig_ali = AlignmentUtils.load_first(alifile)
        if not orig_ali:
            return 1
    # load homologs and realign, only when no cached file was found
    if add_filename:
        with user_message('Loadding additional sequences...', '\n'):
            add_seqs = []
            db = SeqView()
            if db.load(silva_db):
                for sid in add:
                    seq = db.get(sid)
                    if not seq:
                        print('%s not found in %s' % (sid, silva_db))
                    else:
                        add_seqs.append(seq)
        if add_seqs:
            with user_message('Realigning data...', '\n'):
                add_filename = FilenameParser.strip_ext(alifile)+'.with_additions.fasta'
                combined = list(orig_ali)+add_seqs
                AlignmentUtils.align(combined, add_filename)
                orig_ali = AlignmentUtils.load_first(add_filename)
                if not orig_ali:
                    return 2
    # process the alignment
    ali = orig_ali.remove(*exclude).trim()
    for out in outgroups:
        if not ali.index(out):
            print('%s not found in the alignment' % out)
            return 3

    def _outgroups_last(rec):
        # outgroup records get a key that sorts them after the others
        if rec.id in outgroups:
            return 'zzzzzzzz'
        return rec.id

    ali.sort(key=_outgroups_last)
    ali_len = ali.get_alignment_length()
    AlignmentUtils.save(ali, '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.trimmed.fasta')
    args = dict(plen = (20,40),
                max_mismatches = 8,
                min_match_mismatches = 1,
                first_match_mismatches = 1,
                first_may_match = 1,
                AT_first=True,
                outgroup=len(outgroups))
    fprimers = self._find_primers(ali, **args)
    rprimers = self._find_primers(ali.reverse_complement(), **args)
    # combine forward and reverse primers into pairs with a long enough product
    pairs = []
    for fwd_start, fwd_seqs in fprimers:
        fprimer = Primer.from_sequences(fwd_seqs[:-1], 1, 'SSBaF%d' % fwd_start)
        for rev_start, rev_seqs in rprimers:
            rev_end = ali_len-rev_start
            if rev_end-fwd_start > min_prod:
                rprimer = Primer.from_sequences(rev_seqs[:-1], 1,
                                                'SSBaR%d' % (ali_len-rev_start+1))
                pairs.append((fprimer, rprimer))
    if not pairs:
        print('\nNo suitable primer pairs found')
        return 3
    # print the pairs and append each distinct primer to the alignment once
    seen_ids = set()
    for number, (fwd, rev) in enumerate(pairs, 1):
        print('\npair %d' % number)
        print('%s: %s' % (fwd.id, fwd))
        print('%s: %s' % (rev.id, rev))
        if fwd.id not in seen_ids:
            fwd_pad = '-'*(orig_ali.get_alignment_length()-len(fwd))
            orig_ali.append(fwd.master_sequence+fwd_pad)
            seen_ids.add(fwd.id)
        if rev.id not in seen_ids:
            rev_record = copy_attrs(rev.master_sequence,
                                    rev.master_sequence.reverse_complement())
            orig_ali.append(rev_record+'-'*(orig_ali.get_alignment_length()-len(rev)))
            seen_ids.add(rev.id)
    print('')
    orig_ali = AlignmentUtils.align(orig_ali)
    AlignmentUtils.save(orig_ali, '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.with_primers.aln.fasta')
    print('Done')
def _main(self):
    """Collect homologs of a 16S query by ring-blast, align them, build a tree.

    Steps, all on hard-coded paths: ring-blast the query against the SunS
    metagenome database, align the hits with the query and the hard-coded
    additional records, ring-blast the hits against the SILVA database,
    align everything, build a tree from the big alignment and annotate it.

    Returns 1 if the first ring-blast gives no results, 3 if an alignment
    fails, 4 if the tree cannot be built, 0 if the tree annotation succeeds
    and 2 otherwise.

    NOTE(review): the source this block was recovered from had lost its
    indentation; the nesting below (in particular what belongs under
    `if add_seqs:`) is the most plausible reading — confirm.
    """
    query = simple_rec(
        'AAACTGGGGCTAATACCCGATGGGTGAGGAGGCCTGGAATGGTTCTTCACCGAAAAGACGTTGAGACCATGCTTTTCAACGTTGCCTAAGGATGGGGCCGCGTCCGATCAGGTTGTTGGTGGGGTAACGGCTCACCAAGCCTATAACCGGTACGGGCCGTGGGAGCGGAAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCAGCAGTCGCGAAAACTCCGCAATGCGCGAAAGCGTGACGGGGCTACCCCGAGTGCCGTCCGCTGAGGATGGCTTTTCCCCGGTGTAATGAGCCTGGGGAATAAGGAGAGGGCAAGCCTGGTGTCAGCCGCCGCGGTAATACCAGCTCTCCGAGTGGTAGGGATGATTATTGGGCTTAAAGCGTCCGTAGCCAGCCCGGCAAGTCTCCCGTTAAATCCAGCGACCTAATCGTTGGGCTGCGGAAGATACTGTTGGGCTAGGGGGCGGGAGAGGCCGACGGTATTCCCGGGGTAGGGGTGAAATCCTATAATCCTGGGAGGACCACCAGTGGCGAAGGCTGTCGGCTAGAACGCGCTCGACGGTGAGGGACGAAAGCTGGGGGAGCGAACTGGATTAGATACCCGGGTAGTCCCAGCTGTAAACGATGCGGGCTAGGTGTTGGGGTGGCTACGAGCCACCTCAGTGCCGCAGGGAAGCCATTAAGCCCGCCGCCTGGGAAGTACGGCCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCACCACAAGGCGTGAAGCTTGCGGTTTAATTGGAGTCAACGCCGGGAACCTTACCGGGGGCGACAGCAGGATGAGGGCCAGATTGAAGGTCTTGCTTGACAAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCTGTTAAGTCAGGCAACGATCGAGACCCGCACCCTTAGTTGCAACCCCTGCGGAACCCGCAGGGGGCACACTACGGGAACTGCCGCCGATAAGGCGGAGGAAGGAGCGGGCCACGGCAGGTCAGTATGCCCCGAATCCCCCGGGCCACACGCGAGCTGCAATGGCAGAGACAATGGGTTCCAACCTTGAAAGAGGGAGGTAATCCCTAAACCCTGCCTCAGTTGGGATCGAGGGCTGCAACCCGCCCTCGTGAACATGGAATGCCTAGTAATCGCGTGTCATCATCGCGCGGTGAATACGTCCCCGCTCCTTGCACACACCGCCCGTCGCTCCATCCGAGTGGGGTTTGGGTGAGGCGTGGTCTGTTGGCCGCGTCGAATCTAGGCTTCGCGAGGAGGGAGAAGTCGTAACAAGGTGGCCGTAGGGGAACCTGCGGCCGGATCACCTCCT',
        'BA2-16S')
    suns_db = '/home/allis/Documents/INMI/SunS-metagenome/BlastDB-big/sunspring_meta'
    silva_db = '/home/allis/Documents/INMI/SILVA-DB/SILVA_123_SSURef_Nr99_tax_silva'
    # extra records added to every alignment; the first two literals are
    # stored with alignment gaps, which are stripped with replace('-', '')
    additions = [
        simple_rec(
            'AAACTGGGGCTAATCCCCCATAGGCCTGGGGTACTGGAAGGTCCCCAGGCCGAAAGGG------GACCGTA-----AGGTCCCGCCCGAGGATGGGCCGGCGGCCGATTAGGTAGTTGGTGGGGTAACGGCCCACCAAG--CCGAAGATCGGTACGGGCC-GTGAGAGCGGGAGCCCGGAGATGGACA---CTGAGACACGGGTCCAGGCCCTACGGGGCGCAGCAGGCGCGAAACC-TCCGCAATGCGGGAAACCGCGACGGGGGGACCCCCAGTGCCGTGCCTCTGGC-----ACGGCTTTTCCGGAGTG-TAAAAAGCTCCGGGAATAAGGGCTGGGCAAGGCCGGTGGC-AGCCGCCGCGGTAATACCGGCGGCCCGAGTGGTGGCCACTATTATTGGGCCTAAAGCGGCCGTAGCCGGGCCCGTAAGTCCCTGGCG-AAATCCCACGGCTCAACCGTGGGGCTCGCTGGGGATACTGCGG-GCCTTGGGACCGGGAGAGGCCGGGGGTACC-CCCGGGGTAGGGGTGAAATCCTATAATCCCGGGGGGACCGCCAGT-GGCGAAGGCGCCC--GGCTGGAACGGGTCCGACGGTGAGGGCCGAAGGCC-AGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCTGGCTGTAAAGGATGCGGGCTAGGTGTCGGGCGAG-CTTCGAGCTCGC-CCGGTGCCGTAGGGAAGCCGTTAAGCCCGCCGCC-TGGGGAGTACGGCCGCAAGGCT-GAAACTTAAAGGAATT-GGCGGGGGAGC-ACTACAAGGGGTGGAGCGTGCGGTTTAATTGGATTCAACGCCGGGAACCTCACCGGGGGCGACGGCAGGATGAA-GGCCAGGCTGAAGGTCTTGCCGGACGCGCCGAGAGGAG-----------------------------------GTGCATGGCCGCCGTCAGCTCGTACCGTGAGGCGTCCA-CTTAAGTGTGGTAACGAGCGAGACCCGC--GCCCCCAGTTGCCAGTCCCTCCCGCTGGGA---GGGAGGC-ACTCTGGGGGG-ACTGCCGGCGAT-AAGCCGGAGGAAGGGGCGGGCGACGGTAGGTCAGTATG-CCCCGAAACCC-CCGGGCT-ACACGCGCGCTACAATGGGCGGGACAATGGGA-CCCGACCCCGAAAGGGGAAGGGAATCCCCTAAACCCGCCCTCAGTTCGGATCGCGGGCTG-CAACTCGCCCGCGTGAAGC-TGGAAT-CCCTAGTACCCGCGCGTCATCATCGCGCGGCGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCACTCCACCCGAG-CGGGGCCC-GGGTGAGGCCCGATCTCCTTCGGGAGGTCGGGTCGAGCCTGGGCTC-CGTGAGGGGGG-AGAAGTCGTAACAAGGTAGCC------------------------------'
            .replace('-', ''), 'Thermococcus_chitonophagus'),
        simple_rec(
            'AAACTGGGATTAATACCCACTAAATGATAATACCTGGAATGGCTTATCATTGAAAGAC-TCTGGAAACATGCTTC-CAGCGTCGCCCAAGG-------------------------------------------------------------------------------GGAGCCCGGAGATGGAAA---CTGAGACAAGGTTCCAGGCCCTACGGGGCGCAGCAGGCGCGAAACC-TCCACAATGCGCGAAAGCGTGATGGGGTTATCCCGAGTGCCGTCCGATGAGG-----ATGGCTTTTCCTCGGTG-TAAGGATCCGAGGGAATAAAGGGGGGGCAAGACTGGTGTC-AGCCGCCGCGGTAATACCAGCTCCCTGAGTGGTAAGGACGATTATTTGGCCTAAAGCGTCCGTAGCCGGCTTATCAAGTCTCTTGTT-AAACCCAGTGATTCAATCATTGACCT-GCAAGAGATACTGTTA-TGCTAGAGGACGGGAGAGGTCGACGG---------GGGTAGGGGTGAAATCCTATAATCCTTGGAGGACCACCAGT-GGCGAAGGCGGTC--GACTAGAACGTGCCTGACGGTGAGGGACGAAAGCT-GGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCCAGCTGTAAACGATGCGGGCTAGGTGTTGGGGTAG-CTACGAGCTACT-CCAGTGCCGCAGAGAAGTTGTTAAGCCCGCCGCC-TGGGGAGTACGGCCGCAAGGCT-GAAACTTAAAGGAATT-GGCGGGGGAGC-ACCACAAGGGGTGAAGGCTGCGGTTTAATTGGAGTCAACGCCGGGAACCTTACCGGGGCTGACAGCAGAGTGAA-GGCCAGACTGAAGATCTTGCCAGACAAGCTGAGAGGAGGTGCATGAAGATCTTGCCAGACAAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCT-GTTAAGTCAGGCAACGAACGAGACCCCC--ACTGTTAGTTGCCAGCGAATTCCAACGGAAT--GTCGGGC-ACACTAACAGG-ACTGCCACCGAT-AAGGTGGAGGAAGGAGGGGGCAACGGCAGGTCAGTATG-CCCC--------------------------------------------------------------------------------------------------------------GAACTCGCCCTCATGAACA-TGGAAT-CCCTAGTAACCGCGTGTCATCATCGCGCGGTGAATACGTCCCCGCTCCTTGCACACACCGCCCGTCGCTCCATCCAAG-TCGGGTCT-AGATGAGGCGCAGTCTTCT-----TGGCTACGTCGAATCTGGGTTC-GGTGAGGGGGG-AGAAGTCGTAACAAGGTGGCCGTAGGGGAACCTGCGGCCGGATCACCTCCT'
            .replace('-', ''), 'SMTZ1-55'),
        simple_rec(
            'ACTCCGGTTGATCCTGCCGGACCCCACTGCTATCGGGGTAGGACTTAACCATGCGAGTTGTGCGTCCCCAAGCCATGGTGGGGGCGCGGCATACGGCTCAGTAACACGTGGCTAACCTAGCCTTTGGACGGGGACAACCCCGGGAAACTGGGGCTAATCCCCGATGGGTGGGAAGGCCTGGAATGGTTTCCCACCGAAAGGGCGTCTGAACCATGCTTCAGGCGTTGCCGAAGGATGGGGCCGCGGCCGATCAGGTTGTTGGTGAGGTAACGGCTCACCAAGCCTATAACCGGTACGGGCCGTGAGAGCGGGAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCAGCAGGTGCGAAAACTCCGCGATGCGCGAAAGCGTGACGGGGCTATCCCGAGTGCCGTCCGCTGAGGATGGCTTTTCCCCGGTGTAGGGAGCCGGGGGAATAAGGAGAGGGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCTCTCCGAGTGGTGGGGACAATTATTGGGCTTAAAGCGTCCGTAGCCGGCCCATCAAGTCTCTTGTTAAATCCAGCGATCCAATCGCTGGACTGCGGGAGATACTGCTGGGCTAGGGGGCGGGAGAAGCCGATGGTATTCTCGGGGTAGGGGTGAAATCCTATAATCCCGGGAGGACCACCAGTGGCGTAGGCGGTCGGCTAGAACGCGCCCGACGGTGAGGGACGAAAGCTGGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCCAGCCGTAAACGATGCGGGCTAGGTGTTGGGGTGGCTACGAGCCACCCCAGTGCCGCATGGAAGCAATTAAGCCCGCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCACCACAAGGGGTGAAGCTTGCGGTTTAATTGGAGTCAACGCCGGGAAAGGAACAGCGTTTTGTTGTTCCTCTGGATACCTTACCGGGGGCGACAGCAGGATGAAGGCCAGATTGAAGGTCTTGCTGGACGAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCTGTTAAGTCAGGTAACGATCGAGACCCACACCCCCAGTTGCTACCTCTTCGGAGGGCACTCTAGGGGTACTGCCGCCGATAAGGCGGAGGAAGGAGTGGGCCACGGCAGGTCAGTATGCCCCGAATCCCCCGGGCCACACGCGAGCTGCAATGGCAAGGACAATGGGTTCTGACCCCGAGAGGGGAAGGTAATCCCGAAACCCTGCCTCAGTTGGGATCGAGGGCTGAAACCCGCCCTCGTGAACATGGAATCCCTAGTAATCGCGGGTCACCAGCCCGCGGTGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCGCTCCATCCGAGTGGGGTTTAGGTGAGGCGTGGTCCTTGTGGCTGTGTCGAATCTAGGCTTCGCGAGGAGGGAGAAGTCGTAACAAGGTGGCCGTAGGGGAACCTGCGGCCGGATCACCTC',
            'BA1-16S'),
        simple_rec(
            'CTGGTGGAAATATAGAAGAGGCCAAATCCGGGGTTCAGGCCGCCCGGGGTAATTACCCGTTGTCGGAGTGGGGGGGGGACGCTATTGGGGCTTAAGCCATCGTTAGCCCGTTTGACCAGGTCTCTTGTTAAATCAGGCGGATTTATTGGTCGATTGCAGGAGATTATGTTCGTCTTAGGGGCCGGAGGAGTCAACAGTATTCCCGGGGTAGGAGTGAATGCCTATATTCCCGGAGGTACCACCAGTGGGGACGCCGTTGGTATAGAACGCGCCGGCCGGTGATGGAATGAAAGTGAGGGAACCGACCCGAATTAGATACCGGGGTATTGCTACCGTTAACCGATGCAGCTTAGGTGTTCGGGTGGTTACTAGCCATTCGAGTGCGCCAGGGAAGCTGTCAGGCTTACCGCTTGGGAAGTGCGGCTGCAGGGCCAAAACTTAAGGAAATCGCCGGGGAAGCACCCCAGGGGGTGAAGCTTGCGCTTTAATGGAATTCACCGCGGTAATTCTCACCGGGGGAGCCACCAGGAGGAAAGCCAGATTAAAGTTCTTGTTGGCGGAGTGGAGAGGAGGTGCATGCCGTTCGCCAGTTCTTCCCGGGAGGTTCTTGTTAGTTCAGCCACCGATGAGGACCGCCATCCCCTGTTGTTATTGGCCTTGCGCCAGGCACACTGGGGAGACCGCCGCCGATAAGGCGGAGGAAGGAGCGGGCCACGGCAGGTCAGTATGCCCCGAATCCCCCGTCCACACGCGAGGGGCAATG',
            '155a'),
        simple_rec(
            'CAAGTCCTATAACCGGTACGGGCCGTGGGAGCGGTAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCAGCAGTCGCGAAACCTCCGCAATACGCGAAAGCGTGACGGGGTCATCCCGAGTGCCGTCCGCTGAGGATGGCTTTTCCCCAGTGTAGACAGCTGGGGGAATAAGGAGAGGGCAAGTCGGGTGTCAGCCGCCGCGGTAATACCCGCTCTCCGAGTGGTGGGGACGCTTATTGGGCCTAAAGCATCCGTAGCCGGCTGGACAAGTCCCCTGTTAAATCCAGCGATTTAATCGTTGGACTGCGGGGGATACTGTCCGGCTAGGGGGCGGGAGAGGCCGACGGTATTTCCGGGGTAGGGGTGAAATCCTATAATCCCGGGAGGACCACCAGTGGCGAAGGCTGTCGGCTAGAACGCGCCCGACGGTGAGGGATGAAAGCTGGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCCAGCCGTAAACGATGCAGGCTAGGTGTTCGGGTGGCTACGTGCCACTCGAGTGCCGCAGGGAAGCTGTTAAGCCTGCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCACCACAAGGGGTGAAGCTTGCGGTTTAATTGGAGTCAACGCCGGAAATCTCACCGGGGGAGACAGCAGGATGAAAGCCAGATTAAAGGTCTTGCTAGACGAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCTGTTAAGTCAAGGCAACGATCGAGACTCGCATCCTCTGTTGCTACTACCCTTGCGCCAAGGCACACTGGGGGAGACCGCCGCTCGATAAGGCGGAAGGAAGGAGCGGCCCACGGCAGTCAGTATGCCCCGAATTCCCTCGGCCACACGCAAGCTGCAATG',
            '156a'),
        simple_rec(
            'GGGGATCGGGGCATACTGACCTGCCGTGGCCCGCTCCTTCCTCCGC…
            '157a')
    ]
    #prepare filter
    # chain of three filters: first-HSP alignment length, every HSP score,
    # every HSP identity fraction
    filt = BlastFilter(lambda a, r: a.hsps[0].align_length > 1100)
    filt.AND = BlastFilter(
        lambda a, r: all(hsp.score > 500 for hsp in a.hsps))
    filt.AND.AND = BlastFilter(lambda a, r: all(
        hsp.identities / float(hsp.align_length) > 0.8 for hsp in a.hsps))
    #make ring-blast
    blast = BlastCLI(self.abort_event)
    orig_seqs = blast.ring_blast(query, suns_db, 100, filt, 3)
    if not orig_seqs:
        print 'No blast results.'
        return 1
    nseqs = len(orig_seqs)
    print 'RingBlast to:\n%s\nreturned %d sequences.\n' % (suns_db, nseqs)
    #save an initial alignment
    self.fix_ids(orig_seqs)
    alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.fasta'
    with user_message('Aligning retrieved sequences...', '\n'):
        if not AlignmentUtils.align(orig_seqs + [query] + additions,
                                    outfile=alifile):
            return 3
    #search for additional homologs
    add_seqs = blast.ring_blast(orig_seqs, silva_db, 100, filt, 0)
    if add_seqs:
        self.fix_ids(add_seqs)
        print 'RingBlast to:\n%s\nreturned %d additional sequences.\n' % (
            silva_db, len(add_seqs))
    #build an alignment
    seqs = orig_seqs + add_seqs + [query] + additions
    alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.big.aln.fasta'
    with user_message('Aligning retrieved sequences...', '\n'):
        if not AlignmentUtils.align(seqs, outfile=alifile):
            return 3
    #build a tree
    treefile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.big.aln.tre'
    if not PhyloUtils.build_fast_tree(alifile, treefile):
        return 4
    #annotate the tree
    # the iPCR-report parser below is disabled (`if False`); `sids` is not
    # used by the annotate_tree call in this version
    if False:
        with open(
                '/home/allis/Documents/INMI/16S/SSBaF4-SSBaR4-1_243072232-iPCR-report.txt'
        ) as inp:  # SSBaF4-SSBaR4_65397396-iPCR-report.txt
            sids = set()
            len_re = re.compile(r'(\s|^)(\d+)(\sbp|\\s*:)?', re.MULTILINE)
            entry = False
            cur_sid = None
            cur_len = -1
            for l in inp:
                if l == '========= histograms and electrophorograms of PCR products of each hit =========':
                    break
                if l.startswith('---'):
                    # end of an entry: keep the id if the last parsed
                    # length is within 60 of 920
                    entry = False
                    if cur_sid and cur_len > 0 and abs(cur_len - 920) < 60:
                        sids.add(cur_sid)
                    cur_sid = None
                    cur_len = -1
                    continue
                if entry or '#' in l:
                    entry = True
                    plen = len_re.search(l)
                    if plen:
                        cur_len = int(plen.group(2))
                    sid = BlastID.extract(l)[0]
                    if sid:
                        cur_sid = sid
    organisms = Organisms.from_records(seqs)
    # NOTE(review): the 'thaumarchaeta' key is spelled differently from
    # 'thaumarchaeota' in the commented-out collapse_taxa — confirm which
    # spelling the lineage data uses
    if PhyloUtils.annotate_tree(
            treefile,
            organisms,
            reroot_at='Thermococcus_chitonophagus',
            # beautify_leafs=True,
            # collapse_taxa=['miscellaneous crenarchaeotic group', 'thaumarchaeota'],
            # collapse_last=True,
            # collapse_hard=True,
            # mark_leafs=sids,
            mark_leafs=[r.id for r in orig_seqs + [query] + additions],
            lineage_colors={
                'miscellaneous crenarchaeotic group': (0, 0, 255),
                'thaumarchaeta': (255, 0, 0)
            },
            top_lineage=Lineage('archaea')):
        return 0
    return 2
def _main(self):
    """Collect homologs of a 16S query by ring-blast, align them, build a tree.

    Steps, all on hard-coded paths: ring-blast the query against the SunS
    metagenome database, align the hits with the query and the hard-coded
    additional records, ring-blast the hits against the SILVA database,
    align everything, build a tree, parse an iPCR report for sequence ids
    whose product length is within 60 of 920, and annotate the tree with
    those ids marked.

    Returns 1 if the first ring-blast gives no results, 3 if an alignment
    fails, 4 if the tree cannot be built, 0 if the tree annotation succeeds
    and 2 otherwise.

    NOTE(review): the source this block was recovered from had lost its
    indentation; the nesting below (in particular what belongs under
    `if add_seqs:`) is the most plausible reading — confirm.
    """
    query = simple_rec('AAACTGGGGCTAATACCCGATGGGTGAGGAGGCCTGGAATGGTTCTTCACCGAAAAGACGTTGAGACCATGCTTTTCAACGTTGCCTAAGGATGGGGCCGCGTCCGATCAGGTTGTTGGTGGGGTAACGGCTCACCAAGCCTATAACCGGTACGGGCCGTGGGAGCGGAAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCAGCAGTCGCGAAAACTCCGCAATGCGCGAAAGCGTGACGGGGCTACCCCGAGTGCCGTCCGCTGAGGATGGCTTTTCCCCGGTGTAATGAGCCTGGGGAATAAGGAGAGGGCAAGCCTGGTGTCAGCCGCCGCGGTAATACCAGCTCTCCGAGTGGTAGGGATGATTATTGGGCTTAAAGCGTCCGTAGCCAGCCCGGCAAGTCTCCCGTTAAATCCAGCGACCTAATCGTTGGGCTGCGGAAGATACTGTTGGGCTAGGGGGCGGGAGAGGCCGACGGTATTCCCGGGGTAGGGGTGAAATCCTATAATCCTGGGAGGACCACCAGTGGCGAAGGCTGTCGGCTAGAACGCGCTCGACGGTGAGGGACGAAAGCTGGGGGAGCGAACTGGATTAGATACCCGGGTAGTCCCAGCTGTAAACGATGCGGGCTAGGTGTTGGGGTGGCTACGAGCCACCTCAGTGCCGCAGGGAAGCCATTAAGCCCGCCGCCTGGGAAGTACGGCCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCACCACAAGGCGTGAAGCTTGCGGTTTAATTGGAGTCAACGCCGGGAACCTTACCGGGGGCGACAGCAGGATGAGGGCCAGATTGAAGGTCTTGCTTGACAAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCTGTTAAGTCAGGCAACGATCGAGACCCGCACCCTTAGTTGCAACCCCTGCGGAACCCGCAGGGGGCACACTACGGGAACTGCCGCCGATAAGGCGGAGGAAGGAGCGGGCCACGGCAGGTCAGTATGCCCCGAATCCCCCGGGCCACACGCGAGCTGCAATGGCAGAGACAATGGGTTCCAACCTTGAAAGAGGGAGGTAATCCCTAAACCCTGCCTCAGTTGGGATCGAGGGCTGCAACCCGCCCTCGTGAACATGGAATGCCTAGTAATCGCGTGTCATCATCGCGCGGTGAATACGTCCCCGCTCCTTGCACACACCGCCCGTCGCTCCATCCGAGTGGGGTTTGGGTGAGGCGTGGTCTGTTGGCCGCGTCGAATCTAGGCTTCGCGAGGAGGGAGAAGTCGTAACAAGGTGGCCGTAGGGGAACCTGCGGCCGGATCACCTCCT', 'BA2-16S')
    suns_db = '/home/allis/Documents/INMI/SunS-metagenome/BlastDB-big/sunspring_meta'
    silva_db = '/home/allis/Documents/INMI/SILVA-DB/SILVA_123_SSURef_Nr99_tax_silva'
    # extra records added to every alignment; the first two literals are
    # stored with alignment gaps, which are stripped with replace('-', '')
    additions = [simple_rec('AAACTGGGGCTAATCCCCCATAGGCCTGGGGTACTGGAAGGTCCCCAGGCCGAAAGGG------GACCGTA-----AGGTCCCGCCCGAGGATGGGCCGGCGGCCGATTAGGTAGTTGGTGGGGTAACGGCCCACCAAG--CCGAAGATCGGTACGGGCC-GTGAGAGCGGGAGCCCGGAGATGGACA---CTGAGACACGGGTCCAGGCCCTACGGGGCGCAGCAGGCGCGAAACC-TCCGCAATGCGGGAAACCGCGACGGGGGGACCCCCAGTGCCGTGCCTCTGGC-----ACGGCTTTTCCGGAGTG-TAAAAAGCTCCGGGAATAAGGGCTGGGCAAGGCCGGTGGC-AGCCGCCGCGGTAATACCGGCGGCCCGAGTGGTGGCCACTATTATTGGGCCTAAAGCGGCCGTAGCCGGGCCCGTAAGTCCCTGGCG-AAATCCCACGGCTCAACCGTGGGGCTCGCTGGGGATACTGCGG-GCCTTGGGACCGGGAGAGGCCGGGGGTACC-CCCGGGGTAGGGGTGAAATCCTATAATCCCGGGGGGACCGCCAGT-GGCGAAGGCGCCC--GGCTGGAACGGGTCCGACGGTGAGGGCCGAAGGCC-AGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCTGGCTGTAAAGGATGCGGGCTAGGTGTCGGGCGAG-CTTCGAGCTCGC-CCGGTGCCGTAGGGAAGCCGTTAAGCCCGCCGCC-TGGGGAGTACGGCCGCAAGGCT-GAAACTTAAAGGAATT-GGCGGGGGAGC-ACTACAAGGGGTGGAGCGTGCGGTTTAATTGGATTCAACGCCGGGAACCTCACCGGGGGCGACGGCAGGATGAA-GGCCAGGCTGAAGGTCTTGCCGGACGCGCCGAGAGGAG-----------------------------------GTGCATGGCCGCCGTCAGCTCGTACCGTGAGGCGTCCA-CTTAAGTGTGGTAACGAGCGAGACCCGC--GCCCCCAGTTGCCAGTCCCTCCCGCTGGGA---GGGAGGC-ACTCTGGGGGG-ACTGCCGGCGAT-AAGCCGGAGGAAGGGGCGGGCGACGGTAGGTCAGTATG-CCCCGAAACCC-CCGGGCT-ACACGCGCGCTACAATGGGCGGGACAATGGGA-CCCGACCCCGAAAGGGGAAGGGAATCCCCTAAACCCGCCCTCAGTTCGGATCGCGGGCTG-CAACTCGCCCGCGTGAAGC-TGGAAT-CCCTAGTACCCGCGCGTCATCATCGCGCGGCGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCACTCCACCCGAG-CGGGGCCC-GGGTGAGGCCCGATCTCCTTCGGGAGGTCGGGTCGAGCCTGGGCTC-CGTGAGGGGGG-AGAAGTCGTAACAAGGTAGCC------------------------------'.replace('-', ''), 'Thermococcus_chitonophagus'),
                 simple_rec('AAACTGGGATTAATACCCACTAAATGATAATACCTGGAATGGCTTATCATTGAAAGAC-TCTGGAAACATGCTTC-CAGCGTCGCCCAAGG-------------------------------------------------------------------------------GGAGCCCGGAGATGGAAA---CTGAGACAAGGTTCCAGGCCCTACGGGGCGCAGCAGGCGCGAAACC-TCCACAATGCGCGAAAGCGTGATGGGGTTATCCCGAGTGCCGTCCGATGAGG-----ATGGCTTTTCCTCGGTG-TAAGGATCCGAGGGAATAAAGGGGGGGCAAGACTGGTGTC-AGCCGCCGCGGTAATACCAGCTCCCTGAGTGGTAAGGACGATTATTTGGCCTAAAGCGTCCGTAGCCGGCTTATCAAGTCTCTTGTT-AAACCCAGTGATTCAATCATTGACCT-GCAAGAGATACTGTTA-TGCTAGAGGACGGGAGAGGTCGACGG---------GGGTAGGGGTGAAATCCTATAATCCTTGGAGGACCACCAGT-GGCGAAGGCGGTC--GACTAGAACGTGCCTGACGGTGAGGGACGAAAGCT-GGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCCAGCTGTAAACGATGCGGGCTAGGTGTTGGGGTAG-CTACGAGCTACT-CCAGTGCCGCAGAGAAGTTGTTAAGCCCGCCGCC-TGGGGAGTACGGCCGCAAGGCT-GAAACTTAAAGGAATT-GGCGGGGGAGC-ACCACAAGGGGTGAAGGCTGCGGTTTAATTGGAGTCAACGCCGGGAACCTTACCGGGGCTGACAGCAGAGTGAA-GGCCAGACTGAAGATCTTGCCAGACAAGCTGAGAGGAGGTGCATGAAGATCTTGCCAGACAAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCT-GTTAAGTCAGGCAACGAACGAGACCCCC--ACTGTTAGTTGCCAGCGAATTCCAACGGAAT--GTCGGGC-ACACTAACAGG-ACTGCCACCGAT-AAGGTGGAGGAAGGAGGGGGCAACGGCAGGTCAGTATG-CCCC--------------------------------------------------------------------------------------------------------------GAACTCGCCCTCATGAACA-TGGAAT-CCCTAGTAACCGCGTGTCATCATCGCGCGGTGAATACGTCCCCGCTCCTTGCACACACCGCCCGTCGCTCCATCCAAG-TCGGGTCT-AGATGAGGCGCAGTCTTCT-----TGGCTACGTCGAATCTGGGTTC-GGTGAGGGGGG-AGAAGTCGTAACAAGGTGGCCGTAGGGGAACCTGCGGCCGGATCACCTCCT'.replace('-', ''), 'SMTZ1-55'),
                 simple_rec('ACTCCGGTTGATCCTGCCGGACCCCACTGCTATCGGGGTAGGACTTAACCATGCGAGTTGTGCGTCCCCAAGCCATGGTGGGGGCGCGGCATACGGCTCAGTAACACGTGGCTAACCTAGCCTTTGGACGGGGACAACCCCGGGAAACTGGGGCTAATCCCCGATGGGTGGGAAGGCCTGGAATGGTTTCCCACCGAAAGGGCGTCTGAACCATGCTTCAGGCGTTGCCGAAGGATGGGGCCGCGGCCGATCAGGTTGTTGGTGAGGTAACGGCTCACCAAGCCTATAACCGGTACGGGCCGTGAGAGCGGGAGCCCGGAGATGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCAGCAGGTGCGAAAACTCCGCGATGCGCGAAAGCGTGACGGGGCTATCCCGAGTGCCGTCCGCTGAGGATGGCTTTTCCCCGGTGTAGGGAGCCGGGGGAATAAGGAGAGGGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCTCTCCGAGTGGTGGGGACAATTATTGGGCTTAAAGCGTCCGTAGCCGGCCCATCAAGTCTCTTGTTAAATCCAGCGATCCAATCGCTGGACTGCGGGAGATACTGCTGGGCTAGGGGGCGGGAGAAGCCGATGGTATTCTCGGGGTAGGGGTGAAATCCTATAATCCCGGGAGGACCACCAGTGGCGTAGGCGGTCGGCTAGAACGCGCCCGACGGTGAGGGACGAAAGCTGGGGGAGCGAACCGGATTAGATACCCGGGTAGTCCCAGCCGTAAACGATGCGGGCTAGGTGTTGGGGTGGCTACGAGCCACCCCAGTGCCGCATGGAAGCAATTAAGCCCGCCGCCTGGGGAGTACGGCCGCAAGGCTGAAACTTAAAGGAATTGGCGGGGGAGCACCACAAGGGGTGAAGCTTGCGGTTTAATTGGAGTCAACGCCGGGAAAGGAACAGCGTTTTGTTGTTCCTCTGGATACCTTACCGGGGGCGACAGCAGGATGAAGGCCAGATTGAAGGTCTTGCTGGACGAGCTGAGAGGAGGTGCATGGCCGTCGCCAGTTCGTGCCGTGAGGTGTCCTGTTAAGTCAGGTAACGATCGAGACCCACACCCCCAGTTGCTACCTCTTCGGAGGGCACTCTAGGGGTACTGCCGCCGATAAGGCGGAGGAAGGAGTGGGCCACGGCAGGTCAGTATGCCCCGAATCCCCCGGGCCACACGCGAGCTGCAATGGCAAGGACAATGGGTTCTGACCCCGAGAGGGGAAGGTAATCCCGAAACCCTGCCTCAGTTGGGATCGAGGGCTGAAACCCGCCCTCGTGAACATGGAATCCCTAGTAATCGCGGGTCACCAGCCCGCGGTGAATACGTCCCTGCTCCTTGCACACACCGCCCGTCGCTCCATCCGAGTGGGGTTTAGGTGAGGCGTGGTCCTTGTGGCTGTGTCGAATCTAGGCTTCGCGAGGAGGGAGAAGTCGTAACAAGGTGGCCGTAGGGGAACCTGCGGCCGGATCACCTC', 'BA1-16S')
                 ]
    #prepare filter
    # chain of three filters: first-HSP alignment length, every HSP score,
    # every HSP identity fraction
    filt = BlastFilter(lambda a: a.hsps[0].align_length > 1100)
    filt.AND = BlastFilter(lambda a: all(hsp.score > 500 for hsp in a.hsps))
    filt.AND.AND = BlastFilter(lambda a: all(hsp.identities/float(hsp.align_length) > 0.8
                                             for hsp in a.hsps))
    #make ring-blast
    blast = BlastCLI(self.abort_event)
    orig_seqs = blast.ring_blast(query, suns_db, 100, filt, 3)
    if not orig_seqs:
        print 'No blast results.'
        return 1
    nseqs = len(orig_seqs)
    print 'RingBlast to:\n%s\nreturned %d sequences.\n' % (suns_db, nseqs)
    #save an initial alignment
    self.fix_ids(orig_seqs)
    alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.fasta'
    with user_message('Aligning retrieved sequences...', '\n'):
        if not AlignmentUtils.align(orig_seqs+[query]+additions, outfile=alifile):
            return 3
    #search for additional homologs
    add_seqs = blast.ring_blast(orig_seqs, silva_db, 100, filt, 0)
    if add_seqs:
        self.fix_ids(add_seqs)
        print 'RingBlast to:\n%s\nreturned %d additional sequences.\n' % (silva_db, len(add_seqs))
    #build an alignment
    seqs = orig_seqs+add_seqs+[query]+additions
    alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.big.aln.fasta'
    with user_message('Aligning retrieved sequences...', '\n'):
        if not AlignmentUtils.align(seqs, outfile=alifile):
            return 3
    #build a tree
    treefile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.big.aln.tre'
    if not PhyloUtils.build_fast_tree(alifile, treefile):
        return 4
    #annotate the tree
    with open('/home/allis/Documents/INMI/16S/SSBaF4-SSBaR4-1_243072232-iPCR-report.txt') as inp:  # SSBaF4-SSBaR4_65397396-iPCR-report.txt
        sids = set()
        len_re = re.compile(r'(\s|^)(\d+)(\sbp|\\s*:)?', re.MULTILINE)
        entry = False
        cur_sid = None
        cur_len = -1
        for l in inp:
            # NOTE(review): lines read by iterating a file normally keep
            # their trailing newline, so this equality may never match —
            # confirm against the report format
            if l == '========= histograms and electrophorograms of PCR products of each hit =========':
                break
            if l.startswith('---'):
                # end of an entry: keep the id if the last parsed length
                # is within 60 of 920
                entry = False
                if cur_sid and cur_len > 0 and abs(cur_len-920) < 60:
                    sids.add(cur_sid)
                cur_sid = None
                cur_len = -1
                continue
            if entry or '#' in l:
                entry = True
                plen = len_re.search(l)
                if plen:
                    cur_len = int(plen.group(2))
                sid = BlastID.extract(l)[0]
                if sid:
                    cur_sid = sid
    organisms = Organisms.from_records(seqs)
    # NOTE(review): the 'thaumarchaeta' key is spelled differently from
    # 'thaumarchaeota' in the commented-out collapse_taxa — confirm which
    # spelling the lineage data uses
    if PhyloUtils.annotate_tree(treefile, organisms,
                                reroot_at='Thermococcus_chitonophagus',
                                # beautify_leafs=True,
                                # collapse_taxa=['miscellaneous crenarchaeotic group', 'thaumarchaeota'],
                                # collapse_last=True,
                                # collapse_hard=True,
                                mark_leafs=sids,  # [r.id for r in orig_seqs+[query]+additions],
                                lineage_colors={'miscellaneous crenarchaeotic group':(0, 0, 255),
                                                'thaumarchaeta':(255,0,0)},
                                top_lineage=Lineage('archaea')):
        return 0
    return 2
def _main(self):
    """Search a 16S alignment for discriminating primer pairs.

    Loads the alignment (or its cached '.with_additions' variant), optionally
    adds a few sequences from the SILVA file and realigns, then trims the
    alignment, checks that the outgroups are present, and uses PrimerFinder
    to find forward and reverse primers, compile them into pairs, print them
    and add them to the original alignment, which is then saved.

    :return: 1 if the alignment cannot be loaded, 2 if the realigned file
        cannot be loaded, 3 if an outgroup is missing or no pairs are found;
        otherwise falls through and returns None.
    """
    min_prod = 400
    silva_db = '/home/allis/Documents/INMI/SILVA-DB/SILVA_123_SSURef_Nr99_tax_silva.fasta'
    alifile = '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.fasta'
    add_filename = FilenameParser.strip_ext(alifile) + '.with_additions.fasta'
    outgroups = ['Thermococcus_chitonophagus',
                 'SMTZ1-55',
                 'contig72135_1581_sunspring_meta']
    add = ['KF836721.1.1270', 'EU635905.1.1323']
    exclude = []
    # load the alignment; prefer the cached file with additions if present
    if os.path.isfile(add_filename):
        alifile = add_filename
        add_filename = ''
    with user_message('Loadding initial alignment...', '\n'):
        orig_ali = AlignmentUtils.load_first(alifile)
        if not orig_ali:
            return 1
    # load homologs and realign, only when no cached file was found
    if add_filename:
        with user_message('Loadding additional sequences...', '\n'):
            add_seqs = []
            db = SeqView()
            if db.load(silva_db):
                for sid in add:
                    seq = db.get(sid)
                    if not seq:
                        print('%s not found in %s' % (sid, silva_db))
                    else:
                        add_seqs.append(seq)
        if add_seqs:
            with user_message('Realigning data...', '\n'):
                add_filename = FilenameParser.strip_ext(alifile) + '.with_additions.fasta'
                combined = list(orig_ali) + add_seqs
                AlignmentUtils.align(combined, add_filename)
                orig_ali = AlignmentUtils.load_first(add_filename)
                if not orig_ali:
                    return 2
    # process the alignment
    ali = orig_ali.remove(*exclude).trim()
    for out in outgroups:
        if not ali.index(out):
            print('%s not found in the alignment' % out)
            return 3

    def _outgroups_last(rec):
        # outgroup records get a key that sorts them after the others
        if rec.id in outgroups:
            return 'zzzzzzzz'
        return rec.id

    ali.sort(key=_outgroups_last)
    AlignmentUtils.save(
        ali,
        '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.aln.trimmed.fasta')
    args = dict(plen=(20, 40),
                max_mismatches=8,
                min_match_mismatches=1,
                first_match_mismatches=1,
                first_may_match=1,
                AT_first=True,
                outgroup=len(outgroups))
    fprimers = PrimerFinder.find_discriminating_primers(ali, **args)
    rprimers = PrimerFinder.find_discriminating_primers(ali, reverse=True,
                                                        **args)
    pairs = PrimerFinder.compile_pairs(fprimers, rprimers, min_prod, 'SSBa')
    if not pairs:
        print('\nNo suitable primer pairs found')
        return 3
    PrimerFinder.print_pairs(pairs)
    orig_ali = PrimerFinder.add_pairs_to_alignment(pairs, orig_ali)
    AlignmentUtils.save(
        orig_ali,
        '/home/allis/Documents/INMI/SunS-metagenome/Bathy/BA2_SunS_16S.with_primers.aln.fasta')
    print('Done')
def _main(self):
    """Find primers for the transF and cooS genes of CO-oxidation clusters.

    First run (no 'CO-clusters.gb' file yet): the gene of interest is
    extracted from the genome, blasted (or cached blast results are read),
    the hits are filtered by HSP length, the corresponding segments are
    fetched and written to 'CO-clusters.gb', and 0 is returned.

    Later runs: the segments and a few local genomes are loaded, primers are
    searched in the alignments of the 'transF' and 'cooS' features, printed,
    added to the alignments, and the alignments are saved.

    :return: 0 after fetching the segments, 1 if no forward or no reverse
        primers were found; otherwise falls through and returns None.
    :raises RuntimeError: if the genome or the gene cannot be loaded, or if
        blast gives no results.
    """
    email = '*****@*****.**'
    genome_dir = '/home/allis/Dropbox/Science/Микра/Thermococcus/sequence/GenBank/Thermococcales/Thermococcus/'
    genome = 'Thermococcus_barophilus_Ch5.gb'
    gene = 'TBCH5v1_1369'  #cooS
    database = 'nr'
    segment = [3200, 12000]
    seq = SeqLoader.load_file(os.path.join(genome_dir, genome))
    if not seq:
        raise RuntimeError('No genome loaded')
    seq = seq[0]
    index = get_indexes_of_genes(seq, gene)
    if not index:
        raise RuntimeError('No gene found')
    feature = seq.features[index[0]]
    query = feature.extract(seq)
    segments_file = 'CO-clusters.gb'
    #get cluster variants if needed
    if not os.path.isfile(segments_file):
        blast_file = 'blast.results.xml'
        if os.path.isfile(blast_file):
            # consume the parser inside the `with` so the handle is closed
            # (it was previously left open)
            with open(blast_file) as blast_handle:
                blast = list(parse(blast_handle))
        else:
            blast = BlastCLI.blast_seq(query, database, 100,
                                       remote=True, task='blastn',
                                       parse_results=True,
                                       save_results_to='blast.results.xml')
        if not blast:
            raise RuntimeError('Blast returned no results')
        flt = BlastFilter(lambda hsp, r: hsp.align_length > 700,
                          filter_hsps=True)
        flt(blast)
        queries = []
        for ali in BlastCLI.iter_alignments(blast):
            q = BlastCLI.Query(ali, 'hsp',
                               start_offset=segment[0],
                               end_offset=segment[1])
            if q:
                queries.append(q)
                print(queries[-1])
        segments = BlastWWW.fetch_queries(email, queries)
        safe_write(segments, segments_file)
        for r in segments:
            print('[%s] %s: %dbp' % (r.id, pretty_rec_name(r), len(r)))
        return 0
    #find primers in alignments of the selected features
    local_files = [
        os.path.join(genome_dir, f)
        for f in ('Thermococcus_barophilus_DT4-complete-genome.gb',
                  'Thermococcus_ST-423.gb',
                  'Thermococcus_CH1-complete.gb')
    ]
    loader = SeqLoader(self.abort_event)
    segments = loader.load_files([segments_file] + local_files)
    # find_primers may return None instead of a pair when the alignment
    # fails; normalize so that the unpacking below cannot raise
    found = find_primers(segments, 'transF',
                         dict(plen=(20, 30), max_mismatches=5,
                              min_first_matches=3, AT_first=True))
    fprimers, transF_ali = found if found else (None, None)
    found = find_primers(segments, 'cooS',
                         dict(plen=(20, 30), max_mismatches=4,
                              min_first_matches=3, AT_first=True),
                         reverse=True)
    rprimers, cooS_ali = found if found else (None, None)
    if not fprimers:
        print('\nNo forward primers found')
        return 1
    if not rprimers:
        print('\nNo reverse primers found')
        return 1
    print('\nForward primers:')
    for p in fprimers:
        print('%s: %s' % (p.id, p))
    print('\nReverse primers:')
    for p in rprimers:
        print('%s: %s' % (p.id, p))
    print()
    #add primers to alignments and save them
    transF_ali = PrimerFinder.add_primers_to_alignment(fprimers, transF_ali)
    cooS_ali = PrimerFinder.add_primers_to_alignment(rprimers, cooS_ali,
                                                     reverse=True)
    AlignmentUtils.save(transF_ali, 'transF.aln')
    AlignmentUtils.save(cooS_ali, 'cooS.aln')