Exemplo n.º 1
0
 def blastn_annotate(self, tag_sequences, subject_record, min_identity, evalue=0.001, **kwargs):
     """Annotate ``subject_record`` with blastn hits of ``tag_sequences``.

     Each HSP whose identity fraction (identities / alignment length) is
     not lower than ``min_identity`` is converted to a feature with
     ``hsp2feature`` and appended to ``subject_record.features``.

     :param tag_sequences: query records, indexed in parallel with the
         results returned by ``s2s_blast_batch``
     :param subject_record: record that is searched and receives features
     :param min_identity: minimal identity fraction of an accepted HSP
     :param evalue: e-value forwarded to ``s2s_blast_batch``
     :param kwargs: extra options forwarded to ``s2s_blast_batch``
     :return: True if at least one feature was added, False otherwise
         (including the case when ``s2s_blast_batch`` returns None)
     """
     blast_results = self.s2s_blast_batch(tag_sequences, [subject_record],
                                          evalue=evalue, command='blastn',
                                          **kwargs)
     if blast_results is None:
         return False
     added = False
     with user_message('Adding results as annotations...'):
         for idx, tag in enumerate(tag_sequences):
             tag_results = blast_results[idx]
             if not tag_results:
                 continue
             # a single subject was searched, so only the first result is used
             record = tag_results[0]
             if not record:
                 continue
             label = pretty_rec_name(tag)
             if label != tag.id:
                 label += ' (%s)' % tag.id
             all_hsps = (h
                         for hit in record
                         for alignment in hit.alignments
                         for h in alignment.hsps)
             for hsp in all_hsps:
                 identity = hsp.identities / float(hsp.align_length)
                 if identity < min_identity:
                     continue
                 # start < end is treated as the forward strand; otherwise
                 # the coordinates are swapped and the strand is -1
                 first, last = hsp.sbjct_start, hsp.sbjct_end
                 if first < last:
                     strand = 1
                 else:
                     strand = -1
                     first, last = last, first
                 # BLAST coordinates are 1-based inclusive, hence first - 1
                 location = FeatureLocation(first - 1, last, strand)
                 feature = self.hsp2feature(label, 'blastn_annotations',
                                            location, hsp)
                 self.add_program(feature, 'blastn')
                 subject_record.features.append(feature)
                 added = True
     return added
Exemplo n.º 2
0
 def _find_matches(self, seq_files, seq_id):
     """Search the template with id ``seq_id`` for matches.

     The sequence database is loaded from ``seq_files`` first. The result
     of the search is stored in ``self._matches_list`` and the display
     name of the template in ``self._seq_name``.

     :return: False if no templates were loaded, if the task was aborted
         or if the searcher returned None; True otherwise
     """
     loaded = self._load_db(seq_files)
     if not loaded:
         # a single parenthesised argument prints the same text under
         # both the print statement and the print function
         print('No templates were loaded from: %s' % str(seq_files))
         return False
     if self.aborted():
         return False
     template = self._seq_db[seq_id]
     self._seq_name = pretty_rec_name(template)
     search = self._searcher.find_matches
     self._matches_list = search(WorkCounter(), template,
                                 self._max_mismatches,
                                 self._PCR_ProductsFinder)
     return self._matches_list is not None
Exemplo n.º 3
0
 def _find_matches(self, seq_files, seq_id):
     """Load templates from ``seq_files`` and search the one named ``seq_id``.

     Side effects: sets ``self._seq_name`` to the display name of the
     template and ``self._matches_list`` to the value returned by the
     searcher.

     :return: True if the searcher returned something other than None;
         False if loading failed, the task was aborted or the searcher
         returned None
     """
     if not self._load_db(seq_files):
         message = 'No templates were loaded from: %s' % str(seq_files)
         print(message)
         return False
     if self.aborted():
         return False
     template = self._seq_db[seq_id]
     self._seq_name = pretty_rec_name(template)
     matches = self._searcher.find_matches(WorkCounter(), template,
                                           self._max_mismatches,
                                           self._PCR_ProductsFinder)
     self._matches_list = matches
     return matches is not None
Exemplo n.º 4
0
 def worker(template):
     """Build a PCR mixture for a single ``template``.

     ``self`` and ``mismatches`` are taken from the enclosing scope.

     :return: ``(template_name, mixture.save())``, or None if no primer
         produced duplexes or no mixture was created
     """
     collected = []
     for primer in self._primers:
         hits = self._searcher.find_matches(WorkCounter(), template, primer, mismatches)
         if hits:
             primer_duplexes = self._searcher.compile_duplexes(WorkCounter(), *hits)
             if primer_duplexes:
                 collected.append(primer_duplexes)
     if not collected:
         return None
     combined = self._combine_annealings(*collected)
     name = pretty_rec_name(template)
     pcr_mixture = self.create_PCR_mixture(WorkCounter(), name, combined[0], combined[1])
     if pcr_mixture:
         return name, pcr_mixture.save()
     return None
Exemplo n.º 5
0
 def fetchMore(self, index=QModelIndex()):
     """Append the next batch of rows from ``self._db`` to the model.

     At most ``self.rows_to_load`` rows are added per call. Each added row
     is a ``(sid, pretty_rec_name(record))`` pair. Pending rows listed in
     ``self._to_select`` that fall into the loaded range are announced
     through the ``select_row`` signal and removed from the pending list.

     :param index: unused; rows are always inserted under the root index
     """
     start = len(self._rows)
     end = start + min(len(self._db) - start, self.rows_to_load)
     # Nothing left to load: beginInsertRows() requires first <= last, and
     # no pending selection can be satisfied without new rows.
     if end <= start:
         return
     self.beginInsertRows(QModelIndex(), start, end - 1)
     # list() keeps the slice valid when keys() returns a view or iterator
     self._rows.extend((sid, pretty_rec_name(self._db[sid]))
                       for sid in list(self._db.keys())[start:end])
     self.endInsertRows()
     if self._to_select and start <= self._to_select[0]:
         selected = 0
         for row in self._to_select:
             # the loop stops at the first row that is not loaded yet
             if row >= end: break
             self.select_row.emit(row)
             selected += 1
         self._to_select = self._to_select[selected:]
Exemplo n.º 6
0
 def _blast_feature(self, f, c1, c2, features1, features2, evalue,
                    max_rlen):
     """Blast the translated feature ``f`` of ``c1`` against ``c2``.

     The feature is extracted from ``c1`` and translated (table 11), then
     searched with blastp against each of the six-frame translations of
     ``c2``. For every HSP returned by ``BlastCLI.all_hsps`` a pair of
     features (one in ``c1`` coordinates, one for ``c2``) and a color
     interpolated by the identity fraction of the HSP are produced.

     ``features1`` and ``features2`` are not used by this method.

     :return: ``zip`` of (feature on c1, feature on c2, color), or
         ``[(None, None, None)]`` if there are no translations or no HSPs
     """
     translator = Translator(self._abort_event)
     protein = translator.translate(f.extract(c1), 11)
     frames = translator.translate_six_frames_single(c2, 11)
     if not frames:
         return [(None, None, None)]
     blast_results = []
     for frame in frames:
         frame_results = BlastCLI.s2s_blast(protein, frame, evalue,
                                            command='blastp',
                                            task='blastp')
         if frame_results:
             blast_results.extend(frame_results)
     hsps = BlastCLI.all_hsps(blast_results, max_rlen)
     if not hsps:
         return [(None, None, None)]
     record_name = pretty_rec_name(c1)
     qualifiers = f.qualifiers
     feature_name = (qualifiers['locus_tag'][0]
                     if 'locus_tag' in qualifiers else 'CDS')
     protein_len = len(protein)
     origin = f.location.start
     query_features = []
     subject_features = []
     hsp_colors = []
     for hsp in hsps:
         identity = float(hsp.identities) / hsp.align_length
         # identities relative to the whole translated feature, in percent
         coverage = float(hsp.identities) / protein_len * 100
         print('%s %s: %5.1f%% (%5.1f%%)' % (record_name, feature_name,
                                             identity * 100, coverage))
         hsp_colors.append(
             colors.linearlyInterpolatedColor(colors.Color(0, 0, 1, 0.2),
                                              colors.Color(0, 1, 0, 0.2),
                                              0.2, 1, identity))
         # protein positions are converted to nucleotide offsets (x3)
         span = hsp.align_length * 3
         query_from = (hsp.query_start - 1) * 3
         query_to = query_from + span
         subject_from = (hsp.sbjct_start - 1) * 3
         subject_to = subject_from + span
         query_location = FeatureLocation(origin + query_from,
                                          origin + query_to,
                                          strand=hsp.strand[0])
         subject_location = FeatureLocation(subject_from, subject_to,
                                            strand=hsp.strand[1])
         query_features.append(SeqFeature(query_location))
         subject_features.append(SeqFeature(subject_location))
     return zip(query_features, subject_features, hsp_colors)
Exemplo n.º 7
0
 def find(self, counter, template, mismatches):
     """Find annealings of all primers on ``template`` and build a PCR mixture.

     :param counter: work counter; it is configured with
         ``self._num_p + 1`` sub-units, ``counter[i]`` is passed to the
         search of the i-th primer and ``counter[-1]`` to
         ``create_PCR_mixture``
     :param template: template record to search
     :param mismatches: forwarded to ``self._searcher.find``
     :return: ``{template_name: mixture.save()}``, or None if the search
         was aborted, no primer annealed or no mixture was created
     """
     all_annealings = []
     counter.set_subwork(self._num_p+1,
                         self._p_weights+[0.01*self._pw_sum])
     try:
         for i, primer in enumerate(self._primers):
             if self.aborted(): return None
             annealings = self._searcher.find(counter[i], template, primer, mismatches)
             if annealings is None: continue
             all_annealings.append(annealings)
         if not all_annealings: return None
         tname = pretty_rec_name(template)
         mixture = self.create_PCR_mixture(counter[-1], tname,
                                           *self._extract_annealings(*all_annealings))
     finally:
         # Runs on abort and on exceptions too, so that results collected
         # so far are always handed to cleanup_files.
         if all_annealings: cleanup_files(all_annealings)
     counter.done()
     if not mixture: return None
     return {tname: mixture.save()}
 def worker(template):
     """Search ``template`` with every primer and assemble a PCR mixture.

     Uses ``self`` and ``mismatches`` from the enclosing scope.

     :return: a ``(template_name, saved_mixture)`` tuple, or None if no
         duplexes were compiled or the mixture could not be created
     """
     find_matches = self._searcher.find_matches
     compile_duplexes = self._searcher.compile_duplexes
     per_primer = []
     for primer in self._primers:
         found = find_matches(WorkCounter(), template, primer, mismatches)
         if not found:
             continue
         compiled = compile_duplexes(WorkCounter(), *found)
         if not compiled:
             continue
         per_primer.append(compiled)
     if not per_primer:
         return None
     merged = self._combine_annealings(*per_primer)
     template_name = pretty_rec_name(template)
     result = self.create_PCR_mixture(WorkCounter(), template_name,
                                      merged[0], merged[1])
     return (template_name, result.save()) if result else None
 def find(self, counter, template, mismatches):
     """Find annealings of all primers on ``template`` and build a PCR mixture.

     :param counter: work counter; it is configured with
         ``self._num_p + 1`` sub-units, ``counter[i]`` is passed to the
         search of the i-th primer and ``counter[-1]`` to
         ``create_PCR_mixture``
     :param template: template record to search
     :param mismatches: forwarded to ``self._searcher.find``
     :return: ``{template_name: mixture.save()}``, or None if the search
         was aborted, no primer annealed or no mixture was created
     """
     all_annealings = []
     counter.set_subwork(self._num_p + 1,
                         self._p_weights + [0.01 * self._pw_sum])
     try:
         for i, primer in enumerate(self._primers):
             if self.aborted(): return None
             annealings = self._searcher.find(counter[i], template, primer,
                                              mismatches)
             if annealings is None: continue
             all_annealings.append(annealings)
         if not all_annealings: return None
         tname = pretty_rec_name(template)
         mixture = self.create_PCR_mixture(
             counter[-1], tname, *self._extract_annealings(*all_annealings))
     finally:
         # Runs on abort and on exceptions too, so that results collected
         # so far are always handed to cleanup_files.
         if all_annealings: cleanup_files(all_annealings)
     counter.done()
     if not mixture: return None
     return {tname: mixture.save()}
Exemplo n.º 10
0
 def blastp_annotate(self, tag_sequences, subject_record, min_identity, evalue=0.001, table=11, **kwargs):
     """Annotate ``subject_record`` with blastp hits of ``tag_sequences``.

     The subject is translated in six reading frames and every tag is
     searched against every frame. Each HSP whose identity fraction
     (identities / alignment length) is not lower than ``min_identity`` is
     mapped back to nucleotide coordinates, converted to a feature with
     ``hsp2feature`` and appended to ``subject_record.features``.

     :param tag_sequences: protein query records, indexed in parallel with
         the results returned by ``s2s_blast_batch``
     :param subject_record: nucleotide record that receives the features
     :param min_identity: minimal identity fraction of an accepted HSP
     :param evalue: e-value forwarded to ``s2s_blast_batch``
     :param table: translation table passed to ``translate_six_frames``
     :param kwargs: extra options forwarded to ``s2s_blast_batch``
     :return: True if at least one feature was added, False otherwise
     """
     # translate subject in six frames
     with user_message('Translating whole genome in 6 reading frames', '\n'):
         translator = Translator(self._abort_event)
         translation = translator.translate_six_frames(subject_record, table)
     if not translation: return False
     results = self.s2s_blast_batch(tag_sequences, translation, evalue=evalue, command='blastp', **kwargs)
     if results is None: return False
     with user_message('Adding results as annotations...'):
         annotated = False
         subj_len = len(subject_record)
         for i, tag in enumerate(tag_sequences):
             if not results[i]: continue
             tag_name = pretty_rec_name(tag)
             if tag_name != tag.id:
                 tag_name += ' (%s)' % tag.id
             # results of a tag are indexed in parallel with the frames
             for frame, record in enumerate(results[i]):
                 if not record: continue
                 frec = translation[frame]
                 start = frec.annotations['start']
                 strand = frec.annotations['strand']
                 for hit in record:
                     for ali in hit.alignments:
                         for hsp in ali.hsps:
                             if hsp.identities / float(hsp.align_length) < min_identity: continue
                             # 1-based inclusive protein coordinates converted
                             # to a 0-based half-open nucleotide span
                             # relative to the beginning of the frame
                             nuc_start = (hsp.sbjct_start-1)*3
                             nuc_end = hsp.sbjct_end*3
                             if strand == 1:
                                 location = FeatureLocation(start+nuc_start,
                                                            start+nuc_end,
                                                            strand)
                             else:
                                 # Mirror of the forward case, so that the
                                 # feature spans the same number of codons
                                 # on both strands.
                                 # NOTE(review): assumes annotations['start']
                                 # of a reverse frame is its offset on the
                                 # reverse-complement strand - confirm.
                                 location = FeatureLocation(subj_len-start-nuc_end,
                                                            subj_len-start-nuc_start,
                                                            strand)
                             feature = self.hsp2feature(tag_name, 'blastp_annotations', location, hsp)
                             self.add_program(feature, 'blastp')
                             subject_record.features.append(feature)
                             annotated = True
     return annotated
Exemplo n.º 11
0
    def _main(self):
        """Find primers for a gene cluster around the ``gene`` locus.

        On the first run (no ``CO-clusters.gb`` file yet) the gene is
        extracted from the genome, blasted against ``database`` (or the
        saved ``blast.results.xml`` is reused), the regions around the
        long-enough HSPs are fetched, saved to ``CO-clusters.gb`` and the
        method returns 0.

        On later runs the saved segments and several local genomes are
        loaded, forward primers are searched in 'transF' and reverse
        primers in 'cooS'; the primers are printed, added to the
        alignments and the alignments are saved to ``transF.aln`` and
        ``cooS.aln``.

        :return: 0 after fetching and saving the segments; 1 if no forward
            or no reverse primers were found
        :raises RuntimeError: if the genome is not loaded, the gene is not
            found or BLAST returned no results
        """
        email = '*****@*****.**'
        genome_dir = '/home/allis/Dropbox/Science/Микра/Thermococcus/sequence/GenBank/Thermococcales/Thermococcus/'
        genome = 'Thermococcus_barophilus_Ch5.gb'
        gene = 'TBCH5v1_1369'  #cooS
        database = 'nr'
        segment = [3200, 12000]

        seq = SeqLoader.load_file(os.path.join(genome_dir, genome))
        if not seq: raise RuntimeError('No genome loaded')
        seq = seq[0]

        index = get_indexes_of_genes(seq, gene)
        if not index: raise RuntimeError('No gene found')

        feature = seq.features[index[0]]
        query = feature.extract(seq)

        segments_file = 'CO-clusters.gb'
        #get cluster variants if needed
        if not os.path.isfile(segments_file):
            blast_file = 'blast.results.xml'
            if os.path.isfile(blast_file):
                # parse lazily inside the context manager so the file is
                # closed as soon as all records are read
                with open(blast_file) as blast_handle:
                    blast = list(parse(blast_handle))
            else:
                blast = BlastCLI.blast_seq(query,
                                           database,
                                           100,
                                           remote=True,
                                           task='blastn',
                                           parse_results=True,
                                           save_results_to=blast_file)
            if not blast: raise RuntimeError('Blast returned no results')
            # keep only HSPs longer than 700 positions
            flt = BlastFilter(lambda hsp, r: hsp.align_length > 700,
                              filter_hsps=True)
            flt(blast)
            queries = []
            for ali in BlastCLI.iter_alignments(blast):
                q = BlastCLI.Query(ali,
                                   'hsp',
                                   start_offset=segment[0],
                                   end_offset=segment[1])
                # report only the queries that were actually added; this
                # also avoids indexing an empty list
                if not q: continue
                queries.append(q)
                print(q)

            segments = BlastWWW.fetch_queries(email, queries)
            safe_write(segments, segments_file)
            for r in segments:
                print('[%s] %s: %dbp' % (r.id, pretty_rec_name(r), len(r)))
            return 0

        #find primers in alignments of the selected features
        local_files = [
            os.path.join(genome_dir, f)
            for f in ('Thermococcus_barophilus_DT4-complete-genome.gb',
                      'Thermococcus_ST-423.gb', 'Thermococcus_CH1-complete.gb')
        ]
        loader = SeqLoader(self.abort_event)
        segments = loader.load_files([segments_file] + local_files)
        fprimers, transF_ali = find_primers(
            segments, 'transF',
            dict(plen=(20, 30),
                 max_mismatches=5,
                 min_first_matches=3,
                 AT_first=True))
        rprimers, cooS_ali = find_primers(segments,
                                          'cooS',
                                          dict(plen=(20, 30),
                                               max_mismatches=4,
                                               min_first_matches=3,
                                               AT_first=True),
                                          reverse=True)
        if not fprimers:
            print('\nNo forward primers found')
            return 1
        if not rprimers:
            print('\nNo reverse primers found')
            return 1
        print('\nForward primers:')
        for p in fprimers:
            print('%s: %s' % (p.id, p))
        print('\nReverse primers:')
        for p in rprimers:
            print('%s: %s' % (p.id, p))
        print()
        #add primers to alignments and save them
        transF_ali = PrimerFinder.add_primers_to_alignment(
            fprimers, transF_ali)
        cooS_ali = PrimerFinder.add_primers_to_alignment(rprimers,
                                                         cooS_ali,
                                                         reverse=True)
        AlignmentUtils.save(transF_ali, 'transF.aln')
        AlignmentUtils.save(cooS_ali, 'cooS.aln')