def __call__(self, seqs):
    """Annotate the ORFs found by estscan on every given sequence.

    estscan is run once for the whole batch, writing its peptide and DNA
    results to two temporary files.  Each ORF read back from them is added
    to the matching sequence as an 'ORF' SeqFeature.

    seqs -- the sequences to annotate; each one is expected to expose its
            feature list at ``seq.object.features``.

    Returns the same ``seqs`` object, annotated in place.  An empty input
    is returned untouched without running estscan.
    """
    if not seqs:
        return seqs

    # The context managers guarantee that the temporary files are closed
    # (and therefore removed) even if estscan or the parsing fails.
    with NamedTemporaryFile() as pep_fhand, \
            NamedTemporaryFile() as dna_fhand:
        _run_estscan(seqs, pep_fhand.name, dna_fhand.name,
                     self._usage_matrix)

        # now we read the result files
        # The read handles stay open for the whole annotation loop in case
        # the parser hands back something that reads lazily, and they are
        # closed deterministically afterwards.
        with open(pep_fhand.name) as pep_result, \
                open(dna_fhand.name) as dna_result:
            estscan_result = _read_estcan_results(pep_result, dna_result)

            for seq in seqs:
                seq_name = get_name(seq)
                orfs = estscan_result.get(seq_name, {})
                feats = []
                # items() works the same on Python 2 and 3
                for (start, end, strand), str_seqs in orfs.items():
                    # Only start is shifted; end is fine as it is because
                    # the location interval is half open: [start, end[
                    start -= 1
                    location = FeatureLocation(start, end, strand)
                    feats.append(SeqFeature(location=location, type='ORF',
                                            qualifiers=str_seqs))
                if feats:
                    seq.object.features.extend(feats)
    return seqs
def _annotate_polya(seq, min_len, max_cont_mismatches):
    """Annotate the polyA tail of seq following the EMBOSS trimest method.

    A tail is looked for with THREE_PRIME (kept as the 'A' candidate) and
    with FIVE_PRIME (kept as the 'T' candidate).  The longer one wins; when
    both have the same non-zero length one of them is picked at random.
    The winner is appended to ``seq.object.features`` as a
    'polyA_sequence' SeqFeature, with strand 1 for the 'A' candidate and
    -1 for the 'T' candidate.  Nothing is added when no tail is found.
    """
    sequence = get_str_seq(seq)
    tail_a = _detect_polya_tail(sequence, THREE_PRIME, min_len,
                                max_cont_mismatches)
    tail_t = _detect_polya_tail(sequence, FIVE_PRIME, min_len,
                                max_cont_mismatches)

    len_a = tail_a[1] - tail_a[0] if tail_a else 0
    len_t = tail_t[1] - tail_t[0] if tail_t else 0

    if len_a == len_t:
        if not len_a:
            # neither end has a tail, so there is nothing to annotate
            return
        # a tie between two real tails is settled by a coin toss
        use_a_tail = bool(randint(0, 1))
    else:
        use_a_tail = len_a > len_t

    if use_a_tail:
        strand = 1
        start, end = tail_a
    else:
        strand = -1
        start, end = tail_t

    location = FeatureLocation(start, end, strand)
    # We're assuming that the seq has a SeqRecord in it
    seq.object.features.append(SeqFeature(location=location,
                                          type='polyA_sequence'))
def __call__(self, seqrecords):
    """Annotate every sequence with the parts of its BLAST matches.

    The whole batch is blasted against ``self.blastdb`` through a Blaster.
    For each match part found for a sequence, a 'match_part' SeqFeature is
    appended to ``seqrecord.object.features``.  The feature is located on
    the query coordinates and records the subject hit under the 'Target'
    qualifier, plus the expect value ('score'), the identity and the
    database base name.

    Returns the same ``seqrecords`` object, annotated in place.  An empty
    input is returned untouched without running any search.
    """
    if not seqrecords:
        return seqrecords

    blaster = Blaster(seqrecords, self.blastdb, self._program,
                      self._dbtype, filters=self._filters,
                      params=self._params, remote=self._remote)
    hits_by_name = blaster.blasts
    db_name = os.path.basename(self.blastdb)

    for record in seqrecords:
        hits = hits_by_name.get(get_name(record), None)
        if not hits:
            continue

        # matches are numbered from 1 and all the parts of one match share
        # the same feature id
        for match_number, hit in enumerate(hits['matches'], 1):
            target_name = hit['subject']['name']
            feature_id = 'match{0:03d}'.format(match_number)

            for part in hit['match_parts']:
                target_end = part['subject_end']
                target_start = part['subject_start']
                if target_end < target_start:
                    # reversed subject coordinates mean a minus strand hit;
                    # they are stored in ascending order
                    strand = -1
                    target_start, target_end = target_end, target_start
                else:
                    strand = 1

                query_start = part['query_start']
                query_end = part['query_end']

                qualifiers = {
                    'Target': {'start': target_start,
                               'end': target_end,
                               'name': target_name},
                    'score': part['scores']['expect'],
                    'identity': part['scores']['identity'],
                    'blastdb': db_name,
                }
                location = FeatureLocation(query_start, query_end, strand)
                record.object.features.append(
                    SeqFeature(location=location, type='match_part',
                               qualifiers=qualifiers, id=feature_id))
    return seqrecords