def main(args):
    """Relabel every input sequence with a running number and write a lookup table.

    Each file in ``args.files`` is loaded with ``bioio.load`` and sorted with
    the label-returning function.  Every sequence receives a zero-padded
    running number (``'%04d'``) as its new label and its sequence data is
    upper-cased.  A tab-separated table linking the new label to the
    original label, collection date, country, isolate and definition is
    written to ``args.tabfile``; the relabelled sequences are saved to
    ``args.outfile``.

    Args:
        args: object providing ``files`` (iterable of input paths),
            ``tabfile`` (path of the table to write) and ``outfile``
            (path passed to ``bioio.save``).
    """
    tables = []
    container = bioio.multisequence()
    n = 1
    for infile in args.files:
        mseqs = bioio.load(infile)
        # NOTE(review): the sort function is passed positionally; confirm the
        # multisequence ``sort`` signature accepts it that way.
        mseqs.sort(lambda x: x.label)
        for s in mseqs:
            tables.append((
                n,
                s.label,
                s.attr.get('collection_date', ''),
                s.attr.get('country', ''),
                s.attr.get('isolate', ''),
                s.definition,
            ))
            container.append(bioio.biosequence('%04d' % n, s.seq.upper()))
            n += 1

    # write to output file
    # The context manager guarantees the table file is closed even if one of
    # the writes raises.
    with open(args.tabfile, 'w') as tabfile:
        tabfile.write('LABEL\tACCNO\tDATE\tCOUNTRY\tISOLATE\tDEFINITION\n')
        # Rows sort on their first field, the unique running number.
        tables.sort()
        for r in tables:
            tabfile.write('%04d\t%s\t%s\t%s\t%s\t%s\n' % r)

    bioio.save(container, args.outfile)
def new_sequence(self):
    """Add an empty sequence labelled ``'~new~'`` to the model.

    When the model reports a selection, the largest selected index is
    passed to ``model.insert``; without a selection the new sequence is
    appended.  ``ContentUpdated`` is emitted afterwards in both cases.
    """
    placeholder = bioio.biosequence('~new~', b'')
    model = self.model()
    selection = model.selection()

    if selection is None:
        model.append(placeholder)
    else:
        last_selected = max(selection.indices())
        model.insert(last_selected, placeholder)

    model.signals().ContentUpdated.emit()
def retranslate(self, idx=None):
    """Recompute translated sequences from the source alignment.

    Args:
        idx: index of the single sequence to retranslate.  When ``None``
            (the default) the container is cleared and every sequence of
            ``self._src_msa`` is translated again.

    The check is ``idx is None`` rather than a truthiness test, so that
    ``idx=0`` retranslates only the first sequence instead of triggering a
    full rebuild.
    """
    def translate(i):
        # Translation always starts from the configured start position.
        return funcs.translated(self._src_msa[i].seq,
                                start_pos=self._start_atg)

    if idx is None:
        self.clear()
        for i in range(len(self._src_msa)):
            self.append(bioio.biosequence('', translate(i)))
            # NOTE(review): looks like leftover debug output -- confirm
            # before removing.
            print(self[-1].seq)
    else:
        self[idx].seq = translate(idx)
def make_consensus(self, thresholds=(0.5, 0.75, 0.90, 0.95, 0.99)):
    """Append one consensus sequence per threshold to the model.

    The profile is built from the current selection when it is a
    ``LabelSelection`` (via ``copy_to_msa()``), otherwise from the whole
    model.  ``aa_profile`` is used when the model type is
    ``bioio.PROTEIN`` and ``na_profile`` otherwise.  Each consensus is
    labelled ``'CONS-<threshold>'`` using the ``%3.2f`` format, and
    ``ContentUpdated`` is emitted once after all have been appended.

    Args:
        thresholds: iterable of threshold values handed to
            ``profile.consensus``.  The default is an immutable tuple so
            the default object cannot be modified between calls.
    """
    from seqpy.core.funcs.profiles import na_profile, aa_profile

    m = self.model()
    selection = m.selection()
    if isinstance(selection, LabelSelection):
        msa = selection.copy_to_msa()
    else:
        msa = m

    if m.type() == bioio.PROTEIN:
        profile = aa_profile(msa)
    else:
        profile = na_profile(msa)

    # The profile is computed once, before any consensus row is appended.
    for th in thresholds:
        m.append(bioio.biosequence('CONS-%3.2f' % th, profile.consensus(th)))

    m.signals().ContentUpdated.emit()
def dereplicate(mseq):
    """Collapse sequences with identical content into single entries.

    Sequences are grouped by ``str(s.seq)``.  Each group becomes one
    ``biosequence`` holding the sequence data of the group's first member,
    labelled with all member labels joined by ``'#'``.

    Returns:
        A new ``multisequence`` with one entry per distinct sequence, in
        order of first appearance.
    """
    from seqpy.core.bioio import biosequence, multisequence

    # key -> (sequence data of the first member, labels of every member)
    groups = {}
    for member in mseq:
        key = str(member.seq)
        entry = groups.get(key)
        if entry is None:
            groups[key] = (member.seq, [member.label])
        else:
            entry[1].append(member.label)

    unique = multisequence()
    for seq_data, labels in groups.values():
        unique.append(biosequence('#'.join(labels), seq_data))
    return unique
def main(args):
    """Trim every trace file and save the resulting reads together.

    Each path in ``args.files`` is loaded and passed to
    ``traceutils.trim`` with ``args.winsize`` and ``args.qual_threshold``.
    Files for which the trim result is falsy are skipped.  For the others
    a sequence labelled with the input path is created from the returned
    bases, with the upstream and downstream trim values stored as string
    attributes.  All sequences are saved to ``args.outfile``.
    """
    trimmed_reads = bioio.multisequence()

    for path in args.files:
        outcome = traceutils.trim(bioio.load(path), args.winsize,
                                  args.qual_threshold)
        if outcome:
            bases, _quals, trim_up, trim_down = outcome
            read = bioio.biosequence(path, bases)
            read.add_attr('upstream_trim', str(trim_up))
            read.add_attr('downstream_trim', str(trim_down))
            trimmed_reads.append(read)

    bioio.save(trimmed_reads, args.outfile)
def condensed(multiseq):
    """Return a copy of *multiseq* that keeps only the differing columns.

    A column is kept when at least one sequence after the first holds a
    value that is not 78 or 110 (ASCII ``N`` / ``n``), differs from the
    first sequence's value, and is not exactly 32 away from it (the ASCII
    distance between upper and lower case).  The kept column indices are
    stored as a comma-separated ``'position'`` control sequence.

    NOTE(review): the distance-32 test also matches non-letter pairs such
    as ``-`` (45) and ``M`` (77) -- confirm this is acceptable.
    """
    from seqpy.core.bioio import biosequence

    def column_differs(col):
        # True as soon as one later sequence disagrees with the first one.
        reference = multiseq[0][col]
        for other in multiseq[1:]:
            value = other[col]
            if value == 78 or value == 110:
                continue
            if value != reference and abs(value - reference) != 32:
                return True
        return False

    # presumably clone() yields sequences ready to be appended to -- TODO confirm
    result = multiseq.clone()
    kept_columns = []
    for col in range(multiseq.max_seqlen()):
        if not column_differs(col):
            continue
        for row in range(len(multiseq)):
            result[row].append(multiseq[row][col])
        kept_columns.append(col)

    encoded_positions = b','.join(
        str(col).encode('ASCII') for col in kept_columns
    )
    result.add_control('position', biosequence('position', encoded_positions))
    return result
def make_consensus_marker(self):
    """Append a ``'CONS-MARKER'`` row flagging columns below 0.99.

    The profile comes from the current selection when it is a
    ``LabelSelection``, otherwise from the whole model; ``aa_profile`` is
    used for ``bioio.PROTEIN`` models and ``na_profile`` otherwise.  For
    every row of ``profile.mat`` whose largest value is below 0.99 the
    marker gets byte 42 (``*``) and the row index is printed; all other
    positions get byte 32 (space).  ``ContentUpdated`` is emitted after
    the marker row is appended.
    """
    from seqpy.core.funcs.profiles import na_profile, aa_profile

    model = self.model()
    selection = model.selection()
    source = (selection.copy_to_msa()
              if isinstance(selection, LabelSelection) else model)

    build_profile = aa_profile if model.type() == bioio.PROTEIN else na_profile
    matrix = build_profile(source).mat

    marker = bytearray()
    for row in range(len(matrix)):
        best = matrix[row].argmax()
        if matrix[row, best] >= 0.99:
            marker.append(32)
            continue
        marker.append(42)
        print(row)

    model.append(bioio.biosequence('CONS-MARKER', marker))
    model.signals().ContentUpdated.emit()
def init_samples(self, line):
    """Run the parent initialisation, then add one sequence per sample label.

    After ``super().init_samples(line)`` returns, a ``biosequence``
    created with only its label set is appended to ``self.mseq`` for
    every entry of ``self.sample_labels``.
    """
    super().init_samples(line)

    for sample_label in self.sample_labels:
        labelled = bioio.biosequence(label=sample_label)
        self.mseq.append(labelled)
def get_sequence(self):
    """Return a ``biosequence`` built from this object's name and ``edit_bases``."""
    from seqpy.core.bioio import biosequence

    label = self.name()
    bases = self.edit_bases
    return biosequence(label, bases)