Пример #1
0
def main(args):
    """Relabel sequences with running numbers and write a lookup table.

    Every file in ``args.files`` is loaded with ``bioio.load``, sorted via
    ``mseqs.sort(lambda x: x.label)`` and each of its sequences receives a
    zero-padded running number (``0001``, ``0002``, ...) as its new label.
    The renamed, upper-cased sequences are saved to ``args.outfile``; a
    tab-separated table that maps each new label back to the original
    label and selected attributes is written to ``args.tabfile``.

    Args:
        args: object providing ``files`` (iterable of input paths),
            ``tabfile`` (path of the table to write) and ``outfile``
            (path handed to ``bioio.save``).
    """

    tables = []
    container = bioio.multisequence()

    # The counter runs across all input files so that labels stay unique.
    n = 1
    for infile in args.files:

        mseqs = bioio.load(infile)

        mseqs.sort(lambda x: x.label)

        for s in mseqs:
            tables.append((
                n,
                s.label,
                s.attr.get('collection_date', ''),
                s.attr.get('country', ''),
                s.attr.get('isolate', ''),
                s.definition,
            ))
            container.append(bioio.biosequence('%04d' % n, s.seq.upper()))
            n += 1

    tables.sort()

    # Write the lookup table; the context manager guarantees the handle is
    # closed even when one of the writes raises.
    with open(args.tabfile, 'w') as tabfile:
        tabfile.write('LABEL\tACCNO\tDATE\tCOUNTRY\tISOLATE\tDEFINITION\n')
        for r in tables:
            tabfile.write('%04d\t%s\t%s\t%s\t%s\t%s\n' % r)

    bioio.save(container, args.outfile)
Пример #2
0
 def new_sequence(self):
     """Add an empty sequence labelled ``~new~`` to the model.

     If the model reports a selection, the new sequence is inserted at the
     largest selected index; otherwise it is appended. The model's
     ``ContentUpdated`` signal is emitted afterwards in both cases.
     """
     blank = bioio.biosequence('~new~', b'')
     model = self.model()
     selection = model.selection()
     if selection is None:
         model.append(blank)
     else:
         model.insert(max(selection.indices()), blank)
     model.signals().ContentUpdated.emit()
Пример #3
0
 def retranslate(self, idx=None):
     """Recompute translated sequences from the source alignment.

     Args:
         idx: index of the single entry to re-translate. When ``None``
             (the default) the container is cleared and rebuilt with one
             translation per sequence of ``self._src_msa``.

     The check is ``idx is None`` rather than a truthiness test so that
     ``idx == 0`` updates only the first entry instead of triggering a
     full rebuild.
     """
     if idx is None:
         self.clear()
         # A separate loop variable keeps the ``idx`` parameter intact.
         for i in range(len(self._src_msa)):
             self.append(
                 bioio.biosequence('', funcs.translated(self._src_msa[i].seq,
                                 start_pos=self._start_atg)))
             # NOTE(review): looks like leftover debugging output; kept so
             # that what is written to stdout does not change.
             print(self[-1].seq)
     else:
         self[idx].seq = funcs.translated(self._src_msa[idx].seq,
                             start_pos=self._start_atg)
Пример #4
0
 def make_consensus(self, thresholds=(0.5, 0.75, 0.90, 0.95, 0.99)):
     """Append one consensus sequence per threshold to the model.

     The profile is built from the current selection when it is a
     ``LabelSelection`` (copied to an alignment first), otherwise from the
     whole model. An amino-acid profile is used when the model type is
     ``bioio.PROTEIN``, a nucleic-acid profile otherwise.

     Args:
         thresholds: iterable of threshold values; each one is passed to
             ``profile.consensus`` and yields a sequence labelled
             ``CONS-<threshold>``. The default is an immutable tuple so
             the default object cannot be altered between calls.
     """
     from seqpy.core.funcs.profiles import na_profile, aa_profile
     m = self.model()
     if isinstance(m.selection(), LabelSelection):
         msa = m.selection().copy_to_msa()
     else:
         msa = m
     if m.type() == bioio.PROTEIN:
         profile = aa_profile(msa)
     else:
         profile = na_profile(msa)
     for th in thresholds:
         m.append(bioio.biosequence('CONS-%3.2f' % th, profile.consensus(th)))
     # Notify listeners once, after all consensus sequences were added.
     m.signals().ContentUpdated.emit()
Пример #5
0
def dereplicate(mseq):
    """Collapse entries of *mseq* that share the same sequence.

    Entries are grouped by ``str(entry.seq)``. Each group becomes a single
    biosequence that carries the sequence object of the first entry seen
    and a label made of all member labels joined with ``#``.

    Returns:
        A new ``multisequence`` with one entry per distinct sequence, in
        order of first occurrence.
    """

    from seqpy.core.bioio import biosequence, multisequence

    # key: sequence text -> (first sequence object, labels collected so far)
    groups = {}
    for entry in mseq:
        key = str(entry.seq)
        groups.setdefault(key, (entry.seq, []))[1].append(entry.label)

    unique = multisequence()
    for seq, labels in groups.values():
        unique.append(biosequence('#'.join(labels), seq))

    return unique
Пример #6
0
def main(args):
    """Trim every trace file and save the resulting sequences.

    Each path in ``args.files`` is loaded with ``bioio.load`` and passed to
    ``traceutils.trim`` together with ``args.winsize`` and
    ``args.qual_threshold``. Files for which trimming returns a falsy
    result are skipped. Every remaining sequence is labelled with its
    input path and annotated with the ``upstream_trim`` and
    ``downstream_trim`` values (stored as strings). The collection is
    written to ``args.outfile``.
    """

    collected = bioio.multisequence()

    for path in args.files:
        loaded = bioio.load(path)
        trimmed = traceutils.trim(loaded, args.winsize, args.qual_threshold)
        if trimmed:
            # The quality values are part of the result but not used here.
            bases, _quals, up_trim, down_trim = trimmed
            record = bioio.biosequence(path, bases)
            record.add_attr('upstream_trim', str(up_trim))
            record.add_attr('downstream_trim', str(down_trim))
            collected.append(record)

    bioio.save(collected, args.outfile)
Пример #7
0
def condensed(multiseq):
    """Keep only the columns of *multiseq* that vary between sequences.

    A column is kept when at least one sequence after the first holds a
    value that is neither 78 nor 110 (ASCII ``N`` / ``n``), differs from
    the first sequence's value, and is not exactly 32 away from it (the
    ASCII distance between upper- and lower-case letters).

    Returns:
        The object returned by ``multiseq.clone()`` with the kept column
        values appended to its sequences, plus a ``position`` control
        sequence holding the kept column indices as comma-separated ASCII
        digits.
        NOTE(review): presumably ``clone()`` yields sequences without
        residues -- confirm against the multisequence implementation.
    """

    from seqpy.core.bioio import biosequence

    upper_n, lower_n, case_gap = 78, 110, 32

    result = multiseq.clone()
    kept = []
    for col in range(multiseq.max_seqlen()):
        ref = multiseq[0][col]
        varies = any(
            s[col] != upper_n and s[col] != lower_n
            and s[col] != ref and abs(s[col] - ref) != case_gap
            for s in multiseq[1:]
        )
        if not varies:
            # Invariant column: nothing to copy.
            continue
        for row in range(len(multiseq)):
            result[row].append(multiseq[row][col])
        kept.append(col)

    encoded = b','.join(('%d' % p).encode('ASCII') for p in kept)
    result.add_control('position', biosequence('position', encoded))
    return result
Пример #8
0
    def make_consensus_marker(self):
        """Append a ``CONS-MARKER`` sequence flagging non-conserved columns.

        The profile is built from the current selection when it is a
        ``LabelSelection`` (copied to an alignment first), otherwise from
        the whole model; the model type decides between an amino-acid and
        a nucleic-acid profile. For every row of ``profile.mat`` the marker
        gets ``*`` when the row's largest value is below 0.99 (the row
        index is also printed) and a blank otherwise. The model's
        ``ContentUpdated`` signal is emitted at the end.
        """
        from seqpy.core.funcs.profiles import na_profile, aa_profile

        star, blank = 42, 32  # ASCII '*' and ' '

        model = self.model()
        if isinstance(model.selection(), LabelSelection):
            source = model.selection().copy_to_msa()
        else:
            source = model
        build_profile = aa_profile if model.type() == bioio.PROTEIN else na_profile
        profile = build_profile(source)

        matrix = profile.mat
        marker = bytearray()
        for row in range(len(matrix)):
            best = matrix[row].argmax()
            if matrix[row, best] >= 0.99:
                marker.append(blank)
                continue
            marker.append(star)
            print(row)
        model.append(bioio.biosequence('CONS-MARKER', marker))
        model.signals().ContentUpdated.emit()
Пример #9
0
 def init_samples(self, line):
     """Run the parent initialisation, then add one sequence per sample.

     After ``super().init_samples(line)`` a biosequence created with only
     a label is appended to ``self.mseq`` for every entry of
     ``self.sample_labels``.
     """
     super().init_samples(line)
     for sample_label in self.sample_labels:
         placeholder = bioio.biosequence(label=sample_label)
         self.mseq.append(placeholder)
Пример #10
0
 def get_sequence(self):
     """Return a biosequence labelled ``self.name()`` holding ``self.edit_bases``."""
     from seqpy.core.bioio import biosequence
     label = self.name()
     return biosequence(label, self.edit_bases)