Exemplo n.º 1
0
def cluster(seqfile, seqnames, identity=1.0, prefix='cluster-', threads=None):
    """Cluster the named sequences with uclust and return the hit table.

    The sequences listed in ``seqnames`` are fetched from ``seqfile`` into a
    temporary FASTA file with ``wrap.esl_sfetch``, clustered with
    ``uclust.cluster``, and the resulting ``.uc`` output is parsed with
    ``uclust.parse_uclust_as_df``.

    Args:
        seqfile: sequence source handed to ``wrap.esl_sfetch``.
        seqnames: names of the sequences to fetch from ``seqfile``.
        identity: forwarded to ``uclust.cluster`` as ``pct_id``.
        prefix: prefix for the names of the two temporary files. Any ``/``
            is replaced with a backslash before use.
        threads: forwarded unchanged to ``uclust.cluster``.

    Returns:
        The parsed uclust output restricted to the columns ``type``,
        ``query_label`` and ``target_label``, with rows of type ``'C'``
        removed.
    """
    # A '/' in the prefix would be taken as a directory separator when the
    # temporary file name is built (presumably util.ntf wraps
    # tempfile.NamedTemporaryFile -- confirm), so neutralise it up front.
    prefix = prefix.replace('/', '\\')
    with util.ntf(prefix=prefix, suffix='.fasta') as fa, \
            util.ntf(prefix=prefix, suffix='.uc') as uc:
        wrap.esl_sfetch(seqfile, seqnames, fa)
        # Make sure the fetched sequences are on disk before uclust opens
        # the file by name.
        fa.flush()
        uclust.cluster(fa.name,
                       uc.name,
                       pct_id=identity,
                       pre_sorted=False,
                       quiet=True,
                       threads=threads)
        df = uclust.parse_uclust_as_df(uc)
        # Drop the rows of type 'C'; only the remaining row types are returned.
        df = df[df.type != 'C']
        df = df[['type', 'query_label', 'target_label']]

        return df
Exemplo n.º 2
0
def cluster(seqfile, seqnames, identity=1.0, prefix='cluster-', threads=None):
    """Run uclust on a subset of sequences and return its parsed hits.

    ``seqnames`` are pulled out of ``seqfile`` into a temporary FASTA file
    (``wrap.esl_sfetch``), clustered by ``uclust.cluster`` with
    ``pct_id=identity``, and the ``.uc`` result is loaded through
    ``uclust.parse_uclust_as_df``.

    Args:
        seqfile: sequence source given to ``wrap.esl_sfetch``.
        seqnames: names of the sequences to extract.
        identity: value passed to ``uclust.cluster`` as ``pct_id``.
        prefix: name prefix for the temporary files; ``/`` characters are
            swapped for backslashes first.
        threads: passed through to ``uclust.cluster``.

    Returns:
        The parsed table without rows of type ``'C'``, limited to the
        ``type``, ``query_label`` and ``target_label`` columns.
    """
    # '/' in a temp-file prefix confuses the filesystem, so replace it.
    safe_prefix = prefix.replace('/', '\\')

    with util.ntf(prefix=safe_prefix, suffix='.fasta') as fasta_tmp:
        with util.ntf(prefix=safe_prefix, suffix='.uc') as uc_tmp:
            wrap.esl_sfetch(seqfile, seqnames, fasta_tmp)
            # Flush so the data is visible to uclust, which opens by name.
            fasta_tmp.flush()

            uclust.cluster(
                fasta_tmp.name,
                uc_tmp.name,
                pct_id=identity,
                pre_sorted=False,
                quiet=True,
                threads=threads,
            )

            records = uclust.parse_uclust_as_df(uc_tmp)
            not_type_c = records.type != 'C'
            wanted_columns = ['type', 'query_label', 'target_label']
            return records[not_type_c][wanted_columns]
Exemplo n.º 3
0
 def test01(self):
     # Parse the fixture and keep every row whose type is not 'C'.
     parsed = uclust.parse_uclust_as_df(self.infile)
     non_c_rows = parsed[parsed['type'] != 'C']
     # target_label always has a value for types S and H
     missing_target = non_c_rows['target_label'].isnull()
     self.assertFalse(any(missing_target))