Exemplo n.º 1
0
    def test_squeeze_seq(self):
        """squeeze_seq reduces every homopolymer run to a single nucleotide."""

        # (input sequence, expected squeezed sequence), checked in order
        expectations = (
            ("AAAGGGAAACCCGGGA", "AGACGA"),
            ("AAAATATTTAGGC", "ATATAGC"),
            # empty input stays empty
            ("", ""),
            # no adjacent repeats: expected to come back unchanged
            ("ATGCATGCATGC", "ATGCATGCATGC"),
        )
        for raw, squeezed in expectations:
            self.assertEqual(squeeze_seq(raw), squeezed)
Exemplo n.º 2
0
   def test_squeeze_seq(self):
      """squeeze_seq reduces every homopolymer run to a single nucleotide."""

      # (input sequence, expected squeezed sequence), checked in order
      expectations = (
         ("AAAGGGAAACCCGGGA", "AGACGA"),
         ("AAAATATTTAGGC", "ATATAGC"),
         # empty input stays empty
         ("", ""),
         # no adjacent repeats: expected to come back unchanged
         ("ATGCATGCATGC", "ATGCATGCATGC"),
      )
      for raw, squeezed in expectations:
         self.assertEqual(squeeze_seq(raw), squeezed)
Exemplo n.º 3
0
def prefix_filter_flowgrams(flowgrams, squeeze=False):
    """Filter flowgrams by common prefixes.

    flowgrams: iterable source of flowgrams; each item must provide a
               ``Name`` attribute and a ``toSeq(truncate=True)`` method.

    squeeze: if True, each sequence is passed through ``squeeze_seq``
             (collapse all poly-X to X) before the prefix map is built.

    Returns a tuple ``(l, orig_l, mapping)``:
        mapping: the result of ``build_prefix_map`` on the
                 (name, sequence string) pairs
        l:       ``len(mapping)``
        orig_l:  the summed lengths of all values in ``mapping`` plus ``l``
                 (presumably the flowgram count before filtering -- confirm
                 against ``build_prefix_map``)
    """

    # Collect (name, sequence) pairs lazily. A generator expression keeps
    # memory use flat, like the former imap() call, without depending on
    # the Python-2-only itertools.imap.
    if squeeze:
        seqs = ((f.Name, squeeze_seq(str(f.toSeq(truncate=True))))
                for f in flowgrams)
    else:
        seqs = ((f.Name, str(f.toSeq(truncate=True))) for f in flowgrams)

    # get prefix mappings
    mapping = build_prefix_map(seqs)
    num_keys = len(mapping)
    orig_total = sum(len(members) for members in mapping.values()) + num_keys

    return (num_keys, orig_total, mapping)
Exemplo n.º 4
0
def prefix_filter_flowgrams(flowgrams, squeeze=False):
    """Filter flowgrams by common prefixes.

    flowgrams: iterable source of flowgrams; each item must provide a
               ``Name`` attribute and a ``toSeq(truncate=True)`` method.

    squeeze: if True, each sequence is passed through ``squeeze_seq``
             (collapse all poly-X to X) before the prefix map is built.

    Returns a tuple ``(l, orig_l, mapping)``:
        mapping: the result of ``build_prefix_map`` on the
                 (name, sequence string) pairs
        l:       ``len(mapping)``
        orig_l:  the summed lengths of all values in ``mapping`` plus ``l``
                 (presumably the flowgram count before filtering -- confirm
                 against ``build_prefix_map``)
    """

    # Collect (name, sequence) pairs lazily. A generator expression keeps
    # memory use flat, like the former imap() call, without depending on
    # the Python-2-only itertools.imap.
    if squeeze:
        seqs = ((f.Name, squeeze_seq(str(f.toSeq(truncate=True))))
                for f in flowgrams)
    else:
        seqs = ((f.Name, str(f.toSeq(truncate=True))) for f in flowgrams)

    # get prefix mappings
    mapping = build_prefix_map(seqs)
    num_keys = len(mapping)
    orig_total = sum(len(members) for members in mapping.values()) + num_keys

    return (num_keys, orig_total, mapping)