Exemplo n.º 1
0
 def test_prefix(self):
     """Check lookup, key membership and prefix queries on a four-key trie."""
     trieobj = trie.trie()
     for word, value in (("hello", 5), ("he", 7), ("hej", 9), ("foo", "bar")):
         trieobj[word] = value
     self.assertEqual(sorted(trieobj.keys()), ["foo", "he", "hej", "hello"])
     self.assertEqual(trieobj["hello"], 5)
     self.assertEqual(trieobj.get("bye"), None)
     # Exact-key membership: (key, expected has_key() result).
     for word, present in (("hello", True), ("he", True), ("bye", False)):
         self.assertEqual(trieobj.has_key(word), present)
     # Prefix membership: (prefix, expected has_prefix() result).
     for prefix, present in (("h", True), ("hel", True),
                             ("foa", False), ("hello world", False)):
         self.assertEqual(trieobj.has_prefix(prefix), present)
     self.assertEqual(len(trieobj), 4)
     # with_prefix() order is not relied upon for the multi-key result.
     self.assertEqual(sorted(trieobj.with_prefix("he")), ["he", "hej", "hello"])
     for prefix, matches in (("l", []), ("hej", ["hej"]), ("hejk", [])):
         self.assertEqual(trieobj.with_prefix(prefix), matches)
Exemplo n.º 2
0
 def test_save(self):
     """Check approximate matching, then round-trip the trie via save/load."""
     trieobj = trie.trie()
     trieobj["foo"] = 1
     self.assertEqual(list(trieobj.keys()), ["foo"])
     self.assertEqual(list(trieobj.values()), [1])
     self.assertEqual(trieobj.get("bar", 99), 99)
     trieobj["hello"] = '55a'
     # Each case is (query, second get_approximate argument, expected hits).
     for query, limit, hits in (
             ("foo", 0, [("foo", 1, 0)]),
             ("foo", 1, [("foo", 1, 0)]),
             ("foa", 0, []),
             ("foa", 1, [("foo", 1, 1)])):
         self.assertEqual(trieobj.get_approximate(query, limit), hits)
     # With a limit of 2, "foo" is reported once per alignment:
     # foo  foo-  foo-
     # foa  f-oa  fo-a
     # mismatch a->o
     # insertion after f, deletion of o
     # insertion after o, deletion of o
     self.assertEqual(sorted(trieobj.get_approximate("foa", 2)),
                      [("foo", 1, 1), ("foo", 1, 2), ("foo", 1, 2)])
     # Tally how many times each distinct hit is reported.
     tally = {}
     for hit in trieobj.get_approximate("foo", 4):
         tally[hit] = tally.get(hit, 0) + 1
     self.assertEqual(sorted(tally.items()),
                      [(('foo', 1, 0), 1), (('hello', '55a', 4), 6)])
     # Write the trie to an in-memory binary buffer and read it back.
     handle = BytesIO()
     trie.save(handle, trieobj)
     handle.seek(0)
     trieobj = trie.load(handle)
     loaded_keys = list(trieobj.keys())
     for word in ("foo", "hello"):
         self.assertTrue(word in loaded_keys)
     self.assertEqual(repr(trieobj["foo"]), '1')
     self.assertEqual(repr(trieobj["hello"]), "'55a'")
Exemplo n.º 3
0
    def test_large_save_load(self):
        """Round-trip a large trie through save/load and verify every entry.

        For each of three string lengths (100, 1000 and 10000 characters),
        generate 1000 random lowercase key/value pairs and insert them into
        both a trie and a reference dict. The entries are checked after each
        batch, and again after the trie has been written to a temporary file
        and read back.
        """
        cmp_dict = {}
        trieobj = trie.trie()
        self.assertEqual(trieobj.get("foobar"), None)
        for str_len in [100, 1000, 10000]:
            # cmp_dict deliberately keeps accumulating across batches so the
            # post-load check below covers all three length categories, not
            # just the last one.
            for _ in range(1000):
                key = ''.join([
                    random.choice(ascii_lowercase) for _ in range(str_len)
                ])
                val = ''.join([
                    random.choice(ascii_lowercase) for _ in range(str_len)
                ])
                trieobj[key] = val
                cmp_dict[key] = val
            for key in cmp_dict:
                self.assertEqual(trieobj[key], cmp_dict[key])

        with tempfile.TemporaryFile(mode='w+b') as f:
            trie.save(f, trieobj)
            f.seek(0)
            trieobj = trie.load(f)
        for key in cmp_dict:
            self.assertEqual(trieobj[key], cmp_dict[key])
Exemplo n.º 4
0
 def test_save(self):
     """Check approximate matching, then round-trip the trie via save/load."""
     trieobj = trie.trie()
     trieobj["foo"] = 1
     self.assertEqual(trieobj.keys(), ["foo"])
     self.assertEqual(trieobj.values(), [1])
     self.assertEqual(trieobj.get("bar", 99), 99)
     trieobj["hello"] = "55a"
     only_foo = [("foo", 1, 0)]
     self.assertEqual(trieobj.get_approximate("foo", 0), only_foo)
     self.assertEqual(trieobj.get_approximate("foo", 1), only_foo)
     self.assertEqual(trieobj.get_approximate("foa", 0), [])
     self.assertEqual(trieobj.get_approximate("foa", 1), [("foo", 1, 1)])
     # With a limit of 2, "foo" is reported once per alignment:
     # foo  foo-  foo-
     # foa  f-oa  fo-a
     # mismatch a->o
     # insertion after f, deletion of o
     # insertion after o, deletion of o
     self.assertEqual(sorted(trieobj.get_approximate("foa", 2)),
                      [("foo", 1, 1), ("foo", 1, 2), ("foo", 1, 2)])
     # Count the occurrences of each distinct hit.
     counts = {}
     for hit in trieobj.get_approximate("foo", 4):
         counts.setdefault(hit, 0)
         counts[hit] += 1
     self.assertEqual(sorted(counts.items()),
                      [(("foo", 1, 0), 1), (("hello", "55a", 4), 6)])
     # Write the trie to an in-memory buffer and read it back.
     handle = StringIO()
     trie.save(handle, trieobj)
     handle.seek(0)
     trieobj = trie.load(handle)
     loaded_keys = trieobj.keys()
     for word in ("foo", "hello"):
         self.assertTrue(word in loaded_keys)
     self.assertEqual(repr(trieobj["foo"]), "1")
     self.assertEqual(repr(trieobj["hello"]), "'55a'")
def get_list_of_candidates(target_string, PAM, gRNA_length, exclude_stop_codons, consider_negative, alt_pams):
    """Collect guide candidates from a target sequence into a trie.

    Parameters:
        target_string: sequence to search on the forward strand.
        PAM: PAM pattern, expanded through ``NFiller(PAM).get_list()`` when
            ``alt_pams`` is None.
        gRNA_length: passed through to ``find_candidates``.
        exclude_stop_codons: when true, skip candidates whose sequence
            contains 'TAG', 'TAA' or 'TGA'.
        consider_negative: when true, also search the reverse complement of
            the target.
        alt_pams: optional explicit list of 3-letter PAMs over A/C/G/T; used
            instead of ``PAM`` when not None.

    Returns:
        A trie mapping each candidate sequence to '+' (found on the given
        strand) or '-' (found on the reverse complement; the key is stored
        reverse-complemented). The first occurrence of a key wins.

    Raises:
        ValueError: if an entry of ``alt_pams`` is not 3 characters long or
            contains a character other than A, C, G, T.
    """
    stop_codons = ('TAG', 'TAA', 'TGA')
    target = target_string
    target_rev = reverse_complement(target) if consider_negative else None

    if alt_pams is None:
        PAMs = NFiller(PAM).get_list()
    else:
        for pam in alt_pams:
            if len(pam) != 3:
                raise ValueError('Length of one or more PAMs not set to 3')
            for character in pam:
                if character not in ('A', 'C', 'G', 'T'):
                    raise ValueError('Invalid PAM has been entered')
        PAMs = alt_pams

    candidates = []
    candidates_rev = []
    # Use a distinct loop name so the PAM parameter is not shadowed.
    for pam in PAMs:
        candidates.extend(find_candidates(target, pam, gRNA_length))
        if consider_negative:
            candidates_rev.extend(find_candidates(target_rev, pam, gRNA_length))

    trie_dic = trie.trie()
    # Mirror of the trie's keys: gives O(1) duplicate checks instead of
    # materialising trie_dic.keys() for every candidate.
    seen = set()

    for candidate in candidates:
        key = candidate[1]
        if exclude_stop_codons and any(codon in key for codon in stop_codons):
            continue
        if key not in seen:
            trie_dic[key] = '+'
            seen.add(key)

    for candidate in candidates_rev:
        # The stop-codon filter is applied to the sequence as found on the
        # reverse strand, before it is complemented back.
        found = candidate[1]
        if exclude_stop_codons and any(codon in found for codon in stop_codons):
            continue
        key = reverse_complement(found)
        if key not in seen:
            trie_dic[key] = '-'
            seen.add(key)

    return trie_dic
Exemplo n.º 6
0
 def test_find(self):
     """Check triefind match/match_all/find/find_words on a small trie."""
     from Bio import triefind
     text = "hello world!"
     trieobj = trie.trie()
     for word, value in (("hello", 5), ("he", 7), ("hej", 9),
                         ("foo", "bar"), ("wor", "ld")):
         trieobj[word] = value
     self.assertEqual(triefind.match(text, trieobj), "hello")
     self.assertEqual(sorted(triefind.match_all(text, trieobj)),
                      ["he", "hello"])
     self.assertEqual(sorted(triefind.find(text, trieobj)),
                      [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9)])
     self.assertEqual(sorted(triefind.find_words(text, trieobj)),
                      [("hello", 0, 5)])
     # Adding the full word "world" makes it show up in both searches.
     trieobj["world"] = "full"
     self.assertEqual(sorted(triefind.find(text, trieobj)),
                      [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9),
                       ("world", 6, 11)])
     self.assertEqual(sorted(triefind.find_words(text, trieobj)),
                      [("hello", 0, 5), ("world", 6, 11)])
Exemplo n.º 7
0
def parse_barcode_file(fp, primer=None, header=False):
    """
    Load label, barcode, primer records from a CSV file.

    Each non-blank row supplies a specimen label (column 1) and a barcode
    (column 2). The primer is taken from the ``primer`` argument when given,
    otherwise from column 3 of the row. Any additional columns are ignored.

    Parameters:
        fp: open file-like object handed to ``csv.reader``.
        primer: primer sequence to use for every row; when None the third
            column of each row is used instead.
        header: when true, the first row is skipped.

    Returns a trie mapping every sequence yielded by
    ``all_unambiguous(barcode + primer)`` to its specimen label.

    Raises ValueError if two rows produce the same sequence.
    """
    tr = trie.trie()
    reader = csv.reader(fp)

    if header:
        # Skip header
        next(reader)

    # Skip blank rows
    records = (record for record in reader if record)

    for record in records:
        specimen, barcode = record[:2]
        if primer is not None:
            pr = primer
        else:
            pr = record[2]
        for sequence in all_unambiguous(barcode + pr):
            if tr.has_key(sequence):
                # Format the message here: ValueError does not interpolate
                # extra positional arguments into a format string.
                raise ValueError(
                    "Duplicate sample: {0}, {1} both have {2}".format(
                        specimen, tr[sequence], sequence))
            logging.info('%s->%s', sequence, specimen)
            tr[sequence] = specimen

    return tr
Exemplo n.º 8
0
    def test_large_save_load(self):
        """Round-trip a large trie through save/load and verify every entry.

        For each of three string lengths (100, 1000 and 10000 characters),
        generate 1000 random lowercase key/value pairs and insert them into
        both a trie and a reference dict. The entries are checked after each
        batch, and again after the trie has been written to a temporary file
        and read back.
        """
        cmp_dict = {}
        trieobj = trie.trie()
        self.assertEqual(trieobj.get("foobar"), None)
        for str_len in [100, 1000, 10000]:
            # cmp_dict deliberately keeps accumulating across batches so the
            # post-load check below covers all three length categories, not
            # just the last one.
            for _ in range(1000):
                key = ''.join([random.choice(ascii_lowercase) for _ in range(str_len)])
                val = ''.join([random.choice(ascii_lowercase) for _ in range(str_len)])
                trieobj[key] = val
                cmp_dict[key] = val
            for key in cmp_dict:
                self.assertEqual(trieobj[key], cmp_dict[key])

        with tempfile.TemporaryFile(mode='w+b') as f:
            trie.save(f, trieobj)
            f.seek(0)
            trieobj = trie.load(f)
        for key in cmp_dict:
            self.assertEqual(trieobj[key], cmp_dict[key])
Exemplo n.º 9
0
 def test_prefix(self):
     """Check lookup, ``in`` membership and prefix queries on a small trie."""
     trieobj = trie.trie()
     for word, value in (("hello", 5), ("he", 7), ("hej", 9), ("foo", "bar")):
         trieobj[word] = value
     self.assertEqual(sorted(trieobj.keys()), ["foo", "he", "hej", "hello"])
     self.assertEqual(trieobj["hello"], 5)
     self.assertEqual(trieobj.get("bye"), None)
     # Stored keys are members; an absent key is not.
     for word in ("hello", "he"):
         self.assertTrue(word in trieobj)
     self.assertFalse("bye" in trieobj)
     # Prefixes of stored keys are recognised; other strings are not.
     for prefix in ("h", "hel"):
         self.assertTrue(trieobj.has_prefix(prefix))
     for prefix in ("foa", "hello world"):
         self.assertFalse(trieobj.has_prefix(prefix))
     self.assertEqual(len(trieobj), 4)
     self.assertEqual(sorted(trieobj.with_prefix("he")), ["he", "hej", "hello"])
     for prefix, matches in (("l", []), ("hej", ["hej"]), ("hejk", [])):
         self.assertEqual(trieobj.with_prefix(prefix), matches)
Exemplo n.º 10
0
def parse_barcode_file(fp, primer=None, header=False):
    """
    Load label, barcode, primer records from a CSV file.

    Each non-blank row supplies a specimen label (column 1) and a barcode
    (column 2). The primer is taken from the ``primer`` argument when given,
    otherwise from column 3 of the row. Any additional columns are ignored.

    Parameters:
        fp: open file-like object handed to ``csv.reader``.
        primer: primer sequence to use for every row; when None the third
            column of each row is used instead.
        header: when true, the first row is skipped.

    Returns a trie mapping every sequence yielded by
    ``all_unambiguous(barcode + primer)`` to its specimen label.

    Raises ValueError if two rows produce the same sequence.
    """
    tr = trie.trie()
    reader = csv.reader(fp)

    if header:
        # Skip header
        next(reader)

    # Skip blank rows
    records = (record for record in reader if record)

    for record in records:
        specimen, barcode = record[:2]
        if primer is not None:
            pr = primer
        else:
            pr = record[2]
        for sequence in all_unambiguous(barcode + pr):
            if sequence in tr:
                # Format the message here: ValueError does not interpolate
                # extra positional arguments into a format string.
                raise ValueError(
                    "Duplicate sample: {0}, {1} both have {2}".format(
                        specimen, tr[sequence], sequence))
            logging.info('%s->%s', sequence, specimen)
            tr[sequence] = specimen

    return tr
Exemplo n.º 11
0
 def test_find(self):
     """Check triefind match/match_all/find/find_words on a small trie."""
     from Bio import triefind
     text = "hello world!"
     trieobj = trie.trie()
     entries = [("hello", 5), ("he", 7), ("hej", 9), ("foo", "bar"), ("wor", "ld")]
     for word, value in entries:
         trieobj[word] = value
     self.assertEqual(triefind.match(text, trieobj), "hello")
     matched = sorted(triefind.match_all(text, trieobj))
     self.assertEqual(matched, ["he", "hello"])
     found = sorted(triefind.find(text, trieobj))
     self.assertEqual(found, [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9)])
     words = sorted(triefind.find_words(text, trieobj))
     self.assertEqual(words, [("hello", 0, 5)])
     # Adding the full word "world" makes it show up in both searches.
     trieobj["world"] = "full"
     found = sorted(triefind.find(text, trieobj))
     self.assertEqual(found, [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9), ("world", 6, 11)])
     words = sorted(triefind.find_words(text, trieobj))
     self.assertEqual(words, [("hello", 0, 5), ("world", 6, 11)])
Exemplo n.º 12
0
 def test_get_approximate(self):
     """Regression test for insertions/deletions at the end of the query."""
     # Found bug, doesn't handle insertions and deletions at end properly.
     trieobj = trie.trie()
     trieobj["hello"] = 1
     # Each case is (query, second get_approximate argument, expected hits).
     cases = (
         ('he', 2, []),
         ('he', 3, [('hello', 1, 3)]),
         ('hello me!', 3, []),
         ('hello me!', 4, [('hello', 1, 4)]),
         ('hello me!', 5, [('hello', 1, 4)]),
     )
     for query, limit, hits in cases:
         self.assertEqual(trieobj.get_approximate(query, limit), hits)
Exemplo n.º 13
0
 def test_get_approximate(self):
     """Regression test for insertions/deletions at the end of the query."""
     # Found bug, doesn't handle insertions and deletions at end properly.
     trieobj = trie.trie()
     trieobj["hello"] = 1
     hit_3 = [("hello", 1, 3)]
     hit_4 = [("hello", 1, 4)]
     # Query shorter than the stored key.
     for limit, hits in ((2, []), (3, hit_3)):
         self.assertEqual(trieobj.get_approximate("he", limit), hits)
     # Query longer than the stored key.
     for limit, hits in ((3, []), (4, hit_4), (5, hit_4)):
         self.assertEqual(trieobj.get_approximate("hello me!", limit), hits)
Exemplo n.º 14
0
    def __init__(self):
        """Build ``self.index``, a trie from InChI chunks to molecule ids.

        Also rebinds ``triefind.DEFAULT_BOUNDARY_CHARS`` (a module-level
        value) with ``utils.INCHI_SPECIAL_CHARS`` removed from it.
        """
        boundary_chars = triefind.DEFAULT_BOUNDARY_CHARS
        triefind.DEFAULT_BOUNDARY_CHARS = boundary_chars.translate(None, utils.INCHI_SPECIAL_CHARS)

        self.index = trie.trie()

        rows = CompoundStructures.objects.all().values_list('standard_inchi', 'molecule_id')
        for inchi in rows:
            # Drop the part before the first '/' and index the remaining chunks.
            for chunk in inchi[0].split('/')[1:]:
                # Chunks of one or two characters are not indexed.
                if len(chunk) <= 2:
                    continue
                key = str(chunk)
                if self.index.get(key):
                    self.index[key].append(inchi[1])
                else:
                    self.index[key] = [inchi[1]]
Exemplo n.º 15
0
 def test_with_prefix(self):
     """Insert every suffix of "BANANA" and check with_prefix() results."""
     trieobj = trie.trie()
     s = "BANANA"
     # Key: suffix starting at `start`; value: that start offset.
     for start in range(len(s)):
         suffix = s[start:]
         trieobj[suffix] = start
         self.assertEqual(trieobj[suffix], start)
     self.assertEqual(set(trieobj.values()), set(range(6)))
     self.assertEqual(set(["A", "ANA", "ANANA", "BANANA", "NA", "NANA"]), set(trieobj.keys()))
     expectations = (
         ("N", ["NA", "NANA"]),
         ("NA", ["NA", "NANA"]),
         ("A", ["A", "ANA", "ANANA"]),
         ("AN", ["ANA", "ANANA"]),
     )
     for prefix, words in expectations:
         self.assertEqual(set(words), set(trieobj.with_prefix(prefix)))
Exemplo n.º 16
0
 def test_with_prefix(self):
     """Insert every suffix of "BANANA" and check with_prefix() results."""
     trieobj = trie.trie()
     s = "BANANA"
     # Key: suffix starting at `offset`; value: that offset.
     for offset in range(len(s)):
         tail = s[offset:]
         trieobj[tail] = offset
         self.assertEqual(trieobj[tail], offset)
     self.assertEqual(set(trieobj.values()), set(range(6)))
     all_suffixes = {'A', 'ANA', 'ANANA', 'BANANA', 'NA', 'NANA'}
     self.assertEqual(all_suffixes, set(trieobj.keys()))
     n_words = {'NA', 'NANA'}
     self.assertEqual(n_words, set(trieobj.with_prefix("N")))
     self.assertEqual(n_words, set(trieobj.with_prefix("NA")))
     self.assertEqual({'A', 'ANA', 'ANANA'}, set(trieobj.with_prefix("A")))
     self.assertEqual({'ANA', 'ANANA'}, set(trieobj.with_prefix("AN")))
Exemplo n.º 17
0
 def test_get_set(self):
     """Check item assignment, lookup, overwrite and len() on a trie."""
     trieobj = trie.trie()
     initial = (("hello world", "s1"), ("bye", "s2"),
                ("hell sucks", "s3"), ("hebee", "s4"))
     for key, value in initial:
         trieobj[key] = value
     for key, value in initial:
         self.assertEqual(trieobj[key], value)
     trieobj["blah"] = "s5"
     self.assertEqual(trieobj["blah"], "s5")
     self.assertEqual(trieobj.get("foobar"), None)
     self.assertEqual(len(trieobj), 5)
     # Re-assigning an existing key replaces its value.
     trieobj["blah"] = "snew"
     self.assertEqual(trieobj["blah"], "snew")
Exemplo n.º 18
0
 def test_get_set(self):
     """Check item assignment, lookup, overwrite and len() on a trie."""
     trieobj = trie.trie()
     keys = ("hello world", "bye", "hell sucks", "hebee")
     values = ("s1", "s2", "s3", "s4")
     # Insert everything first, then read everything back.
     for key, value in zip(keys, values):
         trieobj[key] = value
     for key, value in zip(keys, values):
         self.assertEqual(trieobj[key], value)
     trieobj["blah"] = "s5"
     self.assertEqual(trieobj["blah"], "s5")
     self.assertEqual(trieobj.get("foobar"), None)
     self.assertEqual(len(trieobj), 5)
     # Re-assigning an existing key replaces its value.
     trieobj["blah"] = "snew"
     self.assertEqual(trieobj["blah"], "snew")
def construct_complex_trie(counter, lengths=None):
    """Build a trie over the sequences in ``counter`` and their chunks.

    Sequences are processed longest first. For every length in ``lengths``
    that does not exceed the sequence length, each piece yielded by
    ``chunker(seq, length)`` is inserted unless that key is already present.
    A piece equal to the whole sequence maps to ``counter[seq]``; any other
    piece maps to the sequence it came from.

    ``lengths`` defaults to the sorted distinct lengths of the sequences.
    """
    t = trie.trie()
    ordered = sorted(counter, key=len, reverse=True)
    if lengths is None:
        lengths = sorted({len(item) for item in ordered})
    for seq in ordered:
        usable_lengths = [size for size in lengths if size <= len(seq)]
        for size in usable_lengths:
            for subseq in chunker(seq, size):
                if t.has_key(subseq):
                    # First writer wins; longer sequences were handled first.
                    continue
                if subseq == seq:
                    t[seq] = counter[seq]
                else:
                    t[subseq] = seq
    return t
Exemplo n.º 20
0
def gen_seq_tries(seqs, tol):
    """Group sequences by length into one trie per distinct length.

    Only sequences strictly longer than ``max(tol, 2)`` are kept. Returns
    ``(trie_list, lengths)`` where ``lengths`` holds the distinct sequence
    lengths in descending order and ``trie_list[i]`` contains (with value 0)
    every kept sequence of length ``lengths[i]``.
    """
    # New: don't allow detection of any sequences whose length is equal to or
    # less than the tolerance. This prevents weird behavior and should be a
    # reasonable assumption for pretty much all use cases.
    # Newer: make sure sequences are at least of length 2. I think this is
    # necessitated by the new ability to specify the tolerance as a fraction,
    # so a length 0 sequence would slip through here.
    kept = [seq for seq in seqs if len(seq) > max(tol, 2)]
    lengths = sorted({len(seq) for seq in kept}, reverse=True)
    trie_list = [trie() for _ in lengths]
    # Maps a sequence length to the position of its trie in trie_list.
    slot_for_length = {length: slot for slot, length in enumerate(lengths)}
    for seq in kept:
        bucket = trie_list[slot_for_length[len(seq)]]
        bucket[seq] = 0
    # Returns a list of tries, one for each sequence length,
    # sorted in descending order by sequence length.
    # FUTURE PLAN: allow for the observed sequence to match a different length
    # of reference sequence, basically so that insertions only cost 1 instead
    # of 2.
    return trie_list, lengths
Exemplo n.º 21
0
def extract_peptides(sequences, min_length=7, max_length=20):
    """
    Extract subsequences from full protein sequences, and return dictionary
    mapping each kmer to its source sequences.

    Every substring whose length lies between min_length and max_length
    (both inclusive) is extracted. Each source sequence is recorded at most
    once per kmer, even if the kmer occurs in it several times.

    Parameters
    ----------
    sequences : list of Sequence
        All generated protein sequences; each must expose ``amino_acids``

    min_length : int
        Smallest peptide length to include

    max_length : int
        Largest peptide length to include

    Returns
    -------
    Trie (dictionary-like) from str to list of Sequence objects which
    contained that peptide
    """
    from Bio.trie import trie
    peptide_dict = trie()

    for sequence_obj in progressbar(sequences):
        amino_acids = sequence_obj.amino_acids
        n_aa = len(amino_acids)
        already_seen_for_protein = set()
        for i in range(n_aa - min_length + 1):
            # Slicing clamps at the end of the protein, so this window may be
            # shorter than max_length near the C-terminus.
            longest_peptide = amino_acids[i:i + max_length]
            longest_peptide_length = len(longest_peptide)
            # The upper bound is inclusive (+ 1): without it the full window
            # (max_length-mers, and the final min_length-mer of the protein)
            # would never be emitted.
            for k in range(min_length, longest_peptide_length + 1):
                kmer = longest_peptide[:k]
                if kmer not in already_seen_for_protein:
                    already_seen_for_protein.add(kmer)
                    if kmer in peptide_dict:
                        peptide_dict[kmer].append(sequence_obj)
                    else:
                        peptide_dict[kmer] = [sequence_obj]
    return peptide_dict
Exemplo n.º 22
0
 def resetSignatureDataStructure(self):
     """Replace the signature trie with a fresh one and mark it as empty."""
     self.signatureTrie = trie.trie()
     self.isEmpty = True
def construct_simple_trie(counter):
    """Return a trie mapping every key of ``counter`` to its value.

    ``counter`` is any mapping from sequence to count.
    """
    t = trie.trie()
    # Iterate the mapping directly rather than calling iteritems(), which
    # only exists on Python 2 dictionaries.
    for seq in counter:
        t[seq] = counter[seq]
    return t
Exemplo n.º 24
0
#!/usr/bin/env python

import StringIO
from operator import truth

from Bio import trie
from Bio import triefind

# Demo script (Python 2 print statements): populate a trie and run the
# triefind search helpers over a sample text. The trailing comments record
# the expected output of each print.
trieobj = trie.trie()

trieobj["hello"] = 5
trieobj["he"] = 7
trieobj["hej"] = 9
trieobj["foo"] = "bar"
trieobj["wor"] = "ld"

print triefind.match("hello world!", trieobj)  # "hello"
# Results are sorted in place before printing so the output is stable.
k = triefind.match_all("hello world!", trieobj)
k.sort()
print k  # ["he", "hello"]

k = triefind.find("hello world!", trieobj)
k.sort()
print k  # [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9)]

k = triefind.find_words("hello world!", trieobj)
k.sort()
print k  # [("hello", 0, 5)]

# Add the full word "world" and search again.
trieobj["world"] = "full"
k = triefind.find("hello world!", trieobj)
Exemplo n.º 25
0
 def setUp(self):
     """Create ``self.tr`` holding three 4-character keys, each mapped to 12."""
     self.tr = trie.trie()
     for key in ('1000', '1011', '1010'):
         self.tr[key] = 12
Exemplo n.º 26
0
 def setUp(self):
     """Create ``self.tr`` holding three 4-character keys, each mapped to 12."""
     self.tr = tr = trie.trie()
     # Chained assignment stores the keys left to right.
     tr['1000'] = tr['1011'] = tr['1010'] = 12
Exemplo n.º 27
0
# How to perform efficient membership search on very large datasets in Python
from Bio import trie

# Start from an empty Bio.trie trie.
tr = trie.trie()
Exemplo n.º 28
0
def generate_adjacent_mers(sequence, max_hamming_distance):
    """Return a trie populated by ``hamming_ball`` for ``sequence``.

    A fresh trie is handed to ``hamming_ball`` together with the sequence,
    the distance limit and the alphabet 'AGCT', and is then returned.
    NOTE(review): presumably hamming_ball stores every sequence within the
    given Hamming distance of ``sequence`` - confirm against its definition.
    """
    neighbours = trie.trie()
    hamming_ball(sequence, max_hamming_distance, 'AGCT', neighbours)
    return neighbours
Exemplo n.º 29
0
from Bio import trie #trie from the Biopython computational molecular biology library

# Timestamp taken when the script starts.
# NOTE(review): presumably used later to report elapsed time - confirm.
run_time = time.time()

def t9_prediction(string, t9):
    """Append the trie key equal in length to ``string`` to ``t9``, if any.

    Scans the keys of the module-level trie ``t`` that start with ``string``.
    On the first key with the same length as ``string``, that key is appended
    to ``t9`` and ``t9`` is returned. Returns None when no such key exists.
    """
    wanted_length = len(string)
    for candidate in t.with_prefix(string):
        if len(candidate) != wanted_length:
            continue
        t9.append(candidate)
        return t9

#words = [line.strip() for line in open("/home/n/work/py/VR/t9/english.txt",  "rb")]

# Read the word list named by the first command-line argument, one word per
# line. The context manager closes the file once the list has been built.
with open(sys.argv[1], "rb") as word_file:
    words = [line.strip() for line in word_file]

# Index every word; the stored value (1) is only a placeholder.
t = trie.trie()
for word in words:
    t[word] = 1

# Keypad digit -> characters that digit can stand for, in both letter cases.
# The literal is continued by its braces, so no backslashes are needed.
mapping = {
    1: ["'"],
    2: ["a", "A", "b", "B", "c", "C"],
    3: ["d", "D", "e", "E", "f", "F"],
    4: ["g", "G", "h", "H", "i", "I"],
    5: ["j", "J", "k", "K", "l", "L"],
    6: ["m", "M", "n", "N", "o", "O"],
    7: ["p", "P", "q", "Q", "r", "R", "s", "S"],
    8: ["t", "T", "u", "U", "v", "V"],
    9: ["w", "W", "x", "X", "y", "Y", "z", "Z"],
}

while True:
    try:
        my_inputs = [int(i) for i in raw_input("Enter space separated inputs: ").split()]
Exemplo n.º 30
0
import StringIO
from operator import truth

try :
    from Bio import trie
except ImportError :
    import os
    from Bio import MissingExternalDependencyError
    if os.name=="java" :
        message = "Not available on Jython, Bio.trie requires compiled C code."
    else :
        message = "Could not import Bio.trie, check C code was compiled."
    raise MissingExternalDependencyError(message)

# Demo script (Python 2 print statements): basic dictionary-style access on
# a trie. The trailing comments record the expected output of each print.
trieobj = trie.trie()

trieobj["hello"] = 5
trieobj["he"] = 7
trieobj["hej"] = 9
trieobj["foo"] = "bar"

# Sort the keys in place so the printed order is stable.
k = trieobj.keys()
k.sort()
print k                          # ["foo", "he", "hej", "hello"]
print trieobj["hello"]           # 5
print trieobj.get("bye")         # None

# Exact-key membership tests.
print trieobj.has_key("hello")   # 1
print trieobj.has_key("he")      # 1
print trieobj.has_key("bye")     # 0
Exemplo n.º 31
0
# How to perform efficient membership search on very large datasets in Python
from Bio import trie
# Start from an empty Bio.trie trie.
tr = trie.trie()
Exemplo n.º 32
0
from itertools import product
from Bio import trie

# Load every line of dictionary.txt (whitespace-stripped) and store it in the
# trie with the word's length as its value.
# NOTE(review): the file handle opened here is never explicitly closed.
t = trie.trie()
dictionary = [word.strip() for word in open("dictionary.txt", "rb")]

for word in dictionary:
    t[word] = len(word)

# Keypad digit -> lowercase letters for that digit; 1 maps to the empty string.
t9Map = {
    1: [""],
    2: ["a", "b", "c"],
    3: ["d", "e", "f"],
    4: ["g", "h", "i"],
    5: ["j", "k", "l"],
    6: ["m", "n", "o"],
    7: ["p", "q", "r", "s"],
    8: ["t", "u", "v"],
    9: ["w", "x", "y", "z"]
}

# NOTE(review): this rebinds the builtin name `input`; with the Python 2 print
# statement used here, input() evaluates what the user types - confirm that
# is intended.
print "Enter a number:"
input = input()
strings = []

# One list of candidate letters per typed digit. Digits missing from t9Map
# (e.g. 0) raise KeyError.
for char in str(input):
    strings.append(t9Map[int(char)])

result = []
for all in product(
        *(x for x in strings
Exemplo n.º 33
0
 def __init__(self):
     """Start with an empty trie index and the ``modified`` flag set."""
     self.index = trie.trie()
     self.modified = True
Exemplo n.º 34
0
    def resetSignatureDataStructure(self):
        """Replace the signature trie with a fresh one and mark it as empty."""
        self.signatureTrie = trie.trie()
        self.isEmpty = True
    
class SignaturePermutationWithSortedList(Permutation):
Exemplo n.º 35
0
try :
    from Bio import trie
except ImportError :
    import os
    from Bio import MissingExternalDependencyError
    if os.name=="java" :
        message = "Not available on Jython, Bio.trie requires compiled C code."
    else :
        message = "Could not import Bio.trie, check C code was compiled."
    raise MissingExternalDependencyError(message)


from Bio import triefind

# Demo script (Python 2 print statements): populate a trie and run the
# triefind search helpers over a sample text. The trailing comments record
# the expected output of each print.
trieobj = trie.trie()

trieobj["hello"] = 5
trieobj["he"] = 7
trieobj["hej"] = 9
trieobj["foo"] = "bar"
trieobj["wor"] = "ld"

print triefind.match("hello world!", trieobj)    # "hello"
# Results are sorted in place before printing so the output is stable.
k = triefind.match_all("hello world!", trieobj)
k.sort()
print k     # ["he", "hello"]

k = triefind.find("hello world!", trieobj)
k.sort()
print k     # [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9)]
Exemplo n.º 36
0
#!/usr/bin/env python

import StringIO
from operator import truth

from Bio import trie

# Demo script (Python 2 print statements): dictionary-style access, key and
# prefix membership, and len() on a trie. The trailing comments record the
# expected output of each print.
trieobj = trie.trie()

trieobj["hello"] = 5
trieobj["he"] = 7
trieobj["hej"] = 9
trieobj["foo"] = "bar"

# Sort the keys in place so the printed order is stable.
k = trieobj.keys()
k.sort()
print k  # ["foo", "he", "hej", "hello"]
print trieobj["hello"]  # 5
print trieobj.get("bye")  # None

# Exact-key membership tests.
print trieobj.has_key("hello")  # 1
print trieobj.has_key("he")  # 1
print trieobj.has_key("bye")  # 0

# Prefix membership tests.
print trieobj.has_prefix("h")  # 1
print trieobj.has_prefix("hel")  # 1
print trieobj.has_prefix("foa")  # 0
print trieobj.has_prefix("hello world")  # 0

print len(trieobj)  # 4
Exemplo n.º 37
0
import argparse
import os
import sys
import platform
from Bio import trie
from Bio import triefind

# Pick the home directory by platform: Linux, otherwise assumed to be OSX.
prefix = "/home/tabboud" if platform.system() == "Linux" else "/Users/tabboud"

# Root of the Dropbox folder and the name of the per-directory ignore file.
DROPBOX_DIR = '{0}/Dropbox'.format(prefix)
IGNORE_FILE = '.dbignore'
# Module-level trie, created empty.
# NOTE(review): presumably filled with ignore patterns by cache_patterns() - confirm.
cache = trie.trie()

def cache_patterns(ignore_file):
    """ Open and read the patterns in an ignore file
    Args:
        ignore_file: IgnoreFile object
    NOTES:
        see dir.c:add_excludes for reading the file and storing the patterns in
        el
    """
    ignore_path = ignore_file.path
    if not os.path.isfile(ignore_path):
        print "ERROR: %s is not a file" % ignore_path
        return
    with open(ignore_path, 'r') as fp:
        for pattern in fp.readlines():