Пример #1
0
 def __init__(self):
     """Create the lookup tries and subscribe to model lifecycle events.

     ``self.cache`` holds one trie over ``string.printable`` under each of
     the ``'resources'`` and ``'collections'`` keys. ``self._add_item`` and
     ``self._del_item`` are registered for the ``'created'`` and
     ``'deleted'`` events of both ``Resource`` and ``Collection``.
     """
     self.cache = {
         kind: datrie.Trie(string.printable)
         for kind in ('resources', 'collections')
     }
     # Resource handlers are registered before Collection handlers.
     for model in (Resource, Collection):
         model.register('created', self._add_item)
         model.register('deleted', self._del_item)
Пример #2
0
def test_trie_len():
    """Check ``len()`` on a populated trie and on an empty one."""
    words = ['foo', 'f', 'faa', 'bar', 'foobar']
    populated = datrie.Trie(string.ascii_lowercase)
    for entry in words:
        populated[entry] = None
    assert len(populated) == len(words)

    # Regression guard: len() of an empty trie used to segfault
    # (issue #17 on GitHub).
    empty = datrie.Trie(string.ascii_lowercase)
    assert len(empty) == 0
Пример #3
0
def createPrefixTree(INVPATH, name):
    """Build a forward and a reversed prefix tree from a JSON inverted index.

    The file at ``INVPATH + name`` is read as UTF-8 and parsed as JSON; the
    result is expected to be a mapping. Each key is stored in the first trie
    and, reversed, in the second trie, both times with the same value.

    Args:
        INVPATH: Path prefix that is concatenated with ``name``.
        name: File name (appended to ``INVPATH`` without a separator).

    Returns:
        A ``(trie, reverseTrie)`` tuple of ``datrie.Trie`` objects.
    """
    # Lowercase Russian letters, shared by both tries.
    alphabet = "абвгдеёжзийклмнопрстуфхцчшщъыьэюя"

    # The context manager guarantees the file handle is closed even when
    # reading or decoding fails.
    with codecs.open(INVPATH + name, 'r', 'utf-8') as jsonFile:
        invertIndex = json.loads(jsonFile.read())

    trie = datrie.Trie(alphabet)
    reverseTrie = datrie.Trie(alphabet)
    for k, v in invertIndex.items():
        trie[k] = v
        # The reversed key is stored in the second trie with the same value.
        reverseTrie[k[::-1]] = v
    return trie, reverseTrie
Пример #4
0
 def __init__(self, alphabet=None, ranges=None):
     """Create the underlying ``datrie.Trie``.

     A string ``alphabet`` takes precedence; otherwise ``ranges`` is used
     when it is not ``None``. When neither applies, a notice is printed and
     the trie is built over ``string.printable``.
     """
     if isinstance(alphabet, str):
         backing = datrie.Trie(alphabet)
     elif ranges is None:
         # Nothing usable was supplied: announce the fallback alphabet.
         print(
             "Either `alphabet` or `ranges` must be applied when initialing. Using english related chars"
         )
         backing = datrie.Trie(string.printable)
     else:
         backing = datrie.Trie(ranges=ranges)
     self.trie = backing
Пример #5
0
def read_primers(args):
    """Load primer pairs from a tab-separated file into a trie.

    The first seven columns of every row in ``args.primers`` are read as
    chromosome, forward position, reverse position, forward name, reverse
    name, forward sequence and reverse sequence. Rows that cannot be
    unpacked, or whose positions are not integers, are reported with
    ``print`` and skipped.

    Sequences are upper-cased and, when ``args.prefix`` is not ``None``,
    cut down to that many leading characters before being used as trie
    keys. Each key maps to a ``PrimerInfo`` built from the row.

    Returns:
        ``(trie, primer_names)`` where ``primer_names`` lists the forward and
        reverse names of every accepted row, in file order.
    """
    primer_trie = datrie.Trie(u'ATGCN')
    names = []
    with open(args.primers) as handle:
        for record in csv.reader(handle, delimiter='\t'):
            try:
                (chrom, pos_fwd, pos_rev, name_fwd, name_rev, seq_fwd,
                 seq_rev) = record[:7]
                pos_fwd = int(pos_fwd)
                pos_rev = int(pos_rev)
            except ValueError:
                print("Can't parse row: {}".format(record))
                continue

            names.extend([name_fwd, name_rev])
            # Slicing with a ``None`` bound keeps the whole sequence, so one
            # expression covers both the prefix and the full-primer case.
            key_fwd = unicode(seq_fwd.upper()[:args.prefix])
            key_rev = unicode(seq_rev.upper()[:args.prefix])
            primer_trie[key_fwd] = PrimerInfo(chrom, 'fwd', name_fwd,
                                              pos_fwd, key_fwd)
            primer_trie[key_rev] = PrimerInfo(chrom, 'rev', name_rev,
                                              pos_rev, key_rev)
    return primer_trie, names
Пример #6
0
def prepare_keyword_trie(keyword_file):
    """Build a trie that maps each keyword to the list of its entries.

    ``keyword_file`` is read as UTF-8. A line is accepted when it consists
    of exactly four tab-separated fields -- key, value, integer weight and
    category -- both before and after surrounding whitespace is stripped.
    Lines with a different field count or a non-integer weight are skipped.

    Args:
        keyword_file: Path of the tab-separated keyword file.

    Returns:
        A ``datrie.Trie`` whose alphabet is the set of characters occurring
        in the accepted keys. Each key maps to a list of dicts with the keys
        ``'k'``, ``'v'``, ``'w'`` and ``'c'``, in file order.
    """
    grouped = {}
    with open(keyword_file, 'r', encoding='utf8') as kw_file:
        for line in kw_file:
            if line.count('\t') != 3:
                continue
            fields = line.strip().split('\t')
            if len(fields) != 4:
                # strip() removed an empty leading/trailing field; unpacking
                # such a line would raise ValueError, so skip it instead.
                continue
            key, value, weight_str, category = fields
            try:
                weight = int(weight_str)
            except ValueError:
                # Non-numeric weight: ignore the line.
                continue
            grouped.setdefault(key, []).append({
                'k': key,
                'v': value,
                'w': weight,
                'c': category
            })

    # Collect the alphabet in one pass instead of growing a string per line.
    alphabet = ''.join(sorted(set(''.join(grouped))))
    kw_trie = datrie.Trie(alphabet)
    for key, entries in grouped.items():
        kw_trie[key] = entries
    return kw_trie
Пример #7
0
def solve(board, pieces):
    """Expand ``board`` breadth-first, one piece kind per pass.

    ``pieces`` is first replaced by ``get_ordering(pieces, board)``. Every
    pass pops one ``(piece, count)`` entry and, for each current candidate
    whose canonical representation has not been seen yet, tries every
    combination of ``count`` entries from the candidate's ``free``
    collection via ``add_piece``.

    Returns the candidates left after the last pass. This is a lazy
    generator, or the initial ``[board]`` list when the ordering is empty.
    """
    candidates = [board]
    next_candidates = iter([])
    pieces = get_ordering(pieces, board)
    print "pieces %s" % (pieces, )
    # NOTE(review): the datrie created here is discarded immediately; the
    # seen-set actually used below is a plain dict.
    trie = datrie.Trie("%s.\n" % pieces)
    trie = {}

    while pieces:
        (piece, count) = pieces.pop()
        print "processing %s %i" % (piece, count)
        #print hpy().heap()
        for c in candidates:
            # The canonical repr is the de-duplication key.
            cform = unicode(c.get_canonical().__repr__())
            if cform not in trie:
                #print cform
                trie[cform] = True
                # One move per combination of `count` free entries; each
                # entry `t` is prefixed with the piece.
                moves = ([(piece, ) + t for t in c]
                         for c in combinations(c.free, count))
                # The list comprehension consumes `moves` right away, so the
                # boards are built now and only the chaining is lazy.
                next_candidates = chain(next_candidates,
                                        [c.add_piece(move) for move in moves])


#                next_candidates.extend()
#print "canditates now %i" % len(next_candidates)
        # Lazy filter: it runs when `candidates` is iterated (next pass or by
        # the caller), so `filterNode` sees `pieces` as it is at that time.
        candidates = (n for n in next_candidates
                      if n != None and filterNode(n, pieces))
        #print "next_candidates %i" % len(candidates)
        next_candidates = iter([])
    return candidates
Пример #8
0
def solve_dfs(board, pieces, ordering):
    t = time.time()
    pieces = ordering(pieces, board)
    stack = [(board, pieces)]
    solutions = []
    #discovered = {}
    discovered = datrie.Trie("%s.\n" % pieces)
    print "order: %s" % pieces
    while stack:
        b, ps = stack.pop()
        (p, count), left = ps[0], ps[1:]
        moves = ([(p, ) + t for t in c] for c in combinations(b.free, count))
        for move in moves:
            c = b.add_piece(move)
            if not c:
                continue
            cform = unicode(c.get_canonical().__repr__())
            if cform in discovered:
                continue
            discovered[cform] = True
            if not filterNode(c, left):
                #print "filtered!"
                continue

            if not left:
                solutions.append(c)
                #print "%s\nstack size: %i solutions found: %i" % (c, len(stack), len(solutions))
                continue
            stack.append((c, left))
    print "took %f" % (time.time() - t)
    return solutions
Пример #9
0
def create_trie():
    """Return a trie over ``ALPHABET`` mapping every ``words100k()`` entry to 1."""
    vocabulary = words100k()
    result = datrie.Trie(ALPHABET)
    for entry in vocabulary:
        result[entry] = 1
    return result
Пример #10
0
 def _insert_into_trie(items):
     """Copy every key/value pair of ``items`` into a new datrie trie.

     The trie is built over ``string.printable``; start and end of the
     insertion are logged at INFO level.
     """
     logging.info("Start inserting into trie...")
     result = datrie.Trie(string.printable)  # noqa
     for name, payload in items.iteritems():
         result[name] = payload
     logging.info("Finished inserting into trie.")
     return result
Пример #11
0
    def test_contains(words):
        """Every inserted word is found, by ``in``, indexing and ``get``."""
        unique = set(words)
        trie = datrie.Trie(string.printable)
        for position, word in enumerate(unique):
            trie[word] = position

        for position, word in enumerate(unique):
            assert word in trie
            assert trie[word] == trie.get(word) == position
Пример #12
0
    def __init__(self, filepath):
        """Load one name per line from ``filepath`` into ``self.trie``.

        The trie alphabet is ASCII letters, digits and whitespace.
        """
        alphabet = string.ascii_letters + string.digits + string.whitespace
        self.trie = datrie.Trie(alphabet)

        with open(filepath, 'r') as source:
            for raw in source:
                name = raw.rstrip()
                # Lower-cased key allows case-insensitive lookups; the value
                # keeps the name as written in the file.
                self.trie[unicode(name.lower())] = name
Пример #13
0
def test_setdefault():
    """``setdefault`` stores the first default and ignores later ones."""
    trie = datrie.Trie(string.ascii_lowercase)
    # (key, default passed in, value expected back), applied in order.
    cases = [
        ('foo', 5, 5),
        ('foo', 4, 5),
        ('foo', 5, 5),
        ('bar', 'vasia', 'vasia'),
        ('bar', 3, 'vasia'),
        ('bar', 7, 'vasia'),
    ]
    for key, default, expected in cases:
        assert trie.setdefault(key, default) == expected
Пример #14
0
def test_trie_items():
    """``values``, ``items`` and ``keys`` come back ordered by key."""
    trie = datrie.Trie(string.ascii_lowercase)
    for key, value in (('foo', 10), ('bar', 'foo'), ('foobar', 30)):
        trie[key] = value

    expected_items = [('bar', 'foo'), ('foo', 10), ('foobar', 30)]
    assert trie.values() == ['foo', 10, 30]
    assert trie.items() == expected_items
    assert trie.keys() == ['bar', 'foo', 'foobar']
Пример #15
0
 def _trie(self):
     """Return a small lowercase-ASCII trie fixture with five keys."""
     fixture = datrie.Trie(string.ascii_lowercase)
     entries = (
         ('foo', 10),
         ('bar', 20),
         ('foobar', 30),
         ('foovar', 40),
         ('foobarzartic', None),
     )
     for key, value in entries:
         fixture[key] = value
     return fixture
 def __init__(self):
     """Create the empty containers used to index rooms and bunnies."""
     # Integer room IDs only.
     self.rooms_by_idx = SortedSet()
     # key: id, value: room
     self.rooms = SortedDict()
     # key: team id,
     # value: SortedSet(key=bunny.reversed_name) of Bunny objects
     self.bunnies_by_team = {}
     # Trie over ASCII letters followed by the ten decimal digits.
     self.bunnies_by_suffix = datrie.Trie(string.ascii_letters +
                                          string.digits)
     self.bunny_names = {}
Пример #17
0
def _build_trie(word_len=None, compare_func=_gte):
    """Build a lowercase-ASCII trie from the lines of ``WORD_FILE``.

    Every stripped line is stored with the value 1. When ``word_len`` is not
    ``None``, a line is kept only if
    ``compare_func(len(line), word_len)`` is truthy.
    """
    result = datrie.Trie(string.ascii_lowercase)

    with open(WORD_FILE, 'r') as source:
        for raw in source:
            word = raw.strip()
            if word_len is not None and not compare_func(len(word), word_len):
                continue
            result[u'{}'.format(word)] = 1
    return result
Пример #18
0
def load_freq_dictionary(path):
    """Read ``word freq`` lines from ``path`` into a word -> int trie.

    The file is opened as UTF-8. Each line is split on a single space into
    exactly two fields, and the second is converted with ``int``.
    """
    with open(path, 'r', encoding='utf-8') as source:
        frequencies = datrie.Trie(string.ascii_lowercase)
        for record in source:
            token, count = record.split(' ')
            frequencies[token] = int(count)
    return frequencies
Пример #19
0
    def test_pop(words):
        """``pop`` returns the stored value once, then only the default."""
        unique = set(words)
        trie = datrie.Trie(string.printable)
        for position, word in enumerate(unique):
            trie[word] = position

        for position, word in enumerate(unique):
            assert trie.pop(word) == position
            # The key is gone now, so pop and get both fall back to 42.
            assert trie.pop(word, 42) == trie.get(word, 42) == 42
Пример #20
0
 def makeDaTrie(self):
     """Build, save and return the fixed-word and stem-word tries.

     Every word in ``self.fixedwords`` / ``self.stemwords`` is passed
     through ``u`` and mapped to ``(score, index)``, where the score comes
     from the same position in ``self.fixedscores`` / ``self.stemscores``.
     Both tries are written under ``self.folders[self.cindex]`` with
     ``self.stopVal`` (two decimals) in the file name.

     Returns:
         ``(fixedtrie, stemtrie)``.
     """
     # Character set covering all of the word sets (a superset of the
     # labMT-only set "raingwtsyelofud'pcmhbkz1-vxq8j970&2=@3#[]46/_;5*").
     charset = u"raingwtsyelofud'pcmhbkz1-vxq8j970&2=@3#[]46/_;5*FALSEICUB+"
     fixedtrie = datrie.Trie(charset)
     stemtrie = datrie.Trie(charset)
     for i, word in enumerate(self.fixedwords):
         fixedtrie[u(word)] = (self.fixedscores[i], i)
     for i, word in enumerate(self.stemwords):
         stemtrie[u(word)] = (self.stemscores[i], i)
     fixedtrie.save('{0}/{1:.2f}-fixed.da'.format(self.folders[self.cindex],
                                                  self.stopVal))
     stemtrie.save('{0}/{1:.2f}-stem.da'.format(self.folders[self.cindex],
                                                self.stopVal))
     return (fixedtrie, stemtrie)
Пример #21
0
def load_as_datrie(filepath):
    """Return a ``string.printable`` trie mapping each line of ``filepath`` to 0.

    Trailing whitespace is removed from every line before it is used as a
    key.
    """
    with open(filepath, 'r') as handle:
        raw_lines = handle.readlines()

    result = datrie.Trie(string.printable)
    for raw in raw_lines:
        result[raw.rstrip()] = 0
    return result
Пример #22
0
 def __init__(self, query_log_path):
     """Count how often each query occurs in the log at ``query_log_path``.

     Every line must split on whitespace into exactly three fields; the
     middle one is the query. Counts are kept in ``self.vocabulary``, a
     trie over lowercase ASCII letters.
     """
     self.vocabulary = datrie.Trie(string.ascii_lowercase)
     with open(query_log_path) as log_file:
         for entry in log_file:
             _uid, query, _stamp = entry.split()
             seen_so_far = self.vocabulary.get(query, 0)
             self.vocabulary[query] = seen_so_far + 1
Пример #23
0
    def test_pickle_unpickle(words):
        """A trie keeps its keys and values through a pickle round trip."""
        unique = set(words)
        original = datrie.Trie(string.printable)
        for position, word in enumerate(unique):
            original[word] = position

        restored = pickle.loads(pickle.dumps(original))
        for position, word in enumerate(unique):
            assert word in restored
            assert restored[word] == position
Пример #24
0
def test_trie_comparison():
    """Tries compare equal by content; ordering comparisons raise."""
    left = datrie.Trie(string.ascii_lowercase)
    assert left == left
    assert left == datrie.Trie(string.ascii_lowercase)

    right = datrie.Trie(string.ascii_lowercase)
    left['foo'] = 42
    right['foo'] = 24
    # Same key, different value.
    assert left != right

    right['foo'] = left['foo']
    assert left == right

    # An extra key on one side breaks equality again.
    right['bar'] = 42
    assert left != right

    with pytest.raises(TypeError):
        left < right  # same for other comparisons
Пример #25
0
def test_trie_ascii():
    """Values stored under ASCII keys are read back unchanged."""
    trie = datrie.Trie(string.ascii_letters)
    expected = (('x', 1), ('y', 'foo'), ('xx', 2))
    for key, value in expected:
        trie[key] = value

    for key, value in expected:
        assert trie[key] == value
Пример #26
0
 def __init__(self, language='english'):
     """Set up the word collection for ``language``.

     A vocabulary is a collection of words in a specific language. The
     language name is stored lower-cased. The words are first loaded with
     ``_unpickle``; if that raises ``IOError`` they are loaded with
     ``_load_from_file`` and then written out with ``_pickle``.
     """
     self.data_folder = 'data'
     self.language = language.lower()
     word_store = datrie.Trie(string.ascii_lowercase)
     self.words = word_store
     try:
         self._unpickle()
     except IOError:
         # Unpickling failed with an I/O error: load from file, then pickle.
         self._load_from_file()
         self._pickle()
Пример #27
0
def _trie():
    """Return a fixture trie over code points 0-127 with seven 'f...' keys."""
    fixture = datrie.Trie(ranges=[(chr(0), chr(127))])
    entries = (
        ('f', 1),
        ('fo', 2),
        ('fa', 3),
        ('faur', 4),
        ('fauxiiiip', 5),
        ('fauzox', 10),
        ('fauzoy', 20),
    )
    for key, value in entries:
        fixture[key] = value
    return fixture
Пример #28
0
def test_trie_unicode():
    """Values stored under Cyrillic keys are read back unchanged."""
    # Trie restricted to the lowercase Russian range.
    trie = datrie.Trie(ranges=[('а', 'я')])
    expected = (('а', 1), ('б', 2), ('аб', 'vasia'))
    for key, value in expected:
        trie[key] = value

    for key, value in expected:
        assert trie[key] == value
Пример #29
0
    def __init__(self, to_ascii, config):
        """Store the settings and build the replacement lookup.

        ``config['replacements']`` is iterated as ``(source, replacement
        list)`` pairs and loaded into a trie over ``config['chars']``. When
        it is empty or otherwise falsy, ``self.replacements`` is ``None``.
        """
        self.to_ascii = to_ascii
        self.variant_only = config['variant_only']

        pairs = config['replacements']
        if not pairs:
            self.replacements = None
            return

        # Set up datrie
        self.replacements = datrie.Trie(config['chars'])
        for src, repllist in pairs:
            self.replacements[src] = repllist
Пример #30
0
def get_data():
    """Read the frequency dictionary and the test queries from stdin.

    Input layout: a count ``n``, then ``n`` lines of ``word freq`` separated
    by a single space, then a count ``m`` followed by ``m`` query lines.

    Returns:
        ``(dict_of_freq, test_set)``: a lowercase-ASCII trie mapping each
        word to its integer frequency, and a tuple of the query lines.
    """
    word_count = int(input())
    frequencies = datrie.Trie(string.ascii_lowercase)
    for _ in range(word_count):
        token, count = input().split(' ')
        frequencies[token] = int(count)

    query_count = int(input())
    queries = tuple([input() for _ in range(query_count)])

    return frequencies, queries