def add(self, docnum, ls):
    """Stores the byte strings in ``ls`` for document ``docnum`` as a single
    concatenated value.

    Every item is asserted to be exactly ``self._fixedlen`` long; the items
    are then joined back-to-back with no separator and handed to the child
    writer.

    :param docnum: the document number passed through to the child writer.
    :param ls: an iterable of byte strings.
    """

    pieces = []
    for item in ls:
        # Each item must have the configured fixed length
        assert len(item) == self._fixedlen
        pieces.append(item)

    joined = emptybytes.join(pieces)
    self._child.add(docnum, joined)
def add(self, docnum, ls):
    """Encodes ``ls`` as one length-prefixed bytes value and stores it for
    document ``docnum`` through the child writer.

    The encoded value is ``varint(len(ls))`` followed, for each item in
    order, by ``varint(len(item))`` and then the item itself.

    :param docnum: the document number passed through to the child writer.
    :param ls: a sized collection of byte strings (``len(ls)`` is taken
        before iterating).
    """

    out = [varint(len(ls))]
    for v in ls:
        # Only byte strings can be length-prefixed and joined
        assert isinstance(v, bytes_type)
        out.append(varint(len(v)))
        out.append(v)

    # The child writer needs the document number along with the encoded
    # value
    self._child.add(docnum, emptybytes.join(out))
def add(self, docnum, ls):
    """Writes the list of byte strings ``ls`` for document ``docnum`` as a
    single length-prefixed value.

    The value starts with the varint-encoded number of items; each item then
    contributes its varint-encoded length followed by its bytes. The joined
    result is handed to the child writer together with ``docnum``.

    :param docnum: the document number passed through to the child writer.
    :param ls: a sized collection of byte strings.
    """

    parts = [varint(len(ls))]
    for item in ls:
        assert isinstance(item, bytes_type)
        # Length prefix first, then the payload
        parts.extend((varint(len(item)), item))
    self._child.add(docnum, emptybytes.join(parts))
def glob_graph_limit(graph, mode, pattern, address):
    """Walks ``graph`` from ``address``, following the ops in ``pattern``, and
    returns the labels collected along the way joined into a single bytes
    object.

    Presumably this computes the lowest (``mode == LO``) or highest key in
    the graph that the glob pattern could match -- TODO confirm against the
    callers.

    :param graph: object providing ``arc_at``, ``iter_arcs`` and
        ``find_arc``.
    :param mode: compared against ``LO``; when equal the walk runs in "low"
        mode, otherwise in the opposite mode.
    :param pattern: iterable of ops. ``op[0]`` is the op code (``_STAR``,
        ``_PLUS``, ``_QUEST``, ``_LIT`` or ``_RANGE``). For ``_LIT``,
        ``op[1]`` holds the labels to follow; for ``_RANGE``, ``op[1]`` is the
        set of characters and ``op[2]`` the negate flag.
    :param address: graph address at which the walk starts.
    """

    low = mode == LO

    output = []
    # Start from a placeholder arc whose target is the starting address
    arc = Arc(target=address)
    for op in pattern:
        # Nothing left to follow once the current arc has no target
        if arc.target is None:
            break

        code = op[0]
        if code == _STAR or code == _PLUS:
            # Keep stepping, recording each label, while the current arc has
            # a (truthy) target
            while arc.target:
                if low:
                    arc = graph.arc_at(arc.target, arc)
                else:
                    # Run the iterator to exhaustion so that ``arc`` is left
                    # bound to the last arc it yields
                    for arc in graph.iter_arcs(arc.target, arc):
                        pass
                output.append(arc.label)
                # In low mode, stop at the first accepting arc
                if low and arc.accept:
                    break
        elif code == _QUEST:
            # Take a single step, choosing the arc the same way as above.
            # NOTE(review): unlike the _STAR/_PLUS branch, no label is
            # appended to the output for this step -- confirm this is
            # intended.
            if low:
                arc = graph.arc_at(arc.target, arc)
            else:
                for arc in graph.iter_arcs(arc.target, arc):
                    pass
        elif code == _LIT:
            # Follow the literal labels one at a time, recording each label
            # for which an arc is found
            labels = op[1]
            for label in labels:
                arc = graph.find_arc(arc.target, label)
                if arc is None:
                    break
                output.append(label)
                if arc.target is None:
                    break
            # A missing arc ends the whole walk, not just this op
            if arc is None:
                break
        elif code == _RANGE:
            chars = op[1]
            negate = op[2]
            newarc = None
            for a in graph.iter_arcs(arc.target):
                # XOR with ``negate`` inverts the membership test for
                # negated ranges
                if (a.label in chars) ^ negate:
                    # Keep a copy -- presumably because the iterator may
                    # reuse the arc object it yields; TODO confirm
                    newarc = a.copy()
                    # Low mode takes the first matching arc; otherwise keep
                    # scanning so ``newarc`` ends up as the last match
                    if low:
                        break
            if newarc:
                output.append(newarc.label)
                arc = newarc
            else:
                # No arc matched the range, so the walk ends here
                break
    return emptybytes.join(output)
def _mini_values(self):
    """Returns a compact form of ``self._values``, chosen by the format's
    ``fixed_value_size()``:

    * ``None`` or negative -- the values as a tuple.
    * ``0`` -- ``None``.
    * any other size -- the values joined into a single bytes object.
    """

    size = self._format.fixed_value_size()
    vals = self._values

    if size is None or size < 0:
        return tuple(vals)
    if size == 0:
        return None
    return emptybytes.join(vals)
def glob_vacuum_limit(mode, pattern):
    """Builds a limit key from ``pattern`` alone, without consulting a graph,
    and returns it as a single bytes object.

    Literal ops contribute their text. A non-negated range contributes its
    smallest member when ``mode == LO`` and its largest member otherwise. The
    scan stops at the first star, plus or question-mark op, or at the first
    negated range.
    """

    # Decide once which end of a character range to take
    pick = min if mode == LO else max

    parts = []
    for op in pattern:
        kind = op[0]
        if kind in (_STAR, _PLUS, _QUEST):
            break
        elif kind == _LIT:
            parts.append(op[1])
        elif kind == _RANGE:
            # Don't do negated char lists
            if op[2]:
                break
            parts.append(pick(op[1]))
    return emptybytes.join(parts)
def peek_key_bytes(self):
    """Returns the next closest key in the graph, as given by
    ``peek_key()``, with its labels joined into one bytes object.
    """

    labels = self.peek_key()
    return emptybytes.join(labels)
def prefix_bytes(self):
    """Returns the labels on the path from the root to the current arc, as
    given by ``prefix()``, joined into one bytes object.
    """

    labels = self.prefix()
    return emptybytes.join(labels)
def within(graph, text, k=1, prefix=0, address=None):
    """Yields the keys in ``graph`` that are within ``k`` edits of ``text``.

    An edit is an insertion, a deletion, a replacement, or a transposition
    of two adjacent labels. If ``prefix`` is greater than 0, every key must
    start with the first ``prefix`` labels of ``text``. The search starts at
    ``address``, or at the graph's root when ``address`` is None. Keys are
    decoded with ``utf8decode`` before being yielded; a key may be yielded
    more than once when it is reached through different search states.
    """

    labels = to_labels(text)
    if address is None:
        address = graph._root

    start_key = emptybytes
    start_accept = False
    if prefix:
        # The required prefix must exist in the graph, otherwise there is
        # nothing to yield
        head = labels[:prefix]
        head_arc = graph.find_path(head, address=address)
        if head_arc is None:
            return
        start_key = emptybytes.join(head)
        address = head_arc.target
        start_accept = head_arc.accept

    nlabels = len(labels)
    visited = set()
    # Each state: (node address, edits left, position in labels, key built
    # so far, whether the arc taken to get here accepts)
    pending = [(address, k, prefix, start_key, start_accept)]
    while pending:
        state = pending.pop()
        # Skip states that were already explored
        if state in visited:
            continue
        visited.add(state)
        node, budget, pos, key, accepting = state

        # Emit the key if the rest of the text can be deleted within the
        # remaining budget and we are on an accepting arc
        if (nlabels - pos <= budget) and accepting:
            yield utf8decode(key)[0]

        # The stop state has no outgoing arcs
        if node is None:
            continue

        # Exact match: consume the next label without spending an edit
        if pos < nlabels:
            label = labels[pos]
            hit = graph.find_arc(node, label)
            if hit:
                pending.append((hit.target, budget, pos + 1, key + label,
                                hit.accept))

        # Everything below costs one edit
        if budget < 1:
            continue
        budget -= 1

        outgoing = graph.arc_dict(node)

        # Insertion: follow any arc without consuming a label of the text
        for lab, nxt in iteritems(outgoing):
            pending.append((nxt.target, budget, pos, key + lab, nxt.accept))

        # The remaining edits all consume at least one label of the text
        if pos >= nlabels:
            continue
        current = labels[pos]

        # Deletion: skip the current label and stay on this node
        pending.append((node, budget, pos + 1, key, False))

        # Replacement: follow any arc other than the exact-match one
        for lab, nxt in iteritems(outgoing):
            if lab != current:
                pending.append((nxt.target, budget, pos + 1, key + lab,
                                nxt.accept))

        # Transposition: needs a following label that differs from the
        # current one and has an arc from this node
        if pos >= nlabels - 1:
            continue
        following = labels[pos + 1]
        if current == following or following not in outgoing:
            continue
        middle = outgoing[following].target
        if not middle:
            continue
        # Find the arc from the following label back to the current one
        swapped = graph.find_arc(middle, current)
        if swapped:
            pending.append((swapped.target, budget, pos + 2,
                            key + following + current, swapped.accept))
def within(graph, text, k=1, prefix=0, address=None):
    """Yields a series of keys from ``graph`` whose edit distance from
    ``text`` is at most ``k``, counting insertions, deletions, replacements
    and transpositions of adjacent labels.

    If ``prefix`` is greater than 0, only keys sharing the first ``prefix``
    labels of ``text`` are considered. ``address`` selects the node to start
    from and defaults to the graph's root. Each yielded key has been passed
    through ``utf8decode``.
    """

    text = to_labels(text)
    origin = graph._root if address is None else address

    # Build the initial search state: (node, edits left, index into text,
    # key so far, accept flag)
    if prefix:
        lead = text[:prefix]
        arc = graph.find_path(lead, address=origin)
        if arc is None:
            # The mandatory prefix is not in the graph
            return
        joined = emptybytes.join(lead)
        first = (arc.target, k, prefix, joined, arc.accept)
    else:
        first = (origin, k, prefix, emptybytes, False)

    size = len(text)
    todo = [first]
    done = set()
    push = todo.append
    while todo:
        state = todo.pop()
        if state in done:
            # Already expanded this exact state
            continue
        done.add(state)

        here, left, i, built, final = state

        # Whatever remains of the text can be deleted within the edits left
        # and the arc that led here accepts, so this is a result
        if (size - i <= left) and final:
            yield utf8decode(built)[0]

        if here is None:
            # Stop state: nowhere further to go
            continue

        # Follow the matching arc for free, if there is one
        if i < size:
            arc = graph.find_arc(here, text[i])
            if arc:
                push((arc.target, left, i + 1, built + text[i], arc.accept))

        # No edits left means no more variations from this state
        if left < 1:
            continue
        left -= 1

        arcs = graph.arc_dict(here)

        # Insertions: take every arc while staying at the same text index
        todo.extend((arc.target, left, i, built + c, arc.accept)
                    for c, arc in iteritems(arcs))

        # Deletion, replacement and transposition need a label to act on
        if i >= size:
            continue
        this = text[i]

        # Deletion: drop the current label without moving in the graph
        push((here, left, i + 1, built, False))

        # Replacements: take every arc except the one matching the label
        todo.extend((arc.target, left, i + 1, built + c, arc.accept)
                    for c, arc in iteritems(arcs) if c != this)

        # Transposition: swap the current label with the one after it
        if i < size - 1:
            after = text[i + 1]
            if this != after and after in arcs:
                hop = arcs[after].target
                if hop:
                    # Arc leading from the next label back to this one
                    back = graph.find_arc(hop, this)
                    if back:
                        push((back.target, left, i + 2,
                              built + after + this, back.accept))