def add(self, docnum, ls):
    """Store the byte strings in ``ls`` for ``docnum`` as one joined value.

    Every item must be exactly ``self._fixedlen`` long (checked with
    ``assert``). The items are concatenated in order and the result is
    passed to ``self._child.add`` together with ``docnum``.

    :param docnum: the document number passed on to the child writer.
    :param ls: an iterable of fixed-length byte strings.
    """

    def checked_items():
        # Validate each item lazily, right before it is joined
        for item in ls:
            assert len(item) == self._fixedlen
            yield item

    payload = emptybytes.join(checked_items())
    self._child.add(docnum, payload)
예제 #2
0
 def add(self, docnum, ls):
     """Store the byte strings in ``ls`` for ``docnum`` as one joined value.

     Every item must be exactly ``self._fixedlen`` long (checked with
     ``assert``). The items are concatenated in order and the result is
     passed to ``self._child.add`` together with ``docnum``.

     :param docnum: the document number passed on to the child writer.
     :param ls: an iterable of fixed-length byte strings.
     """

     def checked_items():
         # Validate each item lazily, right before it is joined
         for item in ls:
             assert len(item) == self._fixedlen
             yield item

     payload = emptybytes.join(checked_items())
     self._child.add(docnum, payload)
 def add(self, docnum, ls):
     """Serialize the byte strings in ``ls`` and store them for ``docnum``.

     The stored value is ``varint(len(ls))`` followed, for every item, by
     ``varint(len(item))`` and the item itself, all joined into a single
     bytes object.

     :param docnum: the document number passed on to the child writer.
     :param ls: a sized collection of ``bytes_type`` items (each item is
         checked with ``assert``).
     """
     out = [varint(len(ls))]
     for v in ls:
         assert isinstance(v, bytes_type)
         out.append(varint(len(v)))
         out.append(v)
     # The child writer needs the document number as well as the payload;
     # without it the joined bytes would be passed in the docnum position.
     self._child.add(docnum, emptybytes.join(out))
예제 #4
0
 def add(self, docnum, ls):
     """Serialize the byte strings in ``ls`` and store them for ``docnum``.

     The stored value is ``varint(len(ls))`` followed, for every item, by
     ``varint(len(item))`` and the item itself, all joined into a single
     bytes object that is passed to ``self._child.add`` with ``docnum``.

     :param docnum: the document number passed on to the child writer.
     :param ls: a sized collection of ``bytes_type`` items (each item is
         checked with ``assert``).
     """
     parts = [varint(len(ls))]
     for item in ls:
         assert isinstance(item, bytes_type)
         # Length prefix first, then the raw bytes of the item
         parts.extend((varint(len(item)), item))
     self._child.add(docnum, emptybytes.join(parts))
예제 #5
0
def glob_graph_limit(graph, mode, pattern, address):
    """Walks ``graph`` from ``address`` following the ops in ``pattern`` and
    returns the labels collected along the way joined into one bytes object.

    When ``mode == LO`` the walk steps with ``graph.arc_at`` and stops a
    star/plus run at the first accepting arc; otherwise it steps to the last
    arc yielded by ``graph.iter_arcs``. Presumably this produces a low or
    high bound key for the glob -- confirm against the callers.

    :param graph: object providing ``arc_at``, ``iter_arcs`` and
        ``find_arc``.
    :param mode: compared against ``LO`` to choose the walking direction.
    :param pattern: sequence of ops; ``op[0]`` is the op code, ``op[1]`` the
        labels/chars for literal and range ops, ``op[2]`` the negate flag
        for range ops.
    :param address: the node address the walk starts from.
    """

    want_low = (mode == LO)
    collected = []

    def step(current):
        # Take one arc out of current.target: the one given by arc_at() in
        # low mode, otherwise the last one iter_arcs() yields (or the arc
        # unchanged if it yields nothing)
        if want_low:
            return graph.arc_at(current.target, current)
        for current in graph.iter_arcs(current.target, current):
            pass
        return current

    arc = Arc(target=address)
    for op in pattern:
        # Nothing left to walk once the current arc has no target
        if arc.target is None:
            break

        code = op[0]
        if code in (_STAR, _PLUS):
            while arc.target:
                arc = step(arc)
                collected.append(arc.label)
                if want_low and arc.accept:
                    break
        elif code == _QUEST:
            # Moves one arc forward without recording its label
            arc = step(arc)
        elif code == _LIT:
            for label in op[1]:
                arc = graph.find_arc(arc.target, label)
                if arc is None:
                    break
                collected.append(label)
                if arc.target is None:
                    break
            # A literal label with no matching arc ends the whole walk
            if arc is None:
                break
        elif code == _RANGE:
            chars, negate = op[1], op[2]
            chosen = None
            for candidate in graph.iter_arcs(arc.target):
                if (candidate.label in chars) ^ negate:
                    chosen = candidate.copy()
                    # Low mode keeps the first match, otherwise the last
                    if want_low:
                        break
            if not chosen:
                break
            collected.append(chosen.label)
            arc = chosen
    return emptybytes.join(collected)
예제 #6
0
파일: glob.py 프로젝트: MadAd360/GoGramming
def glob_graph_limit(graph, mode, pattern, address):
    """Walks ``graph`` from ``address`` following the ops in ``pattern`` and
    returns the labels collected along the way joined into one bytes object.

    When ``mode == LO`` the walk steps with ``graph.arc_at`` and stops a
    star/plus run at the first accepting arc; otherwise it steps to the last
    arc yielded by ``graph.iter_arcs``. Presumably this produces a low or
    high bound key for the glob -- confirm against the callers.

    :param graph: object providing ``arc_at``, ``iter_arcs`` and
        ``find_arc``.
    :param mode: compared against ``LO`` to choose the walking direction.
    :param pattern: sequence of ops; ``op[0]`` is the op code, ``op[1]`` the
        labels/chars for literal and range ops, ``op[2]`` the negate flag
        for range ops.
    :param address: the node address the walk starts from.
    """

    want_low = (mode == LO)
    collected = []

    def step(current):
        # Take one arc out of current.target: the one given by arc_at() in
        # low mode, otherwise the last one iter_arcs() yields (or the arc
        # unchanged if it yields nothing)
        if want_low:
            return graph.arc_at(current.target, current)
        for current in graph.iter_arcs(current.target, current):
            pass
        return current

    arc = Arc(target=address)
    for op in pattern:
        # Nothing left to walk once the current arc has no target
        if arc.target is None:
            break

        code = op[0]
        if code in (_STAR, _PLUS):
            while arc.target:
                arc = step(arc)
                collected.append(arc.label)
                if want_low and arc.accept:
                    break
        elif code == _QUEST:
            # Moves one arc forward without recording its label
            arc = step(arc)
        elif code == _LIT:
            for label in op[1]:
                arc = graph.find_arc(arc.target, label)
                if arc is None:
                    break
                collected.append(label)
                if arc.target is None:
                    break
            # A literal label with no matching arc ends the whole walk
            if arc is None:
                break
        elif code == _RANGE:
            chars, negate = op[1], op[2]
            chosen = None
            for candidate in graph.iter_arcs(arc.target):
                if (candidate.label in chars) ^ negate:
                    chosen = candidate.copy()
                    # Low mode keeps the first match, otherwise the last
                    if want_low:
                        break
            if not chosen:
                break
            collected.append(chosen.label)
            arc = chosen
    return emptybytes.join(collected)
예제 #7
0
    def _mini_values(self):
        """Returns a compact form of ``self._values``, chosen by the size
        reported by ``self._format.fixed_value_size()``:

        * ``None`` or negative size: a tuple of the values.
        * size ``0``: ``None``.
        * any other size: the values joined into one bytes object.
        """

        size = self._format.fixed_value_size()
        raw = self._values

        if size is None or size < 0:
            return tuple(raw)
        if size == 0:
            return None
        return emptybytes.join(raw)
    def _mini_values(self):
        """Returns a compact form of ``self._values``, chosen by the size
        reported by ``self._format.fixed_value_size()``:

        * ``None`` or negative size: a tuple of the values.
        * size ``0``: ``None``.
        * any other size: the values joined into one bytes object.
        """

        size = self._format.fixed_value_size()
        raw = self._values

        if size is None or size < 0:
            return tuple(raw)
        if size == 0:
            return None
        return emptybytes.join(raw)
예제 #9
0
def glob_vacuum_limit(mode, pattern):
    """Builds a bytes value from the leading literal and range ops of
    ``pattern`` without consulting any graph.

    Literal ops contribute ``op[1]`` as-is. Non-negated range ops contribute
    ``min(op[1])`` when ``mode == LO`` and ``max(op[1])`` otherwise. The scan
    stops at the first star, plus or question-mark op, or at the first
    negated range.

    :param mode: compared against ``LO`` to choose between min and max.
    :param pattern: sequence of ops, with ``op[0]`` being the op code.
    """

    pick = min if mode == LO else max
    pieces = []
    for op in pattern:
        kind = op[0]
        if kind in (_STAR, _PLUS, _QUEST):
            break
        if kind == _LIT:
            pieces.append(op[1])
        elif kind == _RANGE:
            # Negated char lists are not handled; stop here
            if op[2]:
                break
            pieces.append(pick(op[1]))
    return emptybytes.join(pieces)
예제 #10
0
파일: glob.py 프로젝트: MadAd360/GoGramming
def glob_vacuum_limit(mode, pattern):
    """Builds a bytes value from the leading literal and range ops of
    ``pattern`` without consulting any graph.

    Literal ops contribute ``op[1]`` as-is. Non-negated range ops contribute
    ``min(op[1])`` when ``mode == LO`` and ``max(op[1])`` otherwise. The scan
    stops at the first star, plus or question-mark op, or at the first
    negated range.

    :param mode: compared against ``LO`` to choose between min and max.
    :param pattern: sequence of ops, with ``op[0]`` being the op code.
    """

    pick = min if mode == LO else max
    pieces = []
    for op in pattern:
        kind = op[0]
        if kind in (_STAR, _PLUS, _QUEST):
            break
        if kind == _LIT:
            pieces.append(op[1])
        elif kind == _RANGE:
            # Negated char lists are not handled; stop here
            if op[2]:
                break
            pieces.append(pick(op[1]))
    return emptybytes.join(pieces)
예제 #11
0
파일: fst.py 프로젝트: adamhorner/yaki-tng
    def peek_key_bytes(self):
        """Returns the result of ``peek_key()`` (the next closest key in the
        graph) with its labels joined into a single bytes object.
        """

        labels = self.peek_key()
        return emptybytes.join(labels)
예제 #12
0
파일: fst.py 프로젝트: adamhorner/yaki-tng
    def prefix_bytes(self):
        """Returns the result of ``prefix()`` (the labels on the path from the
        root to the current arc) joined into a single bytes object.
        """

        labels = self.prefix()
        return emptybytes.join(labels)
예제 #13
0
파일: fst.py 프로젝트: adamhorner/yaki-tng
def within(graph, text, k=1, prefix=0, address=None):
    """Yields a series of keys in the given graph within ``k`` edit distance of
    ``text``. If ``prefix`` is greater than 0, all keys must match the first
    ``prefix`` characters of ``text``.

    The search is a depth-first walk driven by an explicit stack of states
    ``(address, k, i, sofar, accept)``: the current node address, the number
    of edits still allowed, the position in ``text``, the key bytes built so
    far, and the accept flag of the arc that led to this state.

    :param graph: the graph to search; must provide ``find_path``,
        ``find_arc`` and ``arc_dict``.
    :param text: the text to match; converted with ``to_labels`` first.
    :param k: the maximum number of edits allowed.
    :param prefix: the number of leading labels of ``text`` that must match
        exactly.
    :param address: the node address to start from; defaults to
        ``graph._root``.

    Keys are decoded with ``utf8decode`` before being yielded. NOTE(review):
    only complete states are de-duplicated, so the same key may be yielded
    more than once when it is reachable with different remaining edit
    counts -- confirm whether callers de-duplicate.
    """

    text = to_labels(text)
    if address is None:
        address = graph._root

    sofar = emptybytes
    accept = False
    if prefix:
        # Follow the required prefix exactly; if the graph has no such path
        # there is nothing to yield
        prefixchars = text[:prefix]
        arc = graph.find_path(prefixchars, address=address)
        if arc is None:
            return
        sofar = emptybytes.join(prefixchars)
        address = arc.target
        accept = arc.accept

    # Matching starts at index ``prefix`` of the text
    stack = [(address, k, prefix, sofar, accept)]
    seen = set()
    while stack:
        state = stack.pop()
        # Have we already tried this state?
        if state in seen:
            continue
        seen.add(state)

        address, k, i, sofar, accept = state
        # If we're at the end of the text (or deleting enough chars would get
        # us to the end and still within K), and we're in the accept state,
        # yield the current result
        if (len(text) - i <= k) and accept:
            yield utf8decode(sofar)[0]

        # If we're in the stop state, give up
        if address is None:
            continue

        # Exact match
        if i < len(text):
            arc = graph.find_arc(address, text[i])
            if arc:
                stack.append((arc.target, k, i + 1, sofar + text[i],
                              arc.accept))
        # If K is already 0, can't do any more edits
        if k < 1:
            continue
        # Every state pushed below this point has spent one edit
        k -= 1

        arcs = graph.arc_dict(address)
        # Insertions: follow any arc without consuming a char of the text
        stack.extend((arc.target, k, i, sofar + char, arc.accept)
                     for char, arc in iteritems(arcs))

        # Deletion, replacement, and transpo only work before the end
        if i >= len(text):
            continue
        char = text[i]

        # Deletion: skip text[i] while staying at the same address; the
        # pushed state carries accept=False
        stack.append((address, k, i + 1, sofar, False))
        # Replacement: follow any arc whose label differs from text[i]
        for char2, arc in iteritems(arcs):
            if char2 != char:
                stack.append((arc.target, k, i + 1, sofar + char2, arc.accept))
        # Transposition: match text[i + 1] then text[i], consuming two chars
        if i < len(text) - 1:
            char2 = text[i + 1]
            if char != char2 and char2 in arcs:
                # Find arc from next char to this char
                target = arcs[char2].target
                if target:
                    arc = graph.find_arc(target, char)
                    if arc:
                        stack.append((arc.target, k, i + 2,
                                      sofar + char2 + char, arc.accept))
예제 #14
0
파일: dawg.py 프로젝트: skrieder/microblog
    def peek_key_bytes(self):
        """Returns the result of ``peek_key()`` (the next closest key in the
        graph) with its labels joined into a single bytes object.
        """

        labels = self.peek_key()
        return emptybytes.join(labels)
예제 #15
0
파일: dawg.py 프로젝트: skrieder/microblog
    def prefix_bytes(self):
        """Returns the result of ``prefix()`` (the labels on the path from the
        root to the current arc) joined into a single bytes object.
        """

        labels = self.prefix()
        return emptybytes.join(labels)
예제 #16
0
파일: dawg.py 프로젝트: skrieder/microblog
def within(graph, text, k=1, prefix=0, address=None):
    """Yields a series of keys in the given graph within ``k`` edit distance of
    ``text``. If ``prefix`` is greater than 0, all keys must match the first
    ``prefix`` characters of ``text``.

    The search is a depth-first walk driven by an explicit stack of states
    ``(address, k, i, sofar, accept)``: the current node address, the number
    of edits still allowed, the position in ``text``, the key bytes built so
    far, and the accept flag of the arc that led to this state.

    :param graph: the graph to search; must provide ``find_path``,
        ``find_arc`` and ``arc_dict``.
    :param text: the text to match; converted with ``to_labels`` first.
    :param k: the maximum number of edits allowed.
    :param prefix: the number of leading labels of ``text`` that must match
        exactly.
    :param address: the node address to start from; defaults to
        ``graph._root``.

    Keys are decoded with ``utf8decode`` before being yielded. NOTE(review):
    only complete states are de-duplicated, so the same key may be yielded
    more than once when it is reachable with different remaining edit
    counts -- confirm whether callers de-duplicate.
    """

    text = to_labels(text)
    if address is None:
        address = graph._root

    sofar = emptybytes
    accept = False
    if prefix:
        # Follow the required prefix exactly; if the graph has no such path
        # there is nothing to yield
        prefixchars = text[:prefix]
        arc = graph.find_path(prefixchars, address=address)
        if arc is None:
            return
        sofar = emptybytes.join(prefixchars)
        address = arc.target
        accept = arc.accept

    # Matching starts at index ``prefix`` of the text
    stack = [(address, k, prefix, sofar, accept)]
    seen = set()
    while stack:
        state = stack.pop()
        # Have we already tried this state?
        if state in seen:
            continue
        seen.add(state)

        address, k, i, sofar, accept = state
        # If we're at the end of the text (or deleting enough chars would get
        # us to the end and still within K), and we're in the accept state,
        # yield the current result
        if (len(text) - i <= k) and accept:
            yield utf8decode(sofar)[0]

        # If we're in the stop state, give up
        if address is None:
            continue

        # Exact match
        if i < len(text):
            arc = graph.find_arc(address, text[i])
            if arc:
                stack.append(
                    (arc.target, k, i + 1, sofar + text[i], arc.accept))
        # If K is already 0, can't do any more edits
        if k < 1:
            continue
        # Every state pushed below this point has spent one edit
        k -= 1

        arcs = graph.arc_dict(address)
        # Insertions: follow any arc without consuming a char of the text
        stack.extend((arc.target, k, i, sofar + char, arc.accept)
                     for char, arc in iteritems(arcs))

        # Deletion, replacement, and transpo only work before the end
        if i >= len(text):
            continue
        char = text[i]

        # Deletion: skip text[i] while staying at the same address; the
        # pushed state carries accept=False
        stack.append((address, k, i + 1, sofar, False))
        # Replacement: follow any arc whose label differs from text[i]
        for char2, arc in iteritems(arcs):
            if char2 != char:
                stack.append((arc.target, k, i + 1, sofar + char2, arc.accept))
        # Transposition: match text[i + 1] then text[i], consuming two chars
        if i < len(text) - 1:
            char2 = text[i + 1]
            if char != char2 and char2 in arcs:
                # Find arc from next char to this char
                target = arcs[char2].target
                if target:
                    arc = graph.find_arc(target, char)
                    if arc:
                        stack.append((arc.target, k, i + 2,
                                      sofar + char2 + char, arc.accept))