def test_within_delete():
    """Check ``fst.within`` with queries one character off a stored key.

    The query "df" against the keys "abc def ghi" must match only "def",
    and the query "01" against a graph holding only "0" must yield
    exactly ``["0"]``.
    """
    word_reader = greader(gwrite(enlist("abc def ghi")))
    found = set(fst.within(word_reader, "df"))
    assert found == set(["def"])

    digit_reader = greader(gwrite(enlist("0")))
    hits = list(fst.within(digit_reader, "01"))
    assert hits == ["0"]
def test_within_delete():
    """Check ``fst.within`` with queries one character off a stored key.

    The query "df" against the keys "abc def ghi" must match only "def",
    and the query "01" against a graph holding only "0" must yield
    exactly ``["0"]``.
    """
    word_reader = greader(gwrite(enlist("abc def ghi")))
    found = set(fst.within(word_reader, "df"))
    assert found == set(["def"])

    digit_reader = greader(gwrite(enlist("0")))
    hits = list(fst.within(digit_reader, "01"))
    assert hits == ["0"]
def test_within_replace():
    """Check ``fst.within`` when one character of the query is substituted.

    "dez" against "abc def ghi" must match only "def"; "00" against the
    keys "00 01 10 11" must match "00", "10" and "01" (but not "11").
    """
    word_reader = greader(gwrite(enlist("abc def ghi")))
    found = set(fst.within(word_reader, "dez"))
    assert found == set(["def"])

    bit_reader = greader(gwrite(enlist("00 01 10 11")))
    matches = set(fst.within(bit_reader, "00"))
    expected = set(["00", "10", "01"])
    assert matches == expected
def test_within_replace():
    """Check ``fst.within`` when one character of the query is substituted.

    "dez" against "abc def ghi" must match only "def"; "00" against the
    keys "00 01 10 11" must match "00", "10" and "01" (but not "11").
    """
    word_reader = greader(gwrite(enlist("abc def ghi")))
    found = set(fst.within(word_reader, "dez"))
    assert found == set(["def"])

    bit_reader = greader(gwrite(enlist("00 01 10 11")))
    matches = set(fst.within(bit_reader, "00"))
    expected = set(["00", "10", "01"])
    assert matches == expected
def _suggestions(self, text, maxdist, prefix): if self.dict.has_key(text): yield (len(text) * 10 + self.dict.get(text, 0) * 5, text) for sug in fst.within(self.graph, text, k=maxdist, prefix=prefix): # Higher scores are better, so negate the edit distance yield ((0 - maxdist) * 100 + len(sug) * 10 + self.dict.get(sug, 0), sug)
def test_within():
    """Check ``fst.within`` with ``k=1`` against a file-backed graph.

    Writes every binary string of length 1 to 3 into a graph stored in a
    ``TempStorage`` and expects the query "01" to match the ten keys
    listed below.
    """
    with TempStorage() as st:
        gwrite(enlist("0 00 000 001 01 010 011 1 10 100 101 11 110 111"), st)
        gr = greader(st)
        try:
            s = set(fst.within(gr, "01", k=1))
        finally:
            # Close the reader even if the query raises, so a failure
            # does not leave it open while the storage context exits.
            gr.close()

    expected = set(["0", "00", "01", "011", "010", "001", "10", "101", "1",
                    "11"])
    assert s == expected
def terms_within(self, fieldname, text, maxdist, prefix=0):
    """Return the terms of ``fieldname`` that ``fst.within`` finds for ``text``.

    When ``has_word_graph(fieldname)`` is false, the call is delegated to
    ``IndexReader.terms_within`` with the same arguments instead.

    :param fieldname: the field whose terms are searched.
    :param text: the query text.
    :param maxdist: forwarded to ``fst.within`` as ``k``.
    :param prefix: forwarded unchanged as ``prefix`` on either path.
    """
    if self.has_word_graph(fieldname):
        # Fetch the graph first, then read the field's root address from
        # ``self._graph`` (presumably the same object; verify).
        graph = self._get_graph()
        root = self._graph.root(fieldname)
        return fst.within(graph, text, k=maxdist, prefix=prefix,
                          address=root)

    # This reader doesn't have a graph stored, use the slow method
    return IndexReader.terms_within(self, fieldname, text, maxdist,
                                    prefix=prefix)
def test_within():
    """Check ``fst.within`` with ``k=1`` against a file-backed graph.

    Writes every binary string of length 1 to 3 into a graph stored in a
    ``TempStorage`` and expects the query "01" to match the ten keys
    listed below.
    """
    with TempStorage() as st:
        gwrite(enlist("0 00 000 001 01 010 011 1 10 100 101 11 110 111"), st)
        gr = greader(st)
        try:
            s = set(fst.within(gr, "01", k=1))
        finally:
            # Close the reader even if the query raises, so a failure
            # does not leave it open while the storage context exits.
            gr.close()

    expected = set(["0", "00", "01", "011", "010", "001", "10", "101", "1",
                    "11"])
    assert s == expected
def terms_within(self, fieldname, text, maxdist, prefix=0):
    """Return the terms of ``fieldname`` that ``fst.within`` finds for ``text``.

    When ``has_word_graph(fieldname)`` is false, the call is delegated to
    ``IndexReader.terms_within`` with the same arguments instead.

    :param fieldname: the field whose terms are searched.
    :param text: the query text.
    :param maxdist: forwarded to ``fst.within`` as ``k``.
    :param prefix: forwarded unchanged as ``prefix`` on either path.
    """
    if self.has_word_graph(fieldname):
        # Fetch the graph first, then read the field's root address from
        # ``self._graph`` (presumably the same object; verify).
        graph = self._get_graph()
        root = self._graph.root(fieldname)
        return fst.within(graph, text, k=maxdist, prefix=prefix,
                          address=root)

    # This reader doesn't have a graph stored, use the slow method
    return IndexReader.terms_within(self, fieldname, text, maxdist,
                                    prefix=prefix)
def test_within_unicode():
    """Check ``fst.within`` on a graph whose keys are non-ASCII strings.

    Three keys are inserted under the field "test" in a ``RamStorage``
    file. A query equal to the third key with its middle character
    replaced by "." must return exactly that third key.
    """
    keys = [
        u("\u280b\u2817\u2801\u281d\u2809\u2811"),
        u("\u65e5\u672c"),
        u("\uc774\uc124\ud76c"),
    ]

    storage = RamStorage()
    writer = fst.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for word in keys:
        writer.insert(word)
    writer.close()

    reader = fst.GraphReader(storage.open_file("test"))
    matches = list(fst.within(reader, u("\uc774.\ud76c")))
    assert matches == [u("\uc774\uc124\ud76c")]
def test_within_unicode():
    """Check ``fst.within`` on a graph whose keys are non-ASCII strings.

    Three keys are inserted under the field "test" in a ``RamStorage``
    file. A query equal to the third key with its middle character
    replaced by "." must return exactly that third key.
    """
    keys = [
        u("\u280b\u2817\u2801\u281d\u2809\u2811"),
        u("\u65e5\u672c"),
        u("\uc774\uc124\ud76c"),
    ]

    storage = RamStorage()
    writer = fst.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for word in keys:
        writer.insert(word)
    writer.close()

    reader = fst.GraphReader(storage.open_file("test"))
    matches = list(fst.within(reader, u("\uc774.\ud76c")))
    assert matches == [u("\uc774\uc124\ud76c")]
def test_within_prefix():
    """Check ``fst.within`` with ``prefix=2``.

    With the keys "aabc aadc babc badc", the query "aaxc" and
    ``prefix=2`` must match only the two keys starting with "aa".
    """
    reader = greader(gwrite(enlist("aabc aadc babc badc")))
    matches = set(fst.within(reader, "aaxc", prefix=2))
    expected = set(["aabc", "aadc"])
    assert matches == expected
def test_within_k2():
    """Check ``fst.within`` with ``k=2``.

    With the keys "abc bac cba", the query "cb" must match "abc" and
    "cba" but not "bac".
    """
    reader = greader(gwrite(enlist("abc bac cba")))
    matches = set(fst.within(reader, "cb", k=2))
    expected = set(["abc", "cba"])
    assert matches == expected
def test_within_transpose():
    """Check that ``fst.within`` matches a key with two adjacent letters swapped.

    With the keys "abc def ghi", the query "dfe" must match only "def".
    """
    reader = greader(gwrite(enlist("abc def ghi")))
    matches = set(fst.within(reader, "dfe"))
    expected = set(["def"])
    assert matches == expected
def test_within_k2():
    """Check ``fst.within`` with ``k=2``.

    With the keys "abc bac cba", the query "cb" must match "abc" and
    "cba" but not "bac".
    """
    reader = greader(gwrite(enlist("abc bac cba")))
    matches = set(fst.within(reader, "cb", k=2))
    expected = set(["abc", "cba"])
    assert matches == expected
def test_within_transpose():
    """Check that ``fst.within`` matches a key with two adjacent letters swapped.

    With the keys "abc def ghi", the query "dfe" must match only "def".
    """
    reader = greader(gwrite(enlist("abc def ghi")))
    matches = set(fst.within(reader, "dfe"))
    expected = set(["def"])
    assert matches == expected
def test_within_insert():
    """Check that ``fst.within`` matches keys one character longer than the query.

    With the keys "00 01 10 11", the query "0" must match "00", "01" and
    "10" but not "11".
    """
    reader = greader(gwrite(enlist("00 01 10 11")))
    matches = set(fst.within(reader, "0"))
    expected = set(["00", "01", "10"])
    assert matches == expected
def test_within_match():
    """Check that ``fst.within`` returns a key that equals the query exactly.

    With the keys "abc def ghi", the query "def" must match only "def".
    """
    reader = greader(gwrite(enlist("abc def ghi")))
    matches = set(fst.within(reader, "def"))
    assert matches == set(["def"])
def test_within_insert():
    """Check that ``fst.within`` matches keys one character longer than the query.

    With the keys "00 01 10 11", the query "0" must match "00", "01" and
    "10" but not "11".
    """
    reader = greader(gwrite(enlist("00 01 10 11")))
    matches = set(fst.within(reader, "0"))
    expected = set(["00", "01", "10"])
    assert matches == expected
def test_within_prefix():
    """Check ``fst.within`` with ``prefix=2``.

    With the keys "aabc aadc babc badc", the query "aaxc" and
    ``prefix=2`` must match only the two keys starting with "aa".
    """
    reader = greader(gwrite(enlist("aabc aadc babc badc")))
    matches = set(fst.within(reader, "aaxc", prefix=2))
    expected = set(["aabc", "aadc"])
    assert matches == expected
def _suggestions(self, text, maxdist, prefix): if self.dict.has_key(text): yield (len(text)*10 + self.dict.get(text,0)*5, text) for sug in fst.within(self.graph, text, k=maxdist, prefix=prefix): # Higher scores are better, so negate the edit distance yield ((0-maxdist)*100 + len(sug)*10 + self.dict.get(sug,0), sug)
def _suggestions(self, text, maxdist, prefix):
    """Yield a ``(score, word)`` pair for each graph match of ``text``.

    Words come from ``fst.within(self.graph, text, k=maxdist,
    prefix=prefix)``. Every pair carries the same score, ``0 - maxdist``.

    :param text: the word to find suggestions for.
    :param maxdist: passed to ``fst.within`` as ``k`` and used for the score.
    :param prefix: passed through to ``fst.within`` as ``prefix``.
    """
    matches = fst.within(self.graph, text, k=maxdist, prefix=prefix)
    for word in matches:
        # Higher scores are better, so the distance bound is negated.
        score = 0 - maxdist
        yield score, word
def test_within_match():
    """Check that ``fst.within`` returns a key that equals the query exactly.

    With the keys "abc def ghi", the query "def" must match only "def".
    """
    reader = greader(gwrite(enlist("abc def ghi")))
    matches = set(fst.within(reader, "def"))
    assert matches == set(["def"])
def _suggestions(self, text, maxdist, prefix):
    """Yield a ``(score, word)`` pair for each graph match of ``text``.

    Words come from ``fst.within(self.graph, text, k=maxdist,
    prefix=prefix)``. Every pair carries the same score, ``0 - maxdist``.

    :param text: the word to find suggestions for.
    :param maxdist: passed to ``fst.within`` as ``k`` and used for the score.
    :param prefix: passed through to ``fst.within`` as ``prefix``.
    """
    matches = fst.within(self.graph, text, k=maxdist, prefix=prefix)
    for word in matches:
        # Higher scores are better, so the distance bound is negated.
        score = 0 - maxdist
        yield score, word