def test_span_characters():
    """Check that phrase spans carry character offsets into the stored text.

    Runs the phrase "bravo echo" against the "text" field of the index
    returned by ``get_index()``. For every matching document, the stored
    "text" value is joined with single spaces, and each span's
    ``startchar``/``endchar`` pair must slice exactly "bravo echo" out of
    that joined string.
    """
    index = get_index()
    with index.searcher() as searcher:
        matcher = Phrase("text", ["bravo", "echo"]).matcher(searcher)
        while matcher.is_active():
            stored = searcher.stored_fields(matcher.id())
            joined_text = " ".join(stored["text"])
            for span in matcher.spans():
                matched = joined_text[span.startchar:span.endchar]
                assert matched == "bravo echo"
            # Advance to the next matching document.
            matcher.next()
def test_posting_phrase():
    """Exercise phrase queries against a small in-memory index.

    Five documents (names "A" to "E") are indexed with a stored ``name``
    ID field and a ``value`` TEXT field. The test then asserts:

    * the matcher built for a five-word phrase is a ``SpanNearMatcher``;
    * which document names each phrase query returns;
    * that reversed word order does not match with the default slop,
      while the forward order with ``slop=2`` does;
    * that a phrase of four "blah" matches nothing, while a phrase of
      three matches document "E".
    """
    schema = fields.Schema(name=fields.ID(stored=True), value=fields.TEXT)
    index = RamStorage().create_index(schema)

    documents = (
        ("A", "Little Miss Muffet sat on a tuffet"),
        ("B", "Miss Little Muffet tuffet"),
        ("C", "Miss Little Muffet tuffet sat"),
        ("D", "Gibberish blonk falunk miss muffet sat tuffet garbonzo"),
        ("E", "Blah blah blah pancakes"),
    )
    writer = index.writer()
    for doc_name, doc_text in documents:
        writer.add_document(name=u(doc_name), value=u(doc_text))
    writer.commit()

    with index.searcher() as searcher:
        def names(results):
            # Sorted stored names, so comparisons ignore result ordering.
            return sorted(hit['name'] for hit in results)

        # Full five-word phrase: only document A is expected.
        query = Phrase(
            "value",
            [u("little"), u("miss"), u("muffet"), u("sat"), u("tuffet")],
        )
        matcher = query.matcher(searcher)
        assert_equal(type(matcher).__name__, "SpanNearMatcher")

        results = searcher.search(query)
        assert_equal(names(results), ["A"])
        assert_equal(len(results), 1)

        # Dropping the leading word lets document D match as well.
        query = Phrase(
            "value",
            [u("miss"), u("muffet"), u("sat"), u("tuffet")],
        )
        assert_equal(names(searcher.search(query)), ["A", "D"])

        # Words in reversed order: no hits expected.
        query = Phrase("value", [u("falunk"), u("gibberish")])
        results = searcher.search(query)
        assert_equal(names(results), [])
        assert_equal(len(results), 0)

        # Forward order with slop=2: document D is expected.
        query = Phrase("value", [u("gibberish"), u("falunk")], slop=2)
        assert_equal(names(searcher.search(query)), ["D"])

        # "blah blah blah blah" -- four in a row must not match.
        query = Phrase("value", [u("blah")] * 4)
        assert_equal(names(searcher.search(query)), [])

        # Three in a row matches document E. The matcher is built but
        # not inspected; the call is kept so it is still exercised.
        query = Phrase("value", [u("blah")] * 3)
        matcher = query.matcher(searcher)
        assert_equal(names(searcher.search(query)), ["E"])
def test_phrase_score():
    """Check matcher weights for the phrase "little miss muffet".

    Four documents are indexed in memory. The phrase matcher is expected
    to sit on document id 0 first, with a positive weight, and then on
    document id 3, with a strictly larger weight than the first match.
    (Presumably ids 0 and 3 are documents "A" and "F", the latter
    containing the phrase twice -- this relies on ids following insertion
    order.)
    """
    schema = fields.Schema(name=fields.ID(stored=True), value=fields.TEXT)
    index = RamStorage().create_index(schema)

    documents = (
        ("A", "Little Miss Muffet sat on a tuffet"),
        ("D", "Gibberish blonk falunk miss muffet sat tuffet garbonzo"),
        ("E", "Blah blah blah pancakes"),
        ("F", "Little miss muffet little miss muffet"),
    )
    writer = index.writer()
    for doc_name, doc_text in documents:
        writer.add_document(name=u(doc_name), value=u(doc_text))
    writer.commit()

    with index.searcher() as searcher:
        query = Phrase("value", [u("little"), u("miss"), u("muffet")])
        matcher = query.matcher(searcher)

        # First match: document id 0 with a positive weight.
        assert_equal(matcher.id(), 0)
        first_weight = matcher.weight()
        assert first_weight > 0

        # Second match: document id 3 must outweigh the first.
        matcher.next()
        assert_equal(matcher.id(), 3)
        assert matcher.weight() > first_weight