Пример #1
0
    def test_deletion(self):
        """Delete one of three documents by term and check the counts.

        Asserts that ``delete_by_term`` reports one deletion, that
        ``doc_count_all()`` is 3 and ``doc_count()`` is 2 after the commit,
        and that after ``optimize()`` the "name" lexicon holds only the terms
        of the two remaining documents.
        """
        schema = fields.Schema(key=fields.ID, name=fields.TEXT,
                               value=fields.TEXT)
        storage = store.RamStorage()
        ix = index.Index(storage, schema, create=True)

        # (key, name, value) for each document, in insertion order.
        documents = (
            (u"A", u"Yellow brown", u"Blue red green purple?"),
            (u"B", u"Alpha beta", u"Gamma delta epsilon omega."),
            (u"C", u"One two", u"Three four five."),
        )
        writer = writing.IndexWriter(ix)
        for key, name, value in documents:
            writer.add_document(key=key, name=name, value=value)
        writer.commit()

        deleted = ix.delete_by_term("key", u"B")
        self.assertEqual(deleted, 1)
        ix.commit()

        self.assertEqual(ix.doc_count_all(), 3)
        self.assertEqual(ix.doc_count(), 2)

        ix.optimize()
        self.assertEqual(ix.doc_count(), 2)
        reader = ix.term_reader()
        remaining_names = list(reader.lexicon("name"))
        self.assertEqual(remaining_names, ["brown", "one", "two", "yellow"])
Пример #2
0
 def _create_index(self):
     """Create and return a new index in RAM storage.

     The schema has three KEYWORD fields, ``f1``, ``f2`` and ``f3``; only
     ``f1`` is declared with ``stored=True``.
     """
     schema = fields.Schema(f1=fields.KEYWORD(stored=True),
                            f2=fields.KEYWORD,
                            f3=fields.KEYWORD)
     storage = store.RamStorage()
     return index.Index(storage, schema, create=True)
Пример #3
0
    def test_creation(self):
        """Build a schema field by field and write two documents.

        The first document is written with a single ``add_document`` call;
        the second uses ``start_document`` / ``add_field`` / ``end_document``.
        The test makes no assertions: it passes if none of the calls raise.
        """
        schema = fields.Schema()
        schema.add("content", fields.TEXT(phrase=True))
        schema.add("title", fields.TEXT(stored=True))
        schema.add("path", fields.ID(stored=True))
        schema.add("tags", fields.KEYWORD(stored=True))
        schema.add("quick", fields.NGRAM)
        schema.add("note", fields.STORED)

        storage = store.RamStorage()
        ix = index.Index(storage, schema, create=True)
        writer = writing.IndexWriter(ix)

        # Document 1: all fields passed as keyword arguments.
        writer.add_document(title=u"First",
                            content=u"This is the first document",
                            path=u"/a",
                            tags=u"first second third",
                            quick=u"First document",
                            note=u"This is the first document")

        # Document 2: the same fields, added one at a time.
        second_document = (
            ("content", u"Let's try this again"),
            ("title", u"Second"),
            ("path", u"/b"),
            ("tags", u"Uno Dos Tres"),
            ("quick", u"Second document"),
            ("note", u"This is the second document"),
        )
        writer.start_document()
        for fieldname, text in second_document:
            writer.add_field(fieldname, text)
        writer.end_document()

        writer.commit()
Пример #4
0
    def test_frequency_text(self):
        """Check document and total frequencies for word terms.

        Indexes three documents into a KEYWORD field, then asserts
        ``doc_frequency`` and ``frequency`` for each term (including one that
        never occurs) and the full sequence produced by iterating the reader.
        """
        schema = fields.Schema(content=fields.KEYWORD)
        storage = store.RamStorage()
        ix = index.Index(storage, schema, create=True)

        writer = ix.writer()
        for text in (u"alfa bravo charlie delta echo",
                     u"bravo bravo bravo bravo charlie delta delta",
                     u"delta echo foxtrot"):
            writer.add_document(content=text)
        writer.commit()

        reader = ix.term_reader()
        # (term, expected doc_frequency, expected frequency)
        expectations = (
            (u"bravo", 2, 5),
            (u"echo", 2, 2),
            (u"alfa", 1, 1),
            (u"delta", 3, 4),
            (u"foxtrot", 1, 1),
            (u"zulu", 0, 0),
        )
        for term, doc_freq, total_freq in expectations:
            self.assertEqual(reader.doc_frequency("content", term), doc_freq)
            self.assertEqual(reader.frequency("content", term), total_freq)

        expected_terms = [
            (0, u"alfa", 1, 1),
            (0, u"bravo", 2, 5),
            (0, u"charlie", 2, 2),
            (0, u"delta", 3, 4),
            (0, u"echo", 2, 2),
            (0, u"foxtrot", 1, 1),
        ]
        self.assertEqual(list(reader), expected_terms)
Пример #5
0
    def test_frequency_keyword(self):
        """Check document and total frequencies for single-letter keywords.

        Indexes three documents into a KEYWORD field, then asserts
        ``doc_frequency`` and ``frequency`` for each keyword (including one
        that never occurs) and the full sequence produced by iterating the
        reader.
        """
        schema = fields.Schema(content=fields.KEYWORD)
        storage = store.RamStorage()
        ix = index.Index(storage, schema, create=True)

        writer = ix.writer()
        for text in (u"A B C D E", u"B B B B C D D", u"D E F"):
            writer.add_document(content=text)
        writer.commit()

        reader = ix.term_reader()
        # (keyword, expected doc_frequency, expected frequency)
        expectations = (
            (u"B", 2, 5),
            (u"E", 2, 2),
            (u"A", 1, 1),
            (u"D", 3, 4),
            (u"F", 1, 1),
            (u"Z", 0, 0),
        )
        for keyword, doc_freq, total_freq in expectations:
            self.assertEqual(reader.doc_frequency("content", keyword),
                             doc_freq)
            self.assertEqual(reader.frequency("content", keyword), total_freq)

        expected_terms = [
            (0, u"A", 1, 1),
            (0, u"B", 2, 5),
            (0, u"C", 2, 2),
            (0, u"D", 3, 4),
            (0, u"E", 2, 2),
            (0, u"F", 1, 1),
        ]
        self.assertEqual(list(reader), expected_terms)
Пример #6
0
    def test_merged_lengths(self):
        """Check stored values and field lengths across three commits.

        The first batch is committed with the default arguments; the next two
        are committed with ``writing.NO_MERGE``. Field lengths are then read
        back by document number through the doc reader.
        """
        schema = fields.Schema(
            f1=fields.KEYWORD(stored=True, scorable=True),
            f2=fields.KEYWORD(stored=True, scorable=True))
        storage = store.RamStorage()
        ix = index.Index(storage, schema, create=True)

        writer = writing.IndexWriter(ix)
        writer.add_document(f1=u"A B C", f2=u"X")
        writer.add_document(f1=u"B C D E", f2=u"Y Z")
        writer.commit()

        # Each remaining batch gets its own writer and a NO_MERGE commit.
        unmerged_batches = (
            ((u"A", u"B C D E X Y"), (u"B C", u"X")),
            ((u"A B X Y Z", u"B C"), (u"Y X", u"A B")),
        )
        for batch in unmerged_batches:
            writer = writing.IndexWriter(ix)
            for f1_text, f2_text in batch:
                writer.add_document(f1=f1_text, f2=f2_text)
            writer.commit(writing.NO_MERGE)

        reader = ix.doc_reader()
        self.assertEqual(reader[0]["f1"], u"A B C")
        # (document number, field name, expected length)
        for docnum, fieldname, length in ((0, "f1", 3),
                                          (2, "f2", 6),
                                          (4, "f1", 5)):
            self.assertEqual(reader.doc_field_length(docnum, fieldname),
                             length)
Пример #7
0
def update_index(sender, instance, created, **kwargs):
	"""Add or update the index document for ``instance`` and commit.

	Opens the index kept in file storage at ``settings.WHOOSH_INDEX`` using
	``WHOOSH_SCHEMA``. When ``created`` is true the document is added with
	``add_document``; otherwise ``update_document`` is called. ``sender``
	and ``kwargs`` are accepted but not used.

	NOTE(review): the signature looks like a Django ``post_save`` receiver;
	confirm where this function is connected.
	"""
	file_storage = store.FileStorage(settings.WHOOSH_INDEX)
	ix = index.Index(file_storage, schema=WHOOSH_SCHEMA)
	writer = ix.writer()
	# Choose the writer method up front so both cases share one call site
	# and a single commit.
	write_document = writer.add_document if created else writer.update_document
	write_document(title=unicode(instance), content=instance.content,
								url=unicode(instance.get_absolute_url()))
	writer.commit()
Пример #8
0
    def test_integrity(self):
        """Write documents in two separate commits and check the result.

        Asserts that all three documents are counted and that the "name"
        lexicon lists the terms from both commits.
        """
        schema = fields.Schema(name=fields.TEXT, value=fields.TEXT)
        storage = store.RamStorage()
        ix = index.Index(storage, schema, create=True)

        first_writer = writing.IndexWriter(ix)
        first_writer.add_document(name=u"Yellow brown",
                                  value=u"Blue red green purple?")
        first_writer.add_document(name=u"Alpha beta",
                                  value=u"Gamma delta epsilon omega.")
        first_writer.commit()

        second_writer = writing.IndexWriter(ix)
        second_writer.add_document(name=u"One two", value=u"Three four five.")
        second_writer.commit()

        reader = ix.term_reader()
        self.assertEqual(ix.doc_count_all(), 3)
        expected_names = ["alpha", "beta", "brown", "one", "two", "yellow"]
        self.assertEqual(list(reader.lexicon("name")), expected_names)
Пример #9
0
 def test_vector_postings(self):
     """Read one document's term vector back as (term, weight) pairs.

     The "content" field is declared with a ``Positions`` vector built on
     ``StandardAnalyzer``. The expected list contains every word of the
     sentence except "the", each with weight 1.0.
     """
     id_field = fields.ID(stored=True, unique=True)
     analyzer = analysis.StandardAnalyzer()
     vector_format = fields.Positions(analyzer=analyzer)
     content_field = fields.TEXT(vector=vector_format)
     schema = fields.Schema(id=id_field, content=content_field)
     storage = store.RamStorage()
     ix = index.Index(storage, schema, create=True)

     w = ix.writer()
     w.add_document(id=u'1',
                    content=u'the quick brown fox jumped over the lazy dogs')
     w.commit()
     reader = ix.doc_reader()

     weighted_terms = list(reader.vector_as(0, 0, "weight"))
     expected = [
         (u'brown', 1.0),
         (u'dogs', 1.0),
         (u'fox', 1.0),
         (u'jumped', 1.0),
         (u'lazy', 1.0),
         (u'over', 1.0),
         (u'quick', 1.0),
     ]
     self.assertEqual(weighted_terms, expected)
Пример #10
0
    def test_missing_field_scoring(self):
        """Search an index where one document omits a field.

        After each commit the first segment's field lengths are checked; the
        second document has no "hobbies" value. A multifield query for
        u'baseball' is then expected to return exactly one result.
        """
        schema = fields.Schema(name=fields.TEXT(stored=True),
                               hobbies=fields.TEXT(stored=True))
        idx = index.Index(store.RamStorage(), schema, create=True)

        w = idx.writer()
        w.add_document(name=u'Frank', hobbies=u'baseball, basketball')
        w.commit()
        first_segment = idx.segments[0]
        self.assertEqual(first_segment.field_length(0), 2)  # hobbies
        self.assertEqual(first_segment.field_length(1), 1)  # name

        # Second document leaves "hobbies" out entirely.
        w = idx.writer()
        w.add_document(name=u'Jonny')
        w.commit()
        self.assertEqual(len(idx.segments), 1)
        only_segment = idx.segments[0]
        self.assertEqual(only_segment.field_length(0), 2)  # hobbies
        self.assertEqual(only_segment.field_length(1), 2)  # name

        parser = qparser.MultifieldParser(['name', 'hobbies'], schema=schema)
        searcher = idx.searcher()
        query = parser.parse(u'baseball')
        hits = searcher.search(query)
        self.assertEqual(len(hits), 1)
Пример #11
0
    def test_score_retrieval(self):
        """Search for a term and check result order and the top score.

        Both documents contain "white" in their content. The test asserts the
        order of the two results by title and that the score of the first
        result is not ``None``, ``0`` or ``1``.
        """
        schema = fields.Schema(title=fields.TEXT(stored=True),
                               content=fields.TEXT(stored=True))
        ix = index.Index(store.RamStorage(), schema, create=True)

        w = ix.writer()
        w.add_document(
            title=u"Miss Mary",
            content=u"Mary had a little white lamb its fleece was white as snow"
        )
        w.add_document(
            title=u"Snow White",
            content=u"Snow white lived in the forrest with seven dwarfs")
        w.commit()

        searcher = ix.searcher()
        query = Term("content", "white")
        results = searcher.search(query)
        self.assertEqual(len(results), 2)

        for rank, title in enumerate((u"Miss Mary", u"Snow White")):
            self.assertEqual(results[rank]['title'], title)

        # The score is fetched again for each comparison.
        for disallowed in (None, 0, 1):
            self.assertNotEqual(results.score(0), disallowed)
Пример #12
0
    def setUp(self):
        """Create a five-document RAM index and store it on ``self.ix``.

        The schema has a stored ID field ``key`` and two TEXT fields,
        ``name`` and ``value``.
        """
        schema = fields.Schema(key=fields.ID(stored=True),
                               name=fields.TEXT,
                               value=fields.TEXT)
        storage = store.RamStorage()
        ix = index.Index(storage, schema, create=True)

        # (key, name, value) for each fixture document, in insertion order.
        fixture_rows = (
            (u"A", u"Yellow brown", u"Blue red green render purple?"),
            (u"B", u"Alpha beta", u"Gamma delta epsilon omega."),
            (u"C", u"One two", u"Three rendered four five."),
            (u"D", u"Quick went", u"Every red town."),
            (u"E", u"Yellow uptown", u"Interest rendering outer photo!"),
        )
        writer = writing.IndexWriter(ix)
        for key, name, value in fixture_rows:
            writer.add_document(key=key, name=name, value=value)
        writer.commit()

        self.ix = ix
Пример #13
0
    def test_lengths_ram(self):
        """Check per-document and whole-index field lengths in a RAM index.

        Three documents are written to two stored, scorable KEYWORD fields.
        The test asserts the stored value of the first document, the length
        of each field in each document, and the total length of each field
        as reported by the index.
        """
        s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                          f2=fields.KEYWORD(stored=True, scorable=True))
        st = store.RamStorage()
        ix = index.Index(st, s, create=True)
        w = writing.IndexWriter(ix)
        w.add_document(f1=u"A B C D E", f2=u"X Y Z")
        w.add_document(f1=u"B B B B C D D Q", f2=u"Q R S T")
        w.add_document(f1=u"D E F", f2=u"U V A B C D E")
        w.commit()

        dr = ix.doc_reader()
        # The per-field length lists previously built here were never read;
        # the explicit assertions below cover every document and field.
        self.assertEqual(dr[0]["f1"], "A B C D E")
        self.assertEqual(dr.doc_field_length(0, "f1"), 5)
        self.assertEqual(dr.doc_field_length(1, "f1"), 8)
        self.assertEqual(dr.doc_field_length(2, "f1"), 3)
        self.assertEqual(dr.doc_field_length(0, "f2"), 3)
        self.assertEqual(dr.doc_field_length(1, "f2"), 4)
        self.assertEqual(dr.doc_field_length(2, "f2"), 7)

        # Index-wide totals: 5 + 8 + 3 for f1 and 3 + 4 + 7 for f2.
        self.assertEqual(ix.field_length("f1"), 16)
        self.assertEqual(ix.field_length("f2"), 14)
Пример #14
0
 def make_index(self, dirname, schema):
     """Create an index with ``schema`` in file storage under ``dirname``.

     The directory is created with ``mkdir`` when ``exists`` reports it is
     missing. Returns the newly created index.
     """
     directory_missing = not exists(dirname)
     if directory_missing:
         mkdir(dirname)
     storage = store.FileStorage(dirname)
     return index.Index(storage, schema, create=True)