Example #1
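The excerpts in this listing are lifted from the Whoosh test suite and assume its module-level imports. A minimal import header that should cover most of the snippets below, as a hedged sketch (module paths vary slightly across Whoosh versions):

import copy, os, random, shutil, time
from datetime import datetime
from nose.tools import assert_equal            # the older tests use nose-style asserts

from whoosh import analysis, fields, formats, index, qparser, query, writing
from whoosh.compat import iterkeys, text_type, u, xrange
from whoosh.fields import ID, STORED, Schema
from whoosh.filedb.filestore import RamStorage
from whoosh.qparser import dateparse, default, plugins, syntax
from whoosh.query import Or, Term
from whoosh.util import now
from whoosh.support.testing import TempIndex   # whoosh.util.testing in newer releases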
def test_fieldboost():
    schema = fields.Schema(id=fields.STORED, a=fields.TEXT, b=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(id=0, a=u("alfa bravo charlie"), b=u("echo foxtrot india"))
    w.add_document(id=1, a=u("delta bravo charlie"), b=u("alfa alfa alfa"))
    w.add_document(id=2, a=u("alfa alfa alfa"), b=u("echo foxtrot india"))
    w.add_document(id=3, a=u("alfa sierra romeo"), b=u("alfa tango echo"))
    w.add_document(id=4, a=u("bravo charlie delta"), b=u("alfa foxtrot india"))
    w.add_document(id=5, a=u("alfa alfa echo"), b=u("tango tango tango"))
    w.add_document(id=6, a=u("alfa bravo echo"), b=u("alfa alfa tango"))
    w.commit()

    def field_booster(fieldname, factor=2.0):
        "Returns a function which will boost the given field in a query tree"
        def booster_fn(obj):
            if obj.is_leaf() and obj.field() == fieldname:
                obj = copy.deepcopy(obj)
                obj.boost *= factor
                return obj
            else:
                return obj
        return booster_fn

    with ix.searcher() as s:
        q = Or([Term("a", u("alfa")), Term("b", u("alfa"))])
        q = q.accept(field_booster("a", 100.0))
        assert_equal(text_type(q), text_type("(a:alfa^100.0 OR b:alfa)"))
        r = s.search(q)
        assert_equal([hit["id"] for hit in r], [2, 5, 6, 3, 0, 1, 4])
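Rewriting the parsed tree with accept() is one way to boost a field after the fact; for parse-time boosting, whoosh.qparser.MultifieldParser accepts a fieldboosts mapping. A brief sketch against the same schema (illustrative only):

mp = qparser.MultifieldParser(["a", "b"], schema, fieldboosts={"a": 100.0})
# mp.parse(u("alfa")) parses to roughly (a:alfa^100.0 OR b:alfa)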
Example #2
    def __unicode__(self):
        r = u("DisMax(")
        r += " ".join(sorted(text_type(s) for s in self.subqueries))
        r += u(")")
        if self.tiebreak:
            r += u("~") + text_type(self.tiebreak)
        return r
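This is the textual form checked by the DisMax parser tests later in this listing; a small sketch of producing it directly with whoosh.query.DisjunctionMax (tiebreak is the optional coefficient rendered after the "~"):

from whoosh.query import DisjunctionMax, Term
q = DisjunctionMax([Term("title", u("alfa")), Term("body", u("alfa"))], tiebreak=0.5)
# text_type(q) -> "DisMax(body:alfa title:alfa)~0.5"  (subqueries are sorted, per the method above)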
Example #3
def test_gtlt():
    schema = fields.Schema(a=fields.KEYWORD, b=fields.NUMERIC,
                           c=fields.KEYWORD,
                           d=fields.NUMERIC(float), e=fields.DATETIME)
    qp = qparser.QueryParser("a", schema)
    qp.add_plugin(plugins.GtLtPlugin())
    qp.add_plugin(dateparse.DateParserPlugin())

    q = qp.parse(u("a:hello b:>100 c:<=z there"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 4)
    assert_equal(q[0], query.Term("a", "hello"))
    assert_equal(q[1], query.NumericRange("b", 100, None, startexcl=True))
    assert_equal(q[2], query.TermRange("c", None, 'z'))
    assert_equal(q[3], query.Term("a", "there"))

    q = qp.parse(u("hello e:>'29 mar 2001' there"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 3)
    assert_equal(q[0], query.Term("a", "hello"))
    # As of this writing, date ranges don't support startexcl/endexcl
    assert_equal(q[1], query.DateRange("e", datetime(2001, 3, 29, 0, 0), None))
    assert_equal(q[2], query.Term("a", "there"))

    q = qp.parse(u("a:> alfa c:<= bravo"))
    assert_equal(text_type(q), "(a:a: AND a:alfa AND a:c: AND a:bravo)")

    qp.remove_plugin_class(plugins.FieldsPlugin)
    qp.remove_plugin_class(plugins.RangePlugin)
    q = qp.parse(u("hello a:>500 there"))
    assert_equal(text_type(q), "(a:hello AND a:a: AND a:500 AND a:there)")
Example #4
def test_dismax():
    parser = default.DisMaxParser({"body": 0.8, "title": 2.5}, None)
    q = parser.parse(u("alfa bravo charlie"))
    assert_equal(text_type(q),
                 "(DisMax(body:alfa^0.8 title:alfa^2.5) OR " +
                 "DisMax(body:bravo^0.8 title:bravo^2.5) OR " +
                 "DisMax(body:charlie^0.8 title:charlie^2.5))")

    q = parser.parse(u("alfa +bravo charlie"))
    assert_equal(text_type(q),
                 "(DisMax(body:bravo^0.8 title:bravo^2.5) ANDMAYBE " +
                 "(DisMax(body:alfa^0.8 title:alfa^2.5) OR " +
                 "DisMax(body:charlie^0.8 title:charlie^2.5)))")

    q = parser.parse(u("alfa -bravo charlie"))
    assert_equal(text_type(q),
                 "((DisMax(body:alfa^0.8 title:alfa^2.5) OR " +
                 "DisMax(body:charlie^0.8 title:charlie^2.5)) ANDNOT " +
                 "DisMax(body:bravo^0.8 title:bravo^2.5))")

    q = parser.parse(u("alfa -bravo +charlie"))
    assert_equal(text_type(q),
                 "((DisMax(body:charlie^0.8 title:charlie^2.5) ANDMAYBE " +
                 "DisMax(body:alfa^0.8 title:alfa^2.5)) ANDNOT " +
                 "DisMax(body:bravo^0.8 title:bravo^2.5))")
Example #5
def test_fieldname_chars():
    s = fields.Schema(abc123=fields.TEXT, nisbah=fields.KEYWORD)
    qp = default.QueryParser("content", s)
    fieldmap = {'nisbah': [u('\u0646\u0633\u0628\u0629')],
                'abc123': ['xyz']}
    qp.add_plugin(plugins.FieldAliasPlugin(fieldmap))

    q = qp.parse(u("abc123:456"))
    assert_equal(q.__class__, query.Term)
    assert_equal(q.fieldname, u('abc123'))
    assert_equal(q.text, u('456'))

    q = qp.parse(u("abc123:456 def"))
    assert_equal(text_type(q), u("(abc123:456 AND content:def)"))

    q = qp.parse(u('\u0646\u0633\u0628\u0629:\u0627\u0644\u0641\u0644\u0633'
                   '\u0637\u064a\u0646\u064a'))
    assert_equal(q.__class__, query.Term)
    assert_equal(q.fieldname, u('nisbah'))
    assert_equal(q.text,
                 u('\u0627\u0644\u0641\u0644\u0633\u0637\u064a\u0646\u064a'))

    q = qp.parse(u("abc123 (xyz:123 OR qrs)"))
    assert_equal(text_type(q),
                 "(content:abc123 AND (abc123:123 OR content:qrs))")
Example #6
def test_not_assoc():
    qp = default.QueryParser("text", None)
    q = qp.parse(u("a AND NOT b OR c"))
    assert_equal(text_type(q), "((text:a AND NOT text:b) OR text:c)")

    qp = default.QueryParser("text", None)
    q = qp.parse(u("a NOT (b OR c)"))
    assert_equal(text_type(q), "(text:a AND NOT (text:b OR text:c))")
Example #7
def test_not_assoc():
    qp = default.QueryParser("text", None)
    q = qp.parse(u("a AND NOT b OR c"))
    assert text_type(q) == "((text:a AND NOT text:b) OR text:c)"

    qp = default.QueryParser("text", None)
    q = qp.parse(u("a NOT (b OR c)"))
    assert text_type(q) == "(text:a AND NOT (text:b OR text:c))"
Example #8
def test_paren_fieldname():
    schema = fields.Schema(kind=fields.ID, content=fields.TEXT)

    qp = default.QueryParser("content", schema)
    q = qp.parse(u("(kind:1d565 OR kind:7c584) AND (stuff)"))
    assert text_type(q) == "((kind:1d565 OR kind:7c584) AND content:stuff)"

    q = qp.parse(u("kind:(1d565 OR 7c584) AND (stuff)"))
    assert text_type(q) == "((kind:1d565 OR kind:7c584) AND content:stuff)"
Example #9
def test_paren_fieldname():
    schema = fields.Schema(kind=fields.ID, content=fields.TEXT)

    qp = default.QueryParser("content", schema)
    q = qp.parse(u("(kind:1d565 OR kind:7c584) AND (stuff)"))
    assert_equal(text_type(q), "((kind:1d565 OR kind:7c584) AND content:stuff)")

    q = qp.parse(u("kind:(1d565 OR 7c584) AND (stuff)"))
    assert_equal(text_type(q), "((kind:1d565 OR kind:7c584) AND content:stuff)")
Example #10
def test_operator_queries():
    qp = default.QueryParser("f", None)

    q = qp.parse("a AND b OR c AND d")
    assert_equal(text_type(q), "((f:a AND f:b) OR (f:c AND f:d))")

    q = qp.parse("a OR b OR c OR d")
    assert_equal(text_type(q), "(f:a OR f:b OR f:c OR f:d)")

    q = qp.parse("a ANDMAYBE b ANDNOT c REQUIRE d")
    assert_equal(text_type(q), "((f:a ANDMAYBE (f:b ANDNOT f:c)) REQUIRE f:d)")
Example #11
def test_operator_queries():
    qp = default.QueryParser("f", None)

    q = qp.parse("a AND b OR c AND d")
    assert text_type(q) == "((f:a AND f:b) OR (f:c AND f:d))"

    q = qp.parse("a OR b OR c OR d")
    assert text_type(q) == "(f:a OR f:b OR f:c OR f:d)"

    q = qp.parse("a ANDMAYBE b ANDNOT c REQUIRE d")
    assert text_type(q) == "((f:a ANDMAYBE (f:b ANDNOT f:c)) REQUIRE f:d)"
Example #12
def test_simple():
    parser = default.SimpleParser("x", None)
    q = parser.parse(u("alfa bravo charlie delta"))
    assert_equal(text_type(q), "(x:alfa OR x:bravo OR x:charlie OR x:delta)")

    q = parser.parse(u("alfa +bravo charlie delta"))
    assert_equal(text_type(q), "(x:bravo ANDMAYBE (x:alfa OR x:charlie OR x:delta))")

    q = parser.parse(u("alfa +bravo -charlie delta"))
    assert_equal(text_type(q), "((x:bravo ANDMAYBE (x:alfa OR x:delta)) ANDNOT x:charlie)")

    q = parser.parse(u("- alfa +bravo + delta"))
    assert_equal(text_type(q), "((x:bravo AND x:delta) ANDNOT x:alfa)")
Example #13
def test_dismax():
    parser = default.DisMaxParser({"body": 0.8, "title": 2.5}, None)
    q = parser.parse(u("alfa bravo charlie"))
    assert_equal(text_type(q), "(DisMax(body:alfa^0.8 title:alfa^2.5) OR DisMax(body:bravo^0.8 title:bravo^2.5) OR DisMax(body:charlie^0.8 title:charlie^2.5))")

    q = parser.parse(u("alfa +bravo charlie"))
    assert_equal(text_type(q), "(DisMax(body:bravo^0.8 title:bravo^2.5) ANDMAYBE (DisMax(body:alfa^0.8 title:alfa^2.5) OR DisMax(body:charlie^0.8 title:charlie^2.5)))")

    q = parser.parse(u("alfa -bravo charlie"))
    assert_equal(text_type(q), "((DisMax(body:alfa^0.8 title:alfa^2.5) OR DisMax(body:charlie^0.8 title:charlie^2.5)) ANDNOT DisMax(body:bravo^0.8 title:bravo^2.5))")

    q = parser.parse(u("alfa -bravo +charlie"))
    assert_equal(text_type(q), "((DisMax(body:charlie^0.8 title:charlie^2.5) ANDMAYBE DisMax(body:alfa^0.8 title:alfa^2.5)) ANDNOT DisMax(body:bravo^0.8 title:bravo^2.5))")
Example #14
def test_update2():
    schema = fields.Schema(key=fields.ID(unique=True, stored=True),
                           p=fields.ID(stored=True))
    with TempIndex(schema, "update2") as ix:
        nums = list(range(21))
        random.shuffle(nums)
        for i, n in enumerate(nums):
            w = ix.writer()
            w.update_document(key=text_type(n % 10), p=text_type(i))
            w.commit()

        with ix.searcher() as s:
            results = [d["key"] for _, d in s.iter_docs()]
            results = " ".join(sorted(results))
            assert results == "0 1 2 3 4 5 6 7 8 9"
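update_document() uses the unique=True field ("key") to delete any previously indexed document with the same key before adding the new one, which is why only ten documents survive the 21 writes above. Inside the TempIndex block one could additionally check, for example:

assert ix.doc_count() == 10   # one surviving document per distinct key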
Example #15
def test_update2():
    schema = fields.Schema(key=fields.ID(unique=True, stored=True),
                           p=fields.ID(stored=True))
    with TempIndex(schema, "update2") as ix:
        nums = list(range(100))
        random.shuffle(nums)
        for i, n in enumerate(nums):
            w = ix.writer()
            w.update_document(key=text_type(n % 10), p=text_type(i))
            w.commit()

        with ix.searcher() as s:
            results = [d["key"] for d in s.all_stored_fields()]
            results.sort()
            assert_equal(results, ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
Example #16
def test_update2():
    schema = fields.Schema(key=fields.ID(unique=True, stored=True),
                           p=fields.ID(stored=True))
    with TempIndex(schema, "update2") as ix:
        nums = list(range(100))
        random.shuffle(nums)
        for i, n in enumerate(nums):
            w = ix.writer()
            w.update_document(key=text_type(n % 10), p=text_type(i))
            w.commit()

        with ix.searcher() as s:
            results = [d["key"] for d in s.all_stored_fields()]
            results.sort()
            assert_equal(results, ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
Example #17
def test_update2():
    schema = fields.Schema(key=fields.ID(unique=True, stored=True),
                           p=fields.ID(stored=True))
    with TempIndex(schema, "update2") as ix:
        nums = list(range(21))
        random.shuffle(nums)
        for i, n in enumerate(nums):
            w = ix.writer()
            w.update_document(key=text_type(n % 10), p=text_type(i))
            w.commit()

        with ix.searcher() as s:
            results = [d["key"] for _, d in s.iter_docs()]
            results = " ".join(sorted(results))
            assert results == "0 1 2 3 4 5 6 7 8 9"
Example #18
def test_andnot():
    qp = default.QueryParser("content", None)
    q = qp.parse(u("this ANDNOT that"))
    assert q.__class__ == query.AndNot
    assert q.a.__class__ == query.Term
    assert q.b.__class__ == query.Term
    assert q.a.text == "this"
    assert q.b.text == "that"

    q = qp.parse(u("foo ANDNOT bar baz"))
    assert q.__class__ == query.And
    assert len(q) == 2
    assert q[0].__class__ == query.AndNot
    assert q[1].__class__ == query.Term

    q = qp.parse(u("foo fie ANDNOT bar baz"))
    assert q.__class__ == query.And
    assert len(q) == 3
    assert q[0].__class__ == query.Term
    assert q[1].__class__ == query.AndNot
    assert q[2].__class__ == query.Term

    q = qp.parse(u("a AND b ANDNOT c"))
    assert q.__class__ == query.AndNot
    assert text_type(q) == "((content:a AND content:b) ANDNOT content:c)"
Example #19
def test_asyncwriter_no_stored():
    schema = fields.Schema(id=fields.ID, text=fields.TEXT)
    with TempIndex(schema, "asyncnostored") as ix:
        domain = (u"alfa", u"bravo", u"charlie", u"delta", u"echo",
                  u"foxtrot", u"golf", u"hotel", u"india")

        writers = []
        # Simulate doing 20 (near-)simultaneous commits. If we weren't using
        # AsyncWriter, at least some of these would fail because the first
        # writer wouldn't be finished yet.
        for i in xrange(20):
            w = writing.AsyncWriter(ix)
            writers.append(w)
            w.add_document(id=text_type(i),
                           text=u" ".join(random.sample(domain, 5)))
            w.commit()

        # Wait for all writers to finish before checking the results
        for w in writers:
            if w.running:
                w.join()

        # Check whether all documents made it into the index.
        with ix.reader() as r:
            assert sorted([int(id) for id in r.lexicon("id")]) == list(range(20))
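writing.AsyncWriter wraps a normal writer: it tries to obtain the index lock immediately and, if it cannot, buffers the add/update/delete calls and replays them from a background thread once the lock frees up, which is why the test joins the running writers before reading. A minimal usage sketch (delay is assumed to be the lock-polling interval, default 0.25s):

w = writing.AsyncWriter(ix, delay=0.25)
w.add_document(id=u("42"), text=u("alfa bravo"))
w.commit()   # returns immediately; the real commit may happen on the background thread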
Example #20
def test_buffered_update():
    schema = fields.Schema(id=fields.ID(stored=True, unique=True),
                           payload=fields.STORED)
    with TempIndex(schema, "bufferedupdate") as ix:
        w = writing.BufferedWriter(ix, period=None, limit=5)
        for i in xrange(10):
            for char in u"abc":
                fs = dict(id=char, payload=text_type(i) + char)
                w.update_document(**fs)

        with w.reader() as r:
            sfs = [sf for _, sf in r.iter_docs()]
            sfs = sorted(sfs, key=lambda x: x["id"])
            assert sfs == [{
                'id': u('a'),
                'payload': u('9a')
            }, {
                'id': u('b'),
                'payload': u('9b')
            }, {
                'id': u('c'),
                'payload': u('9c')
            }]
            assert r.doc_count() == 3

        w.close()
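BufferedWriter keeps added/updated documents in memory and flushes them once `limit` documents accumulate (or every `period` seconds when period is not None); its reader()/searcher() combine the on-disk segments with the in-memory buffer, and close() flushes whatever remains. A short sketch under those assumptions:

w = writing.BufferedWriter(ix, period=None, limit=5)
try:
    w.update_document(id=u("a"), payload=u("0a"))
    with w.searcher() as s:   # sees the buffered document as well as committed ones
        pass
finally:
    w.close()                 # flushes the remaining buffer and releases the index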
Example #21
def test_andnot():
    qp = default.QueryParser("content", None)
    q = qp.parse(u("this ANDNOT that"))
    assert_equal(q.__class__, query.AndNot)
    assert_equal(q.a.__class__, query.Term)
    assert_equal(q.b.__class__, query.Term)
    assert_equal(q.a.text, "this")
    assert_equal(q.b.text, "that")

    q = qp.parse(u("foo ANDNOT bar baz"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 2)
    assert_equal(q[0].__class__, query.AndNot)
    assert_equal(q[1].__class__, query.Term)

    q = qp.parse(u("foo fie ANDNOT bar baz"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 3)
    assert_equal(q[0].__class__, query.Term)
    assert_equal(q[1].__class__, query.AndNot)
    assert_equal(q[2].__class__, query.Term)

    q = qp.parse(u("a AND b ANDNOT c"))
    assert_equal(q.__class__, query.AndNot)
    assert_equal(text_type(q), "((content:a AND content:b) ANDNOT content:c)")
Example #22
    def __unicode__(self):
        r = u("(")
        r += (self.JOINT).join([text_type(s) for s in self.subqueries])
        r += u(")")
        if self.minmatch:
            r += u(">%s") % self.minmatch
        return r
Example #23
def test_andnot():
    qp = default.QueryParser("content", None)
    q = qp.parse(u("this ANDNOT that"))
    assert_equal(q.__class__, query.AndNot)
    assert_equal(q.a.__class__, query.Term)
    assert_equal(q.b.__class__, query.Term)
    assert_equal(q.a.text, "this")
    assert_equal(q.b.text, "that")

    q = qp.parse(u("foo ANDNOT bar baz"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 2)
    assert_equal(q[0].__class__, query.AndNot)
    assert_equal(q[1].__class__, query.Term)

    q = qp.parse(u("foo fie ANDNOT bar baz"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 3)
    assert_equal(q[0].__class__, query.Term)
    assert_equal(q[1].__class__, query.AndNot)
    assert_equal(q[2].__class__, query.Term)

    q = qp.parse(u("a AND b ANDNOT c"))
    assert_equal(q.__class__, query.AndNot)
    assert_equal(text_type(q), "((content:a AND content:b) ANDNOT content:c)")
Example #24
def test_andnot():
    qp = default.QueryParser("content", None)
    q = qp.parse(u("this ANDNOT that"))
    assert q.__class__ == query.AndNot
    assert q.a.__class__ == query.Term
    assert q.b.__class__ == query.Term
    assert q.a.text == "this"
    assert q.b.text == "that"

    q = qp.parse(u("foo ANDNOT bar baz"))
    assert q.__class__ == query.And
    assert len(q) == 2
    assert q[0].__class__ == query.AndNot
    assert q[1].__class__ == query.Term

    q = qp.parse(u("foo fie ANDNOT bar baz"))
    assert q.__class__ == query.And
    assert len(q) == 3
    assert q[0].__class__ == query.Term
    assert q[1].__class__ == query.AndNot
    assert q[2].__class__ == query.Term

    q = qp.parse(u("a AND b ANDNOT c"))
    assert q.__class__ == query.AndNot
    assert text_type(q) == "((content:a AND content:b) ANDNOT content:c)"
Example #25
def test_asyncwriter_no_stored():
    schema = fields.Schema(id=fields.ID, text=fields.TEXT)
    with TempIndex(schema, "asyncnostored") as ix:
        domain = (u("alfa"), u("bravo"), u("charlie"), u("delta"), u("echo"),
                  u("foxtrot"), u("golf"), u("hotel"), u("india"))

        writers = []
        # Simulate doing 20 (near-)simultaneous commits. If we weren't using
        # AsyncWriter, at least some of these would fail because the first
        # writer wouldn't be finished yet.
        for i in xrange(20):
            w = writing.AsyncWriter(ix)
            writers.append(w)
            w.add_document(id=text_type(i),
                           text=u(" ").join(random.sample(domain, 5)))
            w.commit()

        # Wait for all writers to finish before checking the results
        for w in writers:
            if w.running:
                w.join()

        # Check whether all documents made it into the index.
        with ix.reader() as r:
            assert_equal(sorted([int(id) for id in r.lexicon("id")]),
                         list(range(20)))
Example #26
def test_buffered_update():
    schema = fields.Schema(id=fields.ID(stored=True, unique=True),
                           payload=fields.STORED)
    with TempIndex(schema, "bufferedupdate") as ix:
        w = writing.BufferedWriter(ix, period=None, limit=5)
        for i in xrange(10):
            for char in u("abc"):
                fs = dict(id=char, payload=text_type(i) + char)
                w.update_document(**fs)

        with w.reader() as r:
            assert_equal(sorted(r.all_stored_fields(), key=lambda x: x["id"]),
                         [{
                             'id': u('a'),
                             'payload': u('9a')
                         }, {
                             'id': u('b'),
                             'payload': u('9b')
                         }, {
                             'id': u('c'),
                             'payload': u('9c')
                         }])
            assert_equal(r.doc_count(), 3)

        w.close()
Example #27
def test_field_alias():
    qp = qparser.QueryParser("content", None)
    qp.add_plugin(plugins.FieldAliasPlugin({"title": ("article", "caption")}))
    q = qp.parse("alfa title:bravo article:charlie caption:delta")
    assert_equal(
        text_type(q),
        u("(content:alfa AND title:bravo AND title:charlie AND " +
          "title:delta)"))
Example #28
def test_copyfield():
    qp = qparser.QueryParser("a", None)
    qp.add_plugin(plugins.CopyFieldPlugin({"b": "c"}, None))
    assert_equal(text_type(qp.parse("hello b:matt")), "(a:hello AND b:matt AND c:matt)")

    qp = qparser.QueryParser("a", None)
    qp.add_plugin(plugins.CopyFieldPlugin({"b": "c"}, syntax.AndMaybeGroup))
    assert_equal(text_type(qp.parse("hello b:matt")), "(a:hello AND (b:matt ANDMAYBE c:matt))")

    qp = qparser.QueryParser("a", None)
    qp.add_plugin(plugins.CopyFieldPlugin({"b": "c"}, syntax.RequireGroup))
    assert_equal(text_type(qp.parse("hello (there OR b:matt)")), "(a:hello AND (a:there OR (b:matt REQUIRE c:matt)))")

    qp = qparser.QueryParser("a", None)
    qp.add_plugin(plugins.CopyFieldPlugin({"a": "c"}, syntax.OrGroup))
    assert_equal(text_type(qp.parse("hello there")), "((a:hello OR c:hello) AND (a:there OR c:there))")

    qp = qparser.QueryParser("a", None)
    qp.add_plugin(plugins.CopyFieldPlugin({"b": "c"}, mirror=True))
    assert_equal(text_type(qp.parse("hello c:matt")), "(a:hello AND (c:matt OR b:matt))")

    qp = qparser.QueryParser("a", None)
    qp.add_plugin(plugins.CopyFieldPlugin({"c": "a"}, mirror=True))
    assert_equal(text_type(qp.parse("hello c:matt")), "((a:hello OR c:hello) AND (c:matt OR a:matt))")

    ana = analysis.RegexAnalyzer(r"\w+") | analysis.DoubleMetaphoneFilter()
    fmt = formats.Frequency()
    schema = fields.Schema(name=fields.KEYWORD, name_phone=fields.FieldType(fmt, ana, multitoken_query="or"))
    qp = qparser.QueryParser("name", schema)
    qp.add_plugin(plugins.CopyFieldPlugin({"name": "name_phone"}))
    assert_equal(text_type(qp.parse(u("spruce view"))), "((name:spruce OR name_phone:SPRS) AND (name:view OR name_phone:F OR name_phone:FF))")
Example #29
def test_multifield():
    schema = fields.Schema(content=fields.TEXT, title=fields.TEXT,
                           cat=fields.KEYWORD, date=fields.DATETIME)

    qs = u("a (b c cat:d) OR (b c cat:e)")
    qp = default.MultifieldParser(['x', 'y'], schema)

    q = qp.parse(qs)
    assert_equal(text_type(q), "((x:a OR y:a) AND (((x:b OR y:b) AND (x:c OR y:c) AND cat:d) OR ((x:b OR y:b) AND (x:c OR y:c) AND cat:e)))")
Example #30
    def _check(schema, **kwargs):
        ix = RamStorage().create_index(schema)
        with ix.writer() as w:
            for i, text in enumerate(docs):
                w.add_document(id=text_type(i + 1), text=text)

        with ix.searcher() as s:
            docnum = s.document_number(id=u("1"))
            r = s.more_like(docnum, "text", model=model, **kwargs)
            assert [hit["id"] for hit in r] == ["6", "2", "3"]
Example #31
    def _check(schema, **kwargs):
        ix = RamStorage().create_index(schema)
        with ix.writer() as w:
            for i, text in enumerate(docs):
                w.add_document(id=text_type(i + 1), text=text)

        with ix.searcher() as s:
            docnum = s.document_number(id=u("1"))
            r = s.more_like(docnum, "text", **kwargs)
            assert [hit["id"] for hit in r] == ["6", "2", "3"]
Example #32
def test_bigsort():
    times = 30000
    dirname = "testindex"

    df = fields.DATETIME(stored=True)
    schema = fields.Schema(id=fields.ID(stored=True), date=df)

    if os.path.exists(dirname):
        shutil.rmtree(dirname)
    os.mkdir(dirname)
    ix = index.create_in(dirname, schema)

    print("Writing...")
    t = now()
    w = ix.writer(limitmb=512)
    for i in xrange(times):
        dt = datetime.fromtimestamp(random.randint(15839593, 1294102139))
        w.add_document(id=text_type(i), date=dt)
    w.commit()
    print("Writing took ", now() - t)

    ix = index.open_dir(dirname)
    s = ix.searcher()
    q = query.Wildcard("id", "1?2*")

    t = now()
    x = list(df.sortable_terms(s.reader(), "date"))
    print(now() - t, len(x))

    t = now()
    for y in x:
        p = list(s.postings("date", y).all_ids())
    print(now() - t)

    t = now()
    r = s.search(q, limit=25, sortedby="date", reverse=True)
    print("Search 1 took", now() - t)
    print("len=", r.scored_length())

    t = now()
    r = s.search(q, limit=25, sortedby="date")
    print("Search 2 took", now() - t)

    t = now()
    r = s.search(q, limit=25, sortedby="date")
    print("Search 2 took", now() - t)

    from heapq import nlargest
    t = now()
    sf = s.stored_fields
    gen = ((sf(n)["date"], n) for n in q.docs(s))
    r = nlargest(25, gen)
    print(now() - t)
Example #33
def test_bigsort():
    times = 30000
    dirname = "testindex"
    
    df = fields.DATETIME(stored=True)
    schema = fields.Schema(id=fields.ID(stored=True), date=df)
    
    if os.path.exists(dirname):
        shutil.rmtree(dirname)
    os.mkdir(dirname)
    ix = index.create_in(dirname, schema)
    
    print("Writing...")
    t = now()
    w = ix.writer(limitmb=512)
    for i in xrange(times):
        dt = datetime.fromtimestamp(random.randint(15839593, 1294102139))
        w.add_document(id=text_type(i), date=dt)
    w.commit()
    print("Writing took ", now() - t)
    
    ix = index.open_dir(dirname)
    s = ix.searcher()
    q = query.Wildcard("id", "1?2*")
    
    t = now()
    x = list(df.sortable_values(s.reader(), "date"))
    print(now() - t, len(x))
    
    t = now()
    for y in x:
        p = list(s.postings("date", y).all_ids())
    print(now() - t)

    t = now()
    r = s.search(q, limit=25, sortedby="date", reverse=True)
    print("Search 1 took", now() - t)
    print("len=", r.scored_length())
    
    t = now()
    r = s.search(q, limit=25, sortedby="date")
    print("Search 2 took", now() - t)
    
    t = now()
    r = s.search(q, limit=25, sortedby="date")
    print("Search 2 took", now() - t)
    
    from heapq import nlargest
    t = now()
    sf = s.stored_fields
    gen = ((sf(n)["date"], n) for n in q.docs(s))
    r = nlargest(25, gen)
    print(now() - t)
Example #34
    def __unicode__(self):
        text = self.text
        if isinstance(text, bytes_type):
            try:
                text = text.decode("ascii")
            except UnicodeDecodeError:
                text = repr(text)

        t = u("%s:%s") % (self.fieldname, text)
        if self.boost != 1:
            t += u("^") + text_type(self.boost)
        return t
Example #35
def test_many_updates():
    schema = fields.Schema(key=fields.ID(unique=True, stored=True))
    with TempIndex(schema, "manyupdates") as ix:
        for _ in xrange(10000):
            num = random.randint(0, 5000)
            w = ix.writer()
            w.update_document(key=text_type(num))
            w.commit()

        with ix.searcher() as s:
            result = [d["key"] for d in s.search(query.Every())]
            assert_equal(len(result), len(set(result)))
Example #36
    def __unicode__(self):
        text = self.text
        if isinstance(text, bytes_type):
            try:
                text = text.decode("ascii")
            except UnicodeDecodeError:
                text = repr(text)

        t = u("%s:%s") % (self.fieldname, text)
        if self.boost != 1:
            t += u("^") + text_type(self.boost)
        return t
Example #37
def test_many_updates():
    schema = fields.Schema(key=fields.ID(unique=True, stored=True))
    with TempIndex(schema, "manyupdates") as ix:
        for _ in xrange(10000):
            num = random.randint(0, 5000)
            w = ix.writer()
            w.update_document(key=text_type(num))
            w.commit()

        with ix.searcher() as s:
            result = [d["key"] for d in s.search(query.Every())]
            assert_equal(len(result), len(set(result)))
Example #38
def test_fieldname_chars():
    s = fields.Schema(abc123=fields.TEXT, nisbah=fields.KEYWORD)
    qp = default.QueryParser("content", s)
    fieldmap = {'nisbah': [u('\u0646\u0633\u0628\u0629')],
                'abc123': ['xyz']}
    qp.add_plugin(plugins.FieldAliasPlugin(fieldmap))

    q = qp.parse(u("abc123:456"))
    assert_equal(q.__class__, query.Term)
    assert_equal(q.fieldname, u('abc123'))
    assert_equal(q.text, u('456'))

    q = qp.parse(u("abc123:456 def"))
    assert_equal(text_type(q), u("(abc123:456 AND content:def)"))

    q = qp.parse(u('\u0646\u0633\u0628\u0629:\u0627\u0644\u0641\u0644\u0633\u0637\u064a\u0646\u064a'))
    assert_equal(q.__class__, query.Term)
    assert_equal(q.fieldname, u('nisbah'))
    assert_equal(q.text, u('\u0627\u0644\u0641\u0644\u0633\u0637\u064a\u0646\u064a'))

    q = qp.parse(u("abc123 (xyz:123 OR qrs)"))
    assert_equal(text_type(q), "(content:abc123 AND (abc123:123 OR content:qrs))")
Example #39
def test_no_stored():
    schema = fields.Schema(id=fields.ID, text=fields.TEXT)
    with TempIndex(schema, "nostored") as ix:
        domain = (u("alfa"), u("bravo"), u("charlie"), u("delta"), u("echo"), u("foxtrot"),
                  u("golf"), u("hotel"), u("india"))
        
        w = ix.writer()
        for i in xrange(20):
            w.add_document(id=text_type(i), text=u(" ").join(random.sample(domain, 5)))
        w.commit()
        
        with ix.reader() as r:
            assert_equal(sorted([int(id) for id in r.lexicon("id")]), list(range(20)))
Example #40
def test_buffered():
    schema = fields.Schema(id=fields.ID, text=fields.TEXT)
    with TempIndex(schema, "buffered") as ix:
        domain = (u("alfa"), u("bravo"), u("charlie"), u("delta"), u("echo"), u("foxtrot"),
                  u("golf"), u("hotel"), u("india"))
        
        w = writing.BufferedWriter(ix, period=None, limit=10,
                                   commitargs={"merge": False})
        for i in xrange(100):
            w.add_document(id=text_type(i), text=u(" ").join(random.sample(domain, 5)))
        time.sleep(0.5)
        w.close()
        
        assert_equal(len(ix._segments()), 10)
Example #41
def make_index(storage, indexname, word2nums, num2words):
    """Creates a Whoosh index in the given storage object containing
    synonyms taken from word2nums and num2words. Returns the Index
    object.
    """

    schema = Schema(word=ID, syns=STORED)
    ix = storage.create_index(schema, indexname=indexname)
    w = ix.writer()
    for word in iterkeys(word2nums):
        syns = synonyms(word2nums, num2words, word)
        w.add_document(word=text_type(word), syns=syns)
    w.commit()
    return ix
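A hedged usage sketch for the helper above; word2nums and num2words are the WordNet lookup dictionaries built elsewhere in the module, and the storage/index names are illustrative:

from whoosh.filedb.filestore import FileStorage
st = FileStorage("synonym_index")                  # directory assumed to exist
ix = make_index(st, "SYN", word2nums, num2words)   # assumes the dicts were loaded
with ix.searcher() as s:
    print(s.document(word=u("dog"))["syns"])       # stored synonyms for "dog", if indexed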
Example #42
            def run(self):
                ix = st.create_index(dir, schema)
                num = 0

                for i in xrange(50):
                    print(i)
                    w = ix.writer()
                    for _ in xrange(random.randint(1, 100)):
                        content = u(" ").join(random.sample(domain, random.randint(5, 20)))
                        w.add_document(id=text_type(num), content=content)
                        num += 1
                    w.commit()

                    time.sleep(0.1)
Example #43
def test_update_numeric():
    schema = fields.Schema(num=fields.NUMERIC(unique=True, stored=True),
                           text=fields.ID(stored=True))
    with TempIndex(schema, "updatenum") as ix:
        nums = list(range(5)) * 3
        random.shuffle(nums)
        for num in nums:
            with ix.writer() as w:
                w.update_document(num=num, text=text_type(num))

        with ix.searcher() as s:
            results = [d["text"] for _, d in s.iter_docs()]
            results = " ".join(sorted(results))
            assert results == "0 1 2 3 4"
Example #44
def test_update_numeric():
    schema = fields.Schema(num=fields.NUMERIC(unique=True, stored=True),
                           text=fields.ID(stored=True))
    with TempIndex(schema, "updatenum") as ix:
        nums = list(range(5)) * 3
        random.shuffle(nums)
        for num in nums:
            with ix.writer() as w:
                w.update_document(num=num, text=text_type(num))

        with ix.searcher() as s:
            results = [d["text"] for _, d in s.iter_docs()]
            results = " ".join(sorted(results))
            assert results == "0 1 2 3 4"
Example #45
def make_index(storage, indexname, word2nums, num2words):
    """Creates a Whoosh index in the given storage object containing
    synonyms taken from word2nums and num2words. Returns the Index
    object.
    """

    schema = Schema(word=ID, syns=STORED)
    ix = storage.create_index(schema, indexname=indexname)
    w = ix.writer()
    for word in iterkeys(word2nums):
        syns = synonyms(word2nums, num2words, word)
        w.add_document(word=text_type(word), syns=syns)
    w.commit()
    return ix
Example #46
def test_no_stored():
    schema = fields.Schema(id=fields.ID, text=fields.TEXT)
    with TempIndex(schema, "nostored") as ix:
        domain = (u"alfa", u"bravo", u"charlie", u"delta", u"echo",
                  u"foxtrot", u"golf", u"hotel", u"india")

        w = ix.writer()
        for i in xrange(20):
            w.add_document(id=text_type(i),
                           text=u" ".join(random.sample(domain, 5)))
        w.commit()

        with ix.reader() as r:
            assert sorted([int(id) for id in r.lexicon("id")]) == list(range(20))
Example #47
def test_finish_segment():
    check_multi()

    from whoosh.multiproc import MpWriter

    schema = fields.Schema(a=fields.KEYWORD(stored=True))
    with TempIndex(schema) as ix:
        w = MpWriter(ix, procs=2, batchsize=1, multisegment=False,
                     limitmb=0.00001)

        for i in range(100):
            w.add_document(a=text_type(i) * 10)

        w.commit()
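MpWriter (from whoosh.multiproc) hands batches of `batchsize` documents to `procs` sub-writer processes; with multisegment=False the sub-results are merged into a single segment at commit, while multisegment=True adds each process's segment to the index as-is. A brief hedged sketch:

w = MpWriter(ix, procs=4, batchsize=100, multisegment=True)
for fs in docs:                 # docs: any iterable of field dicts (assumed to exist)
    w.add_document(**fs)
w.commit()                      # leaves one segment per sub-writer process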
Example #48
def test_andor():
    qp = default.QueryParser("a", None)
    q = qp.parse("a AND b OR c AND d OR e AND f")
    assert text_type(q) == "((a:a AND a:b) OR (a:c AND a:d) OR (a:e AND a:f))"

    q = qp.parse("aORb")
    assert q == query.Term("a", "aORb")

    q = qp.parse("aOR b")
    assert q == query.And([query.Term("a", "aOR"), query.Term("a", "b")])

    q = qp.parse("a ORb")
    assert q == query.And([query.Term("a", "a"), query.Term("a", "ORb")])

    assert qp.parse("OR") == query.Term("a", "OR")
Example #49
def test_andor():
    qp = default.QueryParser("a", None)
    q = qp.parse("a AND b OR c AND d OR e AND f")
    assert text_type(q) == "((a:a AND a:b) OR (a:c AND a:d) OR (a:e AND a:f))"

    q = qp.parse("aORb")
    assert q == query.Term("a", "aORb")

    q = qp.parse("aOR b")
    assert q == query.And([query.Term("a", "aOR"), query.Term("a", "b")])

    q = qp.parse("a ORb")
    assert q == query.And([query.Term("a", "a"), query.Term("a", "ORb")])

    assert qp.parse("OR") == query.Term("a", "OR")
Example #50
def test_andor():
    qp = default.QueryParser("a", None)
    q = qp.parse("a AND b OR c AND d OR e AND f")
    assert_equal(text_type(q), "((a:a AND a:b) OR (a:c AND a:d) OR (a:e AND a:f))")

    q = qp.parse("aORb")
    assert_equal(q, query.Term("a", "aORb"))

    q = qp.parse("aOR b")
    assert_equal(q, query.And([query.Term("a", "aOR"), query.Term("a", "b")]))

    q = qp.parse("a ORb")
    assert_equal(q, query.And([query.Term("a", "a"), query.Term("a", "ORb")]))

    assert_equal(qp.parse("OR"), query.Term("a", "OR"))
Example #51
def test_buffered():
    schema = fields.Schema(id=fields.ID, text=fields.TEXT)
    with TempIndex(schema, "buffered") as ix:
        domain = u"alfa bravo charlie delta echo foxtrot golf hotel india"
        domain = domain.split()

        w = writing.BufferedWriter(ix, period=None, limit=10,
                                   commitargs={"merge": False})
        for i in xrange(20):
            w.add_document(id=text_type(i),
                           text=u" ".join(random.sample(domain, 5)))
        time.sleep(0.1)
        w.close()

        assert len(ix._segments()) == 2