Пример #1
0
def test_and():
    """Run two ``And`` queries through ``_run_query``.

    The second argument of each call is presumably the expected result
    list -- confirm against ``_run_query``.
    """
    both_terms = query.And([
        query.Term("value", u("red")),
        query.Term("name", u("yellow")),
    ])
    _run_query(both_terms, [u("A")])

    # Missing: these terms are paired with an empty list.
    missing_terms = query.And([
        query.Term("value", u("ochre")),
        query.Term("name", u("glonk")),
    ])
    _run_query(missing_terms, [])
Пример #2
0
    def GET(self):
        """Serve the recipe search page.

        Reads the ``s`` request parameter. Without a search term the view
        is returned with the distinct tag list and no recipes. A token
        containing ``recipe:`` whose remainder resolves through
        ``rm.Recipe.find`` triggers a redirect to that recipe. Otherwise
        the term is searched and the results template is selected.

        Returns ``self.view`` or a ``Redirect``.
        """
        term = self.request.get_param("s")

        # Distinct values of every recipe document's "tags" field.
        tag_query = r.table(rm.Recipe.table).concat_map(
            lambda doc: doc["tags"])
        all_tags = tag_query.distinct().coerce_to('array').run()

        self.view.data = {"tags": all_tags, "recipes": None}

        if not term:
            return self.view

        # "recipe:" is 7 characters long, so token[7:] is what follows it
        # when the token starts with that marker.
        for token in term.split(" "):
            if "recipe:" not in token:
                continue
            recipe_id = token[7:]
            if rm.Recipe.find(recipe_id) is not None:
                return Redirect("/recipes/{}".format(recipe_id))

        term = term.replace("tag:", "tags:")

        searcher = RecipeSearcher()

        def visible(leading_term):
            # The leading term AND not deleted AND not reported.
            return q.And([
                leading_term,
                q.Term("deleted", False),
                q.Term("reported", False)
            ])

        if self.session.id:
            # With a session id: the user's own recipes or public ones.
            allow = q.Or([
                visible(q.Term("user", self.session.id)),
                visible(q.Term("public", True))
            ])
        else:
            allow = visible(q.Term("public", True))

        ids = searcher.search(term, collection=True, allow=allow)
        if ids is not None:
            ids.fetch()

            page = Paginate(ids,
                            self.request,
                            "title",
                            sort_direction_default="desc")
            # NOTE(review): this replaces the dict assigned above, so the
            # "tags" key is no longer present in the view data.
            self.view.data = {"recipes": page}

        self.view.template = "public/recipes/search/results"
        return self.view
Пример #3
0
def test_regressions():
    """Check that inputs which once needed escaping parse to plain terms."""
    parser = default.QueryParser("f", None)

    def term(text):
        # Every expected term lives in the parser's default field "f".
        return query.Term("f", text)

    # From 0.3.18, these used to require escaping. Mostly good for
    # regression testing.
    assert_equal(parser.parse(u("re-inker")), term("re-inker"))
    assert_equal(parser.parse(u("0.7 wire")),
                 query.And([term("0.7"), term("wire")]))

    quoted = parser.parse(u("daler-rowney pearl 'bell bronze'"))
    assert quoted == query.And([term("daler-rowney"),
                                term("pearl"),
                                term("bell bronze")])

    inches = parser.parse(u('22" BX'))
    assert_equal(inches, query.And([term('22"'), term("BX")]))
Пример #4
0
def test_andor():
    """Check AND/OR grouping and that OR glued to a word is not an operator."""
    parser = default.QueryParser("a", None)

    def term(text):
        # Expected terms are all in the default field "a".
        return query.Term("a", text)

    grouped = parser.parse("a AND b OR c AND d OR e AND f")
    assert_equal(text_type(grouped),
                 "((a:a AND a:b) OR (a:c AND a:d) OR (a:e AND a:f))")

    # "OR" embedded in, or attached to, a word stays part of the term text.
    assert_equal(parser.parse("aORb"), term("aORb"))
    assert_equal(parser.parse("aOR b"), query.And([term("aOR"), term("b")]))
    assert_equal(parser.parse("a ORb"), query.And([term("a"), term("ORb")]))

    # A lone "OR" is parsed as a term.
    assert_equal(parser.parse("OR"), term("OR"))
def test_andor():
    """Check AND/OR grouping and that OR glued to a word is not an operator."""
    parser = default.QueryParser("a", None)

    grouped = parser.parse("a AND b OR c AND d OR e AND f")
    assert text_type(grouped) == "((a:a AND a:b) OR (a:c AND a:d) OR (a:e AND a:f))"

    def term(text):
        # Expected terms are all in the default field "a".
        return query.Term("a", text)

    # (input string, expected parse result)
    cases = [
        ("aORb", term("aORb")),
        ("aOR b", query.And([term("aOR"), term("b")])),
        ("a ORb", query.And([term("a"), term("ORb")])),
        ("OR", term("OR")),
    ]
    for text, expected in cases:
        assert parser.parse(text) == expected
Пример #6
0
def get_attachments_from_dms(community):
    """Build ``Attachment`` objects from indexed documents of a community.

    Searches the "indexing" service with an empty query string, filtered on
    the community id and the ``Document`` entity type, sorted on
    ``created_at`` with ``reverse=True`` and limited to 50 hits.

    Returns a list with one ``Attachment`` per hit.
    """
    index_service = get_service("indexing")
    community_documents = wq.And([
        wq.Term("community_id", community.id),
        wq.Term("object_type", Document.entity_type),
    ])
    by_creation = whoosh.sorting.FieldFacet("created_at", reverse=True)
    hits = index_service.search(
        "", filter=community_documents, sortedby=by_creation, limit=50
    )

    return [
        Attachment(
            url_for(hit),
            hit["name"],
            hit["owner_name"],
            hit["created_at"],
            # The two content fields are read with .get(), so a hit may
            # lack them.
            hit.get("content_length"),
            hit.get("content_type", ""),
        )
        for hit in hits
    ]
Пример #7
0
 def _query_filter(self):
     criterias = []
     for k, v in self.query_params.items():
         if k == 'correspondent__id':
             criterias.append(query.Term('correspondent_id', v))
         elif k == 'tags__id__all':
             for tag_id in v.split(","):
                 criterias.append(query.Term('tag_id', tag_id))
         elif k == 'document_type__id':
             criterias.append(query.Term('type_id', v))
         elif k == 'correspondent__isnull':
             criterias.append(query.Term("has_correspondent", v == "false"))
         elif k == 'is_tagged':
             criterias.append(query.Term("has_tag", v == "true"))
         elif k == 'document_type__isnull':
             criterias.append(query.Term("has_type", v == "false"))
         elif k == 'created__date__lt':
             criterias.append(
                 query.DateRange("created", start=None, end=isoparse(v)))
         elif k == 'created__date__gt':
             criterias.append(
                 query.DateRange("created", start=isoparse(v), end=None))
         elif k == 'added__date__gt':
             criterias.append(
                 query.DateRange("added", start=isoparse(v), end=None))
         elif k == 'added__date__lt':
             criterias.append(
                 query.DateRange("added", start=None, end=isoparse(v)))
     if len(criterias) > 0:
         return query.And(criterias)
     else:
         return None
Пример #8
0
    def query_pre_process(self, query_parameters, context=None):
        """Turn the user's permissions into security filter clauses.

        Does nothing when ``self.enabled`` is false. Otherwise builds one
        ``query.And`` per permission row returned by
        ``self.get_user_permissions`` and passes the allowed and denied
        clauses to ``self.update_security_filter``.

        :param query_parameters: forwarded to ``update_security_filter``.
        :param context: must provide ``req.authname``.
        """
        if not self.enabled:
            return

        allowed_docs = []
        denied_docs = []
        permissions = self.get_user_permissions(context.req.authname)
        for product, doc_type, doc_id, perm, denied in permissions:
            if product:
                product_clause = query.Term(IndexFields.PRODUCT, product)
            else:
                # No product: match documents without any product value.
                product_clause = query.Not(query.Every(IndexFields.PRODUCT))
            clauses = [product_clause]

            # '*' is a wildcard and adds no restriction.
            if doc_type != '*':
                clauses.append(query.Term(IndexFields.TYPE, doc_type))
            if doc_id != '*':
                clauses.append(query.Term(IndexFields.ID, doc_id))
            clauses.append(query.Term(IndexFields.REQUIRED_PERMISSION, perm))

            bucket = denied_docs if denied else allowed_docs
            bucket.append(query.And(clauses))

        self.update_security_filter(query_parameters, allowed_docs,
                                    denied_docs)
Пример #9
0
    def parse(self, input):
        """Build an n-gram query from the input string.

        The gram size is ``len(input)`` clamped to
        ``[self.minchars, self.maxchars]``. Grams without a space are
        required; grams with a space are optional, or dropped when
        ``self.discardspaces`` is set.

        Returns ``None`` when the input is shorter than the gram size or
        yields no required gram, otherwise an ``And`` of the required
        grams, wrapped in ``AndMaybe`` with an ``Or`` of the optional
        grams when there are any.
        """
        gramsize = max(self.minchars, min(self.maxchars, len(input)))
        if gramsize > len(input):
            return None

        required, optional = [], []
        keep_spaced = not self.discardspaces
        for token in self.analyzerclass(gramsize)(input):
            gram = token.text
            if " " not in gram:
                required.append(gram)
            elif keep_spaced:
                optional.append(gram)

        if not required:
            return None

        fieldname = self.fieldname
        andquery = query.And([query.Term(fieldname, g) for g in required])
        if not optional:
            return andquery

        orquery = query.Or([query.Term(fieldname, g) for g in optional])
        return query.AndMaybe([andquery, orquery])
Пример #10
0
def test_termdocs():
    """Check the "key" terms recorded in ``termdocs`` for a prefix query."""
    schema = fields.Schema(key=fields.TEXT, city=fields.ID)
    ix = RamStorage().create_index(schema)

    # (key, city) pairs; cities alternate between london and roma.
    rows = [
        (u"ant", u"london"),
        (u"anteater", u"roma"),
        (u"bear", u"london"),
        (u"bees", u"roma"),
        (u"anorak", u"london"),
        (u"antimatter", u"roma"),
        (u"angora", u"london"),
        (u"angels", u"roma"),
    ]
    with ix.writer() as w:
        for key, city in rows:
            w.add_document(key=key, city=city)

    with ix.searcher() as s:
        in_london = query.Term("city", u"london")
        starts_with_an = query.Prefix("key", u"an")
        q = query.And([in_london, starts_with_an]).normalize()
        r = s.search(q, scored=False, terms=True)

        # Decode only the terms that belong to the "key" field.
        key_field = s.schema["key"]
        found = sorted(
            key_field.from_bytes(term)
            for fieldname, term in r.termdocs
            if fieldname == "key"
        )
        assert found == [u"angora", u"anorak", u"ant"]
Пример #11
0
def test_sorting_function():
    """Sort hits with a ``FunctionFacet`` and check the first ten results."""
    schema = fields.Schema(id=fields.STORED,
                           text=fields.TEXT(stored=True, vector=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    domain = ("alfa", "bravo", "charlie")
    # Every ordered four-word combination over the domain, numbered from 1.
    combos = ((a, b, c, d)
              for a in domain
              for b in domain
              for c in domain
              for d in domain)
    for docid, words in enumerate(combos, 1):
        w.add_document(id=docid, text=u(" ").join(words))
    w.commit()

    def balance_key(searcher, docnum):
        freqs = dict(searcher.vector_as("frequency", docnum, "text"))
        # Sort documents that have equal number of "alfa"
        # and "bravo" first
        difference = abs(freqs.get("alfa", 0) - freqs.get("bravo", 0))
        return 0 - 1.0 / (difference + 1.0)
    fnfacet = sorting.FunctionFacet(balance_key)

    with ix.searcher() as s:
        q = query.And([query.Term("text", u("alfa")),
                       query.Term("text", u("bravo"))])
        results = s.search(q, sortedby=fnfacet)
        texts = [hit["text"] for hit in results]
        for text in texts[:10]:
            words = text.split()
            assert words.count("alfa") == words.count("bravo")
Пример #12
0
 def parse(self, input):
     """Parse the input string into an n-gram Query object/tree.

     The gram size is ``len(input)`` clamped to
     ``[self.minchars, self.maxchars]``. Grams without a space are
     required; grams with a space are optional, or dropped when
     ``self.discardspaces`` is set.

     Returns None when the input is shorter than the gram size or yields
     no required gram.

     :input: the unicode string to parse.
     """
     gramsize = max(self.minchars, min(self.maxchars, len(input)))
     if gramsize > len(input):
         return None

     required, optional = [], []
     keep_spaced = not self.discardspaces
     for token in self.analyzerclass(gramsize)(input):
         gram = token.text
         if " " not in gram:
             required.append(gram)
         elif keep_spaced:
             optional.append(gram)

     if not required:
         return None

     fieldname = self.fieldname
     andquery = query.And([query.Term(fieldname, g) for g in required])
     if not optional:
         return andquery

     # Optional grams can only add to a match of the required grams.
     orquery = query.Or([query.Term(fieldname, g) for g in optional])
     return query.AndMaybe([andquery, orquery])
Пример #13
0
 def allowed_documents():
     """Yield one ``query.And`` per (product, permission) pair.

     Pairs come from ``self._get_all_user_permissions(context)``; ``self``
     and ``context`` are taken from the enclosing scope.
     """
     #todo: add special case handling for trac_admin and product_owner
     for product, perm in self._get_all_user_permissions(context):
         # A falsy product matches documents without any product value.
         product_clause = (
             query.Term(IndexFields.PRODUCT, product)
             if product
             else query.Not(query.Every(IndexFields.PRODUCT))
         )
         permission_clause = query.Term(IndexFields.REQUIRED_PERMISSION, perm)
         yield query.And([product_clause, permission_clause])
Пример #14
0
def test_nested_skip():
    """Combine a ``NestedChildren`` query with a plain ``And`` query."""
    schema = fields.Schema(
        id=fields.ID(unique=True, stored=True),
        name=fields.TEXT(stored=True),
        name_ngrams=fields.NGRAMWORDS(minsize=4, field_boost=1.2),
        type=fields.TEXT,
    )

    # (id, name, type): one book followed by four chapters.
    domain = [
        (u"book_1", u"The Dark Knight Returns", u"book"),
        (u"chapter_1", u"The Dark Knight Returns", u"chapter"),
        (u"chapter_2", u"The Dark Knight Triumphant", u"chapter"),
        (u"chapter_3", u"Hunt the Dark Knight", u"chapter"),
        (u"chapter_4", u"The Dark Knight Falls", u"chapter"),
    ]

    def hit_ids(results):
        return [hit["id"] for hit in results]

    with TempIndex(schema) as ix:
        with ix.writer() as w:
            for doc_id, name, typ in domain:
                w.add_document(id=doc_id, name=name, name_ngrams=name,
                               type=typ)

        with ix.searcher() as s:
            all_parents = query.Term("type", "book")
            wanted_parents = query.Term("name", "dark")
            children_of_wanted_parents = query.NestedChildren(
                all_parents, wanted_parents)

            r1 = s.search(children_of_wanted_parents)
            assert r1.scored_length() == 4
            assert hit_ids(r1) == [
                "chapter_1", "chapter_2", "chapter_3", "chapter_4"]

            wanted_children = query.And([
                query.Term("type", "chapter"),
                query.Term("name", "hunt"),
            ])

            r2 = s.search(wanted_children)
            assert r2.scored_length() == 1
            assert hit_ids(r2) == ["chapter_3"]

            # Both conditions together must still find the single chapter.
            complex_query = query.And(
                [children_of_wanted_parents, wanted_children])

            r3 = s.search(complex_query)
            assert r3.scored_length() == 1
            assert hit_ids(r3) == ["chapter_3"]
Пример #15
0
    def document_numbers(self, **kw):
        """Return the document numbers matching the given keyword arguments.

        Each keyword is a field name and its value a term; the terms are
        combined with ``query.And``. The return value is whatever
        ``q.docs(self)`` produces (per the original docstring, a generator
        of document numbers).

        >>> docnums = list(searcher.document_numbers(emailto=u"*****@*****.**"))
        """

        # dict.items() exists on both Python 2 and 3; iteritems() is
        # Python 2 only and raises AttributeError on Python 3.
        q = query.And([query.Term(k, v) for k, v in kw.items()])
        return q.docs(self)
Пример #16
0
 def _Toplevel(self, node, fieldname):
     """Combine the evaluated children of ``node`` into one query.

     Children that evaluate to a tuple contribute their first element as
     a required query. With at least one required query the result is an
     ``AndMaybe`` of (required + ``Not`` queries) and an ``Or`` of the
     remaining ones; otherwise it is an ``Or`` of all evaluated children.
     """
     evaluated = [self._eval(child, fieldname) for child in node]
     required = [item[0] for item in evaluated if isinstance(item, tuple)]
     if not required:
         return query.Or(evaluated)

     negated = []
     optional = []
     for item in evaluated:
         if isinstance(item, query.Not):
             negated.append(item)
         elif not isinstance(item, tuple):
             optional.append(item)
     return query.AndMaybe([query.And(required + negated),
                            query.Or(optional)])
Пример #17
0
 def create_security_filter(self, query_parameters):
     """Return the security filter of ``query_parameters['filter']``.

     When ``self.find_security_filter`` finds none, a new
     ``SecurityFilter`` is created and stored in
     ``query_parameters['filter']``: combined with the current filter via
     ``query.And`` if there is one, otherwise on its own.
     """
     existing = self.find_security_filter(query_parameters['filter'])
     if existing:
         return existing

     security_filter = SecurityFilter()
     current = query_parameters['filter']
     if current:
         query_parameters['filter'] = query.And([current, security_filter])
     else:
         query_parameters['filter'] = security_filter
     return security_filter
Пример #18
0
    def eval_get_ranked_set_baseline(self, basefile):
        """Build a baseline ranked result set for each article.

        Reads the saved keyterms graph, then searches the whoosh index
        under ``<datadir>/ecj/index`` once per article with an OR of that
        article's keyterms on the "content" field, scored with BM25F.

        :param basefile: passed to ``self._articles`` to select articles.
        :returns: dict mapping each article to a list of
            ``http://lagen.nu/ext/celex/<basefile>`` URIs in result order.
        :raises KeyError: if an article from ``self._articles`` has no
            keyterms in the graph.
        """
        # Step 1: Read the saved keyterms for a subset of articles
        # (created by analyze_baseline_queries)
        g = Graph()
        g.parse(self.generic_path("keyterms", "analyzed", ".n3"), format="n3")

        # subject -> list of objects, both stringified
        articles = {}
        for (s, p, o) in g:
            articles.setdefault(str(s), []).append(str(o))

        # Step 2: Open the large whoosh index containing the text of
        # all cases. Then, create a query for each article based on
        # the keyterms.
        connector = query.Or
        indexdir = os.path.sep.join([self.config.datadir, 'ecj', 'index'])
        idx = FileStorage(indexdir).open_index()

        res = {}

        # The context manager closes the searcher once all articles are
        # processed; every result is reduced to plain strings before that.
        with idx.searcher(weighting=scoring.BM25F()) as searcher:
            for article in self._articles(basefile):
                terms = articles[article]
                # The And wrapper has one clause: the OR of the keyterms.
                q = query.And([
                    connector([query.Term("content", x) for x in terms])
                ])
                results = searcher.search(q, limit=None)

                rankedset = []
                for resultidx, result in enumerate(results):
                    reslbl = "%s (%s)" % (result['basefile'],
                                          results.score(resultidx))
                    rankedset.append([result['basefile'], reslbl])

                # A query may match nothing; report None as the top result
                # instead of failing with IndexError on rankedset[0].
                top_result = rankedset[0][0] if rankedset else None
                self.log.info(
                    "Created baseline ranked set for %s: Top result %s (of %s)" %
                    (article.split("/")[-1], top_result, len(rankedset)))

                # return just a list of URIs, no scoring information. But the
                # full URI isnt available in the whoosh db, so we recreate it.
                res[article] = [
                    "http://lagen.nu/ext/celex/%s" % x[0] for x in rankedset
                ]

        return res
Пример #19
0
    def documents(self, **kw):
        """
        Convenience function returning the stored fields of documents
        matching the given keyword arguments, where the keyword keys are
        field names and the values are terms that must appear in the field.

        Returns a generator of the ``self.doc_reader`` entries (per the
        original docstring, dictionaries of stored fields) for the
        matching document numbers.
        """

        # dict.items() exists on both Python 2 and 3; iteritems() is
        # Python 2 only and raises AttributeError on Python 3.
        q = query.And([query.Term(k, v) for k, v in kw.items()])
        doc_reader = self.doc_reader
        return (doc_reader[docnum] for docnum in q.docs(self))
Пример #20
0
 def get_filter(self, querydict):
     """
     Build a Whoosh query filter from the currently selected facets.

     For every field of ``self.get_fields()``, the values submitted as
     ``filter_<field>`` are OR-ed together; the per-field groups are then
     AND-ed.

     Takes `querydict` - a MultiDict with current HTTP GET params.
     """
     # (field, user-provided values concerning that field)
     selections = (
         (field, querydict.getlist('filter_' + field))
         for field in self.get_fields()
     )
     # NOTE(review): with no facet selected this is an And over an empty
     # list -- confirm that callers handle that case.
     return query.And([
         query.Or([query.Term(field, val) for val in values])
         for field, values in selections
         if values
     ])
Пример #21
0
    def GET(self):
        """Search recipes for the ``s`` request parameter.

        Returns a ``Paginate`` over the search results, or
        ``{"page": None, "pail": None}`` when there is no search term or
        the searcher returns ``None``.
        """
        no_results = {"page": None, "pail": None}

        term = self.request.get_param("s")
        if not term:
            return no_results

        term = term.replace("tag:", "tags:")

        searcher = RecipeSearcher()

        def visible(leading_term):
            # The leading term AND not deleted AND not reported.
            return q.And([
                leading_term,
                q.Term("deleted", False),
                q.Term("reported", False)
            ])

        if self.session.id:
            # With a session id: the user's own recipes or public ones.
            allow = q.Or([
                visible(q.Term("user", self.session.id)),
                visible(q.Term("public", True))
            ])
        else:
            allow = visible(q.Term("public", True))

        ids = searcher.search(term, collection=True, allow=allow)
        if ids is None:
            return no_results

        return Paginate(ids,
                        self.request,
                        "title",
                        sort_direction_default="desc")
Пример #22
0
def test_multireader_not():
    """``x AND NOT x`` must match nothing, in one commit and in several."""
    schema = fields.Schema(id=fields.STORED, f=fields.TEXT)

    def assert_contradiction_is_empty(index):
        with index.searcher() as s:
            q = query.And([query.Term("f", "delta"),
                           query.Not(query.Term("f", "delta"))])
            r = s.search(q)
            assert_equal(len(r), 0)

    # First index: all documents written in one commit.
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    single_batch = [
        u("alfa bravo chralie"),
        u("bravo chralie delta"),
        u("charlie delta echo"),
        u("delta echo foxtrot"),
        u("echo foxtrot golf"),
    ]
    for docid, text in enumerate(single_batch):
        w.add_document(id=docid, f=text)
    w.commit()

    assert_contradiction_is_empty(ix)

    # Second index: three commits with merge=False.
    ix = RamStorage().create_index(schema)
    batches = [
        [(5, u("alfa bravo chralie")), (6, u("bravo chralie delta"))],
        [(7, u("charlie delta echo")), (8, u("delta echo foxtrot"))],
        [(9, u("echo foxtrot golf")), (10, u("foxtrot golf delta"))],
    ]
    for batch in batches:
        w = ix.writer()
        for docid, text in batch:
            w.add_document(id=docid, f=text)
        w.commit(merge=False)
    assert len(ix._segments()) > 1

    assert_contradiction_is_empty(ix)
Пример #23
0
    def test_can_parse_complex_query(self):
        """A field term plus two ``$`` shortcuts parse into one ``And``."""
        parsed_query = self.parser.parse("content:test $ticket $unresolved")

        # "$unresolved" is expected as NOT (resolved OR closed).
        resolved_or_closed = query.Or([
            query.Term('status', 'resolved'),
            query.Term('status', 'closed')
        ])
        expected = query.And([
            query.Term('content', 'test'),
            query.Term('type', 'ticket'),
            query.Not(resolved_or_closed)
        ])
        self.assertEqual(parsed_query, expected)
Пример #24
0
def test_filtered_grouped():
    """Search with both a filter and ``groupedby`` and check the hit count."""
    schema = fields.Schema(tag=fields.ID, text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    domain = u("alfa bravo charlie delta echo foxtrot").split()

    with ix.writer() as w:
        for position, words in enumerate(permutations(domain, 3)):
            # Tags cycle through "0", "1", "2".
            w.add_document(tag=u(str(position % 3)),
                           text=u(" ").join(words))

    with ix.searcher() as s:
        charlie_and_delta = query.And([
            query.Term("text", "charlie"),
            query.Term("text", "delta"),
        ])
        r = s.search(query.Every(), filter=charlie_and_delta,
                     groupedby="tag", limit=None)
        assert len(r) == 24
Пример #25
0
def compose_whoosh_terms(field, value, schema):
    """Compose a Term query (or an And of Terms) for ``value``.

    The lower-cased value is parsed with a ``QueryParser`` for ``field``.
    If the parse yields exactly one term, a single ``Term`` on the whole
    lower-cased value is returned; otherwise an ``And`` of one ``Term``
    per parsed term with non-empty text, all on ``field``.
    """
    lowered = value.lower()
    parsed = QueryParser(field, schema=schema).parse(lowered)
    found = parsed.all_terms()
    if len(found) == 1:
        return query.Term(field, lowered)

    # The term for the complete value is left out for now.
    # Each entry is indexed at [1] for its text; the entry's own field
    # name is not used.
    return query.And([
        query.Term(field, entry[1]) for entry in found if entry[1]
    ])
Пример #26
0
def do_search(txt, sumlevel=None, kind=None, tries=0, limit=10, is_stem=None):
    """Search for ``txt``, retrying with the top suggestion when nothing hits.

    Commas are stripped from ``txt``. ``kind``, ``sumlevel`` and a
    positive ``is_stem`` are combined into the result filter. When the
    first page is empty and the corrector offers suggestions, the search
    is repeated with the first suggestion and ``tries + 1``.

    ``limit`` is only passed on to the retry; the page length is fixed
    at 20.

    Returns ``(data, suggestions, tries)``, or three empty lists once
    ``tries`` exceeds 2.
    """
    txt = txt.replace(",", "")

    # Collect the kind / sumlevel restrictions that were supplied.
    clauses = []
    if kind:
        clauses.append(query.Term("kind", kind))
    if sumlevel:
        clauses.append(query.Term("sumlevel", sumlevel))

    if len(clauses) == 2:
        my_filter = query.And(clauses)
    elif clauses:
        my_filter = clauses[0]
    else:
        my_filter = None

    if is_stem and is_stem > 0:
        stem_range = query.NumericRange("is_stem", 1, is_stem)
        my_filter = stem_range if my_filter is None else my_filter & stem_range

    if tries > 2:
        return [], [], []

    q = qp.parse(txt)
    weighter = SimpleWeighter(txt, B=.45, content_B=1.0, K1=1.5)
    with ix.searcher(weighting=weighter) as s:
        suggs = []
        if len(txt) > 2:
            suggs = s.corrector("display").suggest(
                txt, limit=10, maxdist=2, prefix=3)

        results = s.search_page(q,
                                1,
                                sortedby=[scores],
                                pagelen=20,
                                filter=my_filter)
        data = []
        for hit in results:
            data.append([
                hit["id"], hit["name"], hit["zvalue"], hit["kind"],
                hit["display"],
                # The last three fields may be absent from a hit.
                hit["sumlevel"] if "sumlevel" in hit else "",
                hit["is_stem"] if "is_stem" in hit else False,
                hit["url_name"] if "url_name" in hit else None
            ])

        if data or not suggs:
            return data, suggs, tries

        # Nothing found but there is a suggestion: retry with it.
        return do_search(suggs[0],
                         sumlevel,
                         kind,
                         tries=tries + 1,
                         limit=limit,
                         is_stem=is_stem)
def test_current_terms():
    """An ``And`` matcher must report both of its terms on every match."""
    domain = u("alfa bravo charlie delta").split()
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    for words in permutations(domain, 3):
        w.add_document(text=" ".join(words), _stored_text=words)
    w.commit()

    with ix.searcher() as s:
        q = query.And([query.Term("text", "alfa"),
                       query.Term("text", "charlie")])
        m = q.matcher(s)

        expected = [("text", b("alfa")), ("text", b("charlie"))]
        # Walk every matching document and check its matching terms.
        while m.is_active():
            assert sorted(m.matching_terms()) == expected
            m.next()
Пример #28
0
    def lookup(self, source_language, target_language, text):
        """Yield stored entries similar to ``text`` for a language pair.

        Opens the searcher, parses ``text`` with ``self.parser`` and
        searches with a filter on both languages (at most 20000 hits).
        Hits whose ``self.comparer`` similarity to ``text`` is below 30
        are skipped.

        Yields ``(source, target, similarity, origin)`` tuples.
        """
        language_pair = query.And([
            query.Term('source_language', source_language),
            query.Term('target_language', target_language),
        ])
        self.open_searcher()
        parsed_text = self.parser.parse(text)
        hits = self.searcher.search(parsed_text,
                                    filter=language_pair,
                                    limit=20000)

        min_similarity = 30
        for hit in hits:
            score = self.comparer.similarity(text, hit['source'])
            if score < min_similarity:
                continue
            yield (hit['source'], hit['target'], score, hit['origin'])
def test_or_nots1():
    """An ``Or`` of two ``Not`` clauses inside an ``And`` (issue #285)."""
    # Issue #285
    schema = fields.Schema(a=fields.KEYWORD(stored=True),
                           b=fields.KEYWORD(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(a=u("alfa"), b=u("charlie"))

    with ix.searcher() as s:
        # The only document has b=charlie, so "NOT b:bravo" holds for it.
        not_bravo_or_not_charlie = query.Or([
            query.Not(query.Term("b", "bravo")),
            query.Not(query.Term("b", "charlie")),
        ])
        q = query.And([query.Term("a", "alfa"), not_bravo_or_not_charlie])
        r = s.search(q)
        assert len(r) == 1
Пример #30
0
    def lookup(self, source_language, target_language, text, user, project,
               use_shared):
        """Yield stored entries similar to ``text`` for a language pair.

        The search filter combines both language terms with
        ``self.get_filter(user, project, use_shared, True)``; at most
        20000 hits are requested. Hits whose ``self.comparer`` similarity
        to ``text`` is below 30 are skipped.

        Yields ``(source, target, similarity, category, origin)`` tuples.
        """
        scope = query.And([
            query.Term('source_language', source_language),
            query.Term('target_language', target_language),
            self.get_filter(user, project, use_shared, True),
        ])
        parsed_text = self.parser.parse(text)
        hits = self.searcher.search(parsed_text,
                                    filter=scope,
                                    limit=20000)

        min_similarity = 30
        for hit in hits:
            score = self.comparer.similarity(text, hit['source'])
            if score < min_similarity:
                continue
            yield (hit['source'], hit['target'], score,
                   hit['category'], hit['origin'])