def test_and():
    _run_query(query.And([query.Term("value", u("red")),
                          query.Term("name", u("yellow"))]),
               [u("A")])
    # Missing
    _run_query(query.And([query.Term("value", u("ochre")),
                          query.Term("name", u("glonk"))]),
               [])
def GET(self):
    search_term = self.request.get_param("s")
    all_tags = r.table(rm.Recipe.table)\
                .concat_map(lambda doc: doc["tags"])\
                .distinct()\
                .coerce_to('array').run()
    self.view.data = {"tags": all_tags, "recipes": None}

    if search_term:
        if "recipe:" in search_term:
            parts = search_term.split(" ")
            for part in parts:
                if "recipe:" in part:
                    recipe = rm.Recipe.find(part[7:])
                    if recipe is not None:
                        return Redirect("/recipes/{}".format(part[7:]))

        search_term = search_term.replace("tag:", "tags:")
        searcher = RecipeSearcher()
        if self.session.id:
            allow = q.Or([
                q.And([
                    q.Term("user", self.session.id),
                    q.Term("deleted", False),
                    q.Term("reported", False)
                ]),
                q.And([
                    q.Term("public", True),
                    q.Term("deleted", False),
                    q.Term("reported", False)
                ])
            ])
        else:
            allow = q.And([
                q.Term("public", True),
                q.Term("deleted", False),
                q.Term("reported", False)
            ])

        ids = searcher.search(search_term, collection=True, allow=allow)
        if ids is not None:
            ids.fetch()
            page = Paginate(ids, self.request, "title",
                            sort_direction_default="desc")
            self.view.data = {"recipes": page}
            self.view.template = "public/recipes/search/results"

    return self.view
def test_regressions():
    qp = default.QueryParser("f", None)

    # From 0.3.18, these used to require escaping. Mostly good for
    # regression testing.
    assert_equal(qp.parse(u("re-inker")), query.Term("f", "re-inker"))
    assert_equal(qp.parse(u("0.7 wire")),
                 query.And([query.Term("f", "0.7"), query.Term("f", "wire")]))
    assert (qp.parse(u("daler-rowney pearl 'bell bronze'"))
            == query.And([query.Term("f", "daler-rowney"),
                          query.Term("f", "pearl"),
                          query.Term("f", "bell bronze")]))

    q = qp.parse(u('22" BX'))
    assert_equal(q, query.And([query.Term("f", '22"'),
                               query.Term("f", "BX")]))
def test_andor():
    qp = default.QueryParser("a", None)
    q = qp.parse("a AND b OR c AND d OR e AND f")
    assert_equal(text_type(q),
                 "((a:a AND a:b) OR (a:c AND a:d) OR (a:e AND a:f))")

    q = qp.parse("aORb")
    assert_equal(q, query.Term("a", "aORb"))

    q = qp.parse("aOR b")
    assert_equal(q, query.And([query.Term("a", "aOR"), query.Term("a", "b")]))

    q = qp.parse("a ORb")
    assert_equal(q, query.And([query.Term("a", "a"), query.Term("a", "ORb")]))

    assert_equal(qp.parse("OR"), query.Term("a", "OR"))
def test_andor():
    qp = default.QueryParser("a", None)
    q = qp.parse("a AND b OR c AND d OR e AND f")
    assert text_type(q) == "((a:a AND a:b) OR (a:c AND a:d) OR (a:e AND a:f))"

    q = qp.parse("aORb")
    assert q == query.Term("a", "aORb")

    q = qp.parse("aOR b")
    assert q == query.And([query.Term("a", "aOR"), query.Term("a", "b")])

    q = qp.parse("a ORb")
    assert q == query.And([query.Term("a", "a"), query.Term("a", "ORb")])

    assert qp.parse("OR") == query.Term("a", "OR")
def get_attachments_from_dms(community):
    index_service = get_service("indexing")
    filters = wq.And(
        [
            wq.Term("community_id", community.id),
            wq.Term("object_type", Document.entity_type),
        ]
    )
    sortedby = whoosh.sorting.FieldFacet("created_at", reverse=True)
    documents = index_service.search("", filter=filters, sortedby=sortedby,
                                     limit=50)
    attachments = []
    for doc in documents:
        url = url_for(doc)
        attachment = Attachment(
            url,
            doc["name"],
            doc["owner_name"],
            doc["created_at"],
            doc.get("content_length"),
            doc.get("content_type", ""),
        )
        attachments.append(attachment)
    return attachments
def _query_filter(self):
    criterias = []
    for k, v in self.query_params.items():
        if k == 'correspondent__id':
            criterias.append(query.Term('correspondent_id', v))
        elif k == 'tags__id__all':
            for tag_id in v.split(","):
                criterias.append(query.Term('tag_id', tag_id))
        elif k == 'document_type__id':
            criterias.append(query.Term('type_id', v))
        elif k == 'correspondent__isnull':
            criterias.append(query.Term("has_correspondent", v == "false"))
        elif k == 'is_tagged':
            criterias.append(query.Term("has_tag", v == "true"))
        elif k == 'document_type__isnull':
            criterias.append(query.Term("has_type", v == "false"))
        elif k == 'created__date__lt':
            criterias.append(
                query.DateRange("created", start=None, end=isoparse(v)))
        elif k == 'created__date__gt':
            criterias.append(
                query.DateRange("created", start=isoparse(v), end=None))
        elif k == 'added__date__gt':
            criterias.append(
                query.DateRange("added", start=isoparse(v), end=None))
        elif k == 'added__date__lt':
            criterias.append(
                query.DateRange("added", start=None, end=isoparse(v)))

    if len(criterias) > 0:
        return query.And(criterias)
    else:
        return None
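# Hedged usage sketch for a filter like the one built above. The schema and
# field values here are assumptions modelled on the method's field names, not
# the application's real schema: the composed query.And is passed as `filter=`
# so it restricts results without affecting relevance scoring.
from datetime import datetime

from whoosh import fields, query
from whoosh.filedb.filestore import RamStorage

schema = fields.Schema(content=fields.TEXT(stored=True),
                       correspondent_id=fields.ID,
                       created=fields.DATETIME)
ix = RamStorage().create_index(schema)
with ix.writer() as w:
    w.add_document(content=u"invoice for services",
                   correspondent_id=u"42",
                   created=datetime(2020, 6, 1))

criteria_filter = query.And([
    query.Term("correspondent_id", u"42"),
    query.DateRange("created", start=None, end=datetime(2021, 1, 1)),
])
with ix.searcher() as s:
    results = s.search(query.Term("content", u"invoice"),
                       filter=criteria_filter, limit=50)
    print([hit["content"] for hit in results])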
def query_pre_process(self, query_parameters, context=None):
    if not self.enabled:
        return
    permissions = self.get_user_permissions(context.req.authname)
    allowed_docs, denied_docs = [], []
    for product, doc_type, doc_id, perm, denied in permissions:
        term_spec = []
        if product:
            term_spec.append(query.Term(IndexFields.PRODUCT, product))
        else:
            term_spec.append(query.Not(query.Every(IndexFields.PRODUCT)))
        if doc_type != '*':
            term_spec.append(query.Term(IndexFields.TYPE, doc_type))
        if doc_id != '*':
            term_spec.append(query.Term(IndexFields.ID, doc_id))
        term_spec.append(query.Term(IndexFields.REQUIRED_PERMISSION, perm))
        term_spec = query.And(term_spec)
        if denied:
            denied_docs.append(term_spec)
        else:
            allowed_docs.append(term_spec)
    self.update_security_filter(query_parameters, allowed_docs, denied_docs)
def parse(self, input):
    required = []
    optional = []
    gramsize = max(self.minchars, min(self.maxchars, len(input)))
    if gramsize > len(input):
        return None

    discardspaces = self.discardspaces
    for t in self.analyzerclass(gramsize)(input):
        gram = t.text
        if " " in gram:
            if not discardspaces:
                optional.append(gram)
        else:
            required.append(gram)

    if required:
        fieldname = self.fieldname
        andquery = query.And([query.Term(fieldname, g) for g in required])
        if optional:
            orquery = query.Or([query.Term(fieldname, g) for g in optional])
            return query.AndMaybe([andquery, orquery])
        else:
            return andquery
    else:
        return None
def test_termdocs():
    schema = fields.Schema(key=fields.TEXT, city=fields.ID)
    ix = RamStorage().create_index(schema)

    with ix.writer() as w:
        w.add_document(key=u"ant", city=u"london")
        w.add_document(key=u"anteater", city=u"roma")
        w.add_document(key=u"bear", city=u"london")
        w.add_document(key=u"bees", city=u"roma")
        w.add_document(key=u"anorak", city=u"london")
        w.add_document(key=u"antimatter", city=u"roma")
        w.add_document(key=u"angora", city=u"london")
        w.add_document(key=u"angels", city=u"roma")

    with ix.searcher() as s:
        cond_q = query.Term("city", u"london")
        pref_q = query.Prefix("key", u"an")
        q = query.And([cond_q, pref_q]).normalize()
        r = s.search(q, scored=False, terms=True)

        field = s.schema["key"]
        terms = [field.from_bytes(term) for fieldname, term in r.termdocs
                 if fieldname == "key"]
        assert sorted(terms) == [u"angora", u"anorak", u"ant"]
def test_sorting_function():
    schema = fields.Schema(id=fields.STORED,
                           text=fields.TEXT(stored=True, vector=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    domain = ("alfa", "bravo", "charlie")
    count = 1
    for w1 in domain:
        for w2 in domain:
            for w3 in domain:
                for w4 in domain:
                    w.add_document(id=count,
                                   text=u(" ").join((w1, w2, w3, w4)))
                    count += 1
    w.commit()

    def fn(searcher, docnum):
        v = dict(searcher.vector_as("frequency", docnum, "text"))
        # Sort documents that have equal number of "alfa"
        # and "bravo" first
        return 0 - 1.0 / (abs(v.get("alfa", 0) - v.get("bravo", 0)) + 1.0)

    fnfacet = sorting.FunctionFacet(fn)
    with ix.searcher() as s:
        q = query.And([query.Term("text", u("alfa")),
                       query.Term("text", u("bravo"))])
        results = s.search(q, sortedby=fnfacet)
        r = [hit["text"] for hit in results]
        for t in r[:10]:
            tks = t.split()
            assert tks.count("alfa") == tks.count("bravo")
def parse(self, input): """Parses the input string and returns a Query object/tree. This method may return None if the input string does not result in any valid queries. It may also raise a variety of exceptions if the input string is malformed. :input: the unicode string to parse. """ required = [] optional = [] gramsize = max(self.minchars, min(self.maxchars, len(input))) if gramsize > len(input): return None discardspaces = self.discardspaces for t in self.analyzerclass(gramsize)(input): gram = t.text if " " in gram: if not discardspaces: optional.append(gram) else: required.append(gram) if required: fieldname = self.fieldname andquery = query.And([query.Term(fieldname, g) for g in required]) if optional: orquery = query.Or([query.Term(fieldname, g) for g in optional]) return query.AndMaybe([andquery, orquery]) else: return andquery else: return None
def allowed_documents():
    # todo: add special case handling for trac_admin and product_owner
    for product, perm in self._get_all_user_permissions(context):
        if product:
            prod_term = query.Term(IndexFields.PRODUCT, product)
        else:
            prod_term = query.Not(query.Every(IndexFields.PRODUCT))
        perm_term = query.Term(IndexFields.REQUIRED_PERMISSION, perm)
        yield query.And([prod_term, perm_term])
def test_nested_skip():
    schema = fields.Schema(
        id=fields.ID(unique=True, stored=True),
        name=fields.TEXT(stored=True),
        name_ngrams=fields.NGRAMWORDS(minsize=4, field_boost=1.2),
        type=fields.TEXT,
    )
    domain = [(u"book_1", u"The Dark Knight Returns", u"book"),
              (u"chapter_1", u"The Dark Knight Returns", u"chapter"),
              (u"chapter_2", u"The Dark Knight Triumphant", u"chapter"),
              (u"chapter_3", u"Hunt the Dark Knight", u"chapter"),
              (u"chapter_4", u"The Dark Knight Falls", u"chapter")]

    with TempIndex(schema) as ix:
        with ix.writer() as w:
            for id, name, typ in domain:
                w.add_document(id=id, name=name, name_ngrams=name, type=typ)

        with ix.searcher() as s:
            all_parents = query.Term("type", "book")
            wanted_parents = query.Term("name", "dark")
            children_of_wanted_parents = query.NestedChildren(all_parents,
                                                              wanted_parents)

            r1 = s.search(children_of_wanted_parents)
            assert r1.scored_length() == 4
            assert [hit["id"] for hit in r1] == ["chapter_1", "chapter_2",
                                                 "chapter_3", "chapter_4"]

            wanted_children = query.And([query.Term("type", "chapter"),
                                         query.Term("name", "hunt")])

            r2 = s.search(wanted_children)
            assert r2.scored_length() == 1
            assert [hit["id"] for hit in r2] == ["chapter_3"]

            complex_query = query.And([children_of_wanted_parents,
                                       wanted_children])

            r3 = s.search(complex_query)
            assert r3.scored_length() == 1
            assert [hit["id"] for hit in r3] == ["chapter_3"]
def document_numbers(self, **kw):
    """Returns a generator of the document numbers for documents matching
    the given keyword arguments, where the keyword keys are field names and
    the values are terms that must appear in the field.

    >>> docnums = list(searcher.document_numbers(emailto=u"*****@*****.**"))
    """
    q = query.And([query.Term(k, v) for k, v in kw.iteritems()])
    return q.docs(self)
def _Toplevel(self, node, fieldname):
    queries = [self._eval(s, fieldname) for s in node]
    reqds = [q[0] for q in queries if isinstance(q, tuple)]
    if reqds:
        nots = [q for q in queries if isinstance(q, query.Not)]
        opts = [q for q in queries
                if not isinstance(q, query.Not) and not isinstance(q, tuple)]
        return query.AndMaybe([query.And(reqds + nots), query.Or(opts)])
    else:
        return query.Or(queries)
def create_security_filter(self, query_parameters):
    security_filter = self.find_security_filter(query_parameters['filter'])
    if not security_filter:
        security_filter = SecurityFilter()
        if query_parameters['filter']:
            query_parameters['filter'] = query.And(
                [query_parameters['filter'], security_filter])
        else:
            query_parameters['filter'] = security_filter
    return security_filter
def eval_get_ranked_set_baseline(self, basefile):
    # Step 1: Read the saved keyterms for a subset of articles
    # (created by analyze_baseline_queries)
    g = Graph()
    g.parse(self.generic_path("keyterms", "analyzed", ".n3"), format="n3")
    articles = {}
    for (s, p, o) in g:
        if not str(s) in articles:
            articles[str(s)] = []
        articles[str(s)].append(str(o))

    # Step 2: Open the large whoosh index containing the text of
    # all cases. Then, create a query for each article based on
    # the keyterms.
    connector = query.Or
    indexdir = os.path.sep.join([self.config.datadir, 'ecj', 'index'])
    storage = FileStorage(indexdir)
    idx = storage.open_index()
    searcher = idx.searcher(weighting=scoring.BM25F())

    res = {}
    # for article in sorted(articles.keys()):
    for article in self._articles(basefile):
        terms = articles[article]
        rankedset = []
        #parser = qparser.QueryParser("content", idx.schema)
        #q = parser.parse(connector.join(terms))
        q = query.And([
            # query.Term("articles", article),
            connector([query.Term("content", x) for x in terms])
        ])
        # print q
        # self.log.debug("Article %s: %s", article, " or ".join(terms))
        results = searcher.search(q, limit=None)
        resultidx = 0
        # self.log.info("Keyterms for result: %r" %
        #               results.key_terms("content", docs=10, numterms=10))
        for result in results:
            reslbl = "%s (%s)" % (result['basefile'],
                                  results.score(resultidx))
            rankedset.append([result['basefile'], reslbl])
            # self.log.debug(u"\t%s: %2.2d" %
            #                (result['title'], results.score(resultidx)))
            resultidx += 1
        self.log.info(
            "Created baseline ranked set for %s: Top result %s (of %s)"
            % (article.split("/")[-1], rankedset[0][0], len(rankedset)))
        # return just a list of URIs, no scoring information. But the
        # full URI isnt available in the whoosh db, so we recreate it.
        res[article] = ["http://lagen.nu/ext/celex/%s" % x[0]
                        for x in rankedset]
    return res
def documents(self, **kw):
    """
    Convenience function returns the stored fields of a document
    matching the given keyword arguments, where the keyword keys are
    field names and the values are terms that must appear in the field.

    Returns a generator of dictionaries containing the stored fields
    of any documents matching the keyword arguments.
    """
    q = query.And([query.Term(k, v) for k, v in kw.iteritems()])
    doc_reader = self.doc_reader
    return (doc_reader[docnum] for docnum in q.docs(self))
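# Hedged usage sketch (the open `searcher`, field names, and values are
# assumptions for illustration): each keyword argument becomes an exact Term,
# and all of them are ANDed, so the call is roughly equivalent to searching
# with the hand-built query shown below.
from whoosh import query

for stored_fields in searcher.documents(status=u"open", owner=u"alice"):
    print(stored_fields)

# Equivalent query constructed explicitly:
equivalent = query.And([query.Term("status", u"open"),
                        query.Term("owner", u"alice")])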
def get_filter(self, querydict):
    """
    Generates a Whoosh query filter reflecting which facets are
    currently selected.

    Takes `querydict` - a MultiDict with current HTTP GET params.
    """
    terms = []
    for field in self.get_fields():
        # user-provided values concerning a given field
        values = querydict.getlist('filter_' + field)
        if values:
            subterms = [query.Term(field, val) for val in values]
            terms.append(query.Or(subterms))
    return query.And(terms)
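# Hedged sketch of what get_filter() produces for GET parameters such as
# ?filter_color=red&filter_color=blue&filter_size=large (the facet names are
# invented): values for one facet are ORed, and the per-facet groups are
# ANDed together.
from whoosh import query

expected = query.And([
    query.Or([query.Term("color", "red"), query.Term("color", "blue")]),
    query.Or([query.Term("size", "large")]),
])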
def GET(self):
    search_term = self.request.get_param("s")
    if search_term:
        search_term = search_term.replace("tag:", "tags:")
        searcher = RecipeSearcher()
        if self.session.id:
            allow = q.Or([
                q.And([
                    q.Term("user", self.session.id),
                    q.Term("deleted", False),
                    q.Term("reported", False)
                ]),
                q.And([
                    q.Term("public", True),
                    q.Term("deleted", False),
                    q.Term("reported", False)
                ])
            ])
        else:
            allow = q.And([
                q.Term("public", True),
                q.Term("deleted", False),
                q.Term("reported", False)
            ])

        ids = searcher.search(search_term, collection=True, allow=allow)
        if ids is not None:
            page = Paginate(ids, self.request, "title",
                            sort_direction_default="desc")
            return page

    return {"page": None, "pail": None}
def test_multireader_not():
    schema = fields.Schema(id=fields.STORED, f=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(id=0, f=u("alfa bravo chralie"))
    w.add_document(id=1, f=u("bravo chralie delta"))
    w.add_document(id=2, f=u("charlie delta echo"))
    w.add_document(id=3, f=u("delta echo foxtrot"))
    w.add_document(id=4, f=u("echo foxtrot golf"))
    w.commit()

    with ix.searcher() as s:
        q = query.And([query.Term("f", "delta"),
                       query.Not(query.Term("f", "delta"))])
        r = s.search(q)
        assert_equal(len(r), 0)

    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(id=5, f=u("alfa bravo chralie"))
    w.add_document(id=6, f=u("bravo chralie delta"))
    w.commit(merge=False)
    w = ix.writer()
    w.add_document(id=7, f=u("charlie delta echo"))
    w.add_document(id=8, f=u("delta echo foxtrot"))
    w.commit(merge=False)
    w = ix.writer()
    w.add_document(id=9, f=u("echo foxtrot golf"))
    w.add_document(id=10, f=u("foxtrot golf delta"))
    w.commit(merge=False)
    assert len(ix._segments()) > 1

    with ix.searcher() as s:
        q = query.And([query.Term("f", "delta"),
                       query.Not(query.Term("f", "delta"))])
        r = s.search(q)
        assert_equal(len(r), 0)
def test_can_parse_complex_query(self):
    parsed_query = self.parser.parse("content:test $ticket $unresolved")
    self.assertEqual(
        parsed_query,
        query.And([
            query.Term('content', 'test'),
            query.Term('type', 'ticket'),
            query.Not(
                query.Or([
                    query.Term('status', 'resolved'),
                    query.Term('status', 'closed')
                ]))
        ]))
def test_filtered_grouped():
    schema = fields.Schema(tag=fields.ID, text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    domain = u("alfa bravo charlie delta echo foxtrot").split()

    with ix.writer() as w:
        for i, ls in enumerate(permutations(domain, 3)):
            tag = u(str(i % 3))
            w.add_document(tag=tag, text=u(" ").join(ls))

    with ix.searcher() as s:
        f = query.And([query.Term("text", "charlie"),
                       query.Term("text", "delta")])
        r = s.search(query.Every(), filter=f, groupedby="tag", limit=None)
        assert len(r) == 24
def compose_whoosh_terms(field, value, schema):
    """Compose a Term query for the given field and value."""
    _qsub = QueryParser(field, schema=schema)
    _parse = _qsub.parse(value.lower())
    _terms = _parse.all_terms()
    if len(_terms) == 1:
        return query.Term(field, value.lower())
    else:
        lst = []
        for terms in _terms:
            if terms[1]:
                lst.append(query.Term(field, terms[1]))
        # The full, unsplit value is left out for now
        # lst.append(query.Term(field, value.lower()))
        return query.And(lst)
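# Hedged usage sketch (the schema and inputs are assumptions): a multi-word
# value is split by the field's parser and the pieces are ANDed, while a
# single-word value comes back as a plain Term.
from whoosh import fields

schema = fields.Schema(title=fields.TEXT)
print(compose_whoosh_terms("title", "Deep Learning", schema))
# -> roughly And([Term('title', 'deep'), Term('title', 'learning')])
#    (term order may vary, since all_terms() returns a set)
print(compose_whoosh_terms("title", "Python", schema))
# -> Term('title', 'python')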
def do_search(txt, sumlevel=None, kind=None, tries=0, limit=10, is_stem=None):
    txt = txt.replace(",", "")
    my_filter = None
    if kind and sumlevel:
        kf = query.Term("kind", kind)
        sf = query.Term("sumlevel", sumlevel)
        my_filter = query.And([kf, sf])
    elif kind:
        my_filter = query.Term("kind", kind)
    elif sumlevel:
        my_filter = query.Term("sumlevel", sumlevel)
    if is_stem and is_stem > 0 and my_filter is not None:
        my_filter = my_filter & query.NumericRange("is_stem", 1, is_stem)
    elif is_stem and is_stem > 0 and my_filter is None:
        my_filter = query.NumericRange("is_stem", 1, is_stem)

    if tries > 2:
        return [], [], []

    q = qp.parse(txt)
    weighter = SimpleWeighter(txt, B=.45, content_B=1.0, K1=1.5)
    with ix.searcher(weighting=weighter) as s:
        if len(txt) > 2:
            corrector = s.corrector("display")
            suggs = corrector.suggest(txt, limit=10, maxdist=2, prefix=3)
        else:
            suggs = []
        results = s.search_page(q, 1, sortedby=[scores], pagelen=20,
                                filter=my_filter)
        data = [[r["id"], r["name"], r["zvalue"], r["kind"], r["display"],
                 r["sumlevel"] if "sumlevel" in r else "",
                 r["is_stem"] if "is_stem" in r else False,
                 r["url_name"] if "url_name" in r else None]
                for r in results]

        if not data and suggs:
            return do_search(suggs[0], sumlevel, kind, tries=tries + 1,
                             limit=limit, is_stem=is_stem)

        return data, suggs, tries
def test_current_terms():
    domain = u("alfa bravo charlie delta").split()
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    for ls in permutations(domain, 3):
        w.add_document(text=" ".join(ls), _stored_text=ls)
    w.commit()

    with ix.searcher() as s:
        q = query.And([query.Term("text", "alfa"),
                       query.Term("text", "charlie")])
        m = q.matcher(s)

        while m.is_active():
            assert sorted(m.matching_terms()) == [("text", b("alfa")),
                                                  ("text", b("charlie"))]
            m.next()
def lookup(self, source_language, target_language, text):
    langfilter = query.And([
        query.Term('source_language', source_language),
        query.Term('target_language', target_language),
    ])
    self.open_searcher()
    text_query = self.parser.parse(text)
    matches = self.searcher.search(text_query, filter=langfilter,
                                   limit=20000)

    for match in matches:
        similarity = self.comparer.similarity(text, match['source'])
        if similarity < 30:
            continue
        yield (match['source'], match['target'], similarity,
               match['origin'])
def test_or_nots1():
    # Issue #285
    schema = fields.Schema(a=fields.KEYWORD(stored=True),
                           b=fields.KEYWORD(stored=True))
    st = RamStorage()
    ix = st.create_index(schema)
    with ix.writer() as w:
        w.add_document(a=u("alfa"), b=u("charlie"))

    with ix.searcher() as s:
        q = query.And([query.Term("a", "alfa"),
                       query.Or([query.Not(query.Term("b", "bravo")),
                                 query.Not(query.Term("b", "charlie"))])
                       ])
        r = s.search(q)
        assert len(r) == 1
def lookup(self, source_language, target_language, text, user, project,
           use_shared):
    langfilter = query.And([
        query.Term('source_language', source_language),
        query.Term('target_language', target_language),
        self.get_filter(user, project, use_shared, True),
    ])
    text_query = self.parser.parse(text)
    matches = self.searcher.search(text_query, filter=langfilter,
                                   limit=20000)

    for match in matches:
        similarity = self.comparer.similarity(text, match['source'])
        if similarity < 30:
            continue
        yield (match['source'], match['target'], similarity,
               match['category'], match['origin'])