示例#1
0
def test_normalize_compound():
    def leaf_query():
        # Smallest building block: two terms OR'd together.
        return Or([Term("a", u("a")), Term("a", u("b"))])

    def nested(depth):
        # Build a ternary tree of Or nodes `depth` levels deep, with the
        # leaf query at the bottom of every branch.
        if depth == 0:
            return leaf_query()
        return Or([nested(depth - 1) for _ in range(3)])

    # Normalizing the deeply nested tree must collapse it back down to
    # the simple two-term Or it was built from.
    q = nested(7).normalize()
    assert_equal(q, Or([Term("a", u("a")), Term("a", u("b"))]))
示例#2
0
def test_span_or():
    ix = get_index()
    with ix.searcher() as s:
        # Either "alfa" within 2 positions of "charlie", or "bravo" anywhere.
        near = spans.SpanNear(Term("text", "alfa"),
                              Term("text", "charlie"),
                              slop=2)
        q = spans.SpanOr([near, Term("text", "bravo")])
        matcher = q.matcher(s)
        while matcher.is_active():
            text = s.stored_fields(matcher.id())["text"]
            near_hit = "alfa" in text and "charlie" in text
            assert near_hit or "bravo" in text
            matcher.next()
示例#3
0
    def get_subitem_revs(self):
        """
        Create a list of subitems of this item.

        Subitems are in the form of storage Revisions.

        :return: iterable of storage revisions for the subitems, sorted by exact name
        """
        # Restrict to this wiki and to this item's namespace.
        query = And([Term(WIKINAME, app.cfg.interwikiname), Term(NAMESPACE, self.fqname.namespace)])
        # trick: an item of empty name can be considered as "virtual root item"
        # that has all wiki items as sub items
        if self.names:
            # Narrow the match to items whose names start with one of this
            # item's subitem prefixes.
            query = And([query, Or([Prefix(NAME_EXACT, prefix) for prefix in self.subitem_prefixes])])
        revs = flaskg.storage.search(query, sortedby=NAME_EXACT, limit=None)
        return revs
示例#4
0
def search():
    """Search endpoint: filter posts by free-text query, author and/or category.

    Query-string parameters (all optional):
      search   -- free-text query parsed against title/post_content (fuzzy)
      author   -- exact author term
      category -- exact category term
      page     -- result page number (defaults to 1)

    :return: JSON Response with matching posts, the page number and the
        total hit count, sorted by date descending, 25 hits per page.
    """
    # Renamed local (was `search`): it shadowed this view function's name.
    search_text = request.args.get('search')
    author = request.args.get('author')
    category = request.args.get('category')
    page_arg = request.args.get('page')
    page = int(page_arg) if page_arg is not None else 1

    if search_text is None and author is None and category is None:
        # No criteria at all: match every document.
        myquery = Every()
    else:
        if search_text is None:
            # Exact-term filters only; at least one of author/category is set.
            if author is not None:
                myquery = Term('author', author)
                if category is not None:
                    myquery = myquery & Term('category', category)
            else:
                myquery = Term('category', category)
        else:
            # Fuzzy full-text search over title and body ...
            myquery = MultifieldParser(["title", "post_content"],
                                       ix.schema,
                                       plugins=[FuzzyTermPlugin()]
                                       ).parse(search_text)
            # ... optionally AND-ed with the exact-term filters.
            if author is not None:
                myquery = myquery & Term('author', author)
            if category is not None:
                myquery = myquery & Term('category', category)

    with ix.searcher() as searcher:
        results = searcher.search_page(myquery,
                                       page,
                                       pagelen=25,
                                       sortedby="date",
                                       reverse=True)
        # default=str stringifies non-JSON-native values (e.g. datetimes).
        results_json = json.dumps(
            {
                "results": [dict(i) for i in results],
                "page": page,
                "total_results": results.total
            },
            default=str)

    return Response(response=results_json,
                    status=200,
                    mimetype="application/json")
示例#5
0
def get_subscribers(**meta):
    """ Get all users that are subscribed to the item

    :param meta: key/value pairs from item metadata - itemid, name, namespace, tags keys
    :return: a set of Subscriber objects
    """
    itemid = meta.get(ITEMID)
    name = meta.get(NAME)
    namespace = meta.get(NAMESPACE)
    fqname = CompositeName(namespace, ITEMID, itemid)
    tags = meta.get(TAGS)
    # Build one Term per way a user may have subscribed to this item:
    # by itemid, by namespace:name, or by namespace:tag.
    terms = []
    if itemid is not None:
        terms.extend(
            [Term(SUBSCRIPTION_IDS, "{0}:{1}".format(ITEMID, itemid))])
    if namespace is not None:
        if name is not None:
            terms.extend(
                Term(SUBSCRIPTION_IDS, "{0}:{1}:{2}".format(
                    NAME, namespace, name_)) for name_ in name)
        if tags is not None:
            terms.extend(
                Term(SUBSCRIPTION_IDS, "{0}:{1}:{2}".format(
                    TAGS, namespace, tag)) for tag in tags)
    query = Or(terms)
    with flaskg.storage.indexer.ix[LATEST_REVS].searcher() as searcher:
        result_iterators = [
            searcher.search(query, limit=None),
        ]
        subscription_patterns = searcher.lexicon(SUBSCRIPTION_PATTERNS)
        # looks like whoosh gives us bytes (not str), decode them:
        subscription_patterns = [
            p if isinstance(p, str) else p.decode()
            for p in subscription_patterns
        ]
        # Users may also subscribe via patterns; add a document iterator
        # for every pattern that matches this item's metadata.
        patterns = get_matched_subscription_patterns(subscription_patterns,
                                                     **meta)
        result_iterators.extend(
            searcher.documents(subscription_patterns=pattern)
            for pattern in patterns)
        subscribers = set()
        for user in chain.from_iterable(result_iterators):
            email = user.get(EMAIL)
            # Users without a configured email address cannot be notified.
            if email:
                from moin.user import User
                u = User(uid=user.get(ITEMID))
                # Only include users who are allowed to read the item.
                if u.may.read(fqname):
                    locale = user.get(LOCALE, DEFAULT_LOCALE)
                    subscribers.add(
                        Subscriber(user[ITEMID], user[NAME][0], email, locale))
    return subscribers
示例#6
0
def add_other_versions(searcher, results, user, staff):
    """Attach to each result the other indexed versions of the same package.

    Staff see every version; other users only see versions that are public
    or shared with them directly or via one of their groups.
    """
    if staff:
        allow_q = []
    else:
        visibility = [Term('public', 't'), Term('users', user.username.lower())]
        visibility += [Term('groups', group.name.lower())
                       for group in user.groups.all()]
        allow_q = [Or(visibility)]

    for result in results:
        vendor_name_q = Term('vendor_name',
                             '%s/%s' % (result['vendor'], result['name']))
        hits = searcher.search(And([vendor_name_q] + allow_q))
        versions = [hit.fields()['version'] for hit in hits]
        # Exclude the version we already have.
        result['others'] = [v for v in versions if v != result['version']]

    return results
示例#7
0
    def do_show(self, revid):
        """
        Show a blog item and a list of its blog entries below it.

        If tag GET-parameter is defined, the list of blog entries consists only
        of those entries that contain the tag value in their lists of tags.

        :param revid: revision id of the blog item
            NOTE(review): unused in this body -- confirm against callers.
        """
        # for now it is just one tag=value, later it could be tag=value1&tag=value2&...
        tag = request.values.get('tag')
        prefix = self.name + u'/'
        # NOTE(review): current_timestamp is never used below -- confirm
        # whether it is dead or consumed elsewhere.
        current_timestamp = int(time.time())
        terms = [
            Term(WIKINAME, app.cfg.interwikiname),
            # Only blog entry itemtypes
            Term(ITEMTYPE, ITEMTYPE_BLOG_ENTRY),
            # Only sub items of this item
            Prefix(NAME_EXACT, prefix),
        ]
        if tag:
            terms.append(Term(TAGS, tag))
        query = And(terms)

        def ptime_sort_key(searcher, docnum):
            """
            Compute the publication time key for blog entries sorting.

            If PTIME is not defined, we use MTIME.
            """
            fields = searcher.stored_fields(docnum)
            ptime = fields.get(PTIME, fields[MTIME])
            return ptime

        ptime_sort_facet = FunctionFacet(ptime_sort_key)

        # Newest entries first, no paging.
        revs = flaskg.storage.search(query,
                                     sortedby=ptime_sort_facet,
                                     reverse=True,
                                     limit=None)
        blog_entry_items = [
            Item.create(rev.name, rev_id=rev.revid) for rev in revs
        ]
        return render_template(
            'blog/main.html',
            item_name=self.name,
            fqname=split_fqname(self.name),
            blog_item=self,
            blog_entry_items=blog_entry_items,
            tag=tag,
            item=self,
        )
示例#8
0
def get_item_last_revisions(app, fqname):
    """ Get 2 or less most recent item revisions from the index

    :param app: local proxy app
    :param fqname: the fqname of the item
    :return: a list of revisions
    """
    # TODO: Implement AccessDenied or similar error in case the user does not have access to item
    # and to also to handle the case where the item has no revisions
    query = And([
        Term(WIKINAME, app.cfg.interwikiname),
        Term(fqname.field, fqname.value),
    ])
    # Newest first, capped at two revisions.
    revs = flaskg.storage.search(query, idx_name=ALL_REVS, sortedby=[MTIME],
                                 reverse=True, limit=2)
    return list(revs)
示例#9
0
 def has_word(self, character_set, key):
     """Return True if `key` is indexed in any of the selected character sets.

     :param character_set: bitmask with TRADITIONAL and/or SIMPLIFIED set
     :param key: the word to look up
     """
     assert character_set & TRADITIONAL or character_set & SIMPLIFIED
     with self._index.searcher() as searcher:
         # Documentation for Whoosh says 'in'
         # operator can be used on the searcher
         # to look for the key but it didn't work
         # for me.
         query = NullQuery()
         if character_set & TRADITIONAL:
             query |= Term("traditional", key)
         if character_set & SIMPLIFIED:
             query |= Term('simplified', key)
         return len(searcher.search(query)) > 0
示例#10
0
def search_addresses(searcher, query):
    """Collect addresses matching `query` from the to/cc/bcc/sender fields.

    :param searcher: whoosh searcher over the mail index
    :param query: (partial) address text to match
    :return: list of matched address strings
    """
    # Exclude drafts and trashed mail from the lookup.
    restrict_q = Term("tag", "drafts") | Term("tag", "trash")
    results = []
    for field in ['to', 'cc', 'bcc', 'sender']:
        query_parser = QueryParser(field, searcher.schema)
        # Wildcard search for the query both title-cased and as typed;
        # matched_terms() yields (fieldname, text) pairs for each hit.
        results.append(
            searcher.search(query_parser.parse("*%s* OR *%s*" %
                                               (query.title(), query)),
                            limit=None,
                            mask=restrict_q,
                            groupedby=sorting.FieldFacet(field,
                                                         allow_overlap=True),
                            terms=True).matched_terms())
    # Each matched term is a (field, address) pair; keep just the address.
    return [address[1] for address in flatten(results)]
示例#11
0
def item_acl_report():
    """
    Return a sorted list of all items in the wiki along with the ACL Meta-data.

    Item names are prefixed with the namespace, if there is a non-default namespace.
    If there are multiple names, the first name is used for sorting.
    """
    # All current items in this wiki except user profiles.
    query = And([
        Term(WIKINAME, app.cfg.interwikiname),
        Not(Term(NAMESPACE, NAMESPACE_USERPROFILES)),
    ])
    all_metas = flaskg.storage.search_meta(query,
                                           idx_name=LATEST_REVS,
                                           sortedby=[NAMESPACE, NAME],
                                           limit=None)
    items_acls = []
    for meta in all_metas:
        item_namespace = meta.get(NAMESPACE)
        item_id = meta.get(ITEMID)
        # Prefix item names with the namespace, if there is one.
        if item_namespace:
            item_name = [
                item_namespace + '/' + name for name in meta.get(NAME)
            ]
        else:
            item_name = meta.get(NAME)
        item_acl = meta.get(ACL)
        acl_default = item_acl is None
        if acl_default:
            # No explicit ACL: report the configured default for the namespace.
            for namespace, acl_config in app.cfg.acl_mapping:
                if item_namespace == namespace:
                    item_acl = acl_config['default']
        fqnames = gen_fqnames(meta)
        # Fix: the original also bound `fqname = fqnames[0]` to a local that
        # was never used; the binding has been removed.
        items_acls.append({
            'name': item_name,
            'name_old': meta.get('name_old', []),
            'itemid': item_id,
            'fqnames': fqnames,
            'fqname': fqnames[0],
            'acl': item_acl,
            'acl_default': acl_default
        })
    # deleted items have no names; this sort places deleted items on top of the report;
    # the display name may be similar to: "9cf939f ~(DeletedItemName)"
    items_acls = sorted(items_acls, key=lambda k: (k['name'], k['name_old']))
    return render_template('admin/item_acl_report.html',
                           title_name=_('Item ACL Report'),
                           number_items=len(items_acls),
                           items_acls=items_acls)
示例#12
0
File: filters.py  Project: jlpujante/kpi
 def filter_queryset(self, request, queryset, view):
     """Filter `queryset` by the `q` full-text query parameter via Whoosh.

     `parent=` (empty string) restricts to objects with a null parent.

     :raises NotImplementedError: if the haystack backend is not Whoosh.
     """
     if ('parent' in request.query_params and
             request.query_params['parent'] == ''):
         # Empty string means query for null parent
         queryset = queryset.filter(parent=None)
     if 'q' not in request.query_params:
         return queryset
     queryset_pks = list(queryset.values_list('pk', flat=True))
     # Idiom fix: truthiness test instead of `not len(...)`.
     if not queryset_pks:
         return queryset
     # 'q' means do a full-text search of the document fields, where the
     # critera are given in the Whoosh query language:
     # https://pythonhosted.org/Whoosh/querylang.html
     search_queryset = SearchQuerySet().models(queryset.model)
     search_backend = search_queryset.query.backend
     if not isinstance(search_backend, WhooshSearchBackend):
         raise NotImplementedError(
             'Only the Whoosh search engine is supported at this time')
     if not search_backend.setup_complete:
         search_backend.setup()
     # Parse the user's query
     user_query = QueryParser('text', search_backend.index.schema).parse(
         request.query_params['q'])
     # Construct a query to restrict the search to the appropriate model
     filter_query = Term(DJANGO_CT, get_model_ct(queryset.model))
     # Does the search index for this model have a field that allows
     # filtering by permissions?
     haystack_index = haystack.connections[
         'default'].get_unified_index().get_index(queryset.model)
     if hasattr(haystack_index, 'users_granted_permission'):
         # Also restrict the search to records that the user can access
         filter_query &= Term(
             'users_granted_permission', request.user.username)
     # Fix: use the searcher as a context manager so it is closed when we are
     # done with it (the original leaked it).
     with search_backend.index.searcher() as searcher:
         results = searcher.search(
             user_query,
             filter=filter_query,
             scored=False,
             sortedby=None,
             limit=None
         )
         pk_type = type(queryset_pks[0])
         results_pks = {
             # Coerce each `django_id` from unicode to the appropriate type,
             # usually `int`
             pk_type((x['django_id'])) for x in results
         }
     filter_pks = results_pks.intersection(queryset_pks)
     return queryset.filter(pk__in=filter_pks)
示例#13
0
def itemsize():
    """display a table with item sizes"""
    headings = [
        _('Size'),
        _('Item name'),
    ]
    # All current items in this wiki, excluding user profiles and trash.
    query = And([
        Term(WIKINAME, app.cfg.interwikiname),
        Not(Term(NAMESPACE, NAMESPACE_USERPROFILES)),
        Not(Term(TRASH, True)),
    ])
    revs = flaskg.storage.search_meta(query, idx_name=LATEST_REVS,
                                      sortedby=[NAME], limit=None)
    # Largest items first.
    rows = sorted(
        ((rev[SIZE], CompositeName(rev[NAMESPACE], NAME_EXACT, rev[NAME][0]))
         for rev in revs),
        reverse=True)
    return render_template('user/itemsize.html',
                           title_name=_("Item Sizes"),
                           headings=headings,
                           rows=rows)
def test_or_nots3():
    schema = fields.Schema(title=fields.TEXT(stored=True),
                           itemtype=fields.ID(stored=True))
    with TempIndex(schema, "ornot") as ix:
        writer = ix.writer()
        for title, itemtype in (("a1", "a"), ("a2", "a"), ("b1", "b")):
            writer.add_document(title=u(title), itemtype=u(itemtype))
        writer.commit()

        # X OR (NOT X) must match every document.
        q = Term('itemtype', 'a') | Not(Term('itemtype', 'a'))

        with ix.searcher() as s:
            titles = " ".join(hit["title"] for hit in s.search(q))
            assert titles == "a1 a2 b1"
示例#15
0
def build_filter_terms(field_name, *, include=None, exclude=None):
    """
    Build Whoosh query terms that may be used to filter a search.

    :param list include: List of values to allow in the search results. If
        `None`, no inclusion term gets produced.

    :param list exclude: List of values to deny from the search results. If
        `None`, no exclusion term gets produced.
    """
    terms = []
    if include:
        # One Or over all allowed values.
        terms.append(Or([Term(field_name, value) for value in include]))
    if exclude:
        # One Not per denied value.
        terms += [Not(Term(field_name, value)) for value in exclude]
    return terms
示例#16
0
def user_acl_report(uid):
    """Render a per-item ACL capability report for one user.

    :param uid: uid of the user to report on
    """
    # All current items in this wiki except user profiles.
    query = And([
        Term(WIKINAME, app.cfg.interwikiname),
        Not(Term(NAMESPACE, NAMESPACE_USERPROFILES))
    ])
    all_items = flaskg.storage.search_meta(query,
                                           idx_name=LATEST_REVS,
                                           sortedby=[NAMESPACE, NAME],
                                           limit=None)
    theuser = user.User(uid=uid)
    itemwise_acl = []
    # Cache: consecutive items sharing (namespace, parentnames, acl) get the
    # same rights, so the five may.* checks are reused instead of recomputed.
    last_item_acl_parts = (None, None, None)
    last_item_result = {
        'read': False,
        'write': False,
        'create': False,
        'admin': False,
        'destroy': False
    }
    for item in all_items:
        # Items without a name fall back to an itemid-based fqname.
        if item.meta.get(NAME):
            fqname = CompositeName(item.meta.get(NAMESPACE), NAME_EXACT,
                                   item.meta.get(NAME)[0])
        else:
            fqname = CompositeName(item.meta.get(NAMESPACE), ITEMID,
                                   item.meta.get(ITEMID))
        this_rev_acl_parts = (item.meta[NAMESPACE], item.meta.get(PARENTNAMES),
                              item.meta.get(ACL))
        name_parts = {
            'name': item.meta.get(NAME),
            'namespace': item.meta.get(NAMESPACE),
            'itemid': item.meta.get(ITEMID),
            'fqname': fqname
        }
        if not last_item_acl_parts == this_rev_acl_parts:
            # ACL-relevant parts changed: recompute the user's capabilities.
            last_item_acl_parts = this_rev_acl_parts
            last_item_result = {
                'read': theuser.may.read(fqname),
                'write': theuser.may.write(fqname),
                'create': theuser.may.create(fqname),
                'admin': theuser.may.admin(fqname),
                'destroy': theuser.may.destroy(fqname)
            }
        itemwise_acl.append({**name_parts, **last_item_result})
    return render_template('admin/user_acl_report.html',
                           title_name=_('User ACL Report'),
                           user_names=theuser.name,
                           itemwise_acl=itemwise_acl)
示例#17
0
def test_span_term():
    # For every word in the domain, a Term matcher must report exactly the
    # documents containing that word, with spans pointing at its position.
    ix = get_index()
    with ix.searcher() as s:
        alllists = [d["text"] for d in s.all_stored_fields()]

        for word in domain:
            q = Term("text", word)
            m = q.matcher(s)

            ids = set()
            while m.is_active():
                id = m.id()
                sps = m.spans()
                ids.add(id)
                original = list(s.stored_fields(id)["text"])
                assert word in original

                # Presumably "bravo" occurs more than once per document in
                # this fixture, so it may yield multiple spans -- TODO
                # confirm against get_index().
                if word != "bravo":
                    assert len(sps) == 1
                # The first span starts and ends at the word's first
                # occurrence (single-word spans have start == end).
                assert original.index(word) == sps[0].start
                assert original.index(word) == sps[0].end
                m.next()

            # The matched doc ids must be exactly the docs containing `word`.
            for i, ls in enumerate(alllists):
                if word in ls:
                    assert i in ids
                else:
                    assert i not in ids
示例#18
0
    def search_text(self,
                    groupname: str,
                    field: str,
                    text: str,
                    op: str,
                    sortby: str = "path",
                    scoring: str = "unscored") -> Iterable[SearchResult]:
        """Search `field` for the terms of the parsed query `text`.

        :param groupname: NOTE(review): unused in this body -- confirm callers
        :param field: default field the query parser targets
        :param text: raw query text, parsed with the whoosh query language
        :param op: "and" to require all terms, "or" to accept any term
        :param sortby: "path" to sort by path, "score" for relevance order
        :param scoring: "unscored" or "bm25f"
        :raises ValueError: if `op` is neither "and" nor "or"
        :return: yields (Path, score) pairs for each hit
        """
        assert sortby in ("path", "score")
        assert scoring in ("unscored", "bm25f")
        # Fix: an unknown `op` previously left `query` unbound and crashed
        # later with NameError; fail fast with a clear error instead.
        if op not in ("and", "or"):
            raise ValueError("op must be 'and' or 'or', got %r" % (op,))
        scored = scoring != "unscored"
        # sortby "score" maps to whoosh's default (relevance) ordering.
        sortedby = {"score": None}.get(sortby, sortby)
        limit = None

        qp = QueryParser(field, self.invindex.ix.schema)
        q = qp.parse(text)
        terms = list(
            Term(fieldname, value) for fieldname, value in q.iter_all_terms())

        query = And(terms) if op == "and" else Or(terms)

        with self.searcher() as searcher:
            for hit in searcher.search(query,
                                       limit=limit,
                                       scored=scored,
                                       sortedby=sortedby):
                yield Path(
                    hit["path"]), hit.score  # hit.pos, hit.rank, hit.docnum
示例#19
0
def more_like(pk, source, top=5):
    """Find similar units.

    :param pk: pk of the current unit, excluded from the returned list
    :param source: source text to find similar units for
    :param top: maximum number of fulltext hits to consider
    :return: (list of similar pks, dict of pk -> normalized score)
    """
    index = get_source_index()
    with index.searcher() as searcher:
        # Pull the ten most characteristic terms out of the source text.
        key_terms = searcher.key_terms_from_text('source',
                                                 source,
                                                 numterms=10,
                                                 normalize=False)
        # OR the key terms together, each boosted by its weight.
        query = Or([Term('source', term, boost=weight)
                    for term, weight in key_terms])

        # Grab fulltext results
        hits = [(hit['pk'], hit.score)
                for hit in searcher.search(query, limit=top)]
        if not hits:
            return [], {}

        # Normalize scores to 0-100
        best = max(score for _, score in hits)
        scores = {unit_pk: score * 100 / best for unit_pk, score in hits}

        # Filter results with score above 30 and not current unit
        similar = [unit_pk for unit_pk, _ in hits
                   if scores[unit_pk] > 30 and unit_pk != pk]
        return similar, scores
示例#20
0
    def more_like(self, pk, source, top=5):
        """Find similar units.

        :param pk: pk of the current unit
            NOTE(review): `pk` is never used in this body, unlike sibling
            implementations that exclude the current unit -- confirm intended.
        :param source: source text to find similar units for
        :param top: maximum number of fulltext hits to consider
        :return: list of pks scoring above half the best hit's score
        """
        index = self.get_source_index()
        with index.searcher() as searcher:
            # Extract key terms
            kts = searcher.key_terms_from_text('source',
                                               source,
                                               numterms=10,
                                               normalize=False)
            # Create an Or query from the key terms
            query = Or(
                [Term('source', word, boost=weight) for word, weight in kts])
            LOGGER.debug('more like query: %r', query)

            # Grab fulltext results
            results = [(h['pk'], h.score)
                       for h in searcher.search(query, limit=top)]
            LOGGER.debug('found %d matches', len(results))
            if not results:
                return []

            # Filter bad results: keep only hits scoring more than half as
            # well as the best hit.
            threshold = max([h[1] for h in results]) / 2
            results = [h[0] for h in results if h[1] > threshold]
            LOGGER.debug('filter %d matches over threshold %d', len(results),
                         threshold)

            return results
示例#21
0
    def search(
        self,
        query: str,
        page: int,
        pagesize: int,
        include_private: bool = True,
        extend: bool = False,
    ):
        """Search the index.

        If `include_private` is true, include also private objects and
        search in private fields.

        :param query: raw query string, parsed with the whoosh query language
        :param page: number of the result page to return
        :param pagesize: number of hits per page
        :param include_private: search private objects/fields as well
        :param extend: NOTE(review): unused in this body -- confirm callers
        :return: (total hit count, list of formatted hits)
        """
        query_parser = (self.query_parser_all
                        if include_private else self.query_parser_public)
        # Allow date expressions in queries.
        # NOTE(review): the plugin is re-added on every call; verify that
        # repeatedly adding it to the shared parser is harmless.
        query_parser.add_plugin(DateParserPlugin())
        # if private objects should not be shown, add a mask
        mask = None if include_private else Term("private", True)
        parsed_query = query_parser.parse(query)
        with self.index().searcher() as searcher:
            results = searcher.search_page(parsed_query,
                                           page,
                                           pagesize,
                                           mask=mask)
            return results.total, [self.format_hit(hit) for hit in results]
示例#22
0
def _trashed(namespace):
    """Collect summaries of all trashed items, optionally in one namespace."""
    trashedEntry = namedtuple('trashedEntry',
                              'fqname oldname revid mtime comment editor')
    q = And([Term(WIKINAME, app.cfg.interwikiname), Term(TRASH, True)])
    if namespace != NAMESPACE_ALL:
        # Narrow to the requested namespace.
        q = And([q, Term(NAMESPACE, namespace)])
    entries = []
    for meta in flaskg.storage.search_meta(q, limit=None):
        fqname = CompositeName(meta[NAMESPACE], ITEMID, meta[ITEMID])
        entries.append(trashedEntry(fqname, meta[NAME_OLD], meta[REVID],
                                    meta[MTIME], meta[COMMENT],
                                    get_editor_info(meta)))
    return entries
示例#23
0
    def search(self, w):
        """Find card names matching the query string `w`.

        Tries, in order: alias lookup, prefix matching, then combined
        fuzzy/stemmed/tokenized whoosh queries; returns a SearchResult at
        the first stage that produces a hit.
        """
        if not self.ix.up_to_date():
            self.initialize_trie(
            )  # if the index is not up to date, someone has added cards, so we reinitialize the trie

        # If we searched for an alias, make it the exact hit
        for alias, name in fetcher.card_aliases():
            if w == card.canonicalize(alias):
                return SearchResult(name, None, None, None)

        normalized = list(WhooshConstants.normalized_analyzer(w))[0].text

        # If we get matches by prefix, we return that
        exact, prefix_whole_word, other_prefixed = self.find_matches_by_prefix(
            normalized)
        if exact or len(prefix_whole_word) > 0 or len(other_prefixed) > 0:
            return SearchResult(exact, prefix_whole_word, other_prefixed, None)

        # We try fuzzy and stemmed queries
        query_normalized = fuzzy_term(normalized, self.DIST, "name_normalized")
        # Every stemmed token must match ...
        query_stemmed = And([
            Term('name_stemmed', q.text)
            for q in WhooshConstants.stem_analyzer(w)
        ])
        # ... and likewise every tokenized term, each matched fuzzily.
        query_tokenized = And([
            fuzzy_term(q.text, self.DIST, "name_tokenized")
            for q in WhooshConstants.tokenized_analyzer(w)
        ])
        # Any of the three strategies may contribute hits.
        query = Or([query_normalized, query_tokenized, query_stemmed])

        with self.ix.searcher() as searcher:
            fuzzy = [(r['name'], r.score)
                     for r in searcher.search(query, limit=40)]
        return SearchResult(exact, prefix_whole_word, other_prefixed, fuzzy)
示例#24
0
    def matcher(self, searcher, context=None):
        """Return a whoosh matcher for this phrase query.

        Implemented by translating the phrase into an ordered SpanNear2
        query over the phrase's words.

        :param searcher: whoosh searcher providing the schema and reader
        :param context: optional search context passed through to the matcher
        :raises qcore.QueryError: if the field does not record positions
        """
        from whoosh.query import Term, SpanNear2

        fieldname = self.fieldname
        if fieldname not in searcher.schema:
            return matching.NullMatcher()

        field = searcher.schema[fieldname]
        # Phrase matching requires positional postings in the field format.
        if not field.format or not field.format.supports("positions"):
            raise qcore.QueryError("Phrase search: %r field has no positions"
                                   % self.fieldname)

        terms = []
        # Build a list of Term queries from the words in the phrase
        reader = searcher.reader()
        for word in self.words:
            word = field.to_bytes(word)
            if (fieldname, word) not in reader:
                # Shortcut the query if one of the words doesn't exist.
                return matching.NullMatcher()
            terms.append(Term(fieldname, word))

        # Create the equivalent SpanNear2 query from the terms
        q = SpanNear2(terms, slop=self.slop, ordered=True, mindist=1)
        # Get the matcher
        m = q.matcher(searcher, context)

        # Propagate this query's boost onto the matcher's scores.
        if self.boost != 1.0:
            m = matching.WrappingMatcher(m, boost=self.boost)
        return m
示例#25
0
def get_query(line, ix):
    """Build an Or query over all terms parsed from the line's first field.

    :param line: tab-separated input line; field 0 is the gb18030-encoded post
    :param ix: whoosh index whose schema is used to parse the "post" field
    :return: Or query covering every (field, text) term the parser produced
    """
    fields = line.strip().split('\t')
    post = unicode(fields[0], 'gb18030')
    parsed = QueryParser("post", ix.schema).parse(post)
    terms = list(parsed.all_terms())
    return Or([Term(*x) for x in terms])
    # Fix: removed the unreachable code that followed the return; it
    # referenced an undefined `context` variable (its definition was
    # commented out) and would have raised NameError if ever reached.
示例#26
0
def test_simplify():
    schema = fields.Schema(k=fields.ID, v=fields.TEXT)
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    writer.add_document(k=u("1"), v=u("aardvark apple allan alfa bear bee"))
    writer.add_document(k=u("2"), v=u("brie glue geewhiz goop julia"))
    writer.commit()

    reader = ix.reader()
    # simplify() should expand the Prefix into the indexed "b" terms,
    # carrying the prefix query's boost onto each expanded term.
    q1 = And([Prefix("v", "b", boost=2.0), Term("v", "juliet")])
    expected = And([Or([Term('v', u('bear'), boost=2.0),
                        Term('v', u('bee'), boost=2.0),
                        Term('v', u('brie'), boost=2.0)]),
                    Term('v', 'juliet')])
    assert_equal(q1.simplify(reader), expected)
示例#27
0
def test_excludematcher():
    schema = fields.Schema(content=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    domain = ("alfa", "bravo", "charlie", "delta")

    # Three segments, each containing every permutation of the domain words.
    for _ in xrange(3):
        writer = ix.writer()
        for words in permutations(domain):
            writer.add_document(content=u(" ").join(words))
        writer.commit(merge=False)

    # Delete a few documents so the matcher has to skip over them.
    writer = ix.writer()
    for docnum in (5, 10, 28):
        writer.delete_document(docnum)
    writer.commit(merge=False)

    q = Term("content", "bravo")
    with ix.searcher() as s:
        matcher = q.matcher(s)
        while matcher.is_active():
            words = s.stored_fields(matcher.id())["content"].split()
            # Every reported span must point at an occurrence of "bravo".
            for span in matcher.spans():
                assert words[span.start] == "bravo"
            matcher.next()
示例#28
0
def test_span_near2():
    ana = analysis.SimpleAnalyzer()
    schema = fields.Schema(text=fields.TEXT(analyzer=ana, stored=True))
    ix = RamStorage().create_index(schema)
    writer = ix.writer()
    writer.add_document(text=u("The Lucene library is by Doug Cutting and Whoosh " +
                               "was made by Matt Chaput"))
    writer.commit()

    # Nest one SpanNear inside another: ("lucene" near "doug") near "whoosh".
    inner = spans.SpanNear(Term("text", "lucene"), Term("text", "doug"), slop=5)
    outer = spans.SpanNear(inner, Term("text", "whoosh"), slop=4)

    with ix.searcher() as s:
        matcher = outer.matcher(s)
        assert matcher.spans() == [spans.Span(1, 8)]
示例#29
0
 def get_query(line, ix):
     """Parse the first tab-separated field of `line` as a "post" query and
     return an Or query over all of its terms."""
     post = line.strip().split('\t')[0].decode('utf-8')
     parsed = QueryParser("post", ix.schema).parse(post)
     return Or([Term(*t) for t in list(parsed.all_terms())])
示例#30
0
def test_near_unordered():
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    writer = ix.writer()
    docs = ("alfa bravo charlie delta echo",
            "alfa bravo delta echo charlie",
            "alfa charlie bravo delta echo",
            "echo delta alfa foxtrot")
    for doc in docs:
        writer.add_document(text=u(doc))
    writer.commit()

    with ix.searcher() as s:
        # With ordered=False, "charlie bravo" matches as well as
        # "bravo charlie" (default slop), so docs 1 and 3 hit.
        q = spans.SpanNear(Term("text", "bravo"), Term("text", "charlie"),
                           ordered=False)
        matched = sorted(hit["text"] for hit in s.search(q))
        assert matched == [u('alfa bravo charlie delta echo'),
                           u('alfa charlie bravo delta echo')]