예제 #1
0
파일: catalog.py 프로젝트: hforge/itools
def _decode_simple_value(field_cls, data):
    """Decode the stored representation of a field value.

    Integer and Decimal are serialised with Xapian's sortable encoding
    (the counterpart of _encode_simple_value), so they need a special
    round-trip; any other field class is asked to decode itself.
    """
    if issubclass(field_cls, Integer):
        return int(sortable_unserialise(data))
    if issubclass(field_cls, Decimal):
        return decimal(sortable_unserialise(data))
    # A common field or a new field: delegate to the class.
    return field_cls.decode(data)
예제 #2
0
파일: catalog.py 프로젝트: nkhine/itools
def _decode_simple_value(field_cls, data):
    """Used to decode values in stored fields.

    Numeric field classes (Integer, Decimal) were written with Xapian's
    sortable serialisation (see _encode_simple_value), so undo that
    here; everything else is delegated to the field class itself.
    """
    if issubclass(field_cls, (Integer, Decimal)):
        raw = sortable_unserialise(data)
        # Integer takes precedence, matching the original check order.
        return int(raw) if issubclass(field_cls, Integer) else decimal(raw)
    return field_cls.decode(data)
예제 #3
0
    def two_range(self, field, purpose, q):
        """Check the result of a range search which should return 2 items.

        The two hits are expected to carry serialised values in [3, 4]
        and [4, 5] respectively.
        """
        r = [x for x in q.search(0, 10)]
        self.assertEqual(len(r), 2)
        # Use the supplied field/purpose instead of the hardcoded
        # ("foo", "collsort") pair, matching single_range() and making
        # the parameters actually meaningful.
        val = xapian.sortable_unserialise(r[0].get_value(field, purpose))
        self.assertTrue(3 <= val)
        self.assertTrue(val <= 4)
        val = xapian.sortable_unserialise(r[1].get_value(field, purpose))
        self.assertTrue(4 <= val)
        self.assertTrue(val <= 5)
예제 #4
0
def group_poi(request):
    """Group POI search matches by an area-code value slot.

    The 'gt' GET parameter selects the grouping slot:
    'admin_code' -> slot 1, 'prov_code' -> slot 4, anything else
    (default 'city_code') -> slot 5.  Returns a JSON string with a
    ``results`` mapping of code -> match count, or an error payload.
    """
    response = {}
    try:
        response['status'] = 'OK'
        group_type = request.GET.get('gt', 'city_code')
        if group_type == 'admin_code':
            admin_code_spy = xapian.ValueCountMatchSpy(1)
        elif group_type == 'prov_code':
            admin_code_spy = xapian.ValueCountMatchSpy(4)
        else:
            admin_code_spy = xapian.ValueCountMatchSpy(5)
        with contextlib.closing(get_xapian_conn()) as xapian_database:
            poi_query_parser = get_poi_query_parser()
            poi_query_parser.set_database(xapian_database)
            make_group_matches(
                request, poi_query_parser, admin_code_spy, xapian_database)
            group_result = {}
            for value in admin_code_spy.values():
                # Grouping values were stored with sortable_serialise.
                code = int(xapian.sortable_unserialise(value.term))
                group_result[code] = value.termfreq
            response['results'] = group_result
    except Exception as e:
        # Was `except BaseException`, which also swallowed SystemExit
        # and KeyboardInterrupt; catch only real errors.
        logger.exception(e)
        response['results'] = []
        response['size'] = 0
        response['status'] = 'ERROR_PARAMETERS'
    return json.dumps(response, ensure_ascii=False, encoding='utf-8')
예제 #5
0
    def _remove_cached_items(self, docid=None, xapid=None):
        """Remove from the cache any items for the specified document.

        The document may be specified by xappy docid, or by xapian document id.

        Raises errors.IndexerError when no cache manager is set.
        """
        if self.cache_manager is None:
            raise errors.IndexerError("CacheManager has been applied to this "
                                      "index, but is not currently set.")

        doc, xapid = self._get_xapdoc(docid, xapid)
        if doc is None:
            return

        # FIXME: this will only remove the hits from the set cache
        # manager, if we have multiple applied caches, the others won't be
        # updated.  This means that currently, if multiple caches are applied
        # and document removals happen, some of the caches will get out of
        # date; multiple caches are therefore not really suitable for use in
        # production systems - they are however useful for experimenting with
        # different caching algorithms.

        # The cached-query slot window is loop-invariant: compute it once
        # instead of on every iteration of the value loop.
        base_slot = self._cache_manager_slot_start
        upper_slot = base_slot + self.cache_manager.num_cached_queries()
        for value in doc.values():
            if not (base_slot <= value.num < upper_slot):
                continue
            # presumably stored as (max_hits - rank) so the subtraction
            # recovers the original rank -- TODO confirm against the writer
            rank = int(self._cache_manager_max_hits -
                       xapian.sortable_unserialise(value.value))
            self.cache_manager.remove_hits(
                value.num - self._cache_manager_slot_start,
                ((rank, xapid),))
예제 #6
0
 def get_popcon(self, doc):
     """Return the deserialised popcon value stored on *doc*, or 0."""
     raw = doc.get_value(XapianValues.POPCON)
     # An absent/empty slot value means no popcon data at all.
     return xapian.sortable_unserialise(raw) if raw else 0
 def get_popcon(self, doc):
     """ Return a popcon value from a xapian document """
     serialised = doc.get_value(XapianValues.POPCON)
     if not serialised:
         # No stored value: treat as zero popularity.
         return 0
     return xapian.sortable_unserialise(serialised)
예제 #8
0
    def single_range(self, field, purpose, q):
        """Check the result of a range search which should return 1 item.

        The single hit's stored value must deserialise to a number in
        the closed interval [3, 4.01].
        """
        hits = list(q.search(0, 10))
        self.assertEqual(len(hits), 1)
        val = xapian.sortable_unserialise(hits[0].get_value(field, purpose))
        self.assertTrue(3 <= val)
        self.assertTrue(val <= 4.01)
예제 #9
0
 def deconvert(self, data):
     """Map a stored xapian value back to a Python value for this field."""
     if data is None:
         return data
     ftype = self.ftype
     if ftype == PyFieldMeta.TYPE_LONG:
         # `data or long(0)` guards against empty / falsy stored input.
         return long(data or long(0))
     if ftype == PyFieldMeta.TYPE_FLOAT:
         # Floats are stored with xapian's sortable serialisation.
         return xapian.sortable_unserialise(data)
     return data.decode('utf-8')
예제 #10
0
def search_database(keywords,result_limit,limit):
    """Search the xapian database for *keywords* and return matches.

    Returns a (result_list, result_got) tuple; results are memoised in
    the cache for an hour under a key derived from keywords and
    result_limit.  Pages through the mset `limit` hits at a time until
    result_limit is exceeded or 15 pages have been tried.
    (Python 2 code: print statements, byte/unicode mixing.)
    """
    c_key=('query_'+keywords+str(result_limit)).encode('utf-8')
    CT=cache.get(c_key)
    if CT!=None:
      print "using cache",c_key
      return CT
    database = xapian.Database(databasePath)
    enquire = xapian.Enquire(database)
    queryParser = xapian.QueryParser()
    queryParser.set_stemmer(xapian.Stem('english'))
    queryParser.set_database(database)
    queryParser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    query = queryParser.parse_query(keywords)
    # Split the keywords into number / ASCII-word / 3-byte multibyte runs;
    # NOTE(review): the parsed `query` above is immediately overwritten by
    # the term-based query built below -- confirm that is intentional.
    rex=re.compile(r'[0-9]+|[a-zA-Z]+|[\x80-\xff3]{3}')
    all_terms=rex.findall(keywords.encode('utf-8'))  
    query_list = [] 
    for word in all_terms:
      query = xapian.Query(word)
      query_list.append(query)
    if len(query_list) != 1:
      query = xapian.Query(xapian.Query.OP_AND, query_list)
    else:
      query = query_list[0]

    offset= 0
    # Sort by value slots 1 then 2 rather than pure relevance.
    sorter = MultiValueSorter()
    sorter.add(1)
    sorter.add(2)
    enquire.set_query(query)
    enquire.set_sort_by_key(sorter)
    result_list=[]
    result_got=0
    max_try=0
    

    while True:
      print "loop",result_got
      print "limit",result_limit
      matches = enquire.get_mset(offset, limit)
      for match in matches:
        str_content=match.document.get_data()
        # Post-filter: keep only documents whose data contains the raw
        # keywords substring.
        if str_content.find(keywords)==-1:
          continue
        query_content=cut_str(str_content,50)
        result_got+=1
        result_list.append({"index":result_got,"query":query_content,"freq":xapian.sortable_unserialise(match.document.get_value(FREQ)),"click":xapian.sortable_unserialise(match.document.get_value(CLICK)),"date":match.document.get_value(DATE)})
      offset+=limit
      max_try+=1
      if result_got>result_limit:
        break
      if max_try>15:
        break
    # Cache the (results, count) pair for one hour.
    print cache.set(c_key,[result_list,result_got],3600)
    print "cached",c_key
    return result_list,result_got
예제 #11
0
파일: indexer.py 프로젝트: rdavydov/demozoo
    def extract(self, document):
        """Return this field's value from *document*.

        Numeric values (as reported by the content-type check) are
        deserialised with sortable_unserialise; other values are
        returned as stored.  Returns None when no slot is configured.
        """
        if not self.number:
            return None
        value = document.get_value(self.number)
        if self._is_float_or_interger(self._get_content_type(value)):
            value = xapian.sortable_unserialise(value)
        return value
예제 #12
0
 def remove_cached_items(self, iconn, doc, xapid):
     """Drop any hits cached on *doc* belonging to this cache manager."""
     # The slot window for this manager does not depend on the value
     # being inspected, so compute it once rather than per iteration.
     base_slot = cache_manager_slot_start(iconn, self.id)
     upper_slot = base_slot + self.num_cached_queries()
     for value in doc.values():
         if not (base_slot <= value.num < upper_slot):
             continue
         # presumably stored as (CACHE_MANAGER_MAX_HITS - rank); the
         # subtraction recovers the rank -- TODO confirm against writer
         rank = int(CACHE_MANAGER_MAX_HITS -
                    xapian.sortable_unserialise(value.value))
         self.remove_hits(
             value.num - base_slot,
             ((rank, xapid),))
예제 #13
0
파일: indexer.py 프로젝트: Bombe/demozoo
    def extract(self, document):
        """Fetch the stored value for this field, decoding numeric types.

        Returns None when no value slot number is configured.
        """
        if self.number:
            raw = document.get_value(self.number)
            ctype = self._get_content_type(raw)
            # Numbers were written with xapian's sortable serialisation.
            if self._is_float_or_interger(ctype):
                return xapian.sortable_unserialise(raw)
            return raw

        return None
예제 #14
0
    def display_differences(self, ids1, ids2, name1, name2):
        """Print a human-readable diff between two result-id sets.

        Shows which ids are unique to each set, then dumps the stored
        price value and termlist of every differing document.
        (Python 2 code: print statements.)
        """
        ids1_unique = ids1 - ids2
        ids2_unique = ids2 - ids1
        if ids1_unique or ids2_unique:
            print "results for %s and %s differ" % (name1, name2)
        if ids1_unique:
            print "ids only in %s: " % name1, ids1_unique
        if ids2_unique:
            print "ids only in %s: " % name2, ids2_unique

        # Symmetric difference: every id present in exactly one set.
        for i in ids1 ^ ids2:
            d = self.sconn.get_document(i)
            print "value: ", xapian.sortable_unserialise(d.get_value('price', 'collsort'))
            print "termlist: ", map (lambda t: t.term, d._doc.termlist())
예제 #15
0
파일: jot.py 프로젝트: ttaylordev/z
def doc2dict(doc):
    """Convert a xapian document into an OrderedDict of jot fields.

    'title' and 'archived' are included only when the corresponding
    value slot is non-empty; 'tags' is a (possibly empty) list split on
    the \\x1f separator; timestamps are deserialised and wrapped in
    arrow objects.
    """
    result = OrderedDict()

    result['url'] = doc.get_value(VALUE_URL)

    title = doc.get_value(VALUE_TITLE)
    if title:
        result['title'] = title.decode('UTF-8')

    tags = doc.get_value(VALUE_TAGS)
    if tags:
        result['tags'] = tags.decode('UTF-8').split(u'\x1f')
    else:
        result['tags'] = []

    result['created'] = arrow.get(
        xapian.sortable_unserialise(doc.get_value(VALUE_CREATED)))

    archived_raw = doc.get_value(VALUE_ARCHIVED)
    if archived_raw:
        result['archived'] = arrow.get(
            xapian.sortable_unserialise(archived_raw))

    result['notes'] = doc.get_data().decode('UTF-8')

    return result
예제 #16
0
파일: xaql.py 프로젝트: yamingd/play
    def _generate_records(self, mset, select=set(["*"])):
        """Yield one result dict per match in *mset*.

        Only item_id / item_type are returned here; callers are expected
        to load full details from memcached / the database afterwards.
        (Translated from the original Chinese docstring.)

        NOTE(review): the mutable default ``select=set(["*"])`` is never
        mutated below, so it is harmless, but a frozenset would be safer.
        """
        for m in mset:
            result = {"_did" : m.docid, "_score" : m.percent, "_rank" : m.rank, "_collapse_count" : m.collapse_count, "_weight" : m.weight}
            # item_id was stored with sortable_serialise, hence the round-trip.
            result['item_id'] = int(xapian.sortable_unserialise(m.document.get_value(DOC_ITEM_ID))) #int
            result['item_type'] = m.document.get_value(DOC_ITEM_TYPE)  #string
            
            if select:
                # Document data is a pickled dict of extra fields; copy the
                # requested keys (or all of them when "*" is selected).
                doc = m.document
                data_str = doc.get_data()
                if len(data_str):
                    data_dict = cPickle.loads(data_str)
                    for key, value in data_dict.items():
                        if key in select or "*" in select:
                            result[key] = value

            yield result
예제 #17
0
    def size(self):
        """Return the size of the application without dependencies

        Note that this will return the download size if the app is
        not installed and the installed size if it is installed.
        Returns None implicitly when neither a package nor a xapian
        document provides a size.
        """
        if self._pkg:
            if not self._pkg.installed:
                if self._app.archive_suite:
                    ver = self._get_version_for_archive_suite(self._pkg, self._app.archive_suite)
                    if ver:
                        return ver.size
                return self._pkg.candidate.size
            else:
                return self._pkg.installed.size
        elif self._doc:
            size = self._doc.get_value(XapianValues.DOWNLOAD_SIZE)
            if size:
                # Reuse the value already fetched instead of issuing a
                # second get_value() call on the same slot.
                return xapian.sortable_unserialise(size)
예제 #18
0
    def size(self):
        """Return the size of the application without dependencies

        Note that this will return the download size if the app is
        not installed and the installed size if it is installed.
        Returns None implicitly when neither a package nor a xapian
        document provides a size.
        """
        if self._pkg:
            if not self._pkg.installed:
                if self._app.archive_suite:
                    ver = self._get_version_for_archive_suite(
                        self._pkg, self._app.archive_suite)
                    if ver:
                        return ver.size
                return self._pkg.candidate.size
            else:
                return self._pkg.installed.size
        elif self._doc:
            size = self._doc.get_value(XapianValues.DOWNLOAD_SIZE)
            if size:
                # Reuse the value already fetched instead of issuing a
                # second get_value() call on the same slot.
                return xapian.sortable_unserialise(size)
예제 #19
0
 def remove_cached_items(self, iconn, doc, xapid):
     """Remove cached hits recorded on *doc* across all slot windows.

     Walks the document's values in slot order, advancing through the
     (base_slot, upper_slot, cache_manager) windows from
     _get_slots_info() and removing the matching (rank, xapid) hit from
     each window's cache.
     """
     slots_info = self._get_slots_info(iconn)
     if not slots_info:
         return
     index = 0
     base_slot, upper_slot, cm = slots_info[index]
     for value in doc.values():
         slot_number = value.num
         # NOTE(review): this advances at most one window per value; if a
         # slot number jumps past an entire window the range check below
         # simply skips it -- confirm slots_info windows are contiguous
         # and document values iterate in ascending slot order.
         if slot_number >= upper_slot:
             index += 1
             if index == len(slots_info):
                 return
             base_slot, upper_slot, cm = slots_info[index]
             
         if not (base_slot <= slot_number < upper_slot):
             continue
         # presumably stored as (CACHE_MANAGER_MAX_HITS - rank) -- TODO
         # confirm against the code that writes these values.
         rank = int(CACHE_MANAGER_MAX_HITS -
                    xapian.sortable_unserialise(value.value))
         cm.remove_hits(
         slot_number - base_slot,
         ((rank, xapid),))
예제 #20
0
    def _remove_cached_items(self, docid=None, xapid=None):
        """Remove from the cache any items for the specified document.

        The document may be specified by xappy docid, or by xapian
        document id.  Raises errors.IndexerError when no cache manager
        is currently set.
        """
        if self.cache_manager is None:
            raise errors.IndexerError("CacheManager has been applied to this "
                                      "index, but is not currently set.")

        doc, xapid = self._get_xapdoc(docid, xapid)
        if doc is None:
            return

        for stored in doc.values():
            # Only slots at or beyond the cache-manager start hold hits.
            if stored.num >= self._cache_manager_slot_start:
                rank = int(self._cache_manager_max_hits -
                           xapian.sortable_unserialise(stored.value))
                self.cache_manager.remove_hits(
                    stored.num - self._cache_manager_slot_start,
                    ((rank, xapid),))
예제 #21
0
    #parser.set_stemming_strategy(xapian.QueryParser.STEM_ALL)
    parser.set_database(db)
    #parser.add_prefix("pkg", "AP")
    query = parser.parse_query(search_term, 
                               xapian.QueryParser.FLAG_PARTIAL|
                               xapian.QueryParser.FLAG_WILDCARD)

    enquire = xapian.Enquire(db)
    enquire.set_sort_by_value_then_relevance(XAPIAN_VALUE_POPCON)
    enquire.set_query(query)
    matches = enquire.get_mset(0, db.get_doccount())
    print "Matches:"
    for m in matches:
        doc = m.document
        popcon = doc.get_value(XAPIAN_VALUE_POPCON)
        print doc.get_data(), "popcon:", xapian.sortable_unserialise(popcon)
        #for t in doc.termlist():
        #    print "'%s': %s (%s); " % (t.term, t.wdf, t.termfreq),
        #print "\n"
        appname = doc.get_data()
    
    # calculate a eset
    print "ESet:"
    rset = xapian.RSet()
    for m in matches:
        rset.add_document(m.docid)
    for m in enquire.get_eset(10, rset):
        print m.term


    # calulate the expansions
예제 #22
0
def test_all():
    """Smoke-test a broad slice of the Python 3 xapian bindings.

    Exercises version reporting, Document/Database basics, query
    construction, the iteration protocols, match/expand deciders, the
    query parser, stoppers, TermGenerator, value-range processors,
    field processors, and database metadata.  Mismatches raise via the
    expect*/TestFail helpers defined elsewhere in this file.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(), xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    # A regexp check would be better, but seems to create a bogus "leak" of -1
    # objects in Python 3.
    expect(len(xapian.__version__.split('.')), 3,
           'xapian.__version__ not X.Y.Z')
    expect((xapian.__version__.split('.'))[0], '1',
           'xapian.__version__ not "1.Y.Z"')

    def access_cvar():
        res = xapian.cvar
        print("Unhandled constants: ", res)
        return res

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    #
    # Python 3.5 generates a different exception message here to earlier
    # versions, so we need a check which matches both.
    expect_exception(AttributeError,
                     lambda msg: msg.find("has no attribute 'cvar'") != -1,
                     access_cvar)

    stem = xapian.Stem(b"english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    doc = xapian.Document()
    doc.set_data(b"a\0b")
    if doc.get_data() == b"a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), b"a\0b",
           "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data(b"is there anybody out there?")
    doc.add_term(b"XYzzy")
    doc.add_posting(stem(b"is"), 1)
    doc.add_posting(stem(b"there"), 2)
    doc.add_posting(stem(b"anybody"), 3)
    doc.add_posting(stem(b"out"), 4)
    doc.add_posting(stem(b"there"), 5)

    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")
    terms = ["smoke", "test", "terms"]
    expect_query(
        xapian.Query(xapian.Query.OP_OR, [t.encode('utf-8') for t in terms]),
        "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE,
                          (b"smoke", b"test", b"tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR,
                          (xapian.Query(b"smoke"), query1, b"string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(
        query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(
        xapian.Query(xapian.Query.OP_OR, [s.encode('utf-8') for s in subqs]),
        "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, b'1', b'4'),
                 "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected (or not wrapped
    # in the first cases):

    expect_exception(
        AttributeError,
        lambda msg: msg.find("has no attribute 'open_stub'") != -1,
        lambda: xapian.open_stub(b"nosuchdir/nosuchdb"))
    expect_exception(
        AttributeError,
        lambda msg: msg.find("has no attribute 'open_stub'") != -1,
        lambda: xapian.open_stub(b"nosuchdir/nosuchdb", xapian.DB_OPEN))

    expect_exception(
        xapian.DatabaseOpeningError, None,
        lambda: xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_STUB))
    expect_exception(
        xapian.DatabaseOpeningError, None, lambda: xapian.WritableDatabase(
            b"nosuchdir/nosuchdb", xapian.DB_OPEN | xapian.DB_BACKEND_STUB))

    expect_exception(
        xapian.DatabaseOpeningError, None, lambda: xapian.Database(
            b"nosuchdir/nosuchdb", xapian.DB_BACKEND_GLASS))
    expect_exception(
        xapian.DatabaseCreateError, None, lambda: xapian.WritableDatabase(
            b"nosuchdir/nosuchdb", xapian.DB_CREATE | xapian.DB_BACKEND_GLASS))

    expect_exception(
        xapian.FeatureUnavailableError, None, lambda: xapian.Database(
            b"nosuchdir/nosuchdb", xapian.DB_BACKEND_CHERT))
    expect_exception(
        xapian.FeatureUnavailableError, None, lambda: xapian.WritableDatabase(
            b"nosuchdir/nosuchdb", xapian.DB_CREATE | xapian.DB_BACKEND_CHERT))

    expect_exception(xapian.NetworkError, None, xapian.remote_open,
                     b"/bin/false", b"")
    expect_exception(xapian.NetworkError, None, xapian.remote_open_writable,
                     b"/bin/false", b"")

    expect_exception(xapian.NetworkError, None, xapian.remote_open,
                     b"127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None, xapian.remote_open_writable,
                     b"127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:

    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, b"there", b"is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = b" ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, b"is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist(b"there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(b""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, b"there"):
        count += 1
    expect(count, 2,
           "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to(b'n')
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < b'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" %
                           x.term.decode('utf-8'))

    # Feature test for Document.values
    count = 0
    for term in list(doc.values()):
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found",
                     db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    doc = xapian.Document()
    doc.set_data(b"Two")
    doc.add_posting(stem(b"out"), 1)
    doc.add_posting(stem(b"outside"), 1)
    doc.add_posting(stem(b"source"), 2)
    doc.add_value(0, b"yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        def __call__(self, doc):
            return doc.get_value(0) == b"yes"

    query = xapian.Query(stem(b"out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1,
           "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        def __call__(self, term):
            return (not term.startswith(b'a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0,
                            testexpanddecider())
    eset_terms = [item.term for item in eset]
    expect(len(eset_terms), eset.size(),
           "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith(b'a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect([i.weight for i in eset][-1] < 1.9, True,
           "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0,
                            None, 1.9)
    expect([i.weight for i in eset][-1] >= 1.9, True, "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError,
                     "Syntax: <expression> AND <expression>", qp.parse_query,
                     b"test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(
        qp.parse_query(b"NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
        "(<alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem(b'en'))
    expect_query(qp.parse_query(b"foo o", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND ((SYNONYM WILDCARD OR o) OR Zo@2))")

    expect_query(qp.parse_query(b"foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND ((SYNONYM WILDCARD OR outside) OR Zoutsid@2))")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xa3')),
                 '(foo OR bar\\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xc2\xa3')),
                 '(foo OR bar\u00a3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, b'foo', b'bar'),
                 '(foo OR bar)')

    expect_query(
        qp.parse_query(b"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
        "(<alldocuments> AND_NOT Zt\u00e9st@1)")

    doc = xapian.Document()
    doc.set_data(b"Unicode with an acc\xe9nt")
    doc.add_posting(stem(b"out\xe9r"), 1)
    expect(doc.get_data(), b"Unicode with an acc\xe9nt")
    term = next(doc.termlist()).term
    expect(term, b"out\xe9r")

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    stop.add(b'a')
    expect(stop(b'a'), True)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        def __call__(self, term):
            return term == b"b"

        def get_description(self):
            return "my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), "my_b_stopper")
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    expect(stop(b'b'), True)
    expect_query(qp.parse_query(b"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text(b'foo bar baz foo')
    expect([(item.term, item.wdf, [pos for pos in item.positer])
            for item in doc.termlist()], [(b'bar', 1, [2]), (b'baz', 1, [3]),
                                          (b'foo', 2, [1, 4])])

    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query(b'12/03/99..12/04/01')
    expect(str(query), 'Query(VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, b'$', True)
    a = '$10'
    b = '20'
    slot, a, b = vrp(a, b.encode('utf-8'))
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Feature test for xapian.FieldProcessor
    context("running feature test for xapian.FieldProcessor")

    class testfieldprocessor(xapian.FieldProcessor):
        def __call__(self, s):
            if s == 'spam':
                raise Exception('already spam')
            return xapian.Query("spam")

    qp.add_prefix('spam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam', testfieldprocessor())
    query = qp.parse_query('spam:ignored')
    expect(str(query), 'Query(spam)')

    # FIXME: This doesn't currently work:
    # expect_exception(Exception, 'already spam', qp.parse_query, 'spam:spam')

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query(b"I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata(b'Foo'), b'')
    db.set_metadata(b'Foo', b'Foo')
    expect(db.get_metadata(b'Foo'), b'Foo')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid", db.get_metadata, b'')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid", db.set_metadata, b'',
                     b'Foo')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid", db.get_metadata, b'')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(
        xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query(b'foo'), 5),
        "5 * foo")
예제 #23
0
def test_all():
    """Smoke-test a broad cross-section of the xapian Python bindings.

    Exercises version reporting, document/term handling (including NUL
    bytes and unicode), query construction, the iteration protocols
    (Query, MSet, ESet, Database, TermIter, Document.values), match and
    expand deciders, custom stoppers, the query parser, the term
    generator, value range processors, and database metadata.

    Failures are reported via the expect/expect_query/expect_exception
    helpers, which raise TestFail on mismatch.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(), xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    def access_cvar():
        return xapian.cvar

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    expect_exception(AttributeError, "'module' object has no attribute 'cvar'",
                     access_cvar)

    # Stemmer reused for all postings added below.
    stem = xapian.Stem("english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    doc = xapian.Document()
    # Embedded NUL bytes must round-trip through set_data/get_data.
    doc.set_data("a\0b")
    if doc.get_data() == "a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), "a\0b",
           "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data("is there anybody out there?")
    doc.add_term("XYzzy")
    doc.add_posting(stem("is"), 1)
    doc.add_posting(stem("there"), 2)
    doc.add_posting(stem("anybody"), 3)
    doc.add_posting(stem("out"), 4)
    doc.add_posting(stem("there"), 5)

    # In-memory database so the test needs no filesystem state.
    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, terms),
                 "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE, ("smoke", "test", "tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR,
                          (xapian.Query("smoke"), query1, "string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(
        query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, subqs), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, '1', '4'),
                 "VALUE_RANGE 0 1 4")

    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "there", "is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = " ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, "is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist("there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, "there"):
        count += 1
    expect(count, 2,
           "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to('n')
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < 'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term)

    # Feature test for Document.values
    count = 0
    for term in doc.values():
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found",
                     db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    doc = xapian.Document()
    doc.set_data("Two")
    doc.add_posting(stem("out"), 1)
    doc.add_posting(stem("outside"), 1)
    doc.add_posting(stem("source"), 2)
    doc.add_value(0, "yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        # Accept only documents whose value slot 0 holds "yes".
        def __call__(self, doc):
            return doc.get_value(0) == "yes"

    query = xapian.Query(stem("out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1,
           "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        # Reject candidate expand terms beginning with 'a'.
        def __call__(self, term):
            return (not term.startswith('a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0,
                            testexpanddecider())
    eset_terms = [term[xapian.ESET_TNAME] for term in eset.items]
    expect(len(eset_terms), eset.size(),
           "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith('a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect(eset.items[-1][xapian.ESET_WT] < 1.9, True,
           "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0,
                            None, 1.9)
    expect(eset.items[-1][xapian.ESET_WT] >= 1.9, True,
           "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError,
                     "Syntax: <expression> AND <expression>", qp.parse_query,
                     "test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(
        qp.parse_query("NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
        "(<alldocuments> AND_NOT test:(pos=1))")

    # Check QueryParser partial option
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem('en'))
    expect_query(
        qp.parse_query("foo o", qp.FLAG_PARTIAL),
        "(Zfoo:(pos=1) AND ((out:(pos=2) SYNONYM outsid:(pos=2)) OR Zo:(pos=2)))"
    )

    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (u'foo', u'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', u'bar\xa3')),
                 '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', 'bar\xc2\xa3')),
                 '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, u'foo', u'bar'),
                 '(foo OR bar)')

    expect_query(
        qp.parse_query(u"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
        "(<alldocuments> AND_NOT Zt\xc3\xa9st:(pos=1))")

    doc = xapian.Document()
    doc.set_data(u"Unicode with an acc\xe9nt")
    doc.add_posting(stem(u"out\xe9r"), 1)
    expect(doc.get_data(), u"Unicode with an acc\xe9nt".encode('utf-8'))
    term = doc.termlist().next().term
    expect(term, u"out\xe9r".encode('utf-8'))

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2) AND Za:(pos=3))")

    stop.add('a')
    expect(stop('a'), True)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2))")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        # Stop exactly the term "b"; everything else passes through.
        def __call__(self, term):
            return term == "b"

        def get_description(self):
            return u"my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), u"my_b_stopper")
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2) AND Za:(pos=3))")

    expect(stop('b'), True)
    expect_query(qp.parse_query(u"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2))")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text('foo bar baz foo')
    expect([(item.term, item.wdf, [pos for pos in item.positer])
            for item in doc.termlist()], [('bar', 1, [2]), ('baz', 1, [3]),
                                          ('foo', 2, [1, 4])])

    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query('12/03/99..12/04/01')
    expect(str(query), 'Xapian::Query(VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, '$', True)
    a = '$10'
    b = '20'
    slot, a, b = vrp(a, b)
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query("I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata('Foo'), '')
    db.set_metadata('Foo', 'Foo')
    expect(db.get_metadata('Foo'), 'Foo')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid", db.get_metadata, '')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid", db.set_metadata, '',
                     'Foo')
    expect_exception(xapian.InvalidArgumentError,
                     "Empty metadata keys are invalid", db.get_metadata, '')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(
        xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query('foo'), 5),
        "5 * foo")
예제 #24
0
def test_all():
    """Smoke-test the xapian Python bindings (variant of the suite above).

    In addition to the document/query/iteration/decider/parser coverage,
    this variant also checks that the database factory functions
    (open_stub, brass_open, chert_open, remote_open[_writable]) are
    wrapped and raise the expected xapian exceptions on bad input.

    Failures are reported via the expect/expect_query/expect_exception
    helpers, which raise TestFail on mismatch.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(), xapian.minor_version(), xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    def access_cvar():
        return xapian.cvar

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    expect_exception(AttributeError, "'module' object has no attribute 'cvar'", access_cvar)

    # Stemmer reused for all postings added below.
    stem = xapian.Stem("english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    doc = xapian.Document()
    # Embedded NUL bytes must round-trip through set_data/get_data.
    doc.set_data("a\0b")
    if doc.get_data() == "a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), "a\0b", "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data("is there anybody out there?")
    doc.add_term("XYzzy")
    doc.add_posting(stem("is"), 1)
    doc.add_posting(stem("there"), 2)
    doc.add_posting(stem("anybody"), 3)
    doc.add_posting(stem("out"), 4)
    doc.add_posting(stem("there"), 5)

    # In-memory database so the test needs no filesystem state.
    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, terms), "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE, ("smoke", "test", "tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR, (xapian.Query("smoke"), query1, "string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, subqs), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, "1", "4"), "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected:

    expect_exception(xapian.DatabaseOpeningError, None, xapian.open_stub, "nosuchdir/nosuchdb")
    expect_exception(xapian.DatabaseOpeningError, None, xapian.open_stub, "nosuchdir/nosuchdb", xapian.DB_OPEN)

    expect_exception(xapian.DatabaseOpeningError, None, xapian.brass_open, "nosuchdir/nosuchdb")
    expect_exception(xapian.DatabaseCreateError, None, xapian.brass_open, "nosuchdir/nosuchdb", xapian.DB_CREATE)

    expect_exception(xapian.DatabaseOpeningError, None, xapian.chert_open, "nosuchdir/nosuchdb")
    expect_exception(xapian.DatabaseCreateError, None, xapian.chert_open, "nosuchdir/nosuchdb", xapian.DB_CREATE)

    expect_exception(xapian.NetworkError, None, xapian.remote_open, "/bin/false", "")
    expect_exception(xapian.NetworkError, None, xapian.remote_open_writable, "/bin/false", "")

    expect_exception(xapian.NetworkError, None, xapian.remote_open, "127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None, xapian.remote_open_writable, "127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:

    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "there", "is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = " ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, "is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist("there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, "there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to("n")
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < "n":
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term)

    # Feature test for Document.values
    count = 0
    for term in doc.values():
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    doc = xapian.Document()
    doc.set_data("Two")
    doc.add_posting(stem("out"), 1)
    doc.add_posting(stem("outside"), 1)
    doc.add_posting(stem("source"), 2)
    doc.add_value(0, "yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        # Accept only documents whose value slot 0 holds "yes".
        def __call__(self, doc):
            return doc.get_value(0) == "yes"

    query = xapian.Query(stem("out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1, "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        # Reject candidate expand terms beginning with 'a'.
        def __call__(self, term):
            return not term.startswith("a")

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, testexpanddecider())
    eset_terms = [term[xapian.ESET_TNAME] for term in eset.items]
    expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith("a")]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect(eset.items[-1][xapian.ESET_WT] < 1.9, True, "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, None, 1.9)
    expect(eset.items[-1][xapian.ESET_WT] >= 1.9, True, "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, "test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query("NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT), "(<alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem("en"))
    expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL), "(Zfoo@1 AND ((out@2 SYNONYM outsid@2) OR Zo@2))")

    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL), "(Zfoo@1 AND Zoutsid@2)")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (u"foo", u"bar")), "(foo OR bar)")
    expect_query(xapian.Query(xapian.Query.OP_OR, ("foo", u"bar\xa3")), "(foo OR bar\xc2\xa3)")
    expect_query(xapian.Query(xapian.Query.OP_OR, ("foo", "bar\xc2\xa3")), "(foo OR bar\xc2\xa3)")
    expect_query(xapian.Query(xapian.Query.OP_OR, u"foo", u"bar"), "(foo OR bar)")

    expect_query(
        qp.parse_query(u"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT), "(<alldocuments> AND_NOT Zt\xc3\xa9st@1)"
    )

    doc = xapian.Document()
    doc.set_data(u"Unicode with an acc\xe9nt")
    doc.add_posting(stem(u"out\xe9r"), 1)
    expect(doc.get_data(), u"Unicode with an acc\xe9nt".encode("utf-8"))
    term = doc.termlist().next().term
    expect(term, u"out\xe9r".encode("utf-8"))

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop("a"), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo@1 AND Zbar@2 AND Za@3)")

    stop.add("a")
    expect(stop("a"), True)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        # Stop exactly the term "b"; everything else passes through.
        def __call__(self, term):
            return term == "b"

        def get_description(self):
            return u"my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), u"my_b_stopper")
    qp.set_stopper(stop)
    expect(stop("a"), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN), "(Zfoo@1 AND Zbar@2 AND Za@3)")

    expect(stop("b"), True)
    expect_query(qp.parse_query(u"foo bar b", qp.FLAG_BOOLEAN), "(Zfoo@1 AND Zbar@2)")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text("foo bar baz foo")
    expect(
        [(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()],
        [("bar", 1, [2]), ("baz", 1, [3]), ("foo", 2, [1, 4])],
    )

    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query("12/03/99..12/04/01")
    expect(str(query), "Query(0 * VALUE_RANGE 1 19991203 20011204)")

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, "$", True)
    a = "$10"
    b = "20"
    slot, a, b = vrp(a, b)
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query("I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata("Foo"), "")
    db.set_metadata("Foo", "Foo")
    expect(db.get_metadata("Foo"), "Foo")
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, "")
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, "", "Foo")
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, "")

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query("foo"), 5), "5 * foo")
예제 #25
0
    def query(self,
              querystring=None,
              qtype=None,
              begin=None,
              end=None,
              keywords=[],
              hashtags=[],
              synonymslist=[],
              emotiononly=False):
        if qtype == 'hy':
            self.qp.add_valuerangeprocessor(
                xapian.NumberValueRangeProcessor(self.timestampvi, ''))
            querystring = begin + '..' + end

            if emotiononly:
                self.qp.add_valuerangeprocessor(
                    xapian.NumberValueRangeProcessor(self.emotiononlyvi, 'f',
                                                     False))
                querystring += ' 1.0..1.0f'

            query = self.qp.parse_query(querystring)
            print "Parsed query is: %s" % [str(query)]

            self.enquire.set_query(query)
            #matches = self.enquire.get_mset(0, self.maxitems)
            matches = self.enquire.get_mset(0, 10000)
            # Display the results.
            print "%i results found." % matches.size()

            if not self.lowkeywords_proc(matches):
                return
            emotions_list, keywords_list = self.keywords_and_emotions_list_proc(
                matches)

            return emotions_list, keywords_list

        if qtype == 'yq':
            self.qp.add_valuerangeprocessor(
                xapian.NumberValueRangeProcessor(self.timestampvi, ''))
            querystring = begin + '..' + end
            query = self.qp.parse_query(querystring)
            print "Parsed query is: %s" % [str(query)]

            self.enquire.set_query(query)
            #matches = self.enquire.get_mset(0,10)
            matches = self.enquire.get_mset(0, self.maxitems)

            # Display the results.
            print "%i results found." % matches.size()

            keywords_arr = []
            for m in matches:
                #hashtag
                hashtags = json.loads(m.document.get_value(self.hashtagsvi))

                #keywords
                keywords_hash = json.loads(
                    m.document.get_value(self.keywordsvi))
                keywords_arr.append(keywords_hash)
                #keywords_counter += Counter(json.loads(m.document.get_value(self.keywordsvi)))

            print 'mapreduce begin: ', str(
                time.strftime("%H:%M:%S", time.gmtime()))
            mapper = SimpleMapReduce(hasharr_to_list, count_words)
            word_counts = mapper(keywords_arr)
            keywords_hash = {}
            for word, count in word_counts:
                keywords_hash[word] = count
            for synonyms in synonymslist:
                if len(synonyms) >= 2 and synonyms[0] in keywords_hash:
                    for word in synonyms[1:]:
                        if word in keywords_hash:
                            keywords_hash[synonyms[0]] += keywords_hash[word]
                            del keywords_hash[word]
            print 'mapreduce end: ', str(
                time.strftime("%H:%M:%S", time.gmtime()))

            #print keywords_counter
            return hashtags, keywords_hash

        if qtype == 'lh':
            self.qp.add_valuerangeprocessor(
                xapian.NumberValueRangeProcessor(self.timestampvi, ''))
            timequerystr = begin + '..' + end
            timequery = self.qp.parse_query(timequerystr)

            hashtags = ['H' + hashtag.lower() for hashtag in hashtags]
            keywords = [keyword.lower() for keyword in keywords]
            keywords.extend(hashtags)
            if len(keywords) > 0:
                wordsquery = xapian.Query(xapian.Query.OP_OR, keywords)
            else:
                return None

            query = xapian.Query(xapian.Query.OP_AND, [timequery, wordsquery])
            print "Parsed query is: %s" % [str(query)]

            self.enquire.set_query(query)
            self.enquire.set_sort_by_value(self.timestampvi, False)
            #matches = self.enquire.get_mset(0,10)
            matches = self.enquire.get_mset(0, self.maxitems)

            # Display the results.
            print "%i results found." % matches.size()

            results = []
            for m in matches:
                result = {}
                result['location'] = m.document.get_value(self.loctvi)
                result['repost_location'] = m.document.get_value(
                    self.reploctvi)
                result['timestamp'] = xapian.sortable_unserialise(
                    m.document.get_value(self.timestampvi))
                results.append(result)

            return results
예제 #26
0
import heapq
import os
import sys
import xapian

sys.path.insert(0, "../")
from softwarecenter.enums import *
from softwarecenter.utils import *

if __name__ == "__main__":

    topn = 20
    if len(sys.argv) > 1:
        topn = int(sys.argv[1])

    pathname = os.path.join(XAPIAN_BASE_PATH, "xapian")
    db = xapian.Database(pathname)

    heap = []
    for m in db.postlist(""):
        doc = db.get_document(m.docid)
        pkgname = doc.get_value(XAPIAN_VALUE_PKGNAME)
        appname = doc.get_value(XAPIAN_VALUE_APPNAME)
        summary = doc.get_value(XAPIAN_VALUE_SUMMARY)
        popcon = xapian.sortable_unserialise(doc.get_value(XAPIAN_VALUE_POPCON))
        heapq.heappush(heap, (popcon, appname, pkgname, summary))

    for (popcon, appname, pkgname, summary) in heapq.nlargest(topn, heap):
        print "[%i] %s - %s [%s]" % (popcon, appname, summary, pkgname)
예제 #27
0
def test_all():
    """Smoke-test the Python 3 xapian bindings end to end.

    Exercises version reporting, Document/Database basics, query
    construction, the iteration protocols, Match/Expand deciders, custom
    stoppers, the TermGenerator, the QueryParser and value-range
    processors.

    NOTE(review): relies on harness helpers (expect, expect_query,
    expect_exception, context, TestFail) defined elsewhere in the suite;
    they are not visible in this chunk.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    # A regexp check would be better, but seems to create a bogus "leak" of -1
    # objects in Python 3.
    expect(len(xapian.__version__.split('.')), 3, 'xapian.__version__ not X.Y.Z')
    expect((xapian.__version__.split('.'))[0], '1', 'xapian.__version__ not "1.Y.Z"')

    def access_cvar():
        res = xapian.cvar
        print("Unhandled constants: ", res)
        return res

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    expect_exception(AttributeError, "'module' object has no attribute 'cvar'",
                     access_cvar)

    stem = xapian.Stem(b"english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    # Embedded NUL bytes must round-trip through set_data/get_data.
    doc = xapian.Document()
    doc.set_data(b"a\0b")
    if doc.get_data() == b"a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), b"a\0b", "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data(b"is there anybody out there?")
    doc.add_term(b"XYzzy")
    doc.add_posting(stem(b"is"), 1)
    doc.add_posting(stem(b"there"), 2)
    doc.add_posting(stem(b"anybody"), 3)
    doc.add_posting(stem(b"out"), 4)
    doc.add_posting(stem(b"there"), 5)

    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, [t.encode('utf-8') for t in terms]),
                 "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE, (b"smoke", b"test", b"tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR, (xapian.Query(b"smoke"), query1, b"string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, [s.encode('utf-8') for s in subqs]), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, b'1', b'4'),
                 "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected (or not wrapped
    # in the first cases):

    expect_exception(AttributeError, "'module' object has no attribute 'open_stub'",
            lambda : xapian.open_stub(b"nosuchdir/nosuchdb"))
    expect_exception(AttributeError, "'module' object has no attribute 'open_stub'",
            lambda : xapian.open_stub(b"nosuchdir/nosuchdb", xapian.DB_OPEN))

    expect_exception(AttributeError, "'module' object has no attribute 'chert_open'",
            lambda : xapian.chert_open(b"nosuchdir/nosuchdb"))
    expect_exception(AttributeError, "'module' object has no attribute 'chert_open'",
            lambda : xapian.chert_open(b"nosuchdir/nosuchdb", xapian.DB_CREATE))

    expect_exception(xapian.DatabaseOpeningError, None,
            lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_STUB))
    expect_exception(xapian.DatabaseOpeningError, None,
            lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_OPEN|xapian.DB_BACKEND_STUB))

    expect_exception(xapian.DatabaseOpeningError, None,
            lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_GLASS))
    expect_exception(xapian.DatabaseCreateError, None,
            lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_CREATE|xapian.DB_BACKEND_GLASS))

    expect_exception(xapian.DatabaseOpeningError, None,
            lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_CHERT))
    expect_exception(xapian.DatabaseCreateError, None,
            lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_CREATE|xapian.DB_BACKEND_CHERT))

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"/bin/false", b"")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"/bin/false", b"")

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:

    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, b"there", b"is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = b" ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, b"is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist(b"there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(b""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, b"there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to(b'n')
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < b'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term.decode('utf-8'))

    # Feature test for Document.values
    count = 0
    for term in list(doc.values()):
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    doc = xapian.Document()
    doc.set_data(b"Two")
    doc.add_posting(stem(b"out"), 1)
    doc.add_posting(stem(b"outside"), 1)
    doc.add_posting(stem(b"source"), 2)
    doc.add_value(0, b"yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        def __call__(self, doc):
            return doc.get_value(0) == b"yes"

    query = xapian.Query(stem(b"out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1, "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        def __call__(self, term):
            return (not term.startswith(b'a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, testexpanddecider())
    eset_terms = [item.term for item in eset]
    expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith(b'a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect([i.weight for i in eset][-1] < 1.9, True, "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, None, 1.9)
    expect([i.weight for i in eset][-1] >= 1.9, True, "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, b"test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query(b"NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem(b'en'))
    expect_query(qp.parse_query(b"foo o", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND ((out@2 SYNONYM outsid@2) OR Zo@2))")

    expect_query(qp.parse_query(b"foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND Zoutsid@2)")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xa3')),
                 '(foo OR bar\\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xc2\xa3')),
                 '(foo OR bar\u00a3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, b'foo', b'bar'),
                 '(foo OR bar)')

    expect_query(qp.parse_query(b"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT Zt\u00e9st@1)")

    doc = xapian.Document()
    doc.set_data(b"Unicode with an acc\xe9nt")
    doc.add_posting(stem(b"out\xe9r"), 1)
    expect(doc.get_data(), b"Unicode with an acc\xe9nt")
    term = next(doc.termlist()).term
    expect(term, b"out\xe9r")

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    stop.add(b'a')
    expect(stop(b'a'), True)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        def __call__(self, term):
            return term == b"b"

        def get_description(self):
            return "my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), "my_b_stopper")
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    expect(stop(b'b'), True)
    expect_query(qp.parse_query(b"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text(b'foo bar baz foo')
    expect([(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()], [(b'bar', 1, [2]), (b'baz', 1, [3]), (b'foo', 2, [1, 4])])


    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query(b'12/03/99..12/04/01')
    expect(str(query), 'Query(0 * VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, b'$', True)
    a = '$10'
    b = '20'
    slot, a, b = vrp(a, b.encode('utf-8'))
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Feature test for xapian.FieldProcessor
    context("running feature test for xapian.FieldProcessor")
    class testfieldprocessor(xapian.FieldProcessor):
        def __call__(self, s):
            if s == 'spam':
                raise Exception('already spam')
            return xapian.Query("spam")

    qp.add_prefix('spam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam', testfieldprocessor())
    query = qp.parse_query('spam:ignored')
    expect(str(query), 'Query(spam)')

    # FIXME: This doesn't currently work:
    # expect_exception(Exception, 'already spam', qp.parse_query, 'spam:spam')

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query(b"I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata(b'Foo'), b'')
    db.set_metadata(b'Foo', b'Foo')
    expect(db.get_metadata(b'Foo'), b'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, b'', b'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query(b'foo'), 5),
                 "5 * foo")
예제 #28
0
def test_all():
    """Smoke-test the Python 2 xapian bindings end to end.

    Older (Python 2, str/unicode) variant of the bindings smoke test:
    version reporting, Document/Database basics, query construction,
    iteration protocols, deciders, stoppers, TermGenerator, QueryParser
    and value-range processors.

    NOTE(review): relies on harness helpers (expect, expect_query,
    expect_exception, context, TestFail) defined elsewhere in the suite;
    they are not visible in this chunk.
    """
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    stem = xapian.Stem("english")
    expect(stem.get_description(), "Xapian::Stem(english)", "Unexpected stem.get_description()")

    # Embedded NUL bytes must round-trip through set_data/get_data.
    doc = xapian.Document()
    doc.set_data("a\0b")
    if doc.get_data() == "a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), "a\0b", "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data("is there anybody out there?")
    doc.add_term("XYzzy")
    doc.add_posting(stem("is"), 1)
    doc.add_posting(stem("there"), 2)
    doc.add_posting(stem("anybody"), 3)
    doc.add_posting(stem("out"), 4)
    doc.add_posting(stem("there"), 5)

    db = xapian.inmemory_open()
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, terms),
                 "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE, ("smoke", "test", "tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR, (xapian.Query("smoke"), query1, "string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, subqs), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, '1', '4'),
                 "VALUE_RANGE 0 1 4")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "there", "is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = " ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, "is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist("there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, "there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to('n')
    while True:
        try:
            x = term.next()
        except StopIteration:
            break
        if x.term < 'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term)

    # Feature test for Document.values
    count = 0
    for term in doc.values():
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    doc = xapian.Document()
    doc.set_data("Two")
    doc.add_posting(stem("out"), 1)
    doc.add_posting(stem("outside"), 1)
    doc.add_posting(stem("source"), 2)
    doc.add_value(0, "yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        def __call__(self, doc):
            return doc.get_value(0) == "yes"

    query = xapian.Query(stem("out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1, "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        def __call__(self, term):
            return (not term.startswith('a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, testexpanddecider())
    eset_terms = [term[xapian.ESET_TNAME] for term in eset.items]
    expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand")
    if filter(lambda t: t.startswith('a'), eset_terms):
        raise TestFail("ExpandDecider was not used")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, "test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query("NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT test:(pos=1))")

    # Check QueryParser partial option
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem('en'))
    expect_query(qp.parse_query("foo o", qp.FLAG_PARTIAL),
                 "(Zfoo:(pos=1) AND (out:(pos=2) OR outsid:(pos=2) OR Zo:(pos=2)))")

    expect_query(qp.parse_query("foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo:(pos=1) AND Zoutsid:(pos=2))")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (u'foo', u'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', u'bar\xa3')),
                 '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, ('foo', 'bar\xc2\xa3')),
                 '(foo OR bar\xc2\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, u'foo', u'bar'),
                 '(foo OR bar)')

    expect_query(qp.parse_query(u"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT Zt\xc3\xa9st:(pos=1))")

    doc = xapian.Document()
    doc.set_data(u"Unicode with an acc\xe9nt")
    doc.add_posting(stem(u"out\xe9r"), 1)
    expect(doc.get_data(), u"Unicode with an acc\xe9nt".encode('utf-8'))
    term = doc.termlist().next().term
    expect(term, u"out\xe9r".encode('utf-8'))

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2) AND Za:(pos=3))")

    stop.add('a')
    expect(stop('a'), True)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2))")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        def __call__(self, term):
            return term == "b"

        def get_description(self):
            return u"my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), u"my_b_stopper")
    qp.set_stopper(stop)
    expect(stop('a'), False)
    expect_query(qp.parse_query(u"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2) AND Za:(pos=3))")

    expect(stop('b'), True)
    expect_query(qp.parse_query(u"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo:(pos=1) AND Zbar:(pos=2))")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text('foo bar baz foo')
    expect([(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()], [('bar', 1, [2]), ('baz', 1, [3]), ('foo', 2, [1, 4])])


    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query('12/03/99..12/04/01')
    expect(str(query), 'Xapian::Query(VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, '$', True)
    a = '$10'
    b = '20'
    slot, a, b = vrp(a, b)
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query("I like tea")

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata('Foo'), '')
    db.set_metadata('Foo', 'Foo')
    expect(db.get_metadata('Foo'), 'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, '')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, '', 'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, '')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query('foo'), 5),
                 "5 * foo")
예제 #29
0
 def get_weight(self, doc):
     """Return the document's stored numeric value, capped at self.maxval.

     The value in slot self.field is sortable-serialised; decode it and
     clamp so one outlier cannot dominate the weighting.
     """
     serialised = doc.get_value(self.field, self.purpose)
     weight = xapian.sortable_unserialise(serialised)
     return weight if weight <= self.maxval else self.maxval
예제 #30
0
# Python 2 script: list the top-N applications by popcon score from a
# software-center xapian database (XapianValues-enum variant).
import heapq
import os
import sys
import xapian

sys.path.insert(0, "../")
from softwarecenter.enums import XapianValues
from softwarecenter.paths import XAPIAN_BASE_PATH

if __name__ == "__main__":

    # Number of entries to show; overridable via the first CLI argument.
    topn = 20
    if len(sys.argv) > 1:
        topn = int(sys.argv[1])

    pathname = os.path.join(XAPIAN_BASE_PATH, "xapian")
    db = xapian.Database(pathname)

    # Empty-term postlist iterates every document in the database.
    heap = []
    for m in db.postlist(""):
        doc = db.get_document(m.docid)
        pkgname = doc.get_value(XapianValues.PKGNAME)
        appname = doc.get_value(XapianValues.APPNAME)
        summary = doc.get_value(XapianValues.SUMMARY)
        # Stored values are sortable-serialised; decode back to a number.
        popcon = xapian.sortable_unserialise(doc.get_value(
            XapianValues.POPCON))
        heapq.heappush(heap, (popcon, appname, pkgname, summary))

    for (popcon, appname, pkgname, summary) in heapq.nlargest(topn, heap):
        print "[%i] %s - %s [%s]" % (popcon, appname, summary, pkgname)
예제 #31
0
def decode_sortable_date(r):
    """Decode a sortable-serialised timestamp into an ISO-8601 string.

    Returns None for the zero sentinel (no date stored).
    """
    # Values are stored negated; undo the sign before use
    # (presumably so newer timestamps sort first -- confirm with the encoder).
    timestamp = -xapian.sortable_unserialise(r)
    if timestamp == 0:
        return None
    return time.strftime(ISO_8601, time.localtime(timestamp))
예제 #32
0
 def popcon_max(self):
     """Return the maximum desktop popcon value stored in the DB metadata."""
     serialised = self.xapiandb.get_metadata("popcon_max_desktop")
     maximum = xapian.sortable_unserialise(serialised)
     # A non-positive maximum would make popcon normalisation meaningless.
     assert maximum > 0
     return maximum
예제 #33
0
# Python 2 script: list the top-N applications by popcon score from a
# software-center xapian database.
import heapq
import os
import sys
import xapian

sys.path.insert(0, "../")
from softwarecenter.enums import XapianValues
from softwarecenter.paths import XAPIAN_BASE_PATH

if __name__ == "__main__":

    # Number of entries to show; overridable via the first CLI argument.
    topn = 20
    if len(sys.argv) > 1:
        topn = int(sys.argv[1])

    pathname = os.path.join(XAPIAN_BASE_PATH, "xapian")
    db = xapian.Database(pathname)

    # Empty-term postlist iterates every document in the database.
    heap = []
    for m in db.postlist(""):
        doc = db.get_document(m.docid)
        pkgname = doc.get_value(XapianValues.PKGNAME)
        appname = doc.get_value(XapianValues.APPNAME)
        summary = doc.get_value(XapianValues.SUMMARY)
        # Stored values are sortable-serialised; decode back to a number.
        popcon = xapian.sortable_unserialise(doc.get_value(XapianValues.POPCON))
        heapq.heappush(heap, (popcon, appname, pkgname, summary))

    for (popcon, appname, pkgname, summary) in heapq.nlargest(topn, heap):
        print "[%i] %s - %s [%s]" % (popcon, appname, summary, pkgname)
예제 #34
0
    # NOTE(review): truncated fragment -- `parser`, `db` and `search_term`
    # (and the enclosing function's def line) are not visible in this chunk.
    #parser.set_stemming_strategy(xapian.QueryParser.STEM_ALL)
    parser.set_database(db)
    #parser.add_prefix("pkg", "AP")
    # Parse the user's term with prefix-completion and wildcard support.
    query = parser.parse_query(
        search_term,
        xapian.QueryParser.FLAG_PARTIAL | xapian.QueryParser.FLAG_WILDCARD)

    enquire = xapian.Enquire(db)
    # Order primarily by stored popcon value, ties broken by relevance.
    enquire.set_sort_by_value_then_relevance(XapianValues.POPCON)
    enquire.set_query(query)
    matches = enquire.get_mset(0, db.get_doccount())
    print "Matches:"
    for m in matches:
        doc = m.document
        popcon = doc.get_value(XapianValues.POPCON)
        print doc.get_data(), "popcon:", xapian.sortable_unserialise(popcon)
        #for t in doc.termlist():
        #    print "'%s': %s (%s); " % (t.term, t.wdf, t.termfreq),
        #print "\n"
        appname = doc.get_data()

    # calculate a eset
    # Feed all matches back as a relevance set to suggest expansion terms.
    print "ESet:"
    rset = xapian.RSet()
    for m in matches:
        rset.add_document(m.docid)
    for m in enquire.get_eset(10, rset):
        print m.term

    # calulate the expansions
    completions = []
 def popcon_max(self):
     """Return the stored maximum desktop popcon value (must be positive)."""
     raw = self.xapiandb.get_metadata("popcon_max_desktop")
     result = xapian.sortable_unserialise(raw)
     assert result > 0
     return result
예제 #36
0
    def query(self, querystring=None, qtype=None, begin=None, end=None, keywords=[], hashtags=[], synonymslist=[], emotiononly=False):
        """Dispatch a xapian search according to qtype.

        'hy': time-range (+ optional emotion-only) query, returns
              (emotions_list, keywords_list) via helper methods.
        'yq': time-range query, map-reduces per-document keyword hashes
              (merging synonyms) and returns (hashtags, keywords_hash).
        'lh': time-range AND keyword/hashtag query, returns a list of
              dicts with location / repost_location / decoded timestamp.

        NOTE(review): the mutable default arguments ([]) are shared across
        calls; harmless here because each branch only rebinds them (never
        mutates the default list in place), but fragile if edited.
        """
        if qtype == 'hy':
            # Parse "begin..end" as a numeric range over the timestamp slot.
            self.qp.add_valuerangeprocessor(xapian.NumberValueRangeProcessor(self.timestampvi, ''))
            querystring = begin + '..' + end

            if emotiononly:
                # Restrict to documents whose emotion-only flag equals 1.0.
                self.qp.add_valuerangeprocessor(xapian.NumberValueRangeProcessor(self.emotiononlyvi, 'f', False))
                querystring += ' 1.0..1.0f'

            query = self.qp.parse_query(querystring)
            print "Parsed query is: %s" % [str(query)]

            self.enquire.set_query(query)
            #matches = self.enquire.get_mset(0, self.maxitems)
            matches = self.enquire.get_mset(0, 10000)
            # Display the results.
            print "%i results found." % matches.size()

            if not self.lowkeywords_proc(matches):
                return
            emotions_list, keywords_list = self.keywords_and_emotions_list_proc(matches)

            return emotions_list, keywords_list

        if qtype == 'yq':
            self.qp.add_valuerangeprocessor(xapian.NumberValueRangeProcessor(self.timestampvi, ''))
            querystring = begin + '..' + end
            query = self.qp.parse_query(querystring)
            print "Parsed query is: %s" % [str(query)]

            self.enquire.set_query(query)
            #matches = self.enquire.get_mset(0,10)
            matches = self.enquire.get_mset(0, self.maxitems)

            # Display the results.
            print "%i results found." % matches.size()

            keywords_arr = []
            for m in matches:
                #hashtag
                hashtags = json.loads(m.document.get_value(self.hashtagsvi))

                #keywords
                keywords_hash = json.loads(m.document.get_value(self.keywordsvi))
                keywords_arr.append(keywords_hash)
                #keywords_counter += Counter(json.loads(m.document.get_value(self.keywordsvi)))

            # Aggregate per-document keyword counts with a map-reduce helper.
            print 'mapreduce begin: ', str(time.strftime("%H:%M:%S", time.gmtime()))
            mapper = SimpleMapReduce(hasharr_to_list, count_words)
            word_counts = mapper(keywords_arr)
            keywords_hash = {}
            for word, count in word_counts:
                keywords_hash[word] = count
            # Fold synonym counts into the first (canonical) word of each group.
            for synonyms in synonymslist:
                if len(synonyms) >= 2 and synonyms[0] in keywords_hash:
                    for word in synonyms[1:]:
                        if word in keywords_hash:
                            keywords_hash[synonyms[0]] += keywords_hash[word]
                            del keywords_hash[word]
            print 'mapreduce end: ', str(time.strftime("%H:%M:%S", time.gmtime()))

            #print keywords_counter
            return hashtags, keywords_hash

        if qtype == 'lh':
            self.qp.add_valuerangeprocessor(xapian.NumberValueRangeProcessor(self.timestampvi, ''))
            timequerystr = begin + '..' + end
            timequery = self.qp.parse_query(timequerystr)

            # Hashtags are indexed with an 'H' prefix; fold case for both lists.
            hashtags = ['H' + hashtag.lower() for hashtag in hashtags]
            keywords = [keyword.lower() for keyword in keywords]
            keywords.extend(hashtags)
            if len(keywords) > 0:
                wordsquery = xapian.Query(xapian.Query.OP_OR, keywords)
            else:
                return None

            query = xapian.Query(xapian.Query.OP_AND, [timequery, wordsquery])
            print "Parsed query is: %s" % [str(query)]

            self.enquire.set_query(query)
            self.enquire.set_sort_by_value(self.timestampvi, False)
            #matches = self.enquire.get_mset(0,10)
            matches = self.enquire.get_mset(0, self.maxitems)

            # Display the results.
            print "%i results found." % matches.size()

            results = []
            for m in matches:
                result = {}
                result['location'] = m.document.get_value(self.loctvi)
                result['repost_location'] = m.document.get_value(self.reploctvi)
                # Timestamp slot holds a sortable-serialised number.
                result['timestamp'] = xapian.sortable_unserialise(m.document.get_value(self.timestampvi))
                results.append(result)

            return results
예제 #37
0
def xapian_search(request):

    if not xapian_avail:
        return HttpResponse('{}')

    
    search_string = request.GET.get('keywords', None)
    try:
        search_limit = int(request.GET.get('slimit', str(settings.MAX_MATCHES_SEARCH)))
    except:
        search_limit = settings.MAX_MATCHES_SEARCH
    try:
        view_limit = int(request.GET.get('vlimit', str(settings.MAX_MATCHES_SHOW)))
    except:
        view_limit = settings.MAX_MATCHES_SHOW
    offset = int(request.GET.get('offset', 0))
    search_max_result_length = int(request.GET.get('maxchars', '0'))
    zoomlevel = int(request.GET.get('zoomlevel', 12))
    cluster = float(request.GET.get('cluster', 0))
    details = str(request.GET.get('details', "no"))
    from_date = str(request.GET.get('datefrom', None))
    to_date = str(request.GET.get('dateto', None))


    seqnum= str(request.GET.get('seqnum', 0))

    coords = {
        'x1': float(request.GET.get('x1', 0)),
        'y1': float(request.GET.get('y1', 0)),
        'x2': float(request.GET.get('x2', 0)),
        'y2': float(request.GET.get('y2', 0)),
    }


    statuses = None
    if 'status' in request.GET:
        statuses = [int(status) for status in
                    request.GET.getlist('status')]

    tags = None
    if 'tag' in request.GET:
        tags = [ "_key_"+str(tag) for tag in
                    request.GET.getlist('tag')]
    elif 'no_tags' in request.GET:
        tags = [ "_no_key" ]

    request.session['mapzoomlevel'] = zoomlevel
    request.session['mapcenterlat'] = ( coords['y1'] + coords['y2'] ) / 2
    request.session['mapcenterlng'] = ( coords['x1'] + coords['x2'] ) / 2

    if int(request.GET.get('nosearch', 0)) > 0:
        return HttpResponse('{}')

    
    if from_date is not "None":
        date_array = from_date.split('/')
        if len(date_array) > 2:
            if int(date_array[2]+date_array[1]+date_array[0]) > 20090101:
                request.session['filter_from_date'] = str( date_array[1]+"/"+date_array[0]+"/"+date_array[2] )
            else:
                request.session['filter_from_date'] = "01/01/2009"
        else:
            from_date="01/01/2009"

    if to_date is not "None":
        date_array = to_date.split('/')
        if len(date_array) > 2:
            if int(date_array[2]+date_array[1]+date_array[0]) < int( datetime.now().strftime('%y%m%d')):
                request.session['filter_to_date'] = str( date_array[1]+"/"+date_array[0]+"/"+date_array[2] )
            else:
                request.session['filter_to_date'] = str( datetime.now().strftime('%m/%d/%Y'))
        else:
            to_date=str( datetime.now().strftime('%d/%m/%Y'))

    if search_string:
        request.session['filter_words'] = search_string
    else:
        request.session['filter_words'] =  ''

    returnarray = []
    
    
#    database = xapian.Database(settings.XAPIAN_MSG_DATABASE_HOME)
    database = xapian.Database( os.path.join(settings.TRACKER_HOME, 'db/' 'xapian-msg-index/'))
    
    enquire = xapian.Enquire(database)

    # First we'll restrict the search space by geographical data:

    lng_keywords = []

    if coords['x1'] != 0 and coords['x2'] != 0:   
        for keyword in get_latlng_keywords(coords['x1'],coords['x2']):
            lng_keywords.append("_glngrange_"+keyword)
        
        lng_query = xapian.Query(xapian.Query.OP_VALUE_RANGE,
                         settings.XAPIAN_LONGITUDE_VALUE,
                         xapian.sortable_serialise(coords['x1']),
                         xapian.sortable_serialise(coords['x2']))

    else:
        lng_query = None

    lat_keywords = []
    
    if coords['y1']!= 0 and coords['y2'] != 0:   
        for keyword in get_latlng_keywords(coords['y1'],coords['y2']):
            lat_keywords.append("_glatrange_"+keyword)

        lat_query = xapian.Query(xapian.Query.OP_VALUE_RANGE,
                        settings.XAPIAN_LATITUDE_VALUE,
                         xapian.sortable_serialise(coords['y1']),
                         xapian.sortable_serialise(coords['y2']))
    else:
        lat_query = None

        
    if lat_query and lng_query:
        lat_wordquery = xapian.Query(xapian.Query.OP_OR, lat_keywords)
        lng_wordquery = xapian.Query(xapian.Query.OP_OR, lng_keywords)
        place_wordquery = xapian.Query(xapian.Query.OP_AND, lat_wordquery, lng_wordquery)
        place_query = xapian.Query(xapian.Query.OP_AND, lat_query, lng_query)
        type_query = xapian.Query(xapian.Query.OP_AND, ["_place"])

        place_query = xapian.Query(xapian.Query.OP_AND, type_query, place_query)
        query = xapian.Query(xapian.Query.OP_AND, place_wordquery, place_query)

        enquire.set_query(query)
        matches = enquire.get_mset(offset, search_limit+offset)
        total_matches=matches.get_matches_estimated()

    screen_id_counter = 0    

    # Filter places with keywords and date:
         
    word_issues = None
    terms = None

    if search_string or from_date or to_date or tags:
        word_issues = {}
        
        issue_query = xapian.Query(xapian.Query.OP_AND, ["_issue"])
            
        if search_string:
            stemmer = xapian.Stem("finnish")
            tmp_terms = re.split (r'[\n-/:-?]', to_lower_case(search_string)) 
            terms = []
            for term in tmp_terms:
                if len(term) > 0 and not is_stopword(term):
                    stemmed = stemmer(term)
        #            if stemmed not in variables.stoplist:
                    terms.append(stemmer(to_lower_case(term)))
            
            keyword_query = xapian.Query(xapian.Query.OP_OR, terms)
    
            issue_query = xapian.Query(xapian.Query.OP_AND, issue_query, keyword_query)
    
        if from_date or to_date:
            if from_date == "None":
                from_date = str("19790626095523.000");
            else:
                from_array = from_date.split('/')
                from_date = str(from_array[2]+from_array[1]+from_array[0]+"000000.000");

            if to_date == "None" :
                to_date = str( datetime.now().strftime('%Y%m%d%H%M%S.000') );
            else:
                to_array = to_date.split('/')
                to_date = str(to_array[2]+to_array[1]+to_array[0]+"235959.999");
#                to_date_date = datetime(to_date)
#                to_date= to_date_date.strftime('%Y%m%d%H%M%S.000')

            modified_query = xapian.Query(xapian.Query.OP_VALUE_RANGE, settings.XAPIAN_MODIFIED_FIELD,
                                            xapian.sortable_serialise(float(from_date)),
                                            xapian.sortable_serialise(float(to_date)) )
            created_query = xapian.Query(xapian.Query.OP_VALUE_RANGE, settings.XAPIAN_CREATED_FIELD,
                                            xapian.sortable_serialise(float(from_date)),
                                            xapian.sortable_serialise(float(to_date)) )
            date_query = xapian.Query(xapian.Query.OP_OR, created_query, modified_query)

            issue_query =  xapian.Query(xapian.Query.OP_AND, issue_query, date_query)

        if tags:
            tag_query = xapian.Query(xapian.Query.OP_AND, tags)
            issue_query =  xapian.Query(xapian.Query.OP_AND, issue_query, tag_query)

        enquire.set_query(issue_query)
        word_matches = enquire.get_mset(offset, view_limit)
        total_matches=word_matches.get_matches_estimated()

        if total_matches == 0:
            word_issues = { "-1" : 1}

        else:
            for m in word_matches:
                word_issues[str(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_ID_FIELD))]=1
        

    if lat_query and lng_query:
        places = {}
        if word_issues:
            for m in matches:
                issue = m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_PARENT_ISSUE_FIELD)
                if  word_issues.has_key(issue):
                    y = float(xapian.sortable_unserialise(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_LATITUDE_VALUE)))
                    x = float(xapian.sortable_unserialise(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_LONGITUDE_VALUE)))
        #            if y >= coords['y1'] and y <= coords['y2'] and x >= coords['x1'] and x <= coords['x2']:
                    places[ (m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_PARENT_ISSUE_FIELD))
                            +":"+(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_PARENT_MESSAGE_FIELD))
                            +":"+(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_ID_FIELD))
                           +":"+str(m[xapian.MSET_DOCUMENT].get_data())
                                                                                   ] = [y, x]

        else:        
            for m in matches: 
                y = float(xapian.sortable_unserialise(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_LATITUDE_VALUE)))
                x = float(xapian.sortable_unserialise(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_LONGITUDE_VALUE)))
    #            if y >= coords['y1'] and y <= coords['y2'] and x >= coords['x1'] and x <= coords['x2']:
                places[ (m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_PARENT_ISSUE_FIELD))
                            +":"+(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_PARENT_MESSAGE_FIELD))
                            +":"+(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_ID_FIELD))
                            +":"+str(m[xapian.MSET_DOCUMENT].get_data())
                                                                               ] = [y, x]
        results = ""
        
        # bad kludge to calculate amount of issues and messages in a cluster!
        pmessages = {}
        pissues = {}    
        
        issue_data = []
        issue_ids = {}
    
        shown_issues = {}
            

    if lat_query and lng_query:
        
        if zoomlevel < 16 and len(places)> 1 and cluster > 0:
            total_matches=0

            clusters = map_utils.cluster_map_markers(places, zoomlevel, cluster_threshold_pixels=cluster*cluster_thr_for_zoomlevel[zoomlevel]/50)
    
            issue_ids = []
            message_ids = []
            
            clusters.reverse()
            
            for cluster in clusters: # Reverse the array so clusters are processed and sent first, followed by the single cases.
    #            print cluster
                if len(cluster[0]) == 1:
                    m = cluster[0][0]
                    if 1 == 1:
                        id = m.split(":")[0]
 #                       print "it's an issue! "  + str(id)                       
                        if shown_issues.has_key(id):
                            continue
                        shown_issues[id] = 1
                        total_matches += 1
    #                    print "Issue id: " + str(issue_id)

                        if 1 == 1:
                                enquire.set_query(xapian.Query(xapian.Query.OP_AND, ["_issue_"+m.split(":")[0]]))
                                match = enquire.get_mset(0, 1)
                                if len(match) > 0:
                                    issue_dict=json.loads(match[0][xapian.MSET_DOCUMENT].get_data())
                                    print m.split(":")[0]
                                    print match[0][xapian.MSET_DOCUMENT].get_data()
                                    print len(issue_dict)
                                    if details == "no":
                                        if issue_dict["options"].has_key("comments"):
                                            del issue_dict["options"]['comments']
                                    issue_dict.update({'screen_id' :screen_id_counter})
                                    screen_id_counter += 1
                                    issue_data.append(issue_dict)
                                else:
                                    print "Api.xapian_search a: looking for _issue_" + m.split(":")[0] + " but could not find it!"

                else:
                    issuecount = 0
                    messagecount = 0
    
                    for iss in cluster[0]:
                        if pissues.has_key(str(iss)):
                            issuecount += 1
                            issue_ids.append(iss)
                        elif pmessages.has_key(str(iss)):
                            messagecount += 1
                            message_ids.append(iss)
    
                    
                    
                    issue_places = [ places[(place_id)] for place_id in cluster[0] ]

                    cluster_places = [ place_id.split(":")[2] for place_id in cluster[0] ]
                    cluster_issues = [ place_id.split(":")[0] for place_id in cluster[0] ]
                    cluster_addresses = [ place_id.split(":")[3] for place_id in cluster[0] ]


                    placelinks ={}
                    for i in range(1,len(cluster_places)):
                        if not cluster_issues[i] in placelinks:
                            placelinks[cluster_issues[i]]=( {"place": cluster_addresses[i], "link": "/r/"+cluster_issues[i]+"/"} )


                    title = str(len(placelinks)) + _(" messages in this area") 

                    sw = { 'lat': str(min([place[0] for place in issue_places])), 'lng': str(min([place[1] for place in issue_places]))}
                    ne = { 'lat': str(max([place[0] for place in issue_places])), 'lng': str(max([place[1] for place in issue_places]))}
                        
                    issue_data.append({ 'title': title,
                                        'options': {
                                            'author' : "TODO: not here yet!",
                                            'date' : "TODO: not here yet!",
                                            'points' : [ {'lng':cluster[1][0], 'lat':cluster[1][1]} ],        
                                            'score': 'NONE',
                                            'id': 'NONE',
                                            'status': 'NONE',
                                            'link': 'NONE', 
                                            'icon': {'name':'/images/merkki_klusteri_'+ str(min([max([len(placelinks),2]), 4])) +'.png',
                                                     'activeiconname': '/images/merkki_klusteri_'+ str(min([max([len(placelinks),2]), 4])) +'_fully_red.png',
                                                     'partlyactiveiconname': '/images/merkki_klusteri_'+ str(min([max([len(placelinks),2]), 4])) +'_partly_red.png',
                                                     'w': 41,
                                                     'h': 46,
                                                     'ax': 10,
                                                     'ay': 44,
                                                     },
                                            'places':cluster_places,
                                            'issues' : len(placelinks),
#                                            'messages' : message_ids,
                                            'type':'cluster',
                                            'sw': sw,
                                            'ne': ne,
                                            'placelinks' : [{"place": value["place"],"link": value["link"] } for value in placelinks.values()],
                                            },
                                        'point':  {'lon': cluster[1][0], 'lat':cluster[1][1]}, 
                                        'screen_id' : screen_id_counter})

                    screen_id_counter+= 1
        
        else:
            total_matches=len(places)

            for m in places.keys():
                id = m.split(":")[0]

                enquire.set_query(xapian.Query(xapian.Query.OP_AND, ["_issue_"+m.split(":")[0]]))
                match = enquire.get_mset(0, 1)
                if len(match) > 0:
                    issue_dict=json.loads(match[0][xapian.MSET_DOCUMENT].get_data())
                    if details == "no":
                            if issue_dict["options"].has_key("comments"):
                                del issue_dict["options"]['comments']
                    issue_dict.update({'screen_id' :screen_id_counter})

                    screen_id_counter += 1
                    issue_data.append(issue_dict)
                else:

                    print "Api.xapian_search b: looking for _issue_" + m.split(":")[0] + " but could not find it!"
    else:
        issue_data=[]
        for m in word_matches:
                itemtype=str(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_DATATYPE_FIELD))
                itemid=str(m[xapian.MSET_DOCUMENT].get_value(settings.XAPIAN_ID_FIELD))                
                issue_dict = json.loads(m[xapian.MSET_DOCUMENT].get_data())
                if details == "no":
                        if issue_dict["options"].has_key("comments"):
                            del issue_dict["options"]['comments']
                issue_dict.update({'screen_id' :screen_id_counter})
                screen_id_counter += 1
                issue_data.append(issue_dict)


    # this rather complicated procedure goes through the text fields
    # and finds the relevant words there:

    if terms:    
        for issue in issue_data: 

                       
            newtitle = ""
            for word in re.split(r'([\n-/:-?])', issue["title"]):
                stemmed = stemmer(to_lower_case(word))
                match1 = 0
                for term in terms:
                    if term == stemmed:
                        newtitle += ' <span class="keywordhighlight">' + word + '</span>'
                        match1 = 1
                        break 
                if match1 == 0:
                    newtitle += word
            
            issue["title"] = newtitle     

            relevant_string= ""
            
            if issue["options"].has_key("comments"):
                for msg in issue["options"]["comments"]:
                    content = ""
                    selected_words = {}
                    if msg.has_key("text"):
                        word_array = re.split(r'([ \n-/:-?])', msg["text"])

                        index = -1
                        for word in word_array:
                            index += 1
                            if len(word) > 1:
                                stemmed = stemmer(to_lower_case(word))
                                for term in terms:
                                    if term == stemmed:
                                        selected_words[index] = 1
                                        word_array[index] = ' <span class="keywordhighlight">' + word + '</span>'
                                        for i in range (1, 12):
                                            if index + i < len(word_array):
                                                selected_words[index + i] = 1
                                        for i in range (1, 12):
                                            if index - i >= 0:
                                                selected_words[index - i] = 1
                                        break

                        oldindex = 0
    #                    if len(selected_words) == 0:
    #                        for i in range (0, 25):
    #                            if i < len(word_array):
    #                                selected_words[i] = 1

    #                    if not selected_words.has_key(0):
                            #content += " ... "
    #                        continue

                        for index in sorted(selected_words.keys()):
                            if index > oldindex + 1:
                                content += " ... "
                            content += word_array[index]
                            oldindex = index
                        if oldindex < len(word_array):
                            content += " ... "


                        if len(content) > 7:
                            relevant_string += content


        
                if len(relevant_string) > 0:
                    issue["search_hit_string"] = relevant_string


    metadata = {'seqnum':seqnum,
                'total_matches': total_matches,
                'first_shown_match' : str(min(offset+1, len(issue_data))),
                'last_shown_match': str(offset+len(issue_data)),
                'shown_matches': str(offset)+"-"+str(offset+len(issue_data) )}
        
    return HttpResponse(dumps([metadata,issue_data]))